# /u/sy/beebe/tex/tugboat/tugboat-kwic-bib.awk, Wed Nov 15 17:03:47 1995

# /u/sy/beebe/tex/tugboat/tugboat-kwic-bib.awk, Wed Nov 15 17:03:47 1995
# Edit by Nelson H. F. Beebe <[email protected]>
# Add tugboat- prefix to kwic-* file names.
# /u/sy/beebe/tex/bib/tugboat-kwic-bib.awk, Mon Nov 14 11:46:24 1988
# Edit by Nelson H.F. Beebe (beebe at plot79.utah.edu)
# /u/sy/beebe/tex/tugboat-kwic-bib.awk, Thu Oct 13 11:58:32 1988
# Edit by Nelson H.F. Beebe (beebe at plot79.utah.edu)
# ======================================================================
#
# This awk program filters a BibTeX file to produce an input file for
# the Unix ptx(1) program which prepares a keyword-in-context (kwic),
# or permuted, index. The output of this script is a collection of
# lines, on each of which the initial token is the bibliography key.
#
# Because the output of ptx is permuted, we cannot easily preserve
# font changes (word permutations would lose font boundaries), so we
# simply strip them. To prevent confusion with quotes (") used in the
# ptx output, we drop umlauts as well. The backslash control sequence
# is changed to forward slash; otherwise the index would contain
# permutations on \bs. All remaining braces and backslashes are
# stripped.
#
# To facilitate finding secondary authors, the author field is included
# in the output as well.
#
# The reference tag will appear as the first non-blank field on each
# output line. The ptx -r option will move it to the 5th argument of
# the .xx macro in its output.
#
# Usage:
# nawk -f tugboat-kwic-bib.awk foo.bib | ptx -r -f >foo.nro
#
# The troff output of ptx is converted to lines of the form
# \kwic{head}{pre-key}{key}{post-key}{tag}
# by the step
#
# sed -f ptx.sed <foo.nro >foo.out
#
# and this can then be input to LaTeX as, e.g., a supertabular
# environment, with a suitable definition of the \kwic macro.
#
# ======================================================================

# Assume bibliography entries are keyed by an initial capital, thus
# omitting the @string{} entries.

# blank trim lines
/[ \t]*$/ {gsub(/[ \t]*$/,"");}

# extract tag from "@field{tag,"
/^@[A-Z][a-z]*/ {
tag = substr($0,index($0,"{")+1);
tag = substr(tag,1,length(tag)-1); # strip final comma
gsub(/[{}\\ \t]/,"",tag);# strip whitespace, braces and backslashes
}

# collect complete author list
/^[ \t]*author[ \t]*=[ \t]*\"/ {\
author = substr($0,index($0,"\"")+1);
while (substr(author,length(author)-1) != "\",")
{
getline $0;
author = author " " $0;
}
if (substr(author,length(author)-1) == "\",")
author = substr(author,1,length(author)-2);
author = filter(author)
printf("%s\t%s\n",tag,author);
}

# collect complete title list
/^[ \t]*title[ \t]*=[ \t]*\"/ {\
title = substr($0,index($0,"\"")+1);
while (substr(title,length(title)-1) != "\",")
{
getline $0;
title = title " " $0;
}
if (substr(title,length(title)-1) == "\",")
title = substr(title,1,length(title)-2);
title = filter(title);
printf("%s\t%s\n",tag,title);
}

function filter(s, t)
{
t = s;
gsub(/[~ \t]+/," ",t); # collapse whitespace and ties

gsub(/\\bs/,"/",t); # change \bs control sequence to forward slash

gsub(/\\[a-z][a-z] /," ",t); # remove font changes
gsub(/\\[a-z][a-z]\\/,"\\",t);

gsub(/\\"{/,"{",t); # drop umlauts

gsub(/[{}\\]/,"",t); # drop remaining braces and backslashes

gsub(/&/," and ",t); # change & to and

gsub(/_/,"\\_",t); # protect TeX special characters
gsub(/%/,"\\%",t);
gsub(/\$/,"\\$",t);

gsub(/[ ]+/," ",t); # collapse whitespace again

return (t);
}