# Copyright (c) 1996, 1997 Russell Quong.

#! /usr/bin/perl

# Copyright (c) 1996, 1997 Russell Quong.
#
# In the following, the "author" refers to "Russell Quong."
#
# Permission to use, copy, modify, distribute, and sell this software and
# its documentation for any purpose is hereby granted without fee, provided
# that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. All advertising materials mentioning features or use of this software
# must display the following acknowledgement:
# This product includes software developed by Russell Quong.
# 3. All HTML generated by ltoh must retain a visible notice that it
# was generated by ltoh and contain a link to the ltoh web page
#
# Any or all of these provisions can be waived if you have specific,
# prior permission from the author.
#
# THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
# EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
# WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
#
# IN NO EVENT SHALL RUSSELL QUONG BE LIABLE FOR ANY SPECIAL,
# INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY
# THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

# ltoh.pl = a LaTeX to HTML converter
# Russell Quong 1996, 1997
# Version 97e.

# enable readable "English names" for variables, like $MATCH or $PERL_VERSION
# instead of " punctuation names" for variables, like $& or $]
use English;

$false = 0;
$true = 1;

$author = "Russell W. Quong";
$version = "Version 97e.";
$reldate = "Mar 1997";
$status = "Experimental";
$dirsep = "/"; # directory separator character

$qval{"today"} = strip_hms_and_day( scalar(localtime()) );
$qval{"title"} = "";
$qval{"author"} = "Unknown document author";
$qval{"email"} = "Unknown email";
$qval{"url"} = "Unknown Web URL";
$qval{"keep_comments"} = 0;

$warnlevel = 3;

sub bad_command {
my($err_msg, $descrip) = @_;
return " $err_msg $descrip ";
}

# lookup(varname) ==> get the value of variable VARNAME.
sub lookup {
my($idx) = @_;
if ( ! defined $qval{$idx} ) {
return "$idx undefined";
} else {
return $qval{$idx};
}
}

sub print_html_header {
my($filename) = @_;
print '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">', "\n";
print "<HTML>\n";
if ($qval{"title"} eq "") {
$qval{"title"} = "$filename : [LaTeX --> HTML]";
}
print "<HEAD>\n<TITLE>\n", $qval{title} , "\n</TITLE>\n</HEAD>\n";
print "<BODY>\n";
}

sub print_html_trailer {
my($filename) = @_;
# need to declare ctime separately from next line, ugh. (scalar vs. array)
print "<HR>\n";
print "[Converted LaTeX --> HTML by";
print ' <a href="http://www.best.com/~quong/ltoh.html">ltoh</a>]';
print " \n";

print "<ADDRESS>\n";
if ($qval{"url"} =~ /Unknown/) {
if (! ($qval{"author"} =~ /Unknown/)) {
print "$qval{author}\n" ;
}
} else {
print "<A href=\"$qval{url}\">$qval{author}</A>\n";
}
if (! ($qval{"email"} =~ /Unknown/)) {
print "(<A href=\"mailto:$qval{email}\"><SAMP>$qval{email}";
print "</SAMP></A>)\n";
}

my($ctime);
$ctime = localtime;
$ctime = strip_hms_and_day($ctime);
print "Last modified: <samp>$ctime</samp>\n";

my($mtime) = get_file_mtime($filename);
if ($mtime ne $ctime) {
print "(LaTeX doc modified: <samp>";
print $mtime;
print "</samp>) \n";
}
print "</ADDRESS>\n";
print "</BODY>\n</HTML>\n";
}

sub strip_hms_and_day {
my($str_time) = @_;

# The next two lines strip off
# the hours:min:secs and the leading day of the week, giving
# Jul 29 1996
$str_time =~ s/^[A-Z][a-z][a-z][ \t]*//;
$str_time =~ s/\d+:\d+:\d+[ \t]*([A-Z][A-Z]T[ \t]*)?//;
return $str_time;
}

#
# return the time (as a string) that the file XXX was last modified.
# called via:
# $string = get_file_mtime (XXX);
#
sub get_file_mtime {
my($filename) = @_;
my($str_time) = '<Unable to determine time>'; # default value

my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks)
= stat($filename);
if ($dev != null) {
$str_time = localtime($mtime);
# We now have string like:
# Mon Jul 29 19:53:49 PDT 1996
$str_time = strip_hms_and_day($str_time);

warning(9, "File $filename last modified on $str_time");
}
return $str_time;
}

sub logmessage {
my($prefix, @params) = @_;
my($level) = 0;
if (@params > 1) {
if ($params[0] =~ /\d/) {
$level = shift(@params);
} else {
print ("No level given: @_");
}
}
if ($level <= $warnlevel) {
print "$prefix :: @params\n";
} else {
# print "level $level > warnlevel $warnleve\n";
}
}

sub errmsg {
logmessage(" * Error: ", @_);
}

sub warning {
logmessage(" + Warning: ", @_);
}

sub fyi {
logmessage(" - fyi: ", @_);
}

#
# Try to figure out where the perl script is actually located.
# (1) We can follow symbolic links to ABSOLUTE PATHS correctly,
# (2) But we can't handle relative symbolic links to another directory,
# as we need to keep track of where the link is relative to.
# Case (1) handles the common case of a link: xyz --> /whereever/bin/ltoh
# Also, we can handle the last link referring to a file in the same dir.

sub get_scriptdir {
fyi("PROGRAM_NAME = $PROGRAM_NAME");
my($scriptfile) = $PROGRAM_NAME; # maybe a link. We trace it down.
my($scriptdir, $fulldir) = ("." , "");

while (-l $scriptfile) {
fyi(9, "$scriptfile is link to -->");
if ($scriptfile =~ /(.*) $dirsep ( [^$dirsep]+ )/x ) {
$scriptdir = $1;
# if ($scriptdir =~ /^$dirsep/) {
# $fulldir = $scriptdir;
# } else {
# $fulldir .= $scriptdir;
# }
# fyi(9, "scriptdir = $scriptdir");
# fyi(9, "fulldir = $fulldir");
}
$scriptfile = readlink $scriptfile;
fyi(9, " $scriptfile.");
fyi(7, "scriptdir = $scriptdir");
}
return $scriptdir;
}

#
# handle all the arguments, in the @ARGV array.
#
sub main {
fyi("PROGRAM_VERSION = $version");
fyi(" by $author. 1996, 1997.");

my($nextline, $f);
my($nspecfiles) = 0;
my($scriptdir) = get_scriptdir(); # attempt to trace down a sym link.

# list of spec files
my(@specfiles) = (
"$scriptdir$dirsep" . "ltoh.specs" ,
"~$dirsep.ltoh.specs" ,
".$dirsep.ltoh.specs"
);

foreach $f (@specfiles) {
$nspecfiles += read_specfile($f);
}
if ($nspecfiles == 0) {
$nspecfiles += read_specfile("/usr/local/bin/ltoh.specs");
}
if ($nspecfiles == 0) {
$nspecfiles += read_specfile("/usr/bin/ltoh.specs");
}
if ($nspecfiles == 0) {
warning("Tried to read spec files: @specfiles");
errmsg("No specification files found. Bye-bye.");
exit(1);
}

if (@ARGV == 0) {
handle_file('-');
}
foreach $i (@ARGV) {
my($texfile) = $i;
if ($i =~ /^(.+)\.([ltex]+)$/ ) {
warning(7, "filebase = $1, suffix = $2, $input file = $texfile");
} else {
$texfile = $i . ".tex";
}
if (! -r $texfile) {
print "Cannot read file: $texfile\n";
return -1;
}
handle_file($texfile);
}
}

#
# process a specific file. Called via: handle_file($filename);
#
sub handle_file {
my($texfilename) = @_;
my($INFILE) = $texfilename;
if ($texfilename eq '-') {
$INFILE = STDIN;
} else {
open($INFILE, $texfilename);
}
$qval{"title"} = "Unnamed Web page";
fyi(5, "+ Reading LaTeX input ... handle_file($texfilename)");
@orig = <$INFILE>;
print_arr('Orig $lineno:', \@orig) if ($warnlevel >= 8);
do_comment_verbatim(\@orig);
print_arr('Verb $lineno:', \@orig) if ($warnlevel >= 8);
do_tables(\@orig);
print_arr('Tabl $lineno:', \@orig) if ($warnlevel >= 8);
do_begin_end(\@orig);
print_arr('BegE $lineno:', \@orig) if ($warnlevel >= 8);
do_tex_comms(\@orig);
print_arr('Comm $lineno:', \@orig) if ($warnlevel >= 8);
mark_delims(\@orig);
print_arr('Mark $lineno:\n', \@delim_lines) if ($warnlevel >= 8);
do_simple_latex_defs(\@delim_lines);
print_arr('{} $lineno:\n', \@delim_lines) if ($warnlevel >= 8);
do_complicated_latex_defs(\@delim_lines);
print_arr('{N} $lineno:\n', \@delim_lines) if ($warnlevel >= 8);
my($i);
final_cleanup(\@delim_lines);
print_arr('[<$lineno>]', \@delim_lines) if ($warnlevel >= 8);
print_arr("", \@delim_lines) if ($warnlevel >= 8);
print_html_file($texfilename);
}

# generate the HTML file. Pass in the name of the .tex file
# print_html_file( "filename.tex" );
#
sub print_html_file {
my($texfile) = @_;
my($base, $suffix) = ($texfile, "");
my($outfile) = $qval{"htmlfile_spec"};
fyi(8,"texfile = $texfile, outfile = $outfile");

if ($texfile =~ /^(.+)\.([^.]*)$/ ) {
$base = $1;
$suffix = $2;
}
fyi(8, "base = $base, suffix = $suffix, $input file = $texfile");

$outfile =~ s/\$BASE/$base/e;
$outfile =~ s/\$SUFFIX/$suffix/e;
fyi(3, "+ texbase = $base ; HTML output file => $outfile .");
if (-e $outfile) {
#
# comment out the following warning if desired.
#
# print "Warn, file $outfile already exists\n";
}
open(OUTF , ">"."$outfile");

#
# print to OUTF by default, by selecting it.
#
select OUTF;

print_html_header($texfile);
#
# print the generated HTML.
# @delim_lines is the final HTML after processing
#
print_arr("", \@delim_lines);
print_html_trailer($texfile);

select STDOUT;
}

$re_fields = 4;

#
# DO NOT EDIT THEM MANUALLY.
# associative arrays indexed by the LaTeX command
# containing the corresponding the HTML
# E.g, we might have something like:
# %html_start{'textbf'} = '';
# %html_end{'textbf'} = '';
# The values are read in from a specification file.
# DO NOT EDIT THEM MANUALLY.

sub do_spec_pseudo_op {
my($line) = @_;
if ($line =~ /^=\*\*\*=[ \t]+([^ \t\n]*)/ ) {
chomp($line);
my($op) = $1;
fyi(7, "Psuedo-op $op: ($line)");
if ($line =~ /endfile/) {
return $op;
}
if ($line =~ /=[ \t]*echo./) {
print "$POSTMATCH\n";
}
if ($line =~ /=[ \t]*exit/) {
fyi(6, "Exit request, line $INPUT_LINE_NUMBER, $spec_fname\n");
exit(1);
}
if ($line =~ /=[ \t]*warnlevel\s*:=\s*(\d+)/i) {
$warnlevel = $1;
}
return "pseudo-op";
} elsif ($line =~ /^([a-zA-Z_][\w]*)\s*:=(.*)$/ ) {
my($name, $val, $oldval) = ($1, $2, "");
$val =~ s/^[\s]*["]?//;
$val =~ s/["]?[\s]*$//;
$oldval = $qval{$name};
$qval{$name} = $val;
fyi(6, "set var: $name := ($val), oldval = $oldval");
return "set-var";
} else {
return "";
}
}

# Example
# For the specification
# ':b/e :\begin{rqitemize}(\{[^\}]*\})*:<ul>:</>:'
# We get:
# $html_start{'\begin{rqitemize}'} = '<ul>';
# $html_end{'\begin{rqitemize}'} = '</ul>';
# $comm_regex{'\begin{rqitemize}'} = '\begin{rqitemize}(\{[^\}]*\})*';
# $comm_attr{'\begin{rqitemize}'} = 'b/e';
#
sub read_specfile {
my($globbed_name) = @_;
my($spec_fname) = glob($globbed_name);

fyi(7, "globbed_name = $globbed_name, spec_fname = $spec_fname");

if (! -r $spec_fname) {
warning(7, "Unable to read ltoh spec file $spec_fname.");
return 0;
}
open(SPECFILE, $spec_fname);

my($line, $status);
my($lineno) = 0;
while ($line = <SPECFILE>) {
$lineno++;
# skip comments
if ($line =~ /^[ \t]*[#;]/ || $line =~ /^[ \t]*$/) {
fyi(9, "Skipping comment: $line");
next;
}
$status = handle_specification($line);
if ($status eq "endfile") {
last;
}
}
fyi(3, "+ Read spec file ($spec_fname).");
if ($lineno == 0) {
warning(3, "+ Empty spec file ($spec_fname).");
}
return 1;
}

sub handle_specification {
my($line) = @_;

my(@arr);
my($junk, $from, $to, $rest, $status);
if ( ($status = do_spec_pseudo_op($line)) ne "") {
return $status;
}
$splitter = substr($line, 0, 1);
if ($splitter eq '[ \t]') {
$splitter = "[ \t]+";
} else {
$splitter = "[" . quotemeta("$splitter") . "]";
}
fyi(8, "splitter = $splitter");
@arr = split($splitter, $line, $re_fields+3);
if (@arr < $re_fields+2) {
warning("Malformed spec, line $lineno, too few fields" . "$line");
next;
} elsif (@arr > $re_fields+2) {
warning("Malformed spec, line $lineno, too many fields" . "$line");
next;
} else {
# everything is OK. do nothing.
}
($raw_spec, $from, $to_start, $to_end) = @arr[1 .. $re_fields ];

# (a) if $from is "\begin{xyz}MORE-STUFF", strip off MORE-STUFF
# (b) if $from is "\xyz{MORE-STUFF}", strip off MORE-STUFF
# must check if $from looks like "\begin{...}" as must not apply (b)
# to a \begin{...} construct
if ($from =~ /\\begin\{.+\}/ ) {
if ($from =~ s/(\\begin\{[^\}]+\})(.+)/$1/ ) {
fyi(7, "Stripped begin\{...\} args: $arr[2] --> $from");
}
} elsif ($from =~ /(\\[A-Za-z_]+)([^A-Za-z_].*)/ ) {
$from =~ s/(\\[A-Za-z_]+)([^A-Za-z_].*)/$1/;
fyi(7, "Stripped args: $arr[2] --> $from");
}

# if $to_end is "...</>...",
# generate the corresponding closing tags, automatically. Cool.
# Have to reverse the order, hence the while loop.
#
my($scratch) = "";
if ($to_end =~ '</>') {
$junk = $to_start;
$scratch = "";
while ($junk =~ s:<([^ \t>]+)([^>]*)>:\@$1\@:o ) {
$scratch = "</$1>" . $scratch;
}
$to_end =~ s:</>:$scratch:;
fyi(7, "Gen to_end: to_start = $to_start, to_end = $to_end");
}

fyi(7, "from = $from, to_start = $to_start, to_end = $to_end");
fyi(8, "raw_spec = $raw_spec");

$comm_regex{$from} = $arr[2];
$comm_attr{$from} = $raw_spec;
$html_start{$from} = $to_start;
$html_end{$from} = $to_end;

if ($raw_spec =~ /re/i && $to_end ne "") {
warning(5, "Unused specification: $to_end");
}
}

# Simple protection of special character sequences on a line
# (1) group backslashes by pairs,
# (2) '@' to @AT@, and
# (3) protect \{ and \}.
sub escape_line {
my($line) = @_;
$line =~ s:\@:\@AT\@:g;

# handle latex math characters/macros. Strip off math mode $...$ for now
$line =~ s:\$ ([<>]) \$:\1:ogx;
$line =~ s:\$ (&[gl]t;) \$:\1:ogx;
$line =~ s:\$ (\\[a-zA-Z]+) \$:\1:ogx;

# grab pairs of backslash characters and escape them.
# as we need to distinguish between
# \{ ==> preserve '{' (escape it)
# \\{ ==> preserve '\' (escape it), but process '{' normally.
# \\\{ ==> preserve both '\' and '{' alone, follows from above
$line =~ s:\\\\:\@BS\@\@BS\@:og;
$line =~ s:\\\{:\@<<\@:og;
$line =~ s:\\\}:\@>>\@:og;

# unescape pairs of backslack characters.
$line =~ s:\@BS\@\@BS\@:\\\\:og;

# not needed, yet. (We run this function early in the processing,
# and this change unnecessarily mucks things up at this early stage).
# escape newlines
# $line =~ s:\@AT\@NL\@AT\@:\@NL\@:og;

return $line;
}

#
# print_arr(prefix_string, reference_to_an_array);
# In prefix_string the string '$lineno' is converted to the array index.
# If prefix_string ends with a '\n' (newline), append a newline to the line.
# Ex:
# @lines = <>; # read all input into an array
# print_arr('$lineno:', \@lines); # pass in a reference to the array
sub print_arr {
my($prefix, $arr) = @_;
my($i);
my($ndigits) = int( (log($#{$arr}) / exp(1) + .999) ) ;
my($newline) = "";
if ($prefix =~ s/(\\n|\n)$//o) {
$newline = "\n";
}
foreach $i (0 .. $#{$arr}) {
$s = $prefix;
if ($prefix =~ /\$lineno/o) {
$s =~ s/\$lineno/sprintf("%$ndigits" . "d", $i)/ge;
}
print "$s$arr->[$i]$newline";
}
}

$orig_inh = "";

#
# I've hard-wired the following "early actions"
# 1) handle \begin{verbatim} for now.
# 2) You can set HTML/Perl variables via lines that look like:
# %-ltoh- pseudo-op-line
# %-tex2html- pseudo-op-line ## backward compatibility for me [RQ]
#
# 2) I've hard coded a simplistic table hander too. Needs updating.
# It handles \multicolumn{ncols}{.}{contents} quite crudely
# It handles \multirow{nrows}{.}{contents} quite crudely
# NOT any more, see
sub do_comment_verbatim {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
vec($orig_inh, $arrlen-1, 0) = 0; # pre-extend
my($inh) = $false;
for ($i=0; $i < $arrlen; $i++) {
vec($orig_inh, $i, 1) = $inh;
my($line) = $arr->[$i];

if ($line =~ /^[%]+-tex2html-\s*(.*)/ ) {
warning(3, "Use of -tex2html- is deprecated. Change to -ltoh-");
handle_specification($1);
} elsif ($line =~ /^[%]+-ltoh-\s*(.*)/ ) {
fyi(6, "Processing psuedo-op in LaTeX source ($1)");
handle_specification($1);
}
# preserve special chars in html, always
$line =~ s/\\&/&/xg;
$line =~ s/ < /</xg;
$line =~ s/ > />/xg;
if ($line =~ s:\\begin{verbatim}:<pre>: ) {
vec($orig_inh, $i, 1) = $true; # inhibit further processing
$inh = $true;
} elsif ($line =~ s:\\end{verbatim}:</pre>: ) {
vec($orig_inh, $i, 1) = $true; # inhibit further processing
$inh = $false; # allow processing on next line
} elsif ($inh == $false && $line =~ s:^([ \t]*%.*):: ) {
if (! lookup("keep_comments") ) {
$line = "";
}
vec($orig_inh, $i, 1) = $true; # inhibit further processing
} elsif ($inh == $false && $line =~ /^\\end{document}/ ) {
$#$arr = $i;
fyi(8, "Truncating down to $i lines");
last;
}
$arr->[$i] = $line;
}
}

# handle nested braces/parens/brackets.
# convert { ==> {:level: level starts at 0
#
# E.g. a{b{c}d{e{f}}g} ==> a{:0:b{:1:c:1}d{:1:e{:2:f:2:}:1:}g:0:}
#
sub mark_braces_one_line {
my($ldelim, $rdelim, $line) = @_;
my($newline, $level) = ("", 0);
my($lookfor) = ($ldelim eq $rdelim) ? "[$ldelim]" : "[$ldelim$rdelim]";
while ( $line =~ /(.*[^\\])($lookfor)(.*)/) {
my($prev, $del, $after) = ($1, $2, $3);
my($repl);
if ($del eq $ldelim) {
$repl = "$del:$level:";
$level++;
} elsif ($del eq $rdelim) {
$repl = ":$level:$del";
$level--;
}
$newline .= "$prev$repl";
$line = $after;
}
return $newline;
}

# handle nested braces/parens/brackets.
# convert "{" ==> " {:level " where level starts at 0
#
# E.g. a{b{c}d{e{f}}g}h ==> a {:0 b {:1 c 1:} d {:1 e {:2 f 2:} 1:} g 0:} h
#
sub mark_braces_one_line {
my($ldelim, $rdelim, $origline) = @_;
my($newline, $level, $line) = ("", 0, $origline);
my($lookfor) = ($ldelim eq $rdelim) ? "[$ldelim]" : "[$ldelim$rdelim]";
my($prev, $del, $after); # need after to survive the loop.

$line =~ s/\@/ \@AT\@_/g;
$line =~ s/\\\\/ \@2BS\@_/g;
my($qldel) = quotemeta($ldelim);
$line =~ s/\\$qldel/ \@L\@_/g;
my($qrdel) = quotemeta($rdelim);
$line =~ s/\\$qrdel/ \@R\@_/g;

# need ".../s" in the following while() to preserve newlines. Ugh.
while ( $line =~ /(.*?)($lookfor)(.*)/s) {
($prev, $del, $after) = ($1, $2, $3);
fyi(9,"mark_braces_one_line:prev=($prev),del=($del),after=($after)\n");
my($repl);
if ($del eq $ldelim) {
$repl = " $del:$level ";
$level++;
} elsif ($del eq $rdelim) {
$level--;
$repl = " $level:$del ";
}
$newline .= "$prev$repl";
$line = $after;
}
$newline .= $after;

$newline =~ s/ \@R\@_/\\$qrdel/g;
$newline =~ s/ \@L\@_/\\$qldel/g;
$newline =~ s/ \@2BS\@_/\\\\/g;
$newline =~ s/ \@AT\@_/\@/g;
return $newline;
}

$talign{"l"} = "left";
$talign{"r"} = "right";
$talign{"c"} = "center";
$talign{"p"} = "left";
$talign{"X"} = "left";

sub do_tables {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
my($tlev, $endrow, $colnum) = (0, undef, 0);
my(@cols) = ();
for ($i=0; $i < $arrlen; $i++) {
my($origline) = $arr->[$i];
my($line) = $origline; #
fyi(9, "Tables orig-line $i: $origline");
if ($origline =~ /\\begin\{[A-Za-z]*tabular[A-Za-z]*\}/) {
$line =~ s/(\\\\)? \s* \\[ch]line.*//x;
$line = mark_braces_one_line("{", "}", $line);
fyi(9, "Tables delim-line $i: $line");
$line =~ s/ \{:1 .*? 1:\} //g; # remove nested brace stuff
$line =~ s/ \{:0 /\{/g;
$line =~ s/ 0:\} /\}/g;
fyi(9, "Tables squashed-line $i: $line");
my(@fragments) = split(/[{}]+\s*/,$line);
my($aligns) = $fragments[$#fragments];
my($tabletag);
if ( $aligns =~ /[|]/) {
$tabletag = "<table border>";
} else {
$tabletag = "<table>";
}
while ( $aligns =~ /([lcrpX])(.*)/s ) {
my($lcr) = $1;
my($rest) = $2;
fyi(9, "col-align lcr = $lcr, rest = $rest");
$aligns = $rest;
push (@cols, $talign{$lcr});
}
$tlev ++;
$endrow = 1;
$colnum = 0;
$line =~ s/\\begin\{tabular[A-Za-z]*\}\{.*\}/$tabletag/;
fyi(9, "line =-> $line");
fyi(7, "Table with $#cols+1 columns. Aligns = @cols");
} elsif ($line =~ s:\\end{tabular[A-Za-z]*}:</table>: ) {
$tlev --;
@cols = ();
} elsif ($tlev > 0) {
if ($endrow) {
$line = "<TR> <TD> $line";
$endrow = 0;
}
$line =~ s:\& (?![ampltg]+;):</TD> <TD>:gx;

# "&" ==> "</TD><TD>", but skip ">" "<" and "&".
$line =~ s:\& (?![ampltg]+;):</TD> <TD>:gx;

# convert \multicolumn --> \mc
if ( $line =~ s:\\multicolumn{:\\mc\{:xg ) {
fyi(9, "mc line: $line");
}

# add individual column alignments.
while (
$line =~ m/(.*?) (<TD>) (\s* \\mc\{\d+\}\{.+?\}\{)? (.*)/sx ) {
my($before, $after) = ($1, $4);
fyi(9, "cols[colnum=$colnum] = $cols[$colnum]");
my($mcspec) = $3;
fyi(9, "mc: before=($before),mcspec=$mcspec,after=($after)");

if ($mcspec eq "") {
if ($colnum > $#cols) {
warning("Too many columns in table, line $i:\n + ($origline)");
$line = "$before<TD align=left" . ">" . $after;
} else {
$line = "$before<TD align=$cols[$colnum]>" . $after;
}
$colnum++;
} else {
if ($mcspec =~ m:\\mc \{(\d+)\} \{([^}]+)\}:x ) {
my($colspan) = $1;
$colnum += $colspan;
$2 =~ / ([lcrpX]) /x;
$line = "$before<TD align=$talign{$1} colspan=$colspan>{" . $after;
}
}
}
# strip off trailing \\ and/or \hline. Each might be on sep line.
if ( $line =~ s:\\\\:</TD></TR>:xg ) {
$endrow = 1;
$colnum = 0;
}
$line =~ s/\s* \\hline//g;
$line =~ s/\\cline\{[0-9-]+\}//;
}
$arr->[$i] = $line;
}
}

$re_tex_arg_0 = "(\[[^\]*\]|{[^}]*})*"; # no nesting
$re_tex_arg_1 = "({(([^{}]*{[^{}]*}[^{}]*)*)})*"; # nesting up to level 1.

sub do_begin_end {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
my($i, $level, $inh);
my($comm, $commregex, $fromx, $tox, $latexcomm);
for ($i=0; $i < $arrlen; $i++) {
if ( vec($orig_inh, $i, 1) > 0) { # skip if processing inhibited
next;
}
$line = $arr->[$i];
$line = escape_line($line);
if ( $line =~ m/\\((begin|end)\{[a-zA-Z_]+\})/ ) {
# Example:
# $latexcomm = \begin{rqitemize}[12pt]{article}
# $comm = \begin{rqitemize}
# $fromx = \begin{rqitemize}
# $tox = <ul>
#
$latexcomm = $comm = $MATCH;
$comm =~ s/end/begin/; # convert \end{comm} to \begin{comm}
if ( defined $html_start{$comm} ) {
if ($latexcomm =~ /^\\end/) {
$fromx = $latexcomm;
$tox = $html_end{$comm};
$commregex = $latexcomm;
} else {
$fromx = $comm_regex{$comm};
$tox = $html_start{$comm};
$commregex = $comm_regex{$comm};

# Flag an attempt to allow any \begin{xxx} to take further args, as
# error, as \begin{center} should not take any further args
#
if ($commregex eq $comm) {
$commregex .= $re_tex_arg_0;
fyi(7, " B/E Spruce: $comm --> $commregex");
}
}
$commregex =~ s/^\\/\\\\/g;
fyi(7, " B/E Apply: $commregex ==> $tox, on: ($line)");
$line =~ s/$commregex/$tox/;
} else {
warning(6, "UNDEFINED COMMAND: $comm, ignoring.");
$html_start{$comm} = "";
$html_end{$comm} = "";
}
}
$arr->[$i] = $line;
}
}

#
# handle straight-forward substitutions:
# \xxx ---> yyy
#
sub do_tex_comms {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
my($i, $comm, $comm_re, $fromx, $tox);
for ($i=0; $i < $arrlen; $i++) {
if ( vec($orig_inh, $i, 1) > 0) { # skip if processing inhibited
next;
}
$line = $arr->[$i];

# Find the latex command \xxx
# We mark each command, incase there are multiple commands on a line.
# First convert all \xxx --> @BS@xxx,
# Next, one-by-one convert @BS@xxx --> \xxx and process it.
# Thus, we are left with the original \xxx (not breaking old code).
# 9/16/96.
#
$line =~ s/\\([A-Za-z]+)/\@BS\@$1/g;
while ( $line =~ s/\@BS\@([A-Za-z]+)/\\$1/ ) {
$comm = '\\' . $1;
if (defined $html_start{$comm} && ($comm_attr{$comm} =~ /comm/)) {
$tox = $html_start{$comm};
# might have:
# $comm = '\documentclass'
# $comm_re = '\documentclass[^ \t]+'
# apply $comm_re --> $tox
#
$comm_re = $comm_regex{$comm};
$comm_re =~ s/^\\/\\\\/g; # escape the darn backslashes
warning(7, " TeXComm Apply: $comm_re ==> $tox on: ($line)");
$line =~ s/$comm_re/$tox/g;
my($a, $b, $c, $d, $e) = ($1, $2, $3, $4, $5);
$line =~ s/\$1/$a/g;
$line =~ s/\$2/$b/g;
$line =~ s/\$3/$c/g;
$line =~ s/\$4/$d/g;
$line =~ s/\$5/$e/g;
}
if ( ! defined $html_start{$comm} ) {
warning(3, " Unknown \\comm: $comm, on: ($line)");
}
}

#
# Hardwired. \today --> current date.
#
my($today) = $qval{"today"};
$line =~ s/\\today/$today/g;

if ($line ne $arr->[$i]) {
warning(8, "regex: $arr->[$i] ==> $line");
}
$arr->[$i] = $line;
}
}

# dstack = delimiter stack
# dsp = delimiter stack pointer
$dstack[40] = ( "" );
$dsp = 0;
$dcount = 1; # we reserve dcount=0 as an error.
$argspan[100] = undef; # argspan[i] = range of lines between brace i
$lstack[40] = ( "" ); # last id at this depth (upto depth 40).
$llinestack[40] = ( "" ); # last line containing id at this depth

#
# the following apply only to complex commands (those processing their arg).
#
$commstack[40] = ( "" ); # the command to
$argsofar[40] = ( "" ); # number of args seen at level i so-far
$argwant[40] = ( "" ); # number of args wanted at level i
$arglist[40][20] = ( "" ); # the actual args for the level i command

$argrem_stack[40] = ( "" ); # num of args remaining at this depth.
$nextarg[100] = undef; # nextarg[i] = id of next arg at same level

#
# label all { and } with id's.
# Split strings via braces namely up after '{' and before '}'
#
# Also link up arguments for "complicated" latex commands, which take
# arguments. We must defer processing of these commands
# until we have seen the closing brace for the last argument,
# in case the arguments (i) undergo processing and (ii) are nested. Ugh.
#
sub mark_delims {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
my($i, $j, $id, $level, $line, $frag, $inh);
$#delim_lines = $arrlen; # pre extend array delim_lines
$ndelim_lines = 0;
for ($i=0; $i < $arrlen; $i++) {
$inh = vec($orig_inh, $i, 1);
$line = $arr->[$i];
#
# Also, handle $line == "", as subsequent code would mishandle it.
# (The split yields zero fragments, and the line is not passed on).
if ($inh > 0 || $line eq "") {
warning(8, "AFTER $ndelim_lines: $line\n");
vec($delim_inh, $ndelim_lines, 1) = $inh;
$delim_lines[$ndelim_lines] = $line;
$ndelim_lines ++;
next;
}
warning(8, "mark_delims($line)");

my($splitstr) = ' @x@ ';
$line =~ s/\\[a-zA-Z_]+\{/$splitstr$MATCH/g;
$line =~ s/{/\{$splitstr/g;
$line =~ s/\}/$splitstr\}/g;
my(@fragments) = split(/$splitstr/,$line);
for ($j=0; $j < scalar(@fragments); $j++) {
$frag = $fragments[$j];
warning(9, "Frag $j: $frag");
if ( $frag =~ /^\}/ ) {
$dsp --;
if ($dsp < 0 || ! defined($dstack[$dsp]) ) {
my($lineno) = "Line: $INPUT_LINE_NUMBER";
warning(
"$lineno, IMBALANCED BRACES: Too many closing braces");
$id = -1;
}
$id = $dstack[$dsp];
$dstack[$dsp] = undef;
$lstack[$dsp] = $id;
$lstack[$dsp+1] = undef;
$llinestack[$dsp] = $ndelim_lines;

my($tmp) = $ndelim_lines - 1;
$argspan{$id} .= "$tmp";
warning(7, " mark: argspan{$id} = $argspan{$id}");

$x = ":$id:\]";
$frag =~ s/^\}/$x/;
}
if ( $frag =~ /\{$/ ) {
$id = $dstack[$dsp] = $dcount;
my($lastid) = $lstack[$dsp];
#
# link args only if we find things like: xxx}{yyy
# Whoops, no, as a line break may split the }{ pair
# In either case the prev arg must be on the prev line.
$nextarg[$lastid] = undef;
if ($lastid != undef && $llinestack[$dsp] >= $ndelim_lines-1) {
if ($lastid <= 0 || $lastid > $dcount) {
warning("ERROR linking id's. Contact author.");
}
$nextarg[$lastid] = $id;
warning(7, " mark: nextarg[$lastid] = $nextarg[$lastid]");
warning(8, " mark: id2comm{$lastid} = $id2comm{$lastid}");
}
$lstack[$dsp] = $id;

$dsp ++;
$dcount ++;

if ( $frag =~ /(\\[\w]+)\{$/) {
warning(8, "mark: id2comm{$id} = $1");
$id2comm{$id} = $1;
} else {
$id2comm{$id} = "_:NULL_COMMAND:_";
}
my($tmp) = $ndelim_lines + 1;
$argspan{$id} = "$tmp:";

$x = "\[:$id:";
$frag =~ s/\{$/$x/;
}
$frag =~ s/\n/\@NL\@/g;
warning(8, "END-MARK $ndelim_lines: $frag");

$delim_lines[$ndelim_lines] = $frag;
vec($delim_inh, $ndelim_lines, 1) = $inh;
$ndelim_lines ++;
}
}
}

#
# handle "simple" latex commands not dependent on the data, namely:
# \command{ XXX } ===> <html_start_sequence> XXX <html_end_sequence>
# In particular, the conversion does not depend on XXX.
# Of course, we allow arbitrary nesting of braces in XXX,
# which was the hard part of all this.
# At this point, all braces have been munged and tagged with an unique ID.
#
sub do_simple_latex_defs {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
my($comm, $fromx, $tox, $id, $inh, $i, $j, $k, $l, $nargs, $argidx);
for ($i=0; $i < $arrlen; $i++) {
$inh = vec($delim_inh, $i, 1);
$line = $arr->[$i];
if ($inh > 0) {
next;
}
warning(8, "do_simple_brace_comms($line)");

if ($line =~ /\[:([0-9]+):/) {
$id = $1;
$comm = $id2comm{$id};
warning(8, " id2comm{$id} is $id2comm{$id}");
if ($comm ne "_:NULL_COMMAND:_") {
if ($comm_attr{$comm} =~ /\{\d+\}/) {
next; # next line. We don't handle args.
}
$fromx = $comm . $MATCH;
$fromx = quotemeta($fromx);
if ( defined $html_start{$comm} ) {
$tox = $html_start{$comm};
} else {
$tox = bad_command("Unknown LaTeX command", "$comm\{");
if ($nextarg[$id] != undef) {
# it's tnot clear what error we should report
}
}
warning(8, " {} Apply: $fromx ==> $tox on: $line");
if (! ($line =~ s/$fromx/$tox/)) {
warning(3, " {} FAILED: $fromx ==> $tox on: $line");
}
}
}
if ($line =~ /:([0-9]+):\]/) {
$id = $1;
$comm = $id2comm{$id};
if ($comm ne "_:NULL_COMMAND:_") {
if ($comm_attr{$comm} =~ /\{\d+\}/) {
next; # next line. We don't handle args.
}
$fromx = $MATCH;
$fromx =~ s/:\]/:\\]/;
if ( defined $html_end{$comm} ) {
$tox = $html_end{$comm};
} else {
$tox = bad_command("", "\}");
}
warning(8, " {} Apply: $fromx ==> $tox on: $line");
$line =~ s/$fromx/$tox/;
}
}
$arr->[$i] = $line;
}
}

sub getspan {
my($arr, $span) = @_;
my($start,$end, $z) = split(/:/, $span, 3);
my($k, $result) = (0, "");
for ($k = $start; $k <= $end; $k++) {
$result .= $arr->[$k];
}
return $result;
}

#
# Mark the "end arg" via ID of the arguments for multi-arg commands.
# We process the complicated command when we reach the last brace.
#
sub do_complicated_latex_defs {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
my($comm, $fromx, $tox, $id, $inh, $i, $j, $k, $l, $nwant, $argidx);
for ($i=0; $i < $arrlen; $i++) {
$inh = vec($delim_inh, $i, 1);
if ($inh > 0) {
next;
}
$line = $arr->[$i];
warning(8, "do_complicated_brace_comms($line)");
if ($line =~ /\[:([0-9]+):/) {
$id = $1;
$from = $comm = $id2comm{$id};
my($multarg) = scalar($comm_attr{$comm} =~ /\{(\d+)\}/);
warning(9, " multarg = $multarg");
if (! $multarg) {
next;
}
$nwant = $1;
$argidx = $id;
#
# start from 2, as the first arg will always exist.
#
$argarr[$id][1] = $id;
for ($j=2; $j <= $nwant; $j++) {
$argidx = $nextarg[$argidx];
if ($argidx == undef) {
warning("Not enough arguments for $comm\n");
last;
}
$argarr[$id][$j] = $argidx;
}
$comm_trigger{$argidx} = $id;
warning(9, " comm_trigger{$argidx} is $comm_trigger{$argidx}");
}
if ($line =~ /:([0-9]+):\]/) {
my($trigger_id) = $1;
if ( defined $comm_trigger{$trigger_id} ) {
$id = $comm_trigger{$trigger_id};
$comm = $id2comm{$id};
if ($comm_attr{$comm} =~ /\{(\d+)\}/) {
my(@arglist);
$nwant = $1;
$tox = $html_start{$comm};
for ($j=1; $j <= $nwant; $j++) {
$k = $argarr[$id][$j];
my($currarg) = getspan($arr, $argspan{$k});
warning(9, " comp_def: arg[$id][$j] = $currarg");
$tox =~ s/#$j/$currarg/g;
$arglist[$j] = $currarg;
}
warning(7, "CPX $comm --> tox = $tox");
my($a, $b) = split(/:/, $argspan{$id}, 2);
$arr->[$a-1] = $tox;
my($c, $d, $e) = split(/:/, $argspan{$argarr[$id][$nwant]}, 3);
warning(9, "CPX erasing lines $a to $d");
for ($j=$a; $j <= $d; $j++) {
my($o) = $arr->[$j];
if ($o ne "") {
warning(8, " Wiping out line[$j] --> nothing");
}
$arr->[$j] = "";
}
}
$line =~ s/:$trigger_id:\]/$html_end{$comm}/;
}
}
$arr->[$i] = $line;
}
}

sub final_cleanup {
my($arr) = @_;
my($arrlen) = scalar(@$arr);
my($inh);
my($lastline);
for ($i=0; $i < $arrlen; $i++) {
$inh = vec($delim_inh, $i, 1);
if ($inh > 0) {
next;
}
$line = $arr->[$i];

# restore newlines
$line =~ s/(\@NL\@)+/\n/g;

# restore backslashes
$line =~ s/\@BS\@/\\/g;

# restore newlines
$line =~ s/(\@NL\@)+/\n/g;

# convert \\ (force line breaks) to HTML line breaks
$line =~ s/\\\\/ /;

# restore '@'
$line =~ s:\@AT\@:\@:g;

# convert any remaining unprocessed braces back to their orig form
$line =~ s/\[:\d+:/\{/;
$line =~ s/:\d+:\]/\}/;

# restore real '{' and '}', after removing extraneous '{' '}'
$line =~ s:\{::g;
$line =~ s:\}::g;
$line =~ s:\@<<\@:{:g;
$line =~ s:\@>>\@:}:g;
$line =~ s:\@<<\@:\\\{:g;
$line =~ s:\@>>\@:\\\}:g;

# hard-wired. Conversions. Ugh. No, double ugh.
# convert protected latex chars into single chars
$line =~ s/\\([\{\}\_\&\$\#\%])/$1/og;
# drop tildes (forced spaces in LaTeX).
$line =~ s/\.[~]+/\. /og;
# convert multiple dashes to a single dash.
$line =~ s/[-][-]+/-/og;

# if (($i > 0) && ($lastline =~ /^\s*\n$/) && ($line =~ /[^\s]+/)) {
if (($i>=2) && ($lastline =~ /^\s*\n$/) && ($arr->[$i-2] =~ /\n$/)) {
$line = "$line";
}

$lastline = $line;
$arr->[$i] = $line;
}
}

#
# start executing at main()
#

main();