Article 11100 of comp.lang.perl:
Path: feenix.metronet.com!news.utdallas.edu!wupost!howland.reston.ans.net!gatech!usenet.ufl.edu!travis.csd.harris.com!amber!brad
From:
[email protected] (Brad Appleton)
Newsgroups: comp.lang.perl
Subject: texinfo preprocessor - need help optimizing
Date: 2 Mar 1994 15:21:24 GMT
Organization: Harris CSD, Ft. Lauderdale, FL
Lines: 430
Message-ID: <
[email protected]>
NNTP-Posting-Host: amber.ssd.csd.harris.com
Summary: I need your help
I have a perl script/library function to preprocess a texinfo
file. What it does is read the texinfo file, expanding all
the @include's and reading all the @iftex/@ifinfo/@ignore
commands and spit out the actual commands that would NOT
be ignored by makeinfo (or by TeX).
The problem is, it seems to frequently dump core when I
get to somewhere around 25,000 lines of input and I really
dont know why. I have used "perl -d" but it really hasnt
given me any reason to suspect that I am doing something
wrong (as opposed to "correct but inefficient") in my script.
I have also debugged the perl executable (a stripped version
of it anyway) and have not found any evidence of infinite
recursion.
I am using perl4 patchlevel 36 on a Harris NightHawk running
CX/UX 7.X (a "real-time" variant of Unix).
I be much obliged if some of you would be willing to read the script
and point out some inefficiencies (speed-wise or memory-wise), or
things that you think could be done "better" (in your possibly
subjective opinion).
Here is the script - your help is appreciated!
-------------------------------------------------------------------------------
# texinfo.pl --
# ---------------------------------------------------------------------------
# Preprocess a texinfo file and let the user see only those lines that will
# actually appear in in the info file (@include'd files are expanded).
#
# Usage:
# require 'texinfo.pl';
# &texinfo(name);
#
# where <name> is the name of the users function (which takes an input
# line as its parameter) that will process the input lines, one by one.
# If no "name" parameter is given than the default action (which is to
# print the line) is used. The "&texinfo" function will parse @ARGV
# looking for 'makeinfo' options and then will preprocess the input files
# specified by the rest of @ARGV.
#
# Other useful items:
# &texinfo'where -- Subroutine (takes no parameters) to return the current
# input file name and line number (in a single string).
#
# $texinfo'DEBUG -- Set this variable to non-zero if you want some simple
# debugging output.
# ---------------------------------------------------------------------------
# I have to do my own option processing cuz getopt.pl, getopts.pl, and
# newgetopt.pl wont do what I want.
#
# Need to accept the following makeinfo options:
# -I DIR to add DIR to the directory search list for including
# files with the `@include' command.
# -D VAR to define a variable, as with `@set'.
# -U VAR to undefine a variable, as with `@clear'.
# --no-validate to suppress node cross reference validation.
# --no-warn to suppress warning messages (errors are still output).
# --no-split to suppress the splitting of large files.
# --no-headers to suppress the output of Node: Foo headers.
# --verbose to print information about what is being done.
# --version to print the version number of Makeinfo.
# --output FILE or -o FILE
# to specify the output file. When you specify the
# output file in this way, any `@setfilename' in the
# input file is ignored.
# --paragraph-indent NUM
# to set the paragraph indent to NUM (default 3).
# --fill-column NUM to set the filling column to NUM (default 72).
# --error-limit NUM to set the error limit to NUM (default 100).
# --reference-limit NUM
# to set the reference warning limit to NUM (default 1000).
# --footnote-style STYLE
# to set the footnote style to STYLE. STYLE should
# either be `separate' to place footnotes in their own
# node, or `end', to place the footnotes at the end of
# the node in which they are defined (the default).
#
# In addition, I will allow the following:
# -X FILE dont expand @includes of the given FILE
# --debug generate output for debugging
# --no-include dont expand '@include'ed files
# --tex generate output that TeX would see (instead of makeinfo).
# reverses the interpretation of @ifinfo, @iftex, and @tex
# directives.
#
package texinfo;
$DEBUG = 0; ## Debugging?
$IS_INFO = 1; ## Should @ifinfo be TRUE?
$IGNORE_INCLUDES = 0; ## Ignore @include directives?
%IGNORE_INCS = (); ## List of files to NEVER @include
$CMD='@'; ## Texinfo command prefix.
$USERSUB = "texinfo'default_usersub" ; ## default line-processing action
@CONDS = (); ## Stack of active texinfo conditions
@INCS = (); ## Directory search path for @included files
@HANDLES = (); ## Stack of input-file handles
%LINES = (); ## Keep track of current line in each active handle
$LINES = 0; ## Total # of input lines seen so far
# Default line-processing action
sub texinfo'default_usersub {
# just print to standard output
print STDOUT @_ ;
}
# Get makeinfo-options from main'@ARGV
sub texinfo'getopts {
local($opt, $_);
## get options from @main'ARGV
while ($main'ARGV[0] =~ /^-/) {
$_ = shift(@main'ARGV);
last if ($_ eq "--");
if (/^-([UDIXo])(.*)$/o) {
$opt = $1;
$_ = $2;
$_ = shift(@main'ARGV) if (! $_);
if ($opt eq "D") {
if (/^([^=]+)=(.*)$/) {
$DEFS{$1} = $2;
} else {
$DEFS{$_} = 1;
}
} elsif ($opt eq "U") {
undef $DEFS{$_};
} elsif ($opt eq "I") {
unshift(@INCS, "$_");
} elsif ($opt eq "X") {
++$IGNORE_INCS{$_};
} elsif ($opt eq "o") {
;
}
} elsif (/^--o.*$/o) {
$_ = shift(@main'ARGV);
} elsif (/^--no-inc.*$/o) {
$IGNORE_INCLUDES = 1;
} elsif (/^--[Dd][Ee][Bb][Uu][Gg].*$/o) {
$DEBUG =1;
} elsif (/^--[Tt][Ee][Xx].*$/o) {
$IS_INFO = 0;
} elsif (/^--([^o].*)$/o) {
$_ = $1;
if (/^[^nv]/) {
$_ = shift(@main'ARGV);
}
} else {
print STDERR "$0: unrecognized 'makeinfo' option \"$_\" -- ignored.\n";
}
}
}
# Return the current filename and line-number in the format "file:line"
# (good for printing error-messages)
#
sub texinfo'where {
local($fhandle) = $HANDLES[$#HANDLES];
return "$LINES (${fhandle}:$LINES{$fhandle})" if ($DEBUG);
return "${fhandle}:$LINES{$fhandle}";
}
# ------------------------------------------------------------------
# The array CONDS is a "stack" of conditions!!! Each time we see
# a statement that implies conditional inclusion/exclusion of text,
# we push the statement type on the stack (along with an indication
# of whether the condition was true or false).
# ------------------------------------------------------------------
# Push the active condition on the stack, remembering its type, result
# and where in the input it occurred.
#
sub texinfo'push_cond {
local($type, $result) = @_;
local($where) = &texinfo'where;
local($_) = "${type}\t${result}\t${where}";
push(@CONDS, $_);
}
# Get the current active condition. returns the list (type, result, where).
# If there is no active condition, returns the list ('', 1, 0).
#
sub texinfo'top_cond {
return ('', 1, 0) if (0 == @CONDS);
split(" ", $CONDS[$#CONDS]);
}
# Same as top_cond, but also removes the topmost condition from the stack
sub texinfo'pop_cond {
return ('', 1, 0) if (0 == @CONDS);
split(" ", pop(@CONDS));
}
# Complain about any conditions that are still active in the given input file
sub texinfo'extra_cond {
local($fhandle) = @_;
local($cond_type, $cond_result, $cond_where);
local($fname, $line);
## Check for any un-terminated conditions in this file
## (they are not allowed to span across files)
while (1) {
($cond_type, $cond_result, $cond_where) = &texinfo'top_cond;
last if (! $cond_type);
($fname, $line) = split(':', $cond_where);
last if ($fname ne $fhandle);
print STDERR "$0: ($where) expecting \"${CMD}end $cond_type\" for ${CMD}$cond_type at $cond_where.\n";
&texinfo'pop_cond;
}
}
# Evaluate an inclusion/exclusion condition. The type should be one
# of 'ifset', 'ifclear', 'iftex', 'ifinfo', 'ignore', or 'tex'. The
# expression should only be present if the type is 'ifset' or 'ifclear',
# and should be the name of the corresponding value to test.
#
sub texinfo'eval_cond {
local($type, $expr) = @_;
local($result) = 0;
## trim whitespace
$type =~ s/\s*(\S*)\s*/\1/;
$expr =~ s/\s*(\S*)\s*/\1/;
## evaluate the expression
if ($type eq "ifset") {
$result = 1 if (defined($DEFS{$expr}));
} elsif ($type eq "ifclear") {
$result = 1 if (! defined($DEFS{$expr}));
} elsif ($type eq "iftex") {
$result = 1 - $IS_INFO;
} elsif ($type eq "ifinfo") {
$result = $IS_INFO;
} elsif ($type eq "tex") {
$result = 1 - $IS_INFO;
} elsif ($type eq "ignore") {
$result = 0;
} else {
local($where) = &texinfo'where;
print STDERR "$0: ($where) unknown command \"${CMD}$type\"\n.";
}
print STDERR "> $type $expr = $result\n" if ($DEBUG);
$result;
}
# Start conditional inclusion/exclusion command.
#
# We are given the type of the current condition, and its result.
# The result we "push" onto the condition stack will be TRUE *only* if the
# previous condition result and the given condition result are both TRUE.
#
sub texinfo'start_cond {
local($type, $expr) = @_;
local($result, $_);
local($prev_type, $prev_result, $prev_where) = &texinfo'top_cond;
$result = $prev_result && &eval_cond($type, $expr);
&texinfo'push_cond($type, $result);
}
# End conditional inclusion/exclusion command
#
# We are given the type of condition we expect to be in the middle of.
# If we are in the middle of that condition-type, then pop it from the
# stack, otherwise complain about an @end for the wrong type.
#
sub texinfo'end_cond {
local($end_type) = @_;
local($where) = &texinfo'where;
local($cond_type, $cond_result, $cond_where) = &texinfo'top_cond;
if ($cond_type eq $end_type) {
&texinfo'pop_cond;
} elsif ($cond_type) {
print STDERR "$0: ($where) expecting \"${CMD}end $cond_type\" for ${CMD}$cond_type at $cond_where but saw \"${CMD}end $end_type\" instead.\n" ;
} else {
print STDERR "$0: ($where) \"${CMD}end $end_type\" has no corresponding ${CMD}$cond_type.\n";
}
}
# Handle @set VALUE
sub texinfo'def_cmd {
local($name, $value) = @_;
$value = 1 if (! $value);
$DEFS{$name} = $value ;
}
# Handle @clear VALUE
sub texinfo'undef_cmd {
local($name) = @_;
undef $DEFS{$name} ;
}
# Find an @included file
sub texinfo'find_file {
local($fname) = @_;
local($found) = 0;
local($dir);
local($result);
## look for file in current directory
if (-f $fname) {
++$found;
$result = $fname;
} else {
## look for file in directory list
foreach $dir (@INCS) {
if (-f "$dir/$fname") {
++$found;
$result = "$dir/$fname";
last;
}
}
}
$result;
}
# Handle @include's
sub texinfo'include_cmd {
local($included) = @_;
local($result) = 0;
local($where) = &texinfo'where;
local($fname) = &texinfo'find_file($included);
if ($IGNORE_INCLUDES ||
defined($IGNORE_INCS{$fname}) ||
defined($IGNORE_INCS{$included})) {
print STDERR "> Ignoring @include $included\n" if ($DEBUG);
} elsif ($fname) {
&texinfo'preprocess_files($fname);
$result = 1;
} else {
print STDERR "$0: ($where) file $included not found.\n";
}
$result;
}
# Process a texinfo input line
sub texinfo'preprocess_line {
local($_) = @_;
local($where) = &texinfo'where;
print STDERR "$where\n" if ($DEBUG);
if (/^\s*${CMD}(if\w*|tex|ignore)\s*(.*)\s*$/) {
print STDERR "> $_\n" if ($DEBUG);
&texinfo'start_cond($1, $2);
} elsif (/^\s*${CMD}end\s+(if\w*|tex|ignore)\s*/) {
print STDERR "> $_\n" if ($DEBUG);
&texinfo'end_cond($1);
} else {
local($type, $result, $where) = &texinfo'top_cond;
if ($result) {
if (/^\s*${CMD}set\s+(\S*)\s*(\S*)/) {
&texinfo'def_cmd($1, $2);
} elsif (/^\s*${CMD}clear\s+(\S*)/) {
&texinfo'undef_cmd($1);
} elsif (/^\s*${CMD}include\s+(\S*)/) {
s/^/@c / if (&texinfo'include_cmd($1));
}
## Regular line - pass it to the user
$_ .= "\n";
eval "&${USERSUB}(\$_);" ;
}
}
}
# Handle a file of texinfo commands
sub texinfo'preprocess_files {
local(@argv) = @_;
local($fname, $fhandle);
foreach $fname (@argv) {
$fhandle = $fname;
if (open($fhandle, $fname)) {
&texinfo'preprocess_handle($fhandle);
close($fhandle);
} else {
print STDERR "$0: unable to open $fname for reading: $!\n" ;
}
}
}
# Handle an open filehandle of texinfo commands
sub texinfo'preprocess_handle {
local($_) = @_;
local($fhandle) = $_;
push(@HANDLES, $fhandle);
while (<$fhandle>) {
++$LINES{$fhandle}; ## Update the line-count for this file
++$LINES;
chop;
&texinfo'preprocess_line("$_");
}
&texinfo'extra_cond($fhandle);
pop(@HANDLES);
}
# If a list is given, preprocess the given files, otherwise preprocess STDIN
sub texinfo'preprocess {
local(@argv) = @_;
if (0 < @argv) {
&texinfo'preprocess_files(@argv);
} elsif (0 == @HANDLES) {
&texinfo'preprocess_handle(STDIN);
}
}
# Here is the main routine that parses @ARGV for options and then
# processes the remaining filenames in @ARGV ...
package main;
sub main'texinfo {
local($_) = @_;
local($old_usersub) = $texinfo'USERSUB;
if ($_) {
s/^/main\'/ if (! /^\w+\'\w+$/);
$texinfo'USERSUB = $_;
}
&texinfo'getopts();
&texinfo'preprocess(@main'ARGV);
$texinfo'USERSUB = $old_usersub;
}
1;
--
_______________________"And miles to go before I sleep."______________________
Brad Appleton Harris Computer Systems, Fort Lauderdale, FL USA
[email protected] Disclaimer: I said it, not my employer!