# latexcount

#!/usr/bin/perl -n

# latexcount
# ver 1.1
# 2003ii28

# a script for counting words in LaTeX documents
# by P.D. Magnus <http://www.fecundity.com/pmagnus>

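# The script runs from the command line:
# >perl latexcount.pl foo.tex
# This relies on the -n switch in the #! line. Perl only picks up
# switches from a #! line that is the very first line of the file;
# if it is not, pass the switch explicitly:
# >perl -n latexcount.pl foo.tex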

# The only tags whose arguments are not counted are those explicitly
# mentioned in the cutlist. If there are other substantial tags whose
# contents you'd like not to be counted, add them to the list.

# One-time setup.  Under the -n switch the body of this file is run once
# per input line, so anything that must happen only once lives in BEGIN.
BEGIN {
    # Tags whose {argument} is dropped from the word count.  Only the
    # presence of a key matters; the value is never read.
    our %cutlist = (
        'begin'             => 1,
        'end'               => 1,
        'usepackage'        => 1,
        'addtolength'       => 1,
        'documentclass'     => 1,
        'author'            => 1,
        'title'             => 1,
        'chapter'           => 1,
        'bibliography'      => 1,
        'bibliographystyle' => 1,
        'section'           => 1,
        'subsection'        => 1,
        'subsubsection'     => 1,
        'thanks'            => 1,
        'pagestyle'         => 1,
    );

    # State shared with the per-line code and the END block.  These have
    # to be package variables: a `my` variable declared here would be
    # private to this BEGIN block, leaving the variables of the same name
    # used everywhere else undefined (so a count that is never
    # incremented would print as an empty string instead of 0).
    our $cumline = '';    # text gathered until the braces balance
    our $depth   = 0;     # number of '{' seen minus number of '}' seen
    our $words   = 0;     # words counted in the main text
    our $fnwords = 0;     # words counted in footnotes

    # Scratch globals, initialised so that no per-line code ever sees an
    # undefined value.
    our $line    = '';
    our $i       = 0;
    our @tags    = ();
    our $thistag = '';
}

# Per-line body.
#
# With the -n switch perl wraps this file in `while (<>) { ... }`, so
# the statements below run once for every input line, with the line in
# $_.  $cumline, $depth, $words and $fnwords are deliberately package
# variables rather than `my` variables: they must keep their values
# from one line to the next, and the END block reads the two counters.

$line = $_;

# Regularize line endings.
$line =~ s/\r/\n/g;

# Remove comments: an unescaped % and everything after it, up to and
# including the end of the line.  The \z alternative also catches a
# comment on a final line that has no trailing newline.
$line =~ s/(?<!\\)%.*?(?:\n|\z)//g;

# Track brace nesting: +1 for every '{', -1 for every '}'.
$depth += ($line =~ tr/{//) - ($line =~ tr/}//);

# Append this line to whatever is left over from previous lines.
$cumline .= $line;

# Count once the braces collected so far balance.  The end of the input
# file also forces a count, so text following an unbalanced brace is
# still counted instead of being silently discarded.
if ($depth == 0 || eof(ARGV)) {
    if ($depth != 0) {
        warn "latexcount: unbalanced braces at end of $ARGV; "
           . "counting the remaining text anyway\n";
        $depth = 0;
    }
    my ($main_count, $note_count) = _count_chunk($cumline, \%cutlist);
    $words   += $main_count;
    $fnwords += $note_count;
    $cumline  = '';
}

# _count_chunk($text, \%cut)
#
# Counts the words in one chunk of LaTeX source.
#
#   $text  the chunk, with comments already removed
#   \%cut  tags (without the backslash) whose {argument} is not counted
#
# Returns the list (main-text words, footnote words).  The argument of
# \footnote is counted only in the second number; the arguments of tags
# in %cut are dropped; the arguments of every other tag, and of bare
# {...} groups, are counted as main text.  A word is a /\b\w+\b/ match.
sub _count_chunk {
    my ($text, $cut) = @_;

    # Step 1: replace each '\foo{bar}' (or bare '{bar}') that contains no
    # further braces with '<"n"<bar>"n">' and remember 'foo' as tag n.
    # Repeating this works from the inside out, so an inner group always
    # gets a lower number than the group enclosing it.
    #
    # Whitespace is only swallowed between a tag and its '{'.  Swallowing
    # it in front of a bare '{' would glue 'foo {bar}' into 'foobar'.
    my @tags;
    my $n = 0;
    while ($text =~ s/(?:(\\\w+)\s*)?\{([^\{\}]*)\}/<"$n"<$2>"$n">/s) {
        push @tags, $1;    # undef for a bare {...} group
        $n++;
    }

    # Step 2: resolve the markers in the order they were created, i.e.
    # innermost first.  By the time an enclosing group is handled, every
    # marker inside it has already been resolved.
    my $footnote_words = 0;
    for my $idx (0 .. $#tags) {
        my $tag = defined $tags[$idx] ? $tags[$idx] : '';
        $tag =~ s/\\//;

        my $open  = qq{<"$idx"<};
        my $close = qq{>"$idx">};

        if ($tag eq 'footnote') {
            # Footnotes are counted separately.  $1 is only read when
            # the substitution really matched.
            if ($text =~ s/\Q$open\E(.*)\Q$close\E//s) {
                my $note = $1;
                # Drop argument-less tags, as is done for the main text.
                $note =~ s/\\\w+//g;
                $footnote_words++ while $note =~ /\b\w+\b/g;
            }
        }
        elsif (defined $cut->{$tag}) {
            # The arguments of these tags are removed.
            $text =~ s/\Q$open\E.*\Q$close\E//s;
        }
        else {
            # The arguments of other tags are left in.
            $text =~ s/\Q$open\E(.*)\Q$close\E/$1/s;
        }
    }

    # Step 3: remove any tags that were not part of a tag/argument pair,
    # then count what is left.
    $text =~ s/\\\w+//g;
    my $main_words = 0;
    $main_words++ while $text =~ /\b\w+\b/g;

    return ($main_words, $footnote_words);
}

# Runs once, after the last input line: print the report.
END {
    # A counter that was never incremented may still be undefined (for
    # example $fnwords in a document without footnotes); report it as 0
    # rather than as an empty string.
    my $main_count = defined $words   ? $words   : 0;
    my $note_count = defined $fnwords ? $fnwords : 0;
    my $total      = $main_count + $note_count;

    print "\n$main_count words in the main text\n",
          "$note_count in the footnotes\n",
          "$total total\n\n";
}