% Copyright (c) 2005 Jonathan Fine <[email protected]>
% License: GPL version 2 or (at your option) any later version.
% $Source: /cvsroot/pytex/pytex/tex2tok/_tex2tok.tex,v $
% usage: '\LEX{story}'
% Category codes used by the rest of this file, assigned one by one.
\catcode`\|=0 % escape character, for document control sequences
\catcode`\{=1 % begin-group
\catcode`\}=2 % end-group
\catcode`\$=3 % math shift
\catcode`\&=4 % alignment tab
\catcode`\#=6 % macro parameter
\catcode`\^=7 % superscript
\catcode`\_=8 % subscript
\catcode`\@=11 % letter, so @ may be used in the macro names below
% useful constants
% \bgroup and \egroup: implicit begin-group and end-group characters
\let\bgroup={
\let\egroup=}
% \zero and \one: the numbers 0 and 1 as \chardef tokens
\chardef\zero=0
\chardef\one=1
% \space: an implicit space token (made with \let, so it is not a macro).
% The helper \? ends in `= ' plus one space; the space typed after \? below
% is therefore the token that gets assigned to \space.
\def\?{\let\space= } \? % define \space
\let\?=\undefined % clean up afterwards
% \tokfile: output stream number used by the \write commands in this file.
% This is only the default; \LEX sets \tokfile to 15 locally before opening it.
\chardef\tokfile=16 % change if you wish
% \loop: loop through a sequence of tokens.
% \futurelet assigns to \value the meaning of the next token, leaves that
% token in the input, and then hands over to \loop@A.
\def\loop{\futurelet\value\loop@A}
% \loop@continue: pick up the next token, and continue the loop.
% Required for dealing with space and brace characters: the token is
% removed from the input by assigning it to the scratch name \@temp, and
% \afterassignment restarts \loop once that assignment is done.
% The space after `=' is part of the definition and must be kept.
\def\loop@continue{\afterassignment\loop\let\@temp= }
% \loop@A: is the next token (whose meaning is held in \value) unexpandable?
% Working assumptions of this file:
%   always, unexpandable tokens are characters
%   always, control sequences are expandable (or undefined)
%   sometimes, a character may be expandable
% The \expandafter makes \noexpand act on the first \value before \ifx
% compares it with the second \value.
\def\loop@A{%
  \expandafter\ifx\noexpand\value\value
    \expandafter\loop@char   % test true: treat the token as a character
  \else
    \expandafter\loop@macro  % test false: treat it as expandable/undefined
  \fi
}
% \loop@macro: the next token is expandable (or undefined).
% \noexpand is applied to that token before \loop@macro@A takes it as its
% argument; this suppresses the implicit \outer token at end of file.
\def\loop@macro{\expandafter\loop@macro@A\noexpand}
% \loop@macro@A #1
% #1 - expandable (or undefined) token, picked up from input stream
% If #1 is undefined, call \process@undefined on it and resume the loop.
% Otherwise execute #1 once the conditional has been closed; in that case
% the loop is NOT resumed here, so whatever #1 expands to decides what
% happens next (\endloop expands to nothing, which ends the loop).
\long\def\loop@macro@A #1%
{%
\ifx #1\undefined
\process@undefined #1%
\expandafter\loop
\else
\expandafter #1%
\fi
}
% \process@undefined #1
% #1 - undefined control sequence
% Default handler, which can be redefined: write the name of #1, as
% produced by \string, on a line of its own to the stream \tokfile.
\long\def\process@undefined #1{\immediate\write\tokfile{\string #1}}
% \endloop: when read by \loop, this ends its execution.
% It is a macro with an empty expansion, so nothing restarts the loop.
\def\endloop{}
\def\loop@char
% process next token, which is a character
% might be letter, other, brace or whatever
% afterwards, continue with the loop
% (the continuation is done by \loop@char@default or \loop@char@A)
{%
% some tricky code to deal with common cases at high speed:
% \ifcase needs a number, and the two \ifcat tests below are expanded
% while that number is being looked for.  If \value has the category of
% a letter (compared with `a') or of an other character (compared with
% `?'), the number found is \one; if neither test succeeds it is \zero.
\ifcase
% first deal with the common cases, at high speed
\ifcat a\value \one\fi % it's a letter
\ifcat ?\value \one\fi % it's punctuation, digit, etc.
\zero
% case 0: \expandafter closes the \ifcase before \loop@char@A is called
\expandafter\loop@char@A % it's something else
\or % case of letter or digit
% case 1: likewise, close the \ifcase before \loop@char@default is called
\expandafter\loop@char@default
\fi
}
% \loop@char@default #1
% #1 - a character we can pick up as an ordinary macro argument
% Write #1 on a line of its own to the stream \tokfile, then resume \loop.
\def\loop@char@default #1{%
  \immediate\write\tokfile{#1}%
  \loop
}
% \empty: macro with empty expansion, used below to test whether \temp is empty
\def\empty{}
% \loop@char@A: the next token is a special character, that is, \loop@char
% found it to be neither a letter nor an other character.
% \temp is built by expansion from the meaning held in \value:
%   space token          -> a single space
%   begin-group (\bgroup) -> +{
%   end-group (\egroup)   -> -}
%   anything else        -> empty
% If \temp is empty, the token can be picked up as a macro argument, so
% \loop@char@default deals with it.  Otherwise \temp is written to the
% stream \tokfile and \loop@continue removes the token and resumes the loop.
\def\loop@char@A
{%
\edef\temp
{%
\ifx\value\space
% \if11 is true; the space after the digits is what ends up in \temp
\if11 \fi
\else\ifx\value\bgroup
% the brace after \string here and the one two lines below balance each
% other inside this definition; keep them in this order
+\string{%
\else\ifx\value\egroup
-\string}%
\fi\fi\fi
}%
\ifx\temp\empty
\expandafter\loop@char@default
\else
\immediate\write\tokfile{\temp}%
\expandafter\loop@continue
\fi
}
% |LEX #1 - entry point (| is an escape character here, so |LEX is \LEX)
% #1 - name of the document file to be read
% Runs \loop over the contents of the file #1, with output going to the
% file \jobname.tok, then closes that file and ends the TeX run.
\def |LEX #1%
{%
\begingroup
% with \par undefined, the loop hands it to \process@undefined like any
% other undefined control sequence
\let|par\undefined % blank lines in input document file
\let\LEX\undefined % in case it appears in document file
\catcode`!=12 % restore to normal value
\catcode`|=12 % restore to normal value
\catcode`@=12 % restore to normal value
% inside this group the output stream is 15, not the default set earlier
\chardef\tokfile 15
\def\next{\immediate\openout\tokfile \jobname.tok }%
% the \expandafter chain expands \input first, so the document file is
% already opened when \next (the \openout) and then \loop are executed
% NOTE(review): presumably this is so that \jobname is taken from the
% document file name - confirm
\expandafter\next
\expandafter\loop
\input #1 % space to terminate file name
% \endloop is the first token after the end of the document file
\endloop
\immediate\closeout\tokfile
\endgroup
\end
}
% end of the definitions: save them as a format file and stop
\dump