% Copyright (c) 2005 Jonathan Fine <[email protected]>
% License: GPL version 2 or (at your option) any later version.
% $Source: /cvsroot/pytex/pytex/tex2tok/_tex2tok.tex,v $

%  usage: '\LEX{story}'

% Category code assignments.  Each character is named through its
% control-symbol form, so the line reads the same whatever category the
% character has at that moment.
\catcode`\|=0  % escape character, for document control sequences
\catcode`\{=1  % begin-group
\catcode`\}=2  % end-group
\catcode`\$=3  % math shift
\catcode`\&=4  % alignment tab
\catcode`\#=6  % macro parameter
\catcode`\^=7  % superscript
\catcode`\_=8  % subscript
\catcode`\@=11 % letter, so that @ can be used in macro names

% useful constants
\let\bgroup={% implicit begin-group character
\let\egroup=}% implicit end-group character
\chardef\zero=0 % the number 0 as a single token
\chardef\one=1 % the number 1 as a single token

% Make \space an implicit space token.
% \? is a throw-away macro whose body is "\let\space = " (the blank after
% the equals sign is part of the body and is taken as \let's one optional
% space).  Because \? is a control symbol, the blank that follows its call
% on the line below is not skipped; it becomes the token assigned to \space.
\def\?{\let\space = } \?        % define \space
\let\? \undefined               % clean up afterwards

% Stream number used by the \write commands of the loop macros.
% \LEX re-declares \tokfile as 15 inside its own group.
\chardef\tokfile 16             % change if you wish


% \loop -- entry point of the token loop.
% Stores the meaning of the upcoming token in \value, leaving the token
% itself in the input, and then passes control to \loop@A.
\def\loop{\futurelet\value\loop@A}


% \loop@continue -- absorb the upcoming token and re-enter \loop.
% The token is removed by assigning it to the scratch name \@temp; this is
% needed for space and brace characters.  The blank between the equals
% sign and the closing brace is significant: it is taken as the one
% optional space of \let, so the token that follows in the input is the
% one assigned, even when that token is itself a space.
\def\loop@continue{\afterassignment\loop \let\@temp= }


% \loop@A -- dispatch on the upcoming token, whose meaning is in \value.
%   \noexpand\value agrees with \value under \ifx : unexpandable token,
%                                                   go to \loop@char
%   otherwise : expandable or undefined token,      go to \loop@macro
% Unexpandable tokens are always characters here, and control sequences
% are always expandable (or undefined); a character may sometimes be
% expandable.
\def\loop@A{%
  \expandafter\ifx\noexpand\value\value \expandafter\loop@char
  \else \expandafter\loop@macro \fi
}


% \loop@macro -- hand the upcoming token to \loop@macro@A as its argument.
% \noexpand is applied to that token before the argument is picked up;
% this suppresses the implicit \outer token at end of file.
\def\loop@macro{%
  \expandafter\loop@macro@A\noexpand
}

% \loop@macro@A -- handle one expandable or undefined token.
%   #1 - the token, picked up from the input stream
% If the token is undefined, call \process@undefined on it and resume the
% loop.  Otherwise execute the token; the loop is not restarted here, so
% whether it continues is up to the token that was executed.
% The opening brace carries a comment character so that the body does not
% begin with a space token.
\long\def\loop@macro@A #1{%
  \ifx #1\undefined
    \process@undefined #1%
    \expandafter\loop           % \expandafter finishes the conditional first
  \else
    \expandafter #1%
  \fi
}


% \process@undefined -- default action for an undefined control sequence.
%   #1 - the undefined control sequence
% Writes the \string form of #1 as one line on stream \tokfile.
% This is only the default; it can be redefined.
\long\def\process@undefined#1{\immediate\write\tokfile{\string#1}}


% \endloop -- end marker for \loop.
% It is a macro with an empty body: when the loop reads it, it is executed
% like any other defined token, produces nothing, and the loop is not
% re-entered.
\def\endloop{}


\def\loop@char
% process next token, which is a character
% might be letter, other, brace or whatever
% afterwards, continue with the loop
%
% \value holds the meaning of that character (set by \loop).
% The number read by \ifcase is whichever of the constants below is
% reached first: \one when \value has the same category as "a" or as "?",
% and \zero when neither \ifcat test succeeds.
%   case 0 -> \loop@char@A         (any other kind of character)
%   case 1 -> \loop@char@default   (letter or "other" character)
% In both cases \expandafter closes the conditional before the chosen
% macro runs.
{%
 % some tricky code to deal with common cases at high speed
 \ifcase
   % first deal with the common cases, at high speed
   \ifcat a\value \one\fi      % it's a letter
   \ifcat ?\value \one\fi      % it's punctuation, digit, etc.
   \zero
   \expandafter\loop@char@A    % it's something else
 \or                           % case of letter or digit
   \expandafter\loop@char@default
 \fi
}


% \loop@char@default -- write out one ordinary character and carry on.
%   #1 - a character that can be picked up as an undelimited argument
% The character is written as one line on stream \tokfile, after which
% \loop is called again.
\def\loop@char@default#1{\immediate\write\tokfile{#1}\loop}


% NOTE(review): this definition of \loop@char@A is replaced by a second
% \def\loop@char@A later in this file, before \dump, and nothing between
% the two definitions uses it; only the later one ends up in the format.
% NOTE(review): the body looks unfinished.  It contains two \ifx but only
% one \fi, and the first \ifx\value is followed directly by the second
% \ifx.  \@space, \@percent and \@char@strip are not defined in the part
% of the file visible here.  It also writes to stream 16 directly rather
% than through \tokfile.  Consider deleting it once confirmed unused.
\def\loop@char@A
% next token is a special character
% pick it up, and process it
% we already have the \
{%
 \ifx\value
 \ifx\value\@space
   \immediate\write16{ \@percent}
 \else
   \immediate\write16{%
     \expandafter\@char@strip\meaning\value
     \@percent
   }%
 \fi
}


% \empty -- macro with an empty body, used below to test whether \temp
% came out empty.
\def\empty{}
% \loop@char@A -- handle a character that \loop@char did not classify as
% a letter or an "other" character.  \value holds its meaning.
% \temp is built by \edef as the text to write for the three characters
% that cannot be picked up as an ordinary macro argument:
%   space       -> a single space
%   begin-group -> a plus sign followed by the brace character
%   end-group   -> a minus sign followed by the brace character
% For these, \temp is written on \tokfile and \loop@continue removes the
% character from the input.  For anything else \temp is empty and the
% character is left to \loop@char@default.
% The closing brace of the \edef carries a comment character so that no
% space token follows it in the body.
\def\loop@char@A{%
  \edef\temp{%
    \ifx\value\space
      \if11 \fi                 % always true; leaves one space in \temp
    \else\ifx\value\bgroup
      +\string{%
    \else\ifx\value\egroup
      -\string}%
    \fi\fi\fi
  }%
  \ifx\temp\empty
    \expandafter\loop@char@default
  \else
    \immediate\write\tokfile{\temp}%
    \expandafter\loop@continue
  \fi
}

% \LEX{<name>} -- top-level command: run the token loop over one file.
%   #1 - file name handed to \input
% The name is written with | as escape character here; |LEX and \LEX are
% the same control sequence.
% All settings are made inside a group, and the TeX run is ended with \end.
\def |LEX #1%
{%
 \begingroup
   % make \par undefined, so that \loop@macro@A passes it to
   % \process@undefined like any other undefined control sequence
   \let|par\undefined          % blank lines in input document file
   \let\LEX\undefined          % in case it appears in document file
   \catcode`!=12               % restore to normal value
   \catcode`|=12               % restore to normal value
   \catcode`@=12               % restore to normal value
   % inside this group, write to stream 15
   \chardef\tokfile 15
   % \next opens <jobname>.tok on \tokfile; the blank after ".tok" ends
   % the file name
   \def\next{\immediate\openout\tokfile \jobname.tok }%
   % the \expandafter chain expands \input first, then \next runs, then
   % \loop starts on the contents of the file
   % NOTE(review): presumably \input goes first so that \jobname is
   % taken from the input file -- confirm
   \expandafter\next
   \expandafter\loop
     \input #1                 % space to terminate file name
   % read by the loop after the contents of the file; stops the loop
   \endloop
   \immediate\closeout\tokfile
 \endgroup
 \end
}

% Write the definitions above to a format file and end the run.
\dump