@q Copyright 2012-2024 Alexander Shibakov@>
@q Copyright 2002-2014 Free Software Foundation, Inc.@>
@q This file is part of SPLinT@>

@q SPLinT is free software: you can redistribute it and/or modify@>
@q it under the terms of the GNU General Public License as published by@>
@q the Free Software Foundation, either version 3 of the License, or@>
@q (at your option) any later version.@>

@q SPLinT is distributed in the hope that it will be useful,@>
@q but WITHOUT ANY WARRANTY; without even the implied warranty of@>
@q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the@>
@q GNU General Public License for more details.@>

@q You should have received a copy of the GNU General Public License@>
@q along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.@>

@**The scanner for \ifx\bison\UNDEFINED\.{bison}\else\bison\fi\ syntax.
\ifx\bison\UNDEFINED
   \input limbo.sty
   \input yystype.sty
   \input grabstates.sty
   \immediate\openout\stlist=lo_states.h
\fi
The fact that \bison\ has a relatively straightforward grammar is
partly due to the sophistication of its scanner. The primary reason for this
increased complexity is \bison's awareness
of syntax variations in its input files. In addition to the grammar
syntax, the parser has to be able to deal with extended \Cee\ syntax
inside \bison's actions.

Since the names\namedspot{state.grabbing} of the scanner
{\em states@^scanner states@>\/} reside in the common
namespace with other variables, in order to make the \TeX\ version of
the scanner aware of the numerical values of the states, a special
procedure is required. It is executed as part of \flex's user
initialization code but the data for it has to be collected
separately. The procedure is declared in the preamble section of the scanner.

Below, we follow the same convention (of italicizing the original
comments) as in the code for the parser.
@(lo.ll@>=
@G
@> @<Grammar lexer definitions@> @=
%{@> @<Grammar lexer \Cee\ preamble@> @=%}
@> @<Grammar lexer options@> @=
%%
@> @<Grammar token regular expressions@> @=
%%
@O
/* registers all the scanner states; the body is supplied by the section referenced below */
void define_all_states( void ) {
 @<Collect state definitions for the grammar lexer@>@;
}
@o
@g

@*1 Definitions and state declarations.
It is convenient to abbreviate some commonly used subexpressions.
@<Grammar lexer definitions@>=
 @<Grammar lexer states@>@;
@G(fs1)
letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
id        {letter}({letter}|[-0-9])*
int       [0-9]+
@g

@ {\it Zero or more instances of backslash-newline.  Following \gcc, allow
white space between the backslash and the newline}.
@<Grammar lexer definitions@>=
@G(fs1)
splice   (\\[ \f\t\v]*\n)*
@g

@ {\it An equal sign, with optional leading whitespaces. This is used in some
deprecated constructs}.
@<Grammar lexer definitions@>=
@G(fs1)
eqopt    ([[:space:]]*=)?
@g

@ This is how the code for state value output is put inside the
routine mentioned above. The state information is collected by a
special small scanner that is coupled with the bootstrap parser. This
way, all the necessary token information comes `hardwired' in the
bootstrap parser, and the small scanner itself does not use any state
manipulation and thus can get away with using no state setup. It can,
however, scan just enough of the \flex\ syntax to extract the state
information from it (only the state {\it names\/} are needed) and
output it in the form of a header file for the `real' lexer output
`driver' to use.
@<Collect state definitions for the grammar lexer@>=
#define _register_name( name ) @[Define_State( #name, name )@]
#include "lo_states.h"
#undef _register_name

@ {\it A \Cee-like comment in directives/rules}.
@<Grammar lexer states@>=
@G(fs1)
%x SC_YACC_COMMENT
@g

@ {\it Strings and characters in directives/rules}.
%\yyflexdebugtrue
%\traceparserstatestrue
%\tracestackstrue
%\tracerulestrue
%\traceactionstrue
%\tracebadcharstrue
%\traceparseresultstrue
%\traceparserstatestrue
%\prodstyle{\%\%}%
@<Grammar lexer states@>=
@G(fs1)
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
@g

@ {\it An identifier was just read in directives/rules.  Special state
to capture the sequence `\.{identifier:}'}.
\traceparserstatesfalse
\tracestacksfalse
\tracerulesfalse
\traceactionsfalse
\tracebadcharsfalse
\yyflexdebugfalse
\traceparseresultsfalse
\traceparserstatesfalse
@<Grammar lexer states@>=
@G(fs1)
%x SC_AFTER_IDENTIFIER
@g

@ {\it \POSIX\ says that a tag must be both an id and a \Cee\ union member, but
historically almost any character is allowed in a tag.  We
disallow \prodstyle{NUL}, as this simplifies our implementation.  We match
angle brackets in nested pairs: several languages use them for
generics/template types}.
@<Grammar lexer states@>=
@G(fs1)
%x SC_TAG
@g

@ {\it
\def\aterm{\item{\sqbullet}\ignorespaces}%
\setbox0=\hbox{\sqbullet\enspace}%
\parindent=0pt
\advance\parindent by \wd0
Four types of user code:
\aterm prologue (code between \.{\%\{} \.{\%\}} in the first section, before \prodstyle{\%\%});

\aterm actions, printers, union, etc, (between braces in the middle section);

\aterm epilogue (everything after the second \prodstyle{\%\%});

\aterm predicate (code between \.{\%?\{} and \.{\}} in middle section);
}%
@<Grammar lexer states@>=
@G(fs1)
%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
@g

@ {\it \Cee\ and \Ceepp\ comments in code}.
@<Grammar lexer states@>=
@G(fs1)
%x SC_COMMENT SC_LINE_COMMENT
@g

@ {\it Strings and characters in code}.
@<Grammar lexer states@>=
@G(fs1)
%x SC_STRING SC_CHARACTER
@g

@ Bracketed identifiers support.
@<Grammar lexer states@>=
@G(fs1)
%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
@g

@ @<Grammar lexer \Cee\ preamble@>=

#include <stdint.h>
#include <stdbool.h>

@ The code for the generated scanner is highly dependent on the options
supplied. Most of the options below are essential for the scheme
adopted in this package to work.\gtextidx{\flex\ options example}{flex options example}{\flexidxdomain}%
@<Grammar lexer options@>=
@G(fs1)
%option bison-bridge
%option noyywrap nounput noinput reentrant
%option noyy_top_state
%option debug
%option stack
%option outfile="lo.c"
@g

@*1 Tokenizing with regular expressions.
Here is a full list of regular expressions recognized by the \bison\ scanner.
@<Grammar token regular expressions@>=
 @<Scan grammar white space@>@;
 @<Scan \bison\ directives@>@;
 @<Do not support zero characters@>@;
 @<Scan after an identifier, check whether a colon is next@>@;
 @<Scan bracketed identifiers@>@;
 @<Scan a \yacc\ comment@>@;
 @<Scan a \Cee\ comment@>@;
 @<Scan a line comment@>@;
 @<Scan a \bison\ string@>@;
 @<Scan a character literal@>@;
 @<Scan a tag@>@;
 @<Decode escaped characters@>@;
 @<Scan user-code characters and strings@>@;
 @<Strings, comments etc.\ found in user code@>@;
 @<Scan code in braces@>@;
 @<Scan prologue@>@;
 @<Scan the epilogue@>@;
 @<Add the scanned symbol to the current string@>@;

@ @<Scan grammar white space@>=
@G(fs2)
<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
{
@t}\vb{\insertraw{\inscomment{\it comments and white space}}}{@>
 ","               {@> @[TeX_( "/yywarn{stray `,' treated as white space}" );@]@=}
 [ \f\n\t\v]       |
   "//".*          {@> @[TeX_( "/yylexnext" );@]@=}
@=  "/*"              {@> @[TeX_( "/contextstate/YYSTART /yyBEGIN{SC_YACC_COMMENT}/yylexnext" );@]@=}@>@/

@t}\vb{\insertraw{\inscomment{\it \.{\#line} directives are not documented, and may be withdrawn or modified in future versions of \bison}}}{@>

 ^"#line "{int}(" \"".*"\"")?"\n" {@> @[TeX_( "/yylexnext" );@]@=}
}
@g

@ {\it For directives that are also command line options, the regex must be
\.{"\%..."} after \.{"[-\_]"}'s are removed, and the directive must match the \.{--long}
option name, with a single string argument.  Otherwise, add exceptions
to \.{../build-aux/cross-options.pl}}. For most options the scanner
returns a pair of pointers as the value.

@<Scan \bison\ directives@>=
@G(fs2)
<INITIAL>
{
 "%binary"                         {@> @[TeX_( "/yylexreturnptr{PERCENT_NONASSOC}" );@]@=}
 "%code"                           {@> @[TeX_( "/yylexreturnptr{PERCENT_CODE}" );@]@=}
 "%debug"                          {@> @[@<Set \prodstyle{\%debug} flag@>@]@=}
 "%default-prec"                   {@> @[TeX_( "/yylexreturnptr{PERCENT_DEFAULT_PREC}" );@]@=}
 "%define"                         {@> @[TeX_( "/yylexreturnptr{PERCENT_DEFINE}" );@]@=}
 "%defines"                        {@> @[TeX_( "/yylexreturnptr{PERCENT_DEFINES}" );@]@=}
 "%destructor"                     {@> @[TeX_( "/yylexreturnptr{PERCENT_DESTRUCTOR}" );@]@=}
 "%dprec"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_DPREC}" );@]@=}
 "%empty"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_EMPTY}" );@]@=}
 "%error-verbose"                  {@> @[TeX_( "/yylexreturnptr{PERCENT_ERROR_VERBOSE}" );@]@=}
 "%expect"                         {@> @[TeX_( "/yylexreturnptr{PERCENT_EXPECT}" );@]@=}
 "%expect-rr"                      {@> @[TeX_( "/yylexreturnptr{PERCENT_EXPECT_RR}" );@]@=}
 "%file-prefix"                    {@> @[TeX_( "/yylexreturnptr{PERCENT_FILE_PREFIX}" );@]@=}
 "%fixed-output-files"             {@> @[TeX_( "/yylexreturnptr{PERCENT_YACC}" );@]@=}
 "%initial-action"                 {@> @[TeX_( "/yylexreturnptr{PERCENT_INITIAL_ACTION}" );@]@=}
 "%glr-parser"                     {@> @[TeX_( "/yylexreturnptr{PERCENT_GLR_PARSER}" );@]@=}
 "%language"                       {@> @[TeX_( "/yylexreturnptr{PERCENT_LANGUAGE}" );@]@=}
 "%left"                           {@> @[TeX_( "/yylexreturnptr{PERCENT_LEFT}" );@]@=}
 "%lex-param"                      {@> @[@<Return lexer parameters@>@]@=}
 "%locations"                      {@> @[@<Set \prodstyle{\%locations} flag@>@]@=}
 "%merge"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_MERGE}" );@]@=}
 "%name-prefix"                    {@> @[TeX_( "/yylexreturnptr{PERCENT_NAME_PREFIX}" );@]@=}
 "%no-default-prec"                {@> @[TeX_( "/yylexreturnptr{PERCENT_NO_DEFAULT_PREC}" );@]@=}
 "%no-lines"                       {@> @[TeX_( "/yylexreturnptr{PERCENT_NO_LINES}" );@]@=}
 "%nonassoc"                       {@> @[TeX_( "/yylexreturnptr{PERCENT_NONASSOC}" );@]@=}
 "%nondeterministic-parser"        {@> @[TeX_( "/yylexreturnptr{PERCENT_NONDETERMINISTIC_PARSER}" );@]@=}
 "%nterm"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_NTERM}" );@]@=}
 "%output"                         {@> @[TeX_( "/yylexreturnptr{PERCENT_OUTPUT}" );@]@=}
 "%param"                          {@> @[@<Return lexer and parser parameters@>@]@=}
 "%parse-param"                    {@> @[@<Return parser parameters@>@]@=}
 "%prec"                           {@> @[TeX_( "/yylexreturnptr{PERCENT_PREC}" );@]@=}
 "%precedence"                     {@> @[TeX_( "/yylexreturnptr{PERCENT_PRECEDENCE}" );@]@=}
 "%printer"                        {@> @[TeX_( "/yylexreturnptr{PERCENT_PRINTER}" );@]@=}
 "%pure-parser"                    {@> @[@<Set \prodstyle{\%pure-parser} flag@>@]@=}
 "%require"                        {@> @[TeX_( "/yylexreturnptr{PERCENT_REQUIRE}" );@]@=}
 "%right"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_RIGHT}" );@]@=}
 "%skeleton"                       {@> @[TeX_( "/yylexreturnptr{PERCENT_SKELETON}" );@]@=}
 "%start"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_START}" );@]@=}
 "%term"                           {@> @[TeX_( "/yylexreturnptr{PERCENT_TOKEN}" );@]@=}
 "%token"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_TOKEN}" );@]@=}
 "%token-table"                    {@> @[TeX_( "/yylexreturnptr{PERCENT_TOKEN_TABLE}" );@]@=}
 "%type"                           {@> @[TeX_( "/yylexreturnptr{PERCENT_TYPE}" );@]@=}
 "%union"                          {@> @[TeX_( "/yylexreturnptr{PERCENT_UNION}" );@]@=}
 "%verbose"                        {@> @[TeX_( "/yylexreturnptr{PERCENT_VERBOSE}" );@]@=}
 "%yacc"                           {@> @[TeX_( "/yylexreturnptr{PERCENT_YACC}" );@]@=}
@t}\vb{\insertraw{\inscomment{\it deprecated}}}{@>
 "%default"[-_]"prec"              {@> @[TeX_( "/yypdeprecated{\\%default-prec}" );@]@=}
 "%error"[-_]"verbose"             {@> @[TeX_( "/yypdeprecated{\\%define parse.error verbose}" );@]@=}
 "%expect"[-_]"rr"                 {@> @[TeX_( "/yypdeprecated{\\%expect-rr}" );@]@=}
 "%file-prefix"{eqopt}             {@> @[TeX_( "/yypdeprecated{\\%file-prefix}" );@]@=}
 "%fixed"[-_]"output"[-_]"files"   {@> @[TeX_( "/yypdeprecated{\\%fixed-output-files}" );@]@=}
 "%name"[-_]"prefix"{eqopt}        {@> @[TeX_( "/yypdeprecated{\\%name-prefix}" );@]@=}
 "%no"[-_]"default"[-_]"prec"      {@> @[TeX_( "/yypdeprecated{\\%no-default-prec}" );@]@=}
 "%no"[-_]"lines"                  {@> @[TeX_( "/yypdeprecated{\\%no-lines}" );@]@=}
 "%output"{eqopt}                  {@> @[TeX_( "/yypdeprecated{\\%output}" );@]@=}
 "%pure"[-_]"parser"               {@> @[TeX_( "/yypdeprecated{\\%pure-parser}" );@]@=}
 "%token"[-_]"table"               {@> @[TeX_( "/yypdeprecated{\\%token-table}" );@]@=}

@t}\vb{\insertraw{\inscomment{{\it semantic predicate}; the nesting level is reset here, the same way it is done for the code in between braces}}}{@>

 "%?"[ \f\n\t\v]*"{"               {@> @[TeX_( "/lonesting/z@@/yyBEGIN{SC_PREDICATE}/yylexnext" );@]@=}

 "%"{id}|"%"{notletter}([[:graph:]])+ {@> @[@<Possibly complain about a bad directive@>@]@=}

 "="                               {@> @[TeX_( "/yylexreturnptr{EQUAL}" );@]@=}
 "|"                               {@> @[TeX_( "/yylexreturnptr{PIPE}" );@]@=}
 ";"                               {@> @[TeX_( "/yylexreturnptr{SEMICOLON}" );@]@=}

 {id}                              {@> @[@<Prepare an identifier@>@]@=}
 {int}                             {@> @[TeX_( "/edef/next{/yylval{/nx/anint{/the/yytext}" );@]@=
                                        @> @[TeX_( "    {/the/yyfmark}{/the/yysmark}}}/next" );@]@=
                                    @> @[TeX_( "/yylexreturn{INT}" );@]@=}
 0[xX][0-9abcdefABCDEF]+           {@> @[TeX_( "/edef/next{/yylval{/nx/hexint{/the/yytext}" );@]@=
                                        @> @[TeX_( "    {/the/yyfmark}{/the/yysmark}}}/next" );@]@=
                                    @> @[TeX_( "/yylexreturn{INT}" );@]@=}
@t}\vb{\insertraw{\inscomment{\it identifiers may not start with a digit;  yet, don't silently accept \.{1foo} as \.{1 foo}}}}{@>
 {int}{id}                         {@> @[TeX_( "/yyfatal{invalid identifier: /the/yytext}" );@]@=}
@t}\vb{\insertraw{\inscomment{\it characters}}}{@>
 "'"                               {@> @[TeX_( "/yyBEGIN{SC_ESCAPED_CHARACTER}/yylexnext" );@]@=}
@t}\vb{\insertraw{\inscomment{\it strings}}}{@>
 "\""                              {@> @[TeX_( "/yyBEGIN{SC_ESCAPED_STRING}/yylexnext" );@]@=}
@t}\vb{\insertraw{\inscomment{\it prologue}}}{@>
 "%{"                              {@> @[@<Start assembling prologue code@>@]@=}
@t}\vb{\insertraw{\inscomment{{\it code in between braces}; originally preceded by \.{\\STRINGGROW} but it is omitted here}}}{@>
 "{"                               {@> @[TeX_( "/lonesting/z@@/yyBEGIN{SC_BRACED_CODE}/yylexnext" );@]@=}
@t}\vb{\insertraw{\inscomment{\it a type}}}{@>
 "<*>"                             {@> @[TeX_( "/yylexreturnptr{TAG_ANY}" );@]@=}
 "<>"                              {@> @[TeX_( "/yylexreturnptr{TAG_NONE}" );@]@=}
 "<"                               {@> @[TeX_( "/lonesting=/z@@/yyBEGIN{SC_TAG}/yylexnext" );@]@=}

 "%%"                              {@> @[@<Switch sections@>@]@=}
 "["                               {@> @[TeX_( "/let/bracketedidstr=/empty" );@]
                                    @> @[TeX_( "/bracketedidcontextstate/YYSTART" );@]
                                    @> @[TeX_( "/yyBEGIN{SC_BRACKETED_ID}/yylexnext" );@]@=}

 <<EOF>>                           {@> @[TeX_( "/yyterminate" );@]/* \flexrenstyle{EOF} in \flexsnstyle{INITIAL} */@=}

 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {@> @[@<Process a bad character@>@]@=}
}
@g

@ We present the `bad character' code first, before going into the details
of the character matching by the rest of the lexer. The contents of
\.{\\yytextpure} are used to form the control sequence name
\.{lexspecial[...]} and \.{\\next} is made equivalent to that control
sequence. If it is undefined (i.e.~\.{\\next} is the same as \.{\\relax}),
the matched text is passed to \.{\\yyfatal} as invalid, but only when
\.{\\iftracebadchars} is true; with tracing off, nothing else is emitted in this
branch. Otherwise, \.{\\lexspecialchar} receives the control sequence
along with the values of \.{\\yyfmark} and \.{\\yysmark}, and \.{\\yylexnext} follows.
@<Process a bad character@>=
@[TeX_( "/expandafter/let/expandafter/next/csname lexspecial[/the/yytextpure]/endcsname" );@]@;
@[TeX_( "/ifx/next/relax" );@]@;
@[TeX_( "    /iftracebadchars" );@]@;
@[TeX_( "        /yyfatal{invalid character(s): /the/yytext}" );@]@;
@[TeX_( "    /fi" );@]@;
@[TeX_( "/else" );@]@;
@[TeX_( "    /expandafter/lexspecialchar/expandafter{/next}{/the/yyfmark}{/the/yysmark}/yylexnext" );@]@;
@[TeX_( "/fi" );@]@;

@ The \prodstyle{\%debug} directive is passed on as a \.{PERCENT\_FLAG} token. The value
placed in \.{\\yylval} consists of four groups: the strings \.{parse.trace} and \.{debug},
followed by the expanded values of \.{\\yyfmark} and \.{\\yysmark}.
@<Set \prodstyle{\%debug} flag@>=
 @[TeX_( "/edef/next{/yylval{{parse.trace}{debug}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yylexreturn{PERCENT_FLAG}" );@]@;

@ @<Return lexer parameters@>=
 @[TeX_( "/edef/next{/yylval{{lex-param}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yylexreturn{PERCENT_PARAM}" );@]@;

@ @<Set \prodstyle{\%locations} flag@>=
 @[TeX_( "/edef/next{/yylval{{locations}{}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yylexreturn{PERCENT_FLAG}" );@]@;

@ @<Return lexer and parser parameters@>=
 @[TeX_( "/edef/next{/yylval{{both-param}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yylexreturn{PERCENT_PARAM}" );@]@;

@ @<Return parser parameters@>=
 @[TeX_( "/edef/next{/yylval{{parse-param}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yylexreturn{PERCENT_PARAM}" );@]@;

@ @<Set \prodstyle{\%pure-parser} flag@>=
 @[TeX_( "/edef/next{/yylval{{api.pure}{pure-parser}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yylexreturn{PERCENT_FLAG}" );@]@;

@ An unrecognized directive only results in a warning quoting the matched text, and
only when \.{\\iftracebadchars} is true. The fragment contains no other code: no
token is returned here.
@<Possibly complain about a bad directive@>=
@[TeX_( "/iftracebadchars" );@]@;
@[TeX_( "    /yywarn{invalid directive: /the/yytext}" );@]@;
@[TeX_( "/fi" );@]@;

@ At this point we save the spelling and the location of the identifier. The token is returned
later, after the context is known.
@<Prepare an identifier@>=
 @[TeX_( "/edef/next{/yylval{{/the/yytextpure}{/the/yytext}" );@]@;
 @[TeX_( "    {/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/let/bracketedidstr=/empty" );@]@;
 @[TeX_( "/yyBEGIN{SC_AFTER_IDENTIFIER}/yylexnext" );@]@;

@ Every \prodstyle{\%\%} advances \.{\\percentpercentcount} by one and is passed to
\.{\\yylexreturnptr} as \.{PERCENT\_PERCENT}. When the count becomes equal to two, the
scanner is switched to the \.{SC\_EPILOGUE} state before the token is returned.
@<Switch sections@>=
 @[TeX_( "/advance/percentpercentcount/@@ne" );@]@;
 @[TeX_( "/ifnum/percentpercentcount=/tw@@" );@]@;
 @[TeX_( "    /yyBEGIN{SC_EPILOGUE}" );@]@;
 @[TeX_( "/fi" );@]@;
 @[TeX_( "/yylexreturnptr{PERCENT_PERCENT}" );@]@;

@ The values of \.{\\yyfmark} and \.{\\yysmark} at the opening \.{\%\{} are saved in
\.{\\postoks} before the scanner enters the \.{SC\_PROLOGUE} state. The saved
values are inserted into the value of the token when the prologue is finished.
@<Start assembling prologue code@>=
 @[TeX_( "/edef/next{/postoks{{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yyBEGIN{SC_PROLOGUE}/yylexnext" );@]@;

@ {\it Supporting \flexrestyle{\\0} complexifies our implementation for no expected added value}.

@<Do not support zero characters@>=
@G(fs2)
<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
{
 \0                                {@> @[TeX_( "/yywarn{invalid null character}" );@]@=}
}
@g

@ @<Scan after an identifier, check whether a colon is next@>=
@G(fs2)
<SC_AFTER_IDENTIFIER>
{
 "["                               {@> @[@<Process the bracketed part of an identifier@>@]@=}
 ":"                               {@> @[@<Process a colon after an identifier@>@]@=}
 <<EOF>>                           {@> @[@<End the scan with an identifier@>@]@=}
 .                                 {@> @[@<Process a character after an identifier@>@]@=}
}
@g

@ A left bracket after an identifier is treated in one of two ways. If
\.{\\bracketedidstr} is empty, the current state is recorded in
\.{\\bracketedidcontextstate} and the scanner enters \.{SC\_BRACKETED\_ID}.
If a bracketed name has already been collected, \.{\\ROLLBACKCURRENTTOKEN} is
applied to the bracket, the scanner is put in the \.{SC\_RETURN\_BRACKETED\_ID} state and
\.{ID} is returned.
@<Process the bracketed part of an identifier@>=
 @[TeX_( "/ifx/bracketedidstr/empty" );@]@;
 @[TeX_( "    /bracketedidcontextstate/YYSTART /yyBEGIN{SC_BRACKETED_ID}" );@]@;
 @[TeX_( "    /yybreak/yylexnext" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /ROLLBACKCURRENTTOKEN" );@]@;
 @[TeX_( "    /yyBEGIN{SC_RETURN_BRACKETED_ID}" );@]@;
 @[TeX_( "    /yybreak{/yylexreturn{ID}}" );@]@;
 @[TeX_( "/yycontinue" );@]@;

@ A colon after the identifier results in an \.{ID\_COLON} token. The scanner goes
back to \.{INITIAL} unless \.{\\bracketedidstr} is not empty, in which case the
next state is \.{SC\_RETURN\_BRACKETED\_ID}.
@<Process a colon after an identifier@>=
 @[TeX_( "/ifx/bracketedidstr/empty" );@]@;
 @[TeX_( "    /yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /yyBEGIN{SC_RETURN_BRACKETED_ID}" );@]@;
 @[TeX_( "/fi" );@]@;
 @[TeX_( "/yylexreturn{ID_COLON}" );@]@;

@ Any other character after the identifier is first subjected to
\.{\\ROLLBACKCURRENTTOKEN}, and a plain \.{ID} is returned. The next state
is chosen the same way as for the colon: \.{INITIAL} if \.{\\bracketedidstr} is
empty, and \.{SC\_RETURN\_BRACKETED\_ID} otherwise.
@<Process a character after an identifier@>=
 @[TeX_( "/ROLLBACKCURRENTTOKEN" );@]@;
 @[TeX_( "/ifx/bracketedidstr/empty" );@]@;
 @[TeX_( "    /yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /yyBEGIN{SC_RETURN_BRACKETED_ID}" );@]@;
 @[TeX_( "/fi" );@]@;
 @[TeX_( "/yylexreturn{ID}" );@]@;

@ At the end of the input the pending identifier is returned as \.{ID}. The state
is selected first (\.{INITIAL} if \.{\\bracketedidstr} is empty,
\.{SC\_RETURN\_BRACKETED\_ID} otherwise) and \.{\\ROLLBACKCURRENTTOKEN} is
applied after that, just before the token is returned.
@<End the scan with an identifier@>=
 @[TeX_( "/ifx/bracketedidstr/empty" );@]@;
 @[TeX_( "    /yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /yyBEGIN{SC_RETURN_BRACKETED_ID}" );@]@;
 @[TeX_( "/fi" );@]@;
 @[TeX_( "/ROLLBACKCURRENTTOKEN" );@]@;
 @[TeX_( "/yylexreturn{ID}" );@]@;

@ @<Scan bracketed identifiers@>=
@G(fs2)
<SC_BRACKETED_ID>
{
 <<EOF>>                           {@> @[@<Complain about unexpected end of file inside brackets@>@]@=}
 {id}                              {@> @[@<Process bracketed identifier@>@]@=}
 "]"                               {@> @[@<Finish processing bracketed identifier@>@]@=}
 [^\].A-Za-z0-9_/ \f\n\t\v]+|.     {@> @[@<Complain about improper identifier characters@>@]@=}
}
@g

@ Only one identifier is accepted between the brackets. The first one is stored in
\.{\\bracketedidstr} as four groups (the contents of \.{\\yytextpure} and
\.{\\yytext}, followed by the values of \.{\\yyfmark} and \.{\\yysmark}); any
identifier found while \.{\\bracketedidstr} is not empty only produces a warning.
@<Process bracketed identifier@>=
 @[TeX_( "/ifx/bracketedidstr/empty" );@]@;
 @[TeX_( "    /edef/bracketedidstr{{/the/yytextpure}{/the/yytext}" );@]@;
 @[TeX_( "        {/the/yyfmark}{/the/yysmark}}" );@]@;
 @[TeX_( "    /yybreak/yylexnext" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /yybreak{/yywarn{unexpected identifier " );@]@;
 @[TeX_( "        in bracketed name: /the/yytext}}" );@]@;
 @[TeX_( "/yycontinue" );@]@;

@ The closing bracket switches the scanner to the state recorded in
\.{\\bracketedidcontextstate}. If \.{\\bracketedidstr} is empty, a warning is
issued. Otherwise, when the recorded state is \.{INITIAL}, the collected name
becomes the value of a \.{BRACKETED\_ID} token and \.{\\bracketedidstr} is emptied;
for any other recorded state the name is kept and \.{\\yylexnext} is invoked.
@<Finish processing bracketed identifier@>=
 @[TeX_( "/yyBEGINr/bracketedidcontextstate" );@]@;
 @[TeX_( "/ifx/bracketedidstr/empty" );@]@;
 @[TeX_( "    /yybreak{/yywarn{an identifier expected}}" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /ifnum/bracketedidcontextstate=/yylexstate{INITIAL}/relax" );@]@;
 @[TeX_( "        /expandafter/yylval/expandafter{/bracketedidstr}" );@]@;
 @[TeX_( "        /let/bracketedidstr=/empty" );@]@;
 @[TeX_( "        /yybreak@@{/yylexreturn{BRACKETED_ID}}" );@]@;
 @[TeX_( "    /else" );@]@;
 @[TeX_( "        /yybreak@@/yylexnext" );@]@;
 @[TeX_( "    /fi" );@]@;
 @[TeX_( "/yycontinue" );@]@;

@ @<Complain about improper identifier characters@>=
 @[TeX_( "/yyfatal{invalid character(s) in bracketed name: /the/yytext}" );@]@;

@ @<Complain about unexpected end of file inside brackets@>=
 @[TeX_( "/yyBEGINr/bracketedidcontextstate" );@]@;
 @[TeX_( "/yyfatal{unexpected end of file inside brackets}" );@]@;

@ @<Scan bracketed identifiers@>=
@G(fs2)
<SC_RETURN_BRACKETED_ID>
{
 .                                 {@> @[@<Return a bracketed identifier@>@]@=}
}
@g

@ Any character seen in the \.{SC\_RETURN\_BRACKETED\_ID} state is subjected to
\.{\\ROLLBACKCURRENTTOKEN}. The contents of \.{\\bracketedidstr} are
placed in \.{\\yylval}, \.{\\bracketedidstr} is emptied, and the scanner goes back to
\.{INITIAL} before \.{BRACKETED\_ID} is returned.
@<Return a bracketed identifier@>=
 @[TeX_( "/ROLLBACKCURRENTTOKEN" );@]@;
 @[TeX_( "/expandafter/yylval/expandafter{/bracketedidstr}" );@]@;
 @[TeX_( "/let/bracketedidstr=/empty" );@]@;
 @[TeX_( "/yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/yylexreturn{TAG}" );@]@;

@ {\it Scanning a \yacc\ comment.  The initial \.{/*} is already eaten}.
@<Scan a \yacc\ comment@>=
@G(fs2)
<SC_YACC_COMMENT>
{
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file in a comment}" );@]@=}
 "*/"                              {@> @[TeX_( "/yyBEGINr/contextstate /yylexnext" );@]@=}
 .|\n                              {@> @[TeX_( "/yylexnext" );@]@=}
}
@g

@ {\it Scanning a \Cee\ comment.  The initial \.{/*} is already eaten}.
@<Scan a \Cee\ comment@>=
@G(fs2)
<SC_COMMENT>
{
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file in a comment}" );@]@=}
 "*"{splice}"/"                    {@> @[TeX_( "/STRINGGROW/yyBEGINr/contextstate /yylexnext" );@]@=}
}
@g

@ {\it Scanning a line comment.  The initial \.{//} is already eaten}.
@<Scan a line comment@>=
@G(fs2)
<SC_LINE_COMMENT>
{
 <<EOF>>                           {@> @[TeX_( "/yyBEGINr/contextstate /ROLLBACKCURRENTTOKEN" );@]
                                    @> @[TeX_( "    /yylexnext" );@]@=}
 "\n"                              {@> @[TeX_( "/STRINGGROW/yyBEGINr/contextstate /yylexnext" );@]@=}
 {splice}                          {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
}
@g

@ {\it Scanning a \bison\ string, including its escapes.
The initial quote is already eaten}.
@<Scan a \bison\ string@>=
@G(fs2)
<SC_ESCAPED_STRING>
{
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file in a string}" );@]@=}
 "\""                              {@> @[@<Finish a \bison\ string@>@]@=}
 "\n"                              {@> @[TeX_( "/yyfatal{unexpected end of line in a string}" );@]@=}
}
@g

@ @<Finish a \bison\ string@>=
 @[TeX_( "/STRINGFINISH" );@]@;
 @[TeX_( "/edef/next{/yylval{{/the/laststring}{/the/laststringraw}" );@]@;
 @[TeX_( "    {/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/yylexreturn{STRING}" );@]@;

@ {\it Scanning a \bison\ character literal, decoding its escapes.
The initial quote is already eaten}.
@<Scan a character literal@>=
@G(fs2)
<SC_ESCAPED_CHARACTER>
{
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file in a literal}" );@]@=}
 "'"                               {@> @[@<Return an escaped character@>@]@=}
 "\n"                              {@> @[TeX_( "/yyfatal{unexpected end of line in a literal}" );@]@=}
}
@g

@ @<Return an escaped character@>=
 @[TeX_( "/STRINGFINISH" );@]@;
 @[TeX_( "/edef/next{/yylval{{/the/laststring}{/the/laststringraw}" );@]@;
 @[TeX_( "    {/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/STRINGFREE" );@]@;
 @[TeX_( "/yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/yylexreturn{CHAR}" );@]@;

@ {\it Scanning a tag.  The initial angle bracket is already eaten}.
@<Scan a tag@>=
@G(fs2)
<SC_TAG>
{
 ">"                               {@> @[@<Finish a tag@>@]@=}
 ([^<>]|->)+                       {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 "<"                               {@> @[@<Raise nesting level@>@]@=}
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file in a literal}" );@]@=}
}
@g

@ Each \.{>} decreases \.{\\lonesting} by one. If the result is negative, the tag is
complete: after \.{\\STRINGFINISH}, the value of the token is assembled from the contents of
\.{\\laststring} and \.{\\laststringraw} and the values of \.{\\yyfmark} and \.{\\yysmark},
the scanner goes back to \.{INITIAL} and \.{TAG} is returned. Otherwise the bracket
is added to the string by \.{\\STRINGGROW} and the scanning of the tag goes on.
@<Finish a tag@>=
 @[TeX_( "/advance/lonesting/m@@ne" );@]@;
 @[TeX_( "/ifnum/lonesting</z@@" );@]@;
 @[TeX_( "    /STRINGFINISH" );@]@;
 @[TeX_( "    /edef/next{/yylval{{/the/laststring}{/the/laststringraw}" );@]@;
 @[TeX_( "        {/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "    /STRINGFREE" );@]@;
 @[TeX_( "    /yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "    /yybreak{/yylexreturn{TAG}}" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /STRINGGROW/yybreak/yylexnext" );@]@;
 @[TeX_( "/yycontinue" );@]@;

@ This is a slightly different rule from the original scanner. We do not perform |yyleng| computations,
so it makes sense to raise the nesting level one by one.
@<Raise nesting level@>=
 @[TeX_( "/STRINGGROW" );@]@;
 @[TeX_( "/advance/lonesting/@@ne" );@]@;
 @[TeX_( "/yylexnext" );@]@;

@ @<Decode escaped characters@>=
@G(fs2)
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
 \\[0-7]{1,3}                      {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\x[0-9abcdefABCDEF]+             {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\a                               {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\b                               {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\f                               {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\n                               {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\r                               {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\t                               {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\v                               {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}

 /* {\it \flexrestyle{\\\\[\\"\\'?\\\\]} is shorter but confuses |xgettext|}  */
 \\("\""|"'"|"?"|"\\")             {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}

 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
 \\(.|\n)                          {@> @[TeX_( "/yyfatal{invalid character after /\\: /the/yytext}" );@]@=}
}
@g

@ @<Scan user-code characters and strings@>=
@G(fs2)
<SC_CHARACTER,SC_STRING>
{
 {splice}|\\{splice}[^\n\[\]]      {@> @[TeX_( "/STRINGGROW/yylexnext" );@]@=}
}

<SC_CHARACTER>
{
 "'"                               {@> @[TeX_( "/STRINGGROW /yyBEGINr/contextstate /yylexnext" );@]@=}
 \n                                {@> @[TeX_( "/yyfatal{unexpected end of line instead of a character}" );@]@=}
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file instead of a character}" );@]@=}
}

<SC_STRING>
{
 "\""                              {@> @[TeX_( "/STRINGGROW /yyBEGINr/contextstate /yylexnext" );@]@=}
 \n                                {@> @[TeX_( "/yyfatal{unexpected end of line instead of a character}" );@]@=}
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file instead of a character}" );@]@=}
}
@g

@ @<Strings, comments etc.\ found in user code@>=
@G(fs2)
<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
{
 "'"                               {@> @[TeX_( "/STRINGGROW /contextstate/YYSTART /yyBEGIN{SC_CHARACTER}/yylexnext" );@]@=}
 "\""                              {@> @[TeX_( "/STRINGGROW /contextstate/YYSTART /yyBEGIN{SC_STRING}/yylexnext" );@]@=}
 "/"{splice}"*"                    {@> @[TeX_( "/STRINGGROW /contextstate/YYSTART /yyBEGIN{SC_COMMENT}/yylexnext" );@]@=}
 "/"{splice}"/"                    {@> @[TeX_( "/STRINGGROW /contextstate/YYSTART /yyBEGIN{SC_LINE_COMMENT}/yylexnext" );@]@=}
}
@g

@ {\it Scanning some code in braces (actions, predicates). The
initial \.{\{} is already eaten}.
@<Scan code in braces@>=
@G(fs2)
<SC_BRACED_CODE,SC_PREDICATE>
{
 "{"|"<"{splice}"%"                {@> @[TeX_( "/STRINGGROW /advance/lonesting/@@ne /yylexnext" );@]@=}
 "%"{splice}">"                    {@> @[TeX_( "/STRINGGROW /advance/lonesting/m@@ne /yylexnext" );@]@=}

 /* {\it Tokenize \.{<<\%} correctly (as \.{<<} \.{\%}) rather than incorrectly (as \.{<} \.{<\%}).}  */
 "<"{splice}"<"                    {@> @[TeX_( "/STRINGGROW /yylexnext" );@]@=}

 /* {\it The input ended before the code was closed: this is an end of file, not an end of line.}  */
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file inside braced code}" );@]@=}
}

<SC_BRACED_CODE>
{
 "}"                               {@> @[@<Add closing brace to the braced code@>@]@=}
}

<SC_PREDICATE>
{
 "}"                               {@> @[@<Add closing brace to a predicate@>@]@=}
}
@g

@ Unlike the original lexer, we do not return the closing brace as part of the
braced code.

@<Add closing brace to the braced code@>=
 @[TeX_( "/advance/lonesting/m@@ne" );@]@;
 @[TeX_( "/ifnum/lonesting</z@@" );@]@;
 @[TeX_( "    /STRINGFINISH" );@]@;
 @[TeX_( "    /edef/next{/yylval{{/the/laststring}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "    /yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "    /yybreak{/yylexreturn{BRACED_CODE}}" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /STRINGGROW" );@]@;
 @[TeX_( "    /yybreak/yylexnext" );@]@;
 @[TeX_( "/yycontinue" );@]@;

@ The closing brace of a predicate is treated the same way as the one that ends the
braced code, except for the token returned. The brace decreases
\.{\\lonesting} by one; if the result is negative, the string is finished, its
contents together with the values of \.{\\yyfmark} and \.{\\yysmark} become the value of
a \.{BRACED\_PREDICATE} token, and the scanner goes back to \.{INITIAL}.
Otherwise the brace is added to the string by \.{\\STRINGGROW}.
@<Add closing brace to a predicate@>=
 @[TeX_( "/advance/lonesting/m@@ne" );@]@;
 @[TeX_( "/ifnum/lonesting</z@@" );@]@;
 @[TeX_( "    /STRINGFINISH" );@]@;
 @[TeX_( "    /edef/next{/yylval{{/the/laststring}{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "    /yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "    /yybreak{/yylexreturn{BRACED_PREDICATE}}" );@]@;
 @[TeX_( "/else" );@]@;
 @[TeX_( "    /STRINGGROW" );@]@;
 @[TeX_( "    /yybreak/yylexnext" );@]@;
 @[TeX_( "/yycontinue" );@]@;

@ {\it Scanning some prologue: from \.{\%\{} (already scanned) to \.{\%\}}}.
@<Scan prologue@>=
@G(fs2)
<SC_PROLOGUE>
{
 "%}"                              {@> @[@<Finish braced code@>@]@=}
 <<EOF>>                           {@> @[TeX_( "/yyfatal{unexpected end of file inside prologue}" );@]@=}
}
@g

@ In spite of its name, this fragment is used when the closing \.{\%\}} of the
prologue is found. The value of the \.{PROLOGUE} token consists of the contents of
\.{\\laststring}, followed by the contents of \.{\\postoks} (filled when the prologue was
started) and the values of \.{\\yyfmark} and \.{\\yysmark} at the end of the prologue.
The scanner goes back to \.{INITIAL}.
@<Finish braced code@>=
 @[TeX_( "/STRINGFINISH" );@]@;
 @[TeX_( "/edef/next{/yylval{{/the/laststring}/the/postoks{/the/yyfmark}{/the/yysmark}}}/next" );@]@;
 @[TeX_( "/yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/yylexreturn{PROLOGUE}" );@]@;

@ {\it Scanning the epilogue (everything after the second \prodstyle{\%\%}, which
has already been eaten)}.
@<Scan the epilogue@>=
@G(fs2)
<SC_EPILOGUE>
{
 <<EOF>>                           {@> @[@<Handle end of file in the epilogue@>@]@=}
}
@g

@ The end of the input completes the epilogue. After \.{\\ROLLBACKCURRENTTOKEN} and
\.{\\STRINGFINISH}, the contents of \.{\\laststring} are assigned to \.{\\yylval} directly
(no location marks are added), the scanner goes back to \.{INITIAL}, and \.{EPILOGUE}
is returned.
@<Handle end of file in the epilogue@>=
 @[TeX_( "/ROLLBACKCURRENTTOKEN" );@]@;
 @[TeX_( "/STRINGFINISH" );@]@;
 @[TeX_( "/yylval=/laststring" );@]@;
 @[TeX_( "/yyBEGIN{INITIAL}" );@]@;
 @[TeX_( "/yylexreturn{EPILOGUE}" );@]@;

@ {\it By default, grow the string obstack with the input}.
\ifbootstrapmode % only if this file is used to extract state information
   \immediate\closeout\stlist
\fi
@<Add the scanned symbol to the current string@>=
@G(fs2)
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,
SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,
SC_PROLOGUE,SC_EPILOGUE>\n         {@> @[TeX_( "/STRINGGROW /yylexnext" );@]@=}
@g