@q Copyright 2012-2024, Alexander Shibakov@>
@q This file is part of SPLinT@>

@q SPLinT is free software: you can redistribute it and/or modify@>
@q it under the terms of the GNU General Public License as published by@>
@q the Free Software Foundation, either version 3 of the License, or@>
@q (at your option) any later version.@>

@q SPLinT is distributed in the hope that it will be useful,@>
@q but WITHOUT ANY WARRANTY; without even the implied warranty of@>
@q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the@>
@q GNU General Public License for more details.@>

@q You should have received a copy of the GNU General Public License@>
@q along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.@>

@*1\eatone{Bison}\bison\ specific routines.
The placeholder code left blank in the common routines is filed in
with the code relevant to the output of parser tables in the following sections.

@*2Tables. \namedspot{bsfile}Here are all the parser table names. Some tables are not output but adding
one to the list in the future will be easy: it does not even have to
be done here.
@<Parser table names@>=
 _register_table_d(yytranslate)@;
 _register_table_d(yyr1)@;
 _register_table_d(yyr2)@;
 _register_table_d(yydefact)@;
 _register_table_d(yydefgoto)@;
 _register_table_d(yypact)@;
 _register_table_d(yypgoto)@;
 _register_table_d(yytable)@;
 _register_table_d(yycheck)@;
 _register_table_d(yystos)@;
 _register_table_d(yytname)@;
 _register_table_d(yyprhs)@;
 _register_table_d(yyrhs)@;

@ One special table requires a little bit more preparation. This is a
table that lists the depth of the stack before an implicit terminal. It
is not one of the tables that is used by \bison\ itself but is needed
if the symbolic name processing is to be implemented (\bison\ has
access to this information `on the fly'). The `new' \bison\ (starting
with version~\.{3.0}) does not generate |yyprhs| and |yyrhs| or any
other arrays that contain similar information, so we fake them here if
such a crippled version of \bison\ is used.

The |yyrimplicit| array will be used by the table output code, together with
the postprocessor to output right hand side lengths for the term references that
require them in the case when the `native' \bison\ references are
used.
@<Variables and types local to the parser@>=
 unsigned int yyrthree[YYNRULES + 1] = { 0 };
 int yyrimplicit[YYNRULES + 1] = { 0 };
#ifdef BISON_IS_CRIPPLED
 unsigned int yyrhs[YYNRULES + 1] = { -1 };
 unsigned int yyprhs[YYNRULES + 1] = { 0 };
#endif

@ We populate this table below $\ldots$
@<Parser defaults@>=
#ifndef BISON_IS_CRIPPLED
 assert( YYNRULES + 1 == sizeof(yyprhs)/sizeof(yyprhs[0]) );

 { int i, j;

     for ( i = 1; i <= YYNRULES; i++ ) {

         for ( j = 0; yyrhs[ yyprhs[i] + j ] != -1; j++ ) {

             assert( yyprhs[i] + j < sizeof(yyrhs) );
             assert( j < yyr1[i] );

             if ( @<This is an implicit term@> ) {

                 @<Find the rule that defines it and set |yyrthree|@>@;

             }

         }
     }
 }
#endif

@ @<This is an implicit term@>=
 ( strlen( yytname[ yyrhs[yyprhs[i]+j] ] ) > 1 ) &&
 ( yytname[ yyrhs[yyprhs[i]+j] ][0] == '$' ) &&
 ( yytname[ yyrhs[yyprhs[i]+j] ][1] == '@@' )

@ @<Find the rule that defines it...@>=
 int rule_number;

 for ( rule_number = 1; rule_number < YYNRULES; rule_number++ ) {

     if ( yyr1[rule_number] == yyrhs[yyprhs[i]+j] ) {

         yyrthree[rule_number] = j;
         break;

     }
 }

 assert( rule_number < YYNRULES );

@ $\ldots$ and add its name to the list.
@<Parser table names@>=
 _register_table_d(yyrthree)@;

@ We list  some macros that are used to assist the post
processor and take advantage of the |yyrimlicit| array. As at this
time the size of the array is unknown (the preamble is included before
the parser file by \.{mkeparser.w} so the number of rules is unknown
at this point), we declare the array as a pointer.
@d BZ( term, anchor ) ( ((YYSTYPE *)&(term)) - ((YYSTYPE *)&(anchor)) + 1 )
@d BZZ( term, anchor ) (
   (yyrimplicit_p[yyn] = ((yyrimplicit_p[yyn] < 0) ?
    yyrimplicit_p[yyn] : ((YYSTYPE *)&(term)) - ((YYSTYPE *)&(anchor)) + 1)),
   ((YYSTYPE *)&(term)) - ((YYSTYPE *)&(anchor)) + 1
)
@<\Cee\ setup code specific to \bison@>=
 int *yyrimplicit_p;

@ @<Output parser semantic actions@>=
 yyrimplicit_p = yyrimplicit;

@*2Actions. There are several ways of making |yyparse()| execute all portions of
the action code. The one chosen here makes sure that none of the
tables gets written past its last element. To see how it works, it
might be helpful to `walk through' \bison's output to see how each
change affects the generated parser.
@<Output parser semantic actions@>=
 if ( output_desc.output_actions ) {

     int i, j;

     fprintf( tables_out, "%s", action_desc.preamble );

     if ( !bare_actions ) {

         yypact[0] = YYPACT_NINF;
         yypgoto[0] = -1;
         yydefgoto[0] = YYFINAL;

     }

     for ( i = 1; i < sizeof(yyr1)/sizeof(yyr1[0]); i++ ) {

         fprintf( tables_out, action_desc.act_setup, i, yyr2[i] - 1 );

         if ( action_desc.print_rule ) {

             action_desc.print_rule( i );

         }

         if ( yyr2[i] > 0 ) {

             if ( action_desc.action1 ) {

                 fprintf( tables_out, "%s", action_desc.action1 );

             }
         }

         for ( j = 2; j <= yyr2[i]; j++ ) {

             if ( action_desc.actionn ) {

                 fprintf( tables_out, action_desc.actionn, j );
             }

         }

         if ( !bare_actions ) {

             yyr1[i] = YYNTOKENS;
             yydefact[0] = i;
             yyrimplicit[i] = -yyr2[i];
             yyr2[i] = 0;
             yyparse(YYPARSE_PARAMETERS);

         }

         fprintf( tables_out, action_desc.act_suffix, i, yyr2[i] - 1 );

     }

     fprintf( tables_out, "%s", action_desc.postamble );

     if ( action_desc.cleanup ) {

         action_desc.cleanup( &action_desc );

     }

 }

 for ( int i = 1; i < YYNRULES + 1; i++ ) {

     if ( yyrimplicit[i] > 0 ) {
         fprintf( tables_out, "\\yyimplicitlengthset{%d}{%d}%%\n", i, yyrimplicit[i] );
     }

 }

@*2Constants. A generic list of constants to be used later in different contexts is defined below.
As before, the appropriate macro will be defined generically to do what is required with these
names (for example, we can turn each name into a string for reporting purposes).
@<Parser constants@>=
 _register_const_d(YYEMPTY)@;
 _register_const_d(YYPACT_NINF)@;
 _register_const_d(YYLAST)@;
 _register_const_d(YYNTOKENS)@;
 _register_const_d(YYNRULES)@;
 _register_const_d(YYNSTATES)@;
 _register_const_d(YYFINAL)@;
#ifndef YYEOF
 _register_const_d(YYSYMBOL_YYEOF)@;
#endif

@ Constants defined to maintain compatibility with the older versions of \bison.
@<Parser virtual constants@>=
 _register_const_d(YYSYMBOL_YYEOF, YYEOF)@;

@*2Tokens.
Similar techniques are employed in token output. Tokens are parser
specific (the scanner only needs their numeric values) so we need {\it
some\/} flexibility to output them in a desired format. For special
purposes (say changing the way tokens are typeset) we can control the
format tokens are output in.
@<Variables and types local to the parser@>=
 char *token_format_char = NULL;
 char *token_format_affix = NULL;
 char *token_format_suffix = NULL;
 char *bootstrap_token_format = NULL;

@ @<Parser specific options without shortcuts@>=
 register_option_("token-format-char", required_argument, 0, TOKEN_FORMAT_CHAR, "")@;
 register_option_("token-format-affix", required_argument, 0, TOKEN_FORMAT_AFFIX, "")@;
 register_option_("token-format-suffix", required_argument, 0, TOKEN_FORMAT_SUFFIX, "")@;
 register_option_("bootstrap-token-format", required_argument, 0, BOOTSTRAP_TOKEN_FORMAT, "")@;

@ @<Handle parser output options@>=
 case TOKEN_FORMAT_CHAR:@;
     token_format_char = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
     strcpy(token_format_char, optarg);
     break;

 case TOKEN_FORMAT_AFFIX:@;
     token_format_affix = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
     strcpy(token_format_affix, optarg);
     break;

 case TOKEN_FORMAT_SUFFIX:@;
     token_format_suffix = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
     strcpy(token_format_suffix, optarg);
     break;

 case BOOTSTRAP_TOKEN_FORMAT:@;
     bootstrap_token_format = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
     strcpy(bootstrap_token_format, optarg);
     break;

@ @<Parser specific output descriptor fields@>=
 bool output_tokens:1;

@ No tokens are output by default.
@<Parser specific default outputs@>=
 @[@].output_tokens = 0,@[@]

@ The only part of the code below that needs any explanation is the
`bootstrap' token output. In \bison\ every token has three attributes:
its `macro name' (say, \.{STRING}) that is used by the parse code
internally, its `print name' (\.{"string"} to continue the example)
that \bison\ uses to print the token names in its diagnostic messages,
and its numeric value (that can be assigned implicitly by \bison\
itself or explicitly by the user). Only the `print names' are kept in
the |yytname| array so to reuse the scanner used by \bison\ we either
have to extract the token `macro names' from the \Cee\ code ourselves
to pass them on to the lexer, or use a special `stripped down' version
of a \bison\ grammar parser to extract the names from the parser's
\bison\ grammar. To do this, some token names would still need to be
known to the scanner. These tokens are selected by hand to make the
`bootstrapping' parser operational. The token list for the \bison\
grammar parser can be examined as part of the appropriate
\locallink{bootstraptokens}driver file\endlink.
@<Output parser tokens@>=
 if ( output_desc.output_tokens ) {

     int i;
     int length;
     char token;
     char *token_name;
     bool too_creative = false;

     for ( i = 258; i < sizeof(yytranslate)/sizeof(yytranslate[0]) ; i++ ) {

         token_name = yytname[yytranslate[i]];

         if ( token_name ) {

             fprintf( tables_out, token_format_affix, yytranslate[i], i );

             length = 0;

             while ( (token = *token_name) ) {

                 if ( token_format_char ) {

                     length += fprintf( tables_out, token_format_char, (unsigned int)token );

                 }

                 if ( token < 040 || token == 0177 ) {

                     too_creative = true;

                 }

                 token_name++;
             }

             fprintf( tables_out, token_format_suffix, too_creative ? ".unprintable." : yytname[yytranslate[i]] );

         }
     }
 }

#ifdef BISON_BOOTSTRAP_MODE
   fprintf( tables_out, "\\bootstrapmodetrue\n" );
   fprintf( tables_out, "%% token values needed to bootstrap the parser\n" );
   bootstrap_tokens( bootstrap_token_format );
#endif

@ The size of the token name table is useful to determine, say, how
many `named' tokens the parser uses.
@<Output parser constants@>=
 fprintf( tables_out, "\\constset{YYTRANSLATESIZE}{%d}%%\n", (int)(sizeof(yytranslate)/sizeof(yytranslate[0])) );

@*2Output modes.
The code below can be easily extended and modified to output parser
tables, actions, and constants in a language of one's choice. We are
only interested in \TeX, however, thus other modes are very
rudimentary or non-existent at this point.

@*3 Token only mode.
Token only output mode does exactly what is expected: outputs token
names and values in the format of your choosing.

@<Parser specific output modes@>=
 TOKEN_ONLY_OUT,@[@]

@ @<Handle parser related output modes@>=
 case TOKEN_ONLY_OUT:@;
     @<Prepare token only output environment@>@;
     break;

@ @<Parser specific options without shortcuts@>=
 register_option_("token-only-mode", no_argument, 0, TOKEN_ONLY_MODE, "")@;

@ @<Configure parser output modes@>=
 case TOKEN_ONLY_MODE:@;
     mode = TOKEN_ONLY_OUT;
     break;

@ @<Prepare token only output environment@>=
 if ( !token_format_char ) {

     token_format_char = "{%u}";

 }

 if ( !token_format_affix ) {

     token_format_affix = "%% token: %d, token value: %d\n\\prettytoken@@{";

 }

 if ( !token_format_suffix ) {

     token_format_suffix = "}%% %s\n";

 }

 output_desc.output_tokens = 1;

@*3Generic output. Generic output is not programmed yet.

@<Parser specific output modes@>=
 GENERIC_OUT,@[@]

@ @<Handle parser related output modes@>=
 case GENERIC_OUT:@;
     printf( "This mode is not supported yet\n" );
     exit(0);
     break;

@*3\TeX\ output. The \TeX\ mode is the main reason for this software.

@<Parser specific output modes@>=
 TEX_OUT,@[@]

@ @<Handle parser related output modes@>=
 case TEX_OUT:@;
     @<Set up \TeX\ table output for parser tables@>@;
     @<Prepare \TeX\ format for semantic action output@>@;
     @<Prepare \TeX\ format for parser constants@>@;
     @<Prepare \TeX\ format for parser tokens@>@;
     break;

@ Some tables require name adjustments due to \TeX's
reluctance to treat digits as part of a name.
@<Set up \TeX\ table output for parser tables@>=
#define _register_table_d(name) tex_table(name);
 @<Table names@>@;
#undef _register_table_d

 yyr1_desc.name = "yyrone";
 yyr2_desc.name = "yyrtwo";

@ The memory allocated for the |yytname| table is released at the end.
@<Helper functions declarations for for parser output@>=
 void yytname_cleanup( struct table_d *table );
 int yytname_formatter_tex( FILE *stream, int index );
 int yytname_formatter( FILE *stream, int index );

@ There are a number of helper functions to output complicated names
in \TeX. The safest way seems to be to output those as sequences of
{\sc ASCII} codes to accommodate names like \.{\$}\.{end}
safely. \TeX's \.{\^\^}$\ldots$ convention is supported as well.
@<Helper functions for parser output@>=
 void yytname_cleanup( struct table_d *table ) {

     free( table->separator );
     free( table->null );

 }

 int yytname_formatter_tex( FILE *stream, int index ) {

     char *token_name = yytname[index];
     unsigned char token;
     int length = 0;

     fprintf( stream, "\\addname " );

     while ( (token = *token_name) ) {

         if ( token < 040 || token == 0177 ) { /* unprintable characters */

             fprintf( stream, "^^%c", token < 0100 ? (unsigned char)(token + 0100) : (unsigned char)(token - 100) );
             length += 3;

         } else {

             fprintf( stream, "%c", token );
             length++;

         }

         token_name++;

     }
     fprintf( stream, "\n" );

     return length;

 }

 int yytname_formatter( FILE *stream, int index ) {

     char *token_name;
     unsigned char token;
     int length = 0;
     bool too_creative = false; /* to indicate if the name is too dangerous to print */

     fprintf( stream, "\\addname" );

     if ( index >= 0 ) { /* this is not the last name */

         token_name = yytname[index];

         if ( token_name == NULL ) {

             token_name = "$impossible";

         }

         while ( (token = *token_name) ) {

             length += fprintf( stream, "{%u}", (unsigned int)token );

             if ( token < 040 || token == 0177 ) {

                 too_creative = true;

             }

             token_name++;

         }

         fprintf( stream, "%% %s\n", too_creative ? ".unprintable." : yytname[index] );

     } else { /* this is the last name */

         token_name = yytname[-index];

         if ( token_name == NULL ) {

             token_name = "$impossible";

         }

         while ( (token = *token_name) ) {

             length += fprintf( stream, "{%u}", (unsigned int)token );
             token_name++;

             if ( token < 040 || token == 0177 ) {

                 too_creative = true;

             }

         }

         fprintf( stream, "%% %s\n\\end\n%%\n", too_creative ? ".unprintable." :
                                                             ( yytname[-index] ? yytname[-index] : "end of array" ) );

     }

     return length;

 }

@ @<Set up \TeX\ table output for parser tables@>=
 yytname_desc.preamble = "%%\n\\newtable{yytname}{}\\tempca0\\relax%% a robust way to add the yytname array\n";
 yytname_desc.separator = NULL;
 yytname_desc.postamble = NULL;
 yytname_desc.null = NULL;
 yytname_desc.null_postamble = NULL;
 yytname_desc.optimized_numeric = NULL;
 yytname_desc.prettify = false;
 yytname_desc.formatter = yytname_formatter;

 yytname_desc.cleanup = NULL;

 output_desc.output_yytname = 1;

@ @<Prepare \TeX\ format for semantic action output@>=

 if ( optimize_actions ) {

     action_desc.preamble  = "%\n% the big switch\n%\n"@/
                             "\\catcode`\\/=0\\relax % see the documentation for an explanation of this trick\n"@/
                             "\\def\\yybigswitch#1{%%\n"@/
                             "    \\csname dobisonaction\\number #1\\parsernamespace\\endcsname\n"@;
                             "}\\stashswitch{yybigswitch}%%\n";
     action_desc.act_setup = "\n\\expandafter\\def\\csname dobisonaction%d\\parsernamespace\\endcsname{%%\n%%";
     action_desc.act_suffix = "}%% end of rule %d\n";
     action_desc.action1   = NULL;
     action_desc.actionn   = NULL;
     action_desc.postamble = "\n\\catcode`\\/=12\\relax\n\n";
     action_desc.print_rule = print_rule;
     action_desc.cleanup = NULL;
     output_desc.output_actions = 1;

 } else {

     action_desc.preamble  = "%\n% the big switch\n%\n"@/
                             "\\catcode`\\/=0\\relax % see the documentation for an explanation of this trick\n"@/
                             "\\def\\yybigswitch#1{%%\n"@;
                             "  \\ifcase#1\\relax\n";
     action_desc.act_setup = "      \\or %% (rule %d) ";
     action_desc.act_suffix = "";
     action_desc.action1   = NULL;
     action_desc.actionn   = NULL;
     action_desc.postamble = "  \\else\n  \\fi\n}\\stashswitch{yybigswitch}%%\n\\catcode`\\/=12\\relax\n\n";
     action_desc.print_rule = print_rule;
     action_desc.cleanup = NULL;
     output_desc.output_actions = 1;

}

@ Grammar rules are listed in a readable form alongside the action
code to make it possible to quickly find an appropriate action. The
rules are not output if a crippled \bison\ is used.
@<Helper functions for parser output@>=
 void print_rule( int n ) {

     fprintf( tables_out, "%s%s:  ", (n < 10 && !optimize_actions ? " " : ""), yytname[yyr1[n]] );

#ifndef BISON_IS_CRIPPLED
     int i;

     i = yyprhs[n];

     if ( yyrhs[i] < 0 ) {

         fprintf( tables_out, "<empty>" );

     } else {

         while( yyrhs[i] > 0 ) {

             fprintf( tables_out, "%s ", yytname[yyrhs[i]] );
             i++;

         }

     }
#endif

     fprintf( tables_out, "\n" );

 }

@ \TeX\ constant output is another place where the techniques described above are applied.
As before, the macro handles the repetitive work of initialization, declaration, etc in
each place where the corresponding constant is mentioned. The exceptions are \.{YYPACT\_NINF}
and \.{YYSYMBOL\_YYEOF} that have to be handled separately because the
underscore in its name makes it difficult to use it as a command sequence name.
\def\YYPACTxNINFxdesc{\.{YYPACT\_NINF\_}\\{desc}}
\def\YYSYMBOLxYYEOFxdesc{\.{YYSYMBOL\_YYEOF\_}\\{desc}}

@s YYPACT_NINF_desc TeX
@s YYSYMBOL_YYEOF_desc TeX

@<Prepare \TeX\ format for parser constants@>=
#define _register_const_d(c_name) @[c_name##_desc.format = "\\constset{%s}{%d}%%\n"; \
                                   c_name##_desc.name = #c_name; \
                                   c_name##_desc.value = c_name; \
                                   output_desc.output_##c_name = 1;@]
 @<Parser constants@>@;
#undef _register_const_d
#ifdef YYEOF /* other values have already been set correctly */
#define _register_const_d(c_name,vvalue) @[c_name##_desc.format = "\\constset{%s}{%d}%%\n"; \
                                   c_name##_desc.name = #c_name; \
                                   c_name##_desc.value = vvalue; \
                                   output_desc.output_##c_name = 1;@]
 @<Parser virtual constants@>@;
#undef _register_const_d
#endif
YYPACT_NINF_desc.name = "YYPACTNINF";
YYSYMBOL_YYEOF_desc.name = "YYSYMBOLxYYEOF";

@ Token definitions round off the \TeX\ output mode.

@<Prepare \TeX\ format for parser tokens@>=

   token_format_char = NULL; /* do not output individual characters */

   if ( !token_format_affix ) {

       token_format_affix = "\\tokenset{%d}{%d}";

   }

   if ( !token_format_suffix ) {

       token_format_suffix = "%% %s\n";

   }


   if ( !bootstrap_token_format ) {

       bootstrap_token_format = "\\expandafter\\def\\csname token\\parsernamespace %s\\endcsname{%d}%% %s\n";

   }

   /* |output_desc.output_tokens = 1;| is no longer necessary as it is done entirely in \TeX */

@*2 Command line options.
We start with the most obvious option, the one begging for help.

@ @<Parser specific options without shortcuts@>=
 register_option_("help", no_argument, 0, LONG_HELP, "")@;

@ @<Shortcuts for command line options affecting parser output@>=
 @[@[@], 'h'@]

@ @<Handle parser output options@>=
 case 'h': /* short help */@;
   fprintf(stderr, "Usage: %s [options] output_file\n", argv[0]);
   exit(0);
   break; /* should not be needed */

 case LONG_HELP:@;
   fprintf(stderr, "%s [--mode=TeX:options] output_file outputs tables\n"
                   "    and constants for a TeX parser\n", argv[0]);
   exit(0);
   break; /* should not be needed */

@ @<Parser specific options with shortcuts@>=
 register_option_("debug", optional_argument, 0, 'b', "")@;
 register_option_("mode", required_argument, 0, 'm', "")@;
 register_option_("table-separator", required_argument, 0, 'z', "")@;

 register_option_("format", required_argument, 0, 'f', "")@; /* name? */
 register_option_("table", required_argument, 0, 't', "")@; /* specific table */
 register_option_("constant", required_argument, 0, 'c', "")@; /* specific constant */
 register_option_("name-length", required_argument, 0, 'l', "")@; /* change |MAX_NAME_LENGTH| */
 register_option_("token", required_argument, 0, 'n', "")@; /* specific token */
 register_option_("run-parse", required_argument, 0, 'p', "")@; /* run the parser */
 register_option_("parse-file", required_argument, 0, 'i', "")@; /* input for the parser */

@ The string below is a list of short options.

@ A few options can be discussed immediately.

@<Variables and types local to the parser@>=
 char *table_separator = "%s ";

@ @<Handle parser output options@>=
 case 'm': /* output mode */@;
     switch( optarg[0] ) {

         case 'T':
         case 't':@;
             mode = TEX_OUT;
             break;

         case 'b':
         case 'B':
         case 'g':
         case 'G':@;
             mode = GENERIC_OUT;
             break;

         default:@;
             break;

     }
     break;

 case 'z':
     table_separator = (char *)malloc( (strlen(optarg) + 1)*sizeof(char) );
     strcpy(table_separator, optarg);
     break;