/* l2xiscan.c  LTX2X Interpreter lexing routines */
/*  Written by: Peter Wilson, CUA  [email protected]                */
/*  This code is partly based on algorithms presented by Ronald Mak in */
/*  "Writing Compilers & Interpreters", John Wiley & Sons, 1991        */


#include <stdio.h>
#include <math.h>
#include <sys/types.h>
#include <sys/timeb.h>
#include "l2xicmon.h"
#include "l2xierr.h"
#include "l2xiscan.h"
#include "l2xiidbg.h"
#ifndef l2xicpr_h
#include "l2xicpr.h"           /* before/after token code lists */
#endif

#define EOF_CHAR 257
#define TAB_SIZE 8
#define QUOTE_CHAR '\''

#define MIN_RESERVED_WORD_LENGTH 2
#define MAX_RESERVED_WORD_LENGTH 15

/* US_CHAR is for ltx2x, adding undescore character */
#define US_CHAR '_'

/* CHARACTER CODES */

typedef enum{
 LETTER,
 DIGIT,
 QUOTE,
 SPECIAL,
 EOF_CODE,
 USCORE,       /* for ltx2x underscore */
} CHAR_CODE;

#define EOS '\0'

/* RESERVED WORD TABLES */

typedef struct {
 char *string;
 TOKEN_CODE token_code;
} RW_STRUCT;

RW_STRUCT rw_2[] = {
 {"if", IF}, /* P&E */
 {"in", IN}, /* P&E */
 {"of", OF}, /* P&E */
 {"or", OR}, /* P&E */
 {"to", TO}, /* P&E */
 {"as", XAS},
 {"by", XBY},
 {"pi", XPI},
 {NULL, 0 },
};

RW_STRUCT rw_3[] = {
 {"and", AND}, /* P&E */
 {"div", DIV}, /* P&E */
 {"end", END}, /* P&E */
 {"for", FOR}, /* P&E */
 {"mod", MOD}, /* P&E */
 {"not", NOT}, /* P&E */
 {"set", SET}, /* P&E */
 {"var", VAR}, /* P&E */
 {"bag", XBAG},
 {"use", XUSE},
 {"xor", XXOR},
 {NULL, 0 },
};

RW_STRUCT rw_4[] = {
 {"case", CASE}, /* P&E */
 {"else", ELSE}, /* P&E */
 {"file", FFILE},
 {"then", THEN}, /* P&E */
 {"type", TYPE}, /* P&E */
 {"with", WITH}, /* P&E */
 {"from", XFROM},
 {"list", XLIST},
 {"real", XREAL},
 {"rule", XRULE},
 {"skip", XSKIP},
 {"like", XLIKE},
 {"self", XSELF},
 {"true", XTRUE},
 {"call", XCALL},
 {NULL, 0 },
};

RW_STRUCT rw_5[] = {
 {"array", ARRAY}, /* P&E */
 {"begin", BEGIN}, /* P&E */
 {"until", UNTIL}, /* P&E */
 {"while", WHILE}, /* P&E */
 {"alias", XALIAS},
 {"fixed", XFIXED},
 {"local", XLOCAL},
 {"model", XMODEL},
 {"oneof", XONEOF},
 {"query", XQUERY},
 {"where", XWHERE},
 {"andor", XANDOR},
 {"false", XFALSE},
 {"notes", XNOTES},
 {"subof", XSUBOF},
 {"supof", XSUPOF},
 {"using", XUSING},
 {NULL, 0 },
};

RW_STRUCT rw_6[] = {
 {"repeat", REPEAT}, /* P&E */
 {"binary", XBINARY},
 {"derive", XDERIVE},
 {"end_if", XEND_IF},
 {"entity", XENTITY},
 {"escape", XESCAPE},
 {"number", XNUMBER},
 {"return", XRETURN},
 {"schema", XSCHEMA},
 {"select", XSELECT},
 {"string", XSTRING},
 {"unique", XUNIQUE},
 {"import", XIMPORT},
 {NULL, 0 },
};

RW_STRUCT rw_7[] = {
 {"boolean", XBOOLEAN},
 {"context", XCONTEXT},
 {"generic", XGENERIC},
 {"integer", XINTEGER},
 {"inverse", XINVERSE},
 {"logical", XLOGICAL},
 {"subtype", XSUBTYPE},
 {"const_e", XCONST_E},
 {"unknown", XUNKNOWN},
 {"purpose", XPURPOSE},
 {"the_day", THE_DAY},
 {NULL, 0 },
};

RW_STRUCT rw_8[] = {
 {"end_code", ENDCODE},       /* for ltx2x */
 {"function", FUNCTION}, /* P&E */
 {"abstract", XABSTRACT},
 {"constant", XCONSTANT},
 {"end_case", XEND_CASE},
 {"end_rule", XEND_RULE},
 {"end_type", XEND_TYPE},
 {"optional", XOPTIONAL},
 {"criteria", XCRITERIA},
 {"the_year", THE_YEAR},
 {NULL, 0 },
};

RW_STRUCT rw_9[] = {
 {"procedure", PROCEDURE}, /* P&E */
 {"aggregate", XAGGREGATE},
 {"end_alias", XEND_ALIAS},
 {"end_local", XEND_LOCAL},
 {"end_model", XEND_MODEL},
 {"otherwise", XOTHERWISE},
 {"reference", XREFERENCE},
 {"supertype", XSUPERTYPE},
 {"end_notes", XEND_NOTES},
 {"objective", XOBJECTIVE},
 {"parameter", XPARAMETER},
 {"test_case", XTEST_CASE},
 {"the_month", THE_MONTH},
 {NULL, 0 },
};

RW_STRUCT rw_10[] = {
 {"end_entity", XEND_ENTITY},
 {"end_repeat", XEND_REPEAT},
 {"end_schema", XEND_SCHEMA},
 {"references", XREFERENCES},
 {NULL, 0},
};

RW_STRUCT rw_11[] = {
 {"end_context", XEND_CONTEXT},
 {"enumeration", XENUMERATION},
 {"end_purpose", XEND_PURPOSE},
 {"realization", XREALIZATION},
 {"schema_data", XSCHEMA_DATA},
 {NULL, 0},
};

RW_STRUCT rw_12[] = {
 {"end_constant", XEND_CONSTANT},
 {"end_function", XEND_FUNCTION},
 {"end_criteria", XEND_CRITERIA},
 {NULL, 0},
};

RW_STRUCT rw_13[] = {
 {"end_procedure", XEND_PROCEDURE},
 {"end_objective", XEND_OBJECTIVE},
 {"end_parameter", XEND_PARAMETER},
 {"end_test_case", XEND_TEST_CASE},
 {NULL, 0},
};

RW_STRUCT rw_14[] = {
 {"end_references", XEND_REFERENCES},
 {NULL, 0},
};

RW_STRUCT rw_15[] = {
 {"end_realization", XEND_REALIZATION},
 {"end_schema_data", XEND_SCHEMA_DATA},
 {NULL, 0},
};



RW_STRUCT *rw_table[] = {
 NULL, NULL, rw_2, rw_3, rw_4, rw_5, rw_6, rw_7, rw_8, rw_9,
             rw_10, rw_11, rw_12, rw_13, rw_14, rw_15,
};

/* token lists */
/* Tokens that start or follow a statement */
TOKEN_CODE statement_start_list[] = {BEGIN, CASE, IF, REPEAT,
                                    IDENTIFIER, XRETURN, XSKIP, XESCAPE, 0};
TOKEN_CODE statement_end_list[] = {SEMICOLON, TO, ENDCODE, END_OF_FILE, 0};

/* Tokens that start a declaration */
TOKEN_CODE declaration_start_list[] = {XENTITY, TYPE, XRULE, XCONSTANT,
                                      XLOCAL, PROCEDURE, FUNCTION,
                                      0};

TOKEN_CODE follow_indexes_list[] = {OF, IDENTIFIER, ARRAY, XBAG, XLIST, SET,
                                   XBINARY, XBOOLEAN, XINTEGER, XLOGICAL,
                                   XNUMBER, XREAL, XSTRING, XGENERIC,
                                   SEMICOLON, ENDCODE, END_OF_FILE, 0};

/* Operator tokens */
TOKEN_CODE rel_op_list[] = {LT, LE, EQUAL, NE, GE, GT, COLONEQUALCOLON,
                           COLONNEQCOLON, IN, XLIKE, 0};
TOKEN_CODE add_op_list[] = {PLUS, MINUS, OR, XXOR, 0};
TOKEN_CODE mult_op_list[] = {STAR, SLASH, DIV, MOD, AND, 0};

TOKEN_CODE follow_header_list[] = {SEMICOLON, ENDCODE, END_OF_FILE, 0};

TOKEN_CODE follow_proc_id_list[] = {LPAREN, SEMICOLON, ENDCODE, END_OF_FILE, 0};

TOKEN_CODE follow_func_id_list[] = {LPAREN, COLON, SEMICOLON, END_OF_FILE, 0};

/* Tokens that follow a formal parameter list */
TOKEN_CODE follow_parms_list[] = {RPAREN, SEMICOLON, ENDCODE, END_OF_FILE, 0};
/* Tokens that follow an actual parameter list */
TOKEN_CODE follow_parm_list[] = {COMMA, RPAREN, 0};

/* Tokens that can follow an expression */
TOKEN_CODE follow_expr_list[] = {OF, SEMICOLON, ENDCODE, END_OF_FILE, 0};

/* Tokens that can start or follow a case label */
TOKEN_CODE case_label_start_list[] = {IDENTIFIER, NUMBER_LITERAL, PLUS, MINUS,
                                     STRING_LITERAL, XOTHERWISE, 0};
TOKEN_CODE follow_case_label_list[] = {COLON, SEMICOLON, 0};

/* Tokens that follow declarations in SETUP code */
TOKEN_CODE ltx2x_follow_decls_list[] = {SEMICOLON, BEGIN, CASE, IF, REPEAT,
                                     IDENTIFIER, ENDCODE, 0};
/* Tokens that follow declarations in procedures/functions */
TOKEN_CODE follow_decls_list[] = {SEMICOLON, BEGIN, CASE, IF, REPEAT,
                                 IDENTIFIER, XRETURN, XEND_FUNCTION,
                                 XEND_PROCEDURE, ENDCODE, END_OF_FILE, 0};

/* The tokens for simple types */
TOKEN_CODE simple_type_list[] = {XBINARY, XBOOLEAN, XINTEGER, XLOGICAL,
                                XNUMBER, XREAL, XSTRING, XGENERIC, 0};

/* the tokens of constants */
TOKEN_CODE constant_list[] = {QUERY_CHAR, XCONST_E, XFALSE, XPI, XSELF, XTRUE,
                             XUNKNOWN, THE_DAY, THE_MONTH, THE_YEAR, 0};

/* The tokens of aggregates */
TOKEN_CODE aggregation_type_list[] = {ARRAY, SET, XAGGREGATE, XBAG, XLIST, 0};

TOKEN_CODE follow_min_bound_list[] = {COLON, NUMBER_LITERAL, LPAREN, MINUS,
                                     PLUS, 0};

/* The tokens that can follow an entity's explicit attributes */
TOKEN_CODE follow_attributes_list[] = {XDERIVE, XUNIQUE, XINVERSE, XWHERE,
                                      XEND_ENTITY, 0};

/* GLOBALS */

/* char ch;    */
int ch;
TOKEN_CODE token;            /* code of current token */
LITERAL literal;             /* value of a literal */
int buffer_offset;           /* char offset into source buffer */
int level = 0;               /* current nesting level */
int line_number = 0;         /* current source line number */
BOOLEAN print_flag = TRUE;   /* TRUE to print source lines */
BOOLEAN block_flag = FALSE;  /* TRUE iff parsing a block */

BOOLEAN in_comment = FALSE;  /* TRUE iff in an EXPRESS comment */
BOOLEAN in_eol_comment = FALSE; /* TRUE iff in an EXPRESS eol comment */
int num_opencom = 0;         /* # of open EXPRESS comment markers */

char source_buffer[MAX_SOURCE_LINE_LENGTH]; /* source file buffer */
char token_string[MAX_TOKEN_STRING_LENGTH]; /* token string */
char word_string[MAX_TOKEN_STRING_LENGTH];  /* single-cased token string */
char *bufferp = source_buffer;              /* ptr to source buffer */
char *tokenp = token_string;                /* ptr to token string */

int digit_count;                   /* total # of digits in number */
BOOLEAN count_error;               /* TRUE iff too many digits in number */

/* int page_number = 0;                  current o/p page number */
/* int line_count = MAX_LINES_PER_PAGE;  # of lines on current o/p page */

char source_name[MAX_FILE_NAME_LENGTH];  /* name of source file */
char date[DATE_STRING_LENGTH];           /* current date and time */

FILE *source_file;

/* CHAR_CODE char_table[256];   ----------- add two to this */
CHAR_CODE char_table[258];  /*  ----------- added two to this */

/* array of string representation of token codes */
char *tok_code_strs[] = {
#define sctc(a, b, c) b,
#include "l2xisctc.h"
#undef sctc
};

/* MACROS */

/* char_code(ch) return the character code of ch */
#define char_code(ch) char_table[ch]

/* FORWARDS */

/* init_tok_code_strs(); */

/***************************************************************************/
/* init_scanner(name)  Initialise the scanner globals and open source      */
/*                     file.                                               */

/***************************************************************************/
/* init_scanner(afil)  Initialise the scanner globals and set the source   */
/*                     file.                                               */

init_scanner(afil)
FILE *afil;             /* source file */
{
 int ich;

/*
*  sprintf(dbuffer,"\n    init_scanner called with name = %s\n", name);
*  debug_print(dbuffer);
*/

 /* initialise character table */
 for (ich = 0;    ich < 256;  ++ich)  char_table[ich] = SPECIAL;
 for (ich = '0';  ich <= '9'; ++ich)  char_table[ich] = DIGIT;
 for (ich = 'A';  ich <= 'Z'; ++ich)  char_table[ich] = LETTER;
 for (ich = 'a';  ich <= 'z'; ++ich)  char_table[ich] = LETTER;

 char_table[QUOTE_CHAR] = QUOTE;
 char_table[US_CHAR] = USCORE;
 char_table[EOF_CHAR] = EOF_CODE;

 source_file = afil;

 /* get the first character */
 bufferp = "";
 get_char();

 debug_print("    init_scanner: All done\n");
 return;
}                                                      /* end init_scanner */
/***************************************************************************/


/***************************************************************************/
/* quit_scanner()  no more scanning                                        */

quit_scanner()
{
/*  close_source_file(); */
}                                                      /* end quit_scanner */
/***************************************************************************/


/* CHARACTER ROUTINES */


/***************************************************************************/
/* get_char()    set ch to next char from the source buffer                */

get_char()
{
 BOOLEAN get_source_line();


 /* if current source line exhausted, get next line */
 /* if file exhausted, set ch to EOF character and return */
 if (*bufferp == EOS) {
   if (!get_source_line()) {
     ch = EOF_CHAR;
     return;
   }
   bufferp = source_buffer;
   buffer_offset = 0;
 }

 ch = *bufferp++;

 /* special character processing */
 /* tab --- up buffer offset to next multiple of TAB_SIZE, and replace */
 /*         ch with a blank                                            */
 /* newline -- replace ch with a blank                                 */
 /* { (start of comment) -- skip over comment and replace with a blank */
 switch (ch) {
   case '\t': {
     buffer_offset += TAB_SIZE - buffer_offset%TAB_SIZE;
     ch = ' ';
     break;
   }
   case '\n': {
     ++buffer_offset;
     ch = ' ';
     break;
   }
   case '-': {                 /* start of eol comment? */
     if (*bufferp == '-') {    /* yest, an eol comment */
       skip_eol_comment();
       ch = ' ';
     }
     else {
       ++buffer_offset;
     }
     break;
   }
   case '(': {               /* start of long comment? */
     if (*bufferp == '*') {  /* yes, a long comment */
       skip_long_comment();
       ch = ' ';
     }
     else {
       ++buffer_offset;
     }
     break;
   }
   default: {
     ++buffer_offset;
     break;
   }
 } /* end switch */
}                                                          /* end get_char */
/***************************************************************************/



/***************************************************************************/
/* skip_comment() skip over a comment and set ch to 'end of comment' char  */

skip_comment()
{
 do {
   get_char();
 } while ((ch != '}') && (ch != EOF_CHAR));
}                                                       /* end skip_coment */
/***************************************************************************/



/***************************************************************************/
/* skip_eol_comment()  skip over an EXPRESS eol comment                    */

skip_eol_comment()
{

   /* skip to the next line */
 if (!get_source_line()) {
   ch = EOF_CHAR;
   return;
 }
 bufferp = source_buffer;
 buffer_offset = 0;
 return;

}                                                  /* end SKIP_EOL_COMMENT */
/***************************************************************************/



/***************************************************************************/
/* skip_long_comment()  skip over an EXPRESS long comment                  */

skip_long_comment()
{
 BOOLEAN get_source_line();
 num_opencom = 1;

 /* update buffer pointers to the * after the ( */
 *bufferp++;
 ++buffer_offset;

 while (num_opencom > 0) {
   /* if current source line exhausted, get next line */
   /* if file exhausted, set ch to EOF character and return */
   if (*bufferp == EOS) {
     if (!get_source_line()) {
       ch = EOF_CHAR;
       return;
     }
     bufferp = source_buffer;
     buffer_offset = 0;
   }

   ch = *bufferp++;
   switch (ch) {
     case '(': {                  /* start of long comment? */
       if (*bufferp == '*') {     /* yes */
         num_opencom++;
         *bufferp++;
         ++buffer_offset;
       }
       break;
     }
     case '*': {                  /* end of long comment? */
       if (*bufferp == ')') {     /* yes */
         num_opencom--;
         *bufferp++;
         ++buffer_offset;
       }
       break;
     }
     default: {
       break;
     }
   }  /* end switch */

   ++buffer_offset;
 } /* end while */

 return;
}                                                 /* end SKIP_LONG_COMMENT */
/***************************************************************************/




/***************************************************************************/
/* skip_blanks  skip past any blanks at current location in source         */
/*              buffer and set ch to next non-blank char                   */

skip_blanks()
{
 while (ch == ' ') get_char();
}                                                       /* end skip_blanks */
/***************************************************************************/


/* TOKEN ROUTINES */
   /* after a token has been extracted, ch is the first char after the token */

/***************************************************************************/
/* get_token    extract next token from source buffer                      */

get_token()
{
 entry_debug("get_token");
 scan_source_debug();

 skip_blanks();
 tokenp = token_string;

 switch (char_code(ch)) {
   case LETTER: {
     get_word();
     break;
   }
   case DIGIT: {
     get_number();
     break;
   }
   case QUOTE: {
     get_string();
     break;
   }
   case EOF_CODE: {
     token = END_OF_FILE;
     break;
   }
   default: {
     get_special();
     break;
   }
 } /* end switch */

 scan_token_debug();

 /* for the interpeter: If parsing a block, crunch the token code */
 /* and append it to the code buffer */
 if (block_flag) crunch_token();

 exit_debug("get_token");
}                                                         /* end get_token */
/***************************************************************************/



/***************************************************************************/
/* get_word  extract a word, shift into single case. If not a reserved     */
/*           set token to IDENTIFIER                                       */
/*                for ltx2x, added underscore as an allowed word character */

get_word()
{
 BOOLEAN is_reserved_word();
 entry_debug("get_word");
 scan_source_debug();

 /* extract the word */
 while ((char_code(ch) == LETTER) ||
        (char_code(ch) == DIGIT) ||
        (char_code(ch) == USCORE)) {
   *tokenp++ = ch;
   get_char();
 }
 *tokenp = EOS;
 shift_word();

 if (!is_reserved_word()) token = IDENTIFIER;

 exit_debug("get_word");
}                                                          /* end get_word */
/***************************************************************************/



/***************************************************************************/
/* shift_word()  copy a word token into word_string with all letters       */
/*               in the same (lower? higher?) case                         */

shift_word()
{
 int offset = 'a' - 'A';              /* offset to downshift a letter */
 char *wp = word_string;
 char *tp = token_string;

 /* copy word into word_string, shifting as we go */
 do {
   *wp++ = (*tp >= 'A') && (*tp <= 'Z')    /* check for wrong case letter */
             ? *tp + offset                /* shift and copy */
             : *tp;                        /* or just copy */
   ++tp;
 } while (*tp != EOS);
 *wp = EOS;

}                                                        /* end shift_word */
/***************************************************************************/



/***************************************************************************/
/* get_number  extract a number token and set literal to its value.        */
/*             set token to NUMBER_LITERAL                                 */

get_number()
{
 int whole_count = 0;           /* # of digits in whole part */
 int decimal_offset = 0;        /* # of digits to move decimal */
 char exponent_sign = '+';
 int exponent = 0;              /* value of exponent */
 XPRSAREAL nvalue = 0.0;            /* value of number */
 XPRSAREAL evalue = 0.0;            /* value of exponent */
 BOOLEAN saw_dotdot = FALSE;    /* TRUE if found .. */
 entry_debug("get_number");

 digit_count = 0;
 count_error = FALSE;
 token = NO_TOKEN;

 literal.type = INTEGER_LIT;

 /* extract the whole part of the number */
 accumulate_value(&nvalue, INVALID_NUMBER);
 if (token == ERROR) {
 exit_debug("get_number");
   return;
 }
 whole_count = digit_count;

 /* if current char is a . then either start of fraction or .. */
 if (ch == '.') {
   get_char();
   if (ch == '.') {
     /* have a .. token, backup bufferp so that the token can be */
     /* extracted next */
     saw_dotdot = TRUE;
     --bufferp;
   }
   else {           /* start of a fraction */
     literal.type = REAL_LIT;
     *tokenp++ = '.';
     /* accumulate fraction part */
     accumulate_value(&nvalue, INVALID_FRACTION);
     if (token == ERROR) return;
     decimal_offset = whole_count - digit_count;
   }
 }

 /* extract the exponent part, if any. None if seen a .. */
 if (!saw_dotdot && ((ch == 'E') || (ch == 'e'))) {
   literal.type = REAL_LIT;
   *tokenp++ = ch;
   get_char();

   /* get sign, if any */
   if ((ch == '+') || (ch == '-')) {
     *tokenp++ = exponent_sign = ch;
     get_char();
   }
   /* extract the exponent and accumulate into evalue */
   accumulate_value(&evalue, INVALID_EXPONENT);
   if (token == ERROR) return;
   if (exponent_sign == '-') evalue = -evalue;
 }

 /* Too many digits? */
 if (count_error) {
   error(TOO_MANY_DIGITS);
   token = ERROR;
 exit_debug("get_number");
   return;
 }

 /* adjust the number's value using decimal_offset and exponent */
 exponent = evalue + decimal_offset;
 if ((exponent + whole_count < -MAX_EXPONENT) ||
     (exponent + whole_count > MAX_EXPONENT)) {
   error(REAL_OUT_OF_RANGE);
   token = ERROR;
 exit_debug("get_number");
   return;
 }
 if (exponent != 0) {
   nvalue = nvalue*pow(10.0, (double) exponent);
 }

 /* set the literal's value */
 if (literal.type == INTEGER_LIT) {
   if ((nvalue < -MAX_INTEGER) ||
       (nvalue > MAX_INTEGER)) {
     error(INTEGER_OUT_OF_RANGE);
     token = ERROR;
 exit_debug("get_number");
     return;
   }
   literal.value.integer = (XPRSAINT) nvalue;
 }
 else {
   literal.value.real = nvalue;
 }

 *tokenp = EOS;
 token = NUMBER_LITERAL;

 exit_debug("get_number");

}                                                        /* end get_number */
/***************************************************************************/



/***************************************************************************/
/* get_string()  Extract a string token. Set token to STRING_LITERAL.      */
/*               Quotes are stored as part of token_string, but not        */
/*               as part of literal.value.string                           */

get_string()
{
 char *sp = literal.value.string;

 *tokenp++ = QUOTE_CHAR;
 get_char();

 /* extract the string (two consecutive quotes represent a single quote) */
 while (ch != EOF_CHAR) {
   if (ch == QUOTE_CHAR) {
     *tokenp++ = ch;
     get_char();
     if (ch != QUOTE_CHAR) break;
   }
   *tokenp++ = ch;
   *sp++ = ch;
   get_char();
 }
 *tokenp = EOS;
 *sp = EOS;
 token = STRING_LITERAL;
 literal.type = STRING_LIT;

 sprintf(dbuffer, "Scanned string: sp = %s,  tokenp = %s\n",
                   literal.value.string, token_string);
 debug_print(dbuffer);

}                                                        /* end get_string */
/***************************************************************************/



/***************************************************************************/
/* get_special() Extract a special token. Most are single chars, but some  */
/*               are multiple. Set appropriate token value                 */

get_special()
{
 *tokenp++ = ch;
 switch (ch) {
   case ')': token = RPAREN; get_char(); break;
   case '+': token = PLUS; get_char(); break;
   case '[': token = LBRACKET; get_char(); break;
   case ']': token = RBRACKET; get_char(); break;
   case ';': token = SEMICOLON; get_char(); break;
   case ',': token = COMMA; get_char(); break;
   case '.': token = PERIOD; get_char(); break;
   case '/': token = SLASH; get_char(); break;
             /* extra for EXPRESS */
   case '{': token = LBRACE; get_char(); break;
   case '}': token = RBRACE; get_char(); break;
   case '?': token = QUERY_CHAR; get_char(); break;
   case '%': token = PERCENT; get_char(); break;
   case '\\': token = BACKSLASH; get_char(); break;
   case '@': token = COMMERCIAL_AT; get_char(); break;
   case '!': token = EXCLAMATION; get_char(); break;
   case '"': token = DOUBLEQUOTE; get_char(); break;

   case '*': {       /* * (EXPRESS or ** or *) */
     get_char();
     if (ch == '*') {
       *tokenp++ = '*';
       token = STARSTAR;
       get_char();
     }
     else if (ch == ')') {
       *tokenp++ = ')';
       token = STARPAREN;
       get_char();
     }
     else {
       token = STAR;
     }
     break;
   }

   case '-': {       /* - (EXPRESS or -- or -> ) */
     get_char();
     if (ch == '-') {
       *tokenp++ = '-';
       token = MINUSMINUS;
       get_char();
     }
     else if (ch == '>') {
       *tokenp++ = '>';
       token = MINUSGT;
       get_char();
     }
     else {
       token = MINUS;
     }
     break;
   }

   case '=': {       /*  =  (EXPRESS == ) */
     get_char();
     if (ch == '=') {
       *tokenp++ = '=';
       token = EQUALEQUAL;
       get_char();
     }
     else {
       token = EQUAL;
     }
     break;
   }


   case ':': {       /* : or := (EXPRESS or :=: or :<>: ) */
     get_char();
     if (ch == '=') {
       *tokenp++ = '=';
       token = COLONEQUAL;
       get_char();
       if (ch == ':') {
         *tokenp++ = ':';
         token = COLONEQUALCOLON;
         get_char();
       }
     }
     else if (ch == '<') {
       get_char();
       if (ch == '>') {
         get_char();
         if (ch == ':') {
           *tokenp++ = '<';
           *tokenp++ = '>';
           *tokenp++ = ':';
           token = COLONNEQCOLON;
           get_token();
         }
       }
     }
     else {
       token = COLON;
     }
     break;
   }

   case '<': {        /* < or <= or <> (EXPRESS-I <- ) */
     get_char();
     if (ch == '=') {
       *tokenp++ = '=';
       token = LE;
       get_char();
     }
     else if (ch == '>') {
       *tokenp++ = '>';
       token = NE;
       get_char();
     }
     else if (ch == '-') {
       *tokenp++ = '-';
       token = LTMINUS;
       get_char();
     }
     else {
       token = LT;
     }
     break;
   }

   case '>': {        /* > or >=  */
     get_char();
     if (ch == '=') {
       *tokenp++ = '=';
       token = GE;
       get_char();
     }
     else token = GT;
     break;
   }


   case '(': {       /* ( (EXPRESS (* ) */
     get_char();
     if (ch == '*') {
       *tokenp++ = '*';
       token = PARENSTAR;
       get_char();
     }
     else {
       token = LPAREN;
     }
     break;
   }

   case '|': {       /*  (EXPRESS | or || ) */
     get_char();
     if (ch == '|') {
       *tokenp++ = '|';
       token = BARBAR;
       get_char();
     }
     else {
       token = BAR;
     }
     break;
   }


   default: {
     token = ERROR;
     get_char();
     break;
   }

 } /* end switch */
 *tokenp = EOS;
}                                                       /* end get_special */
/***************************************************************************/



/***************************************************************************/
/* accumulate_value(valuep, error_code) Extract a number part (digits)     */
/*                   and accumulate its value.                             */
/*          Error if first char not a digit                                */

accumulate_value(valuep, error_code)
XPRSAREAL *valuep;
ERROR_CODE error_code;
{
 XPRSAREAL value = *valuep;

 /* error if first char not a digit */
 if (char_code(ch) != DIGIT) {
   error(error_code);
   token = ERROR;
   return;
 }

 /* accumulate the value, provided not too many digits */
 do {
   *tokenp++ = ch;
   if (++digit_count <= MAX_DIGIT_COUNT) value = 10*value + (ch - '0');
   else count_error = TRUE;
   get_char();
 } while (char_code(ch) == DIGIT); /* end do */
 *valuep = value;


}                                                  /* end accumulate_value */
/***************************************************************************/


/* TOKEN TESTERS */


/***************************************************************************/
/* token_in(token_list) If the current token is in token_list              */
/* return TRUE, else FALSE.                                                */

BOOLEAN token_in(token_list)
TOKEN_CODE token_list[];
{
 TOKEN_CODE *atokenp;

 if (token_list == NULL) return(FALSE);
 for (atokenp = &token_list[0]; *atokenp; ++atokenp) {
   if (token == *atokenp) return(TRUE);
 }
 return(FALSE);
} /* end  */
/***************************************************************************/



/***************************************************************************/
/* synchronize(token_list1, token_list2, token_list3) If the current token */
/*    is not in any of the lists, flag it as an error. Then skip tokens    */
/*    until one of those in the lists is found                             */

synchronize(token_list1, token_list2, token_list3)
TOKEN_CODE token_list1[], token_list2[], token_list3[];
{
 BOOLEAN error_flag = (!token_in(token_list1)) &&
                      (!token_in(token_list2)) &&
                      (!token_in(token_list3));

 if (error_flag) {
/*    error(token == END_OF_FILE ? UNEXPECTED_END_OF_FILE
*                               : UNEXPECTED_TOKEN);
*/
   if (token == END_OF_FILE) error(UNEXPECTED_END_OF_FILE);
   else if (token == ENDCODE) error(UNEXPECTED_ENDCODE);
   else error(UNEXPECTED_TOKEN);
   /* skip tokens to synchronize */
   while ((!token_in(token_list1)) &&
          (!token_in(token_list2)) &&
          (!token_in(token_list3)) &&
          ((token != END_OF_FILE) || (token != ENDCODE))) {
     get_token();
   }
 }
} /* end synchronize */
/***************************************************************************/



/***************************************************************************/
/* is_reserved_word() If token is a reserved word, set token and           */
/* return TRUE, else return FALSE                                          */

BOOLEAN is_reserved_word()
{
 int word_length = strlen(word_string);
 RW_STRUCT *rwp;

 /* is length in range? */
 if ((word_length >= MIN_RESERVED_WORD_LENGTH) &&
     (word_length <= MAX_RESERVED_WORD_LENGTH)) { /* check in approp. word list */
   for (rwp = rw_table[word_length]; rwp->string != NULL; ++rwp) {
     if (strcmp(word_string, rwp->string) == 0) {
       token = rwp->token_code;
       return(TRUE);
     }
   }
 }
 return(FALSE);
}                                                  /* end is_reserved_word */
/***************************************************************************/


/* SOURCE FILE ROUTINES */


/***************************************************************************/
/* open_source_file(name) Open the named file and fetch the first char     */

open_source_file(name)
char *name;
{
 if ((name == NULL) || ((source_file = fopen(name, "r")) == NULL)) {
   error(FAILED_SOURCE_FILE_OPEN);
   exit(-FAILED_SOURCE_FILE_OPEN);
 }

 /* get the first character */
 bufferp = "";
 get_char();
}                                                  /* end open_source_file */
/***************************************************************************/



/***************************************************************************/
/* close_source_file()                                                     */

close_source_file()
{
 fclose(source_file);
}                                                 /* end close_source_file */
/***************************************************************************/



/***************************************************************************/
/* get_source_line()  Read the next line from the source file. If there is */
/*                    one, print it out and return TRUE.                   */
/* return FALSE for end of file                                            */

BOOLEAN get_source_line()
{
 char print_buffer[MAX_SOURCE_LINE_LENGTH + 9];
 entry_debug("get_source_line");

 if ((fgets(source_buffer, MAX_SOURCE_LINE_LENGTH, source_file)) != NULL) {
   ++line_number;

   if (print_flag) {
     sprintf(print_buffer, "%4d %d: %s", line_number, level, source_buffer);
     print_line(print_buffer);
   }
   exit_debug("get_source_line");
   return(TRUE);
 }
 exit_debug("get_source_line");
 return(FALSE);
}                                                   /* end get_source_line */
/***************************************************************************/


/* PRINTOUT ROUTINES */


/***************************************************************************/
/* print_line(line)  Print out a line.                                     */

print_line(line)
char line[];
{
 char save_ch;
 char *save_chp = NULL;
 entry_debug("print_line");

 if (strlen(line) > MAX_PRINT_LINE_LENGTH) {
   save_chp = &line[MAX_PRINT_LINE_LENGTH];
   save_ch = *save_chp;
   *save_chp = EOS;
 }
/*  fprintf(filout, "%s", line); */
   /* ltx2x change, print to stdout, not o/p file */
/*  printf("%s", line);  just print to error file */
 fprintf(ferr,   "%s", line);     /* PW addition -> error file */

 if (save_chp) *save_chp = save_ch;

 exit_debug("print_line");
 return;
}                                                        /* end print_line */
/***************************************************************************/




/***************************************************************************/
/* tok_code_to_str(tok)    Returns a string wrt a TOKEN_CODE               */

char *tok_code_to_str(tok)
TOKEN_CODE tok;
{

 if ((tok >= NO_TOKEN) && (tok <= EOTC)) return(tok_code_strs[tok]);

 return(NULL);

}                                                   /* end tok_code_to_str */
/***************************************************************************/