/*
* TADS vocabulary checker
* 23-May-94  Dave Baggett <[email protected]>
*
* This program examines TADS code vocabulary properties and gives warnings
* when multiple vocabulary words are identical given six characters of
* significance and case-independence.  E.g., the definition
*
*     noun = 'transmogrifier' 'Transmogrifyer'
*
* will trigger a warning because, ignoring case, the two words do not
* differ in the first six characters as the TADS parser requires.
*
* Definitions like the above cause the now-infamous
*
* Which transmogrifier do you mean: the transmogrifier, or the transmogrifier?
*
* problem, where TADS assigns two different (but identical to six characters)
* vocabulary items to the same object.
*
* ----------------------------------------------------------------------
* HOW TO USE THIS PROGRAM
*
* This is a Unix filter.  You pipe stuff to it and it outputs warnings
* if it sees things wrong.  E.g.,
*
* % vocab < cheez.t > errors.out
*
* or
*
* % cat cheez.t | something | vocab | more
*
* etc.
* ----------------------------------------------------------------------
* HOW TO COMPILE THIS PROGRAM
*
* To compile this program, you need either the "lex" or "flex" lexical
* analyzer generators.  Fortunately, these are standard Unix tools, so
* on most machines the following should work:
*
* % flex vocab.l       *OR*    % lex vocab.l
*
* Then run your system's C compiler on the resulting lex.yy.c file.  One
* of the following commands should accomplish this:
*
* % cc lex.yy.c -o vocab -ll  *OR*  % gcc lex.yy.c -o vocab -ll
*
* That oughta do it!  If it doesn't, ask your system administrator
* for help, or send me email.
*
* ----------------------------------------------------------------------
* BUT I DON'T HAVE UNIX!
*
* Well, too bad.  This would have been a pain to write in straight C,
* so you get what you pay for.  However, if you want to run this program
* on a 386 or later PC compatible, you're in luck -- send me email for
* details.  Otherwise, you'll have to ask people who know how to get
* flex running on your computer how to do it.
* ----------------------------------------------------------------------
* TERMS
*
* This program is in the public domain.
*
*/
/*
 * Named patterns used by the rules below.
 *
 * BW    one whitespace character from the bracketed set (which
 *       includes newline)
 * SQ    a single quote
 * CO    a comment: either double-slash through the end of the line,
 *       or a slash-star ... star-slash block
 * CH    one character of a quoted word: anything except newline, tab
 *       and single quote, or a backslash-escaped single quote
 * PROP  the property names whose word lists get checked
 */
BW [    \n]
SQ [']
CO (("//".*\n)|("/*"([^*]|"\n"|("*"[^/]))*"*"+"/"))
CH ([^\n\t']|(\\'))

PROP ("noun"|"plural")

%{
#include <ctype.h>
#include <stdio.h>
#include <string.h>

       /* Current input line number; printed in warning messages. */
       int line = 1;

       /*
        * Defined in the user-code section at the end of this file.
        * Declared here because the rule actions call it before its
        * definition appears in the generated scanner.
        */
       int check(char *s);
%}

%%

{PROP}{BW}+"="(({BW}|{CO})*{SQ}{CH}+{SQ})+ {
       /*
        * A noun/plural property, whitespace, an equals sign, and then
        * one or more single-quoted words, each optionally preceded by
        * whitespace and comments.  The whole matched text is handed
        * to check(), which also accounts for any newlines inside it.
        */
       check(yytext);
}

\n {
       /* A newline outside a vocabulary definition: bump the line counter. */
       line++;
}

. {
       /*
        * Swallow every other character.  Without this rule the
        * scanner's default action would echo unmatched input to the
        * output, burying the warnings.
        */
}

%%

#define MAX_WORDS 256   /* max # vocabulary words for single property */
#define MAX_LENGTH 128  /* max vocab word length */

/*
 * Value of the character c that follows a backslash inside a quoted
 * word: an escaped double or single quote stands for itself; any other
 * character yields (c) - 'a'.
 *
 * NOTE(review): the intent of the (c) - 'a' fallback is not evident
 * from this file -- confirm before relying on it.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define quote(c) ((c) == '\"' ? '\"' : ((c) == '\'' ? '\'' : (c) - 'a'))

check(s)
       char    *s;
{
       static char word[MAX_WORDS][7];
       static char orig[MAX_WORDS][MAX_LENGTH];

       char    *sbase = s;
       int     i, j, w = 0;
       int     lineadd = 0;

       /*
        * Count newlines in this string
        */
       for (; *s; s++)
               if (*s == '\n')
                       lineadd++;

       /*
        * Erase comments
        */
       for (s = sbase; *s;) {
               /*
                * Find next /
                */
               for (; *s && *s != '/'; s++)
                       ;


               if (!*s)
                       break;

               /*
                * If followed by /, skip to newline.
                * If followed by *, skip to end-comment (star-slash).
                */
               if (s[1] == '/') {
                       for (; *s && *s != '\n'; s++)
                               *s = ' ';

                       if (*s)
                               *s = ' ';
               }
               else if (s[1] == '*') {
                       for (; *s && s[1] && !(*s == '*' && s[1] == '/'); s++)
                               *s = ' ';

                       *s = ' ';
                       if (s[1])
                               s[1] = ' ';
               }
               else
                       s++;
       }

       /*
        * Build word list, significant to six characters.
        */
       for (s = sbase; *s; ) {
               /*
                * Find start of next word (= single quoted string).
                */
               for (; *s && *s != '\''; s++)
                       ;

               if (!*s)
                       break;

               /*
                * Copy characters from string to current word list entry.
                */
               s++;
               for (i = 0; *s; i++, s++) {
                       char    c;

                       if (*s == '\'') {
                               s++;
                               break;
                       }
                       else if (*s == '\\') {
                               c = quote(s[1]);
                               s++;
                       }
                       else
                               c = *s;

                       orig[w][i] = c;
                       if (i < 6) {
                               if (isupper(c))
                                       word[w][i] = tolower(c);
                               else
                                       word[w][i] = c;
                       }
               }

               orig[w][i] = 0;
               word[w][i <= 6 ? i : 6] = 0;
               w++;
       }

       /*
        * Compare each word to every other word.  O(n^2)
        *
        * You could do this faster [O(n lg n)] by first sorting and
        * then checking only adjacent words for similarity.  I
        * didn't feel like bothering with it.
        */
       for (i = 0; i < w - 1; i++) {
               for (j = i + 1; j < w; j++) {
                       if (!strcmp(word[i], word[j])) {
                               printf("line %d: duplicate vocabulary:", line);
                               printf(" [%s], [%s] -> [%s]\n", orig[i], orig[j], word[i]);
                               break;
                       }
               }
       }

       line += lineadd;
}

/*
 * Entry point: run the scanner over its input until end of file.
 * Warnings are printed by check() as vocabulary definitions are matched.
 */
int main(void)
{
       yylex();
       return 0;
}