/* TADS vocabulary checker */

/*
* TADS vocabulary checker
* 23-May-94 Dave Baggett <[email protected]>
*
* This program examines TADS code vocabulary properties and gives warnings
* when multiple vocabulary words are identical given six characters of
* significance and case-independence. E.g., the definition
*
* noun = 'transmogrifier' 'Transmogrifyer'
*
* will trigger a warning because, ignoring case, the two words do not
* differ in the first six characters as the TADS parser requires.
*
* Definitions like the above cause the now-infamous
*
* Which transmogrifier do you mean: the transmogrifier, or the transmogrifier?
*
* problem, where TADS assigns two different (but identical to six characters)
* vocabulary items to the same object.
*
* ----------------------------------------------------------------------
* HOW TO USE THIS PROGRAM
*
* This is a Unix filter. You pipe stuff to it and it outputs warnings
* if it sees things wrong. E.g.,
*
* % vocab < cheez.t > errors.out
*
* or
*
* % cat cheez.t | something | vocab | more
*
* etc.
* ----------------------------------------------------------------------
* HOW TO COMPILE THIS PROGRAM
*
* To compile this program, you need either the "lex" or "flex" lexical
* analyzer generators. Fortunately, these are standard Unix tools, so
* on most machines the following should work:
*
* % flex vocab.l *OR* % lex vocab.l
*
* Then run your system's C compiler on the resulting lex.yy.c file. One
* of the following commands should accomplish this:
*
* % cc lex.yy.c -o vocab -ll *OR* % gcc lex.yy.c -o vocab -ll
*
* That oughta do it! If it doesn't, ask your system administrator
* for help, or send me email.
*
* ----------------------------------------------------------------------
* BUT I DON'T HAVE UNIX!
*
* Well, too bad. This would have been a pain to write in straight C,
* so you get what you pay for. However, if you want to run this program
* on a 386 or later PC compatible, you're in luck -- send me email for
* details. Otherwise, you'll have to ask people who know how to get
* flex running on your computer how to do it.
* ----------------------------------------------------------------------
* TERMS
*
* This program is in the public domain.
*
*/
/*
 * Named patterns used by the rules section.
 *
 * BW    one blank character: space, tab, carriage return or newline.
 *       Tab is included so that property definitions laid out with tabs
 *       (for example, tab-indented continuation lines) are matched in full.
 * SQ    a single quote, which delimits a vocabulary word.
 * CO    a comment: either "//" through the end of the line, or a block
 *       comment running from slash-star to star-slash.
 * CH    one character inside a quoted word: anything except newline, tab
 *       and single quote, or a backslash-escaped single quote.
 * PROP  the property names whose word lists are checked.
 */
BW	[ \t\r\n]
SQ	[']
CO	(("//".*\n)|("/*"([^*]|"\n"|("*"[^/]))*"*"+"/"))
CH	([^\n\t']|(\\'))

PROP	("noun"|"plural")

%{
/* Standard headers for the library routines called from the user code. */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/*
 * check() is defined in the user-code section but called from the rules,
 * so it has to be declared before the generated scanner body.
 */
int check(char *s);

/* Current input line number, reported in warning messages. */
int line = 1;
%}

%%

{PROP}{BW}*"="(({BW}|{CO})*{SQ}{CH}+{SQ})+	{
		/*
		 * A vocabulary property followed by "=" and one or more quoted
		 * words, possibly separated by blanks and comments.  Blanks
		 * before the "=" are optional.  The whole match is handed to
		 * check(), which also accounts for any newlines inside it.
		 */
		check(yytext);
	}

\n	{
		/* Keep the line counter in step with the input. */
		line++;
	}

.	{
		/*
		 * Swallow every other character.  Without a catch-all rule the
		 * scanner's default action would copy unmatched input to the
		 * output, mixing the source text in with the warnings.
		 */
	}

%%

/*
 * Dimensions of the static word tables declared in check().
 */
#define MAX_WORDS 256 /* max # vocabulary words for single property */
#define MAX_LENGTH 128 /* max vocab word length */

/*
 * quote(c): the value check() stores for a character c that follows a
 * backslash inside a quoted word.  An escaped double quote or single quote
 * maps to itself; any other character maps to (c) - 'a'.
 *
 * NOTE(review): the (c) - 'a' fallback yields a small or negative code
 * rather than a printable character -- confirm whether that is intended.
 *
 * The argument is expanded more than once, so it must be free of side
 * effects.
 */
#define quote(c) ((c) == '\"' ? '\"' : ((c) == '\'' ? '\'' : (c) - 'a'))

/*
 * skip_comment -- step over a comment starting at p, if there is one.
 *
 * Returns p unchanged when p does not point at "//" or at slash-star.
 * Otherwise returns a pointer just past the comment: past the newline that
 * ends a "//" comment, or past the closing star-slash of a block comment.
 * The search for star-slash starts after the two-character opener, so the
 * opener's own star is never taken as part of the terminator.  For an
 * unterminated comment the result points at the string's terminating NUL.
 */
static char *
skip_comment(char *p)
{
	if (p[0] != '/')
		return p;

	if (p[1] == '/') {
		for (p += 2; *p && *p != '\n'; p++)
			;
		return *p ? p + 1 : p;
	}

	if (p[1] == '*') {
		for (p += 2; *p; p++)
			if (p[0] == '*' && p[1] == '/')
				return p + 2;
		return p;
	}

	return p;
}

/*
 * check -- look for clashing words in one vocabulary property definition.
 *
 * s is the NUL-terminated text of a definition: property name, "=", and a
 * list of single-quoted words that may be separated by blanks and comments.
 * Each word is reduced to its first six characters, folded to lower case;
 * a warning is printed on standard output for every word that has the same
 * reduced form as a later word in the list.
 *
 * Warnings carry the current value of the global `line`.  After the
 * comparison, `line` is advanced by the number of newlines in s.
 *
 * At most MAX_WORDS words are examined (a warning is printed if there are
 * more), and the copy of each word kept for display is cut off at
 * MAX_LENGTH - 1 characters.
 *
 * Always returns 0.
 */
int
check(char *s)
{
	enum { SIGNIFICANT = 6 };	/* characters that take part in comparison */

	static char word[MAX_WORDS][SIGNIFICANT + 1];
	static char orig[MAX_WORDS][MAX_LENGTH];

	char *p;
	int i, j, w = 0;
	int lineadd = 0;

	/*
	 * Count newlines in this string.
	 */
	for (p = s; *p; p++)
		if (*p == '\n')
			lineadd++;

	/*
	 * Build word list, significant to six characters.  Comments are
	 * skipped only between words, so a slash inside a quoted word is
	 * ordinary text, while an apostrophe inside a comment is never
	 * mistaken for the start of a word.
	 */
	p = s;
	while (*p) {
		char *next = skip_comment(p);

		if (next != p) {
			p = next;
			continue;
		}

		if (*p != '\'') {
			p++;
			continue;
		}

		if (w >= MAX_WORDS) {
			printf("line %d: more than %d vocabulary words; rest ignored\n",
			       line, MAX_WORDS);
			break;
		}

		/*
		 * Copy characters from the quoted string to the current word
		 * list entry.  i counts every character of the word, including
		 * those that no longer fit in the tables.
		 */
		p++;
		for (i = 0; *p; i++, p++) {
			char c;

			if (*p == '\'') {
				p++;
				break;
			}

			/*
			 * A backslash escapes the next character.  A backslash
			 * that is the last character of the string is kept
			 * literally so the scan never steps past the NUL.
			 */
			if (*p == '\\' && p[1] != '\0') {
				c = quote(p[1]);
				p++;
			}
			else
				c = *p;

			if (i < MAX_LENGTH - 1)
				orig[w][i] = c;

			/* <ctype.h> routines need an unsigned char value. */
			if (i < SIGNIFICANT)
				word[w][i] = (char)tolower((unsigned char)c);
		}

		orig[w][i < MAX_LENGTH - 1 ? i : MAX_LENGTH - 1] = 0;
		word[w][i < SIGNIFICANT ? i : SIGNIFICANT] = 0;
		w++;
	}

	/*
	 * Compare each word to every later word; report only the first clash
	 * found for a given word.  O(n^2), which is fine for lists this small.
	 */
	for (i = 0; i < w - 1; i++) {
		for (j = i + 1; j < w; j++) {
			if (!strcmp(word[i], word[j])) {
				printf("line %d: duplicate vocabulary:", line);
				printf(" [%s], [%s] -> [%s]\n", orig[i], orig[j], word[i]);
				break;
			}
		}
	}

	line += lineadd;
	return 0;
}

/*
 * Program entry point: run the generated scanner until it returns, then
 * exit with a success status.
 */
int
main(void)
{
	yylex();
	return 0;
}