# line 7 "xxx.l"
/*
* detex [-e environment-list] [-c] [-l] [-n] [-s] [-t] [-w] [file[.tex]]
*
* This program is used to remove TeX or LaTeX constructs from a text
* file.
*
* Written by:
* Daniel Trinkle
* Department of Computer Science
* Purdue University
*
*/
/*
 * Helper macros used by the scanner actions in yylex().
 * None of them is wrapped in do { } while (0); each expands to a bare
 * `if', so take care when using one directly before an `else'.
 */
/* switch start state only in LaTeX mode; written as `LaBEGIN <state>;' */
#define LaBEGIN if (fLatex) BEGIN
/* switch start state only in LaTeX mode and when fCite is not set */
#define CITEBEGIN if (fLatex && !fCite) BEGIN
/* emit a blank when fSpace is set and word-only output (fWord) is off */
#define IGNORE if (fSpace && !fWord) putchar(' ')
/* emit a blank unless word-only output (fWord) is on */
#define SPACE if (!fWord) putchar(' ')
/* emit a newline unless word-only output (fWord) is on */
#define NEWLINE if (!fWord) putchar('\n')
/* Global state shared by the scanner actions and the support routines. */
char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */
char *rgsbIncList[MAXINCLIST]; /* list of \includeonly files; a null entry ends the search in InList() */
char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in search order */
char sbCurrentEnv[CCHMAXENV]; /* name of the environment currently being ignored */
char *sbProgName; /* name we were invoked with */
FILE *rgfp[NOFILE+1]; /* stack of input/include files */
int cfp = 0; /* count of files in stack */
int cOpenBrace = 0; /* count of `{' in <SG> */
int csbEnvIgnore; /* count of environments ignored */
int csbIncList = 0; /* count of includeonly files; 0 means "no list" to InList() */
int csbInputPaths; /* count of input paths */
int fLatex = 0; /* flag to indicate delatex (LaTeX mode) */
int fWord = 0; /* flag for -w option */
int fFollow = 1; /* flag to follow input/include */
int fCite = 0; /* flag to echo \cite and \ref args */
int fSpace = 0; /* flag to replace \cs with space */
int fForcetex = 0; /* flag to inhibit latex mode */
/*
 * Start-state codes handed to BEGIN by the scanner actions.  SO is the
 * state main() selects before each call to yylex() and the state most
 * actions return to.
 */
# define SJ 2
# define SK 4
# define SL 6
# define SM 8
# define SN 10
# define SO 12
# define SP 14
# define SA 16
# define SB 18
# define SC 20
# define SD 22
# define SE 24
# define SF 26
# define SH 28
# define SG 30
# define SI 32
/* presumably the character code lex treats as end-of-line (10 is ASCII '\n') */
# define YYNEWLINE 10
/******
** yylex -- scanner driver.  Repeatedly asks yylook() for the number of
**	the rule that matched and runs that rule's action (the numbered
**	cases below; each "# line" directive points back into xxx.l).
**	Returns 0 when yywrap() reports that there is no more input or
**	when yylook() returns a negative value.
**	NOTE: the actions originate in xxx.l; a change made to an action
**	here should be mirrored in that file.
******/
yylex(){
int nstr; extern int yyprevious;
#ifdef __cplusplus
/* to avoid CC and lint complaining yyfussy not being used ...*/
static int __lex_hack = 0;
if (__lex_hack) goto yyfussy;
#endif
while((nstr = yylook()) >= 0)
yyfussy: switch(nstr){
case 0:
/* end of the current input: stop unless yywrap() supplied more */
if(yywrap()) return(0); break;
case 1:
# line 75 "xxx.l"
/* ignore comments */ ;
break;
case 2:
# line 77 "xxx.l"
/* enter LaTeX mode unless -t (fForcetex) forbids it */
{fLatex = !fForcetex; IGNORE;}
break;
case 3:
# line 79 "xxx.l"
/* environment start */ {LaBEGIN SA; IGNORE;}
break;
case 4:
# line 81 "xxx.l"
{ if (BeginEnv("verbatim"))
BEGIN SD;
else
BEGIN SI;
IGNORE;
}
break;
case 5:
# line 88 "xxx.l"
/* verbatim mode */ {BEGIN SO; IGNORE;}
break;
case 6:
# line 89 "xxx.l"
ECHO;
break;
case 7:
# line 91 "xxx.l"
/* ignored environments are skipped in SD, all others continue in SH */
{ if (BeginEnv(yytext))
BEGIN SD;
else
BEGIN SH;
IGNORE;
}
break;
case 8:
# line 97 "xxx.l"
NEWLINE;
break;
case 9:
# line 98 "xxx.l"
;
break;
case 10:
# line 100 "xxx.l"
/* absorb some environments */ {LaBEGIN SC; IGNORE;}
break;
case 11:
# line 101 "xxx.l"
NEWLINE;
break;
case 12:
# line 102 "xxx.l"
;
break;
case 13:
# line 104 "xxx.l"
/* end environment */ { if (EndEnv(yytext))
BEGIN SO;
IGNORE;
}
break;
case 14:
# line 108 "xxx.l"
{BEGIN SD; IGNORE;}
break;
case 15:
# line 109 "xxx.l"
NEWLINE;
break;
case 16:
# line 110 "xxx.l"
;
break;
case 17:
# line 112 "xxx.l"
/* ignore args */ {LaBEGIN SG; IGNORE;}
break;
case 18:
# line 113 "xxx.l"
/* of these \cs */ {LaBEGIN SH; IGNORE;}
break;
case 19:
# line 114 "xxx.l"
{LaBEGIN SH; IGNORE;}
break;
case 20:
# line 115 "xxx.l"
{CITEBEGIN SG; IGNORE;}
break;
case 21:
# line 116 "xxx.l"
{LaBEGIN SH; IGNORE;}
break;
case 22:
# line 117 "xxx.l"
{LaBEGIN SH; IGNORE;}
break;
case 23:
# line 118 "xxx.l"
{SPACE;}
break;
case 24:
# line 119 "xxx.l"
{LaBEGIN SG; SPACE;}
break;
case 25:
# line 120 "xxx.l"
{LaBEGIN SH; IGNORE;}
break;
case 26:
# line 121 "xxx.l"
{CITEBEGIN SH; IGNORE;}
break;
case 27:
# line 122 "xxx.l"
{LaBEGIN SH; IGNORE;}
break;
case 28:
# line 123 "xxx.l"
{CITEBEGIN SH; IGNORE;}
break;
case 29:
# line 124 "xxx.l"
{LaBEGIN SH; IGNORE;}
break;
case 30:
# line 125 "xxx.l"
/* ignore \verb<ch>...<ch> */ { if (fLatex) {
    /* int rather than char so end-of-input values are not truncated */
    int verbchar, c;
    verbchar = input();
    /* input() signals end of input with 0 (lex) or EOF (flex); stop */
    /* there instead of spinning forever on an unterminated \verb */
    if (verbchar != 0 && verbchar != EOF)
        while ((c = input()) != verbchar) {
            if (c == 0 || c == EOF)
                break;
            if (c == '\n')
                NEWLINE;
        }
    }
    IGNORE;
}
break;
case 31:
# line 134 "xxx.l"
BEGIN SO;
break;
case 32:
# line 135 "xxx.l"
NEWLINE;
break;
case 33:
# line 136 "xxx.l"
;
break;
case 34:
# line 137 "xxx.l"
{ cOpenBrace++; }
break;
case 35:
# line 138 "xxx.l"
/* leave the state once the outermost brace has been closed */
{ cOpenBrace--;
if (cOpenBrace == 0)
BEGIN SO;
}
break;
case 36:
# line 142 "xxx.l"
NEWLINE;
break;
case 37:
# line 143 "xxx.l"
;
break;
case 38:
# line 145 "xxx.l"
/* ignore def begin */ {BEGIN SJ; IGNORE;}
break;
case 39:
# line 146 "xxx.l"
BEGIN SO;
break;
case 40:
# line 147 "xxx.l"
NEWLINE;
break;
case 41:
# line 148 "xxx.l"
;
break;
case 42:
# line 150 "xxx.l"
/* formula mode */ {LaBEGIN SE; IGNORE;}
break;
case 43:
# line 151 "xxx.l"
BEGIN SO;
break;
case 44:
# line 152 "xxx.l"
NEWLINE;
break;
case 45:
# line 153 "xxx.l"
;
break;
case 46:
# line 155 "xxx.l"
/* display mode */ {LaBEGIN SB; IGNORE;}
break;
case 47:
# line 156 "xxx.l"
BEGIN SO;
break;
case 48:
# line 157 "xxx.l"
NEWLINE;
break;
case 49:
# line 158 "xxx.l"
;
break;
case 50:
# line 160 "xxx.l"
/* display mode */ {BEGIN SK; IGNORE;}
break;
case 51:
# line 161 "xxx.l"
BEGIN SO;
break;
case 52:
# line 162 "xxx.l"
NEWLINE;
break;
case 53:
# line 163 "xxx.l"
;
break;
case 54:
# line 165 "xxx.l"
/* math mode */ {BEGIN SN; IGNORE;}
break;
case 55:
# line 166 "xxx.l"
BEGIN SO;
break;
case 56:
# line 167 "xxx.l"
NEWLINE;
break;
case 57:
# line 168 "xxx.l"
;
break;
case 58:
# line 169 "xxx.l"
;
break;
case 59:
# line 171 "xxx.l"
/* process files */ {LaBEGIN SF; IGNORE;}
break;
case 60:
# line 172 "xxx.l"
{ IncludeFile(yytext);
BEGIN SO;
}
break;
case 61:
# line 175 "xxx.l"
NEWLINE;
break;
case 62:
# line 176 "xxx.l"
;
break;
case 63:
# line 178 "xxx.l"
{BEGIN SL; IGNORE;}
break;
case 64:
# line 179 "xxx.l"
AddInclude(yytext);
break;
case 65:
# line 180 "xxx.l"
/* nothing was added to the list: store a null sentinel so that */
/* csbIncList becomes nonzero and InList() matches no file */
{ if (csbIncList == 0)
    rgsbIncList[csbIncList++] = NULL;
BEGIN SO;
}
break;
case 66:
# line 184 "xxx.l"
NEWLINE;
break;
case 67:
# line 185 "xxx.l"
;
break;
case 68:
# line 187 "xxx.l"
{BEGIN SM; IGNORE;}
break;
case 69:
# line 188 "xxx.l"
{ InputFile(yytext);
BEGIN SO;
}
break;
case 70:
# line 191 "xxx.l"
NEWLINE;
break;
case 71:
# line 192 "xxx.l"
;
break;
case 72:
# line 194 "xxx.l"
/* handle ligatures */ {(void)printf("%.2s", yytext+1);}
break;
case 73:
# line 195 "xxx.l"
{(void)printf("%.1s", yytext+1);}
break;
case 74:
# line 197 "xxx.l"
/* ignore other \cs */ {BEGIN SP; IGNORE;}
break;
case 75:
# line 198 "xxx.l"
SPACE;
break;
case 76:
# line 199 "xxx.l"
IGNORE;
break;
case 77:
# line 200 "xxx.l"
IGNORE;
break;
case 78:
# line 201 "xxx.l"
IGNORE;
break;
case 79:
# line 202 "xxx.l"
{BEGIN SO; NEWLINE;}
break;
case 80:
# line 203 "xxx.l"
{BEGIN SO; IGNORE;}
break;
case 81:
# line 204 "xxx.l"
/* push the whole match back so it is rescanned in state SO */
{yyless(0);BEGIN SO;}
break;
case 82:
# line 206 "xxx.l"
/* special characters */ IGNORE;
break;
case 83:
# line 207 "xxx.l"
IGNORE;
break;
case 84:
# line 208 "xxx.l"
SPACE;
break;
case 85:
# line 210 "xxx.l"
/* with -w each match is printed on a line of its own */
{ if (fWord)
(void)printf("%s\n", yytext);
else
ECHO;
}
break;
case 86:
# line 215 "xxx.l"
if (!fWord) ECHO;
break;
case 87:
# line 216 "xxx.l"
if (!fWord) ECHO;
break;
case -1:
break;
default:
(void)fprintf(yyout,"bad switch yylook %d",nstr);
} return(0); }
/* end of yylex */
/******
** main --
** Set sbProgName to the base of arg 0.
** Set the input paths.
** Check for options
** -c echo LaTeX \cite, \ref, and \pageref values
** -e <env-list> list of LaTeX environments to ignore
** -l force latex mode
** -n do not follow \input and \include
** -s replace control sequences with space
** -t force tex mode
** -w word only output
** Set the list of LaTeX environments to ignore.
** Process each input file.
** If no input files are specified on the command line, process stdin.
******/
/* get base name and decide what we are doing, detex or delatex */
#ifdef OS2
char drive[_MAX_DRIVE], dir[_MAX_DIR];
char fname[_MAX_FNAME], ext[_MAX_EXT];
#ifdef __EMX__
_wildcard(&cArgs, &rgsbArgs);
_response(&cArgs, &rgsbArgs);
#endif
_splitpath (rgsbArgs[0], drive, dir, fname, ext);
sbProgName = strlwr(fname);
#else
if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
sbProgName++;
else
sbProgName = rgsbArgs[0];
#endif
if (strcmp("delatex",sbProgName) == 0)
fLatex = 1;
/* set rgsbInputPaths for use with TexOpen() */
SetInputPaths();
/* process command line options */
while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
while (*++pch)
switch (*pch) {
case CHCITEOPT:
fCite = 1;
break;
case CHENVOPT:
sbEnvList = rgsbArgs[++iArgs];
break;
case CHLATEXOPT:
fLatex = 1;
break;
case CHNOFOLLOWOPT:
fFollow = 0;
break;
case CHSPACEOPT:
fSpace = 1;
break;
case CHTEXOPT:
fForcetex = 1;
break;
case CHWORDOPT:
fWord = 1;
break;
default:
#ifdef OS2
OS2UsageExit();
#else
sbBadOpt[0] = *pch;
sbBadOpt[1] = '\0';
Warning("unknown option ignored -", sbBadOpt);
#endif
}
iArgs++;
}
SetEnvIgnore(sbEnvList);
/* process input files */
for (; iArgs < cArgs; iArgs++) {
fSawFile++;
if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
Warning("can't open file", rgsbArgs[iArgs]);
continue;;
}
BEGIN SO;
(void)yylex();
}
/* if there were no input files, assume stdin */
if (!fSawFile) {
yyin = stdin;
#ifdef OS2
if (isatty(fileno(stdin)))
OS2UsageExit();
#endif
BEGIN SO;
(void)yylex();
}
#ifndef FLEX_SCANNER
if (YYSTATE != SO)
ErrorExit("input contains an unterminated mode or environment");
#endif
return(0);
}
#ifdef FLEX_SCANNER
#undef yywrap
#endif
/******
** yywrap -- handles EOF for lex. Check to see if the stack of open files
** has anything on it. If it does, set yyin to the top value. If not
** return the termination signal for lex.
******/
sb = SafeMalloc(strlen(sbEnvList) + 1, "malloc for SetEnvIgnore failed");
(void) strcpy(sb, sbEnvList);
csbEnvIgnore = SeparateList(sb, rgsbEnvIgnore, CHENVSEP, MAXENVS);
if (csbEnvIgnore == ERROR)
ErrorExit("The environtment list contains too many environments");
}
/******
** BeginEnv -- reports whether sbEnv names one of the environments in
**	rgsbEnvIgnore.  On a match the name is copied into sbCurrentEnv
**	and 1 is returned; otherwise, or whenever LaTeX mode is off,
**	0 is returned and sbCurrentEnv is left alone.
******/
BeginEnv(sbEnv)
char *sbEnv;
{
    int iEnv = 0;

    if (fLatex) {
        while (iEnv < csbEnvIgnore) {
            if (strcmp(sbEnv, rgsbEnvIgnore[iEnv]) == 0) {
                /* remember the name so EndEnv() can recognize the end */
                (void)strcpy(sbCurrentEnv, sbEnv);
                return(1);
            }
            iEnv++;
        }
    }
    return(0);
}
/******
** EndEnv -- returns 1 when LaTeX mode is on and sbEnv is equal to
**	sbCurrentEnv (the environment currently being ignored), else 0.
******/
EndEnv(sbEnv)
char *sbEnv;
{
    /* && yields exactly 0 or 1; the names are only compared in LaTeX mode */
    return(fLatex && strcmp(sbEnv, sbCurrentEnv) == 0);
}
/******
** InputFile -- push the current yyin and open sbFile. If the open fails,
** the sbFile is ignored.
******/
if (!fFollow)
return;
if (!InList(sbFile))
return;
rgfp[cfp++] = yyin;
if ((yyin = TexOpen(sbFile)) == NULL) {
Warning("can't open \\include file", sbFile);
yyin = rgfp[--cfp];
}
}
/******
** AddInclude -- appends a private copy of sbFile to rgsbIncList and
**	increments csbIncList.  Does nothing when input/include files
**	are not being followed (fFollow is 0).  If the list is already
**	full (MAXINCLIST entries) a warning is issued and sbFile is
**	ignored.
******/
AddInclude(sbFile)
char *sbFile;
{
    if (!fFollow)
        return;
    if (csbIncList >= MAXINCLIST) {
        Warning("\\includeonly list is too long, ignoring", sbFile);
        /* really ignore it: storing it would write past rgsbIncList */
        return;
    }
    rgsbIncList[csbIncList] = SafeMalloc(strlen(sbFile) + 1, "malloc for AddInclude failed");
    (void)strcpy(rgsbIncList[csbIncList++], sbFile);
}
/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**	no list (csbIncList is 0), all files are assumed to be "in the
**	list".  Otherwise sbFile, with everything from its last '.' on
**	removed, must be equal to one of the entries.  The search stops
**	at the first null entry.  Returns 1 if sbFile is in the list,
**	0 if not.
******/
InList(sbFile)
char *sbFile;
{
    char *pchDot;
    int cchBase, i;

    if (csbIncList == 0) /* no list */
        return(1);
    /* Compare in place rather than copying into a fixed-size buffer, */
    /* so an over-long file name cannot overflow anything. */
    pchDot = rindex(sbFile, '.');
    if (pchDot != NULL)
        cchBase = (int)(pchDot - sbFile);
    else
        cchBase = (int)strlen(sbFile);
    for (i = 0; i < csbIncList && rgsbIncList[i]; i++)
        /* same length and same leading characters <=> equal to the base */
        if ((int)strlen(rgsbIncList[i]) == cchBase
            && strncmp(rgsbIncList[i], sbFile, cchBase) == 0)
            return(1);
    return(0);
}
/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**	TEXINPUTS environment variable if set or else DEFAULTINPUTS.  If
**	the user's TEXINPUTS has a leading ':' prepend the DEFAULTINPUTS
**	to the path, if there is a trailing ':' append the DEFAULTINPUTS.
**	This is consistent with the most recent TeX.  However, this
**	routine does not honor the '//' construct (expand subdirs).
**	Under OS2 the variable TEXINPUT is consulted before TEXINPUTS.
**	The number of paths is stored in csbInputPaths; too many paths
**	is reported through ErrorExit().
******/
SetInputPaths()
{
    char *sb, *sbPaths, *getenv();
    int cchDefaults, cchEnv, cchPaths;
    int fLeadSep, fTrailSep;

    cchDefaults = strlen(DEFAULTINPUTS);
#ifdef OS2
    if ((sb = getenv("TEXINPUT")) == NULL)
#endif
    if ((sb = getenv("TEXINPUTS")) == NULL)
        sb = DEFAULTINPUTS;

    cchEnv = strlen(sb);
    fLeadSep = (sb[0] == CHPATHSEP);
    /* the length test keeps an empty value from indexing sb[-1] */
    fTrailSep = (cchEnv > 0 && sb[cchEnv - 1] == CHPATHSEP);

    /* room for the value itself plus one copy of the defaults per end */
    cchPaths = cchEnv;
    if (fLeadSep)
        cchPaths += cchDefaults;
    if (fTrailSep)
        cchPaths += cchDefaults;

    sbPaths = SafeMalloc(cchPaths + 1, "malloc for SetInputPaths failed");
    sbPaths[0] = '\0';
    if (fLeadSep)
        (void)strcat(sbPaths, DEFAULTINPUTS);
    (void)strcat(sbPaths, sb);
    if (fTrailSep)
        (void)strcat(sbPaths, DEFAULTINPUTS);

    csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
    if (csbInputPaths == ERROR)
#ifdef OS2
        ErrorExit("TEXINPUT(S) environment variable has too many paths");
#else
        ErrorExit("TEXINPUTS environment variable has too many paths");
#endif
}
/******
** SeparateList -- takes a chSep separated list sbList, replaces the
** chSep's with NULLs and sets rgsbList[i] to the beginning of
** the ith word in sbList. The number of words is returned. An
** ERROR is returned if there are more than csbMax words.
******/
SeparateList(sbList, rgsbList, chSep, csbMax)
char *sbList, *rgsbList[], chSep;
int csbMax;
{
int csbList = 0;
/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
** For each input path the following order is used:
** file.tex - must be as named, if not there go to the next path
** file.ext - random extension, try it
** file - base name, add .tex and try it
** file - try it as is
** Notice that if file exists in the first path and file.tex exists in
** one of the other paths, file in the first path is what is opened.
** If the sbFile begins with a '/', no paths are searched.
******/
/* If sbFile ends in .tex then it must be there */
if ((pch = rindex(sbFullPath, '.')) != NULL
&& (strcmp(pch, ".tex") == 0))
if ((fp = fopen(sbFullPath, "r")) != NULL)
return(fp);
else
continue;
/* if .<ext> then try to open it. the '.' represents */
/* the beginning of an extension if it is not the first */
/* character and it does not follow a '.' or a '/' */
if (pch != NULL && pch > &(sbFullPath[0])
&& *(pch - 1) != '.' && *(pch - 1) != '/'
&& (fp = fopen(sbFullPath, "r")) != NULL)
return(fp);
/* just base name, add .tex to the name */
sbNew = SafeMalloc(strlen(sbFullPath) + 5, "malloc for TexOpen failed");
(void)strcpy(sbNew, sbFullPath);
(void)strcat(sbNew, ".tex");
if ((fp = fopen(sbNew, "r")) != NULL)
return(fp);
/* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T */
/* The copyright notice above does not evidence any */
/* actual or intended publication of such source code. */