/*
* detex [-e environment-list] [-c] [-l] [-n] [-s] [-t] [-w] [file[.tex]]
*
* This program is used to remove TeX or LaTeX constructs from a text
* file.
*
* Written by:
* Daniel Trinkle
* Department of Computer Science
* Purdue University
*
*/
/*
 * Helper macros for the lex actions.  Each one expands to a bare `if'
 * statement without braces or an `else', so an `else' written directly
 * after a use would attach to the macro's own `if'.
 */
/* Change start condition, but only when fLatex is set. */
#define LaBEGIN if (fLatex) BEGIN
/* Change start condition only when fLatex is set and fCite is clear. */
#define CITEBEGIN if (fLatex && !fCite) BEGIN
/* Write one space when fSpace is set and fWord is clear; otherwise nothing. */
#define IGNORE if (fSpace && !fWord) putchar(' ')
/* Write one space unless fWord is set. */
#define SPACE if (!fWord) putchar(' ')
/* Write one newline unless fWord is set. */
#define NEWLINE if (!fWord) putchar('\n')
/*
 * File-scope state shared by the lex actions and the support routines:
 * fixed-size tables with their element counts, followed by the option flags.
 */
char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */
char *rgsbIncList[MAXINCLIST]; /* list of includeonly files */
char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */
char sbCurrentEnv[CCHMAXENV]; /* current environment being ignored */
char *sbProgName; /* name we were invoked with */
FILE *rgfp[NOFILE+1]; /* stack of input/include files */
int cfp = 0; /* count of files in stack */
int cOpenBrace = 0; /* count of `{' in <LaMacro2> */
int csbEnvIgnore; /* count of environments ignored */
int csbIncList = 0; /* count of includeonly files */
int csbInputPaths; /* count of input paths */
int fLatex = 0; /* flag to indicate delatex (LaTeX mode) */
int fWord = 0; /* flag for -w option */
int fFollow = 1; /* flag to follow input/include */
int fCite = 0; /* flag to echo \cite and \ref args */
int fSpace = 0; /* flag to replace \cs with space */
int fForcetex = 0; /* flag to inhibit latex mode */
%}
S [ \t\n]*
W [a-zA-Z]+
%Start Define Display IncludeOnly Input Math Normal Control
%Start LaBegin LaDisplay LaEnd LaEnv LaFormula LaInclude
%Start LaMacro LaMacro2 LaVerbatim
<Normal>[{}\\|] /* special characters */ IGNORE;
<Normal>[!?]"`" IGNORE;
<Normal>~ SPACE;
<Normal>{W}[']*{W} { if (fWord)
(void)printf("%s\n", yytext);
else
ECHO;
}
<Normal>[0-9]+ if (!fWord) ECHO;
<Normal>(.|\n) if (!fWord) ECHO;
%%
/******
** main --
** Set sbProgName to the base of arg 0.
** Set the input paths.
** Check for options
** -c echo LaTeX \cite, \ref, and \pageref values
** -e <env-list> list of LaTeX environments to ignore
** -l force latex mode
** -n do not follow \input and \include
** -s replace control sequences with space
** -t force tex mode
** -w word only output
** Set the list of LaTeX environments to ignore.
** Process each input file.
** If no input files are specified on the command line, process stdin.
******/
/* get base name and decide what we are doing, detex or delatex */
#ifdef OS2
char drive[_MAX_DRIVE], dir[_MAX_DIR];
char fname[_MAX_FNAME], ext[_MAX_EXT];
#ifdef __EMX__
_wildcard(&cArgs, &rgsbArgs);
_response(&cArgs, &rgsbArgs);
#endif
_splitpath (rgsbArgs[0], drive, dir, fname, ext);
sbProgName = strlwr(fname);
#else
if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
sbProgName++;
else
sbProgName = rgsbArgs[0];
#endif
if (strcmp("delatex",sbProgName) == 0)
fLatex = 1;
/* set rgsbInputPaths for use with TexOpen() */
SetInputPaths();
/* process command line options */
while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
while (*++pch)
switch (*pch) {
case CHCITEOPT:
fCite = 1;
break;
case CHENVOPT:
sbEnvList = rgsbArgs[++iArgs];
break;
case CHLATEXOPT:
fLatex = 1;
break;
case CHNOFOLLOWOPT:
fFollow = 0;
break;
case CHSPACEOPT:
fSpace = 1;
break;
case CHTEXOPT:
fForcetex = 1;
break;
case CHWORDOPT:
fWord = 1;
break;
default:
#ifdef OS2
OS2UsageExit();
#else
sbBadOpt[0] = *pch;
sbBadOpt[1] = '\0';
Warning("unknown option ignored -", sbBadOpt);
#endif
}
iArgs++;
}
SetEnvIgnore(sbEnvList);
/* process input files */
for (; iArgs < cArgs; iArgs++) {
fSawFile++;
if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
Warning("can't open file", rgsbArgs[iArgs]);
continue;;
}
BEGIN Normal;
(void)yylex();
}
/* if there were no input files, assume stdin */
if (!fSawFile) {
yyin = stdin;
#ifdef OS2
if (isatty(fileno(stdin)))
OS2UsageExit();
#endif
BEGIN Normal;
(void)yylex();
}
#ifndef FLEX_SCANNER
if (YYSTATE != Normal)
ErrorExit("input contains an unterminated mode or environment");
#endif
return(0);
}
#ifdef FLEX_SCANNER
#undef yywrap
#endif
/******
** yywrap -- handles EOF for lex. Check to see if the stack of open files
** has anything on it. If it does, set yyin to the top value. If not
** return the termination signal for lex.
******/
sb = SafeMalloc(strlen(sbEnvList) + 1, "malloc for SetEnvIgnore failed");
(void) strcpy(sb, sbEnvList);
csbEnvIgnore = SeparateList(sb, rgsbEnvIgnore, CHENVSEP, MAXENVS);
if (csbEnvIgnore == ERROR)
ErrorExit("The environtment list contains too many environments");
}
/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore. If it
** is, sbEnv is copied into sbCurrentEnv and 1 is returned; otherwise 0
** is returned. When fLatex is clear the list is not consulted and 0 is
** returned.
**
** sbCurrentEnv is a fixed-size buffer. A matching name that would not
** fit (including its terminating NUL) is reported as "not ignored"
** rather than copied, so the copy can never run past the buffer.
******/
BeginEnv(sbEnv)
char *sbEnv;
{
	int i;

	if (!fLatex)
	    return(0);

	for (i = 0; i < csbEnvIgnore; i++) {
	    if (strcmp(sbEnv, rgsbEnvIgnore[i]) != 0)
		continue;
	    /* matched: refuse names that would overflow sbCurrentEnv */
	    if (strlen(sbEnv) >= sizeof(sbCurrentEnv))
		return(0);
	    (void)strcpy(sbCurrentEnv, sbEnv);
	    return(1);
	}
	return(0);
}
/******
** EndEnv -- reports whether sbEnv names the environment recorded in
** sbCurrentEnv. Returns 1 when the two strings are equal and fLatex
** is set, 0 in every other case.
******/
EndEnv(sbEnv)
char *sbEnv;
{
	int fMatch = 0;

	if (fLatex && strcmp(sbEnv, sbCurrentEnv) == 0)
	    fMatch = 1;
	return(fMatch);
}
/******
** InputFile -- push the current yyin and open sbFile. If the open fails,
** the sbFile is ignored.
******/
if (!fFollow)
return;
if (!InList(sbFile))
return;
rgfp[cfp++] = yyin;
if ((yyin = TexOpen(sbFile)) == NULL) {
Warning("can't open \\include file", sbFile);
yyin = rgfp[--cfp];
}
}
/******
** AddInclude -- adds a private copy of sbFile to rgsbIncList and
** increments csbIncList. Does nothing when fFollow is clear.
** If the include list is already full (csbIncList >= MAXINCLIST) a
** warning is issued and sbFile is ignored; nothing is stored, so the
** table is never written past its end.
******/
AddInclude(sbFile)
char *sbFile;
{
	if (!fFollow)
	    return;
	if (csbIncList >= MAXINCLIST) {
	    Warning("\\includeonly list is too long, ignoring", sbFile);
	    return;		/* really ignore it: no free slot left */
	}
	rgsbIncList[csbIncList] = SafeMalloc(strlen(sbFile) + 1, "malloc for AddInclude failed");
	(void)strcpy(rgsbIncList[csbIncList], sbFile);
	csbIncList++;
}
/******
** InList -- checks to see if sbFile is in the rgsbIncList. If there is
** no list (csbIncList == 0), all files are assumed to be "in the list".
** Before comparing, everything from the last '.' in sbFile onward is
** disregarded. Returns 1 if some list entry equals that base name,
** 0 otherwise.
**
** The base name is compared in place (length plus prefix compare)
** instead of being copied into a fixed-size buffer, so an arbitrarily
** long sbFile cannot overflow anything.
******/
InList(sbFile)
char *sbFile;
{
	char *pch;
	int cchBase, i;

	if (csbIncList == 0)	/* no list */
	    return(1);

	/* number of characters in front of the last '.', or the whole name */
	if ((pch = rindex(sbFile, '.')) != NULL)
	    cchBase = (int)(pch - sbFile);
	else
	    cchBase = (int)strlen(sbFile);

	/* stop early at an empty slot, as well as at the end of the list */
	for (i = 0; i < csbIncList && rgsbIncList[i]; i++)
	    if ((int)strlen(rgsbIncList[i]) == cchBase
	      && strncmp(rgsbIncList[i], sbFile, cchBase) == 0)
		return(1);
	return(0);
}
/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
** TEXINPUTS environment variable if set or else DEFAULTINPUTS. If
** the user's TEXINPUTS has a leading separator (CHPATHSEP) the
** DEFAULTINPUTS are prepended to the path; if there is a trailing
** separator the DEFAULTINPUTS are appended. This routine does not
** honor the '//' construct (expand subdirs).
**
** Under OS2, TEXINPUT is consulted first and TEXINPUTS only if that
** is unset. The assembled path string is split with SeparateList;
** csbInputPaths receives the count, and ErrorExit is called if
** SeparateList reports ERROR.
**
** An empty TEXINPUTS value is handled safely: the trailing-separator
** test is only made when the string has at least one character.
******/
SetInputPaths()
{
	char *sb, *sbPaths, *getenv();
	int cchDefaults, cchSb, cchPaths;
	int fLeadSep, fTrailSep;

	cchDefaults = strlen(DEFAULTINPUTS);
#ifdef OS2
	if ((sb = getenv("TEXINPUT")) == NULL)
#endif
	    if ((sb = getenv("TEXINPUTS")) == NULL)
		sb = DEFAULTINPUTS;

	/* measure once; guard the last-character test against "" */
	cchSb = strlen(sb);
	fLeadSep = (cchSb > 0 && sb[0] == CHPATHSEP);
	fTrailSep = (cchSb > 0 && sb[cchSb - 1] == CHPATHSEP);

	/* room for the user's string plus one copy of the defaults per end */
	cchPaths = cchSb;
	if (fLeadSep)
	    cchPaths += cchDefaults;
	if (fTrailSep)
	    cchPaths += cchDefaults;

	sbPaths = SafeMalloc(cchPaths + 1, "malloc for SetInputPaths failed");
	sbPaths[0] = '\0';
	if (fLeadSep)
	    (void)strcat(sbPaths, DEFAULTINPUTS);
	(void)strcat(sbPaths, sb);
	if (fTrailSep)
	    (void)strcat(sbPaths, DEFAULTINPUTS);

	csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
	if (csbInputPaths == ERROR)
#ifdef OS2
	    ErrorExit("TEXINPUT(S) environment variable has too many paths");
#else
	    ErrorExit("TEXINPUTS environment variable has too many paths");
#endif
}
/******
** SeparateList -- takes a chSep separated list sbList, replaces the
** chSep's with NULLs and sets rgsbList[i] to the beginning of
** the ith word in sbList. The number of words is returned. An
** ERROR is returned if there are more than csbMax words.
******/
SeparateList(sbList, rgsbList, chSep, csbMax)
char *sbList, *rgsbList[], chSep;
int csbMax;
{
int csbList = 0;
/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
** For each input path the following order is used:
** file.tex - must be as named, if not there go to the next path
** file.ext - random extension, try it
** file - base name, add .tex and try it
** file - try it as is
** Notice that if file exists in the first path and file.tex exists in
** one of the other paths, file in the first path is what is opened.
** If the sbFile begins with a '/', no paths are searched.
******/
/* If sbFile ends in .tex then it must be there */
if ((pch = rindex(sbFullPath, '.')) != NULL
&& (strcmp(pch, ".tex") == 0))
if ((fp = fopen(sbFullPath, "r")) != NULL)
return(fp);
else
continue;
/* if .<ext> then try to open it. the '.' represents */
/* the beginning of an extension if it is not the first */
/* character and it does not follow a '.' or a '/' */
if (pch != NULL && pch > &(sbFullPath[0])
&& *(pch - 1) != '.' && *(pch - 1) != '/'
&& (fp = fopen(sbFullPath, "r")) != NULL)
return(fp);
/* just base name, add .tex to the name */
sbNew = SafeMalloc(strlen(sbFullPath) + 5, "malloc for TexOpen failed");
(void)strcpy(sbNew, sbFullPath);
(void)strcat(sbNew, ".tex");
if ((fp = fopen(sbNew, "r")) != NULL)
return(fp);