/* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
/*-
* Copyright (c) 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (C) Caldera International Inc. 2001-2002.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code and documentation must retain the above
* copyright notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed or owned by Caldera
* International, Inc.
* 4. Neither the name of Caldera International, Inc. nor the names of other
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
* INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
* INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Deroff command -- strip troff, eqn, and Tbl sequences from
* a file. The -w flag causes output of one word per line
* rather than in the original format.
* -mm (or -ms) causes the corresponding macros to be interpreted
* so that just sentences are output.
* -ml also gets rid of lists.
* Deroff follows .so and .nx commands, removes contents of macro
* definitions, equations (both .EQ ... .EN and $...$),
* Tbl command sequences, and Troff backslash constructions.
*
* All input is through the Cget macro;
* the most recently read character is in c.
*
* Modified by Robert Henry to process -me and -man macros.
*/
#define NOCHAR (-2)
/*
 * Character classes stored in chars[].  The numeric order matters:
 * msputwords() tests "chars[x] < DIGIT" to match SPECIAL, APOS and PUNCT
 * in one comparison.
 */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4
#define MAXFILES 20
static int iflag;
static int wordflag;
static int msflag; /* processing a source written using a mac package */
static int mac; /* which package */
static int disp;
static int parag;
static int inmacro;
static int intable;
static int keepblock; /* keep blocks of text; normally false when msflag */
/*
 * The table is indexed with input bytes cast to unsigned char, so it has
 * to cover 0..255.  With only 128 entries every byte with the high bit set
 * indexed past the end of the array.  Entries that are never assigned keep
 * their static zero initialisation, i.e. SPECIAL.
 */
static char chars[256]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
static char line[LINE_MAX];
static char *lp;
static int c; /* most recently read character */
static int pc;
static int ldelim;
static int rdelim;
/*
* Flags for matching conditions other than
* the macro name.  Each value names one extra condition; the comment on
* each line says what that condition is.
*/
#define NONE 0
#define FNEST 1 /* no nested files */
#define NOMAC 2 /* no macro */
#define MAC 3 /* macro */
#define PARAG 4 /* in a paragraph */
#define MSF 5 /* msflag is on */
#define NBLK 6 /* set if no blocks to be kept */
/*
* Return codes from the per-macro functions; they determine where the
* caller jumps next and therefore how text is repeated/reprocessed.
* The per-macro functions visible in this part of the file all return 0.
*/
#define COMX 1 /* goto comx */
#define COM 2 /* goto com */
/*
* Forward declarations for the file-local (static) functions.
*/
static int skeqn(void);
static int eof(void);
/* _C1 and _C exist only in DEBUG builds. */
#ifdef DEBUG
static int _C1(void);
static int _C(void);
#endif
/* Functions that take a single pacmac argument and return an int. */
static int EQ(pacmac);
static int domacro(pacmac);
static int PS(pacmac);
static int skip(pacmac);
static int intbl(pacmac);
static int outtbl(pacmac);
static int so(pacmac);
static int nx(pacmac);
static int skiptocom(pacmac);
static int PP(pacmac);
static int AU(pacmac);
static int SH(pacmac);
static int UX(pacmac);
static int MMHU(pacmac);
static int mesnblock(pacmac);
static int mssnblock(pacmac);
static int nf(pacmac);
static int ce(pacmac);
static int meip(pacmac);
static int mepp(pacmac);
static int mesh(pacmac);
static int mefont(pacmac);
static int manfont(pacmac);
static int manpp(pacmac);
/* Helpers for the macro tables (struct mactab). */
static int macsort(const void *, const void *);
static int sizetab(const struct mactab *);
/* Input processing and output helpers. */
static void getfname(void);
static void textline(char *, int);
static void work(void) __dead;
static void regline(void (*)(char *, int), int);
static void macro(void);
static void tbl(void);
static void stbl(void);
static void eqn(void);
static void backsl(void);
static void sce(void);
static void refer(int);
static void inpic(void);
static void msputmac(char *, int);
static void msputwords(void);
static void meputmac(char *, int);
static void meputwords(void);
static void noblock(char, char);
static void defcomline(pacmac);
static void comline(void);
static void buildtab(const struct mactab **, int *);
static FILE *opn(char *);
static struct mactab *macfill(struct mactab *, const struct mactab *);
static void usage(void) __dead;
/*
* Program entry point.
*
* NOTE(review): the body below looks truncated or spliced.  The locals
* i, ch, errflg and kflag are declared but never used in the visible
* lines, ac and av are never read, and the remaining statements only
* consume input characters (looking for '.', then for "TE").  Confirm
* against the upstream deroff.c before relying on this function.
*/
int
main(int ac, char **av)
{
int i, ch;
int errflg = 0;
int kflag = NO;
/* read characters until a '.' is seen */
while (C != '.')
; /* nothing */
SKIP_TO_COM;
/* unless the characters just read are 'T' then 'E', keep scanning */
if (c != 'T' || C != 'E') {
SKIP;
pc = c;
/* pc holds the character read before the current one */
while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
pc = c;
}
}
/*
* Skip over an equation.
*
* Characters are read with C1.  When a '.' or '\'' is followed (after
* optional blanks/tabs) by "EN", the rest of that line is skipped and the
* function returns; if msflag and dflg are both set it first prints "x "
* and, when one was recorded, the last PUNCT character followed by a
* newline.
*
* NOTE(review): in the text visible here the closing braces of the for
* loop and of the function are missing, c1 and c2 are never used, and
* dflg is never cleared -- part of the body appears to have been lost.
* Confirm against the upstream deroff.c.
*/
static void
eqn(void)
{
int c1, c2;
int dflg;
char last;
last=0;
dflg = 1;
SKIP;
for (;;) {
if (C1 == '.' || c == '\'') {
/* skip blanks and tabs between the control character and the name */
while (C1 == ' ' || c == '\t')
;
if (c == 'E' && C1 == 'N') {
SKIP;
if (msflag && dflg) {
/* stand-in output for the removed equation */
putchar('x');
putchar(' ');
if (last) {
putchar(last);
putchar('\n');
}
}
return;
}
} else if (c == 'd') {
/* look for delim */
if (C1 == 'e' && C1 == 'l')
if (C1 == 'i' && C1 == 'm') {
while (C1 == ' ')
; /* nothing */
/*
* Read the rest of the line, remembering in last the final PUNCT
* character; any other non-blank character resets it to 0.
*/
if (c != '\n')
while (C1 != '\n') {
if (chars[c] == PUNCT)
last = c;
else if (c != ' ')
last = 0;
}
}
}
/*
* Skip over a complete backslash construction.
*
* The callers in this file invoke it once a backslash has been seen; the
* character that follows the backslash is read here with C and selects
* how much more input belongs to the construction.  Some cases also
* adjust the global output pointer lp.
*/
static void
backsl(void)
{
int bdelim;
sw:
switch (C) {
case '"':
/* backslash-quote: SKIP the remainder (presumably the rest of the line) */
SKIP;
return;
case 's':
/* \s: followed either by another backslash construction or by digits */
if (C == '\\')
backsl();
else {
while (C >= '0' && c <= '9')
; /* nothing */
/* push back the first non-digit and leave a digit in c */
ungetc(c, infile);
c = '0';
}
--lp;
return;
case 'f':
case 'n':
case '*':
/* one character follows, unless it is '(' which introduces two more */
if (C != '(')
return;
/* FALLTHROUGH */
case '(':
if (msflag) {
/* with msflag set, "em" after '(' is replaced by '-' at *lp */
if (C == 'e') {
if (C == 'm') {
*lp = '-';
return;
}
}
else if (c != '\n')
C;
return;
}
/* consume the second character unless the line ended */
if (C != '\n')
C;
return;
case '$':
C; /* discard argument number */
return;
case 'b':
case 'x':
case 'v':
case 'h':
case 'w':
case 'o':
case 'l':
case 'L':
/*
* These take a delimited argument: the next character is the
* delimiter; read up to its next occurrence or the end of the line,
* recursing for backslash constructions found inside.
*/
if ((bdelim = C) == '\n')
return;
while (C != '\n' && c != bdelim)
if (c == '\\')
backsl();
return;
case '\\':
/* doubled backslash: inside a macro, dispatch again on what follows */
if (inmacro)
goto sw;
return;
default:
return;
}
}
/*
 * Skip the input lines covered by a request that takes a line count.
 *
 * The rest of the current line is read as the count argument (default 1,
 * also used when the argument is too long to fit the local buffer).  Then
 * up to that many text lines are skipped.  Lines starting with '.' are
 * not counted; ".ce 0" ends the skipping early, and so does a request
 * whose first or second name character is 'P'.  (The ".ce 0" test suggests
 * this serves the centering request -- confirm against the caller.)
 */
static void
sce(void)
{
	char *ap;
	int n, i;
	char a[10];

	for (ap = a; C != '\n'; ap++) {
		*ap = c;
		if (ap == &a[9]) {
			/* too long to be a count: drop it and use the default */
			SKIP;
			ap = a;
			break;
		}
	}
	if (ap != a) {
		/*
		 * Terminate the collected text before converting it; without
		 * this atoi() read whatever followed in the uninitialised
		 * buffer.  ap is at most &a[9] here, so the store is in bounds.
		 */
		*ap = '\0';
		n = atoi(a);
	} else
		n = 1;
	for (i = 0; i < n;) {
		if (C == '.') {
			if (C == 'c') {
				if (C == 'e') {
					while (C == ' ')
						; /* nothing */
					if (c == '0') {
						/* ".ce 0" cancels the request */
						SKIP;
						break;
					} else
						SKIP;
				}
				else
					SKIP;
			} else if (c == 'P' || C == 'P') {
				if (c != '\n')
					SKIP;
				break;
			} else if (c != '\n')
				SKIP;
		} else {
			/* an ordinary text line: skip it and count it */
			SKIP;
			i++;
		}
	}
}
static void
refer(int c1)
{
int c2;
if (c1 != '\n')
SKIP;
for (c2 = -1;;) {
if (C != '.')
SKIP;
else {
if (C != ']')
SKIP;
else {
while (C != '\n')
c2 = c;
if (c2 != -1 && chars[c2] == PUNCT)
putchar(c2);
return;
}
}
}
}
/*
 * Process the inside of a picture block.
 *
 * Input is skipped until a line starting with ".PE" is found.  Text inside
 * double quotes is collected into the global line buffer (backslash
 * constructions are removed via backsl()), and at each end of line the
 * collected text is written out: word by word through msputwords() when
 * wordflag is set, otherwise with puts() followed by an extra newline.
 */
static void
inpic(void)
{
	int c1;
	char *p1;
	/* last slot that may hold text; keeps one byte free for the NUL */
	char *const limit = &line[sizeof(line) - 1];

	SKIP;
	p1 = line;
	c = '\n';
	for (;;) {
		c1 = c;
		if (C == '.' && c1 == '\n') {
			if (C != 'P') {
				if (c == '\n')
					continue;
				else {
					SKIP;
					c = '\n';
					continue;
				}
			}
			if (C != 'E') {
				if (c == '\n')
					continue;
				else {
					SKIP;
					c = '\n';
					continue;
				}
			}
			/* ".PE": end of the picture */
			SKIP;
			return;
		}
		else if (c == '\"') {
			while (C != '\"') {
				if (c == '\\') {
					/* an escaped quote does not end the string */
					if (C == '\"')
						continue;
					ungetc(c, infile);
					backsl();
				} else if (p1 < limit) {
					/*
					 * Quoted text longer than the buffer used to
					 * run past the end of line[]; the excess is
					 * now dropped.
					 */
					*p1++ = c;
				}
			}
			if (p1 < limit)
				*p1++ = ' ';
		}
		else if (c == '\n' && p1 != line) {
			*p1 = '\0';
			if (wordflag)
				msputwords();
			else {
				puts(line);
				putchar('\n');
			}
			p1 = line;
		}
	}
}
/*
* Function versions of the character-reading macros, compiled only when
* DEBUG is defined.  Each simply returns the value of the corresponding
* macro expression.
*/
#ifdef DEBUG
/* Returns the value of C1get. */
static int
_C1(void)
{
return C1get;
}
/* Returns the value of Cget. */
static int
_C(void)
{
return Cget;
}
#endif /* DEBUG */
/*
 * Put out a macro line, using ms and mm conventions.
 *
 * s         the argument text of the macro line (NUL terminated)
 * constant  minimum length: a token is printed only if it is longer than
 *           this many characters and begins with two letters
 *
 * With wordflag set the work is delegated to msputwords().  Otherwise the
 * blank-separated tokens of s are examined one by one: qualifying tokens
 * are printed with double quotes removed, a single trailing punctuation
 * character is printed once some token has been printed, and everything
 * else is dropped.  When msflag is set and the last token ended in a
 * punctuation character, that character is printed on a line of its own.
 */
static void
msputmac(char *s, int constant)
{
	char *t;
	int found;
	int last;

	last = 0;
	found = 0;
	if (wordflag) {
		msputwords();
		return;
	}
	while (*s) {
		while (*s == ' ' || *s == '\t')
			putchar(*s++);
		/* t is moved to the blank or NUL that ends the token */
		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
			; /* nothing */
		if (*s == '\"')
			s++;
		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
		    chars[(unsigned char)s[1]] == LETTER) {
			while (s < t)
				if (*s == '\"')
					s++;
				else
					putchar(*s++);
			/*
			 * last is used as an index into chars[] below, so it
			 * must not be negative: convert through unsigned char
			 * like every other index in this function.
			 */
			last = (unsigned char)*(t - 1);
			found++;
		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
		    s[1] == '\0') {
			putchar(*s++);
		} else {
			last = (unsigned char)*(t - 1);
			s = t;
		}
	}
	putchar('\n');
	if (msflag && chars[last] == PUNCT) {
		putchar(last);
		putchar('\n');
	}
}
/*
 * Put out words (for the -w option) with ms and mm conventions.
 *
 * Scans the global line buffer and prints, one per line, every run of
 * non-SPECIAL characters that starts with a letter and contains at least
 * two letters.  Trailing punctuation and apostrophes are not printed.
 */
static void
msputwords(void)
{
	char *start, *end;
	int kind, letters;

	start = line;
	for (;;) {
		/* skip leading specials, apostrophes and punctuation */
		while (chars[(unsigned char)*start] < DIGIT) {
			if (*start == '\0')
				return;
			start++;
		}

		/* find the end of the run and count its letters */
		letters = 0;
		end = start;
		while ((kind = chars[(unsigned char)*end]) != SPECIAL) {
			if (kind == LETTER)
				letters++;
			end++;
		}

		if (letters <= 1 || chars[(unsigned char)start[0]] != LETTER) {
			/* not a word: move past the run */
			start = end;
			continue;
		}

		/* back up over trailing punctuation and apostrophes */
		for (;;) {
			kind = chars[(unsigned char)end[-1]];
			if (kind != PUNCT && kind != APOS)
				break;
			end--;
		}
		while (start < end)
			putchar(*start++);
		putchar('\n');
	}
}
/*
 * Put out a macro line using the me conventions.
 *
 * cp        the argument text of the macro line (NUL terminated)
 * constant  the cut-off length: an argument is printed only if it is
 *           longer than this and is quoted or starts with a letter
 *
 * With wordflag set the work is delegated to meputwords().  Otherwise the
 * arguments are split at blanks and tabs (a leading double quote lets an
 * argument contain blanks), and each qualifying argument is printed.  A
 * single punctuation character is printed once some argument has been
 * printed.  When msflag is set and the last argument ended in a
 * punctuation character, that character is printed before the newline.
 */
#define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; }
static void
meputmac(char *cp, int constant)
{
	char *np;
	int found;
	int argno;
	int last;
	int inquote;

	last = 0;
	found = 0;
	if (wordflag) {
		meputwords();
		return;
	}
	for (argno = 0; *cp; argno++) {
		SKIPBLANK(cp);
		inquote = (*cp == '"');
		if (inquote)
			cp++;
		for (np = cp; *np; np++) {
			switch (*np) {
			case '\n':
			case '\0':
				break;
			case '\t':
			case ' ':
				if (inquote)
					continue;
				else
					goto endarg;
			default:
				continue;
			}
		}
endarg: ;
		/*
		 * cp points at the first char in the arg
		 * np points one beyond the last char in the arg
		 */
		if ((argconcat == 0) || (argconcat != argno))
			putchar(' ');
#ifdef FULLDEBUG
		{
			char *p;
			/* np - cp is a ptrdiff_t; convert it to match %d */
			printf("[%d,%d: ", argno, (int)(np - cp));
			for (p = cp; p < np; p++) {
				putchar(*p);
			}
			printf("]");
		}
#endif /* FULLDEBUG */
		/*
		 * Determine if the argument merits being printed
		 *
		 * constant is the cut off point below which something
		 * is not a word.
		 */
		if (((np - cp) > constant) &&
		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
			for (; cp < np; cp++)
				putchar(*cp);
			/*
			 * last indexes chars[] below, so it must not be
			 * negative: convert through unsigned char like the
			 * other indices in this function.
			 */
			last = (unsigned char)np[-1];
			found++;
		} else if (found && (np - cp == 1) &&
		    chars[(unsigned char)*cp] == PUNCT) {
			putchar(*cp);
		} else {
			last = (unsigned char)np[-1];
		}
		cp = np;
	}
	if (msflag && chars[last] == PUNCT)
		putchar(last);
	putchar('\n');
}
/*
* put out words (for the -w option) with me conventions;
* this simply calls msputwords(), so the output is the same as for
* ms and mm.
*/
static void
meputwords(void)
{
msputwords();
}
/*
*
* Skip over a nested set of macros
*
* a1, a2 are the two characters of the request that ends the block;
* input is discarded until a line ".<a1><a2>" is read.
*
* Possible arguments to noblock are:
*
* fi end of unfilled text
* PE pic ending
* DE display ending
*
* for ms and mm only:
* KE keep ending
*
* NE undocumented match to NS (for mm?)
* LE mm only: matches RL or *L (for lists)
*
* for me:
* ([lqbzcdf]
*
* On the normal exit a newline is written, preceded by a '.' unless an
* equation nested in the block was handed to eqn().
*/
static void
noblock(char a1, char a2)
{
int c1,c2;
int eqnf;
int lct;
/* lct counts nested "?L" requests still waiting for their own end request */
lct = 0;
/* eqnf stays set until a nested equation has been processed */
eqnf = 1;
SKIP;
for (;;) {
/* discard input until a '.' is read */
while (C != '.')
if (c == '\n')
continue;
else
SKIP;
/* c1 and c2 are the two characters following the '.' */
if ((c1 = C) == '\n')
continue;
if ((c2 = C) == '\n')
continue;
if (c1 == a1 && c2 == a2) {
SKIP;
/* this end request closes a nested block, not ours */
if (lct != 0) {
lct--;
continue;
}
if (eqnf)
putchar('.');
putchar('\n');
return;
} else if (a1 == 'L' && c2 == 'L') {
/* while looking for ".L?", any other ".?L" opens a nested block */
lct++;
SKIP;
}
/*
* equations (EQ) nested within a display
*/
else if (c1 == 'E' && c2 == 'Q') {
if ((mac == ME && a1 == ')')
|| (mac != ME && a1 == 'D')) {
eqn();
eqnf=0;
}
}
/*
* turning on filling is done by the paragraphing
* macros
*/
else if (a1 == 'f') { /* .fi */
if ((mac == ME && (c2 == 'h' || c2 == 'p'))
|| (mac != ME && (c1 == 'P' || c2 == 'P'))) {
SKIP;
return;
}
} else {
SKIP;
}
}
}
/*
* pacmac-style wrapper around eqn(): the argument is ignored, eqn() is
* called and 0 is returned.
*/
static int
/*ARGSUSED*/
EQ(pacmac unused)
{
eqn();
return 0;
}
/*
* pacmac-style wrapper around macro(): the argument is ignored, macro()
* is called and 0 is returned.
*/
static int
/*ARGSUSED*/
domacro(pacmac unused)
{
macro();
return 0;
}
/*
 * Handle the start of a picture (".PS").  The argument is ignored and the
 * return value is always 0.
 */
static int
/*ARGSUSED*/
PS(pacmac unused)
{
	/* read the next character, then move past any blanks and tabs */
	C;
	while (c == ' ' || c == '\t')
		C;

	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
		SKIP;
		return 0;
	}

	/* with msflag the block is skipped up to .PE, otherwise inpic() reads it */
	if (msflag)
		noblock('P', 'E');
	else
		inpic();
	return 0;
}
/*
* pacmac-style wrapper around the SKIP macro: the argument is ignored,
* SKIP is executed and 0 is returned.
*/
static int
/*ARGSUSED*/
skip(pacmac unused)
{
SKIP;
return 0;
}
/*
 * Enter table processing: stbl() is used when msflag is set, tbl()
 * otherwise.  The argument is ignored and the return value is always 0.
 */
static int
/*ARGSUSED*/
intbl(pacmac unused)
{
	void (*handler)(void) = msflag ? stbl : tbl;

	handler();
	return 0;
}
/*
* Clear the global intable flag (sets it to NO).  The argument is
* ignored and the return value is always 0.
*/
static int
/*ARGSUSED*/
outtbl(pacmac unused)
{
intable = NO;
return 0;
}
/*
 * Handle a request that switches input to a nested file.
 *
 * Unless iflag is set, the file name is read with getfname(); if one was
 * given, the file is opened with opn(), pushed on the files[] stack and
 * made the current input.  The argument is ignored and the return value
 * is always 0.  Exceeding the nesting limit terminates the program
 * through err().
 */
static int
/*ARGSUSED*/
so(pacmac unused)
{
	if (iflag)
		return 0;
	getfname();
	if (fname[0] == '\0')
		return 0;
	/*
	 * The new slot index must stay below MAXFILES.  The previous test
	 * used '>', which let the index reach MAXFILES itself.
	 * NOTE(review): assumes files[] is declared with MAXFILES elements
	 * (declaration not visible here) -- confirm.
	 */
	if (++filesp - &files[0] >= MAXFILES)
		err(1, "too many nested files (max %d)",
		    MAXFILES);
	infile = *filesp = opn(fname);
	return 0;
}
/*
 * Handle a request that replaces the current input file with another.
 *
 * Nothing happens when iflag is set.  Otherwise the file name is read
 * with getfname(); an empty name ends the program with status 0.  The
 * current input is closed unless it is stdin, and the file opened by
 * opn() takes its place in the current files[] slot.  The argument is
 * ignored and the return value is always 0.
 */
static int
/*ARGSUSED*/
nx(pacmac unused)
{
	if (iflag)
		return 0;

	getfname();
	if (fname[0] == '\0')
		exit(0);
	if (infile != stdin)
		fclose(infile);
	*filesp = opn(fname);
	infile = *filesp;
	return 0;
}
static int
/*ARGSUSED*/
skiptocom(pacmac unused)
{