/*
* Copyright %%\copyright%% 1980 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Regular expression matching routines for lgrind/tgrind/tfontedpr.
*
* These routines were written by Dave Presotto (I think) for vgrind.
* Minor mods & attempts to improve performance by Van Jacobson
* (van@@lbl-rtsg) and Chris Torek (chris@@maryland).
*
* Modifications.
* --------------
* Sep 91 George V Reilly Fixed up some bogus uses of @NIL@ and @NULL@.
* 30 Mar 85 Van & Chris Changed @expmatch()@ to return pointer to
* start of what was matched in addition to
* pointer to match end. Several changes to
* improve performance (too numerous to mention).
* 11 Dec 84 Dave Presotto Written.
*/
int (*re_strncmp)(const char *, const char *, size_t);
/* function used by @expmatch()@ to compare
* strings. The caller should make it point to
* @strncmp()@ if case is significant &
* @lc_strncmp()@ otherwise.
*/
/* @lc_strncmp()@ --- like @strncmp()@ except that we convert the
* first string to lower case before comparing.
*/
int lc_strncmp(register const char *s1, register const char *s2, register size_t len)
{
while (len-- > 0)
if (*s2 - makelower(*s1))
return 1;
else
s2++, s1++;
return 0;
}
/* The following routine converts an irregular expression to
* internal format.
*
* Either meta symbols (%|\a|%, %|\d|%, or %|\p|%) or character strings
* or operations (alternation or parenthesizing) can be specified.
* Each starts with a descriptor byte. The descriptor byte
* has @STR@ set for strings, @META@ set for meta symbols,
* and @OPER@ set for operations. The descriptor byte can also have
* the @OPT@ bit set if the object defined is optional. Also @ALT@
* can be set to indicate an alternation.
*
* For metasymbols, the byte following the descriptor byte identities
* the meta symbol (containing an ASCII @'a'@, @'d'@, @'p'@, @'|'@,
* or @'('@). For strings, the byte after the descriptor is a
* character count for the string:
*@
* meta symbols := descriptor
* symbol
*
* strings := descriptor
* character count
* the string
*
* operations := descriptor
* symbol
* character count@
*/
/*
* handy macros for accessing parts of match blocks
*/
#define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
#define MNEXT(A) (A+2) /* character following a metasymbol block */
#define OSYM(A) (*(A+1)) /* symbol in an operation block */
#define OCNT(A) (*(A+2)) /* character count */
#define ONEXT(A) (A+3) /* next character after the operation */
#define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
#define SCNT(A) (*(A+1)) /* byte count of a string */
#define SSTR(A) (A+2) /* address of the string */
#define SNEXT(A) (A+2+*(A+1)) /* character following the string */
/*
* bit flags in the descriptor
*/
#define OPT 1
#define STR 2
#define META 4
#define ALT 8
#define OPER 16
char *ure; /* pointer current position in unconverted exp */
char *ccre; /* pointer to current position in converted exp */
/*
* Convert an irregular expression to the internal form
*/
char *convexp(char *re)
/* re unconverted irregular expression */
{
register char *cre; /* pointer to converted regular expression */
/* allocate room for the converted expression */
if (re == NULL)
return NULL;
if (*re == '\0')
return NULL;
cre = (char*)malloc(4 * strlen(re) + 3);
ccre = cre;
ure = re;
/* start the conversion with a %|\a|% */
*cre = META | OPT;
MSYM(cre) = 'a';
ccre = MNEXT(cre);
/*
* Actually do the conversion
*/
static void expconv(void)
{
register char *cs; /* pointer to current symbol in converted exp */
register char c; /* character being processed */
register char *acs; /* pointer to last alternate */
register int temp;
/* let the conversion begin */
acs = NULL;
cs = NULL;
while (*ure != '\0') {
switch (c = *ure++) {
/* normal(?) metacharacters */
case 'a':
case 'd':
case 'e':
case 'p':
if (acs != NULL && acs != cs) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = META;
MSYM(cs) = c;
ccre = MNEXT(cs);
break;
}
break;
/* just put the symbol in */
case '^':
case '$':
if (acs != NULL && acs != cs) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = META;
MSYM(cs) = c;
ccre = MNEXT(cs);
break;
/* mark the last match sequence as optional */
case '?':
if (cs)
*cs = *cs | OPT;
break;
/* recurse and define a subexpression */
case '(':
if (acs != NULL && acs != cs) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = OPER;
OSYM(cs) = '(';
ccre = ONEXT(cs);
expconv();
OCNT(cs) = ccre - cs; /* offset to next symbol */
break;
/* return from a recursion */
case ')':
if (acs != NULL) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = META;
MSYM(cs) = c;
ccre = MNEXT(cs);
return;
/* Mark the last match sequence as having an alternate. */
/* The third byte will contain an offset to jump over the */
/* alternate match in case the first did not fail */
case '|':
if (acs != NULL && acs != cs)
OCNT(ccre) = ccre - acs; /* make a back pointer */
else
OCNT(ccre) = 0;
*cs |= ALT;
cs = ccre;
*cs = OPER;
OSYM(cs) = '|';
ccre = ONEXT(cs);
acs = cs; /* remember that the pointer is to be filled */
break;
/* if it's not a metasymbol, just build a character string */
default:
if (cs == NULL || (*cs & STR) == 0) {
cs = ccre;
*cs = STR;
SCNT(cs) = 1;
ccre = SSTR(cs);
} else
SCNT(cs)++;
*ccre++ = c;
break;
}
}
if (acs != NULL) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
return;
} /* end of @expconv()@ */
/*
* The following routine recognises an irregular expression
* with the following special characters:
*
* %|\?|% means last match was optional
* %|\a|% matches any number of characters
* %|\d|% matches any number of spaces and tabs
* %|\p|% matches any number of alphanumeric characters.
* The characters matched will be copied into
* the area pointed to by @mstring@.
* %|\||% alternation
* %|\( \)|% grouping used mostly for alternation and
* optionality
*
* The irregular expression must be translated to internal form
* prior to calling this routine
*
* The value returned is the pointer to the last character matched.
* If @strtptr@ is non-@NULL@, a pointer to the start of the string matched
* (excluding %|\a|% matches) will be returned at @*strtptr@.
* If @mstring@ is non-@NULL@, the string matched by a %|\p|% will be copied
* into @mstring@.
*/
boolean _escaped; /* true if we are currently @_escaped@ */
char *_sstart; /* start of string. Set in @putScp()@. */
char *expmatch(
register char *s, /* string to check for a match in */
register char *re, /* a converted irregular expression */
char **strtptr, /* where to put ptr to start of match */
char *mstring) /* where to put whatever matches a %|\p|% */
{
register char *cs; /* the current symbol */
register char *ptr, *s1; /* temporary pointer */
register char c; /* temporary */
boolean matched; /* a temporary @boolean@ */
/* try to match a metasymbol */
case META:
switch (MSYM(cs)) {
/* try to match anything and remember what was matched */
case 'p':
/*
* This is really the same as trying the match the
* remaining parts of the expression to any subset
* of the string.
*/
s1 = s;
do {
ptr = expmatch(s1, MNEXT(cs), strtptr, mstring);
if (ptr != NULL && s1 != s) {
/* we have a match: remember the match */
if (mstring) {
strncpy(mstring, s, (size_t)(s1 - s));
mstring[(int)(s1 - s)] = '\0';
}
return ptr;
} else if (ptr != NULL && (*cs & OPT)) {
/* %|\p|% was optional, so no match is ok */
return ptr;
} else if (ptr != NULL) {
/* not optional and we still matched */
return NULL;
}
if (!isalnum(*s1) && !strchr(l_id, *s1))
return NULL;
if (*s1 == '\\')
_escaped = _escaped ? FALSE : TRUE;
else
_escaped = FALSE;
} while (*s1++);
return NULL;
/* try to match anything */
case 'a':
/*
* This is really the same as trying the match the
* remaining parts of the expression to any subset
* of the string.
*/
s1 = s;
do {
/*
* Hack for an important special case: if the next thing
* in the pattern is a string, just gobble characters until
* we find something that matches that string (this saves
* the cost of a recursive call on @expmatch()@ while scanning
* for the start of comments or strings). Since many
* patterns end with a string, we also treat that as a
* special case.
*/
if(*(ptr = MNEXT(cs)) == STR) {
c = *SSTR(ptr);
while(*s1 && *s1 != c)
s1++;
if (*s1 == 0)
return NULL;
if (SNEXT(ptr) == 0 && (s1 != s || *cs & OPT)) {
/* next item is a string, it's the last item and
* the %|\a|% match is ok --- just loop to try & match
* the string.
*/
if (strtptr)
*strtptr = s1;
cs = ptr;
s = s1;
break;
}
}
ptr = expmatch(s1, MNEXT(cs), strtptr, mstring);
if (ptr != NULL && (s1 != s || *cs & OPT)) {
/* we have a match */
if (strtptr)
*strtptr = s1;
return ptr;
} else if (ptr != NULL) {
/* not optional and we still matched */
return NULL;
}
if (*s1 == '\\')
_escaped = _escaped ? FALSE : TRUE;
else
_escaped = FALSE;
} while (*s1++);
return NULL;
/* fail if we are currently @_escaped@ */
case 'e':
if (_escaped)
return NULL;
cs = MNEXT(cs);
break;
/* match any number of tabs and spaces */
case 'd':
ptr = s;
while (*s == ' ' || *s == '\t')
s++;
/* check for end of line */
case '$':
if (*s == '\0' || *s == '\n') {
/* match: be happy */
/* s++; was here
* removed; now $ is left in stream to be matched again, which
* is not bad (EOL can end several things at once. Also it
* leaves things like CE on the same line.
*/
matched = 1;
cs = MNEXT(cs);
} else if (*cs & ALT) {
/* try the next part */
matched = 0;
cs = MNEXT(cs);
} else if (*cs & OPT) {