struct Inst
{
long type; /* <= Runemax ==> literal, otherwise action */
union {
int rsid;
int rsubid;
int class;
struct Inst *rother;
struct Inst *rright;
} r;
union{
struct Inst *lleft;
struct Inst *lnext;
} l;
};
#define sid r.rsid
#define subid r.rsubid
#define rclass r.class
#define other r.rother
#define right r.rright
#define left l.lleft
#define next l.lnext
#define NPROG 1024
Inst program[NPROG];
Inst *progp;
Inst *startinst; /* First inst. of program; might not be program[0] */
Inst *bstartinst; /* same for backwards machine */
typedef struct Ilist Ilist;
struct Ilist
{
Inst *inst; /* Instruction of the thread */
Rangeset se;
Posn startp; /* first char of match */
};
#define NLIST 127
Ilist *tl, *nl; /* This list, next list */
Ilist list[2][NLIST+1]; /* +1 for trailing null */
static Rangeset sempty;
/*
* Actions and Tokens
*
* 0x100xx are operators, value == precedence
* 0x200xx are tokens, i.e. operands for operators
*/
enum {
OPERATOR = Runemask+1, /* Bitmask of all operators */
START = OPERATOR, /* Start, used for marker on stack */
RBRA, /* Right bracket, ) */
LBRA, /* Left bracket, ( */
OR, /* Alternation, | */
CAT, /* Concatentation, implicit operator */
STAR, /* Closure, * */
PLUS, /* a+ == aa* */
QUEST, /* a? == a|nothing, i.e. 0 or 1 a's */
ANY = OPERATOR<<1, /* Any character but newline, . */
NOP, /* No operation, internal use only */
BOL, /* Beginning of line, ^ */
EOL, /* End of line, $ */
CCLASS, /* Character class, [] */
NCCLASS, /* Negated character class, [^] */
END, /* Terminate: match found */
#define NSTACK 20
Node andstack[NSTACK];
Node *andp;
int atorstack[NSTACK];
int *atorp;
int lastwasand; /* Last token was operand */
int cursubid;
int subidstack[NSTACK];
int *subidp;
int backwards;
int nbra;
Rune *exprp; /* pointer to next character in source expression */
#define DCLASS 10 /* allocation increment */
int nclass; /* number active */
int Nclass; /* high water mark */
Rune **class;
int negateclass;
switch(c){
case '\\':
if(*exprp)
if((c= *exprp++)=='n')
c='\n';
break;
case 0:
c = END;
--exprp; /* In case we come here again */
break;
case '*':
c = STAR;
break;
case '?':
c = QUEST;
break;
case '+':
c = PLUS;
break;
case '|':
c = OR;
break;
case '.':
c = ANY;
break;
case '(':
c = LBRA;
break;
case ')':
c = RBRA;
break;
case '^':
c = BOL;
break;
case '$':
c = EOL;
break;
case '[':
c = CCLASS;
bldcclass();
break;
}
return c;
}
void
bldcclass(void)
{
long c1, c2, n, na;
Rune *classp;
classp = emalloc(DCLASS*RUNESIZE);
n = 0;
na = DCLASS;
/* we have already seen the '[' */
if(*exprp == '^'){
classp[n++] = '\n'; /* don't match newline in negate case */
negateclass = TRUE;
exprp++;
}else
negateclass = FALSE;
while((c1 = nextrec()) != ']'){
if(c1 == '-'){
Error:
free(classp);
regerror(Ebadclass);
}
if(n+4 >= na){ /* 3 runes plus NUL */
na += DCLASS;
classp = erealloc(classp, na*RUNESIZE);
}
if(*exprp == '-'){
exprp++; /* eat '-' */
if((c2 = nextrec()) == ']')
goto Error;
classp[n+0] = Runemax;
classp[n+1] = c1 & Runemask;
classp[n+2] = c2 & Runemask;
n += 3;
}else
classp[n++] = c1 & Runemask;
}
classp[n] = 0;
if(nclass == Nclass){
Nclass += DCLASS;
class = erealloc(class, Nclass*sizeof(Rune*));
}
class[nclass++] = classp;
}
int
classmatch(int classno, int c, int negate)
{
Rune *p;
p = class[classno];
while(*p){
if(*p == Runemax){
if(p[1]<=c && c<=p[2])
return !negate;
p += 3;
}else if(*p++ == c)
return !negate;
}
return negate;
}
/*
* Note optimization in addinst:
* *l must be pending when addinst called; if *l has been looked
* at already, the optimization is a bug.
*/
int
addinst(Ilist *l, Inst *inst, Rangeset *sep)
{
Ilist *p;
for(p = l; p->inst; p++){
if(p->inst==inst){
if((sep)->p[0].p1 < p->se.p[0].p1)
p->se= *sep; /* this would be bug */
return 0; /* It's already there */
}
}
p->inst = inst;
p->se= *sep;
(p+1)->inst = 0;
return 1;
}
int
execute(File *f, Posn startp, Posn eof)
{
int flag = 0;
Inst *inst;
Ilist *tlp;
Posn p = startp;
int nnl = 0, ntl;
int c;
int wrapped = 0;
int startchar = startinst->type<OPERATOR? startinst->type : 0;
list[0][0].inst = list[1][0].inst = 0;
sel.p[0].p1 = -1;
/* Execute machine once for each character */
for(;;p++){
doloop:
c = filereadc(f, p);
if(p>=eof || c<0){
switch(wrapped++){
case 0: /* let loop run one more click */
case 2:
break;
case 1: /* expired; wrap to beginning */
if(sel.p[0].p1>=0 || eof!=INFINITY)
goto Return;
list[0][0].inst = list[1][0].inst = 0;
p = 0;
goto doloop;
default:
goto Return;
}
}else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nnl==0)
break;
/* fast check for first char */
if(startchar && nnl==0 && c!=startchar)
continue;
tl = list[flag];
nl = list[flag^=1];
nl->inst = 0;
ntl = nnl;
nnl = 0;
if(sel.p[0].p1<0 && (!wrapped || p<startp || startp==eof)){
/* Add first instruction to this list */
sempty.p[0].p1 = p;
if(addinst(tl, startinst, &sempty))
if(++ntl >= NLIST)
Overflow:
error(Eoverflow);
}
/* Execute machine until this list is empty */
for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
Switchstmt:
switch(inst->type){
default: /* regular character */
if(inst->type==c){
Addinst:
if(addinst(nl, inst->next, &tlp->se))
if(++nnl >= NLIST)
goto Overflow;
}
break;
case LBRA:
if(inst->subid>=0)
tlp->se.p[inst->subid].p1 = p;
inst = inst->next;
goto Switchstmt;
case RBRA:
if(inst->subid>=0)
tlp->se.p[inst->subid].p2 = p;
inst = inst->next;
goto Switchstmt;
case ANY:
if(c!='\n')
goto Addinst;
break;
case BOL:
if(p==0 || filereadc(f, p - 1)=='\n'){
Step:
inst = inst->next;
goto Switchstmt;
}
break;
case EOL:
if(c == '\n')
goto Step;
break;
case CCLASS:
if(c>=0 && classmatch(inst->rclass, c, 0))
goto Addinst;
break;
case NCCLASS:
if(c>=0 && classmatch(inst->rclass, c, 1))
goto Addinst;
break;
case OR:
/* evaluate right choice later */
if(addinst(tlp, inst->right, &tlp->se))
if(++ntl >= NLIST)
goto Overflow;
/* efficiency: advance and re-evaluate */
inst = inst->left;
goto Switchstmt;
case END: /* Match! */
tlp->se.p[0].p2 = p;
newmatch(&tlp->se);
break;
}
}
}
Return:
return sel.p[0].p1>=0;
}
int
bexecute(File *f, Posn startp)
{
int flag = 0;
Inst *inst;
Ilist *tlp;
Posn p = startp;
int nnl = 0, ntl;
int c;
int wrapped = 0;
int startchar = bstartinst->type<OPERATOR? bstartinst->type : 0;
list[0][0].inst = list[1][0].inst = 0;
sel.p[0].p1= -1;
/* Execute machine once for each character, including terminal NUL */
for(;;--p){
doloop:
if((c = filereadc(f, p - 1))==-1){
switch(wrapped++){
case 0: /* let loop run one more click */
case 2:
break;
case 1: /* expired; wrap to end */
if(sel.p[0].p1>=0)
case 3:
goto Return;
list[0][0].inst = list[1][0].inst = 0;
p = f->nc;
goto doloop;
default:
goto Return;
}
}else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nnl==0)
break;
/* fast check for first char */
if(startchar && nnl==0 && c!=startchar)
continue;
tl = list[flag];
nl = list[flag^=1];
nl->inst = 0;
ntl = nnl;
nnl = 0;
if(sel.p[0].p1<0 && (!wrapped || p>startp)){
/* Add first instruction to this list */
/* the minus is so the optimizations in addinst work */
sempty.p[0].p1 = -p;
if(addinst(tl, bstartinst, &sempty))
if(++ntl >= NLIST)
Overflow:
error(Eoverflow);
}
/* Execute machine until this list is empty */
for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
Switchstmt:
switch(inst->type){
default: /* regular character */
if(inst->type == c){
Addinst:
if(addinst(nl, inst->next, &tlp->se))
if(++nnl >= NLIST)
goto Overflow;
}
break;
case LBRA:
if(inst->subid>=0)
tlp->se.p[inst->subid].p1 = p;
inst = inst->next;
goto Switchstmt;
case RBRA:
if(inst->subid >= 0)
tlp->se.p[inst->subid].p2 = p;
inst = inst->next;
goto Switchstmt;
case ANY:
if(c != '\n')
goto Addinst;
break;
case BOL:
if(c=='\n' || p==0){
Step:
inst = inst->next;
goto Switchstmt;
}
break;
case EOL:
if(p==f->nc || filereadc(f, p)=='\n')
goto Step;
break;
case CCLASS:
if(c>=0 && classmatch(inst->rclass, c, 0))
goto Addinst;
break;
case NCCLASS:
if(c>=0 && classmatch(inst->rclass, c, 1))
goto Addinst;
break;
case OR:
/* evaluate right choice later */
if(addinst(tl, inst->right, &tlp->se))
if(++ntl >= NLIST)
goto Overflow;
/* efficiency: advance and re-evaluate */
inst = inst->left;
goto Switchstmt;
case END: /* Match! */
tlp->se.p[0].p1 = -tlp->se.p[0].p1; /* minus sign */
tlp->se.p[0].p2 = p;
bnewmatch(&tlp->se);
break;
}
}
}
Return:
return sel.p[0].p1>=0;
}