| regexec.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
| git clone git://git.suckless.org/9base | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| regexec.c (5029B) | |
| --- | |
| 1 #include "lib9.h" | |
| 2 #include "regexp9.h" | |
| 3 #include "regcomp.h" | |
| 4 | |
| 5 | |
| 6 /* | |
| 7 * return 0 if no match | |
| 8 * >0 if a match | |
| 9 * <0 if we ran out of _relist space | |
| 10 */ | |
| 11 static int | |
| 12 regexec1(Reprog *progp, /* program to run */ | |
| 13 char *bol, /* string to run machine on */ | |
| 14 Resub *mp, /* subexpression elements */ | |
| 15 int ms, /* number of elements at mp */ | |
| 16 Reljunk *j | |
| 17 ) | |
| 18 { | |
| 19 int flag=0; | |
| 20 Reinst *inst; | |
| 21 Relist *tlp; | |
| 22 char *s; | |
| 23 int i, checkstart; | |
| 24 Rune r, *rp, *ep; | |
| 25 int n; | |
| 26 Relist* tl; /* This list, next list */ | |
| 27 Relist* nl; | |
| 28 Relist* tle; /* ends of this and next list */ | |
| 29 Relist* nle; | |
| 30 int match; | |
| 31 char *p; | |
| 32 | |
| 33 match = 0; | |
| 34 checkstart = j->starttype; | |
| 35 if(mp) | |
| 36 for(i=0; i<ms; i++) { | |
| 37 mp[i].s.sp = 0; | |
| 38 mp[i].e.ep = 0; | |
| 39 } | |
| 40 j->relist[0][0].inst = 0; | |
| 41 j->relist[1][0].inst = 0; | |
| 42 | |
| 43 /* Execute machine once for each character, including terminal N… | |
| 44 s = j->starts; | |
| 45 do{ | |
| 46 /* fast check for first char */ | |
| 47 if(checkstart) { | |
| 48 switch(j->starttype) { | |
| 49 case RUNE: | |
| 50 p = utfrune(s, j->startchar); | |
| 51 if(p == 0 || s == j->eol) | |
| 52 return match; | |
| 53 s = p; | |
| 54 break; | |
| 55 case BOL: | |
| 56 if(s == bol) | |
| 57 break; | |
| 58 p = utfrune(s, '\n'); | |
| 59 if(p == 0 || s == j->eol) | |
| 60 return match; | |
| 61 s = p+1; | |
| 62 break; | |
| 63 } | |
| 64 } | |
| 65 r = *(uchar*)s; | |
| 66 if(r < Runeself) | |
| 67 n = 1; | |
| 68 else | |
| 69 n = chartorune(&r, s); | |
| 70 | |
| 71 /* switch run lists */ | |
| 72 tl = j->relist[flag]; | |
| 73 tle = j->reliste[flag]; | |
| 74 nl = j->relist[flag^=1]; | |
| 75 nle = j->reliste[flag]; | |
| 76 nl->inst = 0; | |
| 77 | |
| 78 /* Add first instruction to current list */ | |
| 79 if(match == 0) | |
| 80 _renewemptythread(tl, progp->startinst, ms, s); | |
| 81 | |
| 82 /* Execute machine until current list is empty */ | |
| 83 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ | |
| 84 for(inst = tlp->inst; ; inst = inst->u2.next){ | |
| 85 switch(inst->type){ | |
| 86 case RUNE: /* regular character */ | |
| 87 if(inst->u1.r == r){ | |
| 88 if(_renewthread(nl, inst… | |
| 89 return -1; | |
| 90 } | |
| 91 break; | |
| 92 case LBRA: | |
| 93 tlp->se.m[inst->u1.subid].s.sp =… | |
| 94 continue; | |
| 95 case RBRA: | |
| 96 tlp->se.m[inst->u1.subid].e.ep =… | |
| 97 continue; | |
| 98 case ANY: | |
| 99 if(r != '\n') | |
| 100 if(_renewthread(nl, inst… | |
| 101 return -1; | |
| 102 break; | |
| 103 case ANYNL: | |
| 104 if(_renewthread(nl, inst->u2.nex… | |
| 105 return -1; | |
| 106 break; | |
| 107 case BOL: | |
| 108 if(s == bol || *(s-1) == '\n') | |
| 109 continue; | |
| 110 break; | |
| 111 case EOL: | |
| 112 if(s == j->eol || r == 0 || r ==… | |
| 113 continue; | |
| 114 break; | |
| 115 case CCLASS: | |
| 116 ep = inst->u1.cp->end; | |
| 117 for(rp = inst->u1.cp->spans; rp … | |
| 118 if(r >= rp[0] && r <= rp… | |
| 119 if(_renewthread(… | |
| 120 return -… | |
| 121 break; | |
| 122 } | |
| 123 break; | |
| 124 case NCCLASS: | |
| 125 ep = inst->u1.cp->end; | |
| 126 for(rp = inst->u1.cp->spans; rp … | |
| 127 if(r >= rp[0] && r <= rp… | |
| 128 break; | |
| 129 if(rp == ep) | |
| 130 if(_renewthread(nl, inst… | |
| 131 return -1; | |
| 132 break; | |
| 133 case OR: | |
| 134 /* evaluate right choice later */ | |
| 135 if(_renewthread(tlp, inst->u1.ri… | |
| 136 return -1; | |
| 137 /* efficiency: advance and re-ev… | |
| 138 continue; | |
| 139 case END: /* Match! */ | |
| 140 match = 1; | |
| 141 tlp->se.m[0].e.ep = s; | |
| 142 if(mp != 0) | |
| 143 _renewmatch(mp, ms, &tlp… | |
| 144 break; | |
| 145 } | |
| 146 break; | |
| 147 } | |
| 148 } | |
| 149 if(s == j->eol) | |
| 150 break; | |
| 151 checkstart = j->starttype && nl->inst==0; | |
| 152 s += n; | |
| 153 }while(r); | |
| 154 return match; | |
| 155 } | |
| 156 | |
| 157 static int | |
| 158 regexec2(Reprog *progp, /* program to run */ | |
| 159 char *bol, /* string to run machine on */ | |
| 160 Resub *mp, /* subexpression elements */ | |
| 161 int ms, /* number of elements at mp */ | |
| 162 Reljunk *j | |
| 163 ) | |
| 164 { | |
| 165 int rv; | |
| 166 Relist *relist0, *relist1; | |
| 167 | |
| 168 /* mark space */ | |
| 169 relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); | |
| 170 if(relist0 == nil) | |
| 171 return -1; | |
| 172 relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); | |
| 173 if(relist1 == nil){ | |
| 174 free(relist1); | |
| 175 return -1; | |
| 176 } | |
| 177 j->relist[0] = relist0; | |
| 178 j->relist[1] = relist1; | |
| 179 j->reliste[0] = relist0 + BIGLISTSIZE - 2; | |
| 180 j->reliste[1] = relist1 + BIGLISTSIZE - 2; | |
| 181 | |
| 182 rv = regexec1(progp, bol, mp, ms, j); | |
| 183 free(relist0); | |
| 184 free(relist1); | |
| 185 return rv; | |
| 186 } | |
| 187 | |
| 188 extern int | |
| 189 regexec(Reprog *progp, /* program to run */ | |
| 190 char *bol, /* string to run machine on */ | |
| 191 Resub *mp, /* subexpression elements */ | |
| 192 int ms) /* number of elements at mp */ | |
| 193 { | |
| 194 Reljunk j; | |
| 195 Relist relist0[LISTSIZE], relist1[LISTSIZE]; | |
| 196 int rv; | |
| 197 | |
| 198 /* | |
| 199 * use user-specified starting/ending location if specified | |
| 200 */ | |
| 201 j.starts = bol; | |
| 202 j.eol = 0; | |
| 203 if(mp && ms>0){ | |
| 204 if(mp->s.sp) | |
| 205 j.starts = mp->s.sp; | |
| 206 if(mp->e.ep) | |
| 207 j.eol = mp->e.ep; | |
| 208 } | |
| 209 j.starttype = 0; | |
| 210 j.startchar = 0; | |
| 211 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Ru… | |
| 212 j.starttype = RUNE; | |
| 213 j.startchar = progp->startinst->u1.r; | |
| 214 } | |
| 215 if(progp->startinst->type == BOL) | |
| 216 j.starttype = BOL; | |
| 217 | |
| 218 /* mark space */ | |
| 219 j.relist[0] = relist0; | |
| 220 j.relist[1] = relist1; | |
| 221 j.reliste[0] = relist0 + nelem(relist0) - 2; | |
| 222 j.reliste[1] = relist1 + nelem(relist1) - 2; | |
| 223 | |
| 224 rv = regexec1(progp, bol, mp, ms, &j); | |
| 225 if(rv >= 0) | |
| 226 return rv; | |
| 227 rv = regexec2(progp, bol, mp, ms, &j); | |
| 228 if(rv >= 0) | |
| 229 return rv; | |
| 230 return -1; | |
| 231 } |