rregexec.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
rregexec.c (4729B) | |
--- | |
1 #include "lib9.h" | |
2 #include "regexp9.h" | |
3 #include "regcomp.h" | |
4 | |
5 /* | |
6 * return 0 if no match | |
7 * >0 if a match | |
8 * <0 if we ran out of _relist space | |
9 */ | |
10 static int | |
11 rregexec1(Reprog *progp, /* program to run */ | |
12 Rune *bol, /* string to run machine on */ | |
13 Resub *mp, /* subexpression elements */ | |
14 int ms, /* number of elements at mp */ | |
15 Reljunk *j) | |
16 { | |
17 int flag=0; | |
18 Reinst *inst; | |
19 Relist *tlp; | |
20 Rune *s; | |
21 int i, checkstart; | |
22 Rune r, *rp, *ep; | |
23 Relist* tl; /* This list, next list */ | |
24 Relist* nl; | |
25 Relist* tle; /* ends of this and next list */ | |
26 Relist* nle; | |
27 int match; | |
28 Rune *p; | |
29 | |
30 match = 0; | |
31 checkstart = j->startchar; | |
32 if(mp) | |
33 for(i=0; i<ms; i++) { | |
34 mp[i].s.rsp = 0; | |
35 mp[i].e.rep = 0; | |
36 } | |
37 j->relist[0][0].inst = 0; | |
38 j->relist[1][0].inst = 0; | |
39 | |
40 /* Execute machine once for each character, including terminal N… | |
41 s = j->rstarts; | |
42 do{ | |
43 | |
44 /* fast check for first char */ | |
45 if(checkstart) { | |
46 switch(j->starttype) { | |
47 case RUNE: | |
48 p = runestrchr(s, j->startchar); | |
49 if(p == 0 || p == j->reol) | |
50 return match; | |
51 s = p; | |
52 break; | |
53 case BOL: | |
54 if(s == bol) | |
55 break; | |
56 p = runestrchr(s, '\n'); | |
57 if(p == 0 || s == j->reol) | |
58 return match; | |
59 s = p+1; | |
60 break; | |
61 } | |
62 } | |
63 | |
64 r = *s; | |
65 | |
66 /* switch run lists */ | |
67 tl = j->relist[flag]; | |
68 tle = j->reliste[flag]; | |
69 nl = j->relist[flag^=1]; | |
70 nle = j->reliste[flag]; | |
71 nl->inst = 0; | |
72 | |
73 /* Add first instruction to current list */ | |
74 _rrenewemptythread(tl, progp->startinst, ms, s); | |
75 | |
76 /* Execute machine until current list is empty */ | |
77 for(tlp=tl; tlp->inst; tlp++){ | |
78 for(inst=tlp->inst; ; inst = inst->u2.next){ | |
79 switch(inst->type){ | |
80 case RUNE: /* regular character */ | |
81 if(inst->u1.r == r) | |
82 if(_renewthread(nl, inst… | |
83 return -1; | |
84 break; | |
85 case LBRA: | |
86 tlp->se.m[inst->u1.subid].s.rsp … | |
87 continue; | |
88 case RBRA: | |
89 tlp->se.m[inst->u1.subid].e.rep … | |
90 continue; | |
91 case ANY: | |
92 if(r != '\n') | |
93 if(_renewthread(nl, inst… | |
94 return -1; | |
95 break; | |
96 case ANYNL: | |
97 if(_renewthread(nl, inst->u2.nex… | |
98 return -1; | |
99 break; | |
100 case BOL: | |
101 if(s == bol || *(s-1) == '\n') | |
102 continue; | |
103 break; | |
104 case EOL: | |
105 if(s == j->reol || r == 0 || r =… | |
106 continue; | |
107 break; | |
108 case CCLASS: | |
109 ep = inst->u1.cp->end; | |
110 for(rp = inst->u1.cp->spans; rp … | |
111 if(r >= rp[0] && r <= rp… | |
112 if(_renewthread(… | |
113 return -… | |
114 break; | |
115 } | |
116 break; | |
117 case NCCLASS: | |
118 ep = inst->u1.cp->end; | |
119 for(rp = inst->u1.cp->spans; rp … | |
120 if(r >= rp[0] && r <= rp… | |
121 break; | |
122 if(rp == ep) | |
123 if(_renewthread(nl, inst… | |
124 return -1; | |
125 break; | |
126 case OR: | |
127 /* evaluate right choice later */ | |
128 if(_renewthread(tlp, inst->u1.ri… | |
129 return -1; | |
130 /* efficiency: advance and re-ev… | |
131 continue; | |
132 case END: /* Match! */ | |
133 match = 1; | |
134 tlp->se.m[0].e.rep = s; | |
135 if(mp != 0) | |
136 _renewmatch(mp, ms, &tlp… | |
137 break; | |
138 } | |
139 break; | |
140 } | |
141 } | |
142 if(s == j->reol) | |
143 break; | |
144 checkstart = j->startchar && nl->inst==0; | |
145 s++; | |
146 }while(r); | |
147 return match; | |
148 } | |
149 | |
150 static int | |
151 rregexec2(Reprog *progp, /* program to run */ | |
152 Rune *bol, /* string to run machine on */ | |
153 Resub *mp, /* subexpression elements */ | |
154 int ms, /* number of elements at mp */ | |
155 Reljunk *j | |
156 ) | |
157 { | |
158 Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; | |
159 | |
160 /* mark space */ | |
161 j->relist[0] = relist0; | |
162 j->relist[1] = relist1; | |
163 j->reliste[0] = relist0 + nelem(relist0) - 2; | |
164 j->reliste[1] = relist1 + nelem(relist1) - 2; | |
165 | |
166 return rregexec1(progp, bol, mp, ms, j); | |
167 } | |
168 | |
169 extern int | |
170 rregexec(Reprog *progp, /* program to run */ | |
171 Rune *bol, /* string to run machine on */ | |
172 Resub *mp, /* subexpression elements */ | |
173 int ms) /* number of elements at mp */ | |
174 { | |
175 Reljunk j; | |
176 Relist relist0[LISTSIZE], relist1[LISTSIZE]; | |
177 int rv; | |
178 | |
179 /* | |
180 * use user-specified starting/ending location if specified | |
181 */ | |
182 j.rstarts = bol; | |
183 j.reol = 0; | |
184 if(mp && ms>0){ | |
185 if(mp->s.sp) | |
186 j.rstarts = mp->s.rsp; | |
187 if(mp->e.ep) | |
188 j.reol = mp->e.rep; | |
189 } | |
190 j.starttype = 0; | |
191 j.startchar = 0; | |
192 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Ru… | |
193 j.starttype = RUNE; | |
194 j.startchar = progp->startinst->u1.r; | |
195 } | |
196 if(progp->startinst->type == BOL) | |
197 j.starttype = BOL; | |
198 | |
199 /* mark space */ | |
200 j.relist[0] = relist0; | |
201 j.relist[1] = relist1; | |
202 j.reliste[0] = relist0 + nelem(relist0) - 2; | |
203 j.reliste[1] = relist1 + nelem(relist1) - 2; | |
204 | |
205 rv = rregexec1(progp, bol, mp, ms, &j); | |
206 if(rv >= 0) | |
207 return rv; | |
208 rv = rregexec2(progp, bol, mp, ms, &j); | |
209 if(rv >= 0) | |
210 return rv; | |
211 return -1; | |
212 } |