regexec.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
regexec.c (5029B) | |
--- | |
1 #include "lib9.h" | |
2 #include "regexp9.h" | |
3 #include "regcomp.h" | |
4 | |
5 | |
6 /* | |
7 * return 0 if no match | |
8 * >0 if a match | |
9 * <0 if we ran out of _relist space | |
10 */ | |
11 static int | |
12 regexec1(Reprog *progp, /* program to run */ | |
13 char *bol, /* string to run machine on */ | |
14 Resub *mp, /* subexpression elements */ | |
15 int ms, /* number of elements at mp */ | |
16 Reljunk *j | |
17 ) | |
18 { | |
19 int flag=0; | |
20 Reinst *inst; | |
21 Relist *tlp; | |
22 char *s; | |
23 int i, checkstart; | |
24 Rune r, *rp, *ep; | |
25 int n; | |
26 Relist* tl; /* This list, next list */ | |
27 Relist* nl; | |
28 Relist* tle; /* ends of this and next list */ | |
29 Relist* nle; | |
30 int match; | |
31 char *p; | |
32 | |
33 match = 0; | |
34 checkstart = j->starttype; | |
35 if(mp) | |
36 for(i=0; i<ms; i++) { | |
37 mp[i].s.sp = 0; | |
38 mp[i].e.ep = 0; | |
39 } | |
40 j->relist[0][0].inst = 0; | |
41 j->relist[1][0].inst = 0; | |
42 | |
43 /* Execute machine once for each character, including terminal N… | |
44 s = j->starts; | |
45 do{ | |
46 /* fast check for first char */ | |
47 if(checkstart) { | |
48 switch(j->starttype) { | |
49 case RUNE: | |
50 p = utfrune(s, j->startchar); | |
51 if(p == 0 || s == j->eol) | |
52 return match; | |
53 s = p; | |
54 break; | |
55 case BOL: | |
56 if(s == bol) | |
57 break; | |
58 p = utfrune(s, '\n'); | |
59 if(p == 0 || s == j->eol) | |
60 return match; | |
61 s = p+1; | |
62 break; | |
63 } | |
64 } | |
65 r = *(uchar*)s; | |
66 if(r < Runeself) | |
67 n = 1; | |
68 else | |
69 n = chartorune(&r, s); | |
70 | |
71 /* switch run lists */ | |
72 tl = j->relist[flag]; | |
73 tle = j->reliste[flag]; | |
74 nl = j->relist[flag^=1]; | |
75 nle = j->reliste[flag]; | |
76 nl->inst = 0; | |
77 | |
78 /* Add first instruction to current list */ | |
79 if(match == 0) | |
80 _renewemptythread(tl, progp->startinst, ms, s); | |
81 | |
82 /* Execute machine until current list is empty */ | |
83 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ | |
84 for(inst = tlp->inst; ; inst = inst->u2.next){ | |
85 switch(inst->type){ | |
86 case RUNE: /* regular character */ | |
87 if(inst->u1.r == r){ | |
88 if(_renewthread(nl, inst… | |
89 return -1; | |
90 } | |
91 break; | |
92 case LBRA: | |
93 tlp->se.m[inst->u1.subid].s.sp =… | |
94 continue; | |
95 case RBRA: | |
96 tlp->se.m[inst->u1.subid].e.ep =… | |
97 continue; | |
98 case ANY: | |
99 if(r != '\n') | |
100 if(_renewthread(nl, inst… | |
101 return -1; | |
102 break; | |
103 case ANYNL: | |
104 if(_renewthread(nl, inst->u2.nex… | |
105 return -1; | |
106 break; | |
107 case BOL: | |
108 if(s == bol || *(s-1) == '\n') | |
109 continue; | |
110 break; | |
111 case EOL: | |
112 if(s == j->eol || r == 0 || r ==… | |
113 continue; | |
114 break; | |
115 case CCLASS: | |
116 ep = inst->u1.cp->end; | |
117 for(rp = inst->u1.cp->spans; rp … | |
118 if(r >= rp[0] && r <= rp… | |
119 if(_renewthread(… | |
120 return -… | |
121 break; | |
122 } | |
123 break; | |
124 case NCCLASS: | |
125 ep = inst->u1.cp->end; | |
126 for(rp = inst->u1.cp->spans; rp … | |
127 if(r >= rp[0] && r <= rp… | |
128 break; | |
129 if(rp == ep) | |
130 if(_renewthread(nl, inst… | |
131 return -1; | |
132 break; | |
133 case OR: | |
134 /* evaluate right choice later */ | |
135 if(_renewthread(tlp, inst->u1.ri… | |
136 return -1; | |
137 /* efficiency: advance and re-ev… | |
138 continue; | |
139 case END: /* Match! */ | |
140 match = 1; | |
141 tlp->se.m[0].e.ep = s; | |
142 if(mp != 0) | |
143 _renewmatch(mp, ms, &tlp… | |
144 break; | |
145 } | |
146 break; | |
147 } | |
148 } | |
149 if(s == j->eol) | |
150 break; | |
151 checkstart = j->starttype && nl->inst==0; | |
152 s += n; | |
153 }while(r); | |
154 return match; | |
155 } | |
156 | |
157 static int | |
158 regexec2(Reprog *progp, /* program to run */ | |
159 char *bol, /* string to run machine on */ | |
160 Resub *mp, /* subexpression elements */ | |
161 int ms, /* number of elements at mp */ | |
162 Reljunk *j | |
163 ) | |
164 { | |
165 int rv; | |
166 Relist *relist0, *relist1; | |
167 | |
168 /* mark space */ | |
169 relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); | |
170 if(relist0 == nil) | |
171 return -1; | |
172 relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); | |
173 if(relist1 == nil){ | |
174 free(relist1); | |
175 return -1; | |
176 } | |
177 j->relist[0] = relist0; | |
178 j->relist[1] = relist1; | |
179 j->reliste[0] = relist0 + BIGLISTSIZE - 2; | |
180 j->reliste[1] = relist1 + BIGLISTSIZE - 2; | |
181 | |
182 rv = regexec1(progp, bol, mp, ms, j); | |
183 free(relist0); | |
184 free(relist1); | |
185 return rv; | |
186 } | |
187 | |
188 extern int | |
189 regexec(Reprog *progp, /* program to run */ | |
190 char *bol, /* string to run machine on */ | |
191 Resub *mp, /* subexpression elements */ | |
192 int ms) /* number of elements at mp */ | |
193 { | |
194 Reljunk j; | |
195 Relist relist0[LISTSIZE], relist1[LISTSIZE]; | |
196 int rv; | |
197 | |
198 /* | |
199 * use user-specified starting/ending location if specified | |
200 */ | |
201 j.starts = bol; | |
202 j.eol = 0; | |
203 if(mp && ms>0){ | |
204 if(mp->s.sp) | |
205 j.starts = mp->s.sp; | |
206 if(mp->e.ep) | |
207 j.eol = mp->e.ep; | |
208 } | |
209 j.starttype = 0; | |
210 j.startchar = 0; | |
211 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Ru… | |
212 j.starttype = RUNE; | |
213 j.startchar = progp->startinst->u1.r; | |
214 } | |
215 if(progp->startinst->type == BOL) | |
216 j.starttype = BOL; | |
217 | |
218 /* mark space */ | |
219 j.relist[0] = relist0; | |
220 j.relist[1] = relist1; | |
221 j.reliste[0] = relist0 + nelem(relist0) - 2; | |
222 j.reliste[1] = relist1 + nelem(relist1) - 2; | |
223 | |
224 rv = regexec1(progp, bol, mp, ms, &j); | |
225 if(rv >= 0) | |
226 return rv; | |
227 rv = regexec2(progp, bol, mp, ms, &j); | |
228 if(rv >= 0) | |
229 return rv; | |
230 return -1; | |
231 } |