comp.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
comp.c (4165B) | |
--- | |
1 #include "grep.h" | |
2 | |
3 /* | |
4 * incremental compiler. | |
5 * add the branch c to the | |
6 * state s. | |
7 */ | |
8 void | |
9 increment(State *s, int c) | |
10 { | |
11 int i; | |
12 State *t, **tt; | |
13 Re *re1, *re2; | |
14 | |
15 nfollow = 0; | |
16 gen++; | |
17 matched = 0; | |
18 for(i=0; i<s->count; i++) | |
19 fol1(s->re[i], c); | |
20 qsort(follow, nfollow, sizeof(*follow), fcmp); | |
21 for(tt=&state0; t = *tt;) { | |
22 if(t->count > nfollow) { | |
23 tt = &t->linkleft; | |
24 goto cont; | |
25 } | |
26 if(t->count < nfollow) { | |
27 tt = &t->linkright; | |
28 goto cont; | |
29 } | |
30 for(i=0; i<nfollow; i++) { | |
31 re1 = t->re[i]; | |
32 re2 = follow[i]; | |
33 if(re1 > re2) { | |
34 tt = &t->linkleft; | |
35 goto cont; | |
36 } | |
37 if(re1 < re2) { | |
38 tt = &t->linkright; | |
39 goto cont; | |
40 } | |
41 } | |
42 if(!!matched && !t->match) { | |
43 tt = &t->linkleft; | |
44 goto cont; | |
45 } | |
46 if(!matched && !!t->match) { | |
47 tt = &t->linkright; | |
48 goto cont; | |
49 } | |
50 s->next[c] = t; | |
51 return; | |
52 cont:; | |
53 } | |
54 | |
55 t = sal(nfollow); | |
56 *tt = t; | |
57 for(i=0; i<nfollow; i++) { | |
58 re1 = follow[i]; | |
59 t->re[i] = re1; | |
60 } | |
61 s->next[c] = t; | |
62 t->match = matched; | |
63 } | |
64 | |
65 int | |
66 fcmp(const void *va, const void *vb) | |
67 { | |
68 Re **aa, **bb; | |
69 Re *a, *b; | |
70 | |
71 aa = (Re**)va; | |
72 bb = (Re**)vb; | |
73 a = *aa; | |
74 b = *bb; | |
75 if(a > b) | |
76 return 1; | |
77 if(a < b) | |
78 return -1; | |
79 return 0; | |
80 } | |
81 | |
82 void | |
83 fol1(Re *r, int c) | |
84 { | |
85 Re *r1; | |
86 | |
87 loop: | |
88 if(r->gen == gen) | |
89 return; | |
90 if(nfollow >= maxfollow) | |
91 error("nfollow"); | |
92 r->gen = gen; | |
93 switch(r->type) { | |
94 default: | |
95 error("fol1"); | |
96 | |
97 case Tcase: | |
98 if(c >= 0 && c < 256) | |
99 if(r1 = r->u.cases[c]) | |
100 follow[nfollow++] = r1; | |
101 if(r = r->next) | |
102 goto loop; | |
103 break; | |
104 | |
105 case Talt: | |
106 case Tor: | |
107 fol1(r->u.alt, c); | |
108 r = r->next; | |
109 goto loop; | |
110 | |
111 case Tbegin: | |
112 if(c == '\n' || c == Cbegin) | |
113 follow[nfollow++] = r->next; | |
114 break; | |
115 | |
116 case Tend: | |
117 if(c == '\n') | |
118 matched = 1; | |
119 break; | |
120 | |
121 case Tclass: | |
122 if(c >= r->u.x.lo && c <= r->u.x.hi) | |
123 follow[nfollow++] = r->next; | |
124 break; | |
125 } | |
126 } | |
127 | |
128 Rune tab1[] = | |
129 { | |
130 0x007f, | |
131 0x07ff | |
132 }; | |
133 Rune tab2[] = | |
134 { | |
135 0x003f, | |
136 0x0fff | |
137 }; | |
138 | |
139 Re2 | |
140 rclass(Rune p0, Rune p1) | |
141 { | |
142 char xc0[6], xc1[6]; | |
143 int i, n, m; | |
144 Re2 x; | |
145 | |
146 if(p0 > p1) | |
147 return re2char(0xff, 0xff); /* no match */ | |
148 | |
149 /* | |
150 * bust range into same length | |
151 * character sequences | |
152 */ | |
153 for(i=0; i<nelem(tab1); i++) { | |
154 m = tab1[i]; | |
155 if(p0 <= m && p1 > m) | |
156 return re2or(rclass(p0, m), rclass(m+1, p1)); | |
157 } | |
158 | |
159 /* | |
160 * bust range into part of a single page | |
161 * or into full pages | |
162 */ | |
163 for(i=0; i<nelem(tab2); i++) { | |
164 m = tab2[i]; | |
165 if((p0 & ~m) != (p1 & ~m)) { | |
166 if((p0 & m) != 0) | |
167 return re2or(rclass(p0, p0|m), rclass((p… | |
168 if((p1 & m) != m) | |
169 return re2or(rclass(p0, (p1&~m)-1), rcla… | |
170 } | |
171 } | |
172 | |
173 n = runetochar(xc0, &p0); | |
174 i = runetochar(xc1, &p1); | |
175 if(i != n) | |
176 error("length"); | |
177 | |
178 x = re2char(xc0[0], xc1[0]); | |
179 for(i=1; i<n; i++) | |
180 x = re2cat(x, re2char(xc0[i], xc1[i])); | |
181 return x; | |
182 } | |
183 | |
184 int | |
185 pcmp(const void *va, const void *vb) | |
186 { | |
187 int n; | |
188 Rune *a, *b; | |
189 | |
190 a = (Rune*)va; | |
191 b = (Rune*)vb; | |
192 | |
193 n = a[0] - b[0]; | |
194 if(n) | |
195 return n; | |
196 return a[1] - b[1]; | |
197 } | |
198 | |
199 /* | |
200 * convert character chass into | |
201 * run-pair ranges of matches. | |
202 * this is 10646/utf specific and | |
203 * needs to be changed for some | |
204 * other input character set. | |
205 * this is the key to a fast | |
206 * regular search of characters | |
207 * by looking at sequential bytes. | |
208 */ | |
209 Re2 | |
210 re2class(char *s) | |
211 { | |
212 Rune pairs[200], *p, *q, ov; | |
213 int nc; | |
214 Re2 x; | |
215 | |
216 nc = 0; | |
217 if(*s == '^') { | |
218 nc = 1; | |
219 s++; | |
220 } | |
221 | |
222 p = pairs; | |
223 s += chartorune(p, s); | |
224 for(;;) { | |
225 if(*p == '\\') | |
226 s += chartorune(p, s); | |
227 if(*p == 0) | |
228 break; | |
229 p[1] = *p; | |
230 p += 2; | |
231 s += chartorune(p, s); | |
232 if(*p != '-') | |
233 continue; | |
234 s += chartorune(p, s); | |
235 if(*p == '\\') | |
236 s += chartorune(p, s); | |
237 if(*p == 0) | |
238 break; | |
239 p[-1] = *p; | |
240 s += chartorune(p, s); | |
241 } | |
242 *p = 0; | |
243 qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp); | |
244 | |
245 q = pairs; | |
246 for(p=pairs+2; *p; p+=2) { | |
247 if(p[0] > p[1]) | |
248 continue; | |
249 if(p[0] > q[1] || p[1] < q[0]) { | |
250 q[2] = p[0]; | |
251 q[3] = p[1]; | |
252 q += 2; | |
253 continue; | |
254 } | |
255 if(p[0] < q[0]) | |
256 q[0] = p[0]; | |
257 if(p[1] > q[1]) | |
258 q[1] = p[1]; | |
259 } | |
260 q[2] = 0; | |
261 | |
262 p = pairs; | |
263 if(nc) { | |
264 x = rclass(0, p[0]-1); | |
265 ov = p[1]+1; | |
266 for(p+=2; *p; p+=2) { | |
267 x = re2or(x, rclass(ov, p[0]-1)); | |
268 ov = p[1]+1; | |
269 } | |
270 x = re2or(x, rclass(ov, 0xffff)); | |
271 } else { | |
272 x = rclass(p[0], p[1]); | |
273 for(p+=2; *p; p+=2) | |
274 x = re2or(x, rclass(p[0], p[1])); | |
275 } | |
276 return x; | |
277 } |