lex.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
lex.c (6203B) | |
--- | |
1 #include "rc.h" | |
2 #include "exec.h" | |
3 #include "io.h" | |
4 #include "getflags.h" | |
5 #include "fns.h" | |
6 int getnext(void); | |
7 | |
/*
 * Report whether c may appear in an unquoted word: any character
 * other than EOF, NUL, white space, or one of the characters listed
 * below.  Returns 1 for a word character, 0 otherwise.
 */
int
wordchr(int c)
{
	/* strchr also matches the terminating NUL, so c==0 is rejected too */
	return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
}
13 | |
/*
 * Report whether c may appear in the word that follows `$', `$#'
 * or `$"' (yylex uses this instead of wordchr when lastdol is set).
 * Control characters, space, EOF and the punctuation listed below
 * are rejected; everything else, including `*' and `_', is accepted.
 * Returns 1 for an identifier character, 0 otherwise.
 */
int
idchr(int c)
{
	/*
	 * Formerly:
	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
	 *	|| c=='_' || c=='*';
	 */
	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
}
int future = EOF;	/* one-character lookahead filled by nextc; EOF means nothing is buffered */
int doprompt = 1;	/* nonzero when getnext must call pprompt before its next read */
int inquote;		/* set by yylex while reading a quoted word; getnext then leaves `\' alone */
int incomm;		/* set by skipwhite inside a # comment; `\'-newline does not join lines there */
28 /* | |
29 * Look ahead in the input stream | |
30 */ | |
31 | |
32 int | |
33 nextc(void) | |
34 { | |
35 if(future==EOF) | |
36 future = getnext(); | |
37 return future; | |
38 } | |
39 /* | |
40 * Consume the lookahead character. | |
41 */ | |
42 | |
43 int | |
44 advance(void) | |
45 { | |
46 int c = nextc(); | |
47 lastc = future; | |
48 future = EOF; | |
49 return c; | |
50 } | |
51 /* | |
52 * read a character from the input stream | |
53 */ | |
54 | |
55 int | |
56 getnext(void) | |
57 { | |
58 int c; | |
59 static int peekc = EOF; | |
60 if(peekc!=EOF){ | |
61 c = peekc; | |
62 peekc = EOF; | |
63 return c; | |
64 } | |
65 if(runq->eof) | |
66 return EOF; | |
67 if(doprompt) | |
68 pprompt(); | |
69 c = rchr(runq->cmdfd); | |
70 if(!inquote && c=='\\'){ | |
71 c = rchr(runq->cmdfd); | |
72 if(c=='\n' && !incomm){ /* don't continue… | |
73 doprompt = 1; | |
74 c=' '; | |
75 } | |
76 else{ | |
77 peekc = c; | |
78 c='\\'; | |
79 } | |
80 } | |
81 doprompt = doprompt || c=='\n' || c==EOF; | |
82 if(c==EOF) | |
83 runq->eof++; | |
84 else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c); | |
85 return c; | |
86 } | |
87 | |
88 void | |
89 pprompt(void) | |
90 { | |
91 var *prompt; | |
92 if(runq->iflag){ | |
93 pstr(err, promptstr); | |
94 flush(err); | |
95 prompt = vlook("prompt"); | |
96 if(prompt->val && prompt->val->next) | |
97 promptstr = prompt->val->next->word; | |
98 else | |
99 promptstr="\t"; | |
100 } | |
101 runq->lineno++; | |
102 doprompt = 0; | |
103 } | |
104 | |
105 void | |
106 skipwhite(void) | |
107 { | |
108 int c; | |
109 for(;;){ | |
110 c = nextc(); | |
111 /* Why did this used to be if(!inquote && c=='#') ?? */ | |
112 if(c=='#'){ | |
113 incomm = 1; | |
114 for(;;){ | |
115 c = nextc(); | |
116 if(c=='\n' || c==EOF) { | |
117 incomm = 0; | |
118 break; | |
119 } | |
120 advance(); | |
121 } | |
122 } | |
123 if(c==' ' || c=='\t') | |
124 advance(); | |
125 else return; | |
126 } | |
127 } | |
128 | |
/*
 * Skip white space, comments and newlines.  On return the lookahead
 * holds the first character that skipwhite stops at and that is not
 * a newline.
 */
void
skipnl(void)
{
	int c;

	for(;;){
		skipwhite();
		c = nextc();
		if(c!='\n')
			return;
		advance();
	}
}
141 | |
/*
 * If the lookahead character equals c, consume it and return 1;
 * otherwise leave the input untouched and return 0.
 */
int
nextis(int c)
{
	if(nextc()==c){
		advance();
		return 1;
	}
	return 0;
}
151 | |
152 char* | |
153 addtok(char *p, int val) | |
154 { | |
155 if(p==0) | |
156 return 0; | |
157 if(p==&tok[NTOK-1]){ | |
158 *p = 0; | |
159 yyerror("token buffer too short"); | |
160 return 0; | |
161 } | |
162 *p++=val; | |
163 return p; | |
164 } | |
165 | |
/*
 * Append the already-consumed character c to the token text at p.
 * When twobyte(c) or threebyte(c) holds, also consume one or two
 * further input characters and append them, so that a multi-byte
 * sequence is copied as a unit.
 * (twobyte/threebyte are defined elsewhere; presumably they classify
 * UTF-8 lead bytes -- confirm against their definitions.)
 * Returns the new end of the token text, or 0 after an overflow
 * reported by addtok.
 */
char*
addutf(char *p, int c)
{
	p = addtok(p, c);
	if(twobyte(c))	 /* 2-byte escape */
		return addtok(p, advance());
	if(threebyte(c)){	/* 3-byte escape */
		p = addtok(p, advance());
		return addtok(p, advance());
	}
	return p;
}
int lastdol;	/* was the last token read '$' or '$#' or '"'? */
int lastword;	/* was the last token a quoted word, or an unquoted one that klook typed as WORD? */
180 | |
181 int | |
182 yylex(void) | |
183 { | |
184 int c, d = nextc(); | |
185 char *w = tok; | |
186 struct tree *t; | |
187 yylval.tree = 0; | |
188 /* | |
189 * Embarassing sneakiness: if the last token read was a quoted … | |
190 * WORD then we alter the meaning of what follows. If the next … | |
191 * is `(', we return SUB (a subscript paren) and consume the `('… | |
192 * if the next character is the first character of a simple or c… | |
193 * we insert a `^' before it. | |
194 */ | |
195 if(lastword){ | |
196 lastword = 0; | |
197 if(d=='('){ | |
198 advance(); | |
199 strcpy(tok, "( [SUB]"); | |
200 return SUB; | |
201 } | |
202 if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ | |
203 strcpy(tok, "^"); | |
204 return '^'; | |
205 } | |
206 } | |
207 inquote = 0; | |
208 skipwhite(); | |
209 switch(c = advance()){ | |
210 case EOF: | |
211 lastdol = 0; | |
212 strcpy(tok, "EOF"); | |
213 return EOF; | |
214 case '$': | |
215 lastdol = 1; | |
216 if(nextis('#')){ | |
217 strcpy(tok, "$#"); | |
218 return COUNT; | |
219 } | |
220 if(nextis('"')){ | |
221 strcpy(tok, "$\""); | |
222 return '"'; | |
223 } | |
224 strcpy(tok, "$"); | |
225 return '$'; | |
226 case '&': | |
227 lastdol = 0; | |
228 if(nextis('&')){ | |
229 skipnl(); | |
230 strcpy(tok, "&&"); | |
231 return ANDAND; | |
232 } | |
233 strcpy(tok, "&"); | |
234 return '&'; | |
235 case '|': | |
236 lastdol = 0; | |
237 if(nextis(c)){ | |
238 skipnl(); | |
239 strcpy(tok, "||"); | |
240 return OROR; | |
241 } | |
242 case '<': | |
243 case '>': | |
244 lastdol = 0; | |
245 /* | |
246 * funny redirection tokens: | |
247 * redir: arrow | arrow '[' fd ']' | |
248 * arrow: '<' | '<<' | '>' | '>>' | '|' | |
249 * fd: digit | digit '=' | digit '=' digit | |
250 * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|… | |
251 * some possibilities are nonsensical and get a message. | |
252 */ | |
253 *w++=c; | |
254 t = newtree(); | |
255 switch(c){ | |
256 case '|': | |
257 t->type = PIPE; | |
258 t->fd0 = 1; | |
259 t->fd1 = 0; | |
260 break; | |
261 case '>': | |
262 t->type = REDIR; | |
263 if(nextis(c)){ | |
264 t->rtype = APPEND; | |
265 *w++=c; | |
266 } | |
267 else t->rtype = WRITE; | |
268 t->fd0 = 1; | |
269 break; | |
270 case '<': | |
271 t->type = REDIR; | |
272 if(nextis(c)){ | |
273 t->rtype = HERE; | |
274 *w++=c; | |
275 } else if (nextis('>')){ | |
276 t->rtype = RDWR; | |
277 *w++=c; | |
278 } else t->rtype = READ; | |
279 t->fd0 = 0; | |
280 break; | |
281 } | |
282 if(nextis('[')){ | |
283 *w++='['; | |
284 c = advance(); | |
285 *w++=c; | |
286 if(c<'0' || '9'<c){ | |
287 RedirErr: | |
288 *w = 0; | |
289 yyerror(t->type==PIPE?"pipe syntax" | |
290 :"redirection syntax"); | |
291 return EOF; | |
292 } | |
293 t->fd0 = 0; | |
294 do{ | |
295 t->fd0 = t->fd0*10+c-'0'; | |
296 *w++=c; | |
297 c = advance(); | |
298 }while('0'<=c && c<='9'); | |
299 if(c=='='){ | |
300 *w++='='; | |
301 if(t->type==REDIR) | |
302 t->type = DUP; | |
303 c = advance(); | |
304 if('0'<=c && c<='9'){ | |
305 t->rtype = DUPFD; | |
306 t->fd1 = t->fd0; | |
307 t->fd0 = 0; | |
308 do{ | |
309 t->fd0 = t->fd0*10+c-'0'; | |
310 *w++=c; | |
311 c = advance(); | |
312 }while('0'<=c && c<='9'); | |
313 } | |
314 else{ | |
315 if(t->type==PIPE) | |
316 goto RedirErr; | |
317 t->rtype = CLOSE; | |
318 } | |
319 } | |
320 if(c!=']' | |
321 || t->type==DUP && (t->rtype==HERE || t->rtype==… | |
322 goto RedirErr; | |
323 *w++=']'; | |
324 } | |
325 *w='\0'; | |
326 yylval.tree = t; | |
327 if(t->type==PIPE) | |
328 skipnl(); | |
329 return t->type; | |
330 case '\'': | |
331 lastdol = 0; | |
332 lastword = 1; | |
333 inquote = 1; | |
334 for(;;){ | |
335 c = advance(); | |
336 if(c==EOF) | |
337 break; | |
338 if(c=='\''){ | |
339 if(nextc()!='\'') | |
340 break; | |
341 advance(); | |
342 } | |
343 w = addutf(w, c); | |
344 } | |
345 if(w!=0) | |
346 *w='\0'; | |
347 t = token(tok, WORD); | |
348 t->quoted = 1; | |
349 yylval.tree = t; | |
350 return t->type; | |
351 } | |
352 if(!wordchr(c)){ | |
353 lastdol = 0; | |
354 tok[0] = c; | |
355 tok[1]='\0'; | |
356 return c; | |
357 } | |
358 for(;;){ | |
359 /* next line should have (char)c==GLOB, but ken's compil… | |
360 if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB) | |
361 w = addtok(w, GLOB); | |
362 w = addutf(w, c); | |
363 c = nextc(); | |
364 if(lastdol?!idchr(c):!wordchr(c)) break; | |
365 advance(); | |
366 } | |
367 | |
368 lastword = 1; | |
369 lastdol = 0; | |
370 if(w!=0) | |
371 *w='\0'; | |
372 t = klook(tok); | |
373 if(t->type!=WORD) | |
374 lastword = 0; | |
375 t->quoted = 0; | |
376 yylval.tree = t; | |
377 return t->type; | |
378 } |