| lex.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
| git clone git://git.suckless.org/9base | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| lex.c (6203B) | |
| --- | |
| 1 #include "rc.h" | |
| 2 #include "exec.h" | |
| 3 #include "io.h" | |
| 4 #include "getflags.h" | |
| 5 #include "fns.h" | |
| 6 int getnext(void); | |
| 7 | |
/*
 * Report whether c can be part of an unquoted word.
 * Returns 0 for EOF and for any of the shell's delimiter or
 * operator characters, 1 otherwise.
 */
int
wordchr(int c)
{
	if(c==EOF)
		return 0;
	return strchr("\n \t#;&|^$=`'{}()<>", c)==0;
}
| 13 | |
/*
 * Report whether c can be part of a variable name following '$'.
 * Anything above space that is not in the punctuation list below
 * qualifies; note that '*' and '_' are deliberately absent from it.
 *
 * Formerly:
 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
 *	|| c=='_' || c=='*';
 */
int
idchr(int c)
{
	static const char notid[] = "!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~";

	if(c<=' ')
		return 0;
	return strchr(notid, c)==0;
}
/* One-character lookahead slot; EOF means the slot is empty. */
int future = EOF;
/* Nonzero when getnext() must call pprompt() before its next read. */
int doprompt = 1;
/* Set by yylex() while scanning a '...' string; getnext() then leaves backslashes alone. */
int inquote;
/* Set by skipwhite() while discarding a # comment; disables backslash-newline joining. */
int incomm;
| 28 /* | |
| 29 * Look ahead in the input stream | |
| 30 */ | |
| 31 | |
| 32 int | |
| 33 nextc(void) | |
| 34 { | |
| 35 if(future==EOF) | |
| 36 future = getnext(); | |
| 37 return future; | |
| 38 } | |
| 39 /* | |
| 40 * Consume the lookahead character. | |
| 41 */ | |
| 42 | |
| 43 int | |
| 44 advance(void) | |
| 45 { | |
| 46 int c = nextc(); | |
| 47 lastc = future; | |
| 48 future = EOF; | |
| 49 return c; | |
| 50 } | |
| 51 /* | |
| 52 * read a character from the input stream | |
| 53 */ | |
| 54 | |
| 55 int | |
| 56 getnext(void) | |
| 57 { | |
| 58 int c; | |
| 59 static int peekc = EOF; | |
| 60 if(peekc!=EOF){ | |
| 61 c = peekc; | |
| 62 peekc = EOF; | |
| 63 return c; | |
| 64 } | |
| 65 if(runq->eof) | |
| 66 return EOF; | |
| 67 if(doprompt) | |
| 68 pprompt(); | |
| 69 c = rchr(runq->cmdfd); | |
| 70 if(!inquote && c=='\\'){ | |
| 71 c = rchr(runq->cmdfd); | |
| 72 if(c=='\n' && !incomm){ /* don't continue… | |
| 73 doprompt = 1; | |
| 74 c=' '; | |
| 75 } | |
| 76 else{ | |
| 77 peekc = c; | |
| 78 c='\\'; | |
| 79 } | |
| 80 } | |
| 81 doprompt = doprompt || c=='\n' || c==EOF; | |
| 82 if(c==EOF) | |
| 83 runq->eof++; | |
| 84 else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c); | |
| 85 return c; | |
| 86 } | |
| 87 | |
| 88 void | |
| 89 pprompt(void) | |
| 90 { | |
| 91 var *prompt; | |
| 92 if(runq->iflag){ | |
| 93 pstr(err, promptstr); | |
| 94 flush(err); | |
| 95 prompt = vlook("prompt"); | |
| 96 if(prompt->val && prompt->val->next) | |
| 97 promptstr = prompt->val->next->word; | |
| 98 else | |
| 99 promptstr="\t"; | |
| 100 } | |
| 101 runq->lineno++; | |
| 102 doprompt = 0; | |
| 103 } | |
| 104 | |
| 105 void | |
| 106 skipwhite(void) | |
| 107 { | |
| 108 int c; | |
| 109 for(;;){ | |
| 110 c = nextc(); | |
| 111 /* Why did this used to be if(!inquote && c=='#') ?? */ | |
| 112 if(c=='#'){ | |
| 113 incomm = 1; | |
| 114 for(;;){ | |
| 115 c = nextc(); | |
| 116 if(c=='\n' || c==EOF) { | |
| 117 incomm = 0; | |
| 118 break; | |
| 119 } | |
| 120 advance(); | |
| 121 } | |
| 122 } | |
| 123 if(c==' ' || c=='\t') | |
| 124 advance(); | |
| 125 else return; | |
| 126 } | |
| 127 } | |
| 128 | |
/*
 * Skip white space, comments and any number of newlines, stopping
 * at the first character that is none of those.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc()=='\n'){
		advance();
		skipwhite();
	}
}
| 141 | |
/*
 * If the lookahead character equals c, consume it and return 1;
 * otherwise leave the input untouched and return 0.
 */
int
nextis(int c)
{
	if(nextc()!=c)
		return 0;
	advance();
	return 1;
}
| 151 | |
| 152 char* | |
| 153 addtok(char *p, int val) | |
| 154 { | |
| 155 if(p==0) | |
| 156 return 0; | |
| 157 if(p==&tok[NTOK-1]){ | |
| 158 *p = 0; | |
| 159 yyerror("token buffer too short"); | |
| 160 return 0; | |
| 161 } | |
| 162 *p++=val; | |
| 163 return p; | |
| 164 } | |
| 165 | |
/*
 * Append character c to the token at p, and when c is the lead byte
 * of a 2- or 3-byte escape also consume and append the following
 * one or two input bytes.  Returns the new end of the token, or 0
 * once addtok() has reported an overflow.
 */
char*
addutf(char *p, int c)
{
	int extra;

	p = addtok(p, c);
	if(twobyte(c))			/* 2-byte escape */
		extra = 1;
	else if(threebyte(c))		/* 3-byte escape */
		extra = 2;
	else
		extra = 0;
	while(extra-- > 0)
		p = addtok(p, advance());
	return p;
}
int lastdol;	/* was the last token read '$' or '$#' or '"'? */
int lastword;	/* was the last token read a word or compound word terminator? */
| 180 | |
| 181 int | |
| 182 yylex(void) | |
| 183 { | |
| 184 int c, d = nextc(); | |
| 185 char *w = tok; | |
| 186 struct tree *t; | |
| 187 yylval.tree = 0; | |
| 188 /* | |
| 189 * Embarassing sneakiness: if the last token read was a quoted … | |
| 190 * WORD then we alter the meaning of what follows. If the next … | |
| 191 * is `(', we return SUB (a subscript paren) and consume the `('… | |
| 192 * if the next character is the first character of a simple or c… | |
| 193 * we insert a `^' before it. | |
| 194 */ | |
| 195 if(lastword){ | |
| 196 lastword = 0; | |
| 197 if(d=='('){ | |
| 198 advance(); | |
| 199 strcpy(tok, "( [SUB]"); | |
| 200 return SUB; | |
| 201 } | |
| 202 if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ | |
| 203 strcpy(tok, "^"); | |
| 204 return '^'; | |
| 205 } | |
| 206 } | |
| 207 inquote = 0; | |
| 208 skipwhite(); | |
| 209 switch(c = advance()){ | |
| 210 case EOF: | |
| 211 lastdol = 0; | |
| 212 strcpy(tok, "EOF"); | |
| 213 return EOF; | |
| 214 case '$': | |
| 215 lastdol = 1; | |
| 216 if(nextis('#')){ | |
| 217 strcpy(tok, "$#"); | |
| 218 return COUNT; | |
| 219 } | |
| 220 if(nextis('"')){ | |
| 221 strcpy(tok, "$\""); | |
| 222 return '"'; | |
| 223 } | |
| 224 strcpy(tok, "$"); | |
| 225 return '$'; | |
| 226 case '&': | |
| 227 lastdol = 0; | |
| 228 if(nextis('&')){ | |
| 229 skipnl(); | |
| 230 strcpy(tok, "&&"); | |
| 231 return ANDAND; | |
| 232 } | |
| 233 strcpy(tok, "&"); | |
| 234 return '&'; | |
| 235 case '|': | |
| 236 lastdol = 0; | |
| 237 if(nextis(c)){ | |
| 238 skipnl(); | |
| 239 strcpy(tok, "||"); | |
| 240 return OROR; | |
| 241 } | |
| 242 case '<': | |
| 243 case '>': | |
| 244 lastdol = 0; | |
| 245 /* | |
| 246 * funny redirection tokens: | |
| 247 * redir: arrow | arrow '[' fd ']' | |
| 248 * arrow: '<' | '<<' | '>' | '>>' | '|' | |
| 249 * fd: digit | digit '=' | digit '=' digit | |
| 250 * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|… | |
| 251 * some possibilities are nonsensical and get a message. | |
| 252 */ | |
| 253 *w++=c; | |
| 254 t = newtree(); | |
| 255 switch(c){ | |
| 256 case '|': | |
| 257 t->type = PIPE; | |
| 258 t->fd0 = 1; | |
| 259 t->fd1 = 0; | |
| 260 break; | |
| 261 case '>': | |
| 262 t->type = REDIR; | |
| 263 if(nextis(c)){ | |
| 264 t->rtype = APPEND; | |
| 265 *w++=c; | |
| 266 } | |
| 267 else t->rtype = WRITE; | |
| 268 t->fd0 = 1; | |
| 269 break; | |
| 270 case '<': | |
| 271 t->type = REDIR; | |
| 272 if(nextis(c)){ | |
| 273 t->rtype = HERE; | |
| 274 *w++=c; | |
| 275 } else if (nextis('>')){ | |
| 276 t->rtype = RDWR; | |
| 277 *w++=c; | |
| 278 } else t->rtype = READ; | |
| 279 t->fd0 = 0; | |
| 280 break; | |
| 281 } | |
| 282 if(nextis('[')){ | |
| 283 *w++='['; | |
| 284 c = advance(); | |
| 285 *w++=c; | |
| 286 if(c<'0' || '9'<c){ | |
| 287 RedirErr: | |
| 288 *w = 0; | |
| 289 yyerror(t->type==PIPE?"pipe syntax" | |
| 290 :"redirection syntax"); | |
| 291 return EOF; | |
| 292 } | |
| 293 t->fd0 = 0; | |
| 294 do{ | |
| 295 t->fd0 = t->fd0*10+c-'0'; | |
| 296 *w++=c; | |
| 297 c = advance(); | |
| 298 }while('0'<=c && c<='9'); | |
| 299 if(c=='='){ | |
| 300 *w++='='; | |
| 301 if(t->type==REDIR) | |
| 302 t->type = DUP; | |
| 303 c = advance(); | |
| 304 if('0'<=c && c<='9'){ | |
| 305 t->rtype = DUPFD; | |
| 306 t->fd1 = t->fd0; | |
| 307 t->fd0 = 0; | |
| 308 do{ | |
| 309 t->fd0 = t->fd0*10+c-'0'; | |
| 310 *w++=c; | |
| 311 c = advance(); | |
| 312 }while('0'<=c && c<='9'); | |
| 313 } | |
| 314 else{ | |
| 315 if(t->type==PIPE) | |
| 316 goto RedirErr; | |
| 317 t->rtype = CLOSE; | |
| 318 } | |
| 319 } | |
| 320 if(c!=']' | |
| 321 || t->type==DUP && (t->rtype==HERE || t->rtype==… | |
| 322 goto RedirErr; | |
| 323 *w++=']'; | |
| 324 } | |
| 325 *w='\0'; | |
| 326 yylval.tree = t; | |
| 327 if(t->type==PIPE) | |
| 328 skipnl(); | |
| 329 return t->type; | |
| 330 case '\'': | |
| 331 lastdol = 0; | |
| 332 lastword = 1; | |
| 333 inquote = 1; | |
| 334 for(;;){ | |
| 335 c = advance(); | |
| 336 if(c==EOF) | |
| 337 break; | |
| 338 if(c=='\''){ | |
| 339 if(nextc()!='\'') | |
| 340 break; | |
| 341 advance(); | |
| 342 } | |
| 343 w = addutf(w, c); | |
| 344 } | |
| 345 if(w!=0) | |
| 346 *w='\0'; | |
| 347 t = token(tok, WORD); | |
| 348 t->quoted = 1; | |
| 349 yylval.tree = t; | |
| 350 return t->type; | |
| 351 } | |
| 352 if(!wordchr(c)){ | |
| 353 lastdol = 0; | |
| 354 tok[0] = c; | |
| 355 tok[1]='\0'; | |
| 356 return c; | |
| 357 } | |
| 358 for(;;){ | |
| 359 /* next line should have (char)c==GLOB, but ken's compil… | |
| 360 if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB) | |
| 361 w = addtok(w, GLOB); | |
| 362 w = addutf(w, c); | |
| 363 c = nextc(); | |
| 364 if(lastdol?!idchr(c):!wordchr(c)) break; | |
| 365 advance(); | |
| 366 } | |
| 367 | |
| 368 lastword = 1; | |
| 369 lastdol = 0; | |
| 370 if(w!=0) | |
| 371 *w='\0'; | |
| 372 t = klook(tok); | |
| 373 if(t->type!=WORD) | |
| 374 lastword = 0; | |
| 375 t->quoted = 0; | |
| 376 yylval.tree = t; | |
| 377 return t->type; | |
| 378 } |