GopherProxy

	lex.c - 9base - revived minimalist port of Plan 9 userland to Unix
	git clone git://git.suckless.org/9base
	Log
	Files
	Refs
	README
	LICENSE
	---
	lex.c (6203B)
	---
	1 #include "rc.h"
	2 #include "exec.h"
	3 #include "io.h"
	4 #include "getflags.h"
	5 #include "fns.h"
	6 int getnext(void);
	7
	/*
	 * Report whether c may be part of an unquoted word.
	 * Returns 1 for any character other than EOF, NUL, and the blank,
	 * newline and operator characters listed in `special'; 0 otherwise.
	 */
	int
	wordchr(int c)
	{
		static const char special[] = "\n \t#;&|^$=`'{}()<>";

		if(c==EOF)
			return 0;
		/* strchr also matches the terminating NUL, so c==0 is not a word char */
		return strchr(special, c)==0;
	}
	13
	/*
	 * Report whether c may appear in a variable name following `$'.
	 * Accepts every character above space that is not in the punctuation
	 * list below; note that '_' and '*' are deliberately absent from the
	 * list and are therefore accepted.  An earlier version of this test
	 * accepted only ASCII letters, digits, '_' and '*'.
	 * Returns 1 if c qualifies, 0 otherwise (including EOF).
	 */
	int
	idchr(int c)
	{
		static const char punct[] = "!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~";

		if(c<=' ')
			return 0;
		return strchr(punct, c)==0;
	}
	/*
	 * Lexer input state.
	 */
	int future = EOF;	/* one-character lookahead; EOF means the slot is empty */
	int doprompt = 1;	/* nonzero: getnext() calls pprompt() before its next read */
	int inquote;		/* set by yylex() inside '...'; getnext() then leaves backslashes alone */
	int incomm;		/* set by skipwhite() inside a # comment; disables backslash-newline joining */
	28 /*
	29 * Look ahead in the input stream
	30 */
	31
	32 int
	33 nextc(void)
	34 {
	35 if(future==EOF)
	36 future = getnext();
	37 return future;
	38 }
	39 /*
	40 * Consume the lookahead character.
	41 */
	42
	43 int
	44 advance(void)
	45 {
	46 int c = nextc();
	47 lastc = future;
	48 future = EOF;
	49 return c;
	50 }
	51 /*
	52 * read a character from the input stream
	53 */
	54
	55 int
	56 getnext(void)
	57 {
	58 int c;
	59 static int peekc = EOF;
	60 if(peekc!=EOF){
	61 c = peekc;
	62 peekc = EOF;
	63 return c;
	64 }
	65 if(runq->eof)
	66 return EOF;
	67 if(doprompt)
	68 pprompt();
	69 c = rchr(runq->cmdfd);
	70 if(!inquote && c=='\\'){
	71 c = rchr(runq->cmdfd);
	72 if(c=='\n' && !incomm){ /* don't continue…
	73 doprompt = 1;
	74 c=' ';
	75 }
	76 else{
	77 peekc = c;
	78 c='\\';
	79 }
	80 }
	81 doprompt = doprompt \|\| c=='\n' \|\| c==EOF;
	82 if(c==EOF)
	83 runq->eof++;
	84 else if(flag['V'] \|\| ndot>=2 && flag['v']) pchr(err, c);
	85 return c;
	86 }
	87
	88 void
	89 pprompt(void)
	90 {
	91 var *prompt;
	92 if(runq->iflag){
	93 pstr(err, promptstr);
	94 flush(err);
	95 prompt = vlook("prompt");
	96 if(prompt->val && prompt->val->next)
	97 promptstr = prompt->val->next->word;
	98 else
	99 promptstr="\t";
	100 }
	101 runq->lineno++;
	102 doprompt = 0;
	103 }
	104
	105 void
	106 skipwhite(void)
	107 {
	108 int c;
	109 for(;;){
	110 c = nextc();
	111 /* Why did this used to be if(!inquote && c=='#') ?? */
	112 if(c=='#'){
	113 incomm = 1;
	114 for(;;){
	115 c = nextc();
	116 if(c=='\n' \|\| c==EOF) {
	117 incomm = 0;
	118 break;
	119 }
	120 advance();
	121 }
	122 }
	123 if(c==' ' \|\| c=='\t')
	124 advance();
	125 else return;
	126 }
	127 }
	128
	/*
	 * Skip white space, comments and newlines, stopping at the first
	 * character that is none of these.
	 */
	void
	skipnl(void)
	{
		for(skipwhite(); nextc()=='\n'; skipwhite())
			advance();
	}
	141
	/*
	 * If the next input character equals c, consume it and return 1;
	 * otherwise leave the input untouched and return 0.
	 */
	int
	nextis(int c)
	{
		if(nextc()!=c)
			return 0;
		advance();
		return 1;
	}
	151
	152 char*
	153 addtok(char *p, int val)
	154 {
	155 if(p==0)
	156 return 0;
	157 if(p==&tok[NTOK-1]){
	158 *p = 0;
	159 yyerror("token buffer too short");
	160 return 0;
	161 }
	162 *p++=val;
	163 return p;
	164 }
	165
	/*
	 * Append the character c to the token at p.  When c is the lead byte of
	 * a two- or three-byte sequence (as judged by twobyte/threebyte), the
	 * one or two bytes that follow are consumed from the input and appended
	 * as well.  Returns the new end of the token, or 0 once addtok has failed.
	 */
	char*
	addutf(char *p, int c)
	{
		int extra;

		p = addtok(p, c);
		if(twobyte(c))
			extra = 1;
		else if(threebyte(c))
			extra = 2;
		else
			extra = 0;
		/* the trailing bytes are consumed even if p is already 0 */
		while(extra-- > 0)
			p = addtok(p, advance());
		return p;
	}
	/*
	 * Context carried from one yylex() call to the next.
	 */
	int lastdol;	/* was the last token read '$' or '$#' or '"'? */
	int lastword;	/* was the last token a WORD?  if so yylex() may return SUB or insert '^' */
	180
	181 int
	182 yylex(void)
	183 {
	184 int c, d = nextc();
	185 char *w = tok;
	186 struct tree *t;
	187 yylval.tree = 0;
	188 /*
	189 * Embarassing sneakiness: if the last token read was a quoted …
	190 * WORD then we alter the meaning of what follows. If the next …
	191 * is `(', we return SUB (a subscript paren) and consume the `('…
	192 * if the next character is the first character of a simple or c…
	193 * we insert a `^' before it.
	194 */
	195 if(lastword){
	196 lastword = 0;
	197 if(d=='('){
	198 advance();
	199 strcpy(tok, "( [SUB]");
	200 return SUB;
	201 }
	202 if(wordchr(d) \|\| d=='\'' \|\| d=='`' \|\| d=='$' \|\| d=='"'){
	203 strcpy(tok, "^");
	204 return '^';
	205 }
	206 }
	207 inquote = 0;
	208 skipwhite();
	209 switch(c = advance()){
	210 case EOF:
	211 lastdol = 0;
	212 strcpy(tok, "EOF");
	213 return EOF;
	214 case '$':
	215 lastdol = 1;
	216 if(nextis('#')){
	217 strcpy(tok, "$#");
	218 return COUNT;
	219 }
	220 if(nextis('"')){
	221 strcpy(tok, "$\"");
	222 return '"';
	223 }
	224 strcpy(tok, "$");
	225 return '$';
	226 case '&':
	227 lastdol = 0;
	228 if(nextis('&')){
	229 skipnl();
	230 strcpy(tok, "&&");
	231 return ANDAND;
	232 }
	233 strcpy(tok, "&");
	234 return '&';
	235 case '\|':
	236 lastdol = 0;
	237 if(nextis(c)){
	238 skipnl();
	239 strcpy(tok, "\|\|");
	240 return OROR;
	241 }
	242 case '<':
	243 case '>':
	244 lastdol = 0;
	245 /*
	246 * funny redirection tokens:
	247 * redir: arrow \| arrow '[' fd ']'
	248 * arrow: '<' \| '<<' \| '>' \| '>>' \| '\|'
	249 * fd: digit \| digit '=' \| digit '=' digit
	250 * digit: '0'\|'1'\|'2'\|'3'\|'4'\|'5'\|'6'\|'7'\|…
	251 * some possibilities are nonsensical and get a message.
	252 */
	253 *w++=c;
	254 t = newtree();
	255 switch(c){
	256 case '\|':
	257 t->type = PIPE;
	258 t->fd0 = 1;
	259 t->fd1 = 0;
	260 break;
	261 case '>':
	262 t->type = REDIR;
	263 if(nextis(c)){
	264 t->rtype = APPEND;
	265 *w++=c;
	266 }
	267 else t->rtype = WRITE;
	268 t->fd0 = 1;
	269 break;
	270 case '<':
	271 t->type = REDIR;
	272 if(nextis(c)){
	273 t->rtype = HERE;
	274 *w++=c;
	275 } else if (nextis('>')){
	276 t->rtype = RDWR;
	277 *w++=c;
	278 } else t->rtype = READ;
	279 t->fd0 = 0;
	280 break;
	281 }
	282 if(nextis('[')){
	283 *w++='[';
	284 c = advance();
	285 *w++=c;
	286 if(c<'0' \|\| '9'<c){
	287 RedirErr:
	288 *w = 0;
	289 yyerror(t->type==PIPE?"pipe syntax"
	290 :"redirection syntax");
	291 return EOF;
	292 }
	293 t->fd0 = 0;
	294 do{
	295 t->fd0 = t->fd0*10+c-'0';
	296 *w++=c;
	297 c = advance();
	298 }while('0'<=c && c<='9');
	299 if(c=='='){
	300 *w++='=';
	301 if(t->type==REDIR)
	302 t->type = DUP;
	303 c = advance();
	304 if('0'<=c && c<='9'){
	305 t->rtype = DUPFD;
	306 t->fd1 = t->fd0;
	307 t->fd0 = 0;
	308 do{
	309 t->fd0 = t->fd0*10+c-'0';
	310 *w++=c;
	311 c = advance();
	312 }while('0'<=c && c<='9');
	313 }
	314 else{
	315 if(t->type==PIPE)
	316 goto RedirErr;
	317 t->rtype = CLOSE;
	318 }
	319 }
	320 if(c!=']'
	321 \|\| t->type==DUP && (t->rtype==HERE \|\| t->rtype==…
	322 goto RedirErr;
	323 *w++=']';
	324 }
	325 *w='\0';
	326 yylval.tree = t;
	327 if(t->type==PIPE)
	328 skipnl();
	329 return t->type;
	330 case '\'':
	331 lastdol = 0;
	332 lastword = 1;
	333 inquote = 1;
	334 for(;;){
	335 c = advance();
	336 if(c==EOF)
	337 break;
	338 if(c=='\''){
	339 if(nextc()!='\'')
	340 break;
	341 advance();
	342 }
	343 w = addutf(w, c);
	344 }
	345 if(w!=0)
	346 *w='\0';
	347 t = token(tok, WORD);
	348 t->quoted = 1;
	349 yylval.tree = t;
	350 return t->type;
	351 }
	352 if(!wordchr(c)){
	353 lastdol = 0;
	354 tok[0] = c;
	355 tok[1]='\0';
	356 return c;
	357 }
	358 for(;;){
	359 /* next line should have (char)c==GLOB, but ken's compil…
	360 if(c=='*' \|\| c=='[' \|\| c=='?' \|\| c==(unsigned char)GLOB)
	361 w = addtok(w, GLOB);
	362 w = addutf(w, c);
	363 c = nextc();
	364 if(lastdol?!idchr(c):!wordchr(c)) break;
	365 advance();
	366 }
	367
	368 lastword = 1;
	369 lastdol = 0;
	370 if(w!=0)
	371 *w='\0';
	372 t = klook(tok);
	373 if(t->type!=WORD)
	374 lastword = 0;
	375 t->quoted = 0;
	376 yylval.tree = t;
	377 return t->type;
	378 }