sed.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
sed.c (27240B) | |
--- | |
1 /* | |
2 * sed -- stream editor | |
3 * | |
4 * | |
5 */ | |
6 #include <u.h> | |
7 #include <libc.h> | |
8 #include <bio.h> | |
9 #include <regexp.h> | |
10 | |
11 enum { | |
12 DEPTH = 20, /* max nesting depth o… | |
13 MAXCMDS = 512, /* max sed commands… | |
14 ADDSIZE = 10000, /* size of add & read buf… | |
15 MAXADDS = 20, /* max pending adds … | |
16 LBSIZE = 8192, /* input line size … | |
17 LABSIZE = 50, /* max label name si… | |
18 MAXSUB = 10, /* max number of sub … | |
19 MAXFILES = 120 /* max output files */ | |
20 }; | |
21 /* An address is a line #, a R.E., "$", a reference to the last | |
22 * R.E., or nothing. | |
23 */ | |
24 typedef struct { | |
25 enum { | |
26 A_NONE, | |
27 A_DOL, | |
28 A_LINE, | |
29 A_RE, | |
30 A_LAST | |
31 }type; | |
32 union { | |
33 long line; /* Line # */ | |
34 Reprog *rp; /* Compiled R.E. */ | |
35 } u; | |
36 } Addr; | |
37 | |
38 typedef struct SEDCOM { | |
39 Addr ad1; /* optional start addres… | |
40 Addr ad2; /* optional end address … | |
41 union { | |
42 Reprog *re1; /* compiled R.E. */ | |
43 Rune *text; /* added text or file … | |
44 struct SEDCOM *lb1; /* destination … | |
45 } u; | |
46 Rune *rhs; /* Right-hand side of s… | |
47 Biobuf* fcode; /* File ID for read… | |
48 char command; /* command code -see below */ | |
49 char gfl; /* 'Global' flag for sub… | |
50 char pfl; /* 'print' flag for subs… | |
51 char active; /* 1 => data between … | |
52 char negfl; /* negation flag */ | |
53 } SedCom; | |
54 | |
55 /* Command Codes for field SedCom.command */ | |
56 #define ACOM 01 | |
57 #define BCOM 020 | |
58 #define CCOM 02 | |
59 #define CDCOM 025 | |
60 #define CNCOM 022 | |
61 #define COCOM 017 | |
62 #define CPCOM 023 | |
63 #define DCOM 03 | |
64 #define ECOM 015 | |
65 #define EQCOM 013 | |
66 #define FCOM 016 | |
67 #define GCOM 027 | |
68 #define CGCOM 030 | |
69 #define HCOM 031 | |
70 #define CHCOM 032 | |
71 #define ICOM 04 | |
72 #define LCOM 05 | |
73 #define NCOM 012 | |
74 #define PCOM 010 | |
75 #define QCOM 011 | |
76 #define RCOM 06 | |
77 #define SCOM 07 | |
78 #define TCOM 021 | |
79 #define WCOM 014 | |
80 #define CWCOM 024 | |
81 #define YCOM 026 | |
82 #define XCOM 033 | |
83 | |
84 | |
85 typedef struct label { /* Label symbol table */ | |
86 Rune asc[9]; /* Label name */ | |
87 SedCom *chain; | |
88 SedCom *address; /* Command associated wit… | |
89 } Label; | |
90 | |
91 typedef struct FILE_CACHE { /* Data file co… | |
92 struct FILE_CACHE *next; /* Forward Link */ | |
93 char *name; /* Name of file */ | |
94 } FileCache; | |
95 | |
96 SedCom pspace[MAXCMDS]; /* Command storage */ | |
97 SedCom *pend = pspace+MAXCMDS; /* End of command storage … | |
98 SedCom *rep = pspace; /* Current fill point */ | |
99 | |
100 Reprog *lastre = 0; /* Last regular expres… | |
101 Resub subexp[MAXSUB]; /* sub-patterns of p… | |
102 | |
103 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i co… | |
104 Rune *addend = addspace+ADDSIZE; | |
105 | |
106 SedCom *abuf[MAXADDS]; /* Queue of pending… | |
107 SedCom **aptr = abuf; | |
108 | |
109 struct { /* Sed program input control blo… | |
110 enum PTYPE /* Either on command line or … | |
111 { P_ARG, | |
112 P_FILE | |
113 } type; | |
114 union PCTL { /* Pointer to data */ | |
115 Biobuf *bp; | |
116 char *curr; | |
117 } pctl; | |
118 } prog; | |
119 | |
120 Rune genbuf[LBSIZE]; /* Miscellaneous buff… | |
121 | |
122 FileCache *fhead = 0; /* Head of File Cache Chain … | |
123 FileCache *ftail = 0; /* Tail of File Cache Chain … | |
124 | |
125 Rune *loc1; /* Start of pattern ma… | |
126 Rune *loc2; /* End of pattern matc… | |
127 Rune seof; /* Pattern delimiter ch… | |
128 | |
129 Rune linebuf[LBSIZE+1]; /* Input data buffer */ | |
130 Rune *lbend = linebuf+LBSIZE; /* End of buffer */ | |
131 Rune *spend = linebuf; /* End of input dat… | |
132 Rune *cp; /* Current scan point in… | |
133 | |
134 Rune holdsp[LBSIZE+1]; /* Hold buffer */ | |
135 Rune *hend = holdsp+LBSIZE; /* End of hold buffer … | |
136 Rune *hspend = holdsp; /* End of hold data */ | |
137 | |
138 int nflag; /* Command line flags */ | |
139 int gflag; | |
140 int lflag; | |
141 | |
142 int dolflag; /* Set when at true EOF */ | |
143 int sflag; /* Set when substitutio… | |
144 int jflag; /* Set when jump requir… | |
145 int delflag; /* Delete current line when s… | |
146 | |
147 long lnum = 0; /* Input line count */ | |
148 | |
149 char fname[MAXFILES][40]; /* File name cache */ | |
150 Biobuf *fcode[MAXFILES]; /* File ID cache */ | |
151 int nfiles = 0; /* Cache fill point */ | |
152 | |
153 Biobuf fout; /* Output stream */ | |
154 Biobuf bstdin; /* Default input */ | |
155 Biobuf* f = 0; /* Input data */ | |
156 | |
157 Label ltab[LABSIZE]; /* Label name symbol … | |
158 Label *labend = ltab+LABSIZE; /* End of label tabl… | |
159 Label *lab = ltab+1; /* Current Fill point… | |
160 | |
161 int depth = 0; /* {} stack pointer */ | |
162 | |
163 Rune bad; /* Dummy err ptr referen… | |
164 Rune *badp = &bad; | |
165 | |
166 | |
167 char CGMES[] = "Command garbled: %S"; | |
168 char TMMES[] = "Too much text: %S"; | |
169 char LTL[] = "Label too long: %S"; | |
170 char AD0MES[] = "No addresses allowed: %S"; | |
171 char AD1MES[] = "Only one address allowed: %S"; | |
172 | |
173 void address(Addr *); | |
174 void arout(void); | |
175 int cmp(char *, char *); | |
176 int rcmp(Rune *, Rune *); | |
177 void command(SedCom *); | |
178 Reprog *compile(void); | |
179 Rune *compsub(Rune *, Rune *); | |
180 void dechain(void); | |
181 void dosub(Rune *); | |
182 int ecmp(Rune *, Rune *, int); | |
183 void enroll(char *); | |
184 void errexit(void); | |
185 int executable(SedCom *); | |
186 void execute(void); | |
187 void fcomp(void); | |
188 long getrune(void); | |
189 Rune *gline(Rune *); | |
190 int match(Reprog *, Rune *); | |
191 void newfile(enum PTYPE, char *); | |
192 int opendata(void); | |
193 Biobuf *open_file(char *); | |
194 Rune *place(Rune *, Rune *, Rune *); | |
195 void quit(char *, char *); | |
196 int rline(Rune *, Rune *); | |
197 Label *search(Label *); | |
198 int substitute(SedCom *); | |
199 char *text(char *); | |
200 Rune *stext(Rune *, Rune *); | |
201 int ycomp(SedCom *); | |
202 char * trans(int c); | |
203 void putline(Biobuf *bp, Rune *buf, int n); | |
204 | |
205 void | |
206 main(int argc, char **argv) | |
207 { | |
208 int compfl; | |
209 | |
210 lnum = 0; | |
211 Binit(&fout, 1, OWRITE); | |
212 fcode[nfiles++] = &fout; | |
213 compfl = 0; | |
214 | |
215 if(argc == 1) | |
216 exits(0); | |
217 ARGBEGIN{ | |
218 case 'n': | |
219 nflag++; | |
220 continue; | |
221 case 'f': | |
222 if(argc <= 1) | |
223 quit("no pattern-file", 0); | |
224 newfile(P_FILE, ARGF()); | |
225 fcomp(); | |
226 compfl = 1; | |
227 continue; | |
228 case 'e': | |
229 if (argc <= 1) | |
230 quit("missing pattern", 0); | |
231 newfile(P_ARG, ARGF()); | |
232 fcomp(); | |
233 compfl = 1; | |
234 continue; | |
235 case 'g': | |
236 gflag++; | |
237 continue; | |
238 case 'l': | |
239 lflag++; | |
240 continue; | |
241 default: | |
242 fprint(2, "sed: Unknown flag: %c\n", ARGC()); | |
243 continue; | |
244 } ARGEND | |
245 | |
246 if(compfl == 0) { | |
247 if (--argc < 0) | |
248 quit("missing pattern", 0); | |
249 newfile(P_ARG, *argv++); | |
250 fcomp(); | |
251 } | |
252 | |
253 if(depth) | |
254 quit("Too many {'s", 0); | |
255 | |
256 ltab[0].address = rep; | |
257 | |
258 dechain(); | |
259 | |
260 if(argc <= 0) | |
261 enroll(0); /* Add stdin to cache */ | |
262 else while(--argc >= 0) { | |
263 enroll(*argv++); | |
264 } | |
265 execute(); | |
266 exits(0); | |
267 } | |
268 void | |
269 fcomp(void) | |
270 { | |
271 Rune *tp; | |
272 SedCom *pt, *pt1; | |
273 int i; | |
274 Label *lpt; | |
275 | |
276 static Rune *p = addspace; | |
277 static SedCom **cmpend[DEPTH]; /* stack of {} oper… | |
278 | |
279 while (rline(linebuf, lbend) >= 0) { | |
280 cp = linebuf; | |
281 comploop: | |
282 while(*cp == ' ' || *cp == '\t') | |
283 cp++; | |
284 if(*cp == '\0' || *cp == '#') | |
285 continue; | |
286 if(*cp == ';') { | |
287 cp++; | |
288 goto comploop; | |
289 } | |
290 | |
291 address(&rep->ad1); | |
292 if (rep->ad1.type != A_NONE) { | |
293 if (rep->ad1.type == A_LAST) { | |
294 if (!lastre) | |
295 quit("First RE may not be null",… | |
296 rep->ad1.type = A_RE; | |
297 rep->ad1.u.rp = lastre; | |
298 } | |
299 if(*cp == ',' || *cp == ';') { | |
300 cp++; | |
301 address(&rep->ad2); | |
302 if (rep->ad2.type == A_LAST) { | |
303 rep->ad1.type = A_RE; | |
304 rep->ad2.u.rp = lastre; | |
305 } | |
306 } else | |
307 rep->ad2.type = A_NONE; | |
308 } | |
309 while(*cp == ' ' || *cp == '\t') | |
310 cp++; | |
311 | |
312 swit: | |
313 switch(*cp++) { | |
314 | |
315 default: | |
316 quit("Unrecognized command: %S", (char *… | |
317 | |
318 case '!': | |
319 rep->negfl = 1; | |
320 goto swit; | |
321 | |
322 case '{': | |
323 rep->command = BCOM; | |
324 rep->negfl = !(rep->negfl); | |
325 cmpend[depth++] = &rep->u.lb1; | |
326 if(++rep >= pend) | |
327 quit("Too many commands: %S", (c… | |
328 if(*cp == '\0') continue; | |
329 goto comploop; | |
330 | |
331 case '}': | |
332 if(rep->ad1.type != A_NONE) | |
333 quit(AD0MES, (char *) linebuf); | |
334 if(--depth < 0) | |
335 quit("Too many }'s", 0); | |
336 *cmpend[depth] = rep; | |
337 if(*cp == 0) continue; | |
338 goto comploop; | |
339 | |
340 case '=': | |
341 rep->command = EQCOM; | |
342 if(rep->ad2.type != A_NONE) | |
343 quit(AD1MES, (char *) linebuf); | |
344 break; | |
345 | |
346 case ':': | |
347 if(rep->ad1.type != A_NONE) | |
348 quit(AD0MES, (char *) linebuf); | |
349 | |
350 while(*cp == ' ') | |
351 cp++; | |
352 tp = lab->asc; | |
353 while (*cp && *cp != ';' && *cp != ' ' &… | |
354 *tp++ = *cp++; | |
355 if(tp >= &(lab->asc[8])) | |
356 quit(LTL, (char *) lineb… | |
357 } | |
358 *tp = '\0'; | |
359 | |
360 if(lpt = search(lab)) { | |
361 if(lpt->address) | |
362 quit("Duplicate labels: … | |
363 } else { | |
364 lab->chain = 0; | |
365 lpt = lab; | |
366 if(++lab >= labend) | |
367 quit("Too many labels: %… | |
368 } | |
369 lpt->address = rep; | |
370 if (*cp == '#') | |
371 continue; | |
372 rep--; /* reuse t… | |
373 break; | |
374 | |
375 case 'a': | |
376 rep->command = ACOM; | |
377 if(rep->ad2.type != A_NONE) | |
378 quit(AD1MES, (char *) linebuf); | |
379 if(*cp == '\\') cp++; | |
380 if(*cp++ != '\n') | |
381 quit(CGMES, (char *) linebuf); | |
382 rep->u.text = p; | |
383 p = stext(p, addend); | |
384 break; | |
385 case 'c': | |
386 rep->command = CCOM; | |
387 if(*cp == '\\') cp++; | |
388 if(*cp++ != '\n') | |
389 quit(CGMES, (char *) linebuf); | |
390 rep->u.text = p; | |
391 p = stext(p, addend); | |
392 break; | |
393 case 'i': | |
394 rep->command = ICOM; | |
395 if(rep->ad2.type != A_NONE) | |
396 quit(AD1MES, (char *) linebuf); | |
397 if(*cp == '\\') cp++; | |
398 if(*cp++ != '\n') | |
399 quit(CGMES, (char *) linebuf); | |
400 rep->u.text = p; | |
401 p = stext(p, addend); | |
402 break; | |
403 | |
404 case 'g': | |
405 rep->command = GCOM; | |
406 break; | |
407 | |
408 case 'G': | |
409 rep->command = CGCOM; | |
410 break; | |
411 | |
412 case 'h': | |
413 rep->command = HCOM; | |
414 break; | |
415 | |
416 case 'H': | |
417 rep->command = CHCOM; | |
418 break; | |
419 | |
420 case 't': | |
421 rep->command = TCOM; | |
422 goto jtcommon; | |
423 | |
424 case 'b': | |
425 rep->command = BCOM; | |
426 jtcommon: | |
427 while(*cp == ' ')cp++; | |
428 if(*cp == '\0') { | |
429 if(pt = ltab[0].chain) { | |
430 while(pt1 = pt->u.lb1) | |
431 pt = pt1; | |
432 pt->u.lb1 = rep; | |
433 } else | |
434 ltab[0].chain = rep; | |
435 break; | |
436 } | |
437 tp = lab->asc; | |
438 while((*tp++ = *cp++)) | |
439 if(tp >= &(lab->asc[8])) | |
440 quit(LTL, (char *) lineb… | |
441 cp--; | |
442 tp[-1] = '\0'; | |
443 | |
444 if(lpt = search(lab)) { | |
445 if(lpt->address) { | |
446 rep->u.lb1 = lpt->addres… | |
447 } else { | |
448 pt = lpt->chain; | |
449 while(pt1 = pt->u.lb1) | |
450 pt = pt1; | |
451 pt->u.lb1 = rep; | |
452 } | |
453 } else { | |
454 lab->chain = rep; | |
455 lab->address = 0; | |
456 if(++lab >= labend) | |
457 quit("Too many labels: %… | |
458 (char *) linebuf… | |
459 } | |
460 break; | |
461 | |
462 case 'n': | |
463 rep->command = NCOM; | |
464 break; | |
465 | |
466 case 'N': | |
467 rep->command = CNCOM; | |
468 break; | |
469 | |
470 case 'p': | |
471 rep->command = PCOM; | |
472 break; | |
473 | |
474 case 'P': | |
475 rep->command = CPCOM; | |
476 break; | |
477 | |
478 case 'r': | |
479 rep->command = RCOM; | |
480 if(rep->ad2.type != A_NONE) | |
481 quit(AD1MES, (char *) linebuf); | |
482 if(*cp++ != ' ') | |
483 quit(CGMES, (char *) linebuf); | |
484 rep->u.text = p; | |
485 p = stext(p, addend); | |
486 break; | |
487 | |
488 case 'd': | |
489 rep->command = DCOM; | |
490 break; | |
491 | |
492 case 'D': | |
493 rep->command = CDCOM; | |
494 rep->u.lb1 = pspace; | |
495 break; | |
496 | |
497 case 'q': | |
498 rep->command = QCOM; | |
499 if(rep->ad2.type != A_NONE) | |
500 quit(AD1MES, (char *) linebuf); | |
501 break; | |
502 | |
503 case 'l': | |
504 rep->command = LCOM; | |
505 break; | |
506 | |
507 case 's': | |
508 rep->command = SCOM; | |
509 seof = *cp++; | |
510 if ((rep->u.re1 = compile()) == 0) { | |
511 if(!lastre) | |
512 quit("First RE may not b… | |
513 rep->u.re1 = lastre; | |
514 } | |
515 rep->rhs = p; | |
516 if((p = compsub(p, addend)) == 0) | |
517 quit(CGMES, (char *) linebuf); | |
518 if(*cp == 'g') { | |
519 cp++; | |
520 rep->gfl++; | |
521 } else if(gflag) | |
522 rep->gfl++; | |
523 | |
524 if(*cp == 'p') { | |
525 cp++; | |
526 rep->pfl = 1; | |
527 } | |
528 | |
529 if(*cp == 'P') { | |
530 cp++; | |
531 rep->pfl = 2; | |
532 } | |
533 | |
534 if(*cp == 'w') { | |
535 cp++; | |
536 if(*cp++ != ' ') | |
537 quit(CGMES, (char *) lin… | |
538 text(fname[nfiles]); | |
539 for(i = nfiles - 1; i >= 0; i--) | |
540 if(cmp(fname[nfiles],fna… | |
541 rep->fcode = fco… | |
542 goto done; | |
543 } | |
544 if(nfiles >= MAXFILES) | |
545 quit("Too many files in … | |
546 rep->fcode = open_file(fname[nfi… | |
547 } | |
548 break; | |
549 | |
550 case 'w': | |
551 rep->command = WCOM; | |
552 if(*cp++ != ' ') | |
553 quit(CGMES, (char *) linebuf); | |
554 text(fname[nfiles]); | |
555 for(i = nfiles - 1; i >= 0; i--) | |
556 if(cmp(fname[nfiles], fname[i]) … | |
557 rep->fcode = fcode[i]; | |
558 goto done; | |
559 } | |
560 if(nfiles >= MAXFILES){ | |
561 fprint(2, "sed: Too many files i… | |
562 fprint(2, "nfiles = %d; MAXF = %… | |
563 errexit(); | |
564 } | |
565 rep->fcode = open_file(fname[nfiles]); | |
566 break; | |
567 | |
568 case 'x': | |
569 rep->command = XCOM; | |
570 break; | |
571 | |
572 case 'y': | |
573 rep->command = YCOM; | |
574 seof = *cp++; | |
575 if (ycomp(rep) == 0) | |
576 quit(CGMES, (char *) linebuf); | |
577 break; | |
578 | |
579 } | |
580 done: | |
581 if(++rep >= pend) | |
582 quit("Too many commands, last: %S", (char *) lin… | |
583 | |
584 if(*cp++ != '\0') { | |
585 if(cp[-1] == ';') | |
586 goto comploop; | |
587 quit(CGMES, (char *) linebuf); | |
588 } | |
589 | |
590 } | |
591 } | |
592 | |
593 Biobuf * | |
594 open_file(char *name) | |
595 { | |
596 Biobuf *bp; | |
597 int fd; | |
598 | |
599 if ((bp = malloc(sizeof(Biobuf))) == 0) | |
600 quit("Out of memory", 0); | |
601 if ((fd = open(name, OWRITE)) < 0 && | |
602 (fd = create(name, OWRITE, 0666)) < 0) | |
603 quit("Cannot create %s", name); | |
604 Binit(bp, fd, OWRITE); | |
605 Bseek(bp, 0, 2); | |
606 fcode[nfiles++] = bp; | |
607 return bp; | |
608 } | |
609 | |
610 Rune * | |
611 compsub(Rune *rhs, Rune *end) | |
612 { | |
613 Rune r; | |
614 | |
615 while ((r = *cp++) != '\0') { | |
616 if(r == '\\') { | |
617 if (rhs < end) | |
618 *rhs++ = 0xFFFF; | |
619 else | |
620 return 0; | |
621 r = *cp++; | |
622 if(r == 'n') | |
623 r = '\n'; | |
624 } else { | |
625 if(r == seof) { | |
626 if (rhs < end) | |
627 *rhs++ = '\0'; | |
628 else | |
629 return 0; | |
630 return rhs; | |
631 } | |
632 } | |
633 if (rhs < end) | |
634 *rhs++ = r; | |
635 else | |
636 return 0; | |
637 | |
638 } | |
639 return 0; | |
640 } | |
641 | |
642 Reprog * | |
643 compile(void) | |
644 { | |
645 Rune c; | |
646 char *ep; | |
647 char expbuf[512]; | |
648 | |
649 if((c = *cp++) == seof) /* '//' */ | |
650 return 0; | |
651 ep = expbuf; | |
652 do { | |
653 if (c == 0 || c == '\n') | |
654 quit(TMMES, (char *) linebuf); | |
655 if (c == '\\') { | |
656 if (ep >= expbuf+sizeof(expbuf)) | |
657 quit(TMMES, (char *) linebuf); | |
658 ep += runetochar(ep, &c); | |
659 if ((c = *cp++) == 'n') | |
660 c = '\n'; | |
661 } | |
662 if (ep >= expbuf+sizeof(expbuf)) | |
663 quit(TMMES, (char *) linebuf); | |
664 ep += runetochar(ep, &c); | |
665 } while ((c = *cp++) != seof); | |
666 *ep = 0; | |
667 return lastre = regcomp(expbuf); | |
668 } | |
669 | |
670 void | |
671 regerror(char *s) | |
672 { | |
673 USED(s); | |
674 quit(CGMES, (char *) linebuf); | |
675 } | |
676 | |
677 void | |
678 newfile(enum PTYPE type, char *name) | |
679 { | |
680 if (type == P_ARG) | |
681 prog.pctl.curr = name; | |
682 else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0) | |
683 quit("Cannot open pattern-file: %s\n", name); | |
684 prog.type = type; | |
685 } | |
686 | |
687 int | |
688 rline(Rune *buf, Rune *end) | |
689 { | |
690 long c; | |
691 Rune r; | |
692 | |
693 while ((c = getrune()) >= 0) { | |
694 r = c; | |
695 if (r == '\\') { | |
696 if (buf <= end) | |
697 *buf++ = r; | |
698 if ((c = getrune()) < 0) | |
699 break; | |
700 r = c; | |
701 } else if (r == '\n') { | |
702 *buf = '\0'; | |
703 return(1); | |
704 } | |
705 if (buf <= end) | |
706 *buf++ = r; | |
707 } | |
708 *buf = '\0'; | |
709 return(-1); | |
710 } | |
711 | |
712 long | |
713 getrune(void) | |
714 { | |
715 char *p; | |
716 long c; | |
717 Rune r; | |
718 | |
719 if (prog.type == P_ARG) { | |
720 if ((p = prog.pctl.curr) != 0) { | |
721 if (*p) { | |
722 prog.pctl.curr += chartorune(&r, p); | |
723 c = r; | |
724 } else { | |
725 c = '\n'; /* fake an end-of-line … | |
726 prog.pctl.curr = 0; | |
727 } | |
728 } else | |
729 c = -1; | |
730 } else if ((c = Bgetrune(prog.pctl.bp)) < 0) | |
731 Bterm(prog.pctl.bp); | |
732 return c; | |
733 } | |
734 | |
735 void | |
736 address(Addr *ap) | |
737 { | |
738 int c; | |
739 long lno; | |
740 | |
741 if((c = *cp++) == '$') | |
742 ap->type = A_DOL; | |
743 else if(c == '/') { | |
744 seof = c; | |
745 if (ap->u.rp = compile()) | |
746 ap->type = A_RE; | |
747 else | |
748 ap->type = A_LAST; | |
749 } | |
750 else if (c >= '0' && c <= '9') { | |
751 lno = c-'0'; | |
752 while ((c = *cp) >= '0' && c <= '9') | |
753 lno = lno*10 + *cp++-'0'; | |
754 if(!lno) | |
755 quit("line number 0 is illegal",0); | |
756 ap->type = A_LINE; | |
757 ap->u.line = lno; | |
758 } | |
759 else { | |
760 cp--; | |
761 ap->type = A_NONE; | |
762 } | |
763 } | |
764 | |
765 int | |
766 cmp(char *a, char *b) /* compare characters */ | |
767 { | |
768 while(*a == *b++) | |
769 if (*a == '\0') | |
770 return(0); | |
771 else a++; | |
772 return(1); | |
773 } | |
774 | |
775 int | |
776 rcmp(Rune *a, Rune *b) /* compare runes */ | |
777 { | |
778 while(*a == *b++) | |
779 if (*a == '\0') | |
780 return(0); | |
781 else a++; | |
782 return(1); | |
783 } | |
784 | |
785 char * | |
786 text(char *p) /* extract character string */ | |
787 { | |
788 Rune r; | |
789 | |
790 while(*cp == '\t' || *cp == ' ') | |
791 cp++; | |
792 while (*cp) { | |
793 if ((r = *cp++) == '\\') | |
794 if ((r = *cp++) == 0) | |
795 break;; | |
796 if (r == '\n') | |
797 while (*cp == '\t' || *cp == ' ') | |
798 cp++; | |
799 p += runetochar(p, &r); | |
800 } | |
801 *p++ = '\0'; | |
802 return p; | |
803 } | |
804 | |
805 Rune * | |
806 stext(Rune *p, Rune *end) /* extract rune string */ | |
807 { | |
808 while(*cp == '\t' || *cp == ' ') | |
809 cp++; | |
810 while (*cp) { | |
811 if (*cp == '\\') | |
812 if (*++cp == 0) | |
813 break; | |
814 if (p >= end-1) | |
815 quit(TMMES, (char *) linebuf); | |
816 if ((*p++ = *cp++) == '\n') | |
817 while(*cp == '\t' || *cp == ' ') | |
818 cp++; | |
819 } | |
820 *p++ = 0; | |
821 return p; | |
822 } | |
823 | |
824 | |
825 Label * | |
826 search (Label *ptr) | |
827 { | |
828 Label *rp; | |
829 | |
830 for (rp = ltab; rp < ptr; rp++) | |
831 if(rcmp(rp->asc, ptr->asc) == 0) | |
832 return(rp); | |
833 return(0); | |
834 } | |
835 | |
836 void | |
837 dechain(void) | |
838 { | |
839 Label *lptr; | |
840 SedCom *rptr, *trptr; | |
841 | |
842 for(lptr = ltab; lptr < lab; lptr++) { | |
843 | |
844 if(lptr->address == 0) | |
845 quit("Undefined label: %S", (char *) lptr->asc); | |
846 | |
847 if(lptr->chain) { | |
848 rptr = lptr->chain; | |
849 while(trptr = rptr->u.lb1) { | |
850 rptr->u.lb1 = lptr->address; | |
851 rptr = trptr; | |
852 } | |
853 rptr->u.lb1 = lptr->address; | |
854 } | |
855 } | |
856 } | |
857 | |
858 int | |
859 ycomp(SedCom *r) | |
860 { | |
861 int i; | |
862 Rune *rp; | |
863 Rune c, *tsp, highc; | |
864 Rune *sp; | |
865 | |
866 highc = 0; | |
867 for(tsp = cp; *tsp != seof; tsp++) { | |
868 if(*tsp == '\\') | |
869 tsp++; | |
870 if(*tsp == '\n' || *tsp == '\0') | |
871 return(0); | |
872 if (*tsp > highc) highc = *tsp; | |
873 } | |
874 tsp++; | |
875 if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) =… | |
876 quit("Out of memory", 0); | |
877 *rp++ = highc; /* save upper boun… | |
878 for (i = 0; i <= highc; i++) | |
879 rp[i] = i; | |
880 sp = cp; | |
881 while((c = *sp++) != seof) { | |
882 if(c == '\\' && *sp == 'n') { | |
883 sp++; | |
884 c = '\n'; | |
885 } | |
886 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { | |
887 rp[c] = '\n'; | |
888 tsp++; | |
889 } | |
890 if(rp[c] == seof || rp[c] == '\0') { | |
891 free(r->u.re1); | |
892 r->u.re1 = 0; | |
893 return(0); | |
894 } | |
895 } | |
896 if(*tsp != seof) { | |
897 free(r->u.re1); | |
898 r->u.re1 = 0; | |
899 return(0); | |
900 } | |
901 cp = tsp+1; | |
902 return(1); | |
903 } | |
904 | |
905 void | |
906 execute(void) | |
907 { | |
908 SedCom *ipc; | |
909 | |
910 while (spend = gline(linebuf)){ | |
911 for(ipc = pspace; ipc->command; ) { | |
912 if (!executable(ipc)) { | |
913 ipc++; | |
914 continue; | |
915 } | |
916 command(ipc); | |
917 | |
918 if(delflag) | |
919 break; | |
920 if(jflag) { | |
921 jflag = 0; | |
922 if((ipc = ipc->u.lb1) == 0) | |
923 break; | |
924 } else | |
925 ipc++; | |
926 | |
927 } | |
928 if(!nflag && !delflag) | |
929 putline(&fout, linebuf, spend-linebuf); | |
930 if(aptr > abuf) { | |
931 arout(); | |
932 } | |
933 delflag = 0; | |
934 } | |
935 } | |
936 /* determine if a statement should be applied to an input line */ | |
937 int | |
938 executable(SedCom *ipc) | |
939 { | |
940 if (ipc->active) { /* Addr1 satisfied - accept until Addr… | |
941 if (ipc->active == 1) /* Second line */ | |
942 ipc->active = 2; | |
943 switch(ipc->ad2.type) { | |
944 case A_NONE: /* No second addr; use first… | |
945 ipc->active = 0; | |
946 break; | |
947 case A_DOL: /* Accept everything */ | |
948 return !ipc->negfl; | |
949 case A_LINE: /* Line at end of range? */ | |
950 if (lnum <= ipc->ad2.u.line) { | |
951 if (ipc->ad2.u.line == lnum) | |
952 ipc->active = 0; | |
953 return !ipc->negfl; | |
954 } | |
955 ipc->active = 0; /* out of range … | |
956 return ipc->negfl; | |
957 case A_RE: /* Check for matching R.E. */ | |
958 if (match(ipc->ad2.u.rp, linebuf)) | |
959 ipc->active = 0; | |
960 return !ipc->negfl; | |
961 default: /* internal error */ | |
962 quit("Internal error", 0); | |
963 } | |
964 } | |
965 switch (ipc->ad1.type) { /* Check first address */ | |
966 case A_NONE: /* Everything matche… | |
967 return !ipc->negfl; | |
968 case A_DOL: /* Only last line */ | |
969 if (dolflag) | |
970 return !ipc->negfl; | |
971 break; | |
972 case A_LINE: /* Check line number… | |
973 if (ipc->ad1.u.line == lnum) { | |
974 ipc->active = 1; /* In range */ | |
975 return !ipc->negfl; | |
976 } | |
977 break; | |
978 case A_RE: /* Check R.E. */ | |
979 if (match(ipc->ad1.u.rp, linebuf)) { | |
980 ipc->active = 1; /* In range */ | |
981 return !ipc->negfl; | |
982 } | |
983 break; | |
984 default: | |
985 quit("Internal error", 0); | |
986 } | |
987 return ipc->negfl; | |
988 } | |
989 | |
990 int | |
991 match(Reprog *pattern, Rune *buf) | |
992 { | |
993 if (!pattern) | |
994 return 0; | |
995 subexp[0].s.rsp = buf; | |
996 subexp[0].e.rep = 0; | |
997 if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) { | |
998 loc1 = subexp[0].s.rsp; | |
999 loc2 = subexp[0].e.rep; | |
1000 return 1; | |
1001 } | |
1002 loc1 = loc2 = 0; | |
1003 return 0; | |
1004 } | |
1005 | |
1006 int | |
1007 substitute(SedCom *ipc) | |
1008 { | |
1009 int len; | |
1010 | |
1011 if(!match(ipc->u.re1, linebuf)) | |
1012 return 0; | |
1013 | |
1014 /* | |
1015 * we have at least one match. some patterns, e.g. '$' or '^', … | |
1016 * produce zero-length matches, so during a global substitute we | |
1017 * must bump to the character after a zero-length match to keep … | |
1018 */ | |
1019 sflag = 1; | |
1020 if(ipc->gfl == 0) /* single substitution */ | |
1021 dosub(ipc->rhs); | |
1022 else | |
1023 do{ /* global substitution */ | |
1024 len = loc2-loc1; /* length of match */ | |
1025 dosub(ipc->rhs); /* dosub moves loc2 */ | |
1026 if(*loc2 == 0) /* end of string */ | |
1027 break; | |
1028 if(len == 0) /* zero-length R.E. match */ | |
1029 loc2++; /* bump over zero-length … | |
1030 if(*loc2 == 0) /* end of string */ | |
1031 break; | |
1032 } while(match(ipc->u.re1, loc2)); | |
1033 return 1; | |
1034 } | |
1035 | |
1036 void | |
1037 dosub(Rune *rhsbuf) | |
1038 { | |
1039 Rune *lp, *sp; | |
1040 Rune *rp; | |
1041 int c, n; | |
1042 | |
1043 lp = linebuf; | |
1044 sp = genbuf; | |
1045 rp = rhsbuf; | |
1046 while (lp < loc1) | |
1047 *sp++ = *lp++; | |
1048 while(c = *rp++) { | |
1049 if (c == '&') { | |
1050 sp = place(sp, loc1, loc2); | |
1051 continue; | |
1052 } | |
1053 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0')… | |
1054 n = c-'0'; | |
1055 if (subexp[n].s.rsp && subexp[n].e.rep) { | |
1056 sp = place(sp, subexp[n].s.rsp, subexp[n… | |
1057 continue; | |
1058 } | |
1059 else { | |
1060 fprint(2, "sed: Invalid back reference \… | |
1061 errexit(); | |
1062 } | |
1063 } | |
1064 *sp++ = c; | |
1065 if (sp >= &genbuf[LBSIZE]) | |
1066 fprint(2, "sed: Output line too long.\n"); | |
1067 } | |
1068 lp = loc2; | |
1069 loc2 = sp - genbuf + linebuf; | |
1070 while (*sp++ = *lp++) | |
1071 if (sp >= &genbuf[LBSIZE]) | |
1072 fprint(2, "sed: Output line too long.\n"); | |
1073 lp = linebuf; | |
1074 sp = genbuf; | |
1075 while (*lp++ = *sp++) | |
1076 ; | |
1077 spend = lp-1; | |
1078 } | |
1079 | |
1080 Rune * | |
1081 place(Rune *sp, Rune *l1, Rune *l2) | |
1082 { | |
1083 while (l1 < l2) { | |
1084 *sp++ = *l1++; | |
1085 if (sp >= &genbuf[LBSIZE]) | |
1086 fprint(2, "sed: Output line too long.\n"); | |
1087 } | |
1088 return(sp); | |
1089 } | |
1090 | |
1091 char * | |
1092 trans(int c) | |
1093 { | |
1094 static char buf[] = "\\x0000"; | |
1095 static char hex[] = "0123456789abcdef"; | |
1096 | |
1097 switch(c) { | |
1098 case '\b': | |
1099 return "\\b"; | |
1100 case '\n': | |
1101 return "\\n"; | |
1102 case '\r': | |
1103 return "\\r"; | |
1104 case '\t': | |
1105 return "\\t"; | |
1106 case '\\': | |
1107 return "\\\\"; | |
1108 } | |
1109 buf[2] = hex[(c>>12)&0xF]; | |
1110 buf[3] = hex[(c>>8)&0xF]; | |
1111 buf[4] = hex[(c>>4)&0xF]; | |
1112 buf[5] = hex[c&0xF]; | |
1113 return buf; | |
1114 } | |
1115 | |
1116 void | |
1117 command(SedCom *ipc) | |
1118 { | |
1119 int i, c; | |
1120 Rune *p1, *p2; | |
1121 char *ucp; | |
1122 Rune *rp; | |
1123 Rune *execp; | |
1124 | |
1125 switch(ipc->command) { | |
1126 | |
1127 case ACOM: | |
1128 *aptr++ = ipc; | |
1129 if(aptr >= abuf+MAXADDS) { | |
1130 quit("sed: Too many appends after line %… | |
1131 (char *) lnum); | |
1132 } | |
1133 *aptr = 0; | |
1134 break; | |
1135 case CCOM: | |
1136 delflag = 1; | |
1137 if(ipc->active == 1) { | |
1138 for(rp = ipc->u.text; *rp; rp++) | |
1139 Bputrune(&fout, *rp); | |
1140 Bputc(&fout, '\n'); | |
1141 } | |
1142 break; | |
1143 case DCOM: | |
1144 delflag++; | |
1145 break; | |
1146 case CDCOM: | |
1147 p1 = p2 = linebuf; | |
1148 while(*p1 != '\n') { | |
1149 if(*p1++ == 0) { | |
1150 delflag++; | |
1151 return; | |
1152 } | |
1153 } | |
1154 p1++; | |
1155 while(*p2++ = *p1++) | |
1156 ; | |
1157 spend = p2-1; | |
1158 jflag++; | |
1159 break; | |
1160 case EQCOM: | |
1161 Bprint(&fout, "%ld\n", lnum); | |
1162 break; | |
1163 case GCOM: | |
1164 p1 = linebuf; | |
1165 p2 = holdsp; | |
1166 while(*p1++ = *p2++) | |
1167 ; | |
1168 spend = p1-1; | |
1169 break; | |
1170 case CGCOM: | |
1171 *spend++ = '\n'; | |
1172 p1 = spend; | |
1173 p2 = holdsp; | |
1174 while(*p1++ = *p2++) | |
1175 if(p1 >= lbend) | |
1176 break; | |
1177 spend = p1-1; | |
1178 break; | |
1179 case HCOM: | |
1180 p1 = holdsp; | |
1181 p2 = linebuf; | |
1182 while(*p1++ = *p2++); | |
1183 hspend = p1-1; | |
1184 break; | |
1185 case CHCOM: | |
1186 *hspend++ = '\n'; | |
1187 p1 = hspend; | |
1188 p2 = linebuf; | |
1189 while(*p1++ = *p2++) | |
1190 if(p1 >= hend) | |
1191 break; | |
1192 hspend = p1-1; | |
1193 break; | |
1194 case ICOM: | |
1195 for(rp = ipc->u.text; *rp; rp++) | |
1196 Bputrune(&fout, *rp); | |
1197 Bputc(&fout, '\n'); | |
1198 break; | |
1199 case BCOM: | |
1200 jflag = 1; | |
1201 break; | |
1202 case LCOM: | |
1203 c = 0; | |
1204 for (i = 0, rp = linebuf; *rp; rp++) { | |
1205 c = *rp; | |
1206 if(c >= 0x20 && c < 0x7F && c != '\\') { | |
1207 Bputc(&fout, c); | |
1208 if(i++ > 71) { | |
1209 Bprint(&fout, "\\\n"); | |
1210 i = 0; | |
1211 } | |
1212 } else { | |
1213 for (ucp = trans(*rp); *ucp; ucp… | |
1214 c = *ucp; | |
1215 Bputc(&fout, c); | |
1216 if(i++ > 71) { | |
1217 Bprint(&fout, "\… | |
1218 i = 0; | |
1219 } | |
1220 } | |
1221 } | |
1222 } | |
1223 if(c == ' ') | |
1224 Bprint(&fout, "\\n"); | |
1225 Bputc(&fout, '\n'); | |
1226 break; | |
1227 case NCOM: | |
1228 if(!nflag) | |
1229 putline(&fout, linebuf, spend-linebuf); | |
1230 | |
1231 if(aptr > abuf) | |
1232 arout(); | |
1233 if((execp = gline(linebuf)) == 0) { | |
1234 delflag = 1; | |
1235 break; | |
1236 } | |
1237 spend = execp; | |
1238 break; | |
1239 case CNCOM: | |
1240 if(aptr > abuf) | |
1241 arout(); | |
1242 *spend++ = '\n'; | |
1243 if((execp = gline(spend)) == 0) { | |
1244 delflag = 1; | |
1245 break; | |
1246 } | |
1247 spend = execp; | |
1248 break; | |
1249 case PCOM: | |
1250 putline(&fout, linebuf, spend-linebuf); | |
1251 break; | |
1252 case CPCOM: | |
1253 cpcom: | |
1254 for(rp = linebuf; *rp && *rp != '\n'; rp++) | |
1255 Bputc(&fout, *rp); | |
1256 Bputc(&fout, '\n'); | |
1257 break; | |
1258 case QCOM: | |
1259 if(!nflag) | |
1260 putline(&fout, linebuf, spend-linebuf); | |
1261 if(aptr > abuf) | |
1262 arout(); | |
1263 exits(0); | |
1264 case RCOM: | |
1265 *aptr++ = ipc; | |
1266 if(aptr >= &abuf[MAXADDS]) | |
1267 quit("sed: Too many reads after line %ld… | |
1268 (char *) lnum); | |
1269 *aptr = 0; | |
1270 break; | |
1271 case SCOM: | |
1272 i = substitute(ipc); | |
1273 if(i && ipc->pfl) | |
1274 if(ipc->pfl == 1) | |
1275 putline(&fout, linebuf, spend-li… | |
1276 else | |
1277 goto cpcom; | |
1278 if(i && ipc->fcode) | |
1279 goto wcom; | |
1280 break; | |
1281 | |
1282 case TCOM: | |
1283 if(sflag == 0) break; | |
1284 sflag = 0; | |
1285 jflag = 1; | |
1286 break; | |
1287 | |
1288 wcom: | |
1289 case WCOM: | |
1290 putline(ipc->fcode,linebuf, spend-linebuf); | |
1291 break; | |
1292 case XCOM: | |
1293 p1 = linebuf; | |
1294 p2 = genbuf; | |
1295 while(*p2++ = *p1++); | |
1296 p1 = holdsp; | |
1297 p2 = linebuf; | |
1298 while(*p2++ = *p1++); | |
1299 spend = p2 - 1; | |
1300 p1 = genbuf; | |
1301 p2 = holdsp; | |
1302 while(*p2++ = *p1++); | |
1303 hspend = p2 - 1; | |
1304 break; | |
1305 case YCOM: | |
1306 p1 = linebuf; | |
1307 p2 = ipc->u.text; | |
1308 for (i = *p2++; *p1; p1++){ | |
1309 if (*p1 <= i) *p1 = p2[*p1]; | |
1310 } | |
1311 break; | |
1312 } | |
1313 | |
1314 } | |
1315 | |
1316 void | |
1317 putline(Biobuf *bp, Rune *buf, int n) | |
1318 { | |
1319 while (n--) | |
1320 Bputrune(bp, *buf++); | |
1321 Bputc(bp, '\n'); | |
1322 if(lflag) | |
1323 Bflush(bp); | |
1324 } | |
1325 | |
1326 int | |
1327 ecmp(Rune *a, Rune *b, int count) | |
1328 { | |
1329 while(count--) | |
1330 if(*a++ != *b++) return(0); | |
1331 return(1); | |
1332 } | |
1333 | |
1334 void | |
1335 arout(void) | |
1336 { | |
1337 Rune *p1; | |
1338 Biobuf *fi; | |
1339 int c; | |
1340 char *s; | |
1341 char buf[128]; | |
1342 | |
1343 for (aptr = abuf; *aptr; aptr++) { | |
1344 if((*aptr)->command == ACOM) { | |
1345 for(p1 = (*aptr)->u.text; *p1; p1++ ) | |
1346 Bputrune(&fout, *p1); | |
1347 Bputc(&fout, '\n'); | |
1348 } else { | |
1349 for(s = buf, p1= (*aptr)->u.text; *p1; p1++) | |
1350 s += runetochar(s, p1); | |
1351 *s = '\0'; | |
1352 if((fi = Bopen(buf, OREAD)) == 0) | |
1353 continue; | |
1354 while((c = Bgetc(fi)) >= 0) | |
1355 Bputc(&fout, c); | |
1356 Bterm(fi); | |
1357 } | |
1358 } | |
1359 aptr = abuf; | |
1360 *aptr = 0; | |
1361 } | |
1362 | |
1363 void | |
1364 errexit(void) | |
1365 { | |
1366 exits("error"); | |
1367 } | |
1368 | |
1369 void | |
1370 quit (char *msg, char *arg) | |
1371 { | |
1372 fprint(2, "sed: "); | |
1373 fprint(2, msg, arg); | |
1374 fprint(2, "\n"); | |
1375 errexit(); | |
1376 } | |
1377 | |
1378 Rune * | |
1379 gline(Rune *addr) | |
1380 { | |
1381 long c; | |
1382 Rune *p; | |
1383 | |
1384 static long peekc = 0; | |
1385 | |
1386 if (f == 0 && opendata() < 0) | |
1387 return 0; | |
1388 sflag = 0; | |
1389 lnum++; | |
1390 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ | |
1391 do { | |
1392 p = addr; | |
1393 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bget… | |
1394 if (c == '\n') { | |
1395 if ((peekc = Bgetrune(f)) < 0) { | |
1396 if (fhead == 0) | |
1397 dolflag = 1; | |
1398 } | |
1399 *p = '\0'; | |
1400 return p; | |
1401 } | |
1402 if (c && p < lbend) | |
1403 *p++ = c; | |
1404 } | |
1405 /* return partial final line, adding implicit newline */ | |
1406 if(p != addr) { | |
1407 *p = '\0'; | |
1408 peekc = -1; | |
1409 if (fhead == 0) | |
1410 dolflag = 1; | |
1411 return p; | |
1412 } | |
1413 peekc = 0; | |
1414 Bterm(f); | |
1415 } while (opendata() > 0); /* Switch to next stream */ | |
1416 f = 0; | |
1417 return 0; | |
1418 } | |
1419 | |
1420 /* Data file input section - the intent is to transparently | |
1421 * catenate all data input streams. | |
1422 */ | |
1423 void | |
1424 enroll(char *filename) /* Add a file to the input file ca… | |
1425 { | |
1426 FileCache *fp; | |
1427 | |
1428 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) | |
1429 quit("Out of memory", 0); | |
1430 if (ftail == 0) | |
1431 fhead = fp; | |
1432 else | |
1433 ftail->next = fp; | |
1434 ftail = fp; | |
1435 fp->next = 0; | |
1436 fp->name = filename; /* 0 => stdin */ | |
1437 } | |
1438 | |
1439 int | |
1440 opendata(void) | |
1441 { | |
1442 if (fhead == 0) | |
1443 return -1; | |
1444 if (fhead->name) { | |
1445 if ((f = Bopen(fhead->name, OREAD)) == 0) | |
1446 quit("Can't open %s", fhead->name); | |
1447 } else { | |
1448 Binit(&bstdin, 0, OREAD); | |
1449 f = &bstdin; | |
1450 } | |
1451 fhead = fhead->next; | |
1452 return 1; | |
1453 } |