sed.c - sbase - suckless unix tools | |
git clone git://git.suckless.org/sbase | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
sed.c (41896B) | |
--- | |
1 /* FIXME: summary | |
2 * decide whether we enforce valid UTF-8, right now it's enforced in cer… | |
3 * parts of the script, but not the input... | |
4 * nul bytes cause explosions due to use of libc string functions. thoug… | |
5 * lack of newline at end of file, currently we add one. what should we … | |
6 * allow "\\t" for "\t" etc. in regex? in replacement text? | |
7 * POSIX says don't flush on N when out of input, but GNU and busybox do. | |
8 */ | |
9 | |
10 #include <ctype.h> | |
11 #include <errno.h> | |
12 #include <regex.h> | |
13 #include <stdlib.h> | |
14 #include <string.h> | |
15 | |
16 #include "utf.h" | |
17 #include "util.h" | |
18 | |
19 /* Types */ | |
20 | |
21 /* used as queue for writes and stack for {,:,b,t */ | |
/* Growable array of untyped pointers. push() appends at the tail and grows
 * the storage once size reaches cap; pop() removes from the tail. */
22 typedef struct {
23 void **data; /* element storage */
24 size_t size; /* number of elements currently stored */
25 size_t cap; /* number of elements data has room for */
26 } Vec;
27 | |
28 /* used for arbitrary growth, str is a C string | |
29 * FIXME: does it make sense to keep track of length? or just rely on li… | |
30 * string functions? If we want to support nul bytes everything c… | |
31 */ | |
/* Growable C string. stracat() and strnacat() treat cap == 0 as "nothing
 * allocated yet" and start from an empty string in that case. */
32 typedef struct {
33 char *str; /* nul terminated text */
34 size_t cap; /* size of the allocation behind str, in bytes */
35 } String;
36 | |
37 typedef struct Cmd Cmd; | |
/* Per-function description, one entry of the fns[] table. */
38 typedef struct {
39 void (*fn)(Cmd *); /* executes the command, run() passes the command itself */
40 char *(*getarg)(Cmd *, char *); /* parses the argument from the script text and returns a pointer past it, NULL if the function takes no argument */
41 void (*freearg)(Cmd *); /* releases what getarg stored in the command, may be NULL (call site not visible in this part of the file) */
42 unsigned char naddr; /* maximum number of addresses, compile() rejects commands given more */
43 } Fninfo;
44 | |
/* One address of a command; type selects which member of u, if any, is used. */
45 typedef struct {
46 union {
47 size_t lineno; /* set by make_addr() when type is LINE */
48 regex_t *re; /* compiled by make_addr() when type is REGEX */
49 } u;
50 enum {
51 IGNORE, /* empty address, ignore */
52 EVERY , /* every line */
53 LINE , /* line number */
54 LAST , /* last line ($) */
55 REGEX , /* use included regex */
56 LASTRE, /* use most recently used regex */
57 } type;
58 } Addr;
59 | |
60 /* DISCUSS: naddr is not strictly necessary, but very helpful | |
61 * naddr == 0 iff beg.type == EVERY && end.type == IGNORE | |
62 * naddr == 1 iff beg.type != IGNORE && end.type == IGNORE | |
63 * naddr == 2 iff beg.type != IGNORE && end.type != IGNORE | |
64 */ | |
/* The zero, one or two addresses a command was given. */
65 typedef struct {
66 Addr beg; /* first address */
67 Addr end; /* second address, make_range() sets its type to IGNORE when no ',' follows beg */
68 unsigned char naddr; /* number of addresses given, compared against Fninfo.naddr in compile() */
69 } Range;
70 | |
/* Argument of the s command, filled in by get_s_arg(). */
71 typedef struct {
72 regex_t *re; /* if NULL use last regex */
73 String repl; /* replacement text, may be accumulated over several script lines */
74 FILE *file; /* stream of the w flag, NULL when the flag was not given */
75 size_t occurrence; /* 0 for all (g flag) */
76 Rune delim; /* delimiter, kept so a replacement continued on the next script line can be parsed */
77 unsigned int p:1; /* set by the p flag */
78 } Sarg;
79 | |
/* Argument of the y command: two rune arrays built by get_y_arg(), which
 * rejects sets of different length; strtorunes() ends each array with a 0 rune. */
80 typedef struct {
81 Rune *set1; /* first set of the y command */
82 Rune *set2; /* second set of the y command */
83 } Yarg;
84 | |
85 typedef struct { | |
86 String str; /* a,c,i text. r file path */ | |
87 void (*print)(char *, FILE *); /* check_puts for a, write_file … | |
88 } ACIRarg; | |
89 | |
90 struct Cmd { | |
91 Range range; | |
92 Fninfo *fninfo; | |
93 union { | |
94 Cmd *jump; /* used for b,t when running */ | |
95 char *label; /* used for :,b,t when building */ | |
96 ptrdiff_t offset; /* used for { (pointers break during r… | |
97 FILE *file; /* used for w */ | |
98 | |
99 /* FIXME: Should the following be in the union? or point… | |
100 Sarg s; | |
101 Yarg y; | |
102 ACIRarg acir; | |
103 } u; /* I find your lack of anonymous unions disturbing */ | |
104 unsigned int in_match:1; | |
105 unsigned int negate :1; | |
106 }; | |
107 | |
108 /* Files for w command (and s' w flag) */ | |
/* An output file opened by get_w_arg(); entries are kept in wfiles and an
 * entry whose path matches is reused instead of opening the file again. */
109 typedef struct {
110 char *path; /* file name as written in the script */
111 FILE *file; /* stream opened with mode "w" */
112 } Wfile;
113 | |
114 /* | |
115 * Function Declarations | |
116 */ | |
117 | |
118 /* Dynamically allocated arrays and strings */ | |
119 static void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nm… | |
120 static void *pop(Vec *v); | |
121 static void push(Vec *v, void *p); | |
122 static void stracat(String *dst, char *src); | |
123 static void strnacat(String *dst, char *src, size_t n); | |
124 static void stracpy(String *dst, char *src); | |
125 | |
126 /* Cleanup and errors */ | |
127 static void usage(void); | |
128 | |
129 /* Parsing functions and related utilities */ | |
130 static void compile(char *s, int isfile); | |
131 static int read_line(FILE *f, String *s); | |
132 static char *make_range(Range *range, char *s); | |
133 static char *make_addr(Addr *addr, char *s); | |
134 static char *find_delim(char *s, Rune delim, int do_brackets); | |
135 static char *chompr(char *s, Rune rune); | |
136 static char *chomp(char *s); | |
137 static Rune *strtorunes(char *s, size_t nrunes); | |
138 static long stol(char *s, char **endp); | |
139 static size_t escapes(char *beg, char *end, Rune delim, int n_newline); | |
140 static size_t echarntorune(Rune *r, char *s, size_t n); | |
141 static void insert_labels(void); | |
142 | |
143 /* Get and Free arg and related utilities */ | |
144 static char *get_aci_arg(Cmd *c, char *s); | |
145 static void aci_append(Cmd *c, char *s); | |
146 static void free_acir_arg(Cmd *c); | |
147 static char *get_bt_arg(Cmd *c, char *s); | |
148 static char *get_r_arg(Cmd *c, char *s); | |
149 static char *get_s_arg(Cmd *c, char *s); | |
150 static void free_s_arg(Cmd *c); | |
151 static char *get_w_arg(Cmd *c, char *s); | |
152 static char *get_y_arg(Cmd *c, char *s); | |
153 static void free_y_arg(Cmd *c); | |
154 static char *get_colon_arg(Cmd *c, char *s); | |
155 static char *get_lbrace_arg(Cmd *c, char *s); | |
156 static char *get_rbrace_arg(Cmd *c, char *s); | |
157 static char *semicolon_arg(char *s); | |
158 | |
159 /* Running */ | |
160 static void run(void); | |
161 static int in_range(Cmd *c); | |
162 static int match_addr(Addr *a); | |
163 static int next_file(void); | |
164 static int is_eof(FILE *f); | |
165 static void do_writes(void); | |
166 static void write_file(char *path, FILE *out); | |
167 static void check_puts(char *s, FILE *f); | |
168 static void update_ranges(Cmd *beg, Cmd *end); | |
169 | |
170 /* Sed functions */ | |
171 static void cmd_y(Cmd *c); | |
172 static void cmd_x(Cmd *c); | |
173 static void cmd_w(Cmd *c); | |
174 static void cmd_t(Cmd *c); | |
175 static void cmd_s(Cmd *c); | |
176 static void cmd_r(Cmd *c); | |
177 static void cmd_q(Cmd *c); | |
178 static void cmd_P(Cmd *c); | |
179 static void cmd_p(Cmd *c); | |
180 static void cmd_N(Cmd *c); | |
181 static void cmd_n(Cmd *c); | |
182 static void cmd_l(Cmd *c); | |
183 static void cmd_i(Cmd *c); | |
184 static void cmd_H(Cmd *c); | |
185 static void cmd_h(Cmd *c); | |
186 static void cmd_G(Cmd *c); | |
187 static void cmd_g(Cmd *c); | |
188 static void cmd_D(Cmd *c); | |
189 static void cmd_d(Cmd *c); | |
190 static void cmd_c(Cmd *c); | |
191 static void cmd_b(Cmd *c); | |
192 static void cmd_a(Cmd *c); | |
193 static void cmd_colon(Cmd *c); | |
194 static void cmd_equal(Cmd *c); | |
195 static void cmd_lbrace(Cmd *c); | |
196 static void cmd_rbrace(Cmd *c); | |
197 static void cmd_last(Cmd *c); | |
198 | |
199 /* Actions */ | |
200 static void new_line(void); | |
201 static void app_line(void); | |
202 static void new_next(void); | |
203 static void old_next(void); | |
204 | |
205 /* | |
206 * Globals | |
207 */ | |
208 static Vec braces, labels, branches; /* holds ptrdiff_t. addrs of {, :, … | |
209 static Vec writes; /* holds cmd*. writes scheduled by a and r commands */ | |
210 static Vec wfiles; /* holds Wfile*. files for w and s///w commands */ | |
211 | |
212 static Cmd *prog, *pc; /* Program, program counter */ | |
213 static size_t pcap; | |
214 static size_t lineno; | |
215 | |
216 static regex_t *lastre; /* last used regex for empty regex search */ | |
217 static char **files; /* list of file names from argv */ | |
218 static FILE *file; /* current file we are reading */ | |
219 static int ret; /* exit status */ | |
220 | |
221 static String patt, hold, genbuf; | |
222 | |
/* Global one-bit flags: n and E hold options, aci_cont and s_cont carry
 * parser state from one script line to the next in compile(), and halt
 * ends the main loop in run(). */
223 static struct {
224 unsigned int n :1; /* -n (no print) */
225 unsigned int E :1; /* -E (extended re) */
226 unsigned int s :1; /* s/// replacement happened */
227 unsigned int aci_cont:1; /* a,c,i text continuation */
228 unsigned int s_cont :1; /* s/// replacement text continuation */
229 unsigned int halt :1; /* halt execution */
230 } gflags;
231 | |
232 /* FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) in… | |
233 static Fninfo fns[] = { | |
234 ['a'] = { cmd_a , get_aci_arg , free_acir_arg , 1 }, /* sc… | |
235 ['b'] = { cmd_b , get_bt_arg , NULL , 2 }, /* br… | |
236 ['c'] = { cmd_c , get_aci_arg , free_acir_arg , 2 }, /* de… | |
237 ['d'] = { cmd_d , NULL , NULL , 2 }, /* de… | |
238 ['D'] = { cmd_D , NULL , NULL , 2 }, /* de… | |
239 ['g'] = { cmd_g , NULL , NULL , 2 }, /* re… | |
240 ['G'] = { cmd_G , NULL , NULL , 2 }, /* ap… | |
241 ['h'] = { cmd_h , NULL , NULL , 2 }, /* re… | |
242 ['H'] = { cmd_H , NULL , NULL , 2 }, /* ap… | |
243 ['i'] = { cmd_i , get_aci_arg , free_acir_arg , 1 }, /* wr… | |
244 ['l'] = { cmd_l , NULL , NULL , 2 }, /* wr… | |
245 ['n'] = { cmd_n , NULL , NULL , 2 }, /* wr… | |
246 ['N'] = { cmd_N , NULL , NULL , 2 }, /* ap… | |
247 ['p'] = { cmd_p , NULL , NULL , 2 }, /* wr… | |
248 ['P'] = { cmd_P , NULL , NULL , 2 }, /* wr… | |
249 ['q'] = { cmd_q , NULL , NULL , 1 }, /* qu… | |
250 ['r'] = { cmd_r , get_r_arg , free_acir_arg , 1 }, /* wr… | |
251 ['s'] = { cmd_s , get_s_arg , free_s_arg , 2 }, /* fi… | |
252 ['t'] = { cmd_t , get_bt_arg , NULL , 2 }, /* if… | |
253 ['w'] = { cmd_w , get_w_arg , NULL , 2 }, /* ap… | |
254 ['x'] = { cmd_x , NULL , NULL , 2 }, /* ex… | |
255 ['y'] = { cmd_y , get_y_arg , free_y_arg , 2 }, /* re… | |
256 [':'] = { cmd_colon , get_colon_arg , NULL , 0 }, /* de… | |
257 ['='] = { cmd_equal , NULL , NULL , 1 }, /* pr… | |
258 ['{'] = { cmd_lbrace, get_lbrace_arg, NULL , 2 }, /* if… | |
259 ['}'] = { cmd_rbrace, get_rbrace_arg, NULL , 0 }, /* no… | |
260 | |
261 [0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isasc… | |
262 }; | |
263 | |
264 /* | |
265 * Function Definitions | |
266 */ | |
267 | |
/* Resize the array at *ptr, which currently holds *nmemb members of the
 * given size, so that it holds new_nmemb members, and store new_nmemb in
 * *nmemb. A new_nmemb of 0 frees the array and leaves *ptr NULL.
 * If next is not NULL, *next receives a pointer to one past the old end
 * inside the resized array (the first new slot when growing), or NULL when
 * the array was freed.
 * Allocation failure is left to ereallocarray() (originally: "explode").
 */
static void
resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
{
	void *end, *tmp;

	if (new_nmemb) {
		tmp = ereallocarray(*ptr, new_nmemb, size);
	} else { /* turns out realloc(*ptr, 0) != free(*ptr) */
		free(*ptr);
		tmp = NULL;
	}
	/* use the old *nmemb before it is overwritten, and never do
	 * arithmetic on a null pointer */
	end = tmp ? (char *)tmp + *nmemb * size : NULL;
	*nmemb = new_nmemb;
	*ptr = tmp;
	if (next)
		*next = end;
}
289 | |
290 static void * | |
291 pop(Vec *v) | |
292 { | |
293 if (!v->size) | |
294 return NULL; | |
295 return v->data[--v->size]; | |
296 } | |
297 | |
298 static void | |
299 push(Vec *v, void *p) | |
300 { | |
301 if (v->size == v->cap) | |
302 resize((void **)&v->data, &v->cap, sizeof(*v->data), v->… | |
303 v->data[v->size++] = p; | |
304 } | |
305 | |
306 static void | |
307 stracat(String *dst, char *src) | |
308 { | |
309 int new = !dst->cap; | |
310 size_t len; | |
311 | |
312 len = (new ? 0 : strlen(dst->str)) + strlen(src) + 1; | |
313 if (dst->cap < len) | |
314 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL); | |
315 if (new) | |
316 *dst->str = '\0'; | |
317 strcat(dst->str, src); | |
318 } | |
319 | |
320 static void | |
321 strnacat(String *dst, char *src, size_t n) | |
322 { | |
323 int new = !dst->cap; | |
324 size_t len; | |
325 | |
326 len = strlen(src); | |
327 len = (new ? 0 : strlen(dst->str)) + MIN(n, len) + 1; | |
328 if (dst->cap < len) | |
329 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL); | |
330 if (new) | |
331 *dst->str = '\0'; | |
332 strlcat(dst->str, src, len); | |
333 } | |
334 | |
335 static void | |
336 stracpy(String *dst, char *src) | |
337 { | |
338 size_t len; | |
339 | |
340 len = strlen(src) + 1; | |
341 if (dst->cap < len) | |
342 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL); | |
343 strcpy(dst->str, src); | |
344 } | |
345 | |
346 static void | |
347 leprintf(char *s) | |
348 { | |
349 if (errno) | |
350 eprintf("%zu: %s: %s\n", lineno, s, strerror(errno)); | |
351 else | |
352 eprintf("%zu: %s\n", lineno, s); | |
353 } | |
354 | |
355 /* FIXME: write usage message */ | |
356 static void | |
357 usage(void) | |
358 { | |
359 eprintf("usage: sed [-nrE] script [file ...]\n" | |
360 " sed [-nrE] -e script [-e script] ... [-f scriptf… | |
361 " sed [-nrE] [-e script] ... -f scriptfile [-f scr… | |
362 } | |
363 | |
364 /* Differences from POSIX | |
365 * we allow semicolons and trailing blanks inside {}
366 * we allow spaces after ! (and in between !s) | |
367 * we allow extended regular expressions (-E) | |
368 */ | |
369 static void | |
370 compile(char *s, int isfile) | |
371 { | |
372 FILE *f; | |
373 | |
374 if (isfile) { | |
375 f = fopen(s, "r"); | |
376 if (!f) | |
377 eprintf("fopen %s:", s); | |
378 } else { | |
379 if (!*s) /* empty string script */ | |
380 return; | |
381 f = fmemopen(s, strlen(s), "r"); | |
382 if (!f) | |
383 eprintf("fmemopen:"); | |
384 } | |
385 | |
386 /* NOTE: get arg functions can't use genbuf */ | |
387 while (read_line(f, &genbuf) != EOF) { | |
388 s = genbuf.str; | |
389 | |
390 /* if the first two characters of the script are "#n" de… | |
391 if (++lineno == 1 && *s == '#' && s[1] == 'n') { | |
392 gflags.n = 1; | |
393 continue; | |
394 } | |
395 | |
396 if (gflags.aci_cont) { | |
397 aci_append(pc - 1, s); | |
398 continue; | |
399 } | |
400 if (gflags.s_cont) | |
401 s = (pc - 1)->fninfo->getarg(pc - 1, s); | |
402 | |
403 while (*s) { | |
404 s = chompr(s, ';'); | |
405 if (!*s || *s == '#') | |
406 break; | |
407 | |
408 if ((size_t)(pc - prog) == pcap) | |
409 resize((void **)&prog, &pcap, sizeof(*pr… | |
410 | |
411 pc->range.beg.type = pc->range.end.type = IGNORE; | |
412 pc->fninfo = NULL; | |
413 pc->in_match = 0; | |
414 | |
415 s = make_range(&pc->range, s); | |
416 s = chomp(s); | |
417 pc->negate = *s == '!'; | |
418 s = chompr(s, '!'); | |
419 | |
420 if (!isascii(*s) || !(pc->fninfo = &fns[(unsigne… | |
421 leprintf("bad sed function"); | |
422 if (pc->range.naddr > pc->fninfo->naddr) | |
423 leprintf("wrong number of addresses"); | |
424 s++; | |
425 | |
426 if (pc->fninfo->getarg) | |
427 s = pc->fninfo->getarg(pc, s); | |
428 | |
429 pc++; | |
430 } | |
431 } | |
432 | |
433 fshut(f, s); | |
434 } | |
435 | |
436 /* FIXME: if we decide to honor lack of trailing newline, set/clear a gl… | |
437 * flag when reading a line | |
438 */ | |
439 static int | |
440 read_line(FILE *f, String *s) | |
441 { | |
442 ssize_t len; | |
443 | |
444 if (!f) | |
445 return EOF; | |
446 | |
447 if ((len = getline(&s->str, &s->cap, f)) < 0) { | |
448 if (ferror(f)) | |
449 eprintf("getline:"); | |
450 return EOF; | |
451 } | |
452 if (s->str[--len] == '\n') | |
453 s->str[len] = '\0'; | |
454 return 0; | |
455 } | |
456 | |
457 /* read first range from s, return pointer to one past end of range */ | |
458 static char * | |
459 make_range(Range *range, char *s) | |
460 { | |
461 s = make_addr(&range->beg, s); | |
462 | |
463 if (*s == ',') | |
464 s = make_addr(&range->end, s + 1); | |
465 else | |
466 range->end.type = IGNORE; | |
467 | |
468 if (range->beg.type == EVERY && range->end.type == IGNORE)… | |
469 else if (range->beg.type != IGNORE && range->end.type == IGNORE)… | |
470 else if (range->beg.type != IGNORE && range->end.type != IGNORE)… | |
471 else leprintf("this is impossible..."); | |
472 | |
473 return s; | |
474 } | |
475 | |
476 /* read first addr from s, return pointer to one past end of addr */ | |
477 static char * | |
478 make_addr(Addr *addr, char *s) | |
479 { | |
480 Rune r; | |
481 char *p = s + strlen(s); | |
482 size_t rlen = echarntorune(&r, s, p - s); | |
483 | |
484 if (r == '$') { | |
485 addr->type = LAST; | |
486 s += rlen; | |
487 } else if (isdigitrune(r)) { | |
488 addr->type = LINE; | |
489 addr->u.lineno = stol(s, &s); | |
490 } else if (r == '/' || r == '\\') { | |
491 Rune delim; | |
492 if (r == '\\') { | |
493 s += rlen; | |
494 rlen = echarntorune(&r, s, p - s); | |
495 } | |
496 if (r == '\\') | |
497 leprintf("bad delimiter '\\'"); | |
498 delim = r; | |
499 s += rlen; | |
500 rlen = echarntorune(&r, s, p - s); | |
501 if (r == delim) { | |
502 addr->type = LASTRE; | |
503 s += rlen; | |
504 } else { | |
505 addr->type = REGEX; | |
506 p = find_delim(s, delim, 1); | |
507 if (!*p) | |
508 leprintf("unclosed regex"); | |
509 p -= escapes(s, p, delim, 0); | |
510 *p++ = '\0'; | |
511 addr->u.re = emalloc(sizeof(*addr->u.re)); | |
512 eregcomp(addr->u.re, s, gflags.E ? REG_EXTENDED … | |
513 s = p; | |
514 } | |
515 } else { | |
516 addr->type = EVERY; | |
517 } | |
518 | |
519 return s; | |
520 } | |
521 | |
522 /* return pointer to first delim in s that is not escaped | |
523 * and if do_brackets is set, not in [] (note possible [::], [..], [==],… | |
524 * return pointer to trailing nul byte if no delim found | |
525 * | |
526 * any escaped character that is not special is just itself (POSIX undef… | |
527 * FIXME: pull out into some util thing, will be useful for ed as well | |
528 */ | |
529 static char * | |
530 find_delim(char *s, Rune delim, int do_brackets) | |
531 { | |
532 enum { | |
533 OUTSIDE , /* not in brackets */ | |
534 BRACKETS_OPENING, /* last char was first [ or last two w… | |
535 BRACKETS_INSIDE , /* inside [] */ | |
536 INSIDE_OPENING , /* inside [] and last char was [ */ | |
537 CLASS_INSIDE , /* inside class [::], or colating elem… | |
538 CLASS_CLOSING , /* inside class [::], or colating elem… | |
539 } state = OUTSIDE; | |
540 | |
541 Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall… | |
542 size_t rlen; | |
543 int escape = 0; | |
544 char *end = s + strlen(s); | |
545 | |
546 for (; *s; s += rlen) { | |
547 rlen = echarntorune(&r, s, end - s); | |
548 | |
549 if (state == BRACKETS_OPENING && r == '^' )… | |
550 else if (state == BRACKETS_OPENING && r == ']' )… | |
551 else if (state == BRACKETS_OPENING )… | |
552 | |
553 if (state == CLASS_CLOSING && r == ']' )… | |
554 else if (state == CLASS_CLOSING )… | |
555 else if (state == CLASS_INSIDE && r == c )… | |
556 else if (state == INSIDE_OPENING && (r == ':' || | |
557 r == '.' || | |
558 r == '=') )… | |
559 else if (state == INSIDE_OPENING && r == ']' )… | |
560 else if (state == INSIDE_OPENING )… | |
561 else if (state == BRACKETS_INSIDE && r == '[' )… | |
562 else if (state == BRACKETS_INSIDE && r == ']' )… | |
563 else if (state == OUTSIDE && escape )… | |
564 else if (state == OUTSIDE && r == '\\' )… | |
565 else if (state == OUTSIDE && r == delim)… | |
566 else if (state == OUTSIDE && do_brackets && r == '[' )… | |
567 } | |
568 return s; | |
569 } | |
570 | |
/* chompr() with rune 0, i.e. no extra rune to eat besides whitespace */
static char *
chomp(char *s)
{
	char *rest = chompr(s, 0);

	return rest;
}
576 | |
577 /* eat all leading whitespace and occurrences of rune */ | |
578 static char * | |
579 chompr(char *s, Rune rune) | |
580 { | |
581 Rune r; | |
582 size_t rlen; | |
583 char *end = s + strlen(s); | |
584 | |
585 while (*s && (rlen = echarntorune(&r, s, end - s)) && (isspaceru… | |
586 s += rlen; | |
587 return s; | |
588 } | |
589 | |
590 /* convert first nrunes Runes from UTF-8 string s in allocated Rune* | |
591 * NOTE: sequence must be valid UTF-8, check first */ | |
592 static Rune * | |
593 strtorunes(char *s, size_t nrunes) | |
594 { | |
595 Rune *rs, *rp; | |
596 | |
597 rp = rs = ereallocarray(NULL, nrunes + 1, sizeof(*rs)); | |
598 | |
599 while (nrunes--) | |
600 s += chartorune(rp++, s); | |
601 | |
602 *rp = '\0'; | |
603 return rs; | |
604 } | |
605 | |
/* parse a decimal number at the start of s and store a pointer to the
 * first character after it in *endp
 * a conversion error or a missing number is reported through leprintf() */
static long
stol(char *s, char **endp)
{
	long n;

	errno = 0; /* strtol() only sets errno on failure, it never clears it */
	n = strtol(s, endp, 10);

	/* leprintf() adds ": <strerror>" itself when errno is set, so the
	 * message must not end in a colon ("strtol:: ..." otherwise) */
	if (errno)
		leprintf("strtol");
	if (*endp == s)
		leprintf("strtol: invalid number");

	return n;
}
620 | |
621 /* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d i… | |
622 * if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (n… | |
623 * if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (… | |
624 * if delim is 0 all escaped characters represent themselves (aci text) | |
625 * memmove rest of string (beyond end) into place | |
626 * return the number of converted escapes (backslashes removed) | |
627 * FIXME: this has had too many corner cases slapped on and is ugly. rew… | |
628 */ | |
629 static size_t | |
630 escapes(char *beg, char *end, Rune delim, int n_newline) | |
631 { | |
632 size_t num = 0; | |
633 char *src = beg, *dst = beg; | |
634 | |
635 while (src < end) { | |
636 /* handle escaped backslash specially so we don't think … | |
637 * backslash is escaping something */ | |
638 if (*src == '\\' && src[1] == '\\') { | |
639 *dst++ = *src++; | |
640 if (delim) | |
641 *dst++ = *src++; | |
642 else | |
643 src++; | |
644 } else if (*src == '\\' && !delim) { | |
645 src++; | |
646 } else if (*src == '\\' && src[1]) { | |
647 Rune r; | |
648 size_t rlen; | |
649 num++; | |
650 src++; | |
651 rlen = echarntorune(&r, src, end - src); | |
652 | |
653 if (r == 'n' && delim == 'n') { | |
654 *src = n_newline ? '\n' : 'n'; /* src so… | |
655 } else if (r == 'n') { | |
656 *src = '\n'; | |
657 } else if (r != delim) { | |
658 *dst++ = '\\'; | |
659 num--; | |
660 } | |
661 | |
662 memmove(dst, src, rlen); | |
663 dst += rlen; | |
664 src += rlen; | |
665 } else { | |
666 *dst++ = *src++; | |
667 } | |
668 } | |
669 memmove(dst, src, strlen(src) + 1); | |
670 return num; | |
671 } | |
672 | |
673 static size_t | |
674 echarntorune(Rune *r, char *s, size_t n) | |
675 { | |
676 size_t rlen = charntorune(r, s, n); | |
677 if (!rlen || *r == Runeerror) | |
678 leprintf("invalid UTF-8"); | |
679 return rlen; | |
680 } | |
681 | |
682 static void | |
683 insert_labels(void) | |
684 { | |
685 size_t i; | |
686 Cmd *from, *to; | |
687 | |
688 while (branches.size) { | |
689 from = prog + (ptrdiff_t)pop(&branches); | |
690 | |
691 if (!from->u.label) {/* no label branch to end of script… | |
692 from->u.jump = pc - 1; | |
693 } else { | |
694 for (i = 0; i < labels.size; i++) { | |
695 to = prog + (ptrdiff_t)labels.data[i]; | |
696 if (!strcmp(from->u.label, to->u.label))… | |
697 from->u.jump = to; | |
698 break; | |
699 } | |
700 } | |
701 if (i == labels.size) | |
702 leprintf("bad label"); | |
703 } | |
704 } | |
705 } | |
706 | |
707 /* | |
708 * Getargs / Freeargs | |
709 * Read argument from s, return pointer to one past last character of ar… | |
710 */ | |
711 | |
712 /* POSIX compliant | |
713 * i\ | |
714 * foobar | |
715 * | |
716 * also allow the following non POSIX compliant | |
717 * i # empty line | |
718 * ifoobar | |
719 * ifoobar\ | |
720 * baz | |
721 * | |
722 * FIXME: GNU and busybox discard leading spaces | |
723 * i foobar | |
724 * i foobar | |
725 * ifoobar | |
726 * are equivalent in GNU and busybox. We don't. Should we? | |
727 */ | |
728 static char * | |
729 get_aci_arg(Cmd *c, char *s) | |
730 { | |
731 c->u.acir.print = check_puts; | |
732 c->u.acir.str = (String){ NULL, 0 }; | |
733 | |
734 gflags.aci_cont = !!*s; /* no continue flag if empty string */ | |
735 | |
736 /* neither empty string nor POSIX compliant */ | |
737 if (*s && !(*s == '\\' && !s[1])) | |
738 aci_append(c, s); | |
739 | |
740 return s + strlen(s); | |
741 } | |
742 | |
743 static void | |
744 aci_append(Cmd *c, char *s) | |
745 { | |
746 char *end = s + strlen(s), *p = end; | |
747 | |
748 gflags.aci_cont = 0; | |
749 while (--p >= s && *p == '\\') | |
750 gflags.aci_cont = !gflags.aci_cont; | |
751 | |
752 if (gflags.aci_cont) | |
753 *--end = '\n'; | |
754 | |
755 escapes(s, end, 0, 0); | |
756 stracat(&c->u.acir.str, s); | |
757 } | |
758 | |
759 static void | |
760 free_acir_arg(Cmd *c) | |
761 { | |
762 free(c->u.acir.str.str); | |
763 } | |
764 | |
765 /* POSIX dictates that label is rest of line, including semicolons, trai… | |
766 * whitespace, closing braces, etc. and can be limited to 8 bytes | |
767 * | |
768 * I allow a semicolon or closing brace to terminate a label name, it's … | |
769 * POSIX compliant, but it's useful and every sed version I've tried to … | |
770 * does the same. | |
771 * | |
772 * FIXME: POSIX dictates that leading whitespace is ignored but trailing | |
773 * whitespace is not. This is annoying and we should probably get rid of… | |
774 */ | |
775 static char * | |
776 get_bt_arg(Cmd *c, char *s) | |
777 { | |
778 char *p = semicolon_arg(s = chomp(s)); | |
779 | |
780 if (p != s) { | |
781 c->u.label = estrndup(s, p - s); | |
782 } else { | |
783 c->u.label = NULL; | |
784 } | |
785 | |
786 push(&branches, (void *)(c - prog)); | |
787 | |
788 return p; | |
789 } | |
790 | |
791 /* POSIX dictates file name is rest of line including semicolons, traili… | |
792 * whitespace, closing braces, etc. and file name must be preceded by a … | |
793 * | |
794 * I allow a semicolon or closing brace to terminate a file name and don… | |
795 * enforce leading space. | |
796 * | |
797 * FIXME: decide whether trailing whitespace should be included and fix | |
798 * accordingly | |
799 */ | |
800 static char * | |
801 get_r_arg(Cmd *c, char *s) | |
802 { | |
803 char *p = semicolon_arg(s = chomp(s)); | |
804 | |
805 if (p == s) | |
806 leprintf("no file name"); | |
807 | |
808 c->u.acir.str.str = estrndup(s, p - s); | |
809 c->u.acir.print = write_file; | |
810 | |
811 return p; | |
812 } | |
813 | |
814 /* we allow "\\n" in replacement text to mean "\n" (undefined in POSIX) | |
815 * | |
816 * FIXME: allow other escapes in regex and replacement? if so change esc… | |
817 */ | |
818 static char * | |
819 get_s_arg(Cmd *c, char *s) | |
820 { | |
821 Rune delim, r; | |
822 Cmd buf; | |
823 char *p; | |
824 int esc, lastre; | |
825 | |
826 /* s/Find/Replace/Flags */ | |
827 | |
828 /* Find */ | |
829 if (!gflags.s_cont) { /* NOT continuing from literal newline in … | |
830 lastre = 0; | |
831 c->u.s.repl = (String){ NULL, 0 }; | |
832 c->u.s.occurrence = 1; | |
833 c->u.s.file = NULL; | |
834 c->u.s.p = 0; | |
835 | |
836 if (!*s || *s == '\\') | |
837 leprintf("bad delimiter"); | |
838 | |
839 p = s + strlen(s); | |
840 s += echarntorune(&delim, s, p - s); | |
841 c->u.s.delim = delim; | |
842 | |
843 echarntorune(&r, s, p - s); | |
844 if (r == delim) /* empty regex */ | |
845 lastre = 1; | |
846 | |
847 p = find_delim(s, delim, 1); | |
848 if (!*p) | |
849 leprintf("missing second delimiter"); | |
850 p -= escapes(s, p, delim, 0); | |
851 *p = '\0'; | |
852 | |
853 if (lastre) { | |
854 c->u.s.re = NULL; | |
855 } else { | |
856 c->u.s.re = emalloc(sizeof(*c->u.s.re)); | |
857 /* FIXME: different eregcomp that calls fatal */ | |
858 eregcomp(c->u.s.re, s, gflags.E ? REG_EXTENDED :… | |
859 } | |
860 s = p + runelen(delim); | |
861 } | |
862 | |
863 /* Replace */ | |
864 delim = c->u.s.delim; | |
865 | |
866 p = find_delim(s, delim, 0); | |
867 p -= escapes(s, p, delim, 0); | |
868 if (!*p) { /* no third delimiter */ | |
869 /* FIXME: same backslash counting as aci_append() */ | |
870 if (p[-1] != '\\') | |
871 leprintf("missing third delimiter or <backslash>… | |
872 p[-1] = '\n'; | |
873 gflags.s_cont = 1; | |
874 } else { | |
875 gflags.s_cont = 0; | |
876 } | |
877 | |
878 /* check for bad references in replacement text */ | |
879 *p = '\0'; | |
880 for (esc = 0, p = s; *p; p++) { | |
881 if (esc) { | |
882 esc = 0; | |
883 if (isdigit(*p) && c->u.s.re && (size_t)(*p - '0… | |
884 leprintf("back reference number greater … | |
885 } else if (*p == '\\') { | |
886 esc = 1; | |
887 } | |
888 } | |
889 stracat(&c->u.s.repl, s); | |
890 | |
891 if (gflags.s_cont) | |
892 return p; | |
893 | |
894 s = p + runelen(delim); | |
895 | |
896 /* Flags */ | |
897 p = semicolon_arg(s = chomp(s)); | |
898 | |
899 /* FIXME: currently for simplicity take last of g or occurrence … | |
900 * ignore multiple p flags. need to fix that */ | |
901 for (; s < p; s++) { | |
902 if (isdigit(*s)) { | |
903 c->u.s.occurrence = stol(s, &s); | |
904 s--; /* for loop will advance pointer */ | |
905 } else { | |
906 switch (*s) { | |
907 case 'g': c->u.s.occurrence = 0; break; | |
908 case 'p': c->u.s.p = 1; break; | |
909 case 'w': | |
910 /* must be last flag, take everything up… | |
911 * s == p after this */ | |
912 s = get_w_arg(&buf, chomp(s+1)); | |
913 c->u.s.file = buf.u.file; | |
914 break; | |
915 } | |
916 } | |
917 } | |
918 return p; | |
919 } | |
920 | |
921 static void | |
922 free_s_arg(Cmd *c) | |
923 { | |
924 if (c->u.s.re) | |
925 regfree(c->u.s.re); | |
926 free(c->u.s.re); | |
927 free(c->u.s.repl.str); | |
928 } | |
929 | |
930 /* see get_r_arg notes */ | |
931 static char * | |
932 get_w_arg(Cmd *c, char *s) | |
933 { | |
934 char *p = semicolon_arg(s = chomp(s)); | |
935 Wfile *w, **wp; | |
936 | |
937 if (p == s) | |
938 leprintf("no file name"); | |
939 | |
940 for (wp = (Wfile **)wfiles.data; (size_t)(wp - (Wfile **)wfiles.… | |
941 if (strlen((*wp)->path) == (size_t)(p - s) && !strncmp(s… | |
942 c->u.file = (*wp)->file; | |
943 return p; | |
944 } | |
945 } | |
946 | |
947 w = emalloc(sizeof(*w)); | |
948 w->path = estrndup(s, p - s); | |
949 | |
950 if (!(w->file = fopen(w->path, "w"))) | |
951 leprintf("fopen failed"); | |
952 | |
953 c->u.file = w->file; | |
954 | |
955 push(&wfiles, w); | |
956 return p; | |
957 } | |
958 | |
959 static char * | |
960 get_y_arg(Cmd *c, char *s) | |
961 { | |
962 Rune delim; | |
963 char *p = s + strlen(s); | |
964 size_t rlen = echarntorune(&delim, s, p - s); | |
965 size_t nrunes1, nrunes2; | |
966 | |
967 c->u.y.set1 = c->u.y.set2 = NULL; | |
968 | |
969 s += rlen; | |
970 p = find_delim(s, delim, 0); | |
971 p -= escapes(s, p, delim, 1); | |
972 nrunes1 = utfnlen(s, p - s); | |
973 c->u.y.set1 = strtorunes(s, nrunes1); | |
974 | |
975 s = p + rlen; | |
976 p = find_delim(s, delim, 0); | |
977 p -= escapes(s, p, delim, 1); | |
978 nrunes2 = utfnlen(s, p - s); | |
979 | |
980 if (nrunes1 != nrunes2) | |
981 leprintf("different set lengths"); | |
982 | |
983 c->u.y.set2 = strtorunes(s, utfnlen(s, p - s)); | |
984 | |
985 return p + rlen; | |
986 } | |
987 | |
988 static void | |
989 free_y_arg(Cmd *c) | |
990 { | |
991 free(c->u.y.set1); | |
992 free(c->u.y.set2); | |
993 } | |
994 | |
995 /* see get_bt_arg notes */ | |
996 static char * | |
997 get_colon_arg(Cmd *c, char *s) | |
998 { | |
999 char *p = semicolon_arg(s = chomp(s)); | |
1000 | |
1001 if (p == s) | |
1002 leprintf("no label name"); | |
1003 | |
1004 c->u.label = estrndup(s, p - s); | |
1005 push(&labels, (void *)(c - prog)); | |
1006 return p; | |
1007 } | |
1008 | |
1009 static char * | |
1010 get_lbrace_arg(Cmd *c, char *s) | |
1011 { | |
1012 push(&braces, (void *)(c - prog)); | |
1013 return s; | |
1014 } | |
1015 | |
1016 static char * | |
1017 get_rbrace_arg(Cmd *c, char *s) | |
1018 { | |
1019 Cmd *lbrace; | |
1020 | |
1021 if (!braces.size) | |
1022 leprintf("extra }"); | |
1023 | |
1024 lbrace = prog + (ptrdiff_t)pop(&braces); | |
1025 lbrace->u.offset = c - prog; | |
1026 return s; | |
1027 } | |
1028 | |
1029 /* s points to beginning of an argument that may be semicolon terminated | |
1030 * return pointer to semicolon or nul byte after string | |
1031 * or closing brace as to not force ; before } | |
1032 * FIXME: decide whether or not to eat trailing whitespace for arguments… | |
1033 * we allow semicolon/brace termination that POSIX doesn't | |
1034 * b, r, t, w, : | |
1035 * POSIX says trailing whitespace is part of label name, file nam… | |
1036 * we should probably eat it | |
1037 */ | |
static char *
semicolon_arg(char *s)
{
	/* length of the leading part free of ';' and '}', the whole string
	 * when neither occurs */
	return s + strcspn(s, ";}");
}
1046 | |
1047 static void | |
1048 run(void) | |
1049 { | |
1050 lineno = 0; | |
1051 if (braces.size) | |
1052 leprintf("extra {"); | |
1053 | |
1054 /* genbuf has already been initialized, patt will be in new_line | |
1055 * (or we'll halt) */ | |
1056 stracpy(&hold, ""); | |
1057 | |
1058 insert_labels(); | |
1059 next_file(); | |
1060 new_line(); | |
1061 | |
1062 for (pc = prog; !gflags.halt; pc++) | |
1063 pc->fninfo->fn(pc); | |
1064 } | |
1065 | |
1066 /* return true if we are in range for c, set c->in_match appropriately */ | |
/* Result by case, checked in this order:
 *  - the beg address matches: returns !c->negate; for a two-address command
 *    c->in_match is also set, to 0 when the end address is a LINE address
 *    meeting the condition below, otherwise to 1
 *    NOTE(review): the rest of that condition is cut off in this view,
 *    confirm it against the full file
 *  - c->in_match is set and the end address matches: c->in_match is cleared
 *    and !c->negate is returned
 *  - otherwise: c->in_match ^ c->negate
 * Calling this has side effects (c->in_match, and whatever match_addr
 * does), which is why update_ranges calls it on skipped commands.
 */
1067 static int | |
1068 in_range(Cmd *c) | |
1069 { | |
1070 if (match_addr(&c->range.beg)) { | |
1071 if (c->range.naddr == 2) { | |
1072 if (c->range.end.type == LINE && c->range.end.u.… | |
1073 c->in_match = 0; | |
1074 else | |
1075 c->in_match = 1; | |
1076 } | |
1077 return !c->negate; | |
1078 } | |
1079 if (c->in_match && match_addr(&c->range.end)) { | |
1080 c->in_match = 0; | |
1081 return !c->negate; | |
1082 } | |
1083 return c->in_match ^ c->negate; | |
1084 } | |
1085 | |
1086 /* return true if addr matches current line */ | |
1087 static int | |
1088 match_addr(Addr *a) | |
1089 { | |
1090 switch (a->type) { | |
1091 default: | |
1092 case IGNORE: return 0; | |
1093 case EVERY: return 1; | |
1094 case LINE: return lineno == a->u.lineno; | |
1095 case LAST: | |
1096 while (is_eof(file) && !next_file()) | |
1097 ; | |
1098 return !file; | |
1099 case REGEX: | |
1100 lastre = a->u.re; | |
1101 return !regexec(a->u.re, patt.str, 0, NULL, 0); | |
1102 case LASTRE: | |
1103 if (!lastre) | |
1104 leprintf("no previous regex"); | |
1105 return !regexec(lastre, patt.str, 0, NULL, 0); | |
1106 } | |
1107 } | |
1108 | |
1109 /* move to next input file | |
1110 * stdin if first call and no files | |
1111 * return 0 for success and 1 for no more files | |
1112 */ | |
1113 static int | |
1114 next_file(void) | |
1115 { | |
1116 static unsigned char first = 1; | |
1117 | |
1118 if (file == stdin) | |
1119 clearerr(file); | |
1120 else if (file) | |
1121 fshut(file, "<file>"); | |
1122 /* given no files, default to stdin */ | |
1123 file = first && !*files ? stdin : NULL; | |
1124 first = 0; | |
1125 | |
1126 while (!file && *files) { | |
1127 if (!strcmp(*files, "-")) { | |
1128 file = stdin; | |
1129 } else if (!(file = fopen(*files, "r"))) { | |
1130 /* warn this file didn't open, but move on to ne… | |
1131 weprintf("fopen %s:", *files); | |
1132 ret = 1; | |
1133 } | |
1134 files++; | |
1135 } | |
1136 | |
1137 return !file; | |
1138 } | |
1139 | |
/* test if stream is at EOF
 * a NULL stream counts as EOF
 * otherwise one character is read and, if there was one, pushed back
 * read errors and a failed push back are reported with eprintf
 */
static int
is_eof(FILE *f)
{
	int peeked;

	if (!f)
		return 1;
	if (feof(f))
		return 1;

	peeked = fgetc(f);
	if (peeked == EOF) {
		if (ferror(f))
			eprintf("fgetc:");
		return 1;
	}

	if (ungetc(peeked, f) == EOF)
		eprintf("ungetc EOF\n");
	return 0;
}
1157 | |
1158 /* perform writes that were scheduled | |
1159 * for aci this is check_puts(string, stdout) | |
1160 * for r this is write_file(path, stdout) | |
1161 */ | |
1162 static void | |
1163 do_writes(void) | |
1164 { | |
1165 Cmd *c; | |
1166 size_t i; | |
1167 | |
1168 for (i = 0; i < writes.size; i++) { | |
1169 c = writes.data[i]; | |
1170 c->u.acir.print(c->u.acir.str.str, stdout); | |
1171 } | |
1172 writes.size = 0; | |
1173 } | |
1174 | |
1175 /* used for r's u.acir.print() | |
1176 * FIXME: something like util's concat() would be better | |
1177 */ | |
1178 static void | |
1179 write_file(char *path, FILE *out) | |
1180 { | |
1181 FILE *in = fopen(path, "r"); | |
1182 if (!in) /* no file is treated as empty file */ | |
1183 return; | |
1184 | |
1185 while (read_line(in, &genbuf) != EOF) | |
1186 check_puts(genbuf.str, out); | |
1187 | |
1188 fshut(in, path); | |
1189 } | |
1190 | |
/* write s (skipped when NULL) followed by a newline to f
 * a failure of either write is reported with eprintf
 */
static void
check_puts(char *s, FILE *f)
{
	if (s) {
		if (fputs(s, f) == EOF)
			eprintf("fputs:");
	}
	if (fputs("\n", f) == EOF)
		eprintf("fputs:");
}
1199 | |
1200 /* iterate from beg to end updating ranges so we don't miss any commands | |
1201 * e.g. sed -n '1d;1,3p' should still print lines 2 and 3 | |
1202 */ | |
1203 static void | |
1204 update_ranges(Cmd *beg, Cmd *end) | |
1205 { | |
1206 while (beg < end) | |
1207 in_range(beg++); | |
1208 } | |
1209 | |
1210 /* | |
1211 * Sed functions | |
1212 */ | |
1213 static void | |
1214 cmd_a(Cmd *c) | |
1215 { | |
1216 if (in_range(c)) | |
1217 push(&writes, c); | |
1218 } | |
1219 | |
1220 static void | |
1221 cmd_b(Cmd *c) | |
1222 { | |
1223 if (!in_range(c)) | |
1224 return; | |
1225 | |
1226 /* if we jump backwards update to end, otherwise update to desti… | |
1227 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap); | |
1228 pc = c->u.jump; | |
1229 } | |
1230 | |
1231 static void | |
1232 cmd_c(Cmd *c) | |
1233 { | |
1234 if (!in_range(c)) | |
1235 return; | |
1236 | |
1237 /* write the text on the last line of the match */ | |
1238 if (!c->in_match) | |
1239 check_puts(c->u.acir.str.str, stdout); | |
1240 /* otherwise start the next cycle without printing pattern space | |
1241 * effectively deleting the text */ | |
1242 new_next(); | |
1243 } | |
1244 | |
1245 static void | |
1246 cmd_d(Cmd *c) | |
1247 { | |
1248 if (!in_range(c)) | |
1249 return; | |
1250 | |
1251 new_next(); | |
1252 } | |
1253 | |
1254 static void | |
1255 cmd_D(Cmd *c) | |
1256 { | |
1257 char *p; | |
1258 | |
1259 if (!in_range(c)) | |
1260 return; | |
1261 | |
1262 if ((p = strchr(patt.str, '\n'))) { | |
1263 p++; | |
1264 memmove(patt.str, p, strlen(p) + 1); | |
1265 old_next(); | |
1266 } else { | |
1267 new_next(); | |
1268 } | |
1269 } | |
1270 | |
1271 static void | |
1272 cmd_g(Cmd *c) | |
1273 { | |
1274 if (in_range(c)) | |
1275 stracpy(&patt, hold.str); | |
1276 } | |
1277 | |
1278 static void | |
1279 cmd_G(Cmd *c) | |
1280 { | |
1281 if (!in_range(c)) | |
1282 return; | |
1283 | |
1284 stracat(&patt, "\n"); | |
1285 stracat(&patt, hold.str); | |
1286 } | |
1287 | |
1288 static void | |
1289 cmd_h(Cmd *c) | |
1290 { | |
1291 if (in_range(c)) | |
1292 stracpy(&hold, patt.str); | |
1293 } | |
1294 | |
1295 static void | |
1296 cmd_H(Cmd *c) | |
1297 { | |
1298 if (!in_range(c)) | |
1299 return; | |
1300 | |
1301 stracat(&hold, "\n"); | |
1302 stracat(&hold, patt.str); | |
1303 } | |
1304 | |
1305 static void | |
1306 cmd_i(Cmd *c) | |
1307 { | |
1308 if (in_range(c)) | |
1309 check_puts(c->u.acir.str.str, stdout); | |
1310 } | |
1311 | |
1312 /* I think it makes sense to print invalid UTF-8 sequences in octal to satisfy | |
1313 * the "visually unambiguous form" required by sed(1p) | |
1314 */ | |
/* l: when the line is selected, write the pattern space to stdout in an
 * unambiguous form and finish it with "$" and a newline
 *  - an ASCII byte with an entry in the escapes table is written as that
 *    escape (backslash, \a, \b, \f, \r, \t, \v)
 *  - bytes left over when charntorune returns 0, and the bytes of a sequence
 *    decoded as Runeerror, are written one by one as \ooo octal escapes
 *  - any other rune is copied through with fwrite
 * NOTE(review): the retry condition of the fwrite loop is cut off in this
 * view, confirm it against the full file
 */
1315 static void | |
1316 cmd_l(Cmd *c) | |
1317 { | |
1318 Rune r; | |
1319 char *p, *end; | |
1320 size_t rlen; | |
1321 | |
1322 char *escapes[] = { /* FIXME: 7 entries and search instead of 12… | |
1323 ['\\'] = "\\\\", ['\a'] = "\\a", ['\b'] = "\\b", | |
1324 ['\f'] = "\\f" , ['\r'] = "\\r", ['\t'] = "\\t", | |
1325 ['\v'] = "\\v" , [0x7f] = NULL, /* fill out the table */ | |
1326 }; | |
1327 | |
1328 if (!in_range(c)) | |
1329 return; | |
1330 | |
1331 /* FIXME: line wrapping. sed(1p) says "length at which folding o… | |
1332 * unspecified, but should be appropriate for the output device" | |
1333 * just wrap at 80 Runes? | |
1334 */ | |
1335 for (p = patt.str, end = p + strlen(p); p < end; p += rlen) { | |
1336 if (isascii(*p) && escapes[(unsigned int)*p]) { | |
1337 fputs(escapes[(unsigned int)*p], stdout); | |
1338 rlen = 1; | |
1339 } else if (!(rlen = charntorune(&r, p, end - p))) { | |
1340 /* ran out of chars, print the bytes of the shor… | |
1341 for (; p < end; p++) | |
1342 printf("\\%03hho", (unsigned char)*p); | |
1343 break; | |
1344 } else if (r == Runeerror) { | |
1345 for (; rlen; rlen--, p++) | |
1346 printf("\\%03hho", (unsigned char)*p); | |
1347 } else { | |
1348 while (fwrite(p, rlen, 1, stdout) < 1 && errno =… | |
1349 ; | |
1350 if (ferror(stdout)) | |
1351 eprintf("fwrite:"); | |
1352 } | |
1353 } | |
1354 check_puts("$", stdout); | |
1355 } | |
1356 | |
1357 static void | |
1358 cmd_n(Cmd *c) | |
1359 { | |
1360 if (!in_range(c)) | |
1361 return; | |
1362 | |
1363 if (!gflags.n) | |
1364 check_puts(patt.str, stdout); | |
1365 do_writes(); | |
1366 new_line(); | |
1367 } | |
1368 | |
1369 static void | |
1370 cmd_N(Cmd *c) | |
1371 { | |
1372 if (!in_range(c)) | |
1373 return; | |
1374 do_writes(); | |
1375 app_line(); | |
1376 } | |
1377 | |
1378 static void | |
1379 cmd_p(Cmd *c) | |
1380 { | |
1381 if (in_range(c)) | |
1382 check_puts(patt.str, stdout); | |
1383 } | |
1384 | |
1385 static void | |
1386 cmd_P(Cmd *c) | |
1387 { | |
1388 char *p; | |
1389 | |
1390 if (!in_range(c)) | |
1391 return; | |
1392 | |
1393 if ((p = strchr(patt.str, '\n'))) | |
1394 *p = '\0'; | |
1395 | |
1396 check_puts(patt.str, stdout); | |
1397 | |
1398 if (p) | |
1399 *p = '\n'; | |
1400 } | |
1401 | |
1402 static void | |
1403 cmd_q(Cmd *c) | |
1404 { | |
1405 if (!in_range(c)) | |
1406 return; | |
1407 | |
1408 if (!gflags.n) | |
1409 check_puts(patt.str, stdout); | |
1410 do_writes(); | |
1411 gflags.halt = 1; | |
1412 } | |
1413 | |
1414 static void | |
1415 cmd_r(Cmd *c) | |
1416 { | |
1417 if (in_range(c)) | |
1418 push(&writes, c); | |
1419 } | |
1420 | |
/* s: when the line is selected, substitute matches of a regex in the
 * pattern space
 * The regex is c->u.s.re, or lastre when the command has none (an error if
 * neither exists); whichever is used becomes lastre.
 * The result is assembled in genbuf: text before a match, then the
 * replacement with '&' and backslash sequences expanded, or the matched
 * text itself when this match is not being replaced.  An empty match
 * advances by one rune so the loop makes progress.
 * If nothing was replaced the pattern space is left untouched.  Otherwise
 * gflags.s is set, the unmatched tail is appended, genbuf and patt are
 * swapped, and the new pattern space is written to stdout when c->u.s.p is
 * set and to c->u.s.file when that is set.
 * NOTE(review): several lines (the occurrence test, the replacement loop
 * header, the back reference handling and two error messages) are cut off
 * in this view; the summary above covers only what is visible, confirm the
 * details against the full file
 */
1421 static void | |
1422 cmd_s(Cmd *c) | |
1423 { | |
1424 String tmp; | |
1425 Rune r; | |
1426 size_t plen, rlen, len; | |
1427 char *p, *s, *end; | |
1428 unsigned int matches = 0, last_empty = 1, qflag = 0, cflags = 0; | |
1429 regex_t *re; | |
1430 regmatch_t *rm, *pmatch = NULL; | |
1431 | |
1432 if (!in_range(c)) | |
1433 return; | |
1434 | |
1435 if (!c->u.s.re && !lastre) | |
1436 leprintf("no previous regex"); | |
1437 | |
1438 re = c->u.s.re ? c->u.s.re : lastre; | |
1439 lastre = re; | |
1440 | |
1441 plen = re->re_nsub + 1; | |
1442 pmatch = ereallocarray(NULL, plen, sizeof(regmatch_t)); | |
1443 | |
1444 *genbuf.str = '\0'; | |
1445 s = patt.str; | |
1446 | |
1447 while (!qflag && !regexec(re, s, plen, pmatch, cflags)) { | |
1448 cflags = REG_NOTBOL; /* match against beginning of line … | |
1449 if (!*s) /* match against empty string first time, but n… | |
1450 qflag = 1; | |
1451 | |
1452 /* don't substitute if last match was not empty but this… | |
1453 * s_a*_._g | |
1454 * foobar -> .f.o.o.b.r. | |
1455 */ | |
1456 if ((last_empty || pmatch[0].rm_eo) && | |
1457 (++matches == c->u.s.occurrence || !c->u.s.occurrenc… | |
1458 /* copy over everything before the match */ | |
1459 strnacat(&genbuf, s, pmatch[0].rm_so); | |
1460 | |
1461 /* copy over replacement text, taking into accou… | |
1462 for (p = c->u.s.repl.str, len = strcspn(p, "\\&"… | |
1463 strnacat(&genbuf, p, len); | |
1464 p += len; | |
1465 switch (*p) { | |
1466 default: leprintf("this shouldn't be pos… | |
1467 case '\0': | |
1468 /* we're at the end, back up one… | |
1469 * the null byte to break out of… | |
1470 --p; | |
1471 break; | |
1472 case '&': | |
1473 strnacat(&genbuf, s + pmatch[0].… | |
1474 break; | |
1475 case '\\': | |
1476 if (isdigit(*++p)) { /* backrefe… | |
1477 /* only need to check he… | |
1478 if (!c->u.s.re && (size_… | |
1479 leprintf("back r… | |
1480 rm = &pmatch[*p - '0']; | |
1481 strnacat(&genbuf, s + rm… | |
1482 } else { /* character after back… | |
1483 strnacat(&genbuf, p, 1); | |
1484 } | |
1485 break; | |
1486 } | |
1487 } | |
1488 } else { | |
1489 /* not replacing, copy over everything up to and… | |
1490 strnacat(&genbuf, s, pmatch[0].rm_eo); | |
1491 } | |
1492 | |
1493 if (!pmatch[0].rm_eo) { /* empty match, advance one rune… | |
1494 end = s + strlen(s); | |
1495 rlen = charntorune(&r, s, end - s); | |
1496 | |
1497 if (!rlen) { /* ran out of bytes, copy short seq… | |
1498 stracat(&genbuf, s); | |
1499 s = end; | |
1500 } else { /* copy whether or not it's a good rune… | |
1501 strnacat(&genbuf, s, rlen); | |
1502 s += rlen; | |
1503 } | |
1504 } | |
1505 last_empty = !pmatch[0].rm_eo; | |
1506 s += pmatch[0].rm_eo; | |
1507 } | |
1508 free(pmatch); | |
1509 | |
1510 if (!(matches && matches >= c->u.s.occurrence)) /* no replacemen… | |
1511 return; | |
1512 | |
1513 gflags.s = 1; | |
1514 | |
1515 stracat(&genbuf, s); | |
1516 | |
1517 tmp = patt; | |
1518 patt = genbuf; | |
1519 genbuf = tmp; | |
1520 | |
1521 if (c->u.s.p) | |
1522 check_puts(patt.str, stdout); | |
1523 if (c->u.s.file) | |
1524 check_puts(patt.str, c->u.s.file); | |
1525 } | |
1526 | |
1527 static void | |
1528 cmd_t(Cmd *c) | |
1529 { | |
1530 if (!in_range(c) || !gflags.s) | |
1531 return; | |
1532 | |
1533 /* if we jump backwards update to end, otherwise update to desti… | |
1534 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap); | |
1535 pc = c->u.jump; | |
1536 gflags.s = 0; | |
1537 } | |
1538 | |
1539 static void | |
1540 cmd_w(Cmd *c) | |
1541 { | |
1542 if (in_range(c)) | |
1543 check_puts(patt.str, c->u.file); | |
1544 } | |
1545 | |
1546 static void | |
1547 cmd_x(Cmd *c) | |
1548 { | |
1549 String tmp; | |
1550 | |
1551 if (!in_range(c)) | |
1552 return; | |
1553 | |
1554 tmp = patt; | |
1555 patt = hold; | |
1556 hold = tmp; | |
1557 } | |
1558 | |
/* y: when the line is selected, transliterate the pattern space rune by rune
 * The result is built in genbuf, then genbuf and patt are swapped.
 *  - a rune found in c->u.y.set1 is replaced by a rune taken from
 *    c->u.y.set2
 *    NOTE(review): the index expression is cut off in this view; presumably
 *    the rune at the same position as in set1, confirm against the full file
 *  - runes not in set1, bad UTF-8 sequences and a short trailing sequence
 *    are copied unchanged
 * set1 is searched linearly up to its terminating zero rune.
 */
1559 static void | |
1560 cmd_y(Cmd *c) | |
1561 { | |
1562 String tmp; | |
1563 Rune r, *rp; | |
1564 size_t n, rlen; | |
1565 char *s, *end, buf[UTFmax]; | |
1566 | |
1567 if (!in_range(c)) | |
1568 return; | |
1569 | |
1570 *genbuf.str = '\0'; | |
1571 for (s = patt.str, end = s + strlen(s); *s; s += rlen) { | |
1572 if (!(rlen = charntorune(&r, s, end - s))) { /* ran out … | |
1573 stracat(&genbuf, s); | |
1574 break; | |
1575 } else if (r == Runeerror) { /* bad UTF-8 sequence, copy… | |
1576 strnacat(&genbuf, s, rlen); | |
1577 } else { | |
1578 for (rp = c->u.y.set1; *rp; rp++) | |
1579 if (*rp == r) | |
1580 break; | |
1581 if (*rp) { /* found r in set1, replace with Rune… | |
1582 n = runetochar(buf, c->u.y.set2 + (rp - … | |
1583 strnacat(&genbuf, buf, n); | |
1584 } else { | |
1585 strnacat(&genbuf, s, rlen); | |
1586 } | |
1587 } | |
1588 } | |
1589 tmp = patt; | |
1590 patt = genbuf; | |
1591 genbuf = tmp; | |
1592 } | |
1593 | |
1594 static void | |
1595 cmd_colon(Cmd *c) | |
1596 { | |
1597 } | |
1598 | |
1599 static void | |
1600 cmd_equal(Cmd *c) | |
1601 { | |
1602 if (in_range(c)) | |
1603 printf("%zu\n", lineno); | |
1604 } | |
1605 | |
1606 static void | |
1607 cmd_lbrace(Cmd *c) | |
1608 { | |
1609 Cmd *jump; | |
1610 | |
1611 if (in_range(c)) | |
1612 return; | |
1613 | |
1614 /* update ranges on all commands we skip */ | |
1615 jump = prog + c->u.offset; | |
1616 update_ranges(c + 1, jump); | |
1617 pc = jump; | |
1618 } | |
1619 | |
1620 static void | |
1621 cmd_rbrace(Cmd *c) | |
1622 { | |
1623 } | |
1624 | |
1625 /* not actually a sed function, but acts like one, put in last spot of s… | |
1626 static void | |
1627 cmd_last(Cmd *c) | |
1628 { | |
1629 if (!gflags.n) | |
1630 check_puts(patt.str, stdout); | |
1631 do_writes(); | |
1632 new_next(); | |
1633 } | |
1634 | |
1635 /* | |
1636 * Actions | |
1637 */ | |
1638 | |
1639 /* read new line, continue current cycle */ | |
1640 static void | |
1641 new_line(void) | |
1642 { | |
1643 while (read_line(file, &patt) == EOF) { | |
1644 if (next_file()) { | |
1645 gflags.halt = 1; | |
1646 return; | |
1647 } | |
1648 } | |
1649 gflags.s = 0; | |
1650 lineno++; | |
1651 } | |
1652 | |
1653 /* append new line, continue current cycle | |
1654 * FIXME: used for N, POSIX specifies do not print pattern space when ou… | |
1655 * input, but GNU does so busybox does as well. Currently we don'… | |
1656 * Should we? | |
1657 */ | |
1658 static void | |
1659 app_line(void) | |
1660 { | |
1661 while (read_line(file, &genbuf) == EOF) { | |
1662 if (next_file()) { | |
1663 gflags.halt = 1; | |
1664 return; | |
1665 } | |
1666 } | |
1667 | |
1668 stracat(&patt, "\n"); | |
1669 stracat(&patt, genbuf.str); | |
1670 gflags.s = 0; | |
1671 lineno++; | |
1672 } | |
1673 | |
1674 /* read new line, start new cycle */ | |
1675 static void | |
1676 new_next(void) | |
1677 { | |
1678 *patt.str = '\0'; | |
1679 update_ranges(pc + 1, prog + pcap); | |
1680 new_line(); | |
1681 pc = prog - 1; | |
1682 } | |
1683 | |
1684 /* keep old pattern space, start new cycle */ | |
1685 static void | |
1686 old_next(void) | |
1687 { | |
1688 update_ranges(pc + 1, prog + pcap); | |
1689 pc = prog - 1; | |
1690 } | |
1691 | |
/* Parse options, compile the script, append the implicit final command and
 * run the program over the remaining operands
 *   -n      sets gflags.n, which makes cmd_last, n and q skip printing the
 *           pattern space
 *   -r, -E  set gflags.E
 *   -e arg  compile(arg, 0)
 *   -f arg  compile(arg, 1)
 *           NOTE(review): the second argument presumably marks arg as a
 *           script file name rather than script text, confirm in compile
 * Without -e or -f the first operand is compiled as the script; having
 * neither a script option nor an operand is a usage error.
 * Returns ret OR'd with the results of closing stdin and stdout.
 * NOTE(review): the end of the resize call is cut off in this view
 */
1692 int | |
1693 main(int argc, char *argv[]) | |
1694 { | |
1695 char *arg; | |
1696 int script = 0; | |
1697 | |
1698 ARGBEGIN { | |
1699 case 'n': | |
1700 gflags.n = 1; | |
1701 break; | |
1702 case 'r': | |
1703 case 'E': | |
1704 gflags.E = 1; | |
1705 break; | |
1706 case 'e': | |
1707 arg = EARGF(usage()); | |
1708 compile(arg, 0); | |
1709 script = 1; | |
1710 break; | |
1711 case 'f': | |
1712 arg = EARGF(usage()); | |
1713 compile(arg, 1); | |
1714 script = 1; | |
1715 break; | |
1716 default : usage(); | |
1717 } ARGEND | |
1718 | |
1719 /* no script to run */ | |
1720 if (!script && !argc) | |
1721 usage(); | |
1722 | |
1723 /* no script yet, next argument is script */ | |
1724 if (!script) | |
1725 compile(*argv++, 0); | |
1726 | |
1727 /* shrink/grow memory to fit and add our last instruction */ | |
1728 resize((void **)&prog, &pcap, sizeof(*prog), pc - prog + 1, NULL… | |
1729 pc = prog + pcap - 1; | |
1730 pc->fninfo = &(Fninfo){ cmd_last, NULL, NULL, 0 }; | |
1731 | |
1732 files = argv; | |
1733 run(); | |
1734 | |
1735 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"); | |
1736 | |
1737 return ret; | |
1738 } | |