| sed.c - sbase - suckless unix tools | |
| git clone git://git.suckless.org/sbase | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| sed.c (41896B) | |
| --- | |
| 1 /* FIXME: summary | |
| 2 * decide whether we enforce valid UTF-8, right now it's enforced in cer… | |
| 3 * parts of the script, but not the input... | |
| 4 * nul bytes cause explosions due to use of libc string functions. thoug… | |
| 5 * lack of newline at end of file, currently we add one. what should we … | |
| 6 * allow "\\t" for "\t" etc. in regex? in replacement text? | |
| 7 * POSIX says don't flush on N when out of input, but GNU and busybox do. | |
| 8 */ | |
| 9 | |
| 10 #include <ctype.h> | |
| 11 #include <errno.h> | |
| 12 #include <regex.h> | |
| 13 #include <stdlib.h> | |
| 14 #include <string.h> | |
| 15 | |
| 16 #include "utf.h" | |
| 17 #include "util.h" | |
| 18 | |
| 19 /* Types */ | |
| 20 | |
| 21 /* used as queue for writes and stack for {,:,b,t */ | |
| 22 typedef struct { | |
| 23 void **data; | |
| 24 size_t size; | |
| 25 size_t cap; | |
| 26 } Vec; | |
| 27 | |
| 28 /* used for arbitrary growth, str is a C string | |
| 29 * FIXME: does it make sense to keep track of length? or just rely on li… | |
| 30 * string functions? If we want to support nul bytes everything c… | |
| 31 */ | |
| 32 typedef struct { | |
| 33 char *str; | |
| 34 size_t cap; | |
| 35 } String; | |
| 36 | |
| 37 typedef struct Cmd Cmd; | |
| 38 typedef struct { | |
| 39 void (*fn)(Cmd *); | |
| 40 char *(*getarg)(Cmd *, char *); | |
| 41 void (*freearg)(Cmd *); | |
| 42 unsigned char naddr; | |
| 43 } Fninfo; | |
| 44 | |
| 45 typedef struct { | |
| 46 union { | |
| 47 size_t lineno; | |
| 48 regex_t *re; | |
| 49 } u; | |
| 50 enum { | |
| 51 IGNORE, /* empty address, ignore */ | |
| 52 EVERY , /* every line */ | |
| 53 LINE , /* line number */ | |
| 54 LAST , /* last line ($) */ | |
| 55 REGEX , /* use included regex */ | |
| 56 LASTRE, /* use most recently used regex */ | |
| 57 } type; | |
| 58 } Addr; | |
| 59 | |
| 60 /* DISCUSS: naddr is not strictly necessary, but very helpful | |
| 61 * naddr == 0 iff beg.type == EVERY && end.type == IGNORE | |
| 62 * naddr == 1 iff beg.type != IGNORE && end.type == IGNORE | |
| 63 * naddr == 2 iff beg.type != IGNORE && end.type != IGNORE | |
| 64 */ | |
| 65 typedef struct { | |
| 66 Addr beg; | |
| 67 Addr end; | |
| 68 unsigned char naddr; | |
| 69 } Range; | |
| 70 | |
| 71 typedef struct { | |
| 72 regex_t *re; /* if NULL use last regex */ | |
| 73 String repl; | |
| 74 FILE *file; | |
| 75 size_t occurrence; /* 0 for all (g flag) */ | |
| 76 Rune delim; | |
| 77 unsigned int p:1; | |
| 78 } Sarg; | |
| 79 | |
| 80 typedef struct { | |
| 81 Rune *set1; | |
| 82 Rune *set2; | |
| 83 } Yarg; | |
| 84 | |
| 85 typedef struct { | |
| 86 String str; /* a,c,i text. r file path */ | |
| 87 void (*print)(char *, FILE *); /* check_puts for a, write_file … | |
| 88 } ACIRarg; | |
| 89 | |
| 90 struct Cmd { | |
| 91 Range range; | |
| 92 Fninfo *fninfo; | |
| 93 union { | |
| 94 Cmd *jump; /* used for b,t when running */ | |
| 95 char *label; /* used for :,b,t when building */ | |
| 96 ptrdiff_t offset; /* used for { (pointers break during r… | |
| 97 FILE *file; /* used for w */ | |
| 98 | |
| 99 /* FIXME: Should the following be in the union? or point… | |
| 100 Sarg s; | |
| 101 Yarg y; | |
| 102 ACIRarg acir; | |
| 103 } u; /* I find your lack of anonymous unions disturbing */ | |
| 104 unsigned int in_match:1; | |
| 105 unsigned int negate :1; | |
| 106 }; | |
| 107 | |
| 108 /* Files for w command (and s' w flag) */ | |
| 109 typedef struct { | |
| 110 char *path; | |
| 111 FILE *file; | |
| 112 } Wfile; | |
| 113 | |
| 114 /* | |
| 115 * Function Declarations | |
| 116 */ | |
| 117 | |
| 118 /* Dynamically allocated arrays and strings */ | |
| 119 static void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nm… | |
| 120 static void *pop(Vec *v); | |
| 121 static void push(Vec *v, void *p); | |
| 122 static void stracat(String *dst, char *src); | |
| 123 static void strnacat(String *dst, char *src, size_t n); | |
| 124 static void stracpy(String *dst, char *src); | |
| 125 | |
| 126 /* Cleanup and errors */ | |
| 127 static void usage(void); | |
| 128 | |
| 129 /* Parsing functions and related utilities */ | |
| 130 static void compile(char *s, int isfile); | |
| 131 static int read_line(FILE *f, String *s); | |
| 132 static char *make_range(Range *range, char *s); | |
| 133 static char *make_addr(Addr *addr, char *s); | |
| 134 static char *find_delim(char *s, Rune delim, int do_brackets); | |
| 135 static char *chompr(char *s, Rune rune); | |
| 136 static char *chomp(char *s); | |
| 137 static Rune *strtorunes(char *s, size_t nrunes); | |
| 138 static long stol(char *s, char **endp); | |
| 139 static size_t escapes(char *beg, char *end, Rune delim, int n_newline); | |
| 140 static size_t echarntorune(Rune *r, char *s, size_t n); | |
| 141 static void insert_labels(void); | |
| 142 | |
| 143 /* Get and Free arg and related utilities */ | |
| 144 static char *get_aci_arg(Cmd *c, char *s); | |
| 145 static void aci_append(Cmd *c, char *s); | |
| 146 static void free_acir_arg(Cmd *c); | |
| 147 static char *get_bt_arg(Cmd *c, char *s); | |
| 148 static char *get_r_arg(Cmd *c, char *s); | |
| 149 static char *get_s_arg(Cmd *c, char *s); | |
| 150 static void free_s_arg(Cmd *c); | |
| 151 static char *get_w_arg(Cmd *c, char *s); | |
| 152 static char *get_y_arg(Cmd *c, char *s); | |
| 153 static void free_y_arg(Cmd *c); | |
| 154 static char *get_colon_arg(Cmd *c, char *s); | |
| 155 static char *get_lbrace_arg(Cmd *c, char *s); | |
| 156 static char *get_rbrace_arg(Cmd *c, char *s); | |
| 157 static char *semicolon_arg(char *s); | |
| 158 | |
| 159 /* Running */ | |
| 160 static void run(void); | |
| 161 static int in_range(Cmd *c); | |
| 162 static int match_addr(Addr *a); | |
| 163 static int next_file(void); | |
| 164 static int is_eof(FILE *f); | |
| 165 static void do_writes(void); | |
| 166 static void write_file(char *path, FILE *out); | |
| 167 static void check_puts(char *s, FILE *f); | |
| 168 static void update_ranges(Cmd *beg, Cmd *end); | |
| 169 | |
| 170 /* Sed functions */ | |
| 171 static void cmd_y(Cmd *c); | |
| 172 static void cmd_x(Cmd *c); | |
| 173 static void cmd_w(Cmd *c); | |
| 174 static void cmd_t(Cmd *c); | |
| 175 static void cmd_s(Cmd *c); | |
| 176 static void cmd_r(Cmd *c); | |
| 177 static void cmd_q(Cmd *c); | |
| 178 static void cmd_P(Cmd *c); | |
| 179 static void cmd_p(Cmd *c); | |
| 180 static void cmd_N(Cmd *c); | |
| 181 static void cmd_n(Cmd *c); | |
| 182 static void cmd_l(Cmd *c); | |
| 183 static void cmd_i(Cmd *c); | |
| 184 static void cmd_H(Cmd *c); | |
| 185 static void cmd_h(Cmd *c); | |
| 186 static void cmd_G(Cmd *c); | |
| 187 static void cmd_g(Cmd *c); | |
| 188 static void cmd_D(Cmd *c); | |
| 189 static void cmd_d(Cmd *c); | |
| 190 static void cmd_c(Cmd *c); | |
| 191 static void cmd_b(Cmd *c); | |
| 192 static void cmd_a(Cmd *c); | |
| 193 static void cmd_colon(Cmd *c); | |
| 194 static void cmd_equal(Cmd *c); | |
| 195 static void cmd_lbrace(Cmd *c); | |
| 196 static void cmd_rbrace(Cmd *c); | |
| 197 static void cmd_last(Cmd *c); | |
| 198 | |
| 199 /* Actions */ | |
| 200 static void new_line(void); | |
| 201 static void app_line(void); | |
| 202 static void new_next(void); | |
| 203 static void old_next(void); | |
| 204 | |
| 205 /* | |
| 206 * Globals | |
| 207 */ | |
| 208 static Vec braces, labels, branches; /* holds ptrdiff_t. addrs of {, :, … | |
| 209 static Vec writes; /* holds cmd*. writes scheduled by a and r commands */ | |
| 210 static Vec wfiles; /* holds Wfile*. files for w and s///w commands */ | |
| 211 | |
| 212 static Cmd *prog, *pc; /* Program, program counter */ | |
| 213 static size_t pcap; | |
| 214 static size_t lineno; | |
| 215 | |
| 216 static regex_t *lastre; /* last used regex for empty regex search */ | |
| 217 static char **files; /* list of file names from argv */ | |
| 218 static FILE *file; /* current file we are reading */ | |
| 219 static int ret; /* exit status */ | |
| 220 | |
| 221 static String patt, hold, genbuf; | |
| 222 | |
| 223 static struct { | |
| 224 unsigned int n :1; /* -n (no print) */ | |
| 225 unsigned int E :1; /* -E (extended re) */ | |
| 226 unsigned int s :1; /* s/// replacement happened */ | |
| 227 unsigned int aci_cont:1; /* a,c,i text continuation */ | |
| 228 unsigned int s_cont :1; /* s/// replacement text continuation */ | |
| 229 unsigned int halt :1; /* halt execution */ | |
| 230 } gflags; | |
| 231 | |
| 232 /* FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) in… | |
| 233 static Fninfo fns[] = { | |
| 234 ['a'] = { cmd_a , get_aci_arg , free_acir_arg , 1 }, /* sc… | |
| 235 ['b'] = { cmd_b , get_bt_arg , NULL , 2 }, /* br… | |
| 236 ['c'] = { cmd_c , get_aci_arg , free_acir_arg , 2 }, /* de… | |
| 237 ['d'] = { cmd_d , NULL , NULL , 2 }, /* de… | |
| 238 ['D'] = { cmd_D , NULL , NULL , 2 }, /* de… | |
| 239 ['g'] = { cmd_g , NULL , NULL , 2 }, /* re… | |
| 240 ['G'] = { cmd_G , NULL , NULL , 2 }, /* ap… | |
| 241 ['h'] = { cmd_h , NULL , NULL , 2 }, /* re… | |
| 242 ['H'] = { cmd_H , NULL , NULL , 2 }, /* ap… | |
| 243 ['i'] = { cmd_i , get_aci_arg , free_acir_arg , 1 }, /* wr… | |
| 244 ['l'] = { cmd_l , NULL , NULL , 2 }, /* wr… | |
| 245 ['n'] = { cmd_n , NULL , NULL , 2 }, /* wr… | |
| 246 ['N'] = { cmd_N , NULL , NULL , 2 }, /* ap… | |
| 247 ['p'] = { cmd_p , NULL , NULL , 2 }, /* wr… | |
| 248 ['P'] = { cmd_P , NULL , NULL , 2 }, /* wr… | |
| 249 ['q'] = { cmd_q , NULL , NULL , 1 }, /* qu… | |
| 250 ['r'] = { cmd_r , get_r_arg , free_acir_arg , 1 }, /* wr… | |
| 251 ['s'] = { cmd_s , get_s_arg , free_s_arg , 2 }, /* fi… | |
| 252 ['t'] = { cmd_t , get_bt_arg , NULL , 2 }, /* if… | |
| 253 ['w'] = { cmd_w , get_w_arg , NULL , 2 }, /* ap… | |
| 254 ['x'] = { cmd_x , NULL , NULL , 2 }, /* ex… | |
| 255 ['y'] = { cmd_y , get_y_arg , free_y_arg , 2 }, /* re… | |
| 256 [':'] = { cmd_colon , get_colon_arg , NULL , 0 }, /* de… | |
| 257 ['='] = { cmd_equal , NULL , NULL , 1 }, /* pr… | |
| 258 ['{'] = { cmd_lbrace, get_lbrace_arg, NULL , 2 }, /* if… | |
| 259 ['}'] = { cmd_rbrace, get_rbrace_arg, NULL , 0 }, /* no… | |
| 260 | |
| 261 [0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isasc… | |
| 262 }; | |
| 263 | |
| 264 /* | |
| 265 * Function Definitions | |
| 266 */ | |
| 267 | |
| 268 /* given memory pointed to by *ptr that currently holds *nmemb members o… | |
| 269 * size, realloc to hold new_nmemb members, return new_nmemb in *memb an… | |
| 270 * past old end in *next. if realloc fails...explode | |
| 271 */ | |
/* Resize the array at *ptr, currently *nmemb elements of `size` bytes, to
 * new_nmemb elements.  *ptr and *nmemb are updated; if next is non-NULL,
 * *next receives a pointer one past the old end of the (possibly moved)
 * array, or NULL when the array was released (new_nmemb == 0).
 * Allocation goes through ereallocarray(). */
static void
resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
{
	void *n, *tmp;

	if (new_nmemb) {
		tmp = ereallocarray(*ptr, new_nmemb, size);
		n = (char *)tmp + *nmemb * size;
	} else { /* turns out realloc(*ptr, 0) != free(*ptr) */
		free(*ptr);
		tmp = NULL;
		n = NULL; /* no storage left: avoid arithmetic on a null pointer */
	}
	*nmemb = new_nmemb;
	*ptr = tmp;
	if (next)
		*next = n;
}
| 289 | |
| 290 static void * | |
| 291 pop(Vec *v) | |
| 292 { | |
| 293 if (!v->size) | |
| 294 return NULL; | |
| 295 return v->data[--v->size]; | |
| 296 } | |
| 297 | |
| 298 static void | |
| 299 push(Vec *v, void *p) | |
| 300 { | |
| 301 if (v->size == v->cap) | |
| 302 resize((void **)&v->data, &v->cap, sizeof(*v->data), v->… | |
| 303 v->data[v->size++] = p; | |
| 304 } | |
| 305 | |
| 306 static void | |
| 307 stracat(String *dst, char *src) | |
| 308 { | |
| 309 int new = !dst->cap; | |
| 310 size_t len; | |
| 311 | |
| 312 len = (new ? 0 : strlen(dst->str)) + strlen(src) + 1; | |
| 313 if (dst->cap < len) | |
| 314 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL); | |
| 315 if (new) | |
| 316 *dst->str = '\0'; | |
| 317 strcat(dst->str, src); | |
| 318 } | |
| 319 | |
| 320 static void | |
| 321 strnacat(String *dst, char *src, size_t n) | |
| 322 { | |
| 323 int new = !dst->cap; | |
| 324 size_t len; | |
| 325 | |
| 326 len = strlen(src); | |
| 327 len = (new ? 0 : strlen(dst->str)) + MIN(n, len) + 1; | |
| 328 if (dst->cap < len) | |
| 329 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL); | |
| 330 if (new) | |
| 331 *dst->str = '\0'; | |
| 332 strlcat(dst->str, src, len); | |
| 333 } | |
| 334 | |
| 335 static void | |
| 336 stracpy(String *dst, char *src) | |
| 337 { | |
| 338 size_t len; | |
| 339 | |
| 340 len = strlen(src) + 1; | |
| 341 if (dst->cap < len) | |
| 342 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL); | |
| 343 strcpy(dst->str, src); | |
| 344 } | |
| 345 | |
| 346 static void | |
| 347 leprintf(char *s) | |
| 348 { | |
| 349 if (errno) | |
| 350 eprintf("%zu: %s: %s\n", lineno, s, strerror(errno)); | |
| 351 else | |
| 352 eprintf("%zu: %s\n", lineno, s); | |
| 353 } | |
| 354 | |
| 355 /* FIXME: write usage message */ | |
| 356 static void | |
| 357 usage(void) | |
| 358 { | |
| 359 eprintf("usage: sed [-nrE] script [file ...]\n" | |
| 360 " sed [-nrE] -e script [-e script] ... [-f scriptf… | |
| 361 " sed [-nrE] [-e script] ... -f scriptfile [-f scr… | |
| 362 } | |
| 363 | |
| 364 /* Differences from POSIX | |
| 365 * we allow semicolons and trailing blanks inside {} | |
| 366 * we allow spaces after ! (and in between !s) | |
| 367 * we allow extended regular expressions (-E) | |
| 368 */ | |
| 369 static void | |
| 370 compile(char *s, int isfile) | |
| 371 { | |
| 372 FILE *f; | |
| 373 | |
| 374 if (isfile) { | |
| 375 f = fopen(s, "r"); | |
| 376 if (!f) | |
| 377 eprintf("fopen %s:", s); | |
| 378 } else { | |
| 379 if (!*s) /* empty string script */ | |
| 380 return; | |
| 381 f = fmemopen(s, strlen(s), "r"); | |
| 382 if (!f) | |
| 383 eprintf("fmemopen:"); | |
| 384 } | |
| 385 | |
| 386 /* NOTE: get arg functions can't use genbuf */ | |
| 387 while (read_line(f, &genbuf) != EOF) { | |
| 388 s = genbuf.str; | |
| 389 | |
| 390 /* if the first two characters of the script are "#n" de… | |
| 391 if (++lineno == 1 && *s == '#' && s[1] == 'n') { | |
| 392 gflags.n = 1; | |
| 393 continue; | |
| 394 } | |
| 395 | |
| 396 if (gflags.aci_cont) { | |
| 397 aci_append(pc - 1, s); | |
| 398 continue; | |
| 399 } | |
| 400 if (gflags.s_cont) | |
| 401 s = (pc - 1)->fninfo->getarg(pc - 1, s); | |
| 402 | |
| 403 while (*s) { | |
| 404 s = chompr(s, ';'); | |
| 405 if (!*s || *s == '#') | |
| 406 break; | |
| 407 | |
| 408 if ((size_t)(pc - prog) == pcap) | |
| 409 resize((void **)&prog, &pcap, sizeof(*pr… | |
| 410 | |
| 411 pc->range.beg.type = pc->range.end.type = IGNORE; | |
| 412 pc->fninfo = NULL; | |
| 413 pc->in_match = 0; | |
| 414 | |
| 415 s = make_range(&pc->range, s); | |
| 416 s = chomp(s); | |
| 417 pc->negate = *s == '!'; | |
| 418 s = chompr(s, '!'); | |
| 419 | |
| 420 if (!isascii(*s) || !(pc->fninfo = &fns[(unsigne… | |
| 421 leprintf("bad sed function"); | |
| 422 if (pc->range.naddr > pc->fninfo->naddr) | |
| 423 leprintf("wrong number of addresses"); | |
| 424 s++; | |
| 425 | |
| 426 if (pc->fninfo->getarg) | |
| 427 s = pc->fninfo->getarg(pc, s); | |
| 428 | |
| 429 pc++; | |
| 430 } | |
| 431 } | |
| 432 | |
| 433 fshut(f, s); | |
| 434 } | |
| 435 | |
| 436 /* FIXME: if we decide to honor lack of trailing newline, set/clear a gl… | |
| 437 * flag when reading a line | |
| 438 */ | |
| 439 static int | |
| 440 read_line(FILE *f, String *s) | |
| 441 { | |
| 442 ssize_t len; | |
| 443 | |
| 444 if (!f) | |
| 445 return EOF; | |
| 446 | |
| 447 if ((len = getline(&s->str, &s->cap, f)) < 0) { | |
| 448 if (ferror(f)) | |
| 449 eprintf("getline:"); | |
| 450 return EOF; | |
| 451 } | |
| 452 if (s->str[--len] == '\n') | |
| 453 s->str[len] = '\0'; | |
| 454 return 0; | |
| 455 } | |
| 456 | |
| 457 /* read first range from s, return pointer to one past end of range */ | |
| 458 static char * | |
| 459 make_range(Range *range, char *s) | |
| 460 { | |
| 461 s = make_addr(&range->beg, s); | |
| 462 | |
| 463 if (*s == ',') | |
| 464 s = make_addr(&range->end, s + 1); | |
| 465 else | |
| 466 range->end.type = IGNORE; | |
| 467 | |
| 468 if (range->beg.type == EVERY && range->end.type == IGNORE)… | |
| 469 else if (range->beg.type != IGNORE && range->end.type == IGNORE)… | |
| 470 else if (range->beg.type != IGNORE && range->end.type != IGNORE)… | |
| 471 else leprintf("this is impossible..."); | |
| 472 | |
| 473 return s; | |
| 474 } | |
| 475 | |
| 476 /* read first addr from s, return pointer to one past end of addr */ | |
| 477 static char * | |
| 478 make_addr(Addr *addr, char *s) | |
| 479 { | |
| 480 Rune r; | |
| 481 char *p = s + strlen(s); | |
| 482 size_t rlen = echarntorune(&r, s, p - s); | |
| 483 | |
| 484 if (r == '$') { | |
| 485 addr->type = LAST; | |
| 486 s += rlen; | |
| 487 } else if (isdigitrune(r)) { | |
| 488 addr->type = LINE; | |
| 489 addr->u.lineno = stol(s, &s); | |
| 490 } else if (r == '/' || r == '\\') { | |
| 491 Rune delim; | |
| 492 if (r == '\\') { | |
| 493 s += rlen; | |
| 494 rlen = echarntorune(&r, s, p - s); | |
| 495 } | |
| 496 if (r == '\\') | |
| 497 leprintf("bad delimiter '\\'"); | |
| 498 delim = r; | |
| 499 s += rlen; | |
| 500 rlen = echarntorune(&r, s, p - s); | |
| 501 if (r == delim) { | |
| 502 addr->type = LASTRE; | |
| 503 s += rlen; | |
| 504 } else { | |
| 505 addr->type = REGEX; | |
| 506 p = find_delim(s, delim, 1); | |
| 507 if (!*p) | |
| 508 leprintf("unclosed regex"); | |
| 509 p -= escapes(s, p, delim, 0); | |
| 510 *p++ = '\0'; | |
| 511 addr->u.re = emalloc(sizeof(*addr->u.re)); | |
| 512 eregcomp(addr->u.re, s, gflags.E ? REG_EXTENDED … | |
| 513 s = p; | |
| 514 } | |
| 515 } else { | |
| 516 addr->type = EVERY; | |
| 517 } | |
| 518 | |
| 519 return s; | |
| 520 } | |
| 521 | |
| 522 /* return pointer to first delim in s that is not escaped | |
| 523 * and if do_brackets is set, not in [] (note possible [::], [..], [==],… | |
| 524 * return pointer to trailing nul byte if no delim found | |
| 525 * | |
| 526 * any escaped character that is not special is just itself (POSIX undef… | |
| 527 * FIXME: pull out into some util thing, will be useful for ed as well | |
| 528 */ | |
| 529 static char * | |
| 530 find_delim(char *s, Rune delim, int do_brackets) | |
| 531 { | |
| 532 enum { | |
| 533 OUTSIDE , /* not in brackets */ | |
| 534 BRACKETS_OPENING, /* last char was first [ or last two w… | |
| 535 BRACKETS_INSIDE , /* inside [] */ | |
| 536 INSIDE_OPENING , /* inside [] and last char was [ */ | |
| 537 CLASS_INSIDE , /* inside class [::], or colating elem… | |
| 538 CLASS_CLOSING , /* inside class [::], or colating elem… | |
| 539 } state = OUTSIDE; | |
| 540 | |
| 541 Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall… | |
| 542 size_t rlen; | |
| 543 int escape = 0; | |
| 544 char *end = s + strlen(s); | |
| 545 | |
| 546 for (; *s; s += rlen) { | |
| 547 rlen = echarntorune(&r, s, end - s); | |
| 548 | |
| 549 if (state == BRACKETS_OPENING && r == '^' )… | |
| 550 else if (state == BRACKETS_OPENING && r == ']' )… | |
| 551 else if (state == BRACKETS_OPENING )… | |
| 552 | |
| 553 if (state == CLASS_CLOSING && r == ']' )… | |
| 554 else if (state == CLASS_CLOSING )… | |
| 555 else if (state == CLASS_INSIDE && r == c )… | |
| 556 else if (state == INSIDE_OPENING && (r == ':' || | |
| 557 r == '.' || | |
| 558 r == '=') )… | |
| 559 else if (state == INSIDE_OPENING && r == ']' )… | |
| 560 else if (state == INSIDE_OPENING )… | |
| 561 else if (state == BRACKETS_INSIDE && r == '[' )… | |
| 562 else if (state == BRACKETS_INSIDE && r == ']' )… | |
| 563 else if (state == OUTSIDE && escape )… | |
| 564 else if (state == OUTSIDE && r == '\\' )… | |
| 565 else if (state == OUTSIDE && r == delim)… | |
| 566 else if (state == OUTSIDE && do_brackets && r == '[' )… | |
| 567 } | |
| 568 return s; | |
| 569 } | |
| 570 | |
/* Convenience wrapper: chompr() with rune 0, i.e. no extra rune to eat
 * besides what chompr() always skips. */
static char *
chomp(char *s)
{
	char *rest = chompr(s, 0);

	return rest;
}
| 576 | |
| 577 /* eat all leading whitespace and occurrences of rune */ | |
| 578 static char * | |
| 579 chompr(char *s, Rune rune) | |
| 580 { | |
| 581 Rune r; | |
| 582 size_t rlen; | |
| 583 char *end = s + strlen(s); | |
| 584 | |
| 585 while (*s && (rlen = echarntorune(&r, s, end - s)) && (isspaceru… | |
| 586 s += rlen; | |
| 587 return s; | |
| 588 } | |
| 589 | |
| 590 /* convert first nrunes Runes from UTF-8 string s in allocated Rune* | |
| 591 * NOTE: sequence must be valid UTF-8, check first */ | |
| 592 static Rune * | |
| 593 strtorunes(char *s, size_t nrunes) | |
| 594 { | |
| 595 Rune *rs, *rp; | |
| 596 | |
| 597 rp = rs = ereallocarray(NULL, nrunes + 1, sizeof(*rs)); | |
| 598 | |
| 599 while (nrunes--) | |
| 600 s += chartorune(rp++, s); | |
| 601 | |
| 602 *rp = '\0'; | |
| 603 return rs; | |
| 604 } | |
| 605 | |
/* Parse a base-10 integer at s with strtol(), storing a pointer to the
 * first unparsed byte in *endp.  An error is reported through leprintf()
 * when strtol() sets errno or when no characters were consumed. */
static long
stol(char *s, char **endp)
{
	long n;

	errno = 0;
	n = strtol(s, endp, 10);

	/* leprintf() itself appends ": <strerror(errno)>" when errno is set,
	 * so the message must not end in a colon ("strtol:: ...") */
	if (errno)
		leprintf("strtol");
	if (*endp == s)
		leprintf("strtol: invalid number");

	return n;
}
| 620 | |
| 621 /* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d i… | |
| 622 * if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (n… | |
| 623 * if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (… | |
| 624 * if delim is 0 all escaped characters represent themselves (aci text) | |
| 625 * memmove rest of string (beyond end) into place | |
| 626 * return the number of converted escapes (backslashes removed) | |
| 627 * FIXME: this has had too many corner cases slapped on and is ugly. rew… | |
| 628 */ | |
/* Unescape the bytes in [beg, end) in place, then shift the remainder of
 * the string (from end up to and including the nul) down behind the
 * result.  Returns the count kept in num (see the third branch below). */
static size_t
escapes(char *beg, char *end, Rune delim, int n_newline)
{
	size_t num = 0;
	char *src = beg, *dst = beg;

	while (src < end) {
		/* handle escaped backslash specially so we don't think the second
		 * backslash is escaping something */
		if (*src == '\\' && src[1] == '\\') {
			*dst++ = *src++;
			if (delim)
				*dst++ = *src++; /* keep both backslashes */
			else
				src++; /* delim == 0: collapse the pair to one backslash */
		} else if (*src == '\\' && !delim) {
			/* delim == 0: drop the backslash; the following byte is
			 * copied unchanged on the next iteration */
			src++;
		} else if (*src == '\\' && src[1]) {
			Rune r;
			size_t rlen;
			num++; /* tentatively count this backslash as removed */
			src++;
			rlen = echarntorune(&r, src, end - src);

			if (r == 'n' && delim == 'n') {
				*src = n_newline ? '\n' : 'n'; /* written through src so the memmove below copies it */
			} else if (r == 'n') {
				*src = '\n';
			} else if (r != delim) {
				/* not a recognised escape: keep the backslash and
				 * undo the count */
				*dst++ = '\\';
				num--;
			}

			/* copy the (possibly rewritten) rune after the backslash */
			memmove(dst, src, rlen);
			dst += rlen;
			src += rlen;
		} else {
			*dst++ = *src++;
		}
	}
	/* move the tail, including the terminating nul, into place */
	memmove(dst, src, strlen(src) + 1);
	return num;
}
| 672 | |
| 673 static size_t | |
| 674 echarntorune(Rune *r, char *s, size_t n) | |
| 675 { | |
| 676 size_t rlen = charntorune(r, s, n); | |
| 677 if (!rlen || *r == Runeerror) | |
| 678 leprintf("invalid UTF-8"); | |
| 679 return rlen; | |
| 680 } | |
| 681 | |
| 682 static void | |
| 683 insert_labels(void) | |
| 684 { | |
| 685 size_t i; | |
| 686 Cmd *from, *to; | |
| 687 | |
| 688 while (branches.size) { | |
| 689 from = prog + (ptrdiff_t)pop(&branches); | |
| 690 | |
| 691 if (!from->u.label) {/* no label branch to end of script… | |
| 692 from->u.jump = pc - 1; | |
| 693 } else { | |
| 694 for (i = 0; i < labels.size; i++) { | |
| 695 to = prog + (ptrdiff_t)labels.data[i]; | |
| 696 if (!strcmp(from->u.label, to->u.label))… | |
| 697 from->u.jump = to; | |
| 698 break; | |
| 699 } | |
| 700 } | |
| 701 if (i == labels.size) | |
| 702 leprintf("bad label"); | |
| 703 } | |
| 704 } | |
| 705 } | |
| 706 | |
| 707 /* | |
| 708 * Getargs / Freeargs | |
| 709 * Read argument from s, return pointer to one past last character of ar… | |
| 710 */ | |
| 711 | |
| 712 /* POSIX compliant | |
| 713 * i\ | |
| 714 * foobar | |
| 715 * | |
| 716 * also allow the following non POSIX compliant | |
| 717 * i # empty line | |
| 718 * ifoobar | |
| 719 * ifoobar\ | |
| 720 * baz | |
| 721 * | |
| 722 * FIXME: GNU and busybox discard leading spaces | |
| 723 * i foobar | |
| 724 * i foobar | |
| 725 * ifoobar | |
| 726 * are equivalent in GNU and busybox. We don't. Should we? | |
| 727 */ | |
| 728 static char * | |
| 729 get_aci_arg(Cmd *c, char *s) | |
| 730 { | |
| 731 c->u.acir.print = check_puts; | |
| 732 c->u.acir.str = (String){ NULL, 0 }; | |
| 733 | |
| 734 gflags.aci_cont = !!*s; /* no continue flag if empty string */ | |
| 735 | |
| 736 /* neither empty string nor POSIX compliant */ | |
| 737 if (*s && !(*s == '\\' && !s[1])) | |
| 738 aci_append(c, s); | |
| 739 | |
| 740 return s + strlen(s); | |
| 741 } | |
| 742 | |
| 743 static void | |
| 744 aci_append(Cmd *c, char *s) | |
| 745 { | |
| 746 char *end = s + strlen(s), *p = end; | |
| 747 | |
| 748 gflags.aci_cont = 0; | |
| 749 while (--p >= s && *p == '\\') | |
| 750 gflags.aci_cont = !gflags.aci_cont; | |
| 751 | |
| 752 if (gflags.aci_cont) | |
| 753 *--end = '\n'; | |
| 754 | |
| 755 escapes(s, end, 0, 0); | |
| 756 stracat(&c->u.acir.str, s); | |
| 757 } | |
| 758 | |
| 759 static void | |
| 760 free_acir_arg(Cmd *c) | |
| 761 { | |
| 762 free(c->u.acir.str.str); | |
| 763 } | |
| 764 | |
| 765 /* POSIX dictates that label is rest of line, including semicolons, trai… | |
| 766 * whitespace, closing braces, etc. and can be limited to 8 bytes | |
| 767 * | |
| 768 * I allow a semicolon or closing brace to terminate a label name, it's … | |
| 769 * POSIX compliant, but it's useful and every sed version I've tried to … | |
| 770 * does the same. | |
| 771 * | |
| 772 * FIXME: POSIX dictates that leading whitespace is ignored but trailing | |
| 773 * whitespace is not. This is annoying and we should probably get rid of… | |
| 774 */ | |
| 775 static char * | |
| 776 get_bt_arg(Cmd *c, char *s) | |
| 777 { | |
| 778 char *p = semicolon_arg(s = chomp(s)); | |
| 779 | |
| 780 if (p != s) { | |
| 781 c->u.label = estrndup(s, p - s); | |
| 782 } else { | |
| 783 c->u.label = NULL; | |
| 784 } | |
| 785 | |
| 786 push(&branches, (void *)(c - prog)); | |
| 787 | |
| 788 return p; | |
| 789 } | |
| 790 | |
| 791 /* POSIX dictates file name is rest of line including semicolons, traili… | |
| 792 * whitespace, closing braces, etc. and file name must be preceded by a … | |
| 793 * | |
| 794 * I allow a semicolon or closing brace to terminate a file name and don… | |
| 795 * enforce leading space. | |
| 796 * | |
| 797 * FIXME: decide whether trailing whitespace should be included and fix | |
| 798 * accordingly | |
| 799 */ | |
| 800 static char * | |
| 801 get_r_arg(Cmd *c, char *s) | |
| 802 { | |
| 803 char *p = semicolon_arg(s = chomp(s)); | |
| 804 | |
| 805 if (p == s) | |
| 806 leprintf("no file name"); | |
| 807 | |
| 808 c->u.acir.str.str = estrndup(s, p - s); | |
| 809 c->u.acir.print = write_file; | |
| 810 | |
| 811 return p; | |
| 812 } | |
| 813 | |
| 814 /* we allow "\\n" in replacement text to mean "\n" (undefined in POSIX) | |
| 815 * | |
| 816 * FIXME: allow other escapes in regex and replacement? if so change esc… | |
| 817 */ | |
| 818 static char * | |
| 819 get_s_arg(Cmd *c, char *s) | |
| 820 { | |
| 821 Rune delim, r; | |
| 822 Cmd buf; | |
| 823 char *p; | |
| 824 int esc, lastre; | |
| 825 | |
| 826 /* s/Find/Replace/Flags */ | |
| 827 | |
| 828 /* Find */ | |
| 829 if (!gflags.s_cont) { /* NOT continuing from literal newline in … | |
| 830 lastre = 0; | |
| 831 c->u.s.repl = (String){ NULL, 0 }; | |
| 832 c->u.s.occurrence = 1; | |
| 833 c->u.s.file = NULL; | |
| 834 c->u.s.p = 0; | |
| 835 | |
| 836 if (!*s || *s == '\\') | |
| 837 leprintf("bad delimiter"); | |
| 838 | |
| 839 p = s + strlen(s); | |
| 840 s += echarntorune(&delim, s, p - s); | |
| 841 c->u.s.delim = delim; | |
| 842 | |
| 843 echarntorune(&r, s, p - s); | |
| 844 if (r == delim) /* empty regex */ | |
| 845 lastre = 1; | |
| 846 | |
| 847 p = find_delim(s, delim, 1); | |
| 848 if (!*p) | |
| 849 leprintf("missing second delimiter"); | |
| 850 p -= escapes(s, p, delim, 0); | |
| 851 *p = '\0'; | |
| 852 | |
| 853 if (lastre) { | |
| 854 c->u.s.re = NULL; | |
| 855 } else { | |
| 856 c->u.s.re = emalloc(sizeof(*c->u.s.re)); | |
| 857 /* FIXME: different eregcomp that calls fatal */ | |
| 858 eregcomp(c->u.s.re, s, gflags.E ? REG_EXTENDED :… | |
| 859 } | |
| 860 s = p + runelen(delim); | |
| 861 } | |
| 862 | |
| 863 /* Replace */ | |
| 864 delim = c->u.s.delim; | |
| 865 | |
| 866 p = find_delim(s, delim, 0); | |
| 867 p -= escapes(s, p, delim, 0); | |
| 868 if (!*p) { /* no third delimiter */ | |
| 869 /* FIXME: same backslash counting as aci_append() */ | |
| 870 if (p[-1] != '\\') | |
| 871 leprintf("missing third delimiter or <backslash>… | |
| 872 p[-1] = '\n'; | |
| 873 gflags.s_cont = 1; | |
| 874 } else { | |
| 875 gflags.s_cont = 0; | |
| 876 } | |
| 877 | |
| 878 /* check for bad references in replacement text */ | |
| 879 *p = '\0'; | |
| 880 for (esc = 0, p = s; *p; p++) { | |
| 881 if (esc) { | |
| 882 esc = 0; | |
| 883 if (isdigit(*p) && c->u.s.re && (size_t)(*p - '0… | |
| 884 leprintf("back reference number greater … | |
| 885 } else if (*p == '\\') { | |
| 886 esc = 1; | |
| 887 } | |
| 888 } | |
| 889 stracat(&c->u.s.repl, s); | |
| 890 | |
| 891 if (gflags.s_cont) | |
| 892 return p; | |
| 893 | |
| 894 s = p + runelen(delim); | |
| 895 | |
| 896 /* Flags */ | |
| 897 p = semicolon_arg(s = chomp(s)); | |
| 898 | |
| 899 /* FIXME: currently for simplicity take last of g or occurrence … | |
| 900 * ignore multiple p flags. need to fix that */ | |
| 901 for (; s < p; s++) { | |
| 902 if (isdigit(*s)) { | |
| 903 c->u.s.occurrence = stol(s, &s); | |
| 904 s--; /* for loop will advance pointer */ | |
| 905 } else { | |
| 906 switch (*s) { | |
| 907 case 'g': c->u.s.occurrence = 0; break; | |
| 908 case 'p': c->u.s.p = 1; break; | |
| 909 case 'w': | |
| 910 /* must be last flag, take everything up… | |
| 911 * s == p after this */ | |
| 912 s = get_w_arg(&buf, chomp(s+1)); | |
| 913 c->u.s.file = buf.u.file; | |
| 914 break; | |
| 915 } | |
| 916 } | |
| 917 } | |
| 918 return p; | |
| 919 } | |
| 920 | |
| 921 static void | |
| 922 free_s_arg(Cmd *c) | |
| 923 { | |
| 924 if (c->u.s.re) | |
| 925 regfree(c->u.s.re); | |
| 926 free(c->u.s.re); | |
| 927 free(c->u.s.repl.str); | |
| 928 } | |
| 929 | |
| 930 /* see get_r_arg notes */ | |
| 931 static char * | |
| 932 get_w_arg(Cmd *c, char *s) | |
| 933 { | |
| 934 char *p = semicolon_arg(s = chomp(s)); | |
| 935 Wfile *w, **wp; | |
| 936 | |
| 937 if (p == s) | |
| 938 leprintf("no file name"); | |
| 939 | |
| 940 for (wp = (Wfile **)wfiles.data; (size_t)(wp - (Wfile **)wfiles.… | |
| 941 if (strlen((*wp)->path) == (size_t)(p - s) && !strncmp(s… | |
| 942 c->u.file = (*wp)->file; | |
| 943 return p; | |
| 944 } | |
| 945 } | |
| 946 | |
| 947 w = emalloc(sizeof(*w)); | |
| 948 w->path = estrndup(s, p - s); | |
| 949 | |
| 950 if (!(w->file = fopen(w->path, "w"))) | |
| 951 leprintf("fopen failed"); | |
| 952 | |
| 953 c->u.file = w->file; | |
| 954 | |
| 955 push(&wfiles, w); | |
| 956 return p; | |
| 957 } | |
| 958 | |
| 959 static char * | |
| 960 get_y_arg(Cmd *c, char *s) | |
| 961 { | |
| 962 Rune delim; | |
| 963 char *p = s + strlen(s); | |
| 964 size_t rlen = echarntorune(&delim, s, p - s); | |
| 965 size_t nrunes1, nrunes2; | |
| 966 | |
| 967 c->u.y.set1 = c->u.y.set2 = NULL; | |
| 968 | |
| 969 s += rlen; | |
| 970 p = find_delim(s, delim, 0); | |
| 971 p -= escapes(s, p, delim, 1); | |
| 972 nrunes1 = utfnlen(s, p - s); | |
| 973 c->u.y.set1 = strtorunes(s, nrunes1); | |
| 974 | |
| 975 s = p + rlen; | |
| 976 p = find_delim(s, delim, 0); | |
| 977 p -= escapes(s, p, delim, 1); | |
| 978 nrunes2 = utfnlen(s, p - s); | |
| 979 | |
| 980 if (nrunes1 != nrunes2) | |
| 981 leprintf("different set lengths"); | |
| 982 | |
| 983 c->u.y.set2 = strtorunes(s, utfnlen(s, p - s)); | |
| 984 | |
| 985 return p + rlen; | |
| 986 } | |
| 987 | |
| 988 static void | |
| 989 free_y_arg(Cmd *c) | |
| 990 { | |
| 991 free(c->u.y.set1); | |
| 992 free(c->u.y.set2); | |
| 993 } | |
| 994 | |
| 995 /* see get_bt_arg notes */ | |
| 996 static char * | |
| 997 get_colon_arg(Cmd *c, char *s) | |
| 998 { | |
| 999 char *p = semicolon_arg(s = chomp(s)); | |
| 1000 | |
| 1001 if (p == s) | |
| 1002 leprintf("no label name"); | |
| 1003 | |
| 1004 c->u.label = estrndup(s, p - s); | |
| 1005 push(&labels, (void *)(c - prog)); | |
| 1006 return p; | |
| 1007 } | |
| 1008 | |
| 1009 static char * | |
| 1010 get_lbrace_arg(Cmd *c, char *s) | |
| 1011 { | |
| 1012 push(&braces, (void *)(c - prog)); | |
| 1013 return s; | |
| 1014 } | |
| 1015 | |
| 1016 static char * | |
| 1017 get_rbrace_arg(Cmd *c, char *s) | |
| 1018 { | |
| 1019 Cmd *lbrace; | |
| 1020 | |
| 1021 if (!braces.size) | |
| 1022 leprintf("extra }"); | |
| 1023 | |
| 1024 lbrace = prog + (ptrdiff_t)pop(&braces); | |
| 1025 lbrace->u.offset = c - prog; | |
| 1026 return s; | |
| 1027 } | |
| 1028 | |
/* s points to the beginning of an argument that may be terminated by a
 * semicolon, or by a closing brace so that ; is not required before }
 * return a pointer to that terminator, or to the nul byte ending the string
 * when there is none
 * FIXME: decide whether or not to eat trailing whitespace for the arguments
 *        where we allow semicolon/brace termination that POSIX doesn't
 *        (b, r, t, w, :)
 *        POSIX says trailing whitespace is part of the label or file name,
 *        we should probably eat it
 */
static char *
semicolon_arg(char *s)
{
	return s + strcspn(s, ";}");
}
| 1046 | |
| 1047 static void | |
| 1048 run(void) | |
| 1049 { | |
| 1050 lineno = 0; | |
| 1051 if (braces.size) | |
| 1052 leprintf("extra {"); | |
| 1053 | |
| 1054 /* genbuf has already been initialized, patt will be in new_line | |
| 1055 * (or we'll halt) */ | |
| 1056 stracpy(&hold, ""); | |
| 1057 | |
| 1058 insert_labels(); | |
| 1059 next_file(); | |
| 1060 new_line(); | |
| 1061 | |
| 1062 for (pc = prog; !gflags.halt; pc++) | |
| 1063 pc->fninfo->fn(pc); | |
| 1064 } | |
| 1065 | |
| 1066 /* return true if we are in range for c, set c->in_match appropriately */ | |
/* three cases, tried in order:
 * 1. the begin address matches the current line: for a two address command
 *    in_match is rewritten (cleared when the end address is a LINE and a
 *    further condition holds, set otherwise), and the line is selected
 *    unless the command is negated
 * 2. a range is open and the end address matches: the range is closed, and
 *    this line still counts as part of it
 * 3. otherwise the answer is in_match, flipped by negate
 * NOTE(review): the second half of the LINE test is cut off in this view;
 *               presumably it checks that the end line is not after the
 *               current line, confirm against the full source
 */
| 1067 static int | |
| 1068 in_range(Cmd *c) | |
| 1069 { | |
| 1070 if (match_addr(&c->range.beg)) { | |
| 1071 if (c->range.naddr == 2) { | |
| 1072 if (c->range.end.type == LINE && c->range.end.u.… | |
| 1073 c->in_match = 0; | |
| 1074 else | |
| 1075 c->in_match = 1; | |
| 1076 } | |
| 1077 return !c->negate; | |
| 1078 } | |
/* the end address is only consulted while a range is open */
| 1079 if (c->in_match && match_addr(&c->range.end)) { | |
| 1080 c->in_match = 0; | |
| 1081 return !c->negate; | |
| 1082 } | |
| 1083 return c->in_match ^ c->negate; | |
| 1084 } | |
| 1085 | |
| 1086 /* return true if addr matches current line */ | |
| 1087 static int | |
| 1088 match_addr(Addr *a) | |
| 1089 { | |
| 1090 switch (a->type) { | |
| 1091 default: | |
| 1092 case IGNORE: return 0; | |
| 1093 case EVERY: return 1; | |
| 1094 case LINE: return lineno == a->u.lineno; | |
| 1095 case LAST: | |
| 1096 while (is_eof(file) && !next_file()) | |
| 1097 ; | |
| 1098 return !file; | |
| 1099 case REGEX: | |
| 1100 lastre = a->u.re; | |
| 1101 return !regexec(a->u.re, patt.str, 0, NULL, 0); | |
| 1102 case LASTRE: | |
| 1103 if (!lastre) | |
| 1104 leprintf("no previous regex"); | |
| 1105 return !regexec(lastre, patt.str, 0, NULL, 0); | |
| 1106 } | |
| 1107 } | |
| 1108 | |
| 1109 /* move to next input file | |
| 1110 * stdin if first call and no files | |
| 1111 * return 0 for success and 1 for no more files | |
| 1112 */ | |
| 1113 static int | |
| 1114 next_file(void) | |
| 1115 { | |
| 1116 static unsigned char first = 1; | |
| 1117 | |
| 1118 if (file == stdin) | |
| 1119 clearerr(file); | |
| 1120 else if (file) | |
| 1121 fshut(file, "<file>"); | |
| 1122 /* given no files, default to stdin */ | |
| 1123 file = first && !*files ? stdin : NULL; | |
| 1124 first = 0; | |
| 1125 | |
| 1126 while (!file && *files) { | |
| 1127 if (!strcmp(*files, "-")) { | |
| 1128 file = stdin; | |
| 1129 } else if (!(file = fopen(*files, "r"))) { | |
| 1130 /* warn this file didn't open, but move on to ne… | |
| 1131 weprintf("fopen %s:", *files); | |
| 1132 ret = 1; | |
| 1133 } | |
| 1134 files++; | |
| 1135 } | |
| 1136 | |
| 1137 return !file; | |
| 1138 } | |
| 1139 | |
/* test if stream is at EOF
 * a NULL stream counts as EOF
 * peeks one byte and pushes it back, so the stream position is unchanged
 */
static int
is_eof(FILE *f)
{
	int ch;

	if (f == NULL)
		return 1;
	if (feof(f))
		return 1;

	ch = fgetc(f);
	if (ch == EOF) {
		if (ferror(f))
			eprintf("fgetc:");
		return 1;
	}
	if (ungetc(ch, f) == EOF)
		eprintf("ungetc EOF\n");

	return 0;
}
| 1157 | |
| 1158 /* perform writes that were scheduled | |
| 1159 * for aci this is check_puts(string, stdout) | |
| 1160 * for r this is write_file(path, stdout) | |
| 1161 */ | |
| 1162 static void | |
| 1163 do_writes(void) | |
| 1164 { | |
| 1165 Cmd *c; | |
| 1166 size_t i; | |
| 1167 | |
| 1168 for (i = 0; i < writes.size; i++) { | |
| 1169 c = writes.data[i]; | |
| 1170 c->u.acir.print(c->u.acir.str.str, stdout); | |
| 1171 } | |
| 1172 writes.size = 0; | |
| 1173 } | |
| 1174 | |
| 1175 /* used for r's u.acir.print() | |
| 1176 * FIXME: something like util's concat() would be better | |
| 1177 */ | |
| 1178 static void | |
| 1179 write_file(char *path, FILE *out) | |
| 1180 { | |
| 1181 FILE *in = fopen(path, "r"); | |
| 1182 if (!in) /* no file is treated as empty file */ | |
| 1183 return; | |
| 1184 | |
| 1185 while (read_line(in, &genbuf) != EOF) | |
| 1186 check_puts(genbuf.str, out); | |
| 1187 | |
| 1188 fshut(in, path); | |
| 1189 } | |
| 1190 | |
/* write s (skipped when NULL) followed by a newline to f
 * a failed write is reported through eprintf
 */
static void
check_puts(char *s, FILE *f)
{
	if (s != NULL) {
		if (fputs(s, f) == EOF)
			eprintf("fputs:");
	}
	if (fputs("\n", f) == EOF)
		eprintf("fputs:");
}
| 1199 | |
| 1200 /* iterate from beg to end updating ranges so we don't miss any commands | |
| 1201 * e.g. sed -n '1d;1,3p' should still print lines 2 and 3 | |
| 1202 */ | |
| 1203 static void | |
| 1204 update_ranges(Cmd *beg, Cmd *end) | |
| 1205 { | |
| 1206 while (beg < end) | |
| 1207 in_range(beg++); | |
| 1208 } | |
| 1209 | |
| 1210 /* | |
| 1211 * Sed functions | |
| 1212 */ | |
| 1213 static void | |
| 1214 cmd_a(Cmd *c) | |
| 1215 { | |
| 1216 if (in_range(c)) | |
| 1217 push(&writes, c); | |
| 1218 } | |
| 1219 | |
| 1220 static void | |
| 1221 cmd_b(Cmd *c) | |
| 1222 { | |
| 1223 if (!in_range(c)) | |
| 1224 return; | |
| 1225 | |
| 1226 /* if we jump backwards update to end, otherwise update to desti… | |
| 1227 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap); | |
| 1228 pc = c->u.jump; | |
| 1229 } | |
| 1230 | |
| 1231 static void | |
| 1232 cmd_c(Cmd *c) | |
| 1233 { | |
| 1234 if (!in_range(c)) | |
| 1235 return; | |
| 1236 | |
| 1237 /* write the text on the last line of the match */ | |
| 1238 if (!c->in_match) | |
| 1239 check_puts(c->u.acir.str.str, stdout); | |
| 1240 /* otherwise start the next cycle without printing pattern space | |
| 1241 * effectively deleting the text */ | |
| 1242 new_next(); | |
| 1243 } | |
| 1244 | |
| 1245 static void | |
| 1246 cmd_d(Cmd *c) | |
| 1247 { | |
| 1248 if (!in_range(c)) | |
| 1249 return; | |
| 1250 | |
| 1251 new_next(); | |
| 1252 } | |
| 1253 | |
| 1254 static void | |
| 1255 cmd_D(Cmd *c) | |
| 1256 { | |
| 1257 char *p; | |
| 1258 | |
| 1259 if (!in_range(c)) | |
| 1260 return; | |
| 1261 | |
| 1262 if ((p = strchr(patt.str, '\n'))) { | |
| 1263 p++; | |
| 1264 memmove(patt.str, p, strlen(p) + 1); | |
| 1265 old_next(); | |
| 1266 } else { | |
| 1267 new_next(); | |
| 1268 } | |
| 1269 } | |
| 1270 | |
| 1271 static void | |
| 1272 cmd_g(Cmd *c) | |
| 1273 { | |
| 1274 if (in_range(c)) | |
| 1275 stracpy(&patt, hold.str); | |
| 1276 } | |
| 1277 | |
| 1278 static void | |
| 1279 cmd_G(Cmd *c) | |
| 1280 { | |
| 1281 if (!in_range(c)) | |
| 1282 return; | |
| 1283 | |
| 1284 stracat(&patt, "\n"); | |
| 1285 stracat(&patt, hold.str); | |
| 1286 } | |
| 1287 | |
| 1288 static void | |
| 1289 cmd_h(Cmd *c) | |
| 1290 { | |
| 1291 if (in_range(c)) | |
| 1292 stracpy(&hold, patt.str); | |
| 1293 } | |
| 1294 | |
| 1295 static void | |
| 1296 cmd_H(Cmd *c) | |
| 1297 { | |
| 1298 if (!in_range(c)) | |
| 1299 return; | |
| 1300 | |
| 1301 stracat(&hold, "\n"); | |
| 1302 stracat(&hold, patt.str); | |
| 1303 } | |
| 1304 | |
| 1305 static void | |
| 1306 cmd_i(Cmd *c) | |
| 1307 { | |
| 1308 if (in_range(c)) | |
| 1309 check_puts(c->u.acir.str.str, stdout); | |
| 1310 } | |
| 1311 | |
| 1312 /* I think it makes sense to print invalid UTF-8 sequences in octal to s… | |
| 1313 * the "visually unambiguous form" sed(1p) | |
| 1314 */ | |
/* l: when the line is selected, write the pattern space to stdout in a
 * visually unambiguous form, terminated by "$" and a newline
 * - ASCII bytes with an entry in the escapes table (backslash, \a, \b, \f,
 *   \r, \t, \v) are written as that two character escape
 * - bytes for which charntorune() reports a short sequence (returns 0) or
 *   decodes Runeerror are written as backslash plus three octal digits
 * - every other rune is written verbatim with fwrite()
 * no line folding is done
 */
| 1315 static void | |
| 1316 cmd_l(Cmd *c) | |
| 1317 { | |
| 1318 Rune r; | |
| 1319 char *p, *end; | |
| 1320 size_t rlen; | |
| 1321 | |
| 1322 char *escapes[] = { /* FIXME: 7 entries and search instead of 12… | |
| 1323 ['\\'] = "\\\\", ['\a'] = "\\a", ['\b'] = "\\b", | |
| 1324 ['\f'] = "\\f" , ['\r'] = "\\r", ['\t'] = "\\t", | |
| 1325 ['\v'] = "\\v" , [0x7f] = NULL, /* fill out the table */ | |
| 1326 }; | |
| 1327 | |
| 1328 if (!in_range(c)) | |
| 1329 return; | |
| 1330 | |
| 1331 /* FIXME: line wrapping. sed(1p) says "length at which folding o… | |
| 1332 * unspecified, but should be appropriate for the output device" | |
| 1333 * just wrap at 80 Runes? | |
| 1334 */ | |
| 1335 for (p = patt.str, end = p + strlen(p); p < end; p += rlen) { | |
| 1336 if (isascii(*p) && escapes[(unsigned int)*p]) { | |
| 1337 fputs(escapes[(unsigned int)*p], stdout); | |
| 1338 rlen = 1; | |
| 1339 } else if (!(rlen = charntorune(&r, p, end - p))) { | |
| 1340 /* ran out of chars, print the bytes of the shor… | |
| 1341 for (; p < end; p++) | |
| 1342 printf("\\%03hho", (unsigned char)*p); | |
| 1343 break; | |
| 1344 } else if (r == Runeerror) { | |
| 1345 for (; rlen; rlen--, p++) | |
| 1346 printf("\\%03hho", (unsigned char)*p); | |
| 1347 } else { | |
| 1348 while (fwrite(p, rlen, 1, stdout) < 1 && errno =… | |
| 1349 ; | |
| 1350 if (ferror(stdout)) | |
| 1351 eprintf("fwrite:"); | |
| 1352 } | |
| 1353 } | |
| 1354 check_puts("$", stdout); | |
| 1355 } | |
| 1356 | |
| 1357 static void | |
| 1358 cmd_n(Cmd *c) | |
| 1359 { | |
| 1360 if (!in_range(c)) | |
| 1361 return; | |
| 1362 | |
| 1363 if (!gflags.n) | |
| 1364 check_puts(patt.str, stdout); | |
| 1365 do_writes(); | |
| 1366 new_line(); | |
| 1367 } | |
| 1368 | |
| 1369 static void | |
| 1370 cmd_N(Cmd *c) | |
| 1371 { | |
| 1372 if (!in_range(c)) | |
| 1373 return; | |
| 1374 do_writes(); | |
| 1375 app_line(); | |
| 1376 } | |
| 1377 | |
| 1378 static void | |
| 1379 cmd_p(Cmd *c) | |
| 1380 { | |
| 1381 if (in_range(c)) | |
| 1382 check_puts(patt.str, stdout); | |
| 1383 } | |
| 1384 | |
| 1385 static void | |
| 1386 cmd_P(Cmd *c) | |
| 1387 { | |
| 1388 char *p; | |
| 1389 | |
| 1390 if (!in_range(c)) | |
| 1391 return; | |
| 1392 | |
| 1393 if ((p = strchr(patt.str, '\n'))) | |
| 1394 *p = '\0'; | |
| 1395 | |
| 1396 check_puts(patt.str, stdout); | |
| 1397 | |
| 1398 if (p) | |
| 1399 *p = '\n'; | |
| 1400 } | |
| 1401 | |
| 1402 static void | |
| 1403 cmd_q(Cmd *c) | |
| 1404 { | |
| 1405 if (!in_range(c)) | |
| 1406 return; | |
| 1407 | |
| 1408 if (!gflags.n) | |
| 1409 check_puts(patt.str, stdout); | |
| 1410 do_writes(); | |
| 1411 gflags.halt = 1; | |
| 1412 } | |
| 1413 | |
| 1414 static void | |
| 1415 cmd_r(Cmd *c) | |
| 1416 { | |
| 1417 if (in_range(c)) | |
| 1418 push(&writes, c); | |
| 1419 } | |
| 1420 | |
/* s: when the line is selected, substitute matches of the command's regex,
 * or of the most recently used regex when the command has none, in the
 * pattern space
 * the result is built in genbuf: the text before each match, then either the
 * replacement (& expands to the whole match, backslash digit to a group
 * taken from pmatch, backslash anything else to that character) or, for a
 * match that is not selected, the matched text itself
 * when rm_eo of the match is 0 one more rune is copied so the scan advances
 * if nothing was substituted the pattern space is left untouched; otherwise
 * gflags.s is set, the rest of the line is appended, genbuf and patt are
 * swapped, and the new pattern space is written to stdout for the p flag and
 * to the w file when one was given
 * NOTE(review): several code lines are cut off in this view, among them the
 *               occurrence test, the replacement loop header and the back
 *               reference bounds check; confirm against the full source
 */
| 1421 static void | |
| 1422 cmd_s(Cmd *c) | |
| 1423 { | |
| 1424 String tmp; | |
| 1425 Rune r; | |
| 1426 size_t plen, rlen, len; | |
| 1427 char *p, *s, *end; | |
| 1428 unsigned int matches = 0, last_empty = 1, qflag = 0, cflags = 0; | |
| 1429 regex_t *re; | |
| 1430 regmatch_t *rm, *pmatch = NULL; | |
| 1431 | |
| 1432 if (!in_range(c)) | |
| 1433 return; | |
| 1434 | |
| 1435 if (!c->u.s.re && !lastre) | |
| 1436 leprintf("no previous regex"); | |
| 1437 | |
| 1438 re = c->u.s.re ? c->u.s.re : lastre; | |
| 1439 lastre = re; | |
| 1440 | |
| 1441 plen = re->re_nsub + 1; | |
| 1442 pmatch = ereallocarray(NULL, plen, sizeof(regmatch_t)); | |
| 1443 | |
| 1444 *genbuf.str = '\0'; | |
| 1445 s = patt.str; | |
| 1446 | |
| 1447 while (!qflag && !regexec(re, s, plen, pmatch, cflags)) { | |
| 1448 cflags = REG_NOTBOL; /* match against beginning of line … | |
| 1449 if (!*s) /* match against empty string first time, but n… | |
| 1450 qflag = 1; | |
| 1451 | |
| 1452 /* don't substitute if last match was not empty but this… | |
| 1453 * s_a*_._g | |
| 1454 * foobar -> .f.o.o.b.r. | |
| 1455 */ | |
| 1456 if ((last_empty || pmatch[0].rm_eo) && | |
| 1457 (++matches == c->u.s.occurrence || !c->u.s.occurrenc… | |
| 1458 /* copy over everything before the match */ | |
| 1459 strnacat(&genbuf, s, pmatch[0].rm_so); | |
| 1460 | |
| 1461 /* copy over replacement text, taking into accou… | |
| 1462 for (p = c->u.s.repl.str, len = strcspn(p, "\\&"… | |
| 1463 strnacat(&genbuf, p, len); | |
| 1464 p += len; | |
| 1465 switch (*p) { | |
| 1466 default: leprintf("this shouldn't be pos… | |
| 1467 case '\0': | |
| 1468 /* we're at the end, back up one… | |
| 1469 * the null byte to break out of… | |
| 1470 --p; | |
| 1471 break; | |
| 1472 case '&': | |
| 1473 strnacat(&genbuf, s + pmatch[0].… | |
| 1474 break; | |
| 1475 case '\\': | |
| 1476 if (isdigit(*++p)) { /* backrefe… | |
| 1477 /* only need to check he… | |
| 1478 if (!c->u.s.re && (size_… | |
| 1479 leprintf("back r… | |
| 1480 rm = &pmatch[*p - '0']; | |
| 1481 strnacat(&genbuf, s + rm… | |
| 1482 } else { /* character after back… | |
| 1483 strnacat(&genbuf, p, 1); | |
| 1484 } | |
| 1485 break; | |
| 1486 } | |
| 1487 } | |
| 1488 } else { | |
| 1489 /* not replacing, copy over everything up to and… | |
| 1490 strnacat(&genbuf, s, pmatch[0].rm_eo); | |
| 1491 } | |
| 1492 | |
| 1493 if (!pmatch[0].rm_eo) { /* empty match, advance one rune… | |
| 1494 end = s + strlen(s); | |
| 1495 rlen = charntorune(&r, s, end - s); | |
| 1496 | |
| 1497 if (!rlen) { /* ran out of bytes, copy short seq… | |
| 1498 stracat(&genbuf, s); | |
| 1499 s = end; | |
| 1500 } else { /* copy whether or not it's a good rune… | |
| 1501 strnacat(&genbuf, s, rlen); | |
| 1502 s += rlen; | |
| 1503 } | |
| 1504 } | |
| 1505 last_empty = !pmatch[0].rm_eo; | |
| 1506 s += pmatch[0].rm_eo; | |
| 1507 } | |
| 1508 free(pmatch); | |
| 1509 | |
| 1510 if (!(matches && matches >= c->u.s.occurrence)) /* no replacemen… | |
| 1511 return; | |
| 1512 | |
| 1513 gflags.s = 1; | |
| 1514 | |
| 1515 stracat(&genbuf, s); | |
| 1516 | |
| 1517 tmp = patt; | |
| 1518 patt = genbuf; | |
| 1519 genbuf = tmp; | |
| 1520 | |
| 1521 if (c->u.s.p) | |
| 1522 check_puts(patt.str, stdout); | |
| 1523 if (c->u.s.file) | |
| 1524 check_puts(patt.str, c->u.s.file); | |
| 1525 } | |
| 1526 | |
| 1527 static void | |
| 1528 cmd_t(Cmd *c) | |
| 1529 { | |
| 1530 if (!in_range(c) || !gflags.s) | |
| 1531 return; | |
| 1532 | |
| 1533 /* if we jump backwards update to end, otherwise update to desti… | |
| 1534 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap); | |
| 1535 pc = c->u.jump; | |
| 1536 gflags.s = 0; | |
| 1537 } | |
| 1538 | |
| 1539 static void | |
| 1540 cmd_w(Cmd *c) | |
| 1541 { | |
| 1542 if (in_range(c)) | |
| 1543 check_puts(patt.str, c->u.file); | |
| 1544 } | |
| 1545 | |
| 1546 static void | |
| 1547 cmd_x(Cmd *c) | |
| 1548 { | |
| 1549 String tmp; | |
| 1550 | |
| 1551 if (!in_range(c)) | |
| 1552 return; | |
| 1553 | |
| 1554 tmp = patt; | |
| 1555 patt = hold; | |
| 1556 hold = tmp; | |
| 1557 } | |
| 1558 | |
/* y: when the line is selected, transliterate the pattern space rune by rune
 * into genbuf, then swap genbuf and patt
 * each decoded rune is searched for in set1, which is scanned up to its
 * terminating zero rune; a hit is replaced by a rune encoded from set2,
 * anything else is copied through unchanged, as are bytes that decode to
 * Runeerror and a trailing short sequence
 * NOTE(review): the set2 index expression is cut off in this view;
 *               presumably it is the position of the hit within set1,
 *               confirm against the full source
 */
| 1559 static void | |
| 1560 cmd_y(Cmd *c) | |
| 1561 { | |
| 1562 String tmp; | |
| 1563 Rune r, *rp; | |
| 1564 size_t n, rlen; | |
| 1565 char *s, *end, buf[UTFmax]; | |
| 1566 | |
| 1567 if (!in_range(c)) | |
| 1568 return; | |
| 1569 | |
| 1570 *genbuf.str = '\0'; | |
| 1571 for (s = patt.str, end = s + strlen(s); *s; s += rlen) { | |
| 1572 if (!(rlen = charntorune(&r, s, end - s))) { /* ran out … | |
| 1573 stracat(&genbuf, s); | |
| 1574 break; | |
| 1575 } else if (r == Runeerror) { /* bad UTF-8 sequence, copy… | |
| 1576 strnacat(&genbuf, s, rlen); | |
| 1577 } else { | |
| 1578 for (rp = c->u.y.set1; *rp; rp++) | |
| 1579 if (*rp == r) | |
| 1580 break; | |
| 1581 if (*rp) { /* found r in set1, replace with Rune… | |
| 1582 n = runetochar(buf, c->u.y.set2 + (rp - … | |
| 1583 strnacat(&genbuf, buf, n); | |
| 1584 } else { | |
| 1585 strnacat(&genbuf, s, rlen); | |
| 1586 } | |
| 1587 } | |
| 1588 } | |
| 1589 tmp = patt; | |
| 1590 patt = genbuf; | |
| 1591 genbuf = tmp; | |
| 1592 } | |
| 1593 | |
| 1594 static void | |
| 1595 cmd_colon(Cmd *c) | |
| 1596 { | |
| 1597 } | |
| 1598 | |
| 1599 static void | |
| 1600 cmd_equal(Cmd *c) | |
| 1601 { | |
| 1602 if (in_range(c)) | |
| 1603 printf("%zu\n", lineno); | |
| 1604 } | |
| 1605 | |
| 1606 static void | |
| 1607 cmd_lbrace(Cmd *c) | |
| 1608 { | |
| 1609 Cmd *jump; | |
| 1610 | |
| 1611 if (in_range(c)) | |
| 1612 return; | |
| 1613 | |
| 1614 /* update ranges on all commands we skip */ | |
| 1615 jump = prog + c->u.offset; | |
| 1616 update_ranges(c + 1, jump); | |
| 1617 pc = jump; | |
| 1618 } | |
| 1619 | |
| 1620 static void | |
| 1621 cmd_rbrace(Cmd *c) | |
| 1622 { | |
| 1623 } | |
| 1624 | |
| 1625 /* not actually a sed function, but acts like one, put in last spot of s… | |
| 1626 static void | |
| 1627 cmd_last(Cmd *c) | |
| 1628 { | |
| 1629 if (!gflags.n) | |
| 1630 check_puts(patt.str, stdout); | |
| 1631 do_writes(); | |
| 1632 new_next(); | |
| 1633 } | |
| 1634 | |
| 1635 /* | |
| 1636 * Actions | |
| 1637 */ | |
| 1638 | |
| 1639 /* read new line, continue current cycle */ | |
| 1640 static void | |
| 1641 new_line(void) | |
| 1642 { | |
| 1643 while (read_line(file, &patt) == EOF) { | |
| 1644 if (next_file()) { | |
| 1645 gflags.halt = 1; | |
| 1646 return; | |
| 1647 } | |
| 1648 } | |
| 1649 gflags.s = 0; | |
| 1650 lineno++; | |
| 1651 } | |
| 1652 | |
| 1653 /* append new line, continue current cycle | |
| 1654 * FIXME: used for N, POSIX specifies do not print pattern space when ou… | |
| 1655 * input, but GNU does so busybox does as well. Currently we don'… | |
| 1656 * Should we? | |
| 1657 */ | |
| 1658 static void | |
| 1659 app_line(void) | |
| 1660 { | |
| 1661 while (read_line(file, &genbuf) == EOF) { | |
| 1662 if (next_file()) { | |
| 1663 gflags.halt = 1; | |
| 1664 return; | |
| 1665 } | |
| 1666 } | |
| 1667 | |
| 1668 stracat(&patt, "\n"); | |
| 1669 stracat(&patt, genbuf.str); | |
| 1670 gflags.s = 0; | |
| 1671 lineno++; | |
| 1672 } | |
| 1673 | |
| 1674 /* read new line, start new cycle */ | |
| 1675 static void | |
| 1676 new_next(void) | |
| 1677 { | |
| 1678 *patt.str = '\0'; | |
| 1679 update_ranges(pc + 1, prog + pcap); | |
| 1680 new_line(); | |
| 1681 pc = prog - 1; | |
| 1682 } | |
| 1683 | |
| 1684 /* keep old pattern space, start new cycle */ | |
| 1685 static void | |
| 1686 old_next(void) | |
| 1687 { | |
| 1688 update_ranges(pc + 1, prog + pcap); | |
| 1689 pc = prog - 1; | |
| 1690 } | |
| 1691 | |
/* entry point
 * options: -n sets gflags.n, -r and -E set gflags.E, -e compiles its
 * argument with compile(arg, 0), -f compiles its argument with
 * compile(arg, 1); anything else prints usage
 * without -e or -f the first operand is compiled as the script, and having
 * neither a script option nor an operand is a usage error
 * the program is then resized and cmd_last is installed as its final
 * instruction, the remaining operands become the input files, and the script
 * is run
 * the exit status is ret combined with the results of shutting stdin and
 * stdout
 */
| 1692 int | |
| 1693 main(int argc, char *argv[]) | |
| 1694 { | |
| 1695 char *arg; | |
| 1696 int script = 0; | |
| 1697 | |
| 1698 ARGBEGIN { | |
| 1699 case 'n': | |
| 1700 gflags.n = 1; | |
| 1701 break; | |
| 1702 case 'r': | |
| 1703 case 'E': | |
| 1704 gflags.E = 1; | |
| 1705 break; | |
| 1706 case 'e': | |
| 1707 arg = EARGF(usage()); | |
| 1708 compile(arg, 0); | |
| 1709 script = 1; | |
| 1710 break; | |
| 1711 case 'f': | |
| 1712 arg = EARGF(usage()); | |
| 1713 compile(arg, 1); | |
| 1714 script = 1; | |
| 1715 break; | |
| 1716 default : usage(); | |
| 1717 } ARGEND | |
| 1718 | |
| 1719 /* no script to run */ | |
| 1720 if (!script && !argc) | |
| 1721 usage(); | |
| 1722 | |
| 1723 /* no script yet, next argument is script */ | |
| 1724 if (!script) | |
| 1725 compile(*argv++, 0); | |
| 1726 | |
| 1727 /* shrink/grow memory to fit and add our last instruction */ | |
| 1728 resize((void **)&prog, &pcap, sizeof(*prog), pc - prog + 1, NULL… | |
| 1729 pc = prog + pcap - 1; | |
| 1730 pc->fninfo = &(Fninfo){ cmd_last, NULL, NULL, 0 }; | |
| 1731 | |
| 1732 files = argv; | |
| 1733 run(); | |
| 1734 | |
| 1735 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"); | |
| 1736 | |
| 1737 return ret; | |
| 1738 } | |