| join.c - sbase - suckless unix tools | |
| git clone git://git.suckless.org/sbase | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| join.c (9795B) | |
| --- | |
| 1 /* See LICENSE file for copyright and license details. */ | |
| 2 #include <ctype.h> | |
| 3 #include <stdint.h> | |
| 4 #include <stdio.h> | |
| 5 #include <stdlib.h> | |
| 6 #include <string.h> | |
| 7 | |
| 8 #include "text.h" | |
| 9 #include "utf.h" | |
| 10 #include "util.h" | |
| 11 | |
/* Growable-array policy: arrays start with INIT slots and grow by a factor of GROW. */
enum {
	INIT = 1,
	GROW = 2
};

/* Values for the `reset` argument of addtospan(): append to the span or restart it. */
enum {
	EXPAND = 0,
	RESET  = 1
};

/* jlinecmp() result when neither line has its join field. */
enum { FIELD_ERROR = -2 };
| 23 | |
/*
 * One field of an input line: `len` bytes starting at `s`.
 * Fields are length-counted slices of the owning line's text buffer.
 */
struct field {
	char   *s;   /* first byte of the field */
	size_t  len; /* number of bytes in the field */
};
| 28 | |
| 29 struct jline { | |
| 30 struct line text; | |
| 31 size_t nf; | |
| 32 size_t maxf; | |
| 33 struct field *fields; | |
| 34 }; | |
| 35 | |
/*
 * One element of the -o output list.
 * fileno is 1 or 2 for a field of that file, or 0 for the join field;
 * fldno is the zero-based field index within that file.
 */
struct spec {
	size_t fileno;
	size_t fldno;
};
| 40 | |
/* Growable list of output specs built from the -o argument. */
struct outlist {
	size_t        ns;    /* number of specs in use */
	size_t        maxs;  /* allocated capacity of `specs` */
	struct spec **specs; /* array of pointers to individually allocated specs */
};
| 46 | |
/* Growable run of consecutive lines read from one input file. */
struct span {
	size_t         nl;    /* number of lines currently in the span */
	size_t         maxl;  /* allocated capacity of `lines` */
	struct jline **lines; /* array of pointers to parsed lines */
};
| 52 | |
| 53 static char *sep = NULL; | |
| 54 static char *replace = NULL; | |
| 55 static const char defaultofs = ' '; | |
| 56 static const int jfield = 1; /* POSIX default join field */ | |
| 57 static int unpairsa = 0, unpairsb = 0; | |
| 58 static int oflag = 0; | |
| 59 static int pairs = 1; | |
| 60 static size_t seplen; | |
| 61 static struct outlist output; | |
| 62 | |
| 63 static void | |
| 64 usage(void) | |
| 65 { | |
| 66 eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] " | |
| 67 "[-a | -v fileno] [-t delim] file1 file2\n", argv0); | |
| 68 } | |
| 69 | |
| 70 static void | |
| 71 prfield(struct field *fp) | |
| 72 { | |
| 73 if (fwrite(fp->s, 1, fp->len, stdout) != fp->len) | |
| 74 eprintf("fwrite:"); | |
| 75 } | |
| 76 | |
| 77 static void | |
| 78 prsep(void) | |
| 79 { | |
| 80 if (sep) | |
| 81 fwrite(sep, 1, seplen, stdout); | |
| 82 else | |
| 83 putchar(defaultofs); | |
| 84 } | |
| 85 | |
| 86 static void | |
| 87 swaplines(struct jline *la, struct jline *lb) | |
| 88 { | |
| 89 struct jline tmp; | |
| 90 | |
| 91 tmp = *la; | |
| 92 *la = *lb; | |
| 93 *lb = tmp; | |
| 94 } | |
| 95 | |
| 96 static void | |
| 97 prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb) | |
| 98 { | |
| 99 struct spec *sp; | |
| 100 struct field *joinfield; | |
| 101 size_t i; | |
| 102 | |
| 103 if (jfa >= la->nf || jfb >= lb->nf) | |
| 104 return; | |
| 105 | |
| 106 joinfield = &la->fields[jfa]; | |
| 107 | |
| 108 if (oflag) { | |
| 109 for (i = 0; i < output.ns; i++) { | |
| 110 sp = output.specs[i]; | |
| 111 | |
| 112 if (sp->fileno == 1) { | |
| 113 if (sp->fldno < la->nf) | |
| 114 prfield(&la->fields[sp->fldno]); | |
| 115 else if (replace) | |
| 116 fputs(replace, stdout); | |
| 117 } else if (sp->fileno == 2) { | |
| 118 if (sp->fldno < lb->nf) | |
| 119 prfield(&lb->fields[sp->fldno]); | |
| 120 else if (replace) | |
| 121 fputs(replace, stdout); | |
| 122 } else if (sp->fileno == 0) { | |
| 123 prfield(joinfield); | |
| 124 } | |
| 125 | |
| 126 if (i < output.ns - 1) | |
| 127 prsep(); | |
| 128 } | |
| 129 } else { | |
| 130 prfield(joinfield); | |
| 131 prsep(); | |
| 132 | |
| 133 for (i = 0; i < la->nf; i++) { | |
| 134 if (i != jfa) { | |
| 135 prfield(&la->fields[i]); | |
| 136 prsep(); | |
| 137 } | |
| 138 } | |
| 139 for (i = 0; i < lb->nf; i++) { | |
| 140 if (i != jfb) { | |
| 141 prfield(&lb->fields[i]); | |
| 142 if (i < lb->nf - 1) | |
| 143 prsep(); | |
| 144 } | |
| 145 } | |
| 146 } | |
| 147 putchar('\n'); | |
| 148 } | |
| 149 | |
| 150 static void | |
| 151 prline(struct jline *lp) | |
| 152 { | |
| 153 if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.l… | |
| 154 eprintf("fwrite:"); | |
| 155 putchar('\n'); | |
| 156 } | |
| 157 | |
| 158 static int | |
| 159 jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb) | |
| 160 { | |
| 161 int status; | |
| 162 | |
| 163 /* return FIELD_ERROR if both lines are short */ | |
| 164 if (jfa >= la->nf) { | |
| 165 status = (jfb >= lb->nf) ? FIELD_ERROR : -1; | |
| 166 } else if (jfb >= lb->nf) { | |
| 167 status = 1; | |
| 168 } else { | |
| 169 status = memcmp(la->fields[jfa].s, lb->fields[jfb].s, | |
| 170 MAX(la->fields[jfa].len, lb->fields[jfb]… | |
| 171 LIMIT(status, -1, 1); | |
| 172 } | |
| 173 | |
| 174 return status; | |
| 175 } | |
| 176 | |
| 177 static void | |
| 178 addfield(struct jline *lp, char *sp, size_t len) | |
| 179 { | |
| 180 if (lp->nf >= lp->maxf) { | |
| 181 lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf), | |
| 182 sizeof(struct field)); | |
| 183 lp->maxf *= GROW; | |
| 184 } | |
| 185 lp->fields[lp->nf].s = sp; | |
| 186 lp->fields[lp->nf].len = len; | |
| 187 lp->nf++; | |
| 188 } | |
| 189 | |
| 190 static void | |
| 191 prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb) | |
| 192 { | |
| 193 size_t i, j; | |
| 194 | |
| 195 for (i = 0; i < (spa->nl - 1); i++) | |
| 196 for (j = 0; j < (spb->nl - 1); j++) | |
| 197 prjoin(spa->lines[i], spb->lines[j], jfa, jfb); | |
| 198 } | |
| 199 | |
| 200 static struct jline * | |
| 201 makeline(char *s, size_t len) | |
| 202 { | |
| 203 struct jline *lp; | |
| 204 char *tmp; | |
| 205 size_t i, end; | |
| 206 | |
| 207 if (s[len - 1] == '\n') | |
| 208 s[--len] = '\0'; | |
| 209 | |
| 210 lp = ereallocarray(NULL, INIT, sizeof(struct jline)); | |
| 211 lp->text.data = s; | |
| 212 lp->text.len = len; | |
| 213 lp->fields = ereallocarray(NULL, INIT, sizeof(struct field)); | |
| 214 lp->nf = 0; | |
| 215 lp->maxf = INIT; | |
| 216 | |
| 217 for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++) | |
| 218 ; | |
| 219 while (i < lp->text.len) { | |
| 220 if (sep) { | |
| 221 if ((lp->text.len - i) < seplen || | |
| 222 !(tmp = memmem(lp->text.data + i, | |
| 223 lp->text.len - i, sep, seplen… | |
| 224 goto eol; | |
| 225 } | |
| 226 end = tmp - lp->text.data; | |
| 227 addfield(lp, lp->text.data + i, end - i); | |
| 228 i = end + seplen; | |
| 229 } else { | |
| 230 for (end = i; !(isblank(lp->text.data[end])); en… | |
| 231 if (end + 1 == lp->text.len) | |
| 232 goto eol; | |
| 233 } | |
| 234 addfield(lp, lp->text.data + i, end - i); | |
| 235 for (i = end; isblank(lp->text.data[i]); i++) | |
| 236 ; | |
| 237 } | |
| 238 } | |
| 239 eol: | |
| 240 addfield(lp, lp->text.data + i, lp->text.len - i); | |
| 241 | |
| 242 return lp; | |
| 243 } | |
| 244 | |
| 245 static int | |
| 246 addtospan(struct span *sp, FILE *fp, int reset) | |
| 247 { | |
| 248 char *newl = NULL; | |
| 249 ssize_t len; | |
| 250 size_t size = 0; | |
| 251 | |
| 252 if ((len = getline(&newl, &size, fp)) < 0) { | |
| 253 if (ferror(fp)) | |
| 254 eprintf("getline:"); | |
| 255 else | |
| 256 return 0; | |
| 257 } | |
| 258 | |
| 259 if (reset) | |
| 260 sp->nl = 0; | |
| 261 | |
| 262 if (sp->nl >= sp->maxl) { | |
| 263 sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl), | |
| 264 sizeof(struct jline *)); | |
| 265 sp->maxl *= GROW; | |
| 266 } | |
| 267 | |
| 268 sp->lines[sp->nl] = makeline(newl, len); | |
| 269 sp->nl++; | |
| 270 return 1; | |
| 271 } | |
| 272 | |
| 273 static void | |
| 274 initspan(struct span *sp) | |
| 275 { | |
| 276 sp->nl = 0; | |
| 277 sp->maxl = INIT; | |
| 278 sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *)); | |
| 279 } | |
| 280 | |
| 281 static void | |
| 282 freespan(struct span *sp) | |
| 283 { | |
| 284 size_t i; | |
| 285 | |
| 286 for (i = 0; i < sp->nl; i++) { | |
| 287 free(sp->lines[i]->fields); | |
| 288 free(sp->lines[i]->text.data); | |
| 289 } | |
| 290 free(sp->lines); | |
| 291 } | |
| 292 | |
| 293 static void | |
| 294 initolist(struct outlist *olp) | |
| 295 { | |
| 296 olp->ns = 0; | |
| 297 olp->maxs = 1; | |
| 298 olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *)); | |
| 299 } | |
| 300 | |
| 301 static void | |
| 302 addspec(struct outlist *olp, struct spec *sp) | |
| 303 { | |
| 304 if (olp->ns >= olp->maxs) { | |
| 305 olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs… | |
| 306 sizeof(struct spec *)); | |
| 307 olp->maxs *= GROW; | |
| 308 } | |
| 309 olp->specs[olp->ns] = sp; | |
| 310 olp->ns++; | |
| 311 } | |
| 312 | |
| 313 static struct spec * | |
| 314 makespec(char *s) | |
| 315 { | |
| 316 struct spec *sp; | |
| 317 int fileno; | |
| 318 size_t fldno; | |
| 319 | |
| 320 if (!strcmp(s, "0")) { /* join field must be 0 and nothing els… | |
| 321 fileno = 0; | |
| 322 fldno = 0; | |
| 323 } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') { | |
| 324 fileno = s[0] - '0'; | |
| 325 fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - … | |
| 326 } else { | |
| 327 eprintf("%s: invalid format\n", s); | |
| 328 } | |
| 329 | |
| 330 sp = ereallocarray(NULL, INIT, sizeof(struct spec)); | |
| 331 sp->fileno = fileno; | |
| 332 sp->fldno = fldno; | |
| 333 return sp; | |
| 334 } | |
| 335 | |
/*
 * Split the -o argument s in place at commas, spaces and tabs, and append
 * one spec per element to the output list.
 */
static void
makeolist(struct outlist *olp, char *s)
{
	char *item, *next;

	for (next = s; next; ) {
		item = next;
		next = strpbrk(item, ", \t");
		if (next)
			*next++ = '\0'; /* terminate this element, step past the delimiter */
		addspec(olp, makespec(item));
	}
}
| 350 | |
| 351 static void | |
| 352 freespecs(struct outlist *olp) | |
| 353 { | |
| 354 size_t i; | |
| 355 | |
| 356 for (i = 0; i < olp->ns; i++) | |
| 357 free(olp->specs[i]); | |
| 358 } | |
| 359 | |
| 360 static void | |
| 361 join(FILE *fa, FILE *fb, size_t jfa, size_t jfb) | |
| 362 { | |
| 363 struct span spa, spb; | |
| 364 int cmp, eofa, eofb; | |
| 365 | |
| 366 initspan(&spa); | |
| 367 initspan(&spb); | |
| 368 cmp = eofa = eofb = 0; | |
| 369 | |
| 370 addtospan(&spa, fa, RESET); | |
| 371 addtospan(&spb, fb, RESET); | |
| 372 | |
| 373 while (spa.nl && spb.nl) { | |
| 374 if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb… | |
| 375 if (unpairsa) | |
| 376 prline(spa.lines[0]); | |
| 377 if (!addtospan(&spa, fa, RESET)) { | |
| 378 if (unpairsb) { /* a is EOF'd; print … | |
| 379 do | |
| 380 prline(spb.lines[0]); | |
| 381 while (addtospan(&spb, fb, RESET… | |
| 382 } | |
| 383 eofa = eofb = 1; | |
| 384 } else { | |
| 385 continue; | |
| 386 } | |
| 387 } else if (cmp > 0) { | |
| 388 if (unpairsb) | |
| 389 prline(spb.lines[0]); | |
| 390 if (!addtospan(&spb, fb, RESET)) { | |
| 391 if (unpairsa) { /* b is EOF'd; print … | |
| 392 do | |
| 393 prline(spa.lines[0]); | |
| 394 while (addtospan(&spa, fa, RESET… | |
| 395 } | |
| 396 eofa = eofb = 1; | |
| 397 } else { | |
| 398 continue; | |
| 399 } | |
| 400 } else if (cmp == 0) { | |
| 401 /* read all consecutive matching lines from a */ | |
| 402 do { | |
| 403 if (!addtospan(&spa, fa, EXPAND)) { | |
| 404 eofa = 1; | |
| 405 spa.nl++; | |
| 406 break; | |
| 407 } | |
| 408 } while (jlinecmp(spa.lines[spa.nl-1], spb.lines… | |
| 409 | |
| 410 /* read all consecutive matching lines from b */ | |
| 411 do { | |
| 412 if (!addtospan(&spb, fb, EXPAND)) { | |
| 413 eofb = 1; | |
| 414 spb.nl++; | |
| 415 break; | |
| 416 } | |
| 417 } while (jlinecmp(spa.lines[0], spb.lines[spb.nl… | |
| 418 | |
| 419 if (pairs) | |
| 420 prspanjoin(&spa, &spb, jfa, jfb); | |
| 421 | |
| 422 } else { /* FIELD_ERROR: both lines lacked join fie… | |
| 423 if (unpairsa) | |
| 424 prline(spa.lines[0]); | |
| 425 if (unpairsb) | |
| 426 prline(spb.lines[0]); | |
| 427 eofa = addtospan(&spa, fa, RESET) ? 0 : 1; | |
| 428 eofb = addtospan(&spb, fb, RESET) ? 0 : 1; | |
| 429 if (!eofa && !eofb) | |
| 430 continue; | |
| 431 } | |
| 432 | |
| 433 if (eofa) { | |
| 434 spa.nl = 0; | |
| 435 } else { | |
| 436 swaplines(spa.lines[0], spa.lines[spa.nl - 1]); … | |
| 437 spa.nl = 1; | |
| 438 } | |
| 439 | |
| 440 if (eofb) { | |
| 441 spb.nl = 0; | |
| 442 } else { | |
| 443 swaplines(spb.lines[0], spb.lines[spb.nl - 1]); … | |
| 444 spb.nl = 1; | |
| 445 } | |
| 446 } | |
| 447 freespan(&spa); | |
| 448 freespan(&spb); | |
| 449 } | |
| 450 | |
| 451 | |
| 452 int | |
| 453 main(int argc, char *argv[]) | |
| 454 { | |
| 455 size_t jf[2] = { jfield, jfield, }; | |
| 456 FILE *fp[2]; | |
| 457 int ret = 0, n; | |
| 458 char *fno; | |
| 459 | |
| 460 ARGBEGIN { | |
| 461 case '1': | |
| 462 jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE… | |
| 463 break; | |
| 464 case '2': | |
| 465 jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE… | |
| 466 break; | |
| 467 case 'a': | |
| 468 fno = EARGF(usage()); | |
| 469 if (strcmp(fno, "1") == 0) | |
| 470 unpairsa = 1; | |
| 471 else if (strcmp(fno, "2") == 0) | |
| 472 unpairsb = 1; | |
| 473 else | |
| 474 usage(); | |
| 475 break; | |
| 476 case 'e': | |
| 477 replace = EARGF(usage()); | |
| 478 break; | |
| 479 case 'o': | |
| 480 oflag = 1; | |
| 481 initolist(&output); | |
| 482 makeolist(&output, EARGF(usage())); | |
| 483 break; | |
| 484 case 't': | |
| 485 sep = EARGF(usage()); | |
| 486 break; | |
| 487 case 'v': | |
| 488 pairs = 0; | |
| 489 fno = EARGF(usage()); | |
| 490 if (strcmp(fno, "1") == 0) | |
| 491 unpairsa = 1; | |
| 492 else if (strcmp(fno, "2") == 0) | |
| 493 unpairsb = 1; | |
| 494 else | |
| 495 usage(); | |
| 496 break; | |
| 497 default: | |
| 498 usage(); | |
| 499 } ARGEND | |
| 500 | |
| 501 if (sep) | |
| 502 seplen = unescape(sep); | |
| 503 | |
| 504 if (argc != 2) | |
| 505 usage(); | |
| 506 | |
| 507 for (n = 0; n < 2; n++) { | |
| 508 if (!strcmp(argv[n], "-")) { | |
| 509 argv[n] = "<stdin>"; | |
| 510 fp[n] = stdin; | |
| 511 } else if (!(fp[n] = fopen(argv[n], "r"))) { | |
| 512 eprintf("fopen %s:", argv[n]); | |
| 513 } | |
| 514 } | |
| 515 | |
| 516 jf[0]--; | |
| 517 jf[1]--; | |
| 518 | |
| 519 join(fp[0], fp[1], jf[0], jf[1]); | |
| 520 | |
| 521 if (oflag) | |
| 522 freespecs(&output); | |
| 523 | |
| 524 if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv… | |
| 525 fshut(stdout, "<stdout>")) | |
| 526 ret = 2; | |
| 527 | |
| 528 return ret; | |
| 529 } |