join.c - sbase - suckless unix tools | |
git clone git://git.suckless.org/sbase | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
join.c (9795B) | |
--- | |
1 /* See LICENSE file for copyright and license details. */ | |
2 #include <ctype.h> | |
3 #include <stdint.h> | |
4 #include <stdio.h> | |
5 #include <stdlib.h> | |
6 #include <string.h> | |
7 | |
8 #include "text.h" | |
9 #include "utf.h" | |
10 #include "util.h" | |
11 | |
/* Sizing policy shared by the growable arrays (fields, lines, specs). */
enum {
	INIT = 1,	/* number of slots allocated up front */
	GROW = 2,	/* factor the capacity is multiplied by when full */
};

/* Values for the `reset' argument of addtospan(). */
enum {
	EXPAND = 0,	/* append the new line after those already held */
	RESET = 1,	/* restart the span; the new line becomes lines[0] */
};

/* jlinecmp() result when neither line has its join field. */
enum { FIELD_ERROR = -2, };
23 | |
/*
 * One field of an input line. makeline() stores pointers into the line's
 * own text, so the bytes are not copied and not NUL-terminated at len.
 */
struct field {
	char *s;	/* first byte of the field */
	size_t len;	/* length of the field in bytes */
};
28 | |
/* An input line together with its split-out fields. */
struct jline {
	struct line text;	/* raw line (data, len); type declared outside this file */
	size_t nf;		/* number of entries used in fields */
	size_t maxf;		/* number of entries allocated in fields */
	struct field *fields;	/* growable array, see addfield() */
};
35 | |
/* One element of the -o output list, built by makespec(). */
struct spec {
	size_t fileno;	/* 1 or 2 selects a file; 0 selects the join field */
	size_t fldno;	/* index into that file's fields array (see prjoin()) */
};
40 | |
/* Growable list of output specs given with -o. */
struct outlist {
	size_t ns;		/* number of specs stored */
	size_t maxs;		/* number of slots allocated in specs */
	struct spec **specs;	/* each entry is allocated separately */
};
46 | |
/* Growable run of lines read from one input file. */
struct span {
	size_t nl;		/* number of lines counted in the span */
	size_t maxl;		/* number of slots allocated in lines */
	struct jline **lines;	/* lines[0] is the line join() compares */
};
52 | |
static char *sep = NULL;		/* -t separator; NULL means blank-separated fields */
static char *replace = NULL;		/* -e string printed for missing -o fields */
static const char defaultofs = ' ';	/* output separator used when sep is NULL */
static const int jfield = 1;   /* POSIX default join field */
static int unpairsa = 0, unpairsb = 0;	/* print unpairable lines of file 1 / file 2 */
static int oflag = 0;			/* set when -o was given */
static int pairs = 1;			/* print joined pairs; cleared by -v */
static size_t seplen;			/* length of sep as returned by unescape() */
static struct outlist output;		/* the -o list, valid when oflag is set */
62 | |
63 static void | |
64 usage(void) | |
65 { | |
66 eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] " | |
67 "[-a | -v fileno] [-t delim] file1 file2\n", argv0); | |
68 } | |
69 | |
70 static void | |
71 prfield(struct field *fp) | |
72 { | |
73 if (fwrite(fp->s, 1, fp->len, stdout) != fp->len) | |
74 eprintf("fwrite:"); | |
75 } | |
76 | |
77 static void | |
78 prsep(void) | |
79 { | |
80 if (sep) | |
81 fwrite(sep, 1, seplen, stdout); | |
82 else | |
83 putchar(defaultofs); | |
84 } | |
85 | |
86 static void | |
87 swaplines(struct jline *la, struct jline *lb) | |
88 { | |
89 struct jline tmp; | |
90 | |
91 tmp = *la; | |
92 *la = *lb; | |
93 *lb = tmp; | |
94 } | |
95 | |
96 static void | |
97 prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb) | |
98 { | |
99 struct spec *sp; | |
100 struct field *joinfield; | |
101 size_t i; | |
102 | |
103 if (jfa >= la->nf || jfb >= lb->nf) | |
104 return; | |
105 | |
106 joinfield = &la->fields[jfa]; | |
107 | |
108 if (oflag) { | |
109 for (i = 0; i < output.ns; i++) { | |
110 sp = output.specs[i]; | |
111 | |
112 if (sp->fileno == 1) { | |
113 if (sp->fldno < la->nf) | |
114 prfield(&la->fields[sp->fldno]); | |
115 else if (replace) | |
116 fputs(replace, stdout); | |
117 } else if (sp->fileno == 2) { | |
118 if (sp->fldno < lb->nf) | |
119 prfield(&lb->fields[sp->fldno]); | |
120 else if (replace) | |
121 fputs(replace, stdout); | |
122 } else if (sp->fileno == 0) { | |
123 prfield(joinfield); | |
124 } | |
125 | |
126 if (i < output.ns - 1) | |
127 prsep(); | |
128 } | |
129 } else { | |
130 prfield(joinfield); | |
131 prsep(); | |
132 | |
133 for (i = 0; i < la->nf; i++) { | |
134 if (i != jfa) { | |
135 prfield(&la->fields[i]); | |
136 prsep(); | |
137 } | |
138 } | |
139 for (i = 0; i < lb->nf; i++) { | |
140 if (i != jfb) { | |
141 prfield(&lb->fields[i]); | |
142 if (i < lb->nf - 1) | |
143 prsep(); | |
144 } | |
145 } | |
146 } | |
147 putchar('\n'); | |
148 } | |
149 | |
150 static void | |
151 prline(struct jline *lp) | |
152 { | |
153 if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.l… | |
154 eprintf("fwrite:"); | |
155 putchar('\n'); | |
156 } | |
157 | |
158 static int | |
159 jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb) | |
160 { | |
161 int status; | |
162 | |
163 /* return FIELD_ERROR if both lines are short */ | |
164 if (jfa >= la->nf) { | |
165 status = (jfb >= lb->nf) ? FIELD_ERROR : -1; | |
166 } else if (jfb >= lb->nf) { | |
167 status = 1; | |
168 } else { | |
169 status = memcmp(la->fields[jfa].s, lb->fields[jfb].s, | |
170 MAX(la->fields[jfa].len, lb->fields[jfb]… | |
171 LIMIT(status, -1, 1); | |
172 } | |
173 | |
174 return status; | |
175 } | |
176 | |
177 static void | |
178 addfield(struct jline *lp, char *sp, size_t len) | |
179 { | |
180 if (lp->nf >= lp->maxf) { | |
181 lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf), | |
182 sizeof(struct field)); | |
183 lp->maxf *= GROW; | |
184 } | |
185 lp->fields[lp->nf].s = sp; | |
186 lp->fields[lp->nf].len = len; | |
187 lp->nf++; | |
188 } | |
189 | |
190 static void | |
191 prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb) | |
192 { | |
193 size_t i, j; | |
194 | |
195 for (i = 0; i < (spa->nl - 1); i++) | |
196 for (j = 0; j < (spb->nl - 1); j++) | |
197 prjoin(spa->lines[i], spb->lines[j], jfa, jfb); | |
198 } | |
199 | |
200 static struct jline * | |
201 makeline(char *s, size_t len) | |
202 { | |
203 struct jline *lp; | |
204 char *tmp; | |
205 size_t i, end; | |
206 | |
207 if (s[len - 1] == '\n') | |
208 s[--len] = '\0'; | |
209 | |
210 lp = ereallocarray(NULL, INIT, sizeof(struct jline)); | |
211 lp->text.data = s; | |
212 lp->text.len = len; | |
213 lp->fields = ereallocarray(NULL, INIT, sizeof(struct field)); | |
214 lp->nf = 0; | |
215 lp->maxf = INIT; | |
216 | |
217 for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++) | |
218 ; | |
219 while (i < lp->text.len) { | |
220 if (sep) { | |
221 if ((lp->text.len - i) < seplen || | |
222 !(tmp = memmem(lp->text.data + i, | |
223 lp->text.len - i, sep, seplen… | |
224 goto eol; | |
225 } | |
226 end = tmp - lp->text.data; | |
227 addfield(lp, lp->text.data + i, end - i); | |
228 i = end + seplen; | |
229 } else { | |
230 for (end = i; !(isblank(lp->text.data[end])); en… | |
231 if (end + 1 == lp->text.len) | |
232 goto eol; | |
233 } | |
234 addfield(lp, lp->text.data + i, end - i); | |
235 for (i = end; isblank(lp->text.data[i]); i++) | |
236 ; | |
237 } | |
238 } | |
239 eol: | |
240 addfield(lp, lp->text.data + i, lp->text.len - i); | |
241 | |
242 return lp; | |
243 } | |
244 | |
245 static int | |
246 addtospan(struct span *sp, FILE *fp, int reset) | |
247 { | |
248 char *newl = NULL; | |
249 ssize_t len; | |
250 size_t size = 0; | |
251 | |
252 if ((len = getline(&newl, &size, fp)) < 0) { | |
253 if (ferror(fp)) | |
254 eprintf("getline:"); | |
255 else | |
256 return 0; | |
257 } | |
258 | |
259 if (reset) | |
260 sp->nl = 0; | |
261 | |
262 if (sp->nl >= sp->maxl) { | |
263 sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl), | |
264 sizeof(struct jline *)); | |
265 sp->maxl *= GROW; | |
266 } | |
267 | |
268 sp->lines[sp->nl] = makeline(newl, len); | |
269 sp->nl++; | |
270 return 1; | |
271 } | |
272 | |
273 static void | |
274 initspan(struct span *sp) | |
275 { | |
276 sp->nl = 0; | |
277 sp->maxl = INIT; | |
278 sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *)); | |
279 } | |
280 | |
281 static void | |
282 freespan(struct span *sp) | |
283 { | |
284 size_t i; | |
285 | |
286 for (i = 0; i < sp->nl; i++) { | |
287 free(sp->lines[i]->fields); | |
288 free(sp->lines[i]->text.data); | |
289 } | |
290 free(sp->lines); | |
291 } | |
292 | |
293 static void | |
294 initolist(struct outlist *olp) | |
295 { | |
296 olp->ns = 0; | |
297 olp->maxs = 1; | |
298 olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *)); | |
299 } | |
300 | |
301 static void | |
302 addspec(struct outlist *olp, struct spec *sp) | |
303 { | |
304 if (olp->ns >= olp->maxs) { | |
305 olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs… | |
306 sizeof(struct spec *)); | |
307 olp->maxs *= GROW; | |
308 } | |
309 olp->specs[olp->ns] = sp; | |
310 olp->ns++; | |
311 } | |
312 | |
313 static struct spec * | |
314 makespec(char *s) | |
315 { | |
316 struct spec *sp; | |
317 int fileno; | |
318 size_t fldno; | |
319 | |
320 if (!strcmp(s, "0")) { /* join field must be 0 and nothing els… | |
321 fileno = 0; | |
322 fldno = 0; | |
323 } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') { | |
324 fileno = s[0] - '0'; | |
325 fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - … | |
326 } else { | |
327 eprintf("%s: invalid format\n", s); | |
328 } | |
329 | |
330 sp = ereallocarray(NULL, INIT, sizeof(struct spec)); | |
331 sp->fileno = fileno; | |
332 sp->fldno = fldno; | |
333 return sp; | |
334 } | |
335 | |
/*
 * Split the -o argument s at commas, spaces and tabs and append one spec
 * per element to olp. The delimiters in s are overwritten with NULs.
 */
static void
makeolist(struct outlist *olp, char *s)
{
	char *item, *next;

	for (item = s; item; item = next) {
		next = strpbrk(item, ", \t");
		if (next)
			*next++ = '\0';
		addspec(olp, makespec(item));
	}
}
350 | |
351 static void | |
352 freespecs(struct outlist *olp) | |
353 { | |
354 size_t i; | |
355 | |
356 for (i = 0; i < olp->ns; i++) | |
357 free(olp->specs[i]); | |
358 } | |
359 | |
/*
 * Join the lines of fa and fb on fields jfa and jfb, which are indices
 * into each line's fields array (main() passes the 1-based option values
 * minus one).
 *
 * Each span keeps the line currently under comparison in lines[0]. The
 * loop advances whichever file has the smaller join field; on a match it
 * collects the run of lines with that key from both files and prints
 * their pairings. Unpairable lines are printed when -a/-v asked for them.
 */
static void
join(FILE *fa, FILE *fb, size_t jfa, size_t jfb)
{
	struct span spa, spb;
	int cmp, eofa, eofb;

	initspan(&spa);
	initspan(&spb);
	cmp = eofa = eofb = 0;

	/* load the first line of each file; an empty file leaves nl at 0 */
	addtospan(&spa, fa, RESET);
	addtospan(&spb, fb, RESET);

	while (spa.nl && spb.nl) {
		if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb)) < 0) {
			/* a's line has no partner: report it and advance a */
			if (unpairsa)
				prline(spa.lines[0]);
			if (!addtospan(&spa, fa, RESET)) {
				if (unpairsb) {    /* a is EOF'd; print the rest of b */
					do
						prline(spb.lines[0]);
					while (addtospan(&spb, fb, RESET));
				}
				eofa = eofb = 1;
			} else {
				continue;
			}
		} else if (cmp > 0) {
			/* b's line has no partner: report it and advance b */
			if (unpairsb)
				prline(spb.lines[0]);
			if (!addtospan(&spb, fb, RESET)) {
				if (unpairsa) {    /* b is EOF'd; print the rest of a */
					do
						prline(spa.lines[0]);
					while (addtospan(&spa, fa, RESET));
				}
				eofa = eofb = 1;
			} else {
				continue;
			}
		} else if (cmp == 0) {
			/* read all consecutive matching lines from a */
			do {
				if (!addtospan(&spa, fa, EXPAND)) {
					eofa = 1;
					/*
					 * prspanjoin() leaves out the last slot of a
					 * span, so count one unused slot to keep the
					 * last real line inside the printed range
					 */
					spa.nl++;
					break;
				}
			} while (jlinecmp(spa.lines[spa.nl-1], spb.lines[0], jfa, jfb) == 0);

			/* read all consecutive matching lines from b */
			do {
				if (!addtospan(&spb, fb, EXPAND)) {
					eofb = 1;
					spb.nl++;	/* same adjustment as for a */
					break;
				}
			} while (jlinecmp(spa.lines[0], spb.lines[spb.nl-1], jfa, jfb) == 0);

			if (pairs)
				prspanjoin(&spa, &spb, jfa, jfb);

		} else {      /* FIELD_ERROR: both lines lacked join fields */
			if (unpairsa)
				prline(spa.lines[0]);
			if (unpairsb)
				prline(spb.lines[0]);
			eofa = addtospan(&spa, fa, RESET) ? 0 : 1;
			eofb = addtospan(&spb, fb, RESET) ? 0 : 1;
			if (!eofa && !eofb)
				continue;
		}

		/*
		 * Prepare the next round: an exhausted file gets an empty span,
		 * which ends the loop; otherwise the last line read (the first
		 * one past the matching run) moves to slot 0.
		 */
		if (eofa) {
			spa.nl = 0;
		} else {
			swaplines(spa.lines[0], spa.lines[spa.nl - 1]);
			spa.nl = 1;
		}

		if (eofb) {
			spb.nl = 0;
		} else {
			swaplines(spb.lines[0], spb.lines[spb.nl - 1]);
			spb.nl = 1;
		}
	}
	freespan(&spa);
	freespan(&spb);
}
450 | |
451 | |
452 int | |
453 main(int argc, char *argv[]) | |
454 { | |
455 size_t jf[2] = { jfield, jfield, }; | |
456 FILE *fp[2]; | |
457 int ret = 0, n; | |
458 char *fno; | |
459 | |
460 ARGBEGIN { | |
461 case '1': | |
462 jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE… | |
463 break; | |
464 case '2': | |
465 jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE… | |
466 break; | |
467 case 'a': | |
468 fno = EARGF(usage()); | |
469 if (strcmp(fno, "1") == 0) | |
470 unpairsa = 1; | |
471 else if (strcmp(fno, "2") == 0) | |
472 unpairsb = 1; | |
473 else | |
474 usage(); | |
475 break; | |
476 case 'e': | |
477 replace = EARGF(usage()); | |
478 break; | |
479 case 'o': | |
480 oflag = 1; | |
481 initolist(&output); | |
482 makeolist(&output, EARGF(usage())); | |
483 break; | |
484 case 't': | |
485 sep = EARGF(usage()); | |
486 break; | |
487 case 'v': | |
488 pairs = 0; | |
489 fno = EARGF(usage()); | |
490 if (strcmp(fno, "1") == 0) | |
491 unpairsa = 1; | |
492 else if (strcmp(fno, "2") == 0) | |
493 unpairsb = 1; | |
494 else | |
495 usage(); | |
496 break; | |
497 default: | |
498 usage(); | |
499 } ARGEND | |
500 | |
501 if (sep) | |
502 seplen = unescape(sep); | |
503 | |
504 if (argc != 2) | |
505 usage(); | |
506 | |
507 for (n = 0; n < 2; n++) { | |
508 if (!strcmp(argv[n], "-")) { | |
509 argv[n] = "<stdin>"; | |
510 fp[n] = stdin; | |
511 } else if (!(fp[n] = fopen(argv[n], "r"))) { | |
512 eprintf("fopen %s:", argv[n]); | |
513 } | |
514 } | |
515 | |
516 jf[0]--; | |
517 jf[1]--; | |
518 | |
519 join(fp[0], fp[1], jf[0], jf[1]); | |
520 | |
521 if (oflag) | |
522 freespecs(&output); | |
523 | |
524 if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv… | |
525 fshut(stdout, "<stdout>")) | |
526 ret = 2; | |
527 | |
528 return ret; | |
529 } |