Introduction
Introduction Statistics Contact Development Disclaimer Help
join.c - sbase - suckless unix tools
git clone git://git.suckless.org/sbase
Log
Files
Refs
README
LICENSE
---
join.c (9795B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <ctype.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
8 #include "text.h"
9 #include "utf.h"
10 #include "util.h"
11
/* Sizing of the growable arrays in this file. */
enum {
	INIT = 1, /* element count passed to the first ereallocarray() call */
	GROW = 2, /* factor applied to the capacity when an array is full */
};
16
/* Values for the `reset` argument of addtospan(). */
enum {
	EXPAND = 0, /* append the new line after the ones already held */
	RESET  = 1, /* restart the span so the new line becomes lines[0] */
};
21
/* Result of jlinecmp() when neither line has its join field. */
enum {
	FIELD_ERROR = -2,
};
23
/* One field of a line: a pointer into the line's text plus a byte count. */
struct field {
	char   *s;   /* first byte of the field; not NUL-terminated per field */
	size_t  len; /* number of bytes in the field */
};
28
29 struct jline {
30 struct line text;
31 size_t nf;
32 size_t maxf;
33 struct field *fields;
34 };
35
/* One element of the -o output list. */
struct spec {
	size_t fileno; /* 0 selects the join field, 1 or 2 selects a file */
	size_t fldno;  /* index into that file's fields[] */
};
40
/* Growable list of output specs. */
struct outlist {
	size_t        ns;    /* number of entries of specs[] in use */
	size_t        maxs;  /* number of entries allocated for specs[] */
	struct spec **specs; /* each entry is allocated by makespec() */
};
46
/* Growable list of lines read from one input. */
struct span {
	size_t         nl;    /* number of entries of lines[] counted as present */
	size_t         maxl;  /* number of entries allocated for lines[] */
	struct jline **lines; /* each entry is produced by makeline() */
};
52
53 static char *sep = NULL;
54 static char *replace = NULL;
55 static const char defaultofs = ' ';
56 static const int jfield = 1; /* POSIX default join field */
57 static int unpairsa = 0, unpairsb = 0;
58 static int oflag = 0;
59 static int pairs = 1;
60 static size_t seplen;
61 static struct outlist output;
62
63 static void
64 usage(void)
65 {
66 eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] "
67 "[-a | -v fileno] [-t delim] file1 file2\n", argv0);
68 }
69
70 static void
71 prfield(struct field *fp)
72 {
73 if (fwrite(fp->s, 1, fp->len, stdout) != fp->len)
74 eprintf("fwrite:");
75 }
76
77 static void
78 prsep(void)
79 {
80 if (sep)
81 fwrite(sep, 1, seplen, stdout);
82 else
83 putchar(defaultofs);
84 }
85
86 static void
87 swaplines(struct jline *la, struct jline *lb)
88 {
89 struct jline tmp;
90
91 tmp = *la;
92 *la = *lb;
93 *lb = tmp;
94 }
95
96 static void
97 prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
98 {
99 struct spec *sp;
100 struct field *joinfield;
101 size_t i;
102
103 if (jfa >= la->nf || jfb >= lb->nf)
104 return;
105
106 joinfield = &la->fields[jfa];
107
108 if (oflag) {
109 for (i = 0; i < output.ns; i++) {
110 sp = output.specs[i];
111
112 if (sp->fileno == 1) {
113 if (sp->fldno < la->nf)
114 prfield(&la->fields[sp->fldno]);
115 else if (replace)
116 fputs(replace, stdout);
117 } else if (sp->fileno == 2) {
118 if (sp->fldno < lb->nf)
119 prfield(&lb->fields[sp->fldno]);
120 else if (replace)
121 fputs(replace, stdout);
122 } else if (sp->fileno == 0) {
123 prfield(joinfield);
124 }
125
126 if (i < output.ns - 1)
127 prsep();
128 }
129 } else {
130 prfield(joinfield);
131 prsep();
132
133 for (i = 0; i < la->nf; i++) {
134 if (i != jfa) {
135 prfield(&la->fields[i]);
136 prsep();
137 }
138 }
139 for (i = 0; i < lb->nf; i++) {
140 if (i != jfb) {
141 prfield(&lb->fields[i]);
142 if (i < lb->nf - 1)
143 prsep();
144 }
145 }
146 }
147 putchar('\n');
148 }
149
150 static void
151 prline(struct jline *lp)
152 {
153 if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.l…
154 eprintf("fwrite:");
155 putchar('\n');
156 }
157
158 static int
159 jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
160 {
161 int status;
162
163 /* return FIELD_ERROR if both lines are short */
164 if (jfa >= la->nf) {
165 status = (jfb >= lb->nf) ? FIELD_ERROR : -1;
166 } else if (jfb >= lb->nf) {
167 status = 1;
168 } else {
169 status = memcmp(la->fields[jfa].s, lb->fields[jfb].s,
170 MAX(la->fields[jfa].len, lb->fields[jfb]…
171 LIMIT(status, -1, 1);
172 }
173
174 return status;
175 }
176
177 static void
178 addfield(struct jline *lp, char *sp, size_t len)
179 {
180 if (lp->nf >= lp->maxf) {
181 lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf),
182 sizeof(struct field));
183 lp->maxf *= GROW;
184 }
185 lp->fields[lp->nf].s = sp;
186 lp->fields[lp->nf].len = len;
187 lp->nf++;
188 }
189
190 static void
191 prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb)
192 {
193 size_t i, j;
194
195 for (i = 0; i < (spa->nl - 1); i++)
196 for (j = 0; j < (spb->nl - 1); j++)
197 prjoin(spa->lines[i], spb->lines[j], jfa, jfb);
198 }
199
200 static struct jline *
201 makeline(char *s, size_t len)
202 {
203 struct jline *lp;
204 char *tmp;
205 size_t i, end;
206
207 if (s[len - 1] == '\n')
208 s[--len] = '\0';
209
210 lp = ereallocarray(NULL, INIT, sizeof(struct jline));
211 lp->text.data = s;
212 lp->text.len = len;
213 lp->fields = ereallocarray(NULL, INIT, sizeof(struct field));
214 lp->nf = 0;
215 lp->maxf = INIT;
216
217 for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++)
218 ;
219 while (i < lp->text.len) {
220 if (sep) {
221 if ((lp->text.len - i) < seplen ||
222 !(tmp = memmem(lp->text.data + i,
223 lp->text.len - i, sep, seplen…
224 goto eol;
225 }
226 end = tmp - lp->text.data;
227 addfield(lp, lp->text.data + i, end - i);
228 i = end + seplen;
229 } else {
230 for (end = i; !(isblank(lp->text.data[end])); en…
231 if (end + 1 == lp->text.len)
232 goto eol;
233 }
234 addfield(lp, lp->text.data + i, end - i);
235 for (i = end; isblank(lp->text.data[i]); i++)
236 ;
237 }
238 }
239 eol:
240 addfield(lp, lp->text.data + i, lp->text.len - i);
241
242 return lp;
243 }
244
245 static int
246 addtospan(struct span *sp, FILE *fp, int reset)
247 {
248 char *newl = NULL;
249 ssize_t len;
250 size_t size = 0;
251
252 if ((len = getline(&newl, &size, fp)) < 0) {
253 if (ferror(fp))
254 eprintf("getline:");
255 else
256 return 0;
257 }
258
259 if (reset)
260 sp->nl = 0;
261
262 if (sp->nl >= sp->maxl) {
263 sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl),
264 sizeof(struct jline *));
265 sp->maxl *= GROW;
266 }
267
268 sp->lines[sp->nl] = makeline(newl, len);
269 sp->nl++;
270 return 1;
271 }
272
273 static void
274 initspan(struct span *sp)
275 {
276 sp->nl = 0;
277 sp->maxl = INIT;
278 sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *));
279 }
280
281 static void
282 freespan(struct span *sp)
283 {
284 size_t i;
285
286 for (i = 0; i < sp->nl; i++) {
287 free(sp->lines[i]->fields);
288 free(sp->lines[i]->text.data);
289 }
290 free(sp->lines);
291 }
292
293 static void
294 initolist(struct outlist *olp)
295 {
296 olp->ns = 0;
297 olp->maxs = 1;
298 olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *));
299 }
300
301 static void
302 addspec(struct outlist *olp, struct spec *sp)
303 {
304 if (olp->ns >= olp->maxs) {
305 olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs…
306 sizeof(struct spec *));
307 olp->maxs *= GROW;
308 }
309 olp->specs[olp->ns] = sp;
310 olp->ns++;
311 }
312
313 static struct spec *
314 makespec(char *s)
315 {
316 struct spec *sp;
317 int fileno;
318 size_t fldno;
319
320 if (!strcmp(s, "0")) { /* join field must be 0 and nothing els…
321 fileno = 0;
322 fldno = 0;
323 } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') {
324 fileno = s[0] - '0';
325 fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - …
326 } else {
327 eprintf("%s: invalid format\n", s);
328 }
329
330 sp = ereallocarray(NULL, INIT, sizeof(struct spec));
331 sp->fileno = fileno;
332 sp->fldno = fldno;
333 return sp;
334 }
335
/*
 * Split s in place at every comma, space or tab and append one spec per
 * resulting item to olp. Each delimiter in s is overwritten with NUL.
 */
static void
makeolist(struct outlist *olp, char *s)
{
	char *item, *next;

	for (item = s; item; item = next) {
		next = strpbrk(item, ", \t");
		if (next)
			*next++ = '\0';
		addspec(olp, makespec(item));
	}
}
350
351 static void
352 freespecs(struct outlist *olp)
353 {
354 size_t i;
355
356 for (i = 0; i < olp->ns; i++)
357 free(olp->specs[i]);
358 }
359
/*
 * Join the lines read from fa and fb. jfa and jfb are the join-field
 * indices handed to jlinecmp() and prjoin() for the respective file.
 *
 * Each input is read through a span whose lines[0] is the current line.
 * Depending on the result of jlinecmp() on the two current lines, the loop
 * either advances one side (printing the skipped line when the matching
 * unpairsa/unpairsb flag is set) or, for cmp == 0, keeps appending lines to
 * both spans with EXPAND and then prints the combinations with prspanjoin()
 * if pairs is set. When a side reaches end of input in that branch, its nl
 * is bumped without a line being added; prspanjoin() leaves out the last
 * counted entry of each span, so the bump keeps the final real line in the
 * output.
 *
 * At the bottom of the loop each span is cut back to a single line (its
 * last entry, swapped into lines[0]) or to zero lines if its input is
 * exhausted. The loop ends as soon as either span is empty, after which
 * both spans are released.
 *
 * NOTE(review): this listing is cut off at the right margin on several
 * lines, including the first comparison of cmp and both do-while
 * conditions; confirm those against the complete file.
 */
360 static void
361 join(FILE *fa, FILE *fb, size_t jfa, size_t jfb)
362 {
363 struct span spa, spb;
364 int cmp, eofa, eofb;
365
366 initspan(&spa);
367 initspan(&spb);
368 cmp = eofa = eofb = 0;
369
/* read the first line of each input */
370 addtospan(&spa, fa, RESET);
371 addtospan(&spb, fb, RESET);
372
/* runs while both spans still hold at least one line */
373 while (spa.nl && spb.nl) {
374 if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb…
375 if (unpairsa)
376 prline(spa.lines[0]);
377 if (!addtospan(&spa, fa, RESET)) {
378 if (unpairsb) { /* a is EOF'd; print …
379 do
380 prline(spb.lines[0]);
381 while (addtospan(&spb, fb, RESET…
382 }
383 eofa = eofb = 1;
384 } else {
385 continue;
386 }
387 } else if (cmp > 0) {
388 if (unpairsb)
389 prline(spb.lines[0]);
390 if (!addtospan(&spb, fb, RESET)) {
391 if (unpairsa) { /* b is EOF'd; print …
392 do
393 prline(spa.lines[0]);
394 while (addtospan(&spa, fa, RESET…
395 }
396 eofa = eofb = 1;
397 } else {
398 continue;
399 }
400 } else if (cmp == 0) {
401 /* read all consecutive matching lines from a */
402 do {
403 if (!addtospan(&spa, fa, EXPAND)) {
404 eofa = 1;
/* count one extra entry; prspanjoin() skips the last one */
405 spa.nl++;
406 break;
407 }
408 } while (jlinecmp(spa.lines[spa.nl-1], spb.lines…
409
410 /* read all consecutive matching lines from b */
411 do {
412 if (!addtospan(&spb, fb, EXPAND)) {
413 eofb = 1;
/* same extra entry as for a above */
414 spb.nl++;
415 break;
416 }
417 } while (jlinecmp(spa.lines[0], spb.lines[spb.nl…
418
419 if (pairs)
420 prspanjoin(&spa, &spb, jfa, jfb);
421
422 } else { /* FIELD_ERROR: both lines lacked join fie…
423 if (unpairsa)
424 prline(spa.lines[0]);
425 if (unpairsb)
426 prline(spb.lines[0]);
427 eofa = addtospan(&spa, fa, RESET) ? 0 : 1;
428 eofb = addtospan(&spb, fb, RESET) ? 0 : 1;
429 if (!eofa && !eofb)
430 continue;
431 }
432
433 if (eofa) {
434 spa.nl = 0;
435 } else {
436 swaplines(spa.lines[0], spa.lines[spa.nl - 1]); …
437 spa.nl = 1;
438 }
439
440 if (eofb) {
441 spb.nl = 0;
442 } else {
443 swaplines(spb.lines[0], spb.lines[spb.nl - 1]); …
444 spb.nl = 1;
445 }
446 }
447 freespan(&spa);
448 freespan(&spb);
449 }
450
451
/*
 * Parse the command line, open the two inputs and run join() on them.
 *
 * Options handled below:
 *   -1, -2  join field number of the first / second file (at least 1)
 *   -a      also print the lines of file 1 or 2 that join() skips
 *   -v      like -a, and additionally turn off the joined output
 *   -e      string stored in replace
 *   -o      output list, parsed by makeolist()
 *   -t      field separator
 * Exactly two file operands are required; "-" selects standard input.
 *
 * Returns 0, or 2 if the fshut() checks at the end report a problem.
 * NOTE(review): the -1/-2 lines and the fshut() line are cut off at the
 * right margin in this listing; confirm them against the complete file.
 */
452 int
453 main(int argc, char *argv[])
454 {
455 size_t jf[2] = { jfield, jfield, };
456 FILE *fp[2];
457 int ret = 0, n;
458 char *fno;
459
460 ARGBEGIN {
461 case '1':
462 jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE…
463 break;
464 case '2':
465 jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE…
466 break;
467 case 'a':
468 fno = EARGF(usage());
469 if (strcmp(fno, "1") == 0)
470 unpairsa = 1;
471 else if (strcmp(fno, "2") == 0)
472 unpairsb = 1;
473 else
474 usage();
475 break;
476 case 'e':
477 replace = EARGF(usage());
478 break;
479 case 'o':
480 oflag = 1;
481 initolist(&output);
482 makeolist(&output, EARGF(usage()));
483 break;
484 case 't':
485 sep = EARGF(usage());
486 break;
487 case 'v':
/* -v turns off the joined output; the file number works as for -a */
488 pairs = 0;
489 fno = EARGF(usage());
490 if (strcmp(fno, "1") == 0)
491 unpairsa = 1;
492 else if (strcmp(fno, "2") == 0)
493 unpairsb = 1;
494 else
495 usage();
496 break;
497 default:
498 usage();
499 } ARGEND
500
/* unescape() is declared in a project header; its result is kept as the separator length */
501 if (sep)
502 seplen = unescape(sep);
503
504 if (argc != 2)
505 usage();
506
/* open both operands; "-" is replaced by a printable name and reads stdin */
507 for (n = 0; n < 2; n++) {
508 if (!strcmp(argv[n], "-")) {
509 argv[n] = "<stdin>";
510 fp[n] = stdin;
511 } else if (!(fp[n] = fopen(argv[n], "r"))) {
512 eprintf("fopen %s:", argv[n]);
513 }
514 }
515
/* field numbers start at 1 on the command line; join() indexes fields from 0 */
516 jf[0]--;
517 jf[1]--;
518
519 join(fp[0], fp[1], jf[0], jf[1]);
520
521 if (oflag)
522 freespecs(&output);
523
/* fp[1] is skipped when both operands refer to the same stream */
524 if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv…
525 fshut(stdout, "<stdout>"))
526 ret = 2;
527
528 return ret;
529 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.