sort.c - sbase - suckless unix tools | |
git clone git://git.suckless.org/sbase | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
sort.c (9436B) | |
--- | |
1 /* See LICENSE file for copyright and license details. */ | |
2 #include <ctype.h> | |
3 #include <stdio.h> | |
4 #include <stdlib.h> | |
5 #include <string.h> | |
6 | |
7 #include "queue.h" | |
8 #include "text.h" | |
9 #include "utf.h" | |
10 #include "util.h" | |
11 | |
12 struct keydef { | |
13 int start_column; | |
14 int end_column; | |
15 int start_char; | |
16 int end_char; | |
17 int flags; | |
18 TAILQ_ENTRY(keydef) entry; | |
19 }; | |
20 | |
21 struct column { | |
22 struct line line; | |
23 size_t cap; | |
24 }; | |
25 | |
26 enum { | |
27 MOD_N = 1 << 0, | |
28 MOD_STARTB = 1 << 1, | |
29 MOD_ENDB = 1 << 2, | |
30 MOD_R = 1 << 3, | |
31 MOD_D = 1 << 4, | |
32 MOD_F = 1 << 5, | |
33 MOD_I = 1 << 6, | |
34 }; | |
35 | |
36 static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead… | |
37 | |
38 static int Cflag = 0, cflag = 0, uflag = 0; | |
39 static char *fieldsep = NULL; | |
40 static size_t fieldseplen = 0; | |
41 static struct column col1, col2; | |
42 | |
43 static void | |
44 skipblank(struct line *a) | |
45 { | |
46 while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) { | |
47 a->data++; | |
48 a->len--; | |
49 } | |
50 } | |
51 | |
52 static void | |
53 skipnonblank(struct line *a) | |
54 { | |
55 while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' && | |
56 *(a->data) != '\t')) { | |
57 a->data++; | |
58 a->len--; | |
59 } | |
60 } | |
61 | |
62 static void | |
63 skipcolumn(struct line *a, int skip_to_next_col) | |
64 { | |
65 char *s; | |
66 | |
67 if (fieldsep) { | |
68 if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))… | |
69 if (skip_to_next_col) | |
70 s += fieldseplen; | |
71 a->len -= s - a->data; | |
72 a->data = s; | |
73 } else { | |
74 a->data += a->len - 1; | |
75 a->len = 1; | |
76 } | |
77 } else { | |
78 skipblank(a); | |
79 skipnonblank(a); | |
80 } | |
81 } | |
82 | |
83 static void | |
84 columns(struct line *line, const struct keydef *kd, struct column *col) | |
85 { | |
86 Rune r; | |
87 struct line start, end; | |
88 size_t utflen, rlen; | |
89 int i; | |
90 | |
91 start.data = line->data; | |
92 start.len = line->len; | |
93 for (i = 1; i < kd->start_column; i++) | |
94 skipcolumn(&start, 1); | |
95 if (kd->flags & MOD_STARTB) | |
96 skipblank(&start); | |
97 for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) { | |
98 rlen = chartorune(&r, start.data); | |
99 start.data += rlen; | |
100 start.len -= rlen; | |
101 utflen++; | |
102 } | |
103 | |
104 end.data = line->data; | |
105 end.len = line->len; | |
106 if (kd->end_column) { | |
107 for (i = 1; i < kd->end_column; i++) | |
108 skipcolumn(&end, 1); | |
109 if (kd->flags & MOD_ENDB) | |
110 skipblank(&end); | |
111 if (kd->end_char) { | |
112 for (utflen = 0; end.len > 1 && utflen < kd->end… | |
113 rlen = chartorune(&r, end.data); | |
114 end.data += rlen; | |
115 end.len -= rlen; | |
116 utflen++; | |
117 } | |
118 } else { | |
119 skipcolumn(&end, 0); | |
120 } | |
121 } else { | |
122 end.data += end.len - 1; | |
123 end.len = 1; | |
124 } | |
125 col->line.len = MAX(0, end.data - start.data); | |
126 if (!(col->line.data) || col->cap < col->line.len + 1) { | |
127 free(col->line.data); | |
128 col->line.data = emalloc(col->line.len + 1); | |
129 } | |
130 memcpy(col->line.data, start.data, col->line.len); | |
131 col->line.data[col->line.len] = '\0'; | |
132 } | |
133 | |
134 static int | |
135 skipmodcmp(struct line *a, struct line *b, int flags) | |
136 { | |
137 Rune r1, r2; | |
138 size_t offa = 0, offb = 0; | |
139 | |
140 do { | |
141 offa += chartorune(&r1, a->data + offa); | |
142 offb += chartorune(&r2, b->data + offb); | |
143 | |
144 if (flags & MOD_D && flags & MOD_I) { | |
145 while (offa < a->len && ((!isblankrune(r1) && | |
146 !isalnumrune(r1)) || (!isprintrune(r1)))) | |
147 offa += chartorune(&r1, a->data + offa); | |
148 while (offb < b->len && ((!isblankrune(r2) && | |
149 !isalnumrune(r2)) || (!isprintrune(r2)))) | |
150 offb += chartorune(&r2, b->data + offb); | |
151 } | |
152 else if (flags & MOD_D) { | |
153 while (offa < a->len && !isblankrune(r1) && | |
154 !isalnumrune(r1)) | |
155 offa += chartorune(&r1, a->data + offa); | |
156 while (offb < b->len && !isblankrune(r2) && | |
157 !isalnumrune(r2)) | |
158 offb += chartorune(&r2, b->data + offb); | |
159 } | |
160 else if (flags & MOD_I) { | |
161 while (offa < a->len && !isprintrune(r1)) | |
162 offa += chartorune(&r1, a->data + offa); | |
163 while (offb < b->len && !isprintrune(r2)) | |
164 offb += chartorune(&r2, b->data + offb); | |
165 } | |
166 if (flags & MOD_F) { | |
167 r1 = toupperrune(r1); | |
168 r2 = toupperrune(r2); | |
169 } | |
170 } while (r1 && r1 == r2); | |
171 | |
172 return r1 - r2; | |
173 } | |
174 | |
175 static int | |
176 slinecmp(struct line *a, struct line *b) | |
177 { | |
178 int res = 0; | |
179 double x, y; | |
180 struct keydef *kd; | |
181 | |
182 TAILQ_FOREACH(kd, &kdhead, entry) { | |
183 columns(a, kd, &col1); | |
184 columns(b, kd, &col2); | |
185 | |
186 /* if -u is given, don't use default key definition | |
187 * unless it is the only one */ | |
188 if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) && | |
189 TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead))… | |
190 res = 0; | |
191 } else if (kd->flags & MOD_N) { | |
192 x = strtod(col1.line.data, NULL); | |
193 y = strtod(col2.line.data, NULL); | |
194 res = (x < y) ? -1 : (x > y); | |
195 } else if (kd->flags & (MOD_D | MOD_F | MOD_I)) { | |
196 res = skipmodcmp(&col1.line, &col2.line, kd->fla… | |
197 } else { | |
198 res = linecmp(&col1.line, &col2.line); | |
199 } | |
200 | |
201 if (kd->flags & MOD_R) | |
202 res = -res; | |
203 if (res) | |
204 break; | |
205 } | |
206 | |
207 return res; | |
208 } | |
209 | |
210 static int | |
211 check(FILE *fp, const char *fname) | |
212 { | |
213 static struct line prev, cur, tmp; | |
214 static size_t prevsize, cursize, tmpsize; | |
215 ssize_t len; | |
216 | |
217 if (!prev.data) { | |
218 if ((len = getline(&prev.data, &prevsize, fp)) < 0) | |
219 eprintf("getline:"); | |
220 prev.len = len; | |
221 } | |
222 while ((len = getline(&cur.data, &cursize, fp)) > 0) { | |
223 cur.len = len; | |
224 if (uflag > slinecmp(&cur, &prev)) { | |
225 if (!Cflag) { | |
226 weprintf("disorder %s: ", fname); | |
227 fwrite(cur.data, 1, cur.len, stderr); | |
228 } | |
229 return 1; | |
230 } | |
231 tmp = cur; | |
232 tmpsize = cursize; | |
233 cur = prev; | |
234 cursize = prevsize; | |
235 prev = tmp; | |
236 prevsize = tmpsize; | |
237 } | |
238 | |
239 return 0; | |
240 } | |
241 | |
242 static int | |
243 parse_flags(char **s, int *flags, int bflag) | |
244 { | |
245 while (isalpha((int)**s)) { | |
246 switch (*((*s)++)) { | |
247 case 'b': | |
248 *flags |= bflag; | |
249 break; | |
250 case 'd': | |
251 *flags |= MOD_D; | |
252 break; | |
253 case 'f': | |
254 *flags |= MOD_F; | |
255 break; | |
256 case 'i': | |
257 *flags |= MOD_I; | |
258 break; | |
259 case 'n': | |
260 *flags |= MOD_N; | |
261 break; | |
262 case 'r': | |
263 *flags |= MOD_R; | |
264 break; | |
265 default: | |
266 return -1; | |
267 } | |
268 } | |
269 | |
270 return 0; | |
271 } | |
272 | |
273 static void | |
274 addkeydef(char *kdstr, int flags) | |
275 { | |
276 struct keydef *kd; | |
277 | |
278 kd = enmalloc(2, sizeof(*kd)); | |
279 | |
280 /* parse key definition kdstr with format | |
281 * start_column[.start_char][flags][,end_column[.end_char][flags… | |
282 */ | |
283 kd->start_column = 1; | |
284 kd->start_char = 1; | |
285 kd->end_column = 0; /* 0 means end of line */ | |
286 kd->end_char = 0; /* 0 means end of column */ | |
287 kd->flags = flags; | |
288 | |
289 if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1) | |
290 enprintf(2, "invalid start column in key definition\n"); | |
291 | |
292 if (*kdstr == '.') { | |
293 if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1) | |
294 enprintf(2, "invalid start character in key " | |
295 "definition\n"); | |
296 } | |
297 if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0) | |
298 enprintf(2, "invalid start flags in key definition\n"); | |
299 | |
300 if (*kdstr == ',') { | |
301 if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0) | |
302 enprintf(2, "invalid end column in key definitio… | |
303 if (*kdstr == '.') { | |
304 if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10… | |
305 enprintf(2, "invalid end character in ke… | |
306 "definition\n"); | |
307 } | |
308 if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0) | |
309 enprintf(2, "invalid end flags in key definition… | |
310 } | |
311 | |
312 if (*kdstr != '\0') | |
313 enprintf(2, "invalid key definition\n"); | |
314 | |
315 TAILQ_INSERT_TAIL(&kdhead, kd, entry); | |
316 } | |
317 | |
318 static void | |
319 usage(void) | |
320 { | |
321 enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] " | |
322 "[-k def]... [file ...]\n", argv0); | |
323 } | |
324 | |
325 int | |
326 main(int argc, char *argv[]) | |
327 { | |
328 FILE *fp, *ofp = stdout; | |
329 struct linebuf linebuf = EMPTY_LINEBUF; | |
330 size_t i; | |
331 int global_flags = 0, ret = 0; | |
332 char *outfile = NULL; | |
333 | |
334 ARGBEGIN { | |
335 case 'C': | |
336 Cflag = 1; | |
337 break; | |
338 case 'b': | |
339 global_flags |= MOD_STARTB | MOD_ENDB; | |
340 break; | |
341 case 'c': | |
342 cflag = 1; | |
343 break; | |
344 case 'd': | |
345 global_flags |= MOD_D; | |
346 break; | |
347 case 'f': | |
348 global_flags |= MOD_F; | |
349 break; | |
350 case 'i': | |
351 global_flags |= MOD_I; | |
352 break; | |
353 case 'k': | |
354 addkeydef(EARGF(usage()), global_flags); | |
355 break; | |
356 case 'm': | |
357 /* more or less for free, but for performance-reasons, | |
358 * we should keep this flag in mind and maybe some later | |
359 * day implement it properly so we don't run out of memo… | |
360 * while merging large sorted files. | |
361 */ | |
362 break; | |
363 case 'n': | |
364 global_flags |= MOD_N; | |
365 break; | |
366 case 'o': | |
367 outfile = EARGF(usage()); | |
368 break; | |
369 case 'r': | |
370 global_flags |= MOD_R; | |
371 break; | |
372 case 't': | |
373 fieldsep = EARGF(usage()); | |
374 if (!*fieldsep) | |
375 eprintf("empty delimiter\n"); | |
376 fieldseplen = unescape(fieldsep); | |
377 break; | |
378 case 'u': | |
379 uflag = 1; | |
380 break; | |
381 default: | |
382 usage(); | |
383 } ARGEND | |
384 | |
385 /* -b shall only apply to custom key definitions */ | |
386 if (TAILQ_EMPTY(&kdhead) && global_flags) | |
387 addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB)); | |
388 if (TAILQ_EMPTY(&kdhead) || (!Cflag && !cflag)) | |
389 addkeydef("1", global_flags & MOD_R); | |
390 | |
391 if (!argc) { | |
392 if (Cflag || cflag) { | |
393 if (check(stdin, "<stdin>") && !ret) | |
394 ret = 1; | |
395 } else { | |
396 getlines(stdin, &linebuf); | |
397 } | |
398 } else for (; *argv; argc--, argv++) { | |
399 if (!strcmp(*argv, "-")) { | |
400 *argv = "<stdin>"; | |
401 fp = stdin; | |
402 } else if (!(fp = fopen(*argv, "r"))) { | |
403 enprintf(2, "fopen %s:", *argv); | |
404 continue; | |
405 } | |
406 if (Cflag || cflag) { | |
407 if (check(fp, *argv) && !ret) | |
408 ret = 1; | |
409 } else { | |
410 getlines(fp, &linebuf); | |
411 } | |
412 if (fp != stdin && fshut(fp, *argv)) | |
413 ret = 2; | |
414 } | |
415 | |
416 if (!Cflag && !cflag) { | |
417 if (outfile && !(ofp = fopen(outfile, "w"))) | |
418 eprintf("fopen %s:", outfile); | |
419 | |
420 qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lin… | |
421 (int (*)(const void *, const void *))sli… | |
422 | |
423 for (i = 0; i < linebuf.nlines; i++) { | |
424 if (!uflag || i == 0 || | |
425 slinecmp(&linebuf.lines[i], &linebuf.lines[i… | |
426 fwrite(linebuf.lines[i].data, 1, | |
427 linebuf.lines[i].len, ofp); | |
428 } | |
429 } | |
430 } | |
431 | |
432 if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") | | |
433 fshut(stderr, "<stderr>")) | |
434 ret = 2; | |
435 | |
436 return ret; | |
437 } |