Introduction
Introduction Statistics Contact Development Disclaimer Help
sort.c - sbase - suckless unix tools
git clone git://git.suckless.org/sbase
Log
Files
Refs
README
LICENSE
---
sort.c (9436B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #include "queue.h"
8 #include "text.h"
9 #include "utf.h"
10 #include "util.h"
11
12 struct keydef {
13 int start_column;
14 int end_column;
15 int start_char;
16 int end_char;
17 int flags;
18 TAILQ_ENTRY(keydef) entry;
19 };
20
21 struct column {
22 struct line line;
23 size_t cap;
24 };
25
26 enum {
27 MOD_N = 1 << 0,
28 MOD_STARTB = 1 << 1,
29 MOD_ENDB = 1 << 2,
30 MOD_R = 1 << 3,
31 MOD_D = 1 << 4,
32 MOD_F = 1 << 5,
33 MOD_I = 1 << 6,
34 };
35
36 static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead…
37
38 static int Cflag = 0, cflag = 0, uflag = 0;
39 static char *fieldsep = NULL;
40 static size_t fieldseplen = 0;
41 static struct column col1, col2;
42
43 static void
44 skipblank(struct line *a)
45 {
46 while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
47 a->data++;
48 a->len--;
49 }
50 }
51
52 static void
53 skipnonblank(struct line *a)
54 {
55 while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
56 *(a->data) != '\t')) {
57 a->data++;
58 a->len--;
59 }
60 }
61
62 static void
63 skipcolumn(struct line *a, int skip_to_next_col)
64 {
65 char *s;
66
67 if (fieldsep) {
68 if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))…
69 if (skip_to_next_col)
70 s += fieldseplen;
71 a->len -= s - a->data;
72 a->data = s;
73 } else {
74 a->data += a->len - 1;
75 a->len = 1;
76 }
77 } else {
78 skipblank(a);
79 skipnonblank(a);
80 }
81 }
82
83 static void
84 columns(struct line *line, const struct keydef *kd, struct column *col)
85 {
86 Rune r;
87 struct line start, end;
88 size_t utflen, rlen;
89 int i;
90
91 start.data = line->data;
92 start.len = line->len;
93 for (i = 1; i < kd->start_column; i++)
94 skipcolumn(&start, 1);
95 if (kd->flags & MOD_STARTB)
96 skipblank(&start);
97 for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) {
98 rlen = chartorune(&r, start.data);
99 start.data += rlen;
100 start.len -= rlen;
101 utflen++;
102 }
103
104 end.data = line->data;
105 end.len = line->len;
106 if (kd->end_column) {
107 for (i = 1; i < kd->end_column; i++)
108 skipcolumn(&end, 1);
109 if (kd->flags & MOD_ENDB)
110 skipblank(&end);
111 if (kd->end_char) {
112 for (utflen = 0; end.len > 1 && utflen < kd->end…
113 rlen = chartorune(&r, end.data);
114 end.data += rlen;
115 end.len -= rlen;
116 utflen++;
117 }
118 } else {
119 skipcolumn(&end, 0);
120 }
121 } else {
122 end.data += end.len - 1;
123 end.len = 1;
124 }
125 col->line.len = MAX(0, end.data - start.data);
126 if (!(col->line.data) || col->cap < col->line.len + 1) {
127 free(col->line.data);
128 col->line.data = emalloc(col->line.len + 1);
129 }
130 memcpy(col->line.data, start.data, col->line.len);
131 col->line.data[col->line.len] = '\0';
132 }
133
134 static int
135 skipmodcmp(struct line *a, struct line *b, int flags)
136 {
137 Rune r1, r2;
138 size_t offa = 0, offb = 0;
139
140 do {
141 offa += chartorune(&r1, a->data + offa);
142 offb += chartorune(&r2, b->data + offb);
143
144 if (flags & MOD_D && flags & MOD_I) {
145 while (offa < a->len && ((!isblankrune(r1) &&
146 !isalnumrune(r1)) || (!isprintrune(r1))))
147 offa += chartorune(&r1, a->data + offa);
148 while (offb < b->len && ((!isblankrune(r2) &&
149 !isalnumrune(r2)) || (!isprintrune(r2))))
150 offb += chartorune(&r2, b->data + offb);
151 }
152 else if (flags & MOD_D) {
153 while (offa < a->len && !isblankrune(r1) &&
154 !isalnumrune(r1))
155 offa += chartorune(&r1, a->data + offa);
156 while (offb < b->len && !isblankrune(r2) &&
157 !isalnumrune(r2))
158 offb += chartorune(&r2, b->data + offb);
159 }
160 else if (flags & MOD_I) {
161 while (offa < a->len && !isprintrune(r1))
162 offa += chartorune(&r1, a->data + offa);
163 while (offb < b->len && !isprintrune(r2))
164 offb += chartorune(&r2, b->data + offb);
165 }
166 if (flags & MOD_F) {
167 r1 = toupperrune(r1);
168 r2 = toupperrune(r2);
169 }
170 } while (r1 && r1 == r2);
171
172 return r1 - r2;
173 }
174
175 static int
176 slinecmp(struct line *a, struct line *b)
177 {
178 int res = 0;
179 double x, y;
180 struct keydef *kd;
181
182 TAILQ_FOREACH(kd, &kdhead, entry) {
183 columns(a, kd, &col1);
184 columns(b, kd, &col2);
185
186 /* if -u is given, don't use default key definition
187 * unless it is the only one */
188 if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
189 TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead))…
190 res = 0;
191 } else if (kd->flags & MOD_N) {
192 x = strtod(col1.line.data, NULL);
193 y = strtod(col2.line.data, NULL);
194 res = (x < y) ? -1 : (x > y);
195 } else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
196 res = skipmodcmp(&col1.line, &col2.line, kd->fla…
197 } else {
198 res = linecmp(&col1.line, &col2.line);
199 }
200
201 if (kd->flags & MOD_R)
202 res = -res;
203 if (res)
204 break;
205 }
206
207 return res;
208 }
209
210 static int
211 check(FILE *fp, const char *fname)
212 {
213 static struct line prev, cur, tmp;
214 static size_t prevsize, cursize, tmpsize;
215 ssize_t len;
216
217 if (!prev.data) {
218 if ((len = getline(&prev.data, &prevsize, fp)) < 0)
219 eprintf("getline:");
220 prev.len = len;
221 }
222 while ((len = getline(&cur.data, &cursize, fp)) > 0) {
223 cur.len = len;
224 if (uflag > slinecmp(&cur, &prev)) {
225 if (!Cflag) {
226 weprintf("disorder %s: ", fname);
227 fwrite(cur.data, 1, cur.len, stderr);
228 }
229 return 1;
230 }
231 tmp = cur;
232 tmpsize = cursize;
233 cur = prev;
234 cursize = prevsize;
235 prev = tmp;
236 prevsize = tmpsize;
237 }
238
239 return 0;
240 }
241
242 static int
243 parse_flags(char **s, int *flags, int bflag)
244 {
245 while (isalpha((int)**s)) {
246 switch (*((*s)++)) {
247 case 'b':
248 *flags |= bflag;
249 break;
250 case 'd':
251 *flags |= MOD_D;
252 break;
253 case 'f':
254 *flags |= MOD_F;
255 break;
256 case 'i':
257 *flags |= MOD_I;
258 break;
259 case 'n':
260 *flags |= MOD_N;
261 break;
262 case 'r':
263 *flags |= MOD_R;
264 break;
265 default:
266 return -1;
267 }
268 }
269
270 return 0;
271 }
272
273 static void
274 addkeydef(char *kdstr, int flags)
275 {
276 struct keydef *kd;
277
278 kd = enmalloc(2, sizeof(*kd));
279
280 /* parse key definition kdstr with format
281 * start_column[.start_char][flags][,end_column[.end_char][flags…
282 */
283 kd->start_column = 1;
284 kd->start_char = 1;
285 kd->end_column = 0; /* 0 means end of line */
286 kd->end_char = 0; /* 0 means end of column */
287 kd->flags = flags;
288
289 if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
290 enprintf(2, "invalid start column in key definition\n");
291
292 if (*kdstr == '.') {
293 if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
294 enprintf(2, "invalid start character in key "
295 "definition\n");
296 }
297 if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
298 enprintf(2, "invalid start flags in key definition\n");
299
300 if (*kdstr == ',') {
301 if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
302 enprintf(2, "invalid end column in key definitio…
303 if (*kdstr == '.') {
304 if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10…
305 enprintf(2, "invalid end character in ke…
306 "definition\n");
307 }
308 if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
309 enprintf(2, "invalid end flags in key definition…
310 }
311
312 if (*kdstr != '\0')
313 enprintf(2, "invalid key definition\n");
314
315 TAILQ_INSERT_TAIL(&kdhead, kd, entry);
316 }
317
318 static void
319 usage(void)
320 {
321 enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] "
322 "[-k def]... [file ...]\n", argv0);
323 }
324
325 int
326 main(int argc, char *argv[])
327 {
328 FILE *fp, *ofp = stdout;
329 struct linebuf linebuf = EMPTY_LINEBUF;
330 size_t i;
331 int global_flags = 0, ret = 0;
332 char *outfile = NULL;
333
334 ARGBEGIN {
335 case 'C':
336 Cflag = 1;
337 break;
338 case 'b':
339 global_flags |= MOD_STARTB | MOD_ENDB;
340 break;
341 case 'c':
342 cflag = 1;
343 break;
344 case 'd':
345 global_flags |= MOD_D;
346 break;
347 case 'f':
348 global_flags |= MOD_F;
349 break;
350 case 'i':
351 global_flags |= MOD_I;
352 break;
353 case 'k':
354 addkeydef(EARGF(usage()), global_flags);
355 break;
356 case 'm':
357 /* more or less for free, but for performance-reasons,
358 * we should keep this flag in mind and maybe some later
359 * day implement it properly so we don't run out of memo…
360 * while merging large sorted files.
361 */
362 break;
363 case 'n':
364 global_flags |= MOD_N;
365 break;
366 case 'o':
367 outfile = EARGF(usage());
368 break;
369 case 'r':
370 global_flags |= MOD_R;
371 break;
372 case 't':
373 fieldsep = EARGF(usage());
374 if (!*fieldsep)
375 eprintf("empty delimiter\n");
376 fieldseplen = unescape(fieldsep);
377 break;
378 case 'u':
379 uflag = 1;
380 break;
381 default:
382 usage();
383 } ARGEND
384
385 /* -b shall only apply to custom key definitions */
386 if (TAILQ_EMPTY(&kdhead) && global_flags)
387 addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
388 if (TAILQ_EMPTY(&kdhead) || (!Cflag && !cflag))
389 addkeydef("1", global_flags & MOD_R);
390
391 if (!argc) {
392 if (Cflag || cflag) {
393 if (check(stdin, "<stdin>") && !ret)
394 ret = 1;
395 } else {
396 getlines(stdin, &linebuf);
397 }
398 } else for (; *argv; argc--, argv++) {
399 if (!strcmp(*argv, "-")) {
400 *argv = "<stdin>";
401 fp = stdin;
402 } else if (!(fp = fopen(*argv, "r"))) {
403 enprintf(2, "fopen %s:", *argv);
404 continue;
405 }
406 if (Cflag || cflag) {
407 if (check(fp, *argv) && !ret)
408 ret = 1;
409 } else {
410 getlines(fp, &linebuf);
411 }
412 if (fp != stdin && fshut(fp, *argv))
413 ret = 2;
414 }
415
416 if (!Cflag && !cflag) {
417 if (outfile && !(ofp = fopen(outfile, "w")))
418 eprintf("fopen %s:", outfile);
419
420 qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lin…
421 (int (*)(const void *, const void *))sli…
422
423 for (i = 0; i < linebuf.nlines; i++) {
424 if (!uflag || i == 0 ||
425 slinecmp(&linebuf.lines[i], &linebuf.lines[i…
426 fwrite(linebuf.lines[i].data, 1,
427 linebuf.lines[i].len, ofp);
428 }
429 }
430 }
431
432 if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
433 fshut(stderr, "<stderr>"))
434 ret = 2;
435
436 return ret;
437 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.