Introduction
Introduction Statistics Contact Development Disclaimer Help
sort.c - 9base - revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log
Files
Refs
README
LICENSE
---
sort.c (29541B)
---
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4
5 /*
6 bugs:
7 00/ff for end of file can conflict with 00/ff characters
8 */
9
10 enum
11 {
12 Nline = 500000, /* default max number of l…
13 Nmerge = 10, /* max number of temp…
14 Nfield = 20, /* max number of argu…
15
16 Bflag = 1<<0, /* flags per field */
17 B1flag = 1<<1,
18
19 Dflag = 1<<2,
20 Fflag = 1<<3,
21 Gflag = 1<<4,
22 Iflag = 1<<5,
23 Mflag = 1<<6,
24 Nflag = 1<<7,
25 Rflag = 1<<8,
26 Wflag = 1<<9,
27
28 NSstart = 0, /* states for number …
29 NSsign,
30 NSzero,
31 NSdigit,
32 NSpoint,
33 NSfract,
34 NSzerofract,
35 NSexp,
36 NSexpsign,
37 NSexpdigit
38 };
39
40 typedef struct Line Line;
41 typedef struct Key Key;
42 typedef struct Merge Merge;
43 typedef struct Field Field;
44
45 struct Line
46 {
47 Key* key;
48 int llen; /* always >= 1 */
49 uchar line[1]; /* always ends in '\n' */
50 };
51
52 struct Merge
53 {
54 Key* key; /* copy of line->key so (Line*) …
55 Line* line; /* line at the head of a merge…
56 int fd; /* file descriptor */
57 Biobuf b; /* iobuf for reading a temp file…
58 };
59
60 struct Key
61 {
62 int klen;
63 uchar key[1];
64 };
65
66 struct Field
67 {
68 int beg1;
69 int beg2;
70 int end1;
71 int end2;
72
73 long flags;
74 uchar mapto[256];
75
76 void (*dokey)(Key*, uchar*, uchar*, Field*);
77 };
78
79 struct args
80 {
81 char* ofile;
82 char* tname;
83 Rune tabchar;
84 char cflag;
85 char uflag;
86 char vflag;
87 int nfield;
88 int nfile;
89 Field field[Nfield];
90
91 Line** linep;
92 long nline; /* number of lines in …
93 long lineno; /* overall ordinal fo…
94 int ntemp;
95 long mline; /* max lines per file …
96 } args;
97
98 extern int latinmap[];
99 extern Rune* month[12];
100
101 void buildkey(Line*);
102 void doargs(int, char*[]);
103 void dofield(char*, int*, int*, int, int);
104 void dofile(Biobuf*);
105 void dokey_(Key*, uchar*, uchar*, Field*);
106 void dokey_dfi(Key*, uchar*, uchar*, Field*);
107 void dokey_gn(Key*, uchar*, uchar*, Field*);
108 void dokey_m(Key*, uchar*, uchar*, Field*);
109 void dokey_r(Key*, uchar*, uchar*, Field*);
110 void done(char*);
111 int kcmp(Key*, Key*);
112 void makemapd(Field*);
113 void makemapm(Field*);
114 void mergefiles(int, int, Biobuf*);
115 void mergeout(Biobuf*);
116 void newfield(void);
117 Line* newline(Biobuf*);
118 void nomem(void);
119 void notifyf(void*, char*);
120 void printargs(void);
121 void printout(Biobuf*);
122 void setfield(int, int);
123 uchar* skip(uchar*, int, int, int, int);
124 void sort4(void*, ulong);
125 char* tempfile(int);
126 void tempout(void);
127 void lineout(Biobuf*, Line*);
128
129 void
130 main(int argc, char *argv[])
131 {
132 int i, f;
133 char *s;
134 Biobuf bbuf;
135
136 notify(notifyf); /**/
137 doargs(argc, argv);
138 if(args.vflag)
139 printargs();
140
141 for(i=1; i<argc; i++) {
142 s = argv[i];
143 if(s == 0)
144 continue;
145 if(strcmp(s, "-") == 0) {
146 Binit(&bbuf, 0, OREAD);
147 dofile(&bbuf);
148 Bterm(&bbuf);
149 continue;
150 }
151 f = open(s, OREAD);
152 if(f < 0) {
153 fprint(2, "sort: open %s: %r\n", s);
154 done("open");
155 }
156 Binit(&bbuf, f, OREAD);
157 dofile(&bbuf);
158 Bterm(&bbuf);
159 close(f);
160 }
161 if(args.nfile == 0) {
162 Binit(&bbuf, 0, OREAD);
163 dofile(&bbuf);
164 Bterm(&bbuf);
165 }
166 if(args.cflag)
167 done(0);
168 if(args.vflag)
169 fprint(2, "=========\n");
170
171 f = 1;
172 if(args.ofile) {
173 f = create(args.ofile, OWRITE, 0666);
174 if(f < 0) {
175 fprint(2, "sort: create %s: %r\n", args.ofile);
176 done("create");
177 }
178 }
179
180 Binit(&bbuf, f, OWRITE);
181 if(args.ntemp) {
182 tempout();
183 mergeout(&bbuf);
184 } else {
185 printout(&bbuf);
186 }
187 Bterm(&bbuf);
188 done(0);
189 }
190
191 void
192 dofile(Biobuf *b)
193 {
194 Line *l, *ol;
195 int n;
196
197 if(args.cflag) {
198 ol = newline(b);
199 if(ol == 0)
200 return;
201 for(;;) {
202 l = newline(b);
203 if(l == 0)
204 break;
205 n = kcmp(ol->key, l->key);
206 if(n > 0 || (n == 0 && args.uflag)) {
207 fprint(2, "sort: -c file not in sort\n")…
208 done("order");
209 }
210 free(ol->key);
211 free(ol);
212 ol = l;
213 }
214 return;
215 }
216
217 if(args.linep == 0) {
218 args.linep = malloc(args.mline * sizeof(args.linep));
219 if(args.linep == 0)
220 nomem();
221 }
222 for(;;) {
223 l = newline(b);
224 if(l == 0)
225 break;
226 if(args.nline >= args.mline)
227 tempout();
228 args.linep[args.nline] = l;
229 args.nline++;
230 args.lineno++;
231 }
232 }
233
234 void
235 notifyf(void *a, char *s)
236 {
237 USED(a);
238 if(strcmp(s, "interrupt") == 0)
239 done(0);
240 if(strcmp(s, "hangup") == 0)
241 done(0);
242 if(strcmp(s, "kill") == 0)
243 done(0);
244 if(strncmp(s, "sys: write on closed pipe", 25) == 0)
245 done(0);
246 noted(NDFLT);
247 }
248
249 Line*
250 newline(Biobuf *b)
251 {
252 Line *l;
253 char *p;
254 int n, c;
255
256 p = Brdline(b, '\n');
257 n = Blinelen(b);
258 if(p == 0) {
259 if(n == 0)
260 return 0;
261 l = 0;
262 for(n=0;;) {
263 if((n & 31) == 0) {
264 l = realloc(l, sizeof(Line) +
265 (n+31)*sizeof(l->line[0]));
266 if(l == 0)
267 nomem();
268 }
269 c = Bgetc(b);
270 if(c < 0) {
271 fprint(2, "sort: newline added\n");
272 c = '\n';
273 }
274 l->line[n++] = c;
275 if(c == '\n')
276 break;
277 }
278 l->llen = n;
279 buildkey(l);
280 return l;
281 }
282 l = malloc(sizeof(Line) +
283 (n-1)*sizeof(l->line[0]));
284 if(l == 0)
285 nomem();
286 l->llen = n;
287 memmove(l->line, p, n);
288 buildkey(l);
289 return l;
290 }
291
292 void
293 lineout(Biobuf *b, Line *l)
294 {
295 int n, m;
296
297 n = l->llen;
298 m = Bwrite(b, l->line, n);
299 if(n != m)
300 exits("write");
301 }
302
303 void
304 tempout(void)
305 {
306 long n;
307 Line **lp, *l;
308 char *tf;
309 int f;
310 Biobuf tb;
311
312 sort4(args.linep, args.nline);
313 tf = tempfile(args.ntemp);
314 args.ntemp++;
315 f = create(tf, OWRITE, 0666);
316 if(f < 0) {
317 fprint(2, "sort: create %s: %r\n", tf);
318 done("create");
319 }
320
321 Binit(&tb, f, OWRITE);
322 lp = args.linep;
323 for(n=args.nline; n>0; n--) {
324 l = *lp++;
325 lineout(&tb, l);
326 free(l->key);
327 free(l);
328 }
329 args.nline = 0;
330 Bterm(&tb);
331 close(f);
332 }
333
334 void
335 done(char *xs)
336 {
337 int i;
338
339 for(i=0; i<args.ntemp; i++)
340 remove(tempfile(i));
341 exits(xs);
342 }
343
344 void
345 nomem(void)
346 {
347 fprint(2, "sort: out of memory\n");
348 done("mem");
349 }
350
351 char*
352 tempfile(int n)
353 {
354 static char file[100];
355 static uint pid;
356 char *dir;
357
358 dir = "/var/tmp";
359 if(args.tname)
360 dir = args.tname;
361 if(strlen(dir) >= nelem(file)-20) {
362 fprint(2, "temp file directory name is too long: %s\n", …
363 done("tdir");
364 }
365
366 if(pid == 0) {
367 pid = getpid();
368 if(pid == 0) {
369 pid = time(0);
370 if(pid == 0)
371 pid = 1;
372 }
373 }
374
375 sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);
376 return file;
377 }
378
379 void
380 mergeout(Biobuf *b)
381 {
382 int n, i, f;
383 char *tf;
384 Biobuf tb;
385
386 for(i=0; i<args.ntemp; i+=n) {
387 n = args.ntemp - i;
388 if(n > Nmerge) {
389 tf = tempfile(args.ntemp);
390 args.ntemp++;
391 f = create(tf, OWRITE, 0666);
392 if(f < 0) {
393 fprint(2, "sort: create %s: %r\n", tf);
394 done("create");
395 }
396 Binit(&tb, f, OWRITE);
397
398 n = Nmerge;
399 mergefiles(i, n, &tb);
400
401 Bterm(&tb);
402 close(f);
403 } else
404 mergefiles(i, n, b);
405 }
406 }
407
408 void
409 mergefiles(int t, int n, Biobuf *b)
410 {
411 Merge *m, *mp, **mmp;
412 Key *ok;
413 Line *l;
414 char *tf;
415 int i, f, nn;
416
417 mmp = malloc(n*sizeof(*mmp));
418 mp = malloc(n*sizeof(*mp));
419 if(mmp == 0 || mp == 0)
420 nomem();
421
422 nn = 0;
423 m = mp;
424 for(i=0; i<n; i++,m++) {
425 tf = tempfile(t+i);
426 f = open(tf, OREAD);
427 if(f < 0) {
428 fprint(2, "sort: reopen %s: %r\n", tf);
429 done("open");
430 }
431 m->fd = f;
432 Binit(&m->b, f, OREAD);
433 mmp[nn] = m;
434
435 l = newline(&m->b);
436 if(l == 0)
437 continue;
438 nn++;
439 m->line = l;
440 m->key = l->key;
441 }
442
443 ok = 0;
444 for(;;) {
445 sort4(mmp, nn);
446 m = *mmp;
447 if(nn == 0)
448 break;
449 for(;;) {
450 l = m->line;
451 if(args.uflag && ok && kcmp(ok, l->key) == 0) {
452 free(l->key);
453 free(l);
454 } else {
455 lineout(b, l);
456 if(ok)
457 free(ok);
458 ok = l->key;
459 free(l);
460 }
461
462 l = newline(&m->b);
463 if(l == 0) {
464 nn--;
465 mmp[0] = mmp[nn];
466 break;
467 }
468 m->line = l;
469 m->key = l->key;
470 if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)
471 break;
472 }
473 }
474 if(ok)
475 free(ok);
476
477 m = mp;
478 for(i=0; i<n; i++,m++) {
479 Bterm(&m->b);
480 close(m->fd);
481 }
482
483 free(mp);
484 free(mmp);
485 }
486
487 int
488 kcmp(Key *ka, Key *kb)
489 {
490 int n, m;
491
492 /*
493 * set n to length of smaller key
494 */
495 n = ka->klen;
496 m = kb->klen;
497 if(n > m)
498 n = m;
499 return memcmp(ka->key, kb->key, n);
500 }
501
502 void
503 printout(Biobuf *b)
504 {
505 long n;
506 Line **lp, *l;
507 Key *ok;
508
509 sort4(args.linep, args.nline);
510 lp = args.linep;
511 ok = 0;
512 for(n=args.nline; n>0; n--) {
513 l = *lp++;
514 if(args.uflag && ok && kcmp(ok, l->key) == 0)
515 continue;
516 lineout(b, l);
517 ok = l->key;
518 }
519 }
520
521 void
522 setfield(int n, int c)
523 {
524 Field *f;
525
526 f = &args.field[n];
527 switch(c) {
528 default:
529 fprint(2, "sort: unknown option: field.%C\n", c);
530 done("option");
531 case 'b': /* skip blanks */
532 f->flags |= Bflag;
533 break;
534 case 'd': /* directory order */
535 f->flags |= Dflag;
536 break;
537 case 'f': /* fold case */
538 f->flags |= Fflag;
539 break;
540 case 'g': /* floating point -n case */
541 f->flags |= Gflag;
542 break;
543 case 'i': /* ignore non-ascii */
544 f->flags |= Iflag;
545 break;
546 case 'M': /* month */
547 f->flags |= Mflag;
548 break;
549 case 'n': /* numbers */
550 f->flags |= Nflag;
551 break;
552 case 'r': /* reverse */
553 f->flags |= Rflag;
554 break;
555 case 'w': /* ignore white */
556 f->flags |= Wflag;
557 break;
558 }
559 }
560
561 void
562 dofield(char *s, int *n1, int *n2, int off1, int off2)
563 {
564 int c, n;
565
566 c = *s++;
567 if(c >= '0' && c <= '9') {
568 n = 0;
569 while(c >= '0' && c <= '9') {
570 n = n*10 + (c-'0');
571 c = *s++;
572 }
573 n -= off1; /* posix committee: rot in hell */
574 if(n < 0) {
575 fprint(2, "sort: field offset must be positive\n…
576 done("option");
577 }
578 *n1 = n;
579 }
580 if(c == '.') {
581 c = *s++;
582 if(c >= '0' && c <= '9') {
583 n = 0;
584 while(c >= '0' && c <= '9') {
585 n = n*10 + (c-'0');
586 c = *s++;
587 }
588 n -= off2;
589 if(n < 0) {
590 fprint(2, "sort: character offset must b…
591 done("option");
592 }
593 *n2 = n;
594 }
595 }
596 while(c != 0) {
597 setfield(args.nfield, c);
598 c = *s++;
599 }
600 }
601
602 void
603 printargs(void)
604 {
605 int i, n;
606 Field *f;
607 char *prefix;
608
609 fprint(2, "sort");
610 for(i=0; i<=args.nfield; i++) {
611 f = &args.field[i];
612 prefix = " -";
613 if(i) {
614 n = f->beg1;
615 if(n >= 0)
616 fprint(2, " +%d", n);
617 else
618 fprint(2, " +*");
619 n = f->beg2;
620 if(n >= 0)
621 fprint(2, ".%d", n);
622 else
623 fprint(2, ".*");
624
625 if(f->flags & B1flag)
626 fprint(2, "b");
627
628 n = f->end1;
629 if(n >= 0)
630 fprint(2, " -%d", n);
631 else
632 fprint(2, " -*");
633 n = f->end2;
634 if(n >= 0)
635 fprint(2, ".%d", n);
636 else
637 fprint(2, ".*");
638 prefix = "";
639 }
640 if(f->flags & Bflag)
641 fprint(2, "%sb", prefix);
642 if(f->flags & Dflag)
643 fprint(2, "%sd", prefix);
644 if(f->flags & Fflag)
645 fprint(2, "%sf", prefix);
646 if(f->flags & Gflag)
647 fprint(2, "%sg", prefix);
648 if(f->flags & Iflag)
649 fprint(2, "%si", prefix);
650 if(f->flags & Mflag)
651 fprint(2, "%sM", prefix);
652 if(f->flags & Nflag)
653 fprint(2, "%sn", prefix);
654 if(f->flags & Rflag)
655 fprint(2, "%sr", prefix);
656 if(f->flags & Wflag)
657 fprint(2, "%sw", prefix);
658 }
659 if(args.cflag)
660 fprint(2, " -c");
661 if(args.uflag)
662 fprint(2, " -u");
663 if(args.ofile)
664 fprint(2, " -o %s", args.ofile);
665 if(args.mline != Nline)
666 fprint(2, " -l %ld", args.mline);
667 fprint(2, "\n");
668 }
669
670 void
671 newfield(void)
672 {
673 int n;
674 Field *f;
675
676 n = args.nfield + 1;
677 if(n >= Nfield) {
678 fprint(2, "sort: too many fields specified\n");
679 done("option");
680 }
681 args.nfield = n;
682 f = &args.field[n];
683 f->beg1 = -1;
684 f->beg2 = -1;
685 f->end1 = -1;
686 f->end2 = -1;
687 }
688
689 void
690 doargs(int argc, char *argv[])
691 {
692 int i, c, hadplus;
693 char *s, *p, *q;
694 Field *f;
695
696 hadplus = 0;
697 args.mline = Nline;
698 for(i=1; i<argc; i++) {
699 s = argv[i];
700 c = *s++;
701 if(c == '-') {
702 c = *s;
703 if(c == 0) /* forced end of arg m…
704 break;
705 argv[i] = 0; /* clobber args proc…
706 if(c == '.' || (c >= '0' && c <= '9')) {
707 if(!hadplus)
708 newfield();
709 f = &args.field[args.nfield];
710 dofield(s, &f->end1, &f->end2, 0, 0);
711 hadplus = 0;
712 continue;
713 }
714
715 while(c = *s++)
716 switch(c) {
717 case '-': /* end of options */
718 i = argc;
719 continue;
720 case 'T': /* temp directory */
721 if(*s == 0) {
722 i++;
723 if(i < argc) {
724 args.tname = argv[i];
725 argv[i] = 0;
726 }
727 } else
728 args.tname = s;
729 s = strchr(s, 0);
730 break;
731 case 'o': /* output file */
732 if(*s == 0) {
733 i++;
734 if(i < argc) {
735 args.ofile = argv[i];
736 argv[i] = 0;
737 }
738 } else
739 args.ofile = s;
740 s = strchr(s, 0);
741 break;
742 case 'k': /* posix key (what were they th…
743 p = 0;
744 if(*s == 0) {
745 i++;
746 if(i < argc) {
747 p = argv[i];
748 argv[i] = 0;
749 }
750 } else
751 p = s;
752 s = strchr(s, 0);
753 if(p == 0)
754 break;
755
756 newfield();
757 q = strchr(p, ',');
758 if(q)
759 *q++ = 0;
760 f = &args.field[args.nfield];
761 dofield(p, &f->beg1, &f->beg2, 1, 1);
762 if(f->flags & Bflag) {
763 f->flags |= B1flag;
764 f->flags &= ~Bflag;
765 }
766 if(q) {
767 dofield(q, &f->end1, &f->end2, 1…
768 if(f->end2 <= 0)
769 f->end1++;
770 }
771 hadplus = 0;
772 break;
773 case 't': /* tab character */
774 if(*s == 0) {
775 i++;
776 if(i < argc) {
777 chartorune(&args.tabchar…
778 argv[i] = 0;
779 }
780 } else
781 s += chartorune(&args.tabchar, s…
782 if(args.tabchar == '\n') {
783 fprint(2, "aw come on, rob\n");
784 done("rob");
785 }
786 break;
787 case 'c': /* check order */
788 args.cflag = 1;
789 break;
790 case 'u': /* unique */
791 args.uflag = 1;
792 break;
793 case 'v': /* debugging noise */
794 args.vflag = 1;
795 break;
796 case 'l':
797 if(*s == 0) {
798 i++;
799 if(i < argc) {
800 args.mline = atol(argv[i…
801 argv[i] = 0;
802 }
803 } else
804 args.mline = atol(s);
805 s = strchr(s, 0);
806 break;
807
808 case 'M': /* month */
809 case 'b': /* skip blanks */
810 case 'd': /* directory order */
811 case 'f': /* fold case */
812 case 'g': /* floating numbers */
813 case 'i': /* ignore non-ascii */
814 case 'n': /* numbers */
815 case 'r': /* reverse */
816 case 'w': /* ignore white */
817 if(args.nfield > 0)
818 fprint(2, "sort: global field se…
819 setfield(0, c);
820 break;
821 case 'm':
822 /* option m silently ignored but require…
823 break;
824 default:
825 fprint(2, "sort: unknown option: -%C\n",…
826 done("option");
827 }
828 continue;
829 }
830 if(c == '+') {
831 argv[i] = 0; /* clobber args proc…
832 c = *s;
833 if(c == '.' || (c >= '0' && c <= '9')) {
834 newfield();
835 f = &args.field[args.nfield];
836 dofield(s, &f->beg1, &f->beg2, 0, 0);
837 if(f->flags & Bflag) {
838 f->flags |= B1flag;
839 f->flags &= ~Bflag;
840 }
841 hadplus = 1;
842 continue;
843 }
844 fprint(2, "sort: unknown option: +%C\n", c);
845 done("option");
846 }
847 args.nfile++;
848 }
849
850 for(i=0; i<=args.nfield; i++) {
851 f = &args.field[i];
852
853 /*
854 * global options apply to fields that
855 * specify no options
856 */
857 if(f->flags == 0) {
858 f->flags = args.field[0].flags;
859 if(args.field[0].flags & Bflag)
860 f->flags |= B1flag;
861 }
862
863
864 /*
865 * build buildkey specification
866 */
867 switch(f->flags & ~(Bflag|B1flag)) {
868 default:
869 fprint(2, "sort: illegal combination of flags: %…
870 done("option");
871 case 0:
872 f->dokey = dokey_;
873 break;
874 case Rflag:
875 f->dokey = dokey_r;
876 break;
877 case Gflag:
878 case Nflag:
879 case Gflag|Nflag:
880 case Gflag|Rflag:
881 case Nflag|Rflag:
882 case Gflag|Nflag|Rflag:
883 f->dokey = dokey_gn;
884 break;
885 case Mflag:
886 case Mflag|Rflag:
887 f->dokey = dokey_m;
888 makemapm(f);
889 break;
890 case Dflag:
891 case Dflag|Fflag:
892 case Dflag|Fflag|Iflag:
893 case Dflag|Fflag|Iflag|Rflag:
894 case Dflag|Fflag|Iflag|Rflag|Wflag:
895 case Dflag|Fflag|Iflag|Wflag:
896 case Dflag|Fflag|Rflag:
897 case Dflag|Fflag|Rflag|Wflag:
898 case Dflag|Fflag|Wflag:
899 case Dflag|Iflag:
900 case Dflag|Iflag|Rflag:
901 case Dflag|Iflag|Rflag|Wflag:
902 case Dflag|Iflag|Wflag:
903 case Dflag|Rflag:
904 case Dflag|Rflag|Wflag:
905 case Dflag|Wflag:
906 case Fflag:
907 case Fflag|Iflag:
908 case Fflag|Iflag|Rflag:
909 case Fflag|Iflag|Rflag|Wflag:
910 case Fflag|Iflag|Wflag:
911 case Fflag|Rflag:
912 case Fflag|Rflag|Wflag:
913 case Fflag|Wflag:
914 case Iflag:
915 case Iflag|Rflag:
916 case Iflag|Rflag|Wflag:
917 case Iflag|Wflag:
918 case Wflag:
919 f->dokey = dokey_dfi;
920 makemapd(f);
921 break;
922 }
923 }
924
925 /*
926 * random spot checks
927 */
928 if(args.nfile > 1 && args.cflag) {
929 fprint(2, "sort: -c can have at most one input file\n");
930 done("option");
931 }
932 return;
933 }
934
935 uchar*
936 skip(uchar *l, int n1, int n2, int bflag, int endfield)
937 {
938 int i, c, tc;
939 Rune r;
940
941 if(endfield && n1 < 0)
942 return 0;
943
944 c = *l++;
945 tc = args.tabchar;
946 if(tc) {
947 if(tc < Runeself) {
948 for(i=n1; i>0; i--) {
949 while(c != tc) {
950 if(c == '\n')
951 return 0;
952 c = *l++;
953 }
954 if(!(endfield && i == 1))
955 c = *l++;
956 }
957 } else {
958 l--;
959 l += chartorune(&r, (char*)l);
960 for(i=n1; i>0; i--) {
961 while(r != tc) {
962 if(r == '\n')
963 return 0;
964 l += chartorune(&r, (char*)l);
965 }
966 if(!(endfield && i == 1))
967 l += chartorune(&r, (char*)l);
968 }
969 c = r;
970 }
971 } else {
972 for(i=n1; i>0; i--) {
973 while(c == ' ' || c == '\t')
974 c = *l++;
975 while(c != ' ' && c != '\t') {
976 if(c == '\n')
977 return 0;
978 c = *l++;
979 }
980 }
981 }
982
983 if(bflag)
984 while(c == ' ' || c == '\t')
985 c = *l++;
986
987 l--;
988 for(i=n2; i>0; i--) {
989 c = *l;
990 if(c < Runeself) {
991 if(c == '\n')
992 return 0;
993 l++;
994 continue;
995 }
996 l += chartorune(&r, (char*)l);
997 }
998 return l;
999 }
1000
1001 void
1002 dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f)
1003 {
1004 uchar *kp;
1005 int c, cl, dp;
1006 int state, nzero, exp, expsign, rflag;
1007
1008 cl = k->klen + 3;
1009 kp = k->key + cl; /* skip place for sign, exponent[2] */
1010
1011 nzero = 0; /* number of trailing zeros */
1012 exp = 0; /* value of the exponent */
1013 expsign = 0; /* sign of the exponent */
1014 dp = 0x4040; /* location of decimal point */
1015 rflag = f->flags&Rflag; /* xor of rflag and - sign */
1016 state = NSstart;
1017
1018 for(;; lp++) {
1019 if(lp >= lpe)
1020 break;
1021 c = *lp;
1022
1023 if(c == ' ' || c == '\t') {
1024 switch(state) {
1025 case NSstart:
1026 case NSsign:
1027 continue;
1028 }
1029 break;
1030 }
1031 if(c == '+' || c == '-') {
1032 switch(state) {
1033 case NSstart:
1034 state = NSsign;
1035 if(c == '-')
1036 rflag = !rflag;
1037 continue;
1038 case NSexp:
1039 state = NSexpsign;
1040 if(c == '-')
1041 expsign = 1;
1042 continue;
1043 }
1044 break;
1045 }
1046 if(c == '0') {
1047 switch(state) {
1048 case NSdigit:
1049 if(rflag)
1050 c = ~c;
1051 *kp++ = c;
1052 cl++;
1053 nzero++;
1054 dp++;
1055 state = NSdigit;
1056 continue;
1057 case NSfract:
1058 if(rflag)
1059 c = ~c;
1060 *kp++ = c;
1061 cl++;
1062 nzero++;
1063 state = NSfract;
1064 continue;
1065 case NSstart:
1066 case NSsign:
1067 case NSzero:
1068 state = NSzero;
1069 continue;
1070 case NSzerofract:
1071 case NSpoint:
1072 dp--;
1073 state = NSzerofract;
1074 continue;
1075 case NSexpsign:
1076 case NSexp:
1077 case NSexpdigit:
1078 exp = exp*10 + (c - '0');
1079 state = NSexpdigit;
1080 continue;
1081 }
1082 break;
1083 }
1084 if(c >= '1' && c <= '9') {
1085 switch(state) {
1086 case NSzero:
1087 case NSstart:
1088 case NSsign:
1089 case NSdigit:
1090 if(rflag)
1091 c = ~c;
1092 *kp++ = c;
1093 cl++;
1094 nzero = 0;
1095 dp++;
1096 state = NSdigit;
1097 continue;
1098 case NSzerofract:
1099 case NSpoint:
1100 case NSfract:
1101 if(rflag)
1102 c = ~c;
1103 *kp++ = c;
1104 cl++;
1105 nzero = 0;
1106 state = NSfract;
1107 continue;
1108 case NSexpsign:
1109 case NSexp:
1110 case NSexpdigit:
1111 exp = exp*10 + (c - '0');
1112 state = NSexpdigit;
1113 continue;
1114 }
1115 break;
1116 }
1117 if(c == '.') {
1118 switch(state) {
1119 case NSstart:
1120 case NSsign:
1121 state = NSpoint;
1122 continue;
1123 case NSzero:
1124 state = NSzerofract;
1125 continue;
1126 case NSdigit:
1127 state = NSfract;
1128 continue;
1129 }
1130 break;
1131 }
1132 if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
1133 switch(state) {
1134 case NSdigit:
1135 case NSfract:
1136 state = NSexp;
1137 continue;
1138 }
1139 break;
1140 }
1141 break;
1142 }
1143
1144 switch(state) {
1145 /*
1146 * result is zero
1147 */
1148 case NSstart:
1149 case NSsign:
1150 case NSzero:
1151 case NSzerofract:
1152 case NSpoint:
1153 kp = k->key + k->klen;
1154 k->klen += 2;
1155 kp[0] = 0x20; /* between + and - */
1156 kp[1] = 0;
1157 return;
1158 /*
1159 * result has exponent
1160 */
1161 case NSexpsign:
1162 case NSexp:
1163 case NSexpdigit:
1164 if(expsign)
1165 exp = -exp;
1166 dp += exp;
1167
1168 /*
1169 * result is fixed point number
1170 */
1171 case NSdigit:
1172 case NSfract:
1173 kp -= nzero;
1174 cl -= nzero;
1175 break;
1176 }
1177
1178 /*
1179 * end of number
1180 */
1181 c = 0;
1182 if(rflag)
1183 c = ~c;
1184 *kp = c;
1185
1186 /*
1187 * sign and exponent
1188 */
1189 c = 0x30;
1190 if(rflag) {
1191 c = 0x10;
1192 dp = ~dp;
1193 }
1194 kp = k->key + k->klen;
1195 kp[0] = c;
1196 kp[1] = (dp >> 8);
1197 kp[2] = dp;
1198 k->klen = cl+1;
1199 }
1200
1201 void
1202 dokey_m(Key *k, uchar *lp, uchar *lpe, Field *f)
1203 {
1204 uchar *kp;
1205 Rune r, place[3];
1206 int c, cl, pc;
1207 int rflag;
1208
1209 rflag = f->flags&Rflag;
1210 pc = 0;
1211
1212 cl = k->klen;
1213 kp = k->key + cl;
1214
1215 for(;;) {
1216 /*
1217 * get the character
1218 */
1219 if(lp >= lpe)
1220 break;
1221 c = *lp;
1222 if(c >= Runeself) {
1223 lp += chartorune(&r, (char*)lp);
1224 c = r;
1225 } else
1226 lp++;
1227
1228 if(c < nelem(f->mapto)) {
1229 c = f->mapto[c];
1230 if(c == 0)
1231 continue;
1232 }
1233 place[pc++] = c;
1234 if(pc < 3)
1235 continue;
1236 for(c=11; c>=0; c--)
1237 if(memcmp(month[c], place, sizeof(place)) == 0)
1238 break;
1239 c += 10;
1240 if(rflag)
1241 c = ~c;
1242 *kp++ = c;
1243 cl++;
1244 break;
1245 }
1246
1247 c = 0;
1248 if(rflag)
1249 c = ~c;
1250 *kp = c;
1251 k->klen = cl+1;
1252 }
1253
1254 void
1255 dokey_dfi(Key *k, uchar *lp, uchar *lpe, Field *f)
1256 {
1257 uchar *kp;
1258 Rune r;
1259 int c, cl, n, rflag;
1260
1261 cl = k->klen;
1262 kp = k->key + cl;
1263 rflag = f->flags & Rflag;
1264
1265 for(;;) {
1266 /*
1267 * get the character
1268 */
1269 if(lp >= lpe)
1270 break;
1271 c = *lp;
1272 if(c >= Runeself) {
1273 lp += chartorune(&r, (char*)lp);
1274 c = r;
1275 } else
1276 lp++;
1277
1278 /*
1279 * do the various mappings.
1280 * the common case is handled
1281 * completely by the table.
1282 */
1283 if(c != 0 && c < Runeself) {
1284 c = f->mapto[c];
1285 if(c) {
1286 *kp++ = c;
1287 cl++;
1288 }
1289 continue;
1290 }
1291
1292 /*
1293 * for characters out of range,
1294 * the table does not do Rflag.
1295 * ignore is based on mapto[255]
1296 */
1297 if(c != 0 && c < nelem(f->mapto)) {
1298 c = f->mapto[c];
1299 if(c == 0)
1300 continue;
1301 } else
1302 if(f->mapto[nelem(f->mapto)-1] == 0)
1303 continue;
1304
1305 /*
1306 * put it in the key
1307 */
1308 r = c;
1309 n = runetochar((char*)kp, &r);
1310 kp += n;
1311 cl += n;
1312 if(rflag)
1313 while(n > 0) {
1314 kp[-n] = ~kp[-n];
1315 n--;
1316 }
1317 }
1318
1319 /*
1320 * end of key
1321 */
1322 k->klen = cl+1;
1323 if(rflag) {
1324 *kp = ~0;
1325 return;
1326 }
1327 *kp = 0;
1328 }
1329
1330 void
1331 dokey_r(Key *k, uchar *lp, uchar *lpe, Field *f)
1332 {
1333 int cl, n;
1334 uchar *kp;
1335
1336 USED(f);
1337 n = lpe - lp;
1338 if(n < 0)
1339 n = 0;
1340 cl = k->klen;
1341 kp = k->key + cl;
1342 k->klen = cl+n+1;
1343
1344 lpe -= 3;
1345 while(lp < lpe) {
1346 kp[0] = ~lp[0];
1347 kp[1] = ~lp[1];
1348 kp[2] = ~lp[2];
1349 kp[3] = ~lp[3];
1350 kp += 4;
1351 lp += 4;
1352 }
1353
1354 lpe += 3;
1355 while(lp < lpe)
1356 *kp++ = ~*lp++;
1357 *kp = ~0;
1358 }
1359
1360 void
1361 dokey_(Key *k, uchar *lp, uchar *lpe, Field *f)
1362 {
1363 int n, cl;
1364 uchar *kp;
1365
1366 USED(f);
1367 n = lpe - lp;
1368 if(n < 0)
1369 n = 0;
1370 cl = k->klen;
1371 kp = k->key + cl;
1372 k->klen = cl+n+1;
1373 memmove(kp, lp, n);
1374 kp[n] = 0;
1375 }
1376
1377 void
1378 buildkey(Line *l)
1379 {
1380 Key *k;
1381 uchar *lp, *lpe;
1382 int ll, kl, cl, i, n;
1383 Field *f;
1384
1385 ll = l->llen - 1;
1386 kl = 0; /* allocated length */
1387 cl = 0; /* current length */
1388 k = 0;
1389
1390 for(i=1; i<=args.nfield; i++) {
1391 f = &args.field[i];
1392 lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);
1393 if(lp == 0)
1394 lp = l->line + ll;
1395 lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);
1396 if(lpe == 0)
1397 lpe = l->line + ll;
1398 n = (lpe - lp) + 1;
1399 if(n <= 0)
1400 n = 1;
1401 if(cl+(n+4) > kl) {
1402 kl = cl+(n+4);
1403 k = realloc(k, sizeof(Key) +
1404 (kl-1)*sizeof(k->key[0]));
1405 if(k == 0)
1406 nomem();
1407 }
1408 k->klen = cl;
1409 (*f->dokey)(k, lp, lpe, f);
1410 cl = k->klen;
1411 }
1412
1413 /*
1414 * global comparisons
1415 */
1416 if(!(args.uflag && cl > 0)) {
1417 f = &args.field[0];
1418 if(cl+(ll+4) > kl) {
1419 kl = cl+(ll+4);
1420 k = realloc(k, sizeof(Key) +
1421 (kl-1)*sizeof(k->key[0]));
1422 if(k == 0)
1423 nomem();
1424 }
1425 k->klen = cl;
1426 (*f->dokey)(k, l->line, l->line+ll, f);
1427 cl = k->klen;
1428 }
1429
1430 l->key = k;
1431 k->klen = cl;
1432
1433 if(args.vflag) {
1434 write(2, l->line, l->llen);
1435 for(i=0; i<k->klen; i++) {
1436 fprint(2, " %.2x", k->key[i]);
1437 if(k->key[i] == 0x00 || k->key[i] == 0xff)
1438 fprint(2, "\n");
1439 }
1440 }
1441 }
1442
1443 void
1444 makemapm(Field *f)
1445 {
1446 int i, c;
1447
1448 for(i=0; i<nelem(f->mapto); i++) {
1449 c = 1;
1450 if(i == ' ' || i == '\t')
1451 c = 0;
1452 if(i >= 'a' && i <= 'z')
1453 c = i + ('A' - 'a');
1454 if(i >= 'A' && i <= 'Z')
1455 c = i;
1456 f->mapto[i] = c;
1457 if(args.vflag) {
1458 if((i & 15) == 0)
1459 fprint(2, " ");
1460 fprint(2, " %.2x", c);
1461 if((i & 15) == 15)
1462 fprint(2, "\n");
1463 }
1464 }
1465 }
1466
1467 void
1468 makemapd(Field *f)
1469 {
1470 int i, j, c;
1471
1472 for(i=0; i<nelem(f->mapto); i++) {
1473 c = i;
1474 if(f->flags & Iflag)
1475 if(c < 040 || c > 0176)
1476 c = -1;
1477 if((f->flags & Wflag) && c >= 0)
1478 if(c == ' ' || c == '\t')
1479 c = -1;
1480 if((f->flags & Dflag) && c >= 0)
1481 if(!(c == ' ' || c == '\t' ||
1482 (c >= 'a' && c <= 'z') ||
1483 (c >= 'A' && c <= 'Z') ||
1484 (c >= '0' && c <= '9'))) {
1485 for(j=0; latinmap[j]; j+=3)
1486 if(c == latinmap[j+0] ||
1487 c == latinmap[j+1])
1488 break;
1489 if(latinmap[j] == 0)
1490 c = -1;
1491 }
1492 if((f->flags & Fflag) && c >= 0) {
1493 if(c >= 'a' && c <= 'z')
1494 c += 'A' - 'a';
1495 for(j=0; latinmap[j]; j+=3)
1496 if(c == latinmap[j+0] ||
1497 c == latinmap[j+1]) {
1498 c = latinmap[j+2];
1499 break;
1500 }
1501 }
1502 if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)
1503 c = ~c & 0xff;
1504 if(c < 0)
1505 c = 0;
1506 f->mapto[i] = c;
1507 if(args.vflag) {
1508 if((i & 15) == 0)
1509 fprint(2, " ");
1510 fprint(2, " %.2x", c);
1511 if((i & 15) == 15)
1512 fprint(2, "\n");
1513 }
1514 }
1515 }
1516
1517 int latinmap[] =
1518 {
1519 /* lcase ucase fold */
1520 0xe0, 0xc0, 0x41, /* L'à',…
1521 0xe1, 0xc1, 0x41, /* L'á',…
1522 0xe2, 0xc2, 0x41, /* L'â',…
1523 0xe4, 0xc4, 0x41, /* L'ä',…
1524 0xe3, 0xc3, 0x41, /* L'ã',…
1525 0xe5, 0xc5, 0x41, /* L'å',…
1526 0xe8, 0xc8, 0x45, /* L'è',…
1527 0xe9, 0xc9, 0x45, /* L'é',…
1528 0xea, 0xca, 0x45, /* L'ê',…
1529 0xeb, 0xcb, 0x45, /* L'ë',…
1530 0xec, 0xcc, 0x49, /* L'ì',…
1531 0xed, 0xcd, 0x49, /* L'í',…
1532 0xee, 0xce, 0x49, /* L'î',…
1533 0xef, 0xcf, 0x49, /* L'ï',…
1534 0xf2, 0xd2, 0x4f, /* L'ò',…
1535 0xf3, 0xd3, 0x4f, /* L'ó',…
1536 0xf4, 0xd4, 0x4f, /* L'ô',…
1537 0xf6, 0xd6, 0x4f, /* L'ö',…
1538 0xf5, 0xd5, 0x4f, /* L'õ',…
1539 0xf8, 0xd8, 0x4f, /* L'ø',…
1540 0xf9, 0xd9, 0x55, /* L'ù',…
1541 0xfa, 0xda, 0x55, /* L'ú',…
1542 0xfb, 0xdb, 0x55, /* L'û',…
1543 0xfc, 0xdc, 0x55, /* L'ü',…
1544 0xe6, 0xc6, 0x41, /* L'æ',…
1545 0xf0, 0xd0, 0x44, /* L'ð',…
1546 0xf1, 0xd1, 0x4e, /* L'ñ',…
1547 0xfd, 0xdd, 0x59, /* L'ý',…
1548 0xe7, 0xc7, 0x43, /* L'ç',…
1549 0,
1550 };
1551
1552 Rune LJAN[] = { 'J', 'A', 'N', 0 };
1553 Rune LFEB[] = { 'F', 'E', 'B', 0 };
1554 Rune LMAR[] = { 'M', 'A', 'R', 0 };
1555 Rune LAPR[] = { 'A', 'P', 'R', 0 };
1556 Rune LMAY[] = { 'M', 'A', 'Y', 0 };
1557 Rune LJUN[] = { 'J', 'U', 'N', 0 };
1558 Rune LJUL[] = { 'J', 'U', 'L', 0 };
1559 Rune LAUG[] = { 'A', 'U', 'G', 0 };
1560 Rune LSEP[] = { 'S', 'E', 'P', 0 };
1561 Rune LOCT[] = { 'O', 'C', 'T', 0 };
1562 Rune LNOV[] = { 'N', 'O', 'V', 0 };
1563 Rune LDEC[] = { 'D', 'E', 'C', 0 };
1564
1565 Rune* month[12] =
1566 {
1567 LJAN,
1568 LFEB,
1569 LMAR,
1570 LAPR,
1571 LMAY,
1572 LJUN,
1573 LJUL,
1574 LAUG,
1575 LSEP,
1576 LOCT,
1577 LNOV,
1578 LDEC,
1579 };
1580
1581 /************** radix sort ***********/
1582
1583 enum
1584 {
1585 Threshold = 14
1586 };
1587
1588 void rsort4(Key***, ulong, int);
1589 void bsort4(Key***, ulong, int);
1590
1591 void
1592 sort4(void *a, ulong n)
1593 {
1594 if(n > Threshold)
1595 rsort4((Key***)a, n, 0);
1596 else
1597 bsort4((Key***)a, n, 0);
1598 }
1599
1600 void
1601 rsort4(Key ***a, ulong n, int b)
1602 {
1603 Key ***ea, ***t, ***u, **t1, **u1, *k;
1604 Key ***part[257];
1605 static long count[257];
1606 long clist[257+257], *cp, *cp1;
1607 int c, lowc, higc;
1608
1609 /*
1610 * pass 1 over all keys,
1611 * count the number of each key[b].
1612 * find low count and high count.
1613 */
1614 lowc = 256;
1615 higc = 0;
1616 ea = a+n;
1617 for(t=a; t<ea; t++) {
1618 k = **t;
1619 n = k->klen;
1620 if(b >= n) {
1621 count[256]++;
1622 continue;
1623 }
1624 c = k->key[b];
1625 n = count[c]++;
1626 if(n == 0) {
1627 if(c < lowc)
1628 lowc = c;
1629 if(c > higc)
1630 higc = c;
1631 }
1632 }
1633
1634 /*
1635 * pass 2 over all counts,
1636 * put partition pointers in part[c].
1637 * save compacted indexes and counts
1638 * in clist[].
1639 */
1640 t = a;
1641 n = count[256];
1642 clist[0] = n;
1643 part[256] = t;
1644 t += n;
1645
1646 cp1 = clist+1;
1647 cp = count+lowc;
1648 for(c=lowc; c<=higc; c++,cp++) {
1649 n = *cp;
1650 if(n) {
1651 cp1[0] = n;
1652 cp1[1] = c;
1653 cp1 += 2;
1654 part[c] = t;
1655 t += n;
1656 }
1657 }
1658 *cp1 = 0;
1659
1660 /*
1661 * pass 3 over all counts.
1662 * chase lowest pointer in each partition
1663 * around a permutation until it comes
1664 * back and is stored where it started.
1665 * static array, count[], should be
1666 * reduced to zero entries except maybe
1667 * count[256].
1668 */
1669 for(cp1=clist+1; cp1[0]; cp1+=2) {
1670 c = cp1[1];
1671 cp = count+c;
1672 while(*cp) {
1673 t1 = *part[c];
1674 for(;;) {
1675 k = *t1;
1676 n = 256;
1677 if(b < k->klen)
1678 n = k->key[b];
1679 u = part[n]++;
1680 count[n]--;
1681 u1 = *u;
1682 *u = t1;
1683 if(n == c)
1684 break;
1685 t1 = u1;
1686 }
1687 }
1688 }
1689
1690 /*
1691 * pass 4 over all partitions.
1692 * call recursively.
1693 */
1694 b++;
1695 t = a + clist[0];
1696 count[256] = 0;
1697 for(cp1=clist+1; n=cp1[0]; cp1+=2) {
1698 if(n > Threshold)
1699 rsort4(t, n, b);
1700 else
1701 if(n > 1)
1702 bsort4(t, n, b);
1703 t += n;
1704 }
1705 }
1706
1707 /*
1708 * bubble sort to pick up
1709 * the pieces.
1710 */
1711 void
1712 bsort4(Key ***a, ulong n, int b)
1713 {
1714 Key ***i, ***j, ***k, ***l, **t;
1715 Key *ka, *kb;
1716 int n1, n2;
1717
1718 l = a+n;
1719 j = a;
1720
1721 loop:
1722 i = j;
1723 j++;
1724 if(j >= l)
1725 return;
1726
1727 ka = **i;
1728 kb = **j;
1729 n1 = ka->klen - b;
1730 n2 = kb->klen - b;
1731 if(n1 > n2)
1732 n1 = n2;
1733 if(n1 <= 0)
1734 goto loop;
1735 n2 = ka->key[b] - kb->key[b];
1736 if(n2 == 0)
1737 n2 = memcmp(ka->key+b, kb->key+b, n1);
1738 if(n2 <= 0)
1739 goto loop;
1740
1741 for(;;) {
1742 k = i+1;
1743
1744 t = *k;
1745 *k = *i;
1746 *i = t;
1747
1748 if(i <= a)
1749 goto loop;
1750
1751 i--;
1752 ka = **i;
1753 kb = *t;
1754 n1 = ka->klen - b;
1755 n2 = kb->klen - b;
1756 if(n1 > n2)
1757 n1 = n2;
1758 if(n1 <= 0)
1759 goto loop;
1760 n2 = ka->key[b] - kb->key[b];
1761 if(n2 == 0)
1762 n2 = memcmp(ka->key+b, kb->key+b, n1);
1763 if(n2 <= 0)
1764 goto loop;
1765 }
1766 }
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.