sort.c - 9base - revived minimalist port of Plan 9 userland to Unix | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
sort.c (29541B) | |
--- | |
1 #include <u.h> | |
2 #include <libc.h> | |
3 #include <bio.h> | |
4 | |
5 /* | |
6 bugs: | |
7 00/ff for end of file can conflict with 00/ff characters | |
8 */ | |
9 | |
10 enum | |
11 { | |
12 Nline = 500000, /* default max number of l… | |
13 Nmerge = 10, /* max number of temp… | |
14 Nfield = 20, /* max number of argu… | |
15 | |
16 Bflag = 1<<0, /* flags per field */ | |
17 B1flag = 1<<1, | |
18 | |
19 Dflag = 1<<2, | |
20 Fflag = 1<<3, | |
21 Gflag = 1<<4, | |
22 Iflag = 1<<5, | |
23 Mflag = 1<<6, | |
24 Nflag = 1<<7, | |
25 Rflag = 1<<8, | |
26 Wflag = 1<<9, | |
27 | |
28 NSstart = 0, /* states for number … | |
29 NSsign, | |
30 NSzero, | |
31 NSdigit, | |
32 NSpoint, | |
33 NSfract, | |
34 NSzerofract, | |
35 NSexp, | |
36 NSexpsign, | |
37 NSexpdigit | |
38 }; | |
39 | |
40 typedef struct Line Line; | |
41 typedef struct Key Key; | |
42 typedef struct Merge Merge; | |
43 typedef struct Field Field; | |
44 | |
45 struct Line | |
46 { | |
47 Key* key; | |
48 int llen; /* always >= 1 */ | |
49 uchar line[1]; /* always ends in '\n' */ | |
50 }; | |
51 | |
52 struct Merge | |
53 { | |
54 Key* key; /* copy of line->key so (Line*) … | |
55 Line* line; /* line at the head of a merge… | |
56 int fd; /* file descriptor */ | |
57 Biobuf b; /* iobuf for reading a temp file… | |
58 }; | |
59 | |
60 struct Key | |
61 { | |
62 int klen; | |
63 uchar key[1]; | |
64 }; | |
65 | |
66 struct Field | |
67 { | |
68 int beg1; | |
69 int beg2; | |
70 int end1; | |
71 int end2; | |
72 | |
73 long flags; | |
74 uchar mapto[256]; | |
75 | |
76 void (*dokey)(Key*, uchar*, uchar*, Field*); | |
77 }; | |
78 | |
79 struct args | |
80 { | |
81 char* ofile; | |
82 char* tname; | |
83 Rune tabchar; | |
84 char cflag; | |
85 char uflag; | |
86 char vflag; | |
87 int nfield; | |
88 int nfile; | |
89 Field field[Nfield]; | |
90 | |
91 Line** linep; | |
92 long nline; /* number of lines in … | |
93 long lineno; /* overall ordinal fo… | |
94 int ntemp; | |
95 long mline; /* max lines per file … | |
96 } args; | |
97 | |
98 extern int latinmap[]; | |
99 extern Rune* month[12]; | |
100 | |
101 void buildkey(Line*); | |
102 void doargs(int, char*[]); | |
103 void dofield(char*, int*, int*, int, int); | |
104 void dofile(Biobuf*); | |
105 void dokey_(Key*, uchar*, uchar*, Field*); | |
106 void dokey_dfi(Key*, uchar*, uchar*, Field*); | |
107 void dokey_gn(Key*, uchar*, uchar*, Field*); | |
108 void dokey_m(Key*, uchar*, uchar*, Field*); | |
109 void dokey_r(Key*, uchar*, uchar*, Field*); | |
110 void done(char*); | |
111 int kcmp(Key*, Key*); | |
112 void makemapd(Field*); | |
113 void makemapm(Field*); | |
114 void mergefiles(int, int, Biobuf*); | |
115 void mergeout(Biobuf*); | |
116 void newfield(void); | |
117 Line* newline(Biobuf*); | |
118 void nomem(void); | |
119 void notifyf(void*, char*); | |
120 void printargs(void); | |
121 void printout(Biobuf*); | |
122 void setfield(int, int); | |
123 uchar* skip(uchar*, int, int, int, int); | |
124 void sort4(void*, ulong); | |
125 char* tempfile(int); | |
126 void tempout(void); | |
127 void lineout(Biobuf*, Line*); | |
128 | |
129 void | |
130 main(int argc, char *argv[]) | |
131 { | |
132 int i, f; | |
133 char *s; | |
134 Biobuf bbuf; | |
135 | |
136 notify(notifyf); /**/ | |
137 doargs(argc, argv); | |
138 if(args.vflag) | |
139 printargs(); | |
140 | |
141 for(i=1; i<argc; i++) { | |
142 s = argv[i]; | |
143 if(s == 0) | |
144 continue; | |
145 if(strcmp(s, "-") == 0) { | |
146 Binit(&bbuf, 0, OREAD); | |
147 dofile(&bbuf); | |
148 Bterm(&bbuf); | |
149 continue; | |
150 } | |
151 f = open(s, OREAD); | |
152 if(f < 0) { | |
153 fprint(2, "sort: open %s: %r\n", s); | |
154 done("open"); | |
155 } | |
156 Binit(&bbuf, f, OREAD); | |
157 dofile(&bbuf); | |
158 Bterm(&bbuf); | |
159 close(f); | |
160 } | |
161 if(args.nfile == 0) { | |
162 Binit(&bbuf, 0, OREAD); | |
163 dofile(&bbuf); | |
164 Bterm(&bbuf); | |
165 } | |
166 if(args.cflag) | |
167 done(0); | |
168 if(args.vflag) | |
169 fprint(2, "=========\n"); | |
170 | |
171 f = 1; | |
172 if(args.ofile) { | |
173 f = create(args.ofile, OWRITE, 0666); | |
174 if(f < 0) { | |
175 fprint(2, "sort: create %s: %r\n", args.ofile); | |
176 done("create"); | |
177 } | |
178 } | |
179 | |
180 Binit(&bbuf, f, OWRITE); | |
181 if(args.ntemp) { | |
182 tempout(); | |
183 mergeout(&bbuf); | |
184 } else { | |
185 printout(&bbuf); | |
186 } | |
187 Bterm(&bbuf); | |
188 done(0); | |
189 } | |
190 | |
191 void | |
192 dofile(Biobuf *b) | |
193 { | |
194 Line *l, *ol; | |
195 int n; | |
196 | |
197 if(args.cflag) { | |
198 ol = newline(b); | |
199 if(ol == 0) | |
200 return; | |
201 for(;;) { | |
202 l = newline(b); | |
203 if(l == 0) | |
204 break; | |
205 n = kcmp(ol->key, l->key); | |
206 if(n > 0 || (n == 0 && args.uflag)) { | |
207 fprint(2, "sort: -c file not in sort\n")… | |
208 done("order"); | |
209 } | |
210 free(ol->key); | |
211 free(ol); | |
212 ol = l; | |
213 } | |
214 return; | |
215 } | |
216 | |
217 if(args.linep == 0) { | |
218 args.linep = malloc(args.mline * sizeof(args.linep)); | |
219 if(args.linep == 0) | |
220 nomem(); | |
221 } | |
222 for(;;) { | |
223 l = newline(b); | |
224 if(l == 0) | |
225 break; | |
226 if(args.nline >= args.mline) | |
227 tempout(); | |
228 args.linep[args.nline] = l; | |
229 args.nline++; | |
230 args.lineno++; | |
231 } | |
232 } | |
233 | |
234 void | |
235 notifyf(void *a, char *s) | |
236 { | |
237 USED(a); | |
238 if(strcmp(s, "interrupt") == 0) | |
239 done(0); | |
240 if(strcmp(s, "hangup") == 0) | |
241 done(0); | |
242 if(strcmp(s, "kill") == 0) | |
243 done(0); | |
244 if(strncmp(s, "sys: write on closed pipe", 25) == 0) | |
245 done(0); | |
246 noted(NDFLT); | |
247 } | |
248 | |
249 Line* | |
250 newline(Biobuf *b) | |
251 { | |
252 Line *l; | |
253 char *p; | |
254 int n, c; | |
255 | |
256 p = Brdline(b, '\n'); | |
257 n = Blinelen(b); | |
258 if(p == 0) { | |
259 if(n == 0) | |
260 return 0; | |
261 l = 0; | |
262 for(n=0;;) { | |
263 if((n & 31) == 0) { | |
264 l = realloc(l, sizeof(Line) + | |
265 (n+31)*sizeof(l->line[0])); | |
266 if(l == 0) | |
267 nomem(); | |
268 } | |
269 c = Bgetc(b); | |
270 if(c < 0) { | |
271 fprint(2, "sort: newline added\n"); | |
272 c = '\n'; | |
273 } | |
274 l->line[n++] = c; | |
275 if(c == '\n') | |
276 break; | |
277 } | |
278 l->llen = n; | |
279 buildkey(l); | |
280 return l; | |
281 } | |
282 l = malloc(sizeof(Line) + | |
283 (n-1)*sizeof(l->line[0])); | |
284 if(l == 0) | |
285 nomem(); | |
286 l->llen = n; | |
287 memmove(l->line, p, n); | |
288 buildkey(l); | |
289 return l; | |
290 } | |
291 | |
292 void | |
293 lineout(Biobuf *b, Line *l) | |
294 { | |
295 int n, m; | |
296 | |
297 n = l->llen; | |
298 m = Bwrite(b, l->line, n); | |
299 if(n != m) | |
300 exits("write"); | |
301 } | |
302 | |
303 void | |
304 tempout(void) | |
305 { | |
306 long n; | |
307 Line **lp, *l; | |
308 char *tf; | |
309 int f; | |
310 Biobuf tb; | |
311 | |
312 sort4(args.linep, args.nline); | |
313 tf = tempfile(args.ntemp); | |
314 args.ntemp++; | |
315 f = create(tf, OWRITE, 0666); | |
316 if(f < 0) { | |
317 fprint(2, "sort: create %s: %r\n", tf); | |
318 done("create"); | |
319 } | |
320 | |
321 Binit(&tb, f, OWRITE); | |
322 lp = args.linep; | |
323 for(n=args.nline; n>0; n--) { | |
324 l = *lp++; | |
325 lineout(&tb, l); | |
326 free(l->key); | |
327 free(l); | |
328 } | |
329 args.nline = 0; | |
330 Bterm(&tb); | |
331 close(f); | |
332 } | |
333 | |
334 void | |
335 done(char *xs) | |
336 { | |
337 int i; | |
338 | |
339 for(i=0; i<args.ntemp; i++) | |
340 remove(tempfile(i)); | |
341 exits(xs); | |
342 } | |
343 | |
344 void | |
345 nomem(void) | |
346 { | |
347 fprint(2, "sort: out of memory\n"); | |
348 done("mem"); | |
349 } | |
350 | |
351 char* | |
352 tempfile(int n) | |
353 { | |
354 static char file[100]; | |
355 static uint pid; | |
356 char *dir; | |
357 | |
358 dir = "/var/tmp"; | |
359 if(args.tname) | |
360 dir = args.tname; | |
361 if(strlen(dir) >= nelem(file)-20) { | |
362 fprint(2, "temp file directory name is too long: %s\n", … | |
363 done("tdir"); | |
364 } | |
365 | |
366 if(pid == 0) { | |
367 pid = getpid(); | |
368 if(pid == 0) { | |
369 pid = time(0); | |
370 if(pid == 0) | |
371 pid = 1; | |
372 } | |
373 } | |
374 | |
375 sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n); | |
376 return file; | |
377 } | |
378 | |
379 void | |
380 mergeout(Biobuf *b) | |
381 { | |
382 int n, i, f; | |
383 char *tf; | |
384 Biobuf tb; | |
385 | |
386 for(i=0; i<args.ntemp; i+=n) { | |
387 n = args.ntemp - i; | |
388 if(n > Nmerge) { | |
389 tf = tempfile(args.ntemp); | |
390 args.ntemp++; | |
391 f = create(tf, OWRITE, 0666); | |
392 if(f < 0) { | |
393 fprint(2, "sort: create %s: %r\n", tf); | |
394 done("create"); | |
395 } | |
396 Binit(&tb, f, OWRITE); | |
397 | |
398 n = Nmerge; | |
399 mergefiles(i, n, &tb); | |
400 | |
401 Bterm(&tb); | |
402 close(f); | |
403 } else | |
404 mergefiles(i, n, b); | |
405 } | |
406 } | |
407 | |
408 void | |
409 mergefiles(int t, int n, Biobuf *b) | |
410 { | |
411 Merge *m, *mp, **mmp; | |
412 Key *ok; | |
413 Line *l; | |
414 char *tf; | |
415 int i, f, nn; | |
416 | |
417 mmp = malloc(n*sizeof(*mmp)); | |
418 mp = malloc(n*sizeof(*mp)); | |
419 if(mmp == 0 || mp == 0) | |
420 nomem(); | |
421 | |
422 nn = 0; | |
423 m = mp; | |
424 for(i=0; i<n; i++,m++) { | |
425 tf = tempfile(t+i); | |
426 f = open(tf, OREAD); | |
427 if(f < 0) { | |
428 fprint(2, "sort: reopen %s: %r\n", tf); | |
429 done("open"); | |
430 } | |
431 m->fd = f; | |
432 Binit(&m->b, f, OREAD); | |
433 mmp[nn] = m; | |
434 | |
435 l = newline(&m->b); | |
436 if(l == 0) | |
437 continue; | |
438 nn++; | |
439 m->line = l; | |
440 m->key = l->key; | |
441 } | |
442 | |
443 ok = 0; | |
444 for(;;) { | |
445 sort4(mmp, nn); | |
446 m = *mmp; | |
447 if(nn == 0) | |
448 break; | |
449 for(;;) { | |
450 l = m->line; | |
451 if(args.uflag && ok && kcmp(ok, l->key) == 0) { | |
452 free(l->key); | |
453 free(l); | |
454 } else { | |
455 lineout(b, l); | |
456 if(ok) | |
457 free(ok); | |
458 ok = l->key; | |
459 free(l); | |
460 } | |
461 | |
462 l = newline(&m->b); | |
463 if(l == 0) { | |
464 nn--; | |
465 mmp[0] = mmp[nn]; | |
466 break; | |
467 } | |
468 m->line = l; | |
469 m->key = l->key; | |
470 if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0) | |
471 break; | |
472 } | |
473 } | |
474 if(ok) | |
475 free(ok); | |
476 | |
477 m = mp; | |
478 for(i=0; i<n; i++,m++) { | |
479 Bterm(&m->b); | |
480 close(m->fd); | |
481 } | |
482 | |
483 free(mp); | |
484 free(mmp); | |
485 } | |
486 | |
487 int | |
488 kcmp(Key *ka, Key *kb) | |
489 { | |
490 int n, m; | |
491 | |
492 /* | |
493 * set n to length of smaller key | |
494 */ | |
495 n = ka->klen; | |
496 m = kb->klen; | |
497 if(n > m) | |
498 n = m; | |
499 return memcmp(ka->key, kb->key, n); | |
500 } | |
501 | |
502 void | |
503 printout(Biobuf *b) | |
504 { | |
505 long n; | |
506 Line **lp, *l; | |
507 Key *ok; | |
508 | |
509 sort4(args.linep, args.nline); | |
510 lp = args.linep; | |
511 ok = 0; | |
512 for(n=args.nline; n>0; n--) { | |
513 l = *lp++; | |
514 if(args.uflag && ok && kcmp(ok, l->key) == 0) | |
515 continue; | |
516 lineout(b, l); | |
517 ok = l->key; | |
518 } | |
519 } | |
520 | |
521 void | |
522 setfield(int n, int c) | |
523 { | |
524 Field *f; | |
525 | |
526 f = &args.field[n]; | |
527 switch(c) { | |
528 default: | |
529 fprint(2, "sort: unknown option: field.%C\n", c); | |
530 done("option"); | |
531 case 'b': /* skip blanks */ | |
532 f->flags |= Bflag; | |
533 break; | |
534 case 'd': /* directory order */ | |
535 f->flags |= Dflag; | |
536 break; | |
537 case 'f': /* fold case */ | |
538 f->flags |= Fflag; | |
539 break; | |
540 case 'g': /* floating point -n case */ | |
541 f->flags |= Gflag; | |
542 break; | |
543 case 'i': /* ignore non-ascii */ | |
544 f->flags |= Iflag; | |
545 break; | |
546 case 'M': /* month */ | |
547 f->flags |= Mflag; | |
548 break; | |
549 case 'n': /* numbers */ | |
550 f->flags |= Nflag; | |
551 break; | |
552 case 'r': /* reverse */ | |
553 f->flags |= Rflag; | |
554 break; | |
555 case 'w': /* ignore white */ | |
556 f->flags |= Wflag; | |
557 break; | |
558 } | |
559 } | |
560 | |
561 void | |
562 dofield(char *s, int *n1, int *n2, int off1, int off2) | |
563 { | |
564 int c, n; | |
565 | |
566 c = *s++; | |
567 if(c >= '0' && c <= '9') { | |
568 n = 0; | |
569 while(c >= '0' && c <= '9') { | |
570 n = n*10 + (c-'0'); | |
571 c = *s++; | |
572 } | |
573 n -= off1; /* posix committee: rot in hell */ | |
574 if(n < 0) { | |
575 fprint(2, "sort: field offset must be positive\n… | |
576 done("option"); | |
577 } | |
578 *n1 = n; | |
579 } | |
580 if(c == '.') { | |
581 c = *s++; | |
582 if(c >= '0' && c <= '9') { | |
583 n = 0; | |
584 while(c >= '0' && c <= '9') { | |
585 n = n*10 + (c-'0'); | |
586 c = *s++; | |
587 } | |
588 n -= off2; | |
589 if(n < 0) { | |
590 fprint(2, "sort: character offset must b… | |
591 done("option"); | |
592 } | |
593 *n2 = n; | |
594 } | |
595 } | |
596 while(c != 0) { | |
597 setfield(args.nfield, c); | |
598 c = *s++; | |
599 } | |
600 } | |
601 | |
602 void | |
603 printargs(void) | |
604 { | |
605 int i, n; | |
606 Field *f; | |
607 char *prefix; | |
608 | |
609 fprint(2, "sort"); | |
610 for(i=0; i<=args.nfield; i++) { | |
611 f = &args.field[i]; | |
612 prefix = " -"; | |
613 if(i) { | |
614 n = f->beg1; | |
615 if(n >= 0) | |
616 fprint(2, " +%d", n); | |
617 else | |
618 fprint(2, " +*"); | |
619 n = f->beg2; | |
620 if(n >= 0) | |
621 fprint(2, ".%d", n); | |
622 else | |
623 fprint(2, ".*"); | |
624 | |
625 if(f->flags & B1flag) | |
626 fprint(2, "b"); | |
627 | |
628 n = f->end1; | |
629 if(n >= 0) | |
630 fprint(2, " -%d", n); | |
631 else | |
632 fprint(2, " -*"); | |
633 n = f->end2; | |
634 if(n >= 0) | |
635 fprint(2, ".%d", n); | |
636 else | |
637 fprint(2, ".*"); | |
638 prefix = ""; | |
639 } | |
640 if(f->flags & Bflag) | |
641 fprint(2, "%sb", prefix); | |
642 if(f->flags & Dflag) | |
643 fprint(2, "%sd", prefix); | |
644 if(f->flags & Fflag) | |
645 fprint(2, "%sf", prefix); | |
646 if(f->flags & Gflag) | |
647 fprint(2, "%sg", prefix); | |
648 if(f->flags & Iflag) | |
649 fprint(2, "%si", prefix); | |
650 if(f->flags & Mflag) | |
651 fprint(2, "%sM", prefix); | |
652 if(f->flags & Nflag) | |
653 fprint(2, "%sn", prefix); | |
654 if(f->flags & Rflag) | |
655 fprint(2, "%sr", prefix); | |
656 if(f->flags & Wflag) | |
657 fprint(2, "%sw", prefix); | |
658 } | |
659 if(args.cflag) | |
660 fprint(2, " -c"); | |
661 if(args.uflag) | |
662 fprint(2, " -u"); | |
663 if(args.ofile) | |
664 fprint(2, " -o %s", args.ofile); | |
665 if(args.mline != Nline) | |
666 fprint(2, " -l %ld", args.mline); | |
667 fprint(2, "\n"); | |
668 } | |
669 | |
670 void | |
671 newfield(void) | |
672 { | |
673 int n; | |
674 Field *f; | |
675 | |
676 n = args.nfield + 1; | |
677 if(n >= Nfield) { | |
678 fprint(2, "sort: too many fields specified\n"); | |
679 done("option"); | |
680 } | |
681 args.nfield = n; | |
682 f = &args.field[n]; | |
683 f->beg1 = -1; | |
684 f->beg2 = -1; | |
685 f->end1 = -1; | |
686 f->end2 = -1; | |
687 } | |
688 | |
689 void | |
690 doargs(int argc, char *argv[]) | |
691 { | |
692 int i, c, hadplus; | |
693 char *s, *p, *q; | |
694 Field *f; | |
695 | |
696 hadplus = 0; | |
697 args.mline = Nline; | |
698 for(i=1; i<argc; i++) { | |
699 s = argv[i]; | |
700 c = *s++; | |
701 if(c == '-') { | |
702 c = *s; | |
703 if(c == 0) /* forced end of arg m… | |
704 break; | |
705 argv[i] = 0; /* clobber args proc… | |
706 if(c == '.' || (c >= '0' && c <= '9')) { | |
707 if(!hadplus) | |
708 newfield(); | |
709 f = &args.field[args.nfield]; | |
710 dofield(s, &f->end1, &f->end2, 0, 0); | |
711 hadplus = 0; | |
712 continue; | |
713 } | |
714 | |
715 while(c = *s++) | |
716 switch(c) { | |
717 case '-': /* end of options */ | |
718 i = argc; | |
719 continue; | |
720 case 'T': /* temp directory */ | |
721 if(*s == 0) { | |
722 i++; | |
723 if(i < argc) { | |
724 args.tname = argv[i]; | |
725 argv[i] = 0; | |
726 } | |
727 } else | |
728 args.tname = s; | |
729 s = strchr(s, 0); | |
730 break; | |
731 case 'o': /* output file */ | |
732 if(*s == 0) { | |
733 i++; | |
734 if(i < argc) { | |
735 args.ofile = argv[i]; | |
736 argv[i] = 0; | |
737 } | |
738 } else | |
739 args.ofile = s; | |
740 s = strchr(s, 0); | |
741 break; | |
742 case 'k': /* posix key (what were they th… | |
743 p = 0; | |
744 if(*s == 0) { | |
745 i++; | |
746 if(i < argc) { | |
747 p = argv[i]; | |
748 argv[i] = 0; | |
749 } | |
750 } else | |
751 p = s; | |
752 s = strchr(s, 0); | |
753 if(p == 0) | |
754 break; | |
755 | |
756 newfield(); | |
757 q = strchr(p, ','); | |
758 if(q) | |
759 *q++ = 0; | |
760 f = &args.field[args.nfield]; | |
761 dofield(p, &f->beg1, &f->beg2, 1, 1); | |
762 if(f->flags & Bflag) { | |
763 f->flags |= B1flag; | |
764 f->flags &= ~Bflag; | |
765 } | |
766 if(q) { | |
767 dofield(q, &f->end1, &f->end2, 1… | |
768 if(f->end2 <= 0) | |
769 f->end1++; | |
770 } | |
771 hadplus = 0; | |
772 break; | |
773 case 't': /* tab character */ | |
774 if(*s == 0) { | |
775 i++; | |
776 if(i < argc) { | |
777 chartorune(&args.tabchar… | |
778 argv[i] = 0; | |
779 } | |
780 } else | |
781 s += chartorune(&args.tabchar, s… | |
782 if(args.tabchar == '\n') { | |
783 fprint(2, "aw come on, rob\n"); | |
784 done("rob"); | |
785 } | |
786 break; | |
787 case 'c': /* check order */ | |
788 args.cflag = 1; | |
789 break; | |
790 case 'u': /* unique */ | |
791 args.uflag = 1; | |
792 break; | |
793 case 'v': /* debugging noise */ | |
794 args.vflag = 1; | |
795 break; | |
796 case 'l': | |
797 if(*s == 0) { | |
798 i++; | |
799 if(i < argc) { | |
800 args.mline = atol(argv[i… | |
801 argv[i] = 0; | |
802 } | |
803 } else | |
804 args.mline = atol(s); | |
805 s = strchr(s, 0); | |
806 break; | |
807 | |
808 case 'M': /* month */ | |
809 case 'b': /* skip blanks */ | |
810 case 'd': /* directory order */ | |
811 case 'f': /* fold case */ | |
812 case 'g': /* floating numbers */ | |
813 case 'i': /* ignore non-ascii */ | |
814 case 'n': /* numbers */ | |
815 case 'r': /* reverse */ | |
816 case 'w': /* ignore white */ | |
817 if(args.nfield > 0) | |
818 fprint(2, "sort: global field se… | |
819 setfield(0, c); | |
820 break; | |
821 case 'm': | |
822 /* option m silently ignored but require… | |
823 break; | |
824 default: | |
825 fprint(2, "sort: unknown option: -%C\n",… | |
826 done("option"); | |
827 } | |
828 continue; | |
829 } | |
830 if(c == '+') { | |
831 argv[i] = 0; /* clobber args proc… | |
832 c = *s; | |
833 if(c == '.' || (c >= '0' && c <= '9')) { | |
834 newfield(); | |
835 f = &args.field[args.nfield]; | |
836 dofield(s, &f->beg1, &f->beg2, 0, 0); | |
837 if(f->flags & Bflag) { | |
838 f->flags |= B1flag; | |
839 f->flags &= ~Bflag; | |
840 } | |
841 hadplus = 1; | |
842 continue; | |
843 } | |
844 fprint(2, "sort: unknown option: +%C\n", c); | |
845 done("option"); | |
846 } | |
847 args.nfile++; | |
848 } | |
849 | |
850 for(i=0; i<=args.nfield; i++) { | |
851 f = &args.field[i]; | |
852 | |
853 /* | |
854 * global options apply to fields that | |
855 * specify no options | |
856 */ | |
857 if(f->flags == 0) { | |
858 f->flags = args.field[0].flags; | |
859 if(args.field[0].flags & Bflag) | |
860 f->flags |= B1flag; | |
861 } | |
862 | |
863 | |
864 /* | |
865 * build buildkey specification | |
866 */ | |
867 switch(f->flags & ~(Bflag|B1flag)) { | |
868 default: | |
869 fprint(2, "sort: illegal combination of flags: %… | |
870 done("option"); | |
871 case 0: | |
872 f->dokey = dokey_; | |
873 break; | |
874 case Rflag: | |
875 f->dokey = dokey_r; | |
876 break; | |
877 case Gflag: | |
878 case Nflag: | |
879 case Gflag|Nflag: | |
880 case Gflag|Rflag: | |
881 case Nflag|Rflag: | |
882 case Gflag|Nflag|Rflag: | |
883 f->dokey = dokey_gn; | |
884 break; | |
885 case Mflag: | |
886 case Mflag|Rflag: | |
887 f->dokey = dokey_m; | |
888 makemapm(f); | |
889 break; | |
890 case Dflag: | |
891 case Dflag|Fflag: | |
892 case Dflag|Fflag|Iflag: | |
893 case Dflag|Fflag|Iflag|Rflag: | |
894 case Dflag|Fflag|Iflag|Rflag|Wflag: | |
895 case Dflag|Fflag|Iflag|Wflag: | |
896 case Dflag|Fflag|Rflag: | |
897 case Dflag|Fflag|Rflag|Wflag: | |
898 case Dflag|Fflag|Wflag: | |
899 case Dflag|Iflag: | |
900 case Dflag|Iflag|Rflag: | |
901 case Dflag|Iflag|Rflag|Wflag: | |
902 case Dflag|Iflag|Wflag: | |
903 case Dflag|Rflag: | |
904 case Dflag|Rflag|Wflag: | |
905 case Dflag|Wflag: | |
906 case Fflag: | |
907 case Fflag|Iflag: | |
908 case Fflag|Iflag|Rflag: | |
909 case Fflag|Iflag|Rflag|Wflag: | |
910 case Fflag|Iflag|Wflag: | |
911 case Fflag|Rflag: | |
912 case Fflag|Rflag|Wflag: | |
913 case Fflag|Wflag: | |
914 case Iflag: | |
915 case Iflag|Rflag: | |
916 case Iflag|Rflag|Wflag: | |
917 case Iflag|Wflag: | |
918 case Wflag: | |
919 f->dokey = dokey_dfi; | |
920 makemapd(f); | |
921 break; | |
922 } | |
923 } | |
924 | |
925 /* | |
926 * random spot checks | |
927 */ | |
928 if(args.nfile > 1 && args.cflag) { | |
929 fprint(2, "sort: -c can have at most one input file\n"); | |
930 done("option"); | |
931 } | |
932 return; | |
933 } | |
934 | |
935 uchar* | |
936 skip(uchar *l, int n1, int n2, int bflag, int endfield) | |
937 { | |
938 int i, c, tc; | |
939 Rune r; | |
940 | |
941 if(endfield && n1 < 0) | |
942 return 0; | |
943 | |
944 c = *l++; | |
945 tc = args.tabchar; | |
946 if(tc) { | |
947 if(tc < Runeself) { | |
948 for(i=n1; i>0; i--) { | |
949 while(c != tc) { | |
950 if(c == '\n') | |
951 return 0; | |
952 c = *l++; | |
953 } | |
954 if(!(endfield && i == 1)) | |
955 c = *l++; | |
956 } | |
957 } else { | |
958 l--; | |
959 l += chartorune(&r, (char*)l); | |
960 for(i=n1; i>0; i--) { | |
961 while(r != tc) { | |
962 if(r == '\n') | |
963 return 0; | |
964 l += chartorune(&r, (char*)l); | |
965 } | |
966 if(!(endfield && i == 1)) | |
967 l += chartorune(&r, (char*)l); | |
968 } | |
969 c = r; | |
970 } | |
971 } else { | |
972 for(i=n1; i>0; i--) { | |
973 while(c == ' ' || c == '\t') | |
974 c = *l++; | |
975 while(c != ' ' && c != '\t') { | |
976 if(c == '\n') | |
977 return 0; | |
978 c = *l++; | |
979 } | |
980 } | |
981 } | |
982 | |
983 if(bflag) | |
984 while(c == ' ' || c == '\t') | |
985 c = *l++; | |
986 | |
987 l--; | |
988 for(i=n2; i>0; i--) { | |
989 c = *l; | |
990 if(c < Runeself) { | |
991 if(c == '\n') | |
992 return 0; | |
993 l++; | |
994 continue; | |
995 } | |
996 l += chartorune(&r, (char*)l); | |
997 } | |
998 return l; | |
999 } | |
1000 | |
1001 void | |
1002 dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f) | |
1003 { | |
1004 uchar *kp; | |
1005 int c, cl, dp; | |
1006 int state, nzero, exp, expsign, rflag; | |
1007 | |
1008 cl = k->klen + 3; | |
1009 kp = k->key + cl; /* skip place for sign, exponent[2] */ | |
1010 | |
1011 nzero = 0; /* number of trailing zeros */ | |
1012 exp = 0; /* value of the exponent */ | |
1013 expsign = 0; /* sign of the exponent */ | |
1014 dp = 0x4040; /* location of decimal point */ | |
1015 rflag = f->flags&Rflag; /* xor of rflag and - sign */ | |
1016 state = NSstart; | |
1017 | |
1018 for(;; lp++) { | |
1019 if(lp >= lpe) | |
1020 break; | |
1021 c = *lp; | |
1022 | |
1023 if(c == ' ' || c == '\t') { | |
1024 switch(state) { | |
1025 case NSstart: | |
1026 case NSsign: | |
1027 continue; | |
1028 } | |
1029 break; | |
1030 } | |
1031 if(c == '+' || c == '-') { | |
1032 switch(state) { | |
1033 case NSstart: | |
1034 state = NSsign; | |
1035 if(c == '-') | |
1036 rflag = !rflag; | |
1037 continue; | |
1038 case NSexp: | |
1039 state = NSexpsign; | |
1040 if(c == '-') | |
1041 expsign = 1; | |
1042 continue; | |
1043 } | |
1044 break; | |
1045 } | |
1046 if(c == '0') { | |
1047 switch(state) { | |
1048 case NSdigit: | |
1049 if(rflag) | |
1050 c = ~c; | |
1051 *kp++ = c; | |
1052 cl++; | |
1053 nzero++; | |
1054 dp++; | |
1055 state = NSdigit; | |
1056 continue; | |
1057 case NSfract: | |
1058 if(rflag) | |
1059 c = ~c; | |
1060 *kp++ = c; | |
1061 cl++; | |
1062 nzero++; | |
1063 state = NSfract; | |
1064 continue; | |
1065 case NSstart: | |
1066 case NSsign: | |
1067 case NSzero: | |
1068 state = NSzero; | |
1069 continue; | |
1070 case NSzerofract: | |
1071 case NSpoint: | |
1072 dp--; | |
1073 state = NSzerofract; | |
1074 continue; | |
1075 case NSexpsign: | |
1076 case NSexp: | |
1077 case NSexpdigit: | |
1078 exp = exp*10 + (c - '0'); | |
1079 state = NSexpdigit; | |
1080 continue; | |
1081 } | |
1082 break; | |
1083 } | |
1084 if(c >= '1' && c <= '9') { | |
1085 switch(state) { | |
1086 case NSzero: | |
1087 case NSstart: | |
1088 case NSsign: | |
1089 case NSdigit: | |
1090 if(rflag) | |
1091 c = ~c; | |
1092 *kp++ = c; | |
1093 cl++; | |
1094 nzero = 0; | |
1095 dp++; | |
1096 state = NSdigit; | |
1097 continue; | |
1098 case NSzerofract: | |
1099 case NSpoint: | |
1100 case NSfract: | |
1101 if(rflag) | |
1102 c = ~c; | |
1103 *kp++ = c; | |
1104 cl++; | |
1105 nzero = 0; | |
1106 state = NSfract; | |
1107 continue; | |
1108 case NSexpsign: | |
1109 case NSexp: | |
1110 case NSexpdigit: | |
1111 exp = exp*10 + (c - '0'); | |
1112 state = NSexpdigit; | |
1113 continue; | |
1114 } | |
1115 break; | |
1116 } | |
1117 if(c == '.') { | |
1118 switch(state) { | |
1119 case NSstart: | |
1120 case NSsign: | |
1121 state = NSpoint; | |
1122 continue; | |
1123 case NSzero: | |
1124 state = NSzerofract; | |
1125 continue; | |
1126 case NSdigit: | |
1127 state = NSfract; | |
1128 continue; | |
1129 } | |
1130 break; | |
1131 } | |
1132 if((f->flags & Gflag) && (c == 'e' || c == 'E')) { | |
1133 switch(state) { | |
1134 case NSdigit: | |
1135 case NSfract: | |
1136 state = NSexp; | |
1137 continue; | |
1138 } | |
1139 break; | |
1140 } | |
1141 break; | |
1142 } | |
1143 | |
1144 switch(state) { | |
1145 /* | |
1146 * result is zero | |
1147 */ | |
1148 case NSstart: | |
1149 case NSsign: | |
1150 case NSzero: | |
1151 case NSzerofract: | |
1152 case NSpoint: | |
1153 kp = k->key + k->klen; | |
1154 k->klen += 2; | |
1155 kp[0] = 0x20; /* between + and - */ | |
1156 kp[1] = 0; | |
1157 return; | |
1158 /* | |
1159 * result has exponent | |
1160 */ | |
1161 case NSexpsign: | |
1162 case NSexp: | |
1163 case NSexpdigit: | |
1164 if(expsign) | |
1165 exp = -exp; | |
1166 dp += exp; | |
1167 | |
1168 /* | |
1169 * result is fixed point number | |
1170 */ | |
1171 case NSdigit: | |
1172 case NSfract: | |
1173 kp -= nzero; | |
1174 cl -= nzero; | |
1175 break; | |
1176 } | |
1177 | |
1178 /* | |
1179 * end of number | |
1180 */ | |
1181 c = 0; | |
1182 if(rflag) | |
1183 c = ~c; | |
1184 *kp = c; | |
1185 | |
1186 /* | |
1187 * sign and exponent | |
1188 */ | |
1189 c = 0x30; | |
1190 if(rflag) { | |
1191 c = 0x10; | |
1192 dp = ~dp; | |
1193 } | |
1194 kp = k->key + k->klen; | |
1195 kp[0] = c; | |
1196 kp[1] = (dp >> 8); | |
1197 kp[2] = dp; | |
1198 k->klen = cl+1; | |
1199 } | |
1200 | |
1201 void | |
1202 dokey_m(Key *k, uchar *lp, uchar *lpe, Field *f) | |
1203 { | |
1204 uchar *kp; | |
1205 Rune r, place[3]; | |
1206 int c, cl, pc; | |
1207 int rflag; | |
1208 | |
1209 rflag = f->flags&Rflag; | |
1210 pc = 0; | |
1211 | |
1212 cl = k->klen; | |
1213 kp = k->key + cl; | |
1214 | |
1215 for(;;) { | |
1216 /* | |
1217 * get the character | |
1218 */ | |
1219 if(lp >= lpe) | |
1220 break; | |
1221 c = *lp; | |
1222 if(c >= Runeself) { | |
1223 lp += chartorune(&r, (char*)lp); | |
1224 c = r; | |
1225 } else | |
1226 lp++; | |
1227 | |
1228 if(c < nelem(f->mapto)) { | |
1229 c = f->mapto[c]; | |
1230 if(c == 0) | |
1231 continue; | |
1232 } | |
1233 place[pc++] = c; | |
1234 if(pc < 3) | |
1235 continue; | |
1236 for(c=11; c>=0; c--) | |
1237 if(memcmp(month[c], place, sizeof(place)) == 0) | |
1238 break; | |
1239 c += 10; | |
1240 if(rflag) | |
1241 c = ~c; | |
1242 *kp++ = c; | |
1243 cl++; | |
1244 break; | |
1245 } | |
1246 | |
1247 c = 0; | |
1248 if(rflag) | |
1249 c = ~c; | |
1250 *kp = c; | |
1251 k->klen = cl+1; | |
1252 } | |
1253 | |
1254 void | |
1255 dokey_dfi(Key *k, uchar *lp, uchar *lpe, Field *f) | |
1256 { | |
1257 uchar *kp; | |
1258 Rune r; | |
1259 int c, cl, n, rflag; | |
1260 | |
1261 cl = k->klen; | |
1262 kp = k->key + cl; | |
1263 rflag = f->flags & Rflag; | |
1264 | |
1265 for(;;) { | |
1266 /* | |
1267 * get the character | |
1268 */ | |
1269 if(lp >= lpe) | |
1270 break; | |
1271 c = *lp; | |
1272 if(c >= Runeself) { | |
1273 lp += chartorune(&r, (char*)lp); | |
1274 c = r; | |
1275 } else | |
1276 lp++; | |
1277 | |
1278 /* | |
1279 * do the various mappings. | |
1280 * the common case is handled | |
1281 * completely by the table. | |
1282 */ | |
1283 if(c != 0 && c < Runeself) { | |
1284 c = f->mapto[c]; | |
1285 if(c) { | |
1286 *kp++ = c; | |
1287 cl++; | |
1288 } | |
1289 continue; | |
1290 } | |
1291 | |
1292 /* | |
1293 * for characters out of range, | |
1294 * the table does not do Rflag. | |
1295 * ignore is based on mapto[255] | |
1296 */ | |
1297 if(c != 0 && c < nelem(f->mapto)) { | |
1298 c = f->mapto[c]; | |
1299 if(c == 0) | |
1300 continue; | |
1301 } else | |
1302 if(f->mapto[nelem(f->mapto)-1] == 0) | |
1303 continue; | |
1304 | |
1305 /* | |
1306 * put it in the key | |
1307 */ | |
1308 r = c; | |
1309 n = runetochar((char*)kp, &r); | |
1310 kp += n; | |
1311 cl += n; | |
1312 if(rflag) | |
1313 while(n > 0) { | |
1314 kp[-n] = ~kp[-n]; | |
1315 n--; | |
1316 } | |
1317 } | |
1318 | |
1319 /* | |
1320 * end of key | |
1321 */ | |
1322 k->klen = cl+1; | |
1323 if(rflag) { | |
1324 *kp = ~0; | |
1325 return; | |
1326 } | |
1327 *kp = 0; | |
1328 } | |
1329 | |
1330 void | |
1331 dokey_r(Key *k, uchar *lp, uchar *lpe, Field *f) | |
1332 { | |
1333 int cl, n; | |
1334 uchar *kp; | |
1335 | |
1336 USED(f); | |
1337 n = lpe - lp; | |
1338 if(n < 0) | |
1339 n = 0; | |
1340 cl = k->klen; | |
1341 kp = k->key + cl; | |
1342 k->klen = cl+n+1; | |
1343 | |
1344 lpe -= 3; | |
1345 while(lp < lpe) { | |
1346 kp[0] = ~lp[0]; | |
1347 kp[1] = ~lp[1]; | |
1348 kp[2] = ~lp[2]; | |
1349 kp[3] = ~lp[3]; | |
1350 kp += 4; | |
1351 lp += 4; | |
1352 } | |
1353 | |
1354 lpe += 3; | |
1355 while(lp < lpe) | |
1356 *kp++ = ~*lp++; | |
1357 *kp = ~0; | |
1358 } | |
1359 | |
1360 void | |
1361 dokey_(Key *k, uchar *lp, uchar *lpe, Field *f) | |
1362 { | |
1363 int n, cl; | |
1364 uchar *kp; | |
1365 | |
1366 USED(f); | |
1367 n = lpe - lp; | |
1368 if(n < 0) | |
1369 n = 0; | |
1370 cl = k->klen; | |
1371 kp = k->key + cl; | |
1372 k->klen = cl+n+1; | |
1373 memmove(kp, lp, n); | |
1374 kp[n] = 0; | |
1375 } | |
1376 | |
1377 void | |
1378 buildkey(Line *l) | |
1379 { | |
1380 Key *k; | |
1381 uchar *lp, *lpe; | |
1382 int ll, kl, cl, i, n; | |
1383 Field *f; | |
1384 | |
1385 ll = l->llen - 1; | |
1386 kl = 0; /* allocated length */ | |
1387 cl = 0; /* current length */ | |
1388 k = 0; | |
1389 | |
1390 for(i=1; i<=args.nfield; i++) { | |
1391 f = &args.field[i]; | |
1392 lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0); | |
1393 if(lp == 0) | |
1394 lp = l->line + ll; | |
1395 lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1); | |
1396 if(lpe == 0) | |
1397 lpe = l->line + ll; | |
1398 n = (lpe - lp) + 1; | |
1399 if(n <= 0) | |
1400 n = 1; | |
1401 if(cl+(n+4) > kl) { | |
1402 kl = cl+(n+4); | |
1403 k = realloc(k, sizeof(Key) + | |
1404 (kl-1)*sizeof(k->key[0])); | |
1405 if(k == 0) | |
1406 nomem(); | |
1407 } | |
1408 k->klen = cl; | |
1409 (*f->dokey)(k, lp, lpe, f); | |
1410 cl = k->klen; | |
1411 } | |
1412 | |
1413 /* | |
1414 * global comparisons | |
1415 */ | |
1416 if(!(args.uflag && cl > 0)) { | |
1417 f = &args.field[0]; | |
1418 if(cl+(ll+4) > kl) { | |
1419 kl = cl+(ll+4); | |
1420 k = realloc(k, sizeof(Key) + | |
1421 (kl-1)*sizeof(k->key[0])); | |
1422 if(k == 0) | |
1423 nomem(); | |
1424 } | |
1425 k->klen = cl; | |
1426 (*f->dokey)(k, l->line, l->line+ll, f); | |
1427 cl = k->klen; | |
1428 } | |
1429 | |
1430 l->key = k; | |
1431 k->klen = cl; | |
1432 | |
1433 if(args.vflag) { | |
1434 write(2, l->line, l->llen); | |
1435 for(i=0; i<k->klen; i++) { | |
1436 fprint(2, " %.2x", k->key[i]); | |
1437 if(k->key[i] == 0x00 || k->key[i] == 0xff) | |
1438 fprint(2, "\n"); | |
1439 } | |
1440 } | |
1441 } | |
1442 | |
1443 void | |
1444 makemapm(Field *f) | |
1445 { | |
1446 int i, c; | |
1447 | |
1448 for(i=0; i<nelem(f->mapto); i++) { | |
1449 c = 1; | |
1450 if(i == ' ' || i == '\t') | |
1451 c = 0; | |
1452 if(i >= 'a' && i <= 'z') | |
1453 c = i + ('A' - 'a'); | |
1454 if(i >= 'A' && i <= 'Z') | |
1455 c = i; | |
1456 f->mapto[i] = c; | |
1457 if(args.vflag) { | |
1458 if((i & 15) == 0) | |
1459 fprint(2, " "); | |
1460 fprint(2, " %.2x", c); | |
1461 if((i & 15) == 15) | |
1462 fprint(2, "\n"); | |
1463 } | |
1464 } | |
1465 } | |
1466 | |
1467 void | |
1468 makemapd(Field *f) | |
1469 { | |
1470 int i, j, c; | |
1471 | |
1472 for(i=0; i<nelem(f->mapto); i++) { | |
1473 c = i; | |
1474 if(f->flags & Iflag) | |
1475 if(c < 040 || c > 0176) | |
1476 c = -1; | |
1477 if((f->flags & Wflag) && c >= 0) | |
1478 if(c == ' ' || c == '\t') | |
1479 c = -1; | |
1480 if((f->flags & Dflag) && c >= 0) | |
1481 if(!(c == ' ' || c == '\t' || | |
1482 (c >= 'a' && c <= 'z') || | |
1483 (c >= 'A' && c <= 'Z') || | |
1484 (c >= '0' && c <= '9'))) { | |
1485 for(j=0; latinmap[j]; j+=3) | |
1486 if(c == latinmap[j+0] || | |
1487 c == latinmap[j+1]) | |
1488 break; | |
1489 if(latinmap[j] == 0) | |
1490 c = -1; | |
1491 } | |
1492 if((f->flags & Fflag) && c >= 0) { | |
1493 if(c >= 'a' && c <= 'z') | |
1494 c += 'A' - 'a'; | |
1495 for(j=0; latinmap[j]; j+=3) | |
1496 if(c == latinmap[j+0] || | |
1497 c == latinmap[j+1]) { | |
1498 c = latinmap[j+2]; | |
1499 break; | |
1500 } | |
1501 } | |
1502 if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself) | |
1503 c = ~c & 0xff; | |
1504 if(c < 0) | |
1505 c = 0; | |
1506 f->mapto[i] = c; | |
1507 if(args.vflag) { | |
1508 if((i & 15) == 0) | |
1509 fprint(2, " "); | |
1510 fprint(2, " %.2x", c); | |
1511 if((i & 15) == 15) | |
1512 fprint(2, "\n"); | |
1513 } | |
1514 } | |
1515 } | |
1516 | |
1517 int latinmap[] = | |
1518 { | |
1519 /* lcase ucase fold */ | |
1520 0xe0, 0xc0, 0x41, /* L'à',… | |
1521 0xe1, 0xc1, 0x41, /* L'á',… | |
1522 0xe2, 0xc2, 0x41, /* L'â',… | |
1523 0xe4, 0xc4, 0x41, /* L'ä',… | |
1524 0xe3, 0xc3, 0x41, /* L'ã',… | |
1525 0xe5, 0xc5, 0x41, /* L'å',… | |
1526 0xe8, 0xc8, 0x45, /* L'è',… | |
1527 0xe9, 0xc9, 0x45, /* L'é',… | |
1528 0xea, 0xca, 0x45, /* L'ê',… | |
1529 0xeb, 0xcb, 0x45, /* L'ë',… | |
1530 0xec, 0xcc, 0x49, /* L'ì',… | |
1531 0xed, 0xcd, 0x49, /* L'í',… | |
1532 0xee, 0xce, 0x49, /* L'î',… | |
1533 0xef, 0xcf, 0x49, /* L'ï',… | |
1534 0xf2, 0xd2, 0x4f, /* L'ò',… | |
1535 0xf3, 0xd3, 0x4f, /* L'ó',… | |
1536 0xf4, 0xd4, 0x4f, /* L'ô',… | |
1537 0xf6, 0xd6, 0x4f, /* L'ö',… | |
1538 0xf5, 0xd5, 0x4f, /* L'õ',… | |
1539 0xf8, 0xd8, 0x4f, /* L'ø',… | |
1540 0xf9, 0xd9, 0x55, /* L'ù',… | |
1541 0xfa, 0xda, 0x55, /* L'ú',… | |
1542 0xfb, 0xdb, 0x55, /* L'û',… | |
1543 0xfc, 0xdc, 0x55, /* L'ü',… | |
1544 0xe6, 0xc6, 0x41, /* L'æ',… | |
1545 0xf0, 0xd0, 0x44, /* L'ð',… | |
1546 0xf1, 0xd1, 0x4e, /* L'ñ',… | |
1547 0xfd, 0xdd, 0x59, /* L'ý',… | |
1548 0xe7, 0xc7, 0x43, /* L'ç',… | |
1549 0, | |
1550 }; | |
1551 | |
1552 Rune LJAN[] = { 'J', 'A', 'N', 0 }; | |
1553 Rune LFEB[] = { 'F', 'E', 'B', 0 }; | |
1554 Rune LMAR[] = { 'M', 'A', 'R', 0 }; | |
1555 Rune LAPR[] = { 'A', 'P', 'R', 0 }; | |
1556 Rune LMAY[] = { 'M', 'A', 'Y', 0 }; | |
1557 Rune LJUN[] = { 'J', 'U', 'N', 0 }; | |
1558 Rune LJUL[] = { 'J', 'U', 'L', 0 }; | |
1559 Rune LAUG[] = { 'A', 'U', 'G', 0 }; | |
1560 Rune LSEP[] = { 'S', 'E', 'P', 0 }; | |
1561 Rune LOCT[] = { 'O', 'C', 'T', 0 }; | |
1562 Rune LNOV[] = { 'N', 'O', 'V', 0 }; | |
1563 Rune LDEC[] = { 'D', 'E', 'C', 0 }; | |
1564 | |
1565 Rune* month[12] = | |
1566 { | |
1567 LJAN, | |
1568 LFEB, | |
1569 LMAR, | |
1570 LAPR, | |
1571 LMAY, | |
1572 LJUN, | |
1573 LJUL, | |
1574 LAUG, | |
1575 LSEP, | |
1576 LOCT, | |
1577 LNOV, | |
1578 LDEC, | |
1579 }; | |
1580 | |
1581 /************** radix sort ***********/ | |
1582 | |
1583 enum | |
1584 { | |
1585 Threshold = 14 | |
1586 }; | |
1587 | |
1588 void rsort4(Key***, ulong, int); | |
1589 void bsort4(Key***, ulong, int); | |
1590 | |
1591 void | |
1592 sort4(void *a, ulong n) | |
1593 { | |
1594 if(n > Threshold) | |
1595 rsort4((Key***)a, n, 0); | |
1596 else | |
1597 bsort4((Key***)a, n, 0); | |
1598 } | |
1599 | |
1600 void | |
1601 rsort4(Key ***a, ulong n, int b) | |
1602 { | |
1603 Key ***ea, ***t, ***u, **t1, **u1, *k; | |
1604 Key ***part[257]; | |
1605 static long count[257]; | |
1606 long clist[257+257], *cp, *cp1; | |
1607 int c, lowc, higc; | |
1608 | |
1609 /* | |
1610 * pass 1 over all keys, | |
1611 * count the number of each key[b]. | |
1612 * find low count and high count. | |
1613 */ | |
1614 lowc = 256; | |
1615 higc = 0; | |
1616 ea = a+n; | |
1617 for(t=a; t<ea; t++) { | |
1618 k = **t; | |
1619 n = k->klen; | |
1620 if(b >= n) { | |
1621 count[256]++; | |
1622 continue; | |
1623 } | |
1624 c = k->key[b]; | |
1625 n = count[c]++; | |
1626 if(n == 0) { | |
1627 if(c < lowc) | |
1628 lowc = c; | |
1629 if(c > higc) | |
1630 higc = c; | |
1631 } | |
1632 } | |
1633 | |
1634 /* | |
1635 * pass 2 over all counts, | |
1636 * put partition pointers in part[c]. | |
1637 * save compacted indexes and counts | |
1638 * in clist[]. | |
1639 */ | |
1640 t = a; | |
1641 n = count[256]; | |
1642 clist[0] = n; | |
1643 part[256] = t; | |
1644 t += n; | |
1645 | |
1646 cp1 = clist+1; | |
1647 cp = count+lowc; | |
1648 for(c=lowc; c<=higc; c++,cp++) { | |
1649 n = *cp; | |
1650 if(n) { | |
1651 cp1[0] = n; | |
1652 cp1[1] = c; | |
1653 cp1 += 2; | |
1654 part[c] = t; | |
1655 t += n; | |
1656 } | |
1657 } | |
1658 *cp1 = 0; | |
1659 | |
1660 /* | |
1661 * pass 3 over all counts. | |
1662 * chase lowest pointer in each partition | |
1663 * around a permutation until it comes | |
1664 * back and is stored where it started. | |
1665 * static array, count[], should be | |
1666 * reduced to zero entries except maybe | |
1667 * count[256]. | |
1668 */ | |
1669 for(cp1=clist+1; cp1[0]; cp1+=2) { | |
1670 c = cp1[1]; | |
1671 cp = count+c; | |
1672 while(*cp) { | |
1673 t1 = *part[c]; | |
1674 for(;;) { | |
1675 k = *t1; | |
1676 n = 256; | |
1677 if(b < k->klen) | |
1678 n = k->key[b]; | |
1679 u = part[n]++; | |
1680 count[n]--; | |
1681 u1 = *u; | |
1682 *u = t1; | |
1683 if(n == c) | |
1684 break; | |
1685 t1 = u1; | |
1686 } | |
1687 } | |
1688 } | |
1689 | |
1690 /* | |
1691 * pass 4 over all partitions. | |
1692 * call recursively. | |
1693 */ | |
1694 b++; | |
1695 t = a + clist[0]; | |
1696 count[256] = 0; | |
1697 for(cp1=clist+1; n=cp1[0]; cp1+=2) { | |
1698 if(n > Threshold) | |
1699 rsort4(t, n, b); | |
1700 else | |
1701 if(n > 1) | |
1702 bsort4(t, n, b); | |
1703 t += n; | |
1704 } | |
1705 } | |
1706 | |
1707 /* | |
1708 * bubble sort to pick up | |
1709 * the pieces. | |
1710 */ | |
1711 void | |
1712 bsort4(Key ***a, ulong n, int b) | |
1713 { | |
1714 Key ***i, ***j, ***k, ***l, **t; | |
1715 Key *ka, *kb; | |
1716 int n1, n2; | |
1717 | |
1718 l = a+n; | |
1719 j = a; | |
1720 | |
1721 loop: | |
1722 i = j; | |
1723 j++; | |
1724 if(j >= l) | |
1725 return; | |
1726 | |
1727 ka = **i; | |
1728 kb = **j; | |
1729 n1 = ka->klen - b; | |
1730 n2 = kb->klen - b; | |
1731 if(n1 > n2) | |
1732 n1 = n2; | |
1733 if(n1 <= 0) | |
1734 goto loop; | |
1735 n2 = ka->key[b] - kb->key[b]; | |
1736 if(n2 == 0) | |
1737 n2 = memcmp(ka->key+b, kb->key+b, n1); | |
1738 if(n2 <= 0) | |
1739 goto loop; | |
1740 | |
1741 for(;;) { | |
1742 k = i+1; | |
1743 | |
1744 t = *k; | |
1745 *k = *i; | |
1746 *i = t; | |
1747 | |
1748 if(i <= a) | |
1749 goto loop; | |
1750 | |
1751 i--; | |
1752 ka = **i; | |
1753 kb = *t; | |
1754 n1 = ka->klen - b; | |
1755 n2 = kb->klen - b; | |
1756 if(n1 > n2) | |
1757 n1 = n2; | |
1758 if(n1 <= 0) | |
1759 goto loop; | |
1760 n2 = ka->key[b] - kb->key[b]; | |
1761 if(n2 == 0) | |
1762 n2 = memcmp(ka->key+b, kb->key+b, n1); | |
1763 if(n2 <= 0) | |
1764 goto loop; | |
1765 } | |
1766 } |