/*
bugs:
00/ff for end of file can conflict with 00/ff characters
*/
enum
{
Nline = 100000, /* default max number of lines saved in memory */
Nmerge = 10, /* max number of temporary files merged */
Nfield = 20, /* max number of argument fields */
Bflag = 1<<0, /* flags per field */
B1flag = 1<<1,
NSstart = 0, /* states for number to key decoding */
NSsign,
NSzero,
NSdigit,
NSpoint,
NSfract,
NSzerofract,
NSexp,
NSexpsign,
NSexpdigit,
};
typedef struct Line Line;
typedef struct Key Key;
typedef struct Merge Merge;
typedef struct Field Field;
struct Line
{
Key* key;
int llen; /* always >= 1 */
uchar line[1]; /* always ends in '\n' */
};
struct Merge
{
Key* key; /* copy of line->key so (Line*) looks like (Merge*) */
Line* line; /* line at the head of a merged temp file */
int fd; /* file descriptor */
Biobuf b; /* iobuf for reading a temp file */
};
struct Key
{
int klen;
uchar key[1];
};
struct Field
{
int beg1;
int beg2;
int end1;
int end2;
long flags;
uchar mapto[256];
void (*dokey)(Key*, uchar*, uchar*, Field*);
};
struct args
{
char* ofile;
char* tname;
Rune tabchar;
char cflag;
char uflag;
char vflag;
int nfield;
int nfile;
Field field[Nfield];
Line** linep;
long nline; /* number of lines in this temp file */
long lineno; /* overall ordinal for -s option */
int ntemp;
long mline; /* max lines per file */
} args;
dir = "/tmp";
if(args.tname)
dir = args.tname;
if(strlen(dir) >= nelem(file)-20) {
fprint(2, "temp file directory name is too long: %s\n", dir);
done("tdir");
}
case 'M': /* month */
case 'b': /* skip blanks */
case 'd': /* directory order */
case 'f': /* fold case */
case 'g': /* floating numbers */
case 'i': /* ignore non-ascii */
case 'n': /* numbers */
case 'r': /* reverse */
case 'w': /* ignore white */
if(args.nfield > 0)
fprint(2, "sort: global field set after -k\n");
setfield(0, c);
break;
case 'm':
/* option m silently ignored but required by posix */
break;
default:
fprint(2, "sort: unknown option: -%C\n", c);
done("option");
}
continue;
}
if(c == '+') {
argv[i] = 0; /* clobber args processed */
c = *s;
if(c == '.' || (c >= '0' && c <= '9')) {
newfield();
f = &args.field[args.nfield];
dofield(s, &f->beg1, &f->beg2, 0, 0);
if(f->flags & Bflag) {
f->flags |= B1flag;
f->flags &= ~Bflag;
}
hadplus = 1;
continue;
}
fprint(2, "sort: unknown option: +%C\n", c);
done("option");
}
args.nfile++;
}
for(i=0; i<=args.nfield; i++) {
f = &args.field[i];
/*
* global options apply to fields that
* specify no options
*/
if(f->flags == 0) {
f->flags = args.field[0].flags;
if(args.field[0].flags & Bflag)
f->flags |= B1flag;
}
/*
* build buildkey specification
*/
switch(f->flags & ~(Bflag|B1flag)) {
default:
fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);
done("option");
case 0:
f->dokey = dokey_;
break;
case Rflag:
f->dokey = dokey_r;
break;
case Gflag:
case Nflag:
case Gflag|Nflag:
case Gflag|Rflag:
case Nflag|Rflag:
case Gflag|Nflag|Rflag:
f->dokey = dokey_gn;
break;
case Mflag:
case Mflag|Rflag:
f->dokey = dokey_m;
makemapm(f);
break;
case Dflag:
case Dflag|Fflag:
case Dflag|Fflag|Iflag:
case Dflag|Fflag|Iflag|Rflag:
case Dflag|Fflag|Iflag|Rflag|Wflag:
case Dflag|Fflag|Iflag|Wflag:
case Dflag|Fflag|Rflag:
case Dflag|Fflag|Rflag|Wflag:
case Dflag|Fflag|Wflag:
case Dflag|Iflag:
case Dflag|Iflag|Rflag:
case Dflag|Iflag|Rflag|Wflag:
case Dflag|Iflag|Wflag:
case Dflag|Rflag:
case Dflag|Rflag|Wflag:
case Dflag|Wflag:
case Fflag:
case Fflag|Iflag:
case Fflag|Iflag|Rflag:
case Fflag|Iflag|Rflag|Wflag:
case Fflag|Iflag|Wflag:
case Fflag|Rflag:
case Fflag|Rflag|Wflag:
case Fflag|Wflag:
case Iflag:
case Iflag|Rflag:
case Iflag|Rflag|Wflag:
case Iflag|Wflag:
case Wflag:
f->dokey = dokey_dfi;
makemapd(f);
break;
}
}
/*
* random spot checks
*/
if(args.nfile > 1 && args.cflag) {
fprint(2, "sort: -c can have at most one input file\n");
done("option");
}
return;
}
uchar*
skip(uchar *l, int n1, int n2, int bflag, int endfield)
{
int i, c, ln, tc;
Rune r;
if(endfield && n1 < 0)
return 0;
c = *l++;
ln = 1;
tc = args.tabchar;
if(tc) {
if(tc < Runeself) {
for(i=n1; i>0; i--) {
while(c != tc) {
if(c == '\n')
return 0;
c = *l++;
}
if(!(endfield && i == 1))
c = *l++;
}
} else {
l--;
l += ln = chartorune(&r, (char*)l);
for(i=n1; i>0; i--) {
while(r != tc) {
if(r == '\n')
return 0;
l += ln = chartorune(&r, (char*)l);
}
if(!(endfield && i == 1))
l += ln = chartorune(&r, (char*)l);
}
c = r;
}
} else {
for(i=n1; i>0; i--) {
while(c == ' ' || c == '\t')
c = *l++;
while(c != ' ' && c != '\t') {
if(c == '\n')
return 0;
c = *l++;
}
}
}
if(bflag)
while(c == ' ' || c == '\t'){
c = *l++;
ln = 1;
}
l -= ln;
for(i=n2; i>0; i--) {
c = *l;
if(c < Runeself) {
if(c == '\n')
return 0;
l++;
continue;
}
l += chartorune(&r, (char*)l);
}
return l;
}
void
dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f)
{
uchar *kp;
int c, cl, dp;
int state, nzero, exp, expsign, rflag;
cl = k->klen + 3;
kp = k->key + cl; /* skip place for sign, exponent[2] */
nzero = 0; /* number of trailing zeros */
exp = 0; /* value of the exponent */
expsign = 0; /* sign of the exponent */
dp = 0x4040; /* location of decimal point */
rflag = f->flags&Rflag; /* xor of rflag and - sign */
state = NSstart;
for(;; lp++) {
if(lp >= lpe)
break;
c = *lp;
if(c == ' ' || c == '\t') {
switch(state) {
case NSstart:
case NSsign:
continue;
}
break;
}
if(c == '+' || c == '-') {
switch(state) {
case NSstart:
state = NSsign;
if(c == '-')
rflag = !rflag;
continue;
case NSexp:
state = NSexpsign;
if(c == '-')
expsign = 1;
continue;
}
break;
}
if(c == '0') {
switch(state) {
case NSdigit:
if(rflag)
c = ~c;
*kp++ = c;
cl++;
nzero++;
dp++;
state = NSdigit;
continue;
case NSfract:
if(rflag)
c = ~c;
*kp++ = c;
cl++;
nzero++;
state = NSfract;
continue;
case NSstart:
case NSsign:
case NSzero:
state = NSzero;
continue;
case NSzerofract:
case NSpoint:
dp--;
state = NSzerofract;
continue;
case NSexpsign:
case NSexp:
case NSexpdigit:
exp = exp*10 + (c - '0');
state = NSexpdigit;
continue;
}
break;
}
if(c >= '1' && c <= '9') {
switch(state) {
case NSzero:
case NSstart:
case NSsign:
case NSdigit:
if(rflag)
c = ~c;
*kp++ = c;
cl++;
nzero = 0;
dp++;
state = NSdigit;
continue;
case NSzerofract:
case NSpoint:
case NSfract:
if(rflag)
c = ~c;
*kp++ = c;
cl++;
nzero = 0;
state = NSfract;
continue;
case NSexpsign:
case NSexp:
case NSexpdigit:
exp = exp*10 + (c - '0');
state = NSexpdigit;
continue;
}
break;
}
if(c == '.') {
switch(state) {
case NSstart:
case NSsign:
state = NSpoint;
continue;
case NSzero:
state = NSzerofract;
continue;
case NSdigit:
state = NSfract;
continue;
}
break;
}
if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
switch(state) {
case NSdigit:
case NSfract:
state = NSexp;
continue;
}
break;
}
break;
}
switch(state) {
/*
* result is zero
*/
case NSstart:
case NSsign:
case NSzero:
case NSzerofract:
case NSpoint:
kp = k->key + k->klen;
k->klen += 2;
kp[0] = 0x20; /* between + and - */
kp[1] = 0;
return;
/*
* result has exponent
*/
case NSexpsign:
case NSexp:
case NSexpdigit:
if(expsign)
exp = -exp;
dp += exp;
/*
* result is fixed point number
*/
case NSdigit:
case NSfract:
kp -= nzero;
cl -= nzero;
break;
}
/*
* end of number
*/
c = 0;
if(rflag)
c = ~c;
*kp = c;
for(;;) {
/*
* get the character
*/
if(lp >= lpe)
break;
c = *lp;
if(c >= Runeself) {
lp += chartorune(&r, (char*)lp);
c = r;
} else
lp++;
/*
* do the various mappings.
* the common case is handled
* completely by the table.
*/
if(c != 0 && c < Runeself) {
c = f->mapto[c];
if(c) {
*kp++ = c;
cl++;
}
continue;
}
/*
* for characters out of range,
* the table does not do Rflag.
* ignore is based on mapto[255]
*/
if(c != 0 && c < nelem(f->mapto)) {
c = f->mapto[c];
if(c == 0)
continue;
} else
if(f->mapto[nelem(f->mapto)-1] == 0)
continue;
/*
* put it in the key
*/
r = c;
n = runetochar((char*)kp, &r);
kp += n;
cl += n;
if(rflag)
while(n > 0) {
kp[-n] = ~kp[-n];
n--;
}
}
/*
* end of key
*/
k->klen = cl+1;
if(rflag) {
*kp = ~0;
return;
}
*kp = 0;
}
void
sort4(void *a, ulong n)
{
if(n > Threshold)
rsort4((Key***)a, n, 0);
else
bsort4((Key***)a, n, 0);
}
void
rsort4(Key ***a, ulong n, int b)
{
Key ***ea, ***t, ***u, **t1, **u1, *k;
Key ***part[257];
static long count[257];
long clist[257+257], *cp, *cp1;
int c, lowc, higc;
/*
* pass 1 over all keys,
* count the number of each key[b].
* find low count and high count.
*/
lowc = 256;
higc = 0;
ea = a+n;
for(t=a; t<ea; t++) {
k = **t;
n = k->klen;
if(b >= n) {
count[256]++;
continue;
}
c = k->key[b];
n = count[c]++;
if(n == 0) {
if(c < lowc)
lowc = c;
if(c > higc)
higc = c;
}
}
/*
* pass 2 over all counts,
* put partition pointers in part[c].
* save compacted indexes and counts
* in clist[].
*/
t = a;
n = count[256];
clist[0] = n;
part[256] = t;
t += n;
/*
* pass 3 over all counts.
* chase lowest pointer in each partition
* around a permutation until it comes
* back and is stored where it started.
* static array, count[], should be
* reduced to zero entries except maybe
* count[256].
*/
for(cp1=clist+1; cp1[0]; cp1+=2) {
c = cp1[1];
cp = count+c;
while(*cp) {
t1 = *part[c];
for(;;) {
k = *t1;
n = 256;
if(b < k->klen)
n = k->key[b];
u = part[n]++;
count[n]--;
u1 = *u;
*u = t1;
if(n == c)
break;
t1 = u1;
}
}
}
/*
* pass 4 over all partitions.
* call recursively.
*/
b++;
t = a + clist[0];
count[256] = 0;
for(cp1=clist+1; n=cp1[0]; cp1+=2) {
if(n > Threshold)
rsort4(t, n, b);
else
if(n > 1)
bsort4(t, n, b);
t += n;
}
}
/*
* bubble sort to pick up
* the pieces.
*/
void
bsort4(Key ***a, ulong n, int b)
{
Key ***i, ***j, ***k, ***l, **t;
Key *ka, *kb;
int n1, n2;