added commands as discussed with Uriel yesterday - 9base - revived minimalist p… | |
git clone git://git.suckless.org/9base | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit fa62640154da08c5fd229af50efde0d33871a0aa | |
parent 85bacddf7706d2c89c30c2433fb8c43cd794cdb5 | |
Author: Anselm R Garbe <[email protected]> | |
Date: Fri, 28 May 2010 11:30:17 +0100 | |
added commands as discussed with Uriel yesterday | |
Diffstat: | |
M Makefile | 53 +++++++++++++++++++++++++++++… | |
D TODO | 11 ----------- | |
A ascii/Makefile | 10 ++++++++++ | |
A ascii/ascii.1 | 160 +++++++++++++++++++++++++++++… | |
A ascii/ascii.c | 181 +++++++++++++++++++++++++++++… | |
A cmp/Makefile | 10 ++++++++++ | |
A cmp/cmp.1 | 57 +++++++++++++++++++++++++++++… | |
A cmp/cmp.c | 112 +++++++++++++++++++++++++++++… | |
A dd/Makefile | 10 ++++++++++ | |
A dd/dd.1 | 0 | |
A dd/dd.c | 660 +++++++++++++++++++++++++++++… | |
A diff/Makefile | 35 +++++++++++++++++++++++++++++… | |
A diff/diff.1 | 163 +++++++++++++++++++++++++++++… | |
A diff/diff.h | 27 +++++++++++++++++++++++++++ | |
A diff/diffdir.c | 113 +++++++++++++++++++++++++++++… | |
A diff/diffio.c | 387 +++++++++++++++++++++++++++++… | |
A diff/diffreg.c | 420 +++++++++++++++++++++++++++++… | |
A diff/main.c | 270 +++++++++++++++++++++++++++++… | |
A join/Makefile | 10 ++++++++++ | |
A join/join.1 | 147 +++++++++++++++++++++++++++++… | |
A join/join.c | 369 ++++++++++++++++++++++++++++++ | |
M lib9/utf.h | 3 ++- | |
A look/Makefile | 10 ++++++++++ | |
A look/look.1 | 85 +++++++++++++++++++++++++++++… | |
A look/look.c | 349 +++++++++++++++++++++++++++++… | |
A pbd/Makefile | 10 ++++++++++ | |
A pbd/pbd.1 | 0 | |
A pbd/pbd.c | 19 +++++++++++++++++++ | |
M rc/Makefile | 2 +- | |
A split/Makefile | 10 ++++++++++ | |
A split/split.1 | 82 +++++++++++++++++++++++++++++… | |
A split/split.c | 189 +++++++++++++++++++++++++++++… | |
A strings/Makefile | 10 ++++++++++ | |
A strings/strings.1 | 28 ++++++++++++++++++++++++++++ | |
A strings/strings.c | 90 +++++++++++++++++++++++++++++… | |
A unicode/Makefile | 10 ++++++++++ | |
A unicode/unicode.1 | 0 | |
A unicode/unicode.c | 122 +++++++++++++++++++++++++++++… | |
A unutf/Makefile | 10 ++++++++++ | |
A unutf/unutf.1 | 0 | |
A unutf/unutf.c | 20 ++++++++++++++++++++ | |
41 files changed, 4238 insertions(+), 16 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -2,9 +2,56 @@ | |
include config.mk | |
-SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \ | |
- factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \ | |
- rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq | |
+SUBDIRS = lib9\ | |
+ yacc\ | |
+ ascii\ | |
+ awk\ | |
+ basename\ | |
+ bc\ | |
+ cal\ | |
+ cat\ | |
+ cleanname\ | |
+ cmp\ | |
+ date\ | |
+ dc\ | |
+ du\ | |
+ dd\ | |
+ diff\ | |
+ echo\ | |
+ ed\ | |
+ factor\ | |
+ fortune\ | |
+ fmt\ | |
+ freq\ | |
+ getflags\ | |
+ grep\ | |
+ hoc\ | |
+ join\ | |
+ look\ | |
+ ls\ | |
+ mk\ | |
+ mkdir\ | |
+ mtime\ | |
+ pbd\ | |
+ primes\ | |
+ rc\ | |
+ read\ | |
+ sha1sum\ | |
+ sed\ | |
+ seq\ | |
+ sleep\ | |
+ sort\ | |
+ split\ | |
+ strings\ | |
+ tail\ | |
+ tee\ | |
+ test\ | |
+ touch\ | |
+ tr\ | |
+ troff\ | |
+ unicode\ | |
+ uniq\ | |
+ unutf\ | |
all: | |
@echo 9base build options: | |
diff --git a/TODO b/TODO | |
@@ -1,11 +0,0 @@ | |
-12:13 < uriel> garbeam: add dd and diff too | |
-12:13 < uriel> and split | |
-12:14 < uriel> (and join) | |
-12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but … | |
-12:15 < uriel> and tcs | |
-12:16 < uriel> and strings | |
-12:18 < uriel> oh, oh, I'm finding some great bits: | |
-12:18 < uriel> look(1), ascii(1) and unicode(1) | |
-12:19 < uriel> ok, and cmp(1) is missing too | |
-12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth … | |
-12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it… | |
diff --git a/ascii/Makefile b/ascii/Makefile | |
@@ -0,0 +1,10 @@ | |
+# ascii - ascii unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = ascii | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/ascii/ascii.1 b/ascii/ascii.1 | |
@@ -0,0 +1,160 @@ | |
+.TH ASCII 1 | |
+.SH NAME | |
+ascii, unicode \- interpret ASCII, Unicode characters | |
+.SH SYNOPSIS | |
+.B ascii | |
+[ | |
+.B -8 | |
+] | |
+[ | |
+.BI -oxdb n | |
+] | |
+[ | |
+.B -nct | |
+] | |
+[ | |
+.I text | |
+] | |
+.PP | |
+.B unicode | |
+[ | |
+.B -nt | |
+] | |
+.IB hexmin - hexmax | |
+.PP | |
+.B unicode | |
+[ | |
+.B -t | |
+] | |
+.I hex | |
+[ | |
+\&... | |
+] | |
+.PP | |
+.B unicode | |
+[ | |
+.B -n | |
+] | |
+.I characters | |
+.PP | |
+.B look | |
+.I hex | |
+.B \*9/lib/unicode | |
+.SH DESCRIPTION | |
+.I Ascii | |
+prints the | |
+.SM ASCII | |
+values corresponding to characters and | |
+.I vice | |
+.IR versa ; | |
+under the | |
+.B -8 | |
+option, the | |
+.SM ISO | |
+Latin-1 extensions (codes 0200-0377) are included. | |
+The values are interpreted in a settable numeric base; | |
+.B -o | |
+specifies octal, | |
+.B -d | |
+decimal, | |
+.B -x | |
+hexadecimal (the default), and | |
+.BI -b n | |
+base | |
+.IR n . | |
+.PP | |
+With no arguments, | |
+.I ascii | |
+prints a table of the character set in the specified base. | |
+Characters of | |
+.I text | |
+are converted to their | |
+.SM ASCII | |
+values, one per line. If, however, the first | |
+.I text | |
+argument is a valid number in the specified base, conversion | |
+goes the opposite way. | |
+Control characters are printed as two- or three-character mnemonics. | |
+Other options are: | |
+.TP | |
+.B -n | |
+Force numeric output. | |
+.TP | |
+.B -c | |
+Force character output. | |
+.TP | |
+.B -t | |
+Convert from numbers to running text; do not interpret | |
+control characters or insert newlines. | |
+.PP | |
+.I Unicode | |
+is similar; it converts between | |
+.SM UTF | |
+and character values from the Unicode Standard (see | |
+.IR utf (7)). | |
+If given a range of hexadecimal numbers, | |
+.I unicode | |
+prints a table of the specified Unicode characters \(em their values and | |
+.SM UTF | |
+representations. | |
+Otherwise it translates from | |
+.SM UTF | |
+to numeric value or vice versa, | |
+depending on the appearance of the supplied text; | |
+the | |
+.B -n | |
+option forces numeric output to avoid ambiguity with numeric characters. | |
+If converting to | |
+.SM UTF , | |
+the characters are printed one per line unless the | |
+.B -t | |
+flag is set, in which case the output is a single string | |
+containing only the specified characters. | |
+Unlike | |
+.IR ascii , | |
+.I unicode | |
+treats no characters specially. | |
+.PP | |
+The output of | |
+.I ascii | |
+and | |
+.I unicode | |
+may be unhelpful if the characters printed are not available in the current fo… | |
+.PP | |
+The file | |
+.B \*9/lib/unicode | |
+contains a | |
+table of characters and descriptions, sorted in hexadecimal order, | |
+suitable for | |
+.IR look (1) | |
+on the lower case | |
+.I hex | |
+values of characters. | |
+.SH EXAMPLES | |
+.TP | |
+.B "ascii -d" | |
+Print the | |
+.SM ASCII | |
+table base 10. | |
+.TP | |
+.B "unicode p" | |
+Print the hex value of `p'. | |
+.TP | |
+.B "unicode 2200-22f1" | |
+Print a table of miscellaneous mathematical symbols. | |
+.TP | |
+.B "look 039 \*9/lib/unicode" | |
+See the start of the Greek alphabet's encoding in the Unicode Standard. | |
+.SH FILES | |
+.TP | |
+.B \*9/lib/unicode | |
+table of characters and descriptions. | |
+.SH SOURCE | |
+.B \*9/src/cmd/ascii.c | |
+.br | |
+.B \*9/src/cmd/unicode.c | |
+.SH "SEE ALSO" | |
+.IR look (1), | |
+.IR tcs (1), | |
+.IR utf (7), | |
+.IR font (7) | |
diff --git a/ascii/ascii.c b/ascii/ascii.c | |
@@ -0,0 +1,181 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+ | |
+#define MAXBASE 36 | |
+ | |
+void usage(void); | |
+void put(int); | |
+void putn(int, int); | |
+void puttext(char *); | |
+void putnum(char *); | |
+int btoi(char *); | |
+int value(int, int); | |
+int isnum(char *); | |
+ | |
+char *str[256]={ | |
+ "nul", "soh", "stx", "etx", "eot", … | |
+ "bs ", "ht ", "nl ", "vt ", "np ", … | |
+ "dle", "dc1", "dc2", "dc3", "dc4", … | |
+ "can", "em ", "sub", "esc", "fs ", … | |
+ "sp ", " ! ", " \" ", " # ", " $ ", … | |
+ " ( ", " ) ", " * ", " + ", " , ", … | |
+ " 0 ", " 1 ", " 2 ", " 3 ", " 4 ", … | |
+ " 8 ", " 9 ", " : ", " ; ", " < ", … | |
+ " @ ", " A ", " B ", " C ", " D ", … | |
+ " H ", " I ", " J ", " K ", " L ", … | |
+ " P ", " Q ", " R ", " S ", " T ", … | |
+ " X ", " Y ", " Z ", " [ ", " \\ ", … | |
+ " ` ", " a ", " b ", " c ", " d ", … | |
+ " h ", " i ", " j ", " k ", " l ", … | |
+ " p ", " q ", " r ", " s ", " t ", … | |
+ " x ", " y ", " z ", " { ", " | ", … | |
+ "x80", "x81", "x82", "x83", "x84", … | |
+ "x88", "x89", "x8a", "x8b", "x8c", … | |
+ "x90", "x91", "x92", "x93", "x94", … | |
+ "x98", "x99", "x9a", "x9b", "x9c", … | |
+ "xa0", " ¡ ", " ¢ ", " £ ", " ¤ ", … | |
+ " ¨ ", " © ", " ª ", " « ", " ¬ ", … | |
+ " ° ", " ± ", " ² ", " ³ ", " ´ ", … | |
+ " ¸ ", " ¹ ", " º ", " » ", " ¼ ", … | |
+ " À ", " Á ", " Â ", " Ã ", " Ä ", … | |
+ " È ", " É ", " Ê ", " Ë ", " Ì ", … | |
+ " Ð ", " Ñ ", " Ò ", " Ó ", " Ô ", … | |
+ " Ø ", " Ù ", " Ú ", " Û ", " Ü ", … | |
+ " à ", " á ", " â ", " ã ", " ä ", … | |
+ " è ", " é ", " ê ", " ë ", " ì ", … | |
+ " ð ", " ñ ", " ò ", " ó ", " ô ", … | |
+ " ø ", " ù ", " ú ", " û ", " ü ", … | |
+}; | |
+ | |
+char Ncol[]={ | |
+ 0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,… | |
+}; | |
+ | |
+int nchars=128; | |
+int base=16; | |
+int ncol; | |
+int text=1; | |
+int strip=0; | |
+Biobuf bin; | |
+ | |
+void | |
+main(int argc, char **argv) | |
+{ | |
+ int i; | |
+ | |
+ Binit(&bin, 1, OWRITE); | |
+ ARGBEGIN{ | |
+ case '8': | |
+ nchars=256; break; | |
+ case 'x': | |
+ base=16; break; | |
+ case 'o': | |
+ base=8; break; | |
+ case 'd': | |
+ base=10; break; | |
+ case 'b': | |
+ base=strtoul(EARGF(usage()), 0, 0); | |
+ if(base<2||base>MAXBASE) | |
+ usage(); | |
+ break; | |
+ case 'n': | |
+ text=0; break; | |
+ case 't': | |
+ strip=1; | |
+ /* fall through */ | |
+ case 'c': | |
+ text=2; break; | |
+ default: | |
+ usage(); | |
+ }ARGEND | |
+ | |
+ ncol=Ncol[base]; | |
+ if(argc==0){ | |
+ for(i=0;i<nchars;i++){ | |
+ put(i); | |
+ if((i&7)==7) | |
+ Bprint(&bin, "|\n"); | |
+ } | |
+ }else{ | |
+ if(text==1) | |
+ text=isnum(argv[0]); | |
+ while(argc--) | |
+ if(text) | |
+ puttext(*argv++); | |
+ else | |
+ putnum(*argv++); | |
+ } | |
+ Bputc(&bin, '\n'); | |
+ exits(0); | |
+} | |
+void | |
+usage(void) | |
+{ | |
+ fprint(2, "usage: %s [-8] [-xod | -b8] [-ncst] [--] [text]\n", argv0); | |
+ exits("usage"); | |
+} | |
+void | |
+put(int i) | |
+{ | |
+ Bputc(&bin, '|'); | |
+ putn(i, ncol); | |
+ Bprint(&bin, " %s", str[i]); | |
+} | |
+char dig[]="0123456789abcdefghijklmnopqrstuvwxyz"; | |
+void | |
+putn(int n, int ndig) | |
+{ | |
+ if(ndig==0) | |
+ return; | |
+ putn(n/base, ndig-1); | |
+ Bputc(&bin, dig[n%base]); | |
+} | |
+void | |
+puttext(char *s) | |
+{ | |
+ int n; | |
+ n=btoi(s)&0377; | |
+ if(strip) | |
+ Bputc(&bin, n); | |
+ else | |
+ Bprint(&bin, "%s\n", str[n]); | |
+} | |
+void | |
+putnum(char *s) | |
+{ | |
+ while(*s){ | |
+ putn(*s++&0377, ncol); | |
+ Bputc(&bin, '\n'); | |
+ } | |
+} | |
+int | |
+btoi(char *s) | |
+{ | |
+ int n; | |
+ n=0; | |
+ while(*s) | |
+ n=n*base+value(*s++, 0); | |
+ return(n); | |
+} | |
+int | |
+value(int c, int f) | |
+{ | |
+ char *s; | |
+ for(s=dig; s<dig+base; s++) | |
+ if(*s==c) | |
+ return(s-dig); | |
+ if(f) | |
+ return(-1); | |
+ fprint(2, "%s: bad input char %c\n", argv0, c); | |
+ exits("bad"); | |
+ return 0; /* to keep ken happy */ | |
+} | |
/*
 * Return 1 if every character of s is a valid digit in the current
 * base (an empty string qualifies), 0 otherwise.
 */
int
isnum(char *s)
{
	for(; *s != '\0'; s++)
		if(value(*s, 1) == -1)
			return 0;
	return 1;
}
diff --git a/cmp/Makefile b/cmp/Makefile | |
@@ -0,0 +1,10 @@ | |
+# cmp - cmp unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = cmp | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/cmp/cmp.1 b/cmp/cmp.1 | |
@@ -0,0 +1,57 @@ | |
+.TH CMP 1 | |
+.SH NAME | |
+cmp \- compare two files | |
+.SH SYNOPSIS | |
+.B cmp | |
+[ | |
+.B -lsL | |
+] | |
+.I file1 file2 | |
+[ | |
+.I offset1 | |
+[ | |
+.I offset2 | |
+] | |
+] | |
+.SH DESCRIPTION | |
+The two files are | |
+compared. | |
+A diagnostic results if the contents differ, otherwise | |
+there is no output. | |
+.PP | |
+The options are: | |
+.TP | |
+.B l | |
+Print the byte number (decimal) and the | |
+differing bytes (hexadecimal) for each difference. | |
+.TP | |
+.B s | |
+Print nothing for differing files, | |
+but set the exit status. | |
+.TP | |
+.B L | |
+Print the line number of the first differing byte. | |
+.PP | |
+If offsets are given, | |
+comparison starts at the designated byte position | |
+of the corresponding file. | |
+Offsets that begin with | |
+.B 0x | |
+are hexadecimal; | |
+with | |
+.BR 0 , | |
+octal; with anything else, decimal. | |
+.SH SOURCE | |
+.B \*9/src/cmd/cmp.c | |
+.SH "SEE ALSO" | |
+.IR diff (1) | |
+.SH DIAGNOSTICS | |
+If a file is inaccessible or missing, the exit status is | |
+.LR open . | |
+If the files are the same, the exit status is empty (true). | |
+If they are the same except that one is longer than the other, the exit status… | |
+.LR EOF . | |
+Otherwise | |
+.I cmp | |
+reports the position of the first disagreeing byte and the exit status is | |
+.LR differ . | |
diff --git a/cmp/cmp.c b/cmp/cmp.c | |
@@ -0,0 +1,112 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+ | |
+#define BUF 65536 | |
+ | |
+int sflag = 0; | |
+int lflag = 0; | |
+int Lflag = 0; | |
+ | |
+static void usage(void); | |
+ | |
+void | |
+main(int argc, char *argv[]) | |
+{ | |
+ int n, i; | |
+ uchar *p, *q; | |
+ uchar buf1[BUF], buf2[BUF]; | |
+ int f1, f2; | |
+ vlong nc = 1, o, l = 1; | |
+ char *name1, *name2; | |
+ uchar *b1s, *b1e, *b2s, *b2e; | |
+ | |
+ ARGBEGIN{ | |
+ case 's': sflag = 1; break; | |
+ case 'l': lflag = 1; break; | |
+ case 'L': Lflag = 1; break; | |
+ default: usage(); | |
+ }ARGEND | |
+ if(argc < 2) | |
+ usage(); | |
+ if((f1 = open(name1 = *argv++, OREAD)) == -1){ | |
+ if(!sflag) perror(name1); | |
+ exits("open"); | |
+ } | |
+ if((f2 = open(name2 = *argv++, OREAD)) == -1){ | |
+ if(!sflag) perror(name2); | |
+ exits("open"); | |
+ } | |
+ if(*argv){ | |
+ o = strtoll(*argv++, 0, 0); | |
+ if(seek(f1, o, 0) < 0){ | |
+ if(!sflag) perror("cmp: seek by offset1"); | |
+ exits("seek 1"); | |
+ } | |
+ } | |
+ if(*argv){ | |
+ o = strtoll(*argv++, 0, 0); | |
+ if(seek(f2, o, 0) < 0){ | |
+ if(!sflag) perror("cmp: seek by offset2"); | |
+ exits("seek 2"); | |
+ } | |
+ } | |
+ if(*argv) | |
+ usage(); | |
+ b1s = b1e = buf1; | |
+ b2s = b2e = buf2; | |
+ for(;;){ | |
+ if(b1s >= b1e){ | |
+ if(b1s >= &buf1[BUF]) | |
+ b1s = buf1; | |
+ n = read(f1, b1s, &buf1[BUF] - b1s); | |
+ b1e = b1s + n; | |
+ } | |
+ if(b2s >= b2e){ | |
+ if(b2s >= &buf2[BUF]) | |
+ b2s = buf2; | |
+ n = read(f2, b2s, &buf2[BUF] - b2s); | |
+ b2e = b2s + n; | |
+ } | |
+ n = b2e - b2s; | |
+ if(n > b1e - b1s) | |
+ n = b1e - b1s; | |
+ if(n <= 0) | |
+ break; | |
+ if(memcmp((void *)b1s, (void *)b2s, n) != 0){ | |
+ if(sflag) | |
+ exits("differ"); | |
+ for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) { | |
+ if(*p == '\n') | |
+ l++; | |
+ if(*p != *q){ | |
+ if(!lflag){ | |
+ print("%s %s differ: char %lld… | |
+ name1, name2, nc+i); | |
+ print(Lflag?" line %lld\n":"\n… | |
+ exits("differ"); | |
+ } | |
+ print("%6lld 0x%.2x 0x%.2x\n", nc+i, *… | |
+ } | |
+ } | |
+ } | |
+ if(Lflag) | |
+ for(p = b1s; p < b1e;) | |
+ if(*p++ == '\n') | |
+ l++; | |
+ nc += n; | |
+ b1s += n; | |
+ b2s += n; | |
+ } | |
+ if(b1e - b1s == b2e - b2s) | |
+ exits((char *)0); | |
+ if(!sflag) | |
+ print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1); | |
+ exits("EOF"); | |
+} | |
+ | |
/*
 * Print the synopsis and exit with status "usage".  The message is a
 * diagnostic, so it goes to standard error (fd 2) rather than being
 * mixed into the comparison output on standard output.
 */
static void
usage(void)
{
	fprint(2, "Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n");
	exits("usage");
}
diff --git a/dd/Makefile b/dd/Makefile | |
@@ -0,0 +1,10 @@ | |
+# dd - dd unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = dd | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/dd/dd.1 b/dd/dd.1 | |
diff --git a/dd/dd.c b/dd/dd.c | |
@@ -0,0 +1,660 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+ | |
+#define BIG 2147483647 | |
+#define LCASE (1<<0) | |
+#define UCASE (1<<1) | |
+#define SWAB (1<<2) | |
+#define NERR (1<<3) | |
+#define SYNC (1<<4) | |
+int cflag; | |
+int fflag; | |
+char *string; | |
+char *ifile; | |
+char *ofile; | |
+char *ibuf; | |
+char *obuf; | |
+vlong skip; | |
+vlong oseekn; | |
+vlong iseekn; | |
+vlong count; | |
+long files = 1; | |
+long ibs = 512; | |
+long obs = 512; | |
+long bs; | |
+long cbs; | |
+long ibc; | |
+long obc; | |
+long cbc; | |
+long nifr; | |
+long nipr; | |
+long nofr; | |
+long nopr; | |
+long ntrunc; | |
+int dotrunc = 1; | |
+int ibf; | |
+int obf; | |
+char *op; | |
+int nspace; | |
+uchar etoa[256]; | |
+uchar atoe[256]; | |
+uchar atoibm[256]; | |
+ | |
+void flsh(void); | |
+int match(char *s); | |
+vlong number(long big); | |
+void cnull(int cc); | |
+void null(int c); | |
+void ascii(int cc); | |
+void unblock(int cc); | |
+void ebcdic(int cc); | |
+void ibm(int cc); | |
+void block(int cc); | |
+void term(void); | |
+void stats(void); | |
+ | |
+#define iskey(s) ((key[0] == '-') && (strcmp(key+1, s) == 0)) | |
+ | |
+void | |
+main(int argc, char *argv[]) | |
+{ | |
+ void (*conv)(int); | |
+ char *ip; | |
+ char *key; | |
+ int a, c; | |
+ | |
+ conv = null; | |
+ for(c=1; c<argc; c++) { | |
+ key = argv[c++]; | |
+ if(c >= argc){ | |
+ fprint(2, "dd: arg %s needs a value\n", key); | |
+ exits("arg"); | |
+ } | |
+ string = argv[c]; | |
+ if(iskey("ibs")) { | |
+ ibs = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("obs")) { | |
+ obs = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("cbs")) { | |
+ cbs = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("bs")) { | |
+ bs = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("if")) { | |
+ ifile = string; | |
+ continue; | |
+ } | |
+ if(iskey("of")) { | |
+ ofile = string; | |
+ continue; | |
+ } | |
+ if(iskey("trunc")) { | |
+ dotrunc = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("skip")) { | |
+ skip = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("seek") || iskey("oseek")) { | |
+ oseekn = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("iseek")) { | |
+ iseekn = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("count")) { | |
+ count = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("files")) { | |
+ files = number(BIG); | |
+ continue; | |
+ } | |
+ if(iskey("conv")) { | |
+ cloop: | |
+ if(match(",")) | |
+ goto cloop; | |
+ if(*string == '\0') | |
+ continue; | |
+ if(match("ebcdic")) { | |
+ conv = ebcdic; | |
+ goto cloop; | |
+ } | |
+ if(match("ibm")) { | |
+ conv = ibm; | |
+ goto cloop; | |
+ } | |
+ if(match("ascii")) { | |
+ conv = ascii; | |
+ goto cloop; | |
+ } | |
+ if(match("block")) { | |
+ conv = block; | |
+ goto cloop; | |
+ } | |
+ if(match("unblock")) { | |
+ conv = unblock; | |
+ goto cloop; | |
+ } | |
+ if(match("lcase")) { | |
+ cflag |= LCASE; | |
+ goto cloop; | |
+ } | |
+ if(match("ucase")) { | |
+ cflag |= UCASE; | |
+ goto cloop; | |
+ } | |
+ if(match("swab")) { | |
+ cflag |= SWAB; | |
+ goto cloop; | |
+ } | |
+ if(match("noerror")) { | |
+ cflag |= NERR; | |
+ goto cloop; | |
+ } | |
+ if(match("sync")) { | |
+ cflag |= SYNC; | |
+ goto cloop; | |
+ } | |
+ } | |
+ fprint(2, "dd: bad arg: %s\n", key); | |
+ exits("arg"); | |
+ } | |
+ if(conv == null && cflag&(LCASE|UCASE)) | |
+ conv = cnull; | |
+ if(ifile) | |
+ ibf = open(ifile, 0); | |
+ else | |
+ ibf = dup(0, -1); | |
+ if(ibf < 0) { | |
+ fprint(2, "dd: open %s: %r\n", ifile); | |
+ exits("open"); | |
+ } | |
+ if(ofile){ | |
+ if(dotrunc) | |
+ obf = create(ofile, 1, 0664); | |
+ else | |
+ obf = open(ofile, 1); | |
+ if(obf < 0) { | |
+ fprint(2, "dd: create %s: %r\n", ofile); | |
+ exits("create"); | |
+ } | |
+ }else{ | |
+ obf = dup(1, -1); | |
+ if(obf < 0) { | |
+ fprint(2, "dd: can't dup file descriptor: %s: %r\n", o… | |
+ exits("dup"); | |
+ } | |
+ } | |
+ if(bs) | |
+ ibs = obs = bs; | |
+ if(ibs == obs && conv == null) | |
+ fflag++; | |
+ if(ibs == 0 || obs == 0) { | |
+ fprint(2, "dd: counts: cannot be zero\n"); | |
+ exits("counts"); | |
+ } | |
+ ibuf = sbrk(ibs); | |
+ if(fflag) | |
+ obuf = ibuf; | |
+ else | |
+ obuf = sbrk(obs); | |
+ sbrk(64); /* For good measure */ | |
+ if(ibuf == (char *)-1 || obuf == (char *)-1) { | |
+ fprint(2, "dd: not enough memory: %r\n"); | |
+ exits("memory"); | |
+ } | |
+ ibc = 0; | |
+ obc = 0; | |
+ cbc = 0; | |
+ op = obuf; | |
+ | |
+/* | |
+ if(signal(SIGINT, SIG_IGN) != SIG_IGN) | |
+ signal(SIGINT, term); | |
+*/ | |
+ seek(obf, obs*oseekn, 1); | |
+ seek(ibf, ibs*iseekn, 1); | |
+ while(skip) { | |
+ read(ibf, ibuf, ibs); | |
+ skip--; | |
+ } | |
+ | |
+ ip = 0; | |
+loop: | |
+ if(ibc-- == 0) { | |
+ ibc = 0; | |
+ if(count==0 || nifr+nipr!=count) { | |
+ if(cflag&(NERR|SYNC)) | |
+ for(ip=ibuf+ibs; ip>ibuf;) | |
+ *--ip = 0; | |
+ ibc = read(ibf, ibuf, ibs); | |
+ } | |
+ if(ibc == -1) { | |
+ perror("read"); | |
+ if((cflag&NERR) == 0) { | |
+ flsh(); | |
+ term(); | |
+ } | |
+ ibc = 0; | |
+ for(c=0; c<ibs; c++) | |
+ if(ibuf[c] != 0) | |
+ ibc = c; | |
+ stats(); | |
+ } | |
+ if(ibc == 0 && --files<=0) { | |
+ flsh(); | |
+ term(); | |
+ } | |
+ if(ibc != ibs) { | |
+ nipr++; | |
+ if(cflag&SYNC) | |
+ ibc = ibs; | |
+ } else | |
+ nifr++; | |
+ ip = ibuf; | |
+ c = (ibc>>1) & ~1; | |
+ if(cflag&SWAB && c) | |
+ do { | |
+ a = *ip++; | |
+ ip[-1] = *ip; | |
+ *ip++ = a; | |
+ } while(--c); | |
+ ip = ibuf; | |
+ if(fflag) { | |
+ obc = ibc; | |
+ flsh(); | |
+ ibc = 0; | |
+ } | |
+ goto loop; | |
+ } | |
+ c = 0; | |
+ c |= *ip++; | |
+ c &= 0377; | |
+ (*conv)(c); | |
+ goto loop; | |
+} | |
+ | |
+void | |
+flsh(void) | |
+{ | |
+ int c; | |
+ | |
+ if(obc) { | |
+ c = write(obf, obuf, obc); | |
+ if(c != obc) { | |
+ if(c > 0) | |
+ ++nopr; | |
+ perror("write"); | |
+ term(); | |
+ } | |
+ if(obc == obs) | |
+ nofr++; | |
+ else | |
+ nopr++; | |
+ obc = 0; | |
+ } | |
+} | |
+ | |
+int | |
+match(char *s) | |
+{ | |
+ char *cs; | |
+ | |
+ cs = string; | |
+ while(*cs++ == *s) | |
+ if(*s++ == '\0') | |
+ goto true; | |
+ if(*s != '\0') | |
+ return 0; | |
+ | |
+true: | |
+ cs--; | |
+ string = cs; | |
+ return 1; | |
+} | |
+ | |
+vlong | |
+number(long big) | |
+{ | |
+ char *cs; | |
+ vlong n; | |
+ | |
+ cs = string; | |
+ n = 0; | |
+ while(*cs >= '0' && *cs <= '9') | |
+ n = n*10 + *cs++ - '0'; | |
+ for(;;) | |
+ switch(*cs++) { | |
+ | |
+ case 'k': | |
+ n *= 1024; | |
+ continue; | |
+ | |
+/* case 'w': | |
+ n *= sizeof(int); | |
+ continue; | |
+*/ | |
+ | |
+ case 'b': | |
+ n *= 512; | |
+ continue; | |
+ | |
+/* case '*':*/ | |
+ case 'x': | |
+ string = cs; | |
+ n *= number(BIG); | |
+ | |
+ case '\0': | |
+ if(n>=big || n<0) { | |
+ fprint(2, "dd: argument %lld out of range\n", n); | |
+ exits("range"); | |
+ } | |
+ return n; | |
+ } | |
+ /* never gets here */ | |
+} | |
+ | |
+void | |
+cnull(int cc) | |
+{ | |
+ int c; | |
+ | |
+ c = cc; | |
+ if((cflag&UCASE) && c>='a' && c<='z') | |
+ c += 'A'-'a'; | |
+ if((cflag&LCASE) && c>='A' && c<='Z') | |
+ c += 'a'-'A'; | |
+ null(c); | |
+} | |
+ | |
+void | |
+null(int c) | |
+{ | |
+ | |
+ *op = c; | |
+ op++; | |
+ if(++obc >= obs) { | |
+ flsh(); | |
+ op = obuf; | |
+ } | |
+} | |
+ | |
+void | |
+ascii(int cc) | |
+{ | |
+ int c; | |
+ | |
+ c = etoa[cc]; | |
+ if(cbs == 0) { | |
+ cnull(c); | |
+ return; | |
+ } | |
+ if(c == ' ') { | |
+ nspace++; | |
+ goto out; | |
+ } | |
+ while(nspace > 0) { | |
+ null(' '); | |
+ nspace--; | |
+ } | |
+ cnull(c); | |
+ | |
+out: | |
+ if(++cbc >= cbs) { | |
+ null('\n'); | |
+ cbc = 0; | |
+ nspace = 0; | |
+ } | |
+} | |
+ | |
+void | |
+unblock(int cc) | |
+{ | |
+ int c; | |
+ | |
+ c = cc & 0377; | |
+ if(cbs == 0) { | |
+ cnull(c); | |
+ return; | |
+ } | |
+ if(c == ' ') { | |
+ nspace++; | |
+ goto out; | |
+ } | |
+ while(nspace > 0) { | |
+ null(' '); | |
+ nspace--; | |
+ } | |
+ cnull(c); | |
+ | |
+out: | |
+ if(++cbc >= cbs) { | |
+ null('\n'); | |
+ cbc = 0; | |
+ nspace = 0; | |
+ } | |
+} | |
+ | |
+void | |
+ebcdic(int cc) | |
+{ | |
+ int c; | |
+ | |
+ c = cc; | |
+ if(cflag&UCASE && c>='a' && c<='z') | |
+ c += 'A'-'a'; | |
+ if(cflag&LCASE && c>='A' && c<='Z') | |
+ c += 'a'-'A'; | |
+ c = atoe[c]; | |
+ if(cbs == 0) { | |
+ null(c); | |
+ return; | |
+ } | |
+ if(cc == '\n') { | |
+ while(cbc < cbs) { | |
+ null(atoe[' ']); | |
+ cbc++; | |
+ } | |
+ cbc = 0; | |
+ return; | |
+ } | |
+ if(cbc == cbs) | |
+ ntrunc++; | |
+ cbc++; | |
+ if(cbc <= cbs) | |
+ null(c); | |
+} | |
+ | |
+void | |
+ibm(int cc) | |
+{ | |
+ int c; | |
+ | |
+ c = cc; | |
+ if(cflag&UCASE && c>='a' && c<='z') | |
+ c += 'A'-'a'; | |
+ if(cflag&LCASE && c>='A' && c<='Z') | |
+ c += 'a'-'A'; | |
+ c = atoibm[c] & 0377; | |
+ if(cbs == 0) { | |
+ null(c); | |
+ return; | |
+ } | |
+ if(cc == '\n') { | |
+ while(cbc < cbs) { | |
+ null(atoibm[' ']); | |
+ cbc++; | |
+ } | |
+ cbc = 0; | |
+ return; | |
+ } | |
+ if(cbc == cbs) | |
+ ntrunc++; | |
+ cbc++; | |
+ if(cbc <= cbs) | |
+ null(c); | |
+} | |
+ | |
+void | |
+block(int cc) | |
+{ | |
+ int c; | |
+ | |
+ c = cc; | |
+ if(cflag&UCASE && c>='a' && c<='z') | |
+ c += 'A'-'a'; | |
+ if(cflag&LCASE && c>='A' && c<='Z') | |
+ c += 'a'-'A'; | |
+ c &= 0377; | |
+ if(cbs == 0) { | |
+ null(c); | |
+ return; | |
+ } | |
+ if(cc == '\n') { | |
+ while(cbc < cbs) { | |
+ null(' '); | |
+ cbc++; | |
+ } | |
+ cbc = 0; | |
+ return; | |
+ } | |
+ if(cbc == cbs) | |
+ ntrunc++; | |
+ cbc++; | |
+ if(cbc <= cbs) | |
+ null(c); | |
+} | |
+ | |
/* Print the record statistics and exit with an empty (success) status. */
void
term(void)
{
	stats();
	exits((char *)0);
}
+ | |
+void | |
+stats(void) | |
+{ | |
+ | |
+ fprint(2, "%lud+%lud records in\n", nifr, nipr); | |
+ fprint(2, "%lud+%lud records out\n", nofr, nopr); | |
+ if(ntrunc) | |
+ fprint(2, "%lud truncated records\n", ntrunc); | |
+} | |
+ | |
+uchar etoa[] = | |
+{ | |
+ 0000,0001,0002,0003,0234,0011,0206,0177, | |
+ 0227,0215,0216,0013,0014,0015,0016,0017, | |
+ 0020,0021,0022,0023,0235,0205,0010,0207, | |
+ 0030,0031,0222,0217,0034,0035,0036,0037, | |
+ 0200,0201,0202,0203,0204,0012,0027,0033, | |
+ 0210,0211,0212,0213,0214,0005,0006,0007, | |
+ 0220,0221,0026,0223,0224,0225,0226,0004, | |
+ 0230,0231,0232,0233,0024,0025,0236,0032, | |
+ 0040,0240,0241,0242,0243,0244,0245,0246, | |
+ 0247,0250,0133,0056,0074,0050,0053,0041, | |
+ 0046,0251,0252,0253,0254,0255,0256,0257, | |
+ 0260,0261,0135,0044,0052,0051,0073,0136, | |
+ 0055,0057,0262,0263,0264,0265,0266,0267, | |
+ 0270,0271,0174,0054,0045,0137,0076,0077, | |
+ 0272,0273,0274,0275,0276,0277,0300,0301, | |
+ 0302,0140,0072,0043,0100,0047,0075,0042, | |
+ 0303,0141,0142,0143,0144,0145,0146,0147, | |
+ 0150,0151,0304,0305,0306,0307,0310,0311, | |
+ 0312,0152,0153,0154,0155,0156,0157,0160, | |
+ 0161,0162,0313,0314,0315,0316,0317,0320, | |
+ 0321,0176,0163,0164,0165,0166,0167,0170, | |
+ 0171,0172,0322,0323,0324,0325,0326,0327, | |
+ 0330,0331,0332,0333,0334,0335,0336,0337, | |
+ 0340,0341,0342,0343,0344,0345,0346,0347, | |
+ 0173,0101,0102,0103,0104,0105,0106,0107, | |
+ 0110,0111,0350,0351,0352,0353,0354,0355, | |
+ 0175,0112,0113,0114,0115,0116,0117,0120, | |
+ 0121,0122,0356,0357,0360,0361,0362,0363, | |
+ 0134,0237,0123,0124,0125,0126,0127,0130, | |
+ 0131,0132,0364,0365,0366,0367,0370,0371, | |
+ 0060,0061,0062,0063,0064,0065,0066,0067, | |
+ 0070,0071,0372,0373,0374,0375,0376,0377, | |
+}; | |
+uchar atoe[] = | |
+{ | |
+ 0000,0001,0002,0003,0067,0055,0056,0057, | |
+ 0026,0005,0045,0013,0014,0015,0016,0017, | |
+ 0020,0021,0022,0023,0074,0075,0062,0046, | |
+ 0030,0031,0077,0047,0034,0035,0036,0037, | |
+ 0100,0117,0177,0173,0133,0154,0120,0175, | |
+ 0115,0135,0134,0116,0153,0140,0113,0141, | |
+ 0360,0361,0362,0363,0364,0365,0366,0367, | |
+ 0370,0371,0172,0136,0114,0176,0156,0157, | |
+ 0174,0301,0302,0303,0304,0305,0306,0307, | |
+ 0310,0311,0321,0322,0323,0324,0325,0326, | |
+ 0327,0330,0331,0342,0343,0344,0345,0346, | |
+ 0347,0350,0351,0112,0340,0132,0137,0155, | |
+ 0171,0201,0202,0203,0204,0205,0206,0207, | |
+ 0210,0211,0221,0222,0223,0224,0225,0226, | |
+ 0227,0230,0231,0242,0243,0244,0245,0246, | |
+ 0247,0250,0251,0300,0152,0320,0241,0007, | |
+ 0040,0041,0042,0043,0044,0025,0006,0027, | |
+ 0050,0051,0052,0053,0054,0011,0012,0033, | |
+ 0060,0061,0032,0063,0064,0065,0066,0010, | |
+ 0070,0071,0072,0073,0004,0024,0076,0341, | |
+ 0101,0102,0103,0104,0105,0106,0107,0110, | |
+ 0111,0121,0122,0123,0124,0125,0126,0127, | |
+ 0130,0131,0142,0143,0144,0145,0146,0147, | |
+ 0150,0151,0160,0161,0162,0163,0164,0165, | |
+ 0166,0167,0170,0200,0212,0213,0214,0215, | |
+ 0216,0217,0220,0232,0233,0234,0235,0236, | |
+ 0237,0240,0252,0253,0254,0255,0256,0257, | |
+ 0260,0261,0262,0263,0264,0265,0266,0267, | |
+ 0270,0271,0272,0273,0274,0275,0276,0277, | |
+ 0312,0313,0314,0315,0316,0317,0332,0333, | |
+ 0334,0335,0336,0337,0352,0353,0354,0355, | |
+ 0356,0357,0372,0373,0374,0375,0376,0377, | |
+}; | |
+uchar atoibm[] = | |
+{ | |
+ 0000,0001,0002,0003,0067,0055,0056,0057, | |
+ 0026,0005,0045,0013,0014,0015,0016,0017, | |
+ 0020,0021,0022,0023,0074,0075,0062,0046, | |
+ 0030,0031,0077,0047,0034,0035,0036,0037, | |
+ 0100,0132,0177,0173,0133,0154,0120,0175, | |
+ 0115,0135,0134,0116,0153,0140,0113,0141, | |
+ 0360,0361,0362,0363,0364,0365,0366,0367, | |
+ 0370,0371,0172,0136,0114,0176,0156,0157, | |
+ 0174,0301,0302,0303,0304,0305,0306,0307, | |
+ 0310,0311,0321,0322,0323,0324,0325,0326, | |
+ 0327,0330,0331,0342,0343,0344,0345,0346, | |
+ 0347,0350,0351,0255,0340,0275,0137,0155, | |
+ 0171,0201,0202,0203,0204,0205,0206,0207, | |
+ 0210,0211,0221,0222,0223,0224,0225,0226, | |
+ 0227,0230,0231,0242,0243,0244,0245,0246, | |
+ 0247,0250,0251,0300,0117,0320,0241,0007, | |
+ 0040,0041,0042,0043,0044,0025,0006,0027, | |
+ 0050,0051,0052,0053,0054,0011,0012,0033, | |
+ 0060,0061,0032,0063,0064,0065,0066,0010, | |
+ 0070,0071,0072,0073,0004,0024,0076,0341, | |
+ 0101,0102,0103,0104,0105,0106,0107,0110, | |
+ 0111,0121,0122,0123,0124,0125,0126,0127, | |
+ 0130,0131,0142,0143,0144,0145,0146,0147, | |
+ 0150,0151,0160,0161,0162,0163,0164,0165, | |
+ 0166,0167,0170,0200,0212,0213,0214,0215, | |
+ 0216,0217,0220,0232,0233,0234,0235,0236, | |
+ 0237,0240,0252,0253,0254,0255,0256,0257, | |
+ 0260,0261,0262,0263,0264,0265,0266,0267, | |
+ 0270,0271,0272,0273,0274,0275,0276,0277, | |
+ 0312,0313,0314,0315,0316,0317,0332,0333, | |
+ 0334,0335,0336,0337,0352,0353,0354,0355, | |
+ 0356,0357,0372,0373,0374,0375,0376,0377, | |
+}; | |
diff --git a/diff/Makefile b/diff/Makefile | |
@@ -0,0 +1,35 @@ | |
# diff - diff unix port from plan9
+# Depends on ../lib9 | |
+ | |
+TARG = diff | |
+OFILES = diffdir.o diffio.o diffreg.o main.o | |
+MANFILES = diff.1 | |
+ | |
+include ../config.mk | |
+ | |
+all: ${TARG} | |
+ @strip ${TARG} | |
+ @echo built ${TARG} | |
+ | |
+install: ${TARG} | |
+ @mkdir -p ${DESTDIR}${PREFIX}/bin | |
+ @cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/ | |
+ @chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG} | |
+ @mkdir -p ${DESTDIR}${MANPREFIX}/man1 | |
+ @cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1 | |
+ @chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES} | |
+ | |
# Remove exactly what the install target put in place: the binary under
# ${PREFIX}/bin and the manual page under ${MANPREFIX}/man1.
uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/${TARG}
	rm -f ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
+ | |
+.c.o: | |
+ @echo CC $*.c | |
+ @${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c | |
+ | |
+clean: | |
+ rm -f ${OFILES} ${TARG} | |
+ | |
+${TARG}: ${OFILES} | |
+ @echo LD ${TARG} | |
+ @${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -… | |
diff --git a/diff/diff.1 b/diff/diff.1 | |
@@ -0,0 +1,163 @@ | |
+.TH DIFF 1 | |
+.SH NAME | |
+diff \- differential file comparator | |
+.SH SYNOPSIS | |
+.B diff | |
+[ | |
+.B -acefmnbwr | |
+] file1 ... file2 | |
+.SH DESCRIPTION | |
+.I Diff | |
+tells what lines must be changed in two files to bring them | |
+into agreement. | |
+If one file | |
+is a directory, | |
+then a file in that directory with basename the same as that of | |
+the other file is used. | |
+If both files are directories, similarly named files in the | |
+two directories are compared by the method of | |
+.I diff | |
+for text | |
+files and | |
+.IR cmp (1) | |
+otherwise. | |
+If more than two file names are given, then each argument is compared | |
+to the last argument as above. | |
+The | |
+.B -r | |
+option causes | |
+.I diff | |
+to process similarly named subdirectories recursively. | |
+When processing more than one file, | |
+.I diff | |
+prefixes file differences with a single line | |
+listing the two differing files, in the form of | |
+a | |
+.I diff | |
+command line. | |
+The | |
+.B -m | |
+flag causes this behavior even when processing single files. | |
+.PP | |
+The normal output contains lines of these forms: | |
+.IP "" 5 | |
+.I n1 | |
+.B a | |
+.I n3,n4 | |
+.br | |
+.I n1,n2 | |
+.B d | |
+.I n3 | |
+.br | |
+.I n1,n2 | |
+.B c | |
+.I n3,n4 | |
+.PP | |
+These lines resemble | |
+.I ed | |
+commands to convert | |
+.I file1 | |
+into | |
+.IR file2 . | |
+The numbers after the letters pertain to | |
+.IR file2 . | |
+In fact, by exchanging `a' for `d' and reading backward | |
+one may ascertain equally how to convert | |
+.I file2 | |
+into | |
+.IR file1 . | |
+As in | |
+.IR ed , | |
+identical pairs where | |
+.I n1 | |
+= | |
+.I n2 | |
+or | |
+.I n3 | |
+= | |
+.I n4 | |
+are abbreviated as a single number. | |
+.PP | |
+Following each of these lines come all the lines that are | |
+affected in the first file flagged by `<', | |
+then all the lines that are affected in the second file | |
+flagged by `>'. | |
+.PP | |
+The | |
+.B -b | |
+option causes | |
+trailing blanks (spaces and tabs) to be ignored | |
+and other strings of blanks to compare equal. | |
+The | |
+.B -w | |
+option causes all white-space to be removed from input lines | |
+before applying the difference algorithm. | |
+.PP | |
+The | |
+.B -n | |
+option prefixes each range with | |
+.IB file : \fR | |
+and inserts a space around the | |
+.BR a , | |
+.BR c , | |
+and | |
+.B d | |
+verbs. | |
+The | |
+.B -e | |
+option produces a script of | |
+.I "a, c" | |
+and | |
+.I d | |
+commands for the editor | |
+.IR ed , | |
+which will recreate | |
+.I file2 | |
+from | |
+.IR file1 . | |
+The | |
+.B -f | |
+option produces a similar script, | |
+not useful with | |
+.IR ed , | |
+in the opposite order. It may, however, be | |
+useful as input to a stream-oriented post-processor. | |
+.PP | |
+The | |
+.B -c | |
+option includes three lines of context around each | |
+change, merging changes whose contexts overlap. | |
+The | |
+.B -a | |
+flag displays the entire file as context. | |
+.PP | |
+Except in rare circumstances, | |
+.I diff | |
+finds a smallest sufficient set of file | |
+differences. | |
+.SH FILES | |
+.B /tmp/diff[12] | |
+.SH SOURCE | |
+.B \*9/src/cmd/diff | |
+.SH "SEE ALSO" | |
+.IR cmp (1), | |
+.IR comm (1), | |
+.IR ed (1) | |
+.SH DIAGNOSTICS | |
+Exit status is the empty string | |
+for no differences, | |
+.L some | |
+for some, | |
+and | |
+.L error | |
+for trouble. | |
+.SH BUGS | |
+Editing scripts produced under the | |
+.BR -e " or" | |
+.BR -f " option are naive about" | |
+creating lines consisting of a single `\fB.\fR'. | |
+.PP | |
+When running | |
+.I diff | |
+on directories, the notion of what is a text | |
+file is open to debate. | |
diff --git a/diff/diff.h b/diff/diff.h | |
@@ -0,0 +1,27 @@ | |
+#define stdout bstdout | |
+ | |
+ char mode; /* '\0' (normal), 'e', 'f', 'n', 'c', 'a' */ | |
+char bflag; /* ignore multiple and trailing blanks */ | |
+char rflag; /* recurse down directory trees */ | |
+char mflag; /* pseudo flag: doing multiple files, one d… | |
+int anychange; | |
+extern Biobuf stdout; | |
+extern int binary; | |
+ | |
+#define MALLOC(t, n) ((t *)emalloc((n)*sizeof(t))) | |
+#define REALLOC(p, t, n) ((t *)erealloc((void *)(p), (n)*sizeof(t))) | |
+#define FREE(p) free((void *)(p)) | |
+ | |
+#define MAXPATHLEN 1024 | |
+ | |
+int mkpathname(char *, char *, char *); | |
+void *emalloc(unsigned); | |
+void *erealloc(void *, unsigned); | |
+void diff(char *, char *, int); | |
+void diffdir(char *, char *, int); | |
+void diffreg(char *, char *); | |
+Biobuf *prepare(int, char *); | |
+void panic(int, char *, ...); | |
+void check(Biobuf *, Biobuf *); | |
+void change(int, int, int, int); | |
+void flushchanges(void); | |
diff --git a/diff/diffdir.c b/diff/diffdir.c | |
@@ -0,0 +1,113 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+#include "diff.h" | |
+ | |
/*
 * qsort() comparison callback: each argument points at a char*
 * element; the two strings are ordered with strcmp().
 */
static int
itemcmp(const void *v1, const void *v2)
{
	const char *left = *(char *const *)v1;
	const char *right = *(char *const *)v2;

	return strcmp(left, right);
}
+ | |
+static char ** | |
+scandir(char *name) | |
+{ | |
+ char **cp; | |
+ Dir *db; | |
+ int nitems; | |
+ int fd, n; | |
+ | |
+ if ((fd = open(name, OREAD)) < 0){ | |
+ panic(mflag ? 0 : 2, "can't open %s\n", name); | |
+ return nil; | |
+ } | |
+ cp = 0; | |
+ nitems = 0; | |
+ if((n = dirreadall(fd, &db)) > 0){ | |
+ while (n--) { | |
+ cp = REALLOC(cp, char *, (nitems+1)); | |
+ cp[nitems] = MALLOC(char, strlen((db+n)->name)+1); | |
+ strcpy(cp[nitems], (db+n)->name); | |
+ nitems++; | |
+ } | |
+ free(db); | |
+ } | |
+ cp = REALLOC(cp, char*, (nitems+1)); | |
+ cp[nitems] = 0; | |
+ close(fd); | |
+ qsort((char *)cp, nitems, sizeof(char*), itemcmp); | |
+ return cp; | |
+} | |
+ | |
/*
 * Return 1 when p is exactly "." or "..", 0 for any other string.
 */
static int
isdotordotdot(char *p)
{
	if (p[0] != '.')
		return 0;
	if (p[1] == '\0')
		return 1;
	return p[1] == '.' && p[2] == '\0';
}
+ | |
+void | |
+diffdir(char *f, char *t, int level) | |
+{ | |
+ char **df, **dt, **dirf, **dirt; | |
+ char *from, *to; | |
+ int res; | |
+ char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1]; | |
+ | |
+ df = scandir(f); | |
+ dt = scandir(t); | |
+ dirf = df; | |
+ dirt = dt; | |
+ if(df == nil || dt == nil) | |
+ goto Out; | |
+ while (*df || *dt) { | |
+ from = *df; | |
+ to = *dt; | |
+ if (from && isdotordotdot(from)) { | |
+ df++; | |
+ continue; | |
+ } | |
+ if (to && isdotordotdot(to)) { | |
+ dt++; | |
+ continue; | |
+ } | |
+ if (!from) | |
+ res = 1; | |
+ else if (!to) | |
+ res = -1; | |
+ else | |
+ res = strcmp(from, to); | |
+ if (res < 0) { | |
+ if (mode == 0 || mode == 'n') | |
+ Bprint(&stdout, "Only in %s: %s\n", f, from); | |
+ df++; | |
+ continue; | |
+ } | |
+ if (res > 0) { | |
+ if (mode == 0 || mode == 'n') | |
+ Bprint(&stdout, "Only in %s: %s\n", t, to); | |
+ dt++; | |
+ continue; | |
+ } | |
+ if (mkpathname(fb, f, from)) | |
+ continue; | |
+ if (mkpathname(tb, t, to)) | |
+ continue; | |
+ diff(fb, tb, level+1); | |
+ df++; dt++; | |
+ } | |
+Out: | |
+ for (df = dirf; df && *df; df++) | |
+ FREE(*df); | |
+ for (dt = dirt; dt && *dt; dt++) | |
+ FREE(*dt); | |
+ FREE(dirf); | |
+ FREE(dirt); | |
+} | |
diff --git a/diff/diffio.c b/diff/diffio.c | |
@@ -0,0 +1,387 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+#include <ctype.h> | |
+#include "diff.h" | |
+ | |
+struct line { | |
+ int serial; | |
+ int value; | |
+}; | |
+extern struct line *file[2]; | |
+extern int len[2]; | |
+extern long *ixold, *ixnew; | |
+extern int *J; | |
+ | |
+static Biobuf *input[2]; | |
+static char *file1, *file2; | |
+static int firstchange; | |
+ | |
+#define MAXLINELEN 4096 | |
+#define MIN(x, y) ((x) < (y) ? (x): (y)) | |
+ | |
+static int | |
+readline(Biobuf *bp, char *buf) | |
+{ | |
+ int c; | |
+ char *p, *e; | |
+ | |
+ p = buf; | |
+ e = p + MAXLINELEN-1; | |
+ do { | |
+ c = Bgetc(bp); | |
+ if (c < 0) { | |
+ if (p == buf) | |
+ return -1; | |
+ break; | |
+ } | |
+ if (c == '\n') | |
+ break; | |
+ *p++ = c; | |
+ } while (p < e); | |
+ *p = 0; | |
+ if (c != '\n' && c >= 0) { | |
+ do c = Bgetc(bp); | |
+ while (c >= 0 && c != '\n'); | |
+ } | |
+ return p - buf; | |
+} | |
+ | |
+#define HALFLONG 16 | |
+#define low(x) (x&((1L<<HALFLONG)-1)) | |
+#define high(x) (x>>HALFLONG) | |
+ | |
+/* | |
+ * hashing has the effect of | |
+ * arranging line in 7-bit bytes and then | |
+ * summing 1-s complement in 16-bit hunks | |
+ */ | |
+static int | |
+readhash(Biobuf *bp, char *buf) | |
+{ | |
+ long sum; | |
+ unsigned shift; | |
+ char *p; | |
+ int len, space; | |
+ | |
+ sum = 1; | |
+ shift = 0; | |
+ if ((len = readline(bp, buf)) == -1) | |
+ return 0; | |
+ p = buf; | |
+ switch(bflag) /* various types of white space handling */ | |
+ { | |
+ case 0: | |
+ while (len--) { | |
+ sum += (long)*p++ << (shift &= (HALFLONG-1)); | |
+ shift += 7; | |
+ } | |
+ break; | |
+ case 1: | |
+ /* | |
+ * coalesce multiple white-space | |
+ */ | |
+ for (space = 0; len--; p++) { | |
+ if (isspace((uchar)*p)) { | |
+ space++; | |
+ continue; | |
+ } | |
+ if (space) { | |
+ shift += 7; | |
+ space = 0; | |
+ } | |
+ sum += (long)*p << (shift &= (HALFLONG-1)); | |
+ shift += 7; | |
+ } | |
+ break; | |
+ default: | |
+ /* | |
+ * strip all white-space | |
+ */ | |
+ while (len--) { | |
+ if (isspace((uchar)*p)) { | |
+ p++; | |
+ continue; | |
+ } | |
+ sum += (long)*p++ << (shift &= (HALFLONG-1)); | |
+ shift += 7; | |
+ } | |
+ break; | |
+ } | |
+ sum = low(sum) + high(sum); | |
+ return ((short)low(sum) + (short)high(sum)); | |
+} | |
+ | |
+Biobuf * | |
+prepare(int i, char *arg) | |
+{ | |
+ struct line *p; | |
+ int j, h; | |
+ Biobuf *bp; | |
+ char *cp, buf[MAXLINELEN]; | |
+ int nbytes; | |
+ Rune r; | |
+ | |
+ bp = Bopen(arg, OREAD); | |
+ if (!bp) { | |
+ panic(mflag ? 0: 2, "cannot open %s: %r\n", arg); | |
+ return 0; | |
+ } | |
+ if (binary) | |
+ return bp; | |
+ nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN)); | |
+ if (nbytes > 0) { | |
+ cp = buf; | |
+ while (cp < buf+nbytes-UTFmax) { | |
+ /* | |
+ * heuristic for a binary file in the | |
+ * brave new UNICODE world | |
+ */ | |
+ cp += chartorune(&r, cp); | |
+ if (r == 0 || (r > 0x7f && r <= 0xa0)) { | |
+ binary++; | |
+ return bp; | |
+ } | |
+ } | |
+ Bseek(bp, 0, 0); | |
+ } | |
+ p = MALLOC(struct line, 3); | |
+ for (j = 0; h = readhash(bp, buf); p[j].value = h) | |
+ p = REALLOC(p, struct line, (++j+3)); | |
+ len[i] = j; | |
+ file[i] = p; | |
+ input[i] = bp; /*fix*/ | |
+ if (i == 0) { /*fix*/ | |
+ file1 = arg; | |
+ firstchange = 0; | |
+ } | |
+ else | |
+ file2 = arg; | |
+ return bp; | |
+} | |
+ | |
+static int | |
+squishspace(char *buf) | |
+{ | |
+ char *p, *q; | |
+ int space; | |
+ | |
+ for (space = 0, q = p = buf; *q; q++) { | |
+ if (isspace((uchar)*q)) { | |
+ space++; | |
+ continue; | |
+ } | |
+ if (space && bflag == 1) { | |
+ *p++ = ' '; | |
+ space = 0; | |
+ } | |
+ *p++ = *q; | |
+ } | |
+ *p = 0; | |
+ return p - buf; | |
+} | |
+ | |
+/* | |
+ * need to fix up for unexpected EOF's | |
+ */ | |
+void | |
+check(Biobuf *bf, Biobuf *bt) | |
+{ | |
+ int f, t, flen, tlen; | |
+ char fbuf[MAXLINELEN], tbuf[MAXLINELEN]; | |
+ | |
+ ixold[0] = ixnew[0] = 0; | |
+ for (f = t = 1; f < len[0]; f++) { | |
+ flen = readline(bf, fbuf); | |
+ ixold[f] = ixold[f-1] + flen + 1; /* ftell(bf) … | |
+ if (J[f] == 0) | |
+ continue; | |
+ do { | |
+ tlen = readline(bt, tbuf); | |
+ ixnew[t] = ixnew[t-1] + tlen + 1; /* ftell(bt) … | |
+ } while (t++ < J[f]); | |
+ if (bflag) { | |
+ flen = squishspace(fbuf); | |
+ tlen = squishspace(tbuf); | |
+ } | |
+ if (flen != tlen || strcmp(fbuf, tbuf)) | |
+ J[f] = 0; | |
+ } | |
+ while (t < len[1]) { | |
+ tlen = readline(bt, tbuf); | |
+ ixnew[t] = ixnew[t-1] + tlen + 1; /* fseek(bt) */ | |
+ t++; | |
+ } | |
+} | |
+ | |
+static void | |
+range(int a, int b, char *separator) | |
+{ | |
+ Bprint(&stdout, "%d", a > b ? b: a); | |
+ if (a < b) | |
+ Bprint(&stdout, "%s%d", separator, b); | |
+} | |
+ | |
+static void | |
+fetch(long *f, int a, int b, Biobuf *bp, char *s) | |
+{ | |
+ char buf[MAXLINELEN]; | |
+ int maxb; | |
+ | |
+ if(a <= 1) | |
+ a = 1; | |
+ if(bp == input[0]) | |
+ maxb = len[0]; | |
+ else | |
+ maxb = len[1]; | |
+ if(b > maxb) | |
+ b = maxb; | |
+ if(a > maxb) | |
+ return; | |
+ Bseek(bp, f[a-1], 0); | |
+ while (a++ <= b) { | |
+ readline(bp, buf); | |
+ Bprint(&stdout, "%s%s\n", s, buf); | |
+ } | |
+} | |
+ | |
+typedef struct Change Change; | |
+struct Change | |
+{ | |
+ int a; | |
+ int b; | |
+ int c; | |
+ int d; | |
+}; | |
+ | |
+Change *changes; | |
+int nchanges; | |
+ | |
/*
 * Report one difference: lines a..b of the old file versus lines
 * c..d of the new file.  An empty range is written with its start
 * one past its end (a > b or c > d); when both ranges are empty
 * there is nothing to report.
 *
 * Sets anychange.  With mflag set, the first change of a file pair
 * is preceded by a "diff ..." header line.  In modes 'c' and 'a' the
 * change is only queued in changes[] for flushchanges(); in every
 * other mode it is printed immediately in that mode's format.
 */
+ void | |
+ change(int a, int b, int c, int d) | |
+ { | |
+ char verb; | |
+ char buf[4]; | |
+ Change *ch; | |
+ | |
+ if (a > b && c > d) | |
+ return; | |
+ anychange = 1; | |
+ if (mflag && firstchange == 0) { | |
+ if(mode) { | |
+ buf[0] = '-'; | |
+ buf[1] = mode; | |
+ buf[2] = ' '; | |
+ buf[3] = '\0'; | |
+ } else { | |
+ buf[0] = '\0'; | |
+ } | |
+ Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2); | |
+ firstchange = 1; | |
+ } | |
/* empty old range: append; empty new range: delete; otherwise change */
+ verb = a > b ? 'a': c > d ? 'd': 'c'; | |
+ switch(mode) { | |
+ case 'e': | |
+ range(a, b, ","); | |
+ Bputc(&stdout, verb); | |
+ break; | |
+ case 0: | |
+ range(a, b, ","); | |
+ Bputc(&stdout, verb); | |
+ range(c, d, ","); | |
+ break; | |
+ case 'n': | |
+ Bprint(&stdout, "%s:", file1); | |
+ range(a, b, ","); | |
+ Bprint(&stdout, " %c ", verb); | |
+ Bprint(&stdout, "%s:", file2); | |
+ range(c, d, ","); | |
+ break; | |
+ case 'f': | |
+ Bputc(&stdout, verb); | |
+ range(a, b, " "); | |
+ break; | |
+ case 'c': | |
+ case 'a': | |
/* the queue is grown whenever nchanges reaches a multiple of 1024 */
+ if(nchanges%1024 == 0) | |
+ changes = erealloc(changes, (nchanges+1024)*sizeof(cha… | |
+ ch = &changes[nchanges++]; | |
+ ch->a = a; | |
+ ch->b = b; | |
+ ch->c = c; | |
+ ch->d = d; | |
+ return; | |
+ } | |
+ Bputc(&stdout, '\n'); | |
/* normal and -n output show the old lines, then "---" if both sides have lines */
+ if (mode == 0 || mode == 'n') { | |
+ fetch(ixold, a, b, input[0], "< "); | |
+ if (a <= b && c <= d) | |
+ Bprint(&stdout, "---\n"); | |
+ } | |
+ fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": ""); | |
/* script modes end inserted text with a line holding a single "." */
+ if (mode != 0 && mode != 'n' && c <= d) | |
+ Bprint(&stdout, ".\n"); | |
+ } | |
+ | |
+enum | |
+{ | |
+ Lines = 3 /* number of lines of context shown */ | |
+}; | |
+ | |
+int | |
+changeset(int i) | |
+{ | |
+ while(i<nchanges && changes[i].b+1+2*Lines > changes[i+1].a) | |
+ i++; | |
+ if(i<nchanges) | |
+ return i+1; | |
+ return nchanges; | |
+} | |
+ | |
/*
 * Print the changes queued by change() in modes 'c' and 'a', then
 * empty the queue.  Changes are grouped with changeset(); each group
 * is printed as a "file1:range - file2:range" header followed by the
 * old-file lines before, between and after the changes and the
 * removed/added lines of each change.
 */
+ void | |
+ flushchanges(void) | |
+ { | |
+ int a, b, c, d, at; | |
+ int i, j; | |
+ | |
+ if(nchanges == 0) | |
+ return; | |
+ | |
+ for(i=0; i<nchanges; ){ | |
+ j = changeset(i); | |
/* widen the group by Lines of context on both sides, clipped to the files */
+ a = changes[i].a-Lines; | |
+ b = changes[j-1].b+Lines; | |
+ c = changes[i].c-Lines; | |
+ d = changes[j-1].d+Lines; | |
+ if(a < 1) | |
+ a = 1; | |
+ if(c < 1) | |
+ c = 1; | |
+ if(b > len[0]) | |
+ b = len[0]; | |
+ if(d > len[1]) | |
+ d = len[1]; | |
/* mode 'a': a single group covering both files completely */
+ if(mode == 'a'){ | |
+ a = 1; | |
+ b = len[0]; | |
+ c = 1; | |
+ d = len[1]; | |
+ j = nchanges; | |
+ } | |
+ Bprint(&stdout, "%s:", file1); | |
+ range(a, b, ","); | |
+ Bprint(&stdout, " - "); | |
+ Bprint(&stdout, "%s:", file2); | |
+ range(c, d, ","); | |
+ Bputc(&stdout, '\n'); | |
/* `at` is the next old-file line that has not been printed yet */
+ at = a; | |
+ for(; i<j; i++){ | |
+ fetch(ixold, at, changes[i].a-1, input[0], " "); | |
+ fetch(ixold, changes[i].a, changes[i].b, input[0], "- … | |
+ fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ … | |
+ at = changes[i].b+1; | |
+ } | |
+ fetch(ixold, at, b, input[0], " "); | |
+ } | |
+ nchanges = 0; | |
+ } | |
diff --git a/diff/diffreg.c b/diff/diffreg.c | |
@@ -0,0 +1,420 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+#include "diff.h" | |
+ | |
+/* diff - differential file comparison | |
+* | |
+* Uses an algorithm due to Harold Stone, which finds | |
+* a pair of longest identical subsequences in the two | |
+* files. | |
+* | |
+* The major goal is to generate the match vector J. | |
+* J[i] is the index of the line in file1 corresponding | |
+* to line i file0. J[i] = 0 if there is no | |
+* such line in file1. | |
+* | |
+* Lines are hashed so as to work in core. All potential | |
+* matches are located by sorting the lines of each file | |
+* on the hash (called value). In particular, this | |
+* collects the equivalence classes in file1 together. | |
+* Subroutine equiv replaces the value of each line in | |
+* file0 by the index of the first element of its | |
+* matching equivalence in (the reordered) file1. | |
+* To save space equiv squeezes file1 into a single | |
+* array member in which the equivalence classes | |
+* are simply concatenated, except that their first | |
+* members are flagged by changing sign. | |
+* | |
+* Next the indices that point into member are unsorted into | |
+* array class according to the original order of file0. | |
+* | |
+* The cleverness lies in routine stone. This marches | |
+* through the lines of file0, developing a vector klist | |
+* of "k-candidates". At step i a k-candidate is a matched | |
+* pair of lines x,y (x in file0 y in file1) such that | |
+ * there is a common subsequence of length k | |
+* between the first i lines of file0 and the first y | |
+* lines of file1, but there is no such subsequence for | |
+* any smaller y. x is the earliest possible mate to y | |
+* that occurs in such a subsequence. | |
+* | |
+* Whenever any of the members of the equivalence class of | |
+* lines in file1 matable to a line in file0 has serial number | |
+* less than the y of some k-candidate, that k-candidate | |
+* with the smallest such y is replaced. The new | |
+* k-candidate is chained (via pred) to the current | |
+* k-1 candidate so that the actual subsequence can | |
+* be recovered. When a member has serial number greater | |
+ * than the y of all k-candidates, the klist is extended. | |
+* At the end, the longest subsequence is pulled out | |
+* and placed in the array J by unravel. | |
+* | |
+* With J in hand, the matches there recorded are | |
+* check'ed against reality to assure that no spurious | |
+* matches have crept in due to hashing. If they have, | |
+* they are broken, and "jackpot " is recorded--a harmless | |
+* matter except that a true match for a spuriously | |
+* mated line may now be unnecessarily reported as a change. | |
+* | |
+* Much of the complexity of the program comes simply | |
+* from trying to minimize core utilization and | |
+* maximize the range of doable problems by dynamically | |
+* allocating what is needed and reusing what is not. | |
+* The core requirements for problems larger than somewhat | |
+* are (in words) 2*length(file0) + length(file1) + | |
+* 3*(number of k-candidates installed), typically about | |
+* 6n words for files of length n. | |
+*/ | |
+/* TIDY THIS UP */ | |
+struct cand { | |
+ int x; | |
+ int y; | |
+ int pred; | |
+} cand; | |
+struct line { | |
+ int serial; | |
+ int value; | |
+} *file[2], line; | |
+int len[2]; | |
+int binary; | |
+struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/ | |
+int slen[2]; | |
+int pref, suff; /*length of prefix and suffix*/ | |
+int *class; /*will be overlaid on file[0]*/ | |
+int *member; /*will be overlaid on file[1]*/ | |
+int *klist; /*will be overlaid on file[0] after class*/ | |
+struct cand *clist; /* merely a free storage pot for candidates */ | |
+int clen; | |
+int *J; /*will be overlaid on class*/ | |
+long *ixold; /*will be overlaid on klist*/ | |
+long *ixnew; /*will be overlaid on file[1]*/ | |
+/* END OF SOME TIDYING */ | |
+ | |
+static void | |
+sort(struct line *a, int n) /*shellsort CACM #201*/ | |
+{ | |
+ int m; | |
+ struct line *ai, *aim, *j, *k; | |
+ struct line w; | |
+ int i; | |
+ | |
+ m = 0; | |
+ for (i = 1; i <= n; i *= 2) | |
+ m = 2*i - 1; | |
+ for (m /= 2; m != 0; m /= 2) { | |
+ k = a+(n-m); | |
+ for (j = a+1; j <= k; j++) { | |
+ ai = j; | |
+ aim = ai+m; | |
+ do { | |
+ if (aim->value > ai->value || | |
+ aim->value == ai->value && | |
+ aim->serial > ai->serial) | |
+ break; | |
+ w = *ai; | |
+ *ai = *aim; | |
+ *aim = w; | |
+ | |
+ aim = ai; | |
+ ai -= m; | |
+ } while (ai > a && aim >= ai); | |
+ } | |
+ } | |
+} | |
+ | |
+static void | |
+unsort(struct line *f, int l, int *b) | |
+{ | |
+ int *a; | |
+ int i; | |
+ | |
+ a = MALLOC(int, (l+1)); | |
+ for(i=1;i<=l;i++) | |
+ a[f[i].serial] = f[i].value; | |
+ for(i=1;i<=l;i++) | |
+ b[i] = a[i]; | |
+ FREE(a); | |
+} | |
+ | |
+static void | |
+prune(void) | |
+{ | |
+ int i,j; | |
+ | |
+ for(pref=0;pref<len[0]&&pref<len[1]&& | |
+ file[0][pref+1].value==file[1][pref+1].value; | |
+ pref++ ) ; | |
+ for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&& | |
+ file[0][len[0]-suff].value==file[1][len[1]-suff].value; | |
+ suff++) ; | |
+ for(j=0;j<2;j++) { | |
+ sfile[j] = file[j]+pref; | |
+ slen[j] = len[j]-pref-suff; | |
+ for(i=0;i<=slen[j];i++) | |
+ sfile[j][i].serial = i; | |
+ } | |
+} | |
+ | |
+static void | |
+equiv(struct line *a, int n, struct line *b, int m, int *c) | |
+{ | |
+ int i, j; | |
+ | |
+ i = j = 1; | |
+ while(i<=n && j<=m) { | |
+ if(a[i].value < b[j].value) | |
+ a[i++].value = 0; | |
+ else if(a[i].value == b[j].value) | |
+ a[i++].value = j; | |
+ else | |
+ j++; | |
+ } | |
+ while(i <= n) | |
+ a[i++].value = 0; | |
+ b[m+1].value = 0; | |
+ j = 0; | |
+ while(++j <= m) { | |
+ c[j] = -b[j].serial; | |
+ while(b[j+1].value == b[j].value) { | |
+ j++; | |
+ c[j] = b[j].serial; | |
+ } | |
+ } | |
+ c[j] = -1; | |
+} | |
+ | |
+static int | |
+newcand(int x, int y, int pred) | |
+{ | |
+ struct cand *q; | |
+ | |
+ clist = REALLOC(clist, struct cand, (clen+1)); | |
+ q = clist + clen; | |
+ q->x = x; | |
+ q->y = y; | |
+ q->pred = pred; | |
+ return clen++; | |
+} | |
+ | |
+static int | |
+search(int *c, int k, int y) | |
+{ | |
+ int i, j, l; | |
+ int t; | |
+ | |
+ if(clist[c[k]].y < y) /*quick look for typical case*/ | |
+ return k+1; | |
+ i = 0; | |
+ j = k+1; | |
+ while((l=(i+j)/2) > i) { | |
+ t = clist[c[l]].y; | |
+ if(t > y) | |
+ j = l; | |
+ else if(t < y) | |
+ i = l; | |
+ else | |
+ return l; | |
+ } | |
+ return l+1; | |
+} | |
+ | |
/*
 * Build the k-candidate list described in the comment at the top of
 * this file.  diffreg() calls it as stone(class, slen[0], member,
 * klist):
 *	a[1..n]	for each line of file0, an index into b (0: no match)
 *	b	lists of file1 serials; the first entry of each list is
 *		stored negated (see equiv())
 *	c	receives, for each k, the index in clist of the current
 *		k-candidate
 * Returns the largest k for which a candidate was installed.
 */
+ static int | |
+ stone(int *a, int n, int *b, int *c) | |
+ { | |
+ int i, k,y; | |
+ int j, l; | |
+ int oldc, tc; | |
+ int oldl; | |
+ | |
+ k = 0; | |
+ c[0] = newcand(0,0,0); | |
+ for(i=1; i<=n; i++) { | |
+ j = a[i]; | |
+ if(j==0) | |
+ continue; | |
/* un-negate the first entry of the list */
+ y = -b[j]; | |
+ oldl = 0; | |
+ oldc = c[0]; | |
/* note: each `continue' below jumps to the while test, which fetches the next y */
+ do { | |
+ if(y <= clist[oldc].y) | |
+ continue; | |
+ l = search(c, k, y); | |
+ if(l!=oldl+1) | |
+ oldc = c[l-1]; | |
+ if(l<=k) { | |
+ if(clist[c[l]].y <= y) | |
+ continue; | |
+ tc = c[l]; | |
+ c[l] = newcand(i,y,oldc); | |
+ oldc = tc; | |
+ oldl = l; | |
+ } else { | |
+ c[l] = newcand(i,y,oldc); | |
+ k++; | |
+ break; | |
+ } | |
/* the list ends at the next non-positive entry */
+ } while((y=b[++j]) > 0); | |
+ } | |
+ return k; | |
+ } | |
+ | |
+static void | |
+unravel(int p) | |
+{ | |
+ int i; | |
+ struct cand *q; | |
+ | |
+ for(i=0; i<=len[0]; i++) { | |
+ if (i <= pref) | |
+ J[i] = i; | |
+ else if (i > len[0]-suff) | |
+ J[i] = i+len[1]-len[0]; | |
+ else | |
+ J[i] = 0; | |
+ } | |
+ for(q=clist+p;q->y!=0;q=clist+q->pred) | |
+ J[q->x+pref] = q->y+pref; | |
+} | |
+ | |
/*
 * Walk the match vector J and call change() for every maximal run of
 * unmatched lines, then flushchanges().  In mode 'e' the runs are
 * visited from the end of the file towards the start; in every other
 * mode from the start.  J is modified while scanning.
 */
+ static void | |
+ output(void) | |
+ { | |
+ int m, i0, i1, j0, j1; | |
+ | |
+ m = len[0]; | |
/* sentinels on both ends of J */
+ J[0] = 0; | |
+ J[m+1] = len[1]+1; | |
+ if (mode != 'e') { | |
+ for (i0 = 1; i0 <= m; i0 = i1+1) { | |
/* skip lines matched to consecutive lines of the other file */
+ while (i0 <= m && J[i0] == J[i0-1]+1) | |
+ i0++; | |
+ j0 = J[i0-1]+1; | |
+ i1 = i0-1; | |
/* extend over the unmatched lines that follow */
+ while (i1 < m && J[i1+1] == 0) | |
+ i1++; | |
+ j1 = J[i1+1]-1; | |
+ J[i1] = j1; | |
+ change(i0, i1, j0, j1); | |
+ } | |
+ } | |
+ else { | |
/* same scan, mirrored */
+ for (i0 = m; i0 >= 1; i0 = i1-1) { | |
+ while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0]) | |
+ i0--; | |
+ j0 = J[i0+1]-1; | |
+ i1 = i0+1; | |
+ while (i1 > 1 && J[i1-1] == 0) | |
+ i1--; | |
+ j1 = J[i1-1]+1; | |
+ J[i1] = j1; | |
+ change(i1 , i0, j1, j0); | |
+ } | |
+ } | |
/* empty first file: the loops above never ran */
+ if (m == 0) | |
+ change(1, 0, 1, len[1]); | |
+ flushchanges(); | |
+ } | |
+ | |
+#define BUF 4096 | |
+static int | |
+cmp(Biobuf* b1, Biobuf* b2) | |
+{ | |
+ int n; | |
+ uchar buf1[BUF], buf2[BUF]; | |
+ int f1, f2; | |
+ vlong nc = 1; | |
+ uchar *b1s, *b1e, *b2s, *b2e; | |
+ | |
+ f1 = Bfildes(b1); | |
+ f2 = Bfildes(b2); | |
+ seek(f1, 0, 0); | |
+ seek(f2, 0, 0); | |
+ b1s = b1e = buf1; | |
+ b2s = b2e = buf2; | |
+ for(;;){ | |
+ if(b1s >= b1e){ | |
+ if(b1s >= &buf1[BUF]) | |
+ b1s = buf1; | |
+ n = read(f1, b1s, &buf1[BUF] - b1s); | |
+ b1e = b1s + n; | |
+ } | |
+ if(b2s >= b2e){ | |
+ if(b2s >= &buf2[BUF]) | |
+ b2s = buf2; | |
+ n = read(f2, b2s, &buf2[BUF] - b2s); | |
+ b2e = b2s + n; | |
+ } | |
+ n = b2e - b2s; | |
+ if(n > b1e - b1s) | |
+ n = b1e - b1s; | |
+ if(n <= 0) | |
+ break; | |
+ if(memcmp((void *)b1s, (void *)b2s, n) != 0){ | |
+ return 1; | |
+ } | |
+ nc += n; | |
+ b1s += n; | |
+ b2s += n; | |
+ } | |
+ if(b1e - b1s == b2e - b2s) | |
+ return 0; | |
+ return 1; | |
+} | |
+ | |
+void | |
+diffreg(char *f, char *t) | |
+{ | |
+ Biobuf *b0, *b1; | |
+ int k; | |
+ | |
+ binary = 0; | |
+ b0 = prepare(0, f); | |
+ if (!b0) | |
+ return; | |
+ b1 = prepare(1, t); | |
+ if (!b1) { | |
+ FREE(file[0]); | |
+ Bterm(b0); | |
+ return; | |
+ } | |
+ if (binary){ | |
+ /* could use b0 and b1 but this is simpler. */ | |
+ if (cmp(b0, b1)) | |
+ print("binary files %s %s differ\n", f, t); | |
+ Bterm(b0); | |
+ Bterm(b1); | |
+ return; | |
+ } | |
+ clen = 0; | |
+ prune(); | |
+ sort(sfile[0], slen[0]); | |
+ sort(sfile[1], slen[1]); | |
+ | |
+ member = (int *)file[1]; | |
+ equiv(sfile[0], slen[0], sfile[1], slen[1], member); | |
+ member = REALLOC(member, int, slen[1]+2); | |
+ | |
+ class = (int *)file[0]; | |
+ unsort(sfile[0], slen[0], class); | |
+ class = REALLOC(class, int, slen[0]+2); | |
+ | |
+ klist = MALLOC(int, slen[0]+2); | |
+ clist = MALLOC(struct cand, 1); | |
+ k = stone(class, slen[0], member, klist); | |
+ FREE(member); | |
+ FREE(class); | |
+ | |
+ J = MALLOC(int, len[0]+2); | |
+ unravel(klist[k]); | |
+ FREE(clist); | |
+ FREE(klist); | |
+ | |
+ ixold = MALLOC(long, len[0]+2); | |
+ ixnew = MALLOC(long, len[1]+2); | |
+ Bseek(b0, 0, 0); Bseek(b1, 0, 0); | |
+ check(b0, b1); | |
+ output(); | |
+ FREE(J); FREE(ixold); FREE(ixnew); | |
+ Bterm(b0); Bterm(b1); /* ++++ */ | |
+} | |
diff --git a/diff/main.c b/diff/main.c | |
@@ -0,0 +1,270 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+#include "diff.h" | |
+ | |
+#define DIRECTORY(s) ((s)->qid.type&QTDIR) | |
+#define REGULAR_FILE(s) ((s)->type == 'M' && !DIRECTORY(… | |
+ | |
+Biobuf stdout; | |
+ | |
+static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"}; | |
+static int whichtmp; | |
+static char *progname; | |
+static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n"; | |
+ | |
+static void | |
+rmtmpfiles(void) | |
+{ | |
+ while (whichtmp > 0) { | |
+ whichtmp--; | |
+ remove(tmp[whichtmp]); | |
+ } | |
+} | |
+ | |
/*
 * Clean up the temporary files and exit: status 0 exits with "",
 * status 1 with "some", anything else with "error".
 */
void
done(int status)
{
	rmtmpfiles();
	if (status == 0)
		exits("");
	if (status == 1)
		exits("some");
	exits("error");
	/*NOTREACHED*/
}
+ | |
+void | |
+panic(int status, char *fmt, ...) | |
+{ | |
+ va_list arg; | |
+ | |
+ Bflush(&stdout); | |
+ | |
+ fprint(2, "%s: ", progname); | |
+ va_start(arg, fmt); | |
+ vfprint(2, fmt, arg); | |
+ va_end(arg); | |
+ if (status) | |
+ done(status); | |
+ /*NOTREACHED*/ | |
+} | |
+ | |
/*
 * Note handler registered with atnotify() by mktmpfile(): report the
 * note text and exit with status 2 through panic().
 * The text is passed as an argument, never as the format string, so
 * a '%' inside the note cannot be interpreted as a conversion.
 */
static int
catch(void *a, char *msg)
{
	USED(a);
	panic(2, "%s", msg);
	return 1;
}
+ | |
+int | |
+mkpathname(char *pathname, char *path, char *name) | |
+{ | |
+ if (strlen(path) + strlen(name) > MAXPATHLEN) { | |
+ panic(0, "pathname %s/%s too long\n", path, name); | |
+ return 1; | |
+ } | |
+ sprint(pathname, "%s/%s", path, name); | |
+ return 0; | |
+} | |
+ | |
+static char * | |
+mktmpfile(int input, Dir **sb) | |
+{ | |
+ int fd, i; | |
+ char *p; | |
+ char buf[8192]; | |
+ | |
+ atnotify(catch, 1); | |
+/* | |
+ p = mktemp(tmp[whichtmp++]); | |
+ fd = create(p, OWRITE, 0600); | |
+*/ | |
+ fd = mkstemp(p=tmp[whichtmp++]); | |
+ if (fd < 0) { | |
+ panic(mflag ? 0: 2, "cannot create %s: %r\n", p); | |
+ return 0; | |
+ } | |
+ while ((i = read(input, buf, sizeof(buf))) > 0) { | |
+ if ((i = write(fd, buf, i)) < 0) | |
+ break; | |
+ } | |
+ *sb = dirfstat(fd); | |
+ close(fd); | |
+ if (i < 0) { | |
+ panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p); | |
+ return 0; | |
+ } | |
+ return p; | |
+} | |
+ | |
+static char * | |
+statfile(char *file, Dir **sb) | |
+{ | |
+ Dir *dir; | |
+ int input; | |
+ | |
+ dir = dirstat(file); | |
+ if(dir == nil) { | |
+ if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) { | |
+ panic(mflag ? 0: 2, "cannot stat %s: %r\n", file); | |
+ return 0; | |
+ } | |
+ free(dir); | |
+ return mktmpfile(0, sb); | |
+ } | |
+ else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) { | |
+ free(dir); | |
+ if ((input = open(file, OREAD)) == -1) { | |
+ panic(mflag ? 0: 2, "cannot open %s: %r\n", file); | |
+ return 0; | |
+ } | |
+ file = mktmpfile(input, sb); | |
+ close(input); | |
+ } | |
+ else | |
+ *sb = dir; | |
+ return file; | |
+} | |
+ | |
+void | |
+diff(char *f, char *t, int level) | |
+{ | |
+ char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1]; | |
+ Dir *fsb, *tsb; | |
+ | |
+ if ((fp = statfile(f, &fsb)) == 0) | |
+ goto Return; | |
+ if ((tp = statfile(t, &tsb)) == 0){ | |
+ free(fsb); | |
+ goto Return; | |
+ } | |
+ if (DIRECTORY(fsb) && DIRECTORY(tsb)) { | |
+ if (rflag || level == 0) | |
+ diffdir(fp, tp, level); | |
+ else | |
+ Bprint(&stdout, "Common subdirectories: %s and %s\n", | |
+ fp, tp); | |
+ } | |
+ else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb)) | |
+ diffreg(fp, tp); | |
+ else { | |
+ if (REGULAR_FILE(fsb)) { | |
+ if ((p = utfrrune(f, '/')) == 0) | |
+ p = f; | |
+ else | |
+ p++; | |
+ if (mkpathname(tb, tp, p) == 0) | |
+ diffreg(fp, tb); | |
+ } | |
+ else { | |
+ if ((p = utfrrune(t, '/')) == 0) | |
+ p = t; | |
+ else | |
+ p++; | |
+ if (mkpathname(fb, fp, p) == 0) | |
+ diffreg(fb, tp); | |
+ } | |
+ } | |
+ free(fsb); | |
+ free(tsb); | |
+Return: | |
+ rmtmpfiles(); | |
+} | |
+ | |
+void | |
+main(int argc, char *argv[]) | |
+{ | |
+ char *p; | |
+ int i; | |
+ Dir *fsb, *tsb; | |
+ extern int _p9usepwlibrary; | |
+ | |
+ _p9usepwlibrary = 0; | |
+ Binit(&stdout, 1, OWRITE); | |
+ progname = *argv; | |
+ while (--argc && (*++argv)[0] == '-' && (*argv)[1]) { | |
+ for (p = *argv+1; *p; p++) { | |
+ switch (*p) { | |
+ | |
+ case 'e': | |
+ case 'f': | |
+ case 'n': | |
+ case 'c': | |
+ case 'a': | |
+ mode = *p; | |
+ break; | |
+ | |
+ case 'w': | |
+ bflag = 2; | |
+ break; | |
+ | |
+ case 'b': | |
+ bflag = 1; | |
+ break; | |
+ | |
+ case 'r': | |
+ rflag = 1; | |
+ mflag = 1; | |
+ break; | |
+ | |
+ case 'm': | |
+ mflag = 1; | |
+ break; | |
+ | |
+ case 'h': | |
+ default: | |
+ progname = "Usage"; | |
+ panic(2, usage); | |
+ } | |
+ } | |
+ } | |
+ if (argc < 2) | |
+ panic(2, usage, progname); | |
+ if ((tsb = dirstat(argv[argc-1])) == nil) | |
+ panic(2, "can't stat %s\n", argv[argc-1]); | |
+ if (argc > 2) { | |
+ if (!DIRECTORY(tsb)) | |
+ panic(2, usage, progname); | |
+ mflag = 1; | |
+ } | |
+ else { | |
+ if ((fsb = dirstat(argv[0])) == nil) | |
+ panic(2, "can't stat %s\n", argv[0]); | |
+ if (DIRECTORY(fsb) && DIRECTORY(tsb)) | |
+ mflag = 1; | |
+ free(fsb); | |
+ } | |
+ free(tsb); | |
+ for (i = 0; i < argc-1; i++) | |
+ diff(argv[i], argv[argc-1], 0); | |
+ done(anychange); | |
+ /*NOTREACHED*/ | |
+} | |
+ | |
+static char noroom[] = "out of memory - try diff -h\n"; | |
+ | |
+void * | |
+emalloc(unsigned n) | |
+{ | |
+ register void *p; | |
+ | |
+ if ((p = malloc(n)) == 0) | |
+ panic(2, noroom); | |
+ return p; | |
+} | |
+ | |
+void * | |
+erealloc(void *p, unsigned n) | |
+{ | |
+ register void *rp; | |
+ | |
+ if ((rp = realloc(p, n)) == 0) | |
+ panic(2, noroom); | |
+ return rp; | |
+} | |
diff --git a/join/Makefile b/join/Makefile | |
@@ -0,0 +1,10 @@ | |
+# join - join unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = join | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/join/join.1 b/join/join.1 | |
@@ -0,0 +1,147 @@ | |
+.TH JOIN 1 | |
+.CT 1 files | |
+.SH NAME | |
+join \- relational database operator | |
+.SH SYNOPSIS | |
+.B join | |
+[ | |
+.I options | |
+] | |
+.I file1 file2 | |
+.SH DESCRIPTION | |
+.I Join | |
+forms, on the standard output, | |
+a join | |
+of the two relations specified by the lines of | |
+.I file1 | |
+and | |
+.IR file2 . | |
+If one of the file names is | |
+.LR - , | |
+the standard input is used. | |
+.PP | |
+.I File1 | |
+and | |
+.I file2 | |
+must be sorted in increasing | |
+.SM ASCII | |
+collating | |
+sequence on the fields | |
+on which they are to be joined, | |
+normally the first in each line. | |
+.PP | |
+There is one line in the output | |
+for each pair of lines in | |
+.I file1 | |
+and | |
+.I file2 | |
+that have identical join fields. | |
+The output line normally consists of the common field, | |
+then the rest of the line from | |
+.IR file1 , | |
+then the rest of the line from | |
+.IR file2 . | |
+.PP | |
+Input fields are normally separated by spaces or tabs; | |
+output fields by space. | |
+In this case, multiple separators count as one, and | |
+leading separators are discarded. | |
+.PP | |
+The following options are recognized, with POSIX syntax. | |
+.TP | |
+.BI -a " n | |
+In addition to the normal output, | |
+produce a line for each unpairable line in file | |
+.IR n , | |
+where | |
+.I n | |
+is 1 or 2. | |
+.TP | |
+.BI -v " n | |
+Like | |
+.BR -a , | |
+omitting output for paired lines. | |
+.TP | |
+.BI -e " s | |
+Replace empty output fields by string | |
+.IR s . | |
+.TP | |
+.BI -1 " m | |
+.br | |
+.ns | |
+.TP | |
+.BI -2 " m | |
+Join on the | |
+.IR m th | |
+field of | |
+.I file1 | |
+or | |
+.IR file2 . | |
+.TP | |
+.BI -j "n m" | |
+Archaic equivalent for | |
+.BI - n " m"\f1. | |
+.TP | |
+.BI -o fields | |
+Each output line comprises the designated fields. | |
+The comma-separated field designators are either | |
+.BR 0 , | |
+meaning the join field, or have the form | |
+.IR n . m , | |
+where | |
+.I n | |
+is a file number and | |
+.I m | |
+is a field number. | |
+Archaic usage allows separate arguments for field designators. | |
+.PP | |
+.TP | |
+.BI -t c | |
+Use character | |
+.I c | |
+as the only separator (tab character) on input and output. | |
+Every appearance of | |
+.I c | |
+in a line is significant. | |
+.SH EXAMPLES | |
+.TP | |
+.L | |
+sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays | |
+Add birthdays to the | |
+.B /etc/passwd | |
+file, leaving unknown | |
+birthdays empty. | |
+The layout of | |
+.B /etc/passwd | |
+is given in | |
+.IR passwd (5); | |
+.B bdays | |
+contains sorted lines like | |
+.LR "ken:Feb\ 4,\ 1953" . | |
+.TP | |
+.L | |
+tr : ' ' </etc/passwd | sort -k 3,3 >temp | |
+.br | |
+.ns | |
+.TP | |
+.L | |
+join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2' | |
+Print all pairs of users with identical userids. | |
+.SH SOURCE | |
+.B \*9/src/cmd/join.c | |
+.SH "SEE ALSO" | |
+.IR sort (1), | |
+.IR comm (1), | |
+.IR awk (1) | |
+.SH BUGS | |
+With default field separation, | |
+the collating sequence is that of | |
+.BI "sort -b" | |
+.BI -k y , y\f1; | |
+with | |
+.BR -t , | |
+the sequence is that of | |
+.BI "sort -t" x | |
+.BI -k y , y\f1. | |
+.PP | |
+One of the files must be randomly accessible. | |
diff --git a/join/join.c b/join/join.c | |
@@ -0,0 +1,369 @@ | |
+/* join F1 F2 on stuff */ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <stdio.h> | |
+#include <ctype.h> | |
+/* indices of the two input files in f[], buf[] and ppi[] */ | |
+#define F1 0 | |
+#define F2 1 | |
+/* olistf[] marker meaning "the join field"; note it is not a valid row of ppi[] */ | |
+#define F0 3 | |
+#define NFLD 100 /* max field per line */ | |
+/* compare the current join fields of the two input lines */ | |
+#define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) | |
+FILE *f[2]; | |
+Rune buf[2][BUFSIZ]; /*input lines */ | |
+Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ | |
+/* assigned once in main(); not read anywhere else in this file */ | |
+Rune *s1,*s2; | |
+/* rename j1/j2 (presumably to avoid clashing with the math library's j1()/j2() -- TODO confirm) */ | |
+#define j1 joinj1 | |
+#define j2 joinj2 | |
+ | |
+int j1 = 1; /* join of this field of file 1 */ | |
+int j2 = 1; /* join of this field of file 2 */ | |
+int olist[2*NFLD]; /* output these fields */ | |
+int olistf[2*NFLD]; /* from these files */ | |
+int no; /* number of entries in olist */ | |
+Rune sep1 = ' '; /* default field separator */ | |
+Rune sep2 = '\t'; | |
+/* string printed between output fields; replaced by the -t argument */ | |
+char *sepstr=" "; | |
+int discard; /* count of truncated lines */ | |
+/* replacement text for empty output fields, filled in by -e */ | |
+Rune null[BUFSIZ]/* = L""*/; | |
+/* set by -a 1 / -a 2: also print unpaired lines of file 1 / file 2 */ | |
+int a1; | |
+int a2; | |
+ | |
+char *getoptarg(int*, char***); | |
+void output(int, int); | |
+int input(int); | |
+void oparse(char*); | |
+void error(char*, char*); | |
+void seek1(void), seek2(void); | |
+Rune *strtorune(Rune *, char *); | |
+ | |
+ | |
+/* | |
+ * join entry point: parse the options by hand, open the two inputs | |
+ * ("-" means standard input) and run seek2() or seek1() depending on | |
+ * which input reports a usable ftell() position. | |
+ * NOTE(review): join.1 documents -v, but no case below handles it; | |
+ * unknown option letters are skipped silently because the switch has | |
+ * no default. | |
+ */ | |
+void | |
+main(int argc, char **argv) | |
+{ | |
+ int i; | |
+ | |
+ while (argc > 1 && argv[1][0] == '-') { | |
+ /* a bare "-" is a file name (standard input), not an option */ | |
+ if (argv[1][1] == '\0') | |
+ break; | |
+ switch (argv[1][1]) { | |
+ /* "--" ends the options */ | |
+ case '-': | |
+ argc--; | |
+ argv++; | |
+ goto proceed; | |
+ case 'a': | |
+ switch(*getoptarg(&argc, &argv)) { | |
+ case '1': | |
+ a1++; | |
+ break; | |
+ case '2': | |
+ a2++; | |
+ break; | |
+ default: | |
+ error("incomplete option -a",""); | |
+ } | |
+ break; | |
+ /* -e s: text substituted for empty output fields; copied without a length check */ | |
+ case 'e': | |
+ strtorune(null, getoptarg(&argc, &argv)); | |
+ break; | |
+ /* -t c: the first rune of the argument becomes the only separator */ | |
+ case 't': | |
+ sepstr=getoptarg(&argc, &argv); | |
+ chartorune(&sep1, sepstr); | |
+ sep2 = sep1; | |
+ break; | |
+ /* -o: either one comma-separated list (oparse) or the archaic form with one designator per argument */ | |
+ case 'o': | |
+ if(argv[1][2]!=0 || | |
+ argc>2 && strchr(argv[2],',')!=0) | |
+ oparse(getoptarg(&argc, &argv)); | |
+ else for (no = 0; no<2*NFLD && argc>2; no++){ | |
+ if (argv[2][0] == '1' && argv[2][1] == '.') { | |
+ olistf[no] = F1; | |
+ olist[no] = atoi(&argv[2][2]); | |
+ } else if (argv[2][0] == '2' && argv[2][1] == … | |
+ olist[no] = atoi(&argv[2][2]); | |
+ olistf[no] = F2; | |
+ } else if (argv[2][0] == '0') | |
+ olistf[no] = F0; | |
+ else | |
+ break; | |
+ argc--; | |
+ argv++; | |
+ } | |
+ break; | |
+ /* archaic -j[12] m: set one or both join fields */ | |
+ case 'j': | |
+ if(argc <= 2) | |
+ break; | |
+ if (argv[1][2] == '1') | |
+ j1 = atoi(argv[2]); | |
+ else if (argv[1][2] == '2') | |
+ j2 = atoi(argv[2]); | |
+ else | |
+ j1 = j2 = atoi(argv[2]); | |
+ argc--; | |
+ argv++; | |
+ break; | |
+ case '1': | |
+ j1 = atoi(getoptarg(&argc, &argv)); | |
+ break; | |
+ case '2': | |
+ j2 = atoi(getoptarg(&argc, &argv)); | |
+ break; | |
+ } | |
+ argc--; | |
+ argv++; | |
+ } | |
+proceed: | |
+ /* convert every -o field number to 0 origin; only the upper bound is checked */ | |
+ for (i = 0; i < no; i++) | |
+ if (olist[i]-- > NFLD) /* 0 origin */ | |
+ error("field number too big in -o",""); | |
+ if (argc != 3) | |
+ error("usage: join [-1 x -2 y] [-o list] file1 file2",""); | |
+ /* NOTE(review): j1/j2 come straight from atoi() and are not range-checked before being used as ppi[] indices */ | |
+ j1--; | |
+ j2--; /* everyone else believes in 0 origin */ | |
+ s1 = ppi[F1][j1]; | |
+ s2 = ppi[F2][j2]; | |
+ if (strcmp(argv[1], "-") == 0) | |
+ f[F1] = stdin; | |
+ else if ((f[F1] = fopen(argv[1], "r")) == 0) | |
+ error("can't open %s", argv[1]); | |
+ if(strcmp(argv[2], "-") == 0) { | |
+ f[F2] = stdin; | |
+ } else if ((f[F2] = fopen(argv[2], "r")) == 0) | |
+ error("can't open %s", argv[2]); | |
+ | |
+ /* the join needs to rewind one input; prefer file 2, fall back to file 1 */ | |
+ if(ftell(f[F2]) >= 0) | |
+ seek2(); | |
+ else if(ftell(f[F1]) >= 0) | |
+ seek1(); | |
+ else | |
+ error("neither file is randomly accessible",""); | |
+ if (discard) | |
+ error("some input line was truncated", ""); | |
+ exits(""); | |
+} | |
+/* | |
+ * Compare two NUL-terminated rune strings. | |
+ * Returns 0 when equal, -1 when a sorts before b, 1 otherwise. | |
+ */ | |
+int | |
+runecmp(Rune *a, Rune *b) | |
+{ | |
+	for(; *a == *b; a++, b++) | |
+		if(*a == '\0') | |
+			return 0; | |
+	return *a < *b ? -1 : 1; | |
+} | |
+/* | |
+ * Encode the NUL-terminated rune string r as UTF into buf and | |
+ * terminate it. Returns buf. No length is passed, so the caller must | |
+ * supply a buffer large enough for the encoded text. | |
+ */ | |
+char * | |
+runetostr(char *buf, Rune *r) | |
+{ | |
+	char *out; | |
+ | |
+	out = buf; | |
+	while(*r){ | |
+		out += runetochar(out, r); | |
+		r++; | |
+	} | |
+	*out = '\0'; | |
+	return buf; | |
+} | |
+/* | |
+ * Decode the UTF string s into runes stored in buf and terminate the | |
+ * result. Returns buf. No length is passed, so the caller must supply | |
+ * a buffer with room for every rune plus the terminator. | |
+ */ | |
+Rune * | |
+strtorune(Rune *buf, char *s) | |
+{ | |
+	Rune *dst; | |
+ | |
+	dst = buf; | |
+	while(*s){ | |
+		s += chartorune(dst, s); | |
+		dst++; | |
+	} | |
+	*dst = '\0'; | |
+	return buf; | |
+} | |
+/* lazy. there ought to be a clean way to combine seek1 & seek2 */ | |
+#define get1() n1=input(F1) | |
+#define get2() n2=input(F2) | |
+/* | |
+ * Merge-join the two inputs, rewinding file 2 whenever a run of equal | |
+ * keys in file 2 has to be replayed against the next line of file 1. | |
+ * n1/n2 hold the field counts of the current lines (0 at end of file). | |
+ * File offsets are kept in long, the type ftell() returns and fseek() | |
+ * takes, so they are not truncated through int. | |
+ */ | |
+void | |
+seek2(void) | |
+{ | |
+	int n1, n2; | |
+	long top2 = 0;		/* offset just past the current run of equal keys in file 2 */ | |
+	long bot2 = ftell(f[F2]);	/* offset of the first line of that run */ | |
+ | |
+	get1(); | |
+	get2(); | |
+	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { | |
+		if(n1>0 && n2>0 && comp()>0 || n1==0) { | |
+			/* file 2 is behind (or file 1 is exhausted): advance file 2 */ | |
+			if(a2) output(0, n2); | |
+			bot2 = ftell(f[F2]); | |
+			get2(); | |
+		} else if(n1>0 && n2>0 && comp()<0 || n2==0) { | |
+			/* file 1 is behind (or file 2 is exhausted): advance file 1 */ | |
+			if(a1) output(n1, 0); | |
+			get1(); | |
+		} else /*(n1>0 && n2>0 && comp()==0)*/ { | |
+			/* pair the current file 1 line with every equal line of file 2 */ | |
+			while(n2>0 && comp()==0) { | |
+				output(n1, n2); | |
+				top2 = ftell(f[F2]); | |
+				get2(); | |
+			} | |
+			/* rewind to the start of the run and take the next file 1 line */ | |
+			fseek(f[F2], bot2, 0); | |
+			get2(); | |
+			get1(); | |
+			for(;;) { | |
+				if(n1>0 && n2>0 && comp()==0) { | |
+					output(n1, n2); | |
+					get2(); | |
+				} else if(n1>0 && n2>0 && comp()<0 || n2==0) { | |
+					/* run replayed: rewind again for the following file 1 line */ | |
+					fseek(f[F2], bot2, 0); | |
+					get2(); | |
+					get1(); | |
+				} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ | |
+					/* file 1 moved past this key: resume after the run */ | |
+					fseek(f[F2], top2, 0); | |
+					bot2 = top2; | |
+					get2(); | |
+					break; | |
+				} | |
+			} | |
+		} | |
+	} | |
+} | |
+/* | |
+ * Mirror image of seek2(): merge-join the two inputs, rewinding file 1 | |
+ * whenever a run of equal keys in file 1 has to be replayed against | |
+ * the next line of file 2. n1/n2 hold the field counts of the current | |
+ * lines (0 at end of file). File offsets are kept in long, the type | |
+ * ftell() returns and fseek() takes. | |
+ */ | |
+void | |
+seek1(void) | |
+{ | |
+	int n1, n2; | |
+	long top1 = 0;		/* offset just past the current run of equal keys in file 1 */ | |
+	long bot1 = ftell(f[F1]);	/* offset of the first line of that run */ | |
+ | |
+	get1(); | |
+	get2(); | |
+	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { | |
+		if(n1>0 && n2>0 && comp()>0 || n1==0) { | |
+			/* file 2 is behind (or file 1 is exhausted): advance file 2 */ | |
+			if(a2) output(0, n2); | |
+			get2(); | |
+		} else if(n1>0 && n2>0 && comp()<0 || n2==0) { | |
+			/* file 1 is behind (or file 2 is exhausted): advance file 1 */ | |
+			if(a1) output(n1, 0); | |
+			bot1 = ftell(f[F1]); | |
+			get1(); | |
+		} else /*(n1>0 && n2>0 && comp()==0)*/ { | |
+			/* | |
+			 * Pair the current file 2 line with every equal line of | |
+			 * file 1. The loop advances file 1, so it must stop on | |
+			 * n1: input() leaves the old fields in place at end of | |
+			 * file, and testing n2 here would spin forever once | |
+			 * file 1 runs out inside a run of equal keys. | |
+			 */ | |
+			while(n1>0 && comp()==0) { | |
+				output(n1, n2); | |
+				top1 = ftell(f[F1]); | |
+				get1(); | |
+			} | |
+			/* rewind to the start of the run and take the next file 2 line */ | |
+			fseek(f[F1], bot1, 0); | |
+			get2(); | |
+			get1(); | |
+			for(;;) { | |
+				if(n1>0 && n2>0 && comp()==0) { | |
+					output(n1, n2); | |
+					get1(); | |
+				} else if(n1>0 && n2>0 && comp()>0 || n1==0) { | |
+					/* run replayed: rewind again for the following file 2 line */ | |
+					fseek(f[F1], bot1, 0); | |
+					get2(); | |
+					get1(); | |
+				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ | |
+					/* file 2 moved past this key: resume after the run */ | |
+					fseek(f[F1], top1, 0); | |
+					bot1 = top1; | |
+					get1(); | |
+					break; | |
+				} | |
+			} | |
+		} | |
+	} | |
+} | |
+ | |
+/* | |
+ * Read the next line of file n into buf[n] (as runes) and split it in | |
+ * place: each field start is stored in ppi[n][] and each separator is | |
+ * overwritten with a terminator. Returns the number of fields found, | |
+ * or 0 when fgets() reports end of file or an error; in that case | |
+ * buf[n] and ppi[n] keep the previous line. | |
+ */ | |
+int | |
+input(int n) /* get input line and split into fields */ | |
+{ | |
+ register int i, c; | |
+ Rune *bp; | |
+ Rune **pp; | |
+ char line[BUFSIZ]; | |
+ | |
+ bp = buf[n]; | |
+ pp = ppi[n]; | |
+ if (fgets(line, BUFSIZ, f[n]) == 0) | |
+ return(0); | |
+ strtorune(bp, line); | |
+ i = 0; | |
+ do { | |
+ i++; | |
+ /* with the default separators, runs of blanks and tabs count as one separator */ | |
+ if (sep1 == ' ') /* strip multiples */ | |
+ while ((c = *bp) == sep1 || c == sep2) | |
+ bp++; /* skip blanks */ | |
+ *pp++ = bp; /* record beginning */ | |
+ while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0… | |
+ bp++; | |
+ *bp++ = '\0'; /* mark end by overwriting blank */ | |
+ } while (c != '\n' && c != '\0' && i < NFLD-1); | |
+ /* the scan did not stop on a newline: the field limit was hit or the line had no newline */ | |
+ if (c != '\n') | |
+ discard++; | |
+ | |
+ *pp = 0; | |
+ return(i); | |
+} | |
+ | |
+/* | |
+ * Print one joined line on standard output. on1/on2 are the field | |
+ * counts of the current lines of file 1 and file 2; 0 means that side | |
+ * has no line (an unpaired line is being printed). Without -o the join | |
+ * field is printed first, then the other fields of each file; with -o | |
+ * exactly the fields listed in olist[]/olistf[] are printed. | |
+ * The local char buf[] shadows the global rune buffer of the same name. | |
+ */ | |
+void | |
+output(int on1, int on2) /* print items from olist */ | |
+{ | |
+ int i; | |
+ Rune *temp; | |
+ char buf[BUFSIZ]; | |
+ | |
+ if (no <= 0) { /* default case */ | |
+ printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); | |
+ for (i = 0; i < on1; i++) | |
+ if (i != j1) | |
+ printf("%s%s", sepstr, runetostr(buf, ppi[F1][… | |
+ for (i = 0; i < on2; i++) | |
+ if (i != j2) | |
+ printf("%s%s", sepstr, runetostr(buf, ppi[F2][… | |
+ printf("\n"); | |
+ } else { | |
+ for (i = 0; i < no; i++) { | |
+ /* designator 0: take the join field from whichever side has it */ | |
+ if (olistf[i]==F0 && on1>j1) | |
+ temp = ppi[F1][j1]; | |
+ else if (olistf[i]==F0 && on2>j2) | |
+ temp = ppi[F2][j2]; | |
+ else { | |
+ /* NOTE(review): reached with olistf[i]==F0 (3) when neither side has the join field; ppi has only 2 rows, so this index is out of range */ | |
+ temp = ppi[olistf[i]][olist[i]]; | |
+ /* a field missing from this line, or empty, prints as the -e string */ | |
+ if(olistf[i]==F1 && on1<=olist[i] || | |
+ olistf[i]==F2 && on2<=olist[i] || | |
+ *temp==0) | |
+ temp = null; | |
+ } | |
+ printf("%s", runetostr(buf, temp)); | |
+ if (i == no - 1) | |
+ printf("\n"); | |
+ else | |
+ printf("%s", sepstr); | |
+ } | |
+ } | |
+} | |
+ | |
+/* | |
+ * Print "join: " followed by the message on standard error and exit. | |
+ * s1 is used as a printf format with s2 as its single argument, and is | |
+ * also passed to exits() as the exit string. | |
+ */ | |
+void | |
+error(char *s1, char *s2) | |
+{ | |
+	fputs("join: ", stderr); | |
+	fprintf(stderr, s1, s2); | |
+	fputc('\n', stderr); | |
+	exits(s1); | |
+} | |
+ | |
+/* | |
+ * Return the argument of the option in argv[1]. If text follows the | |
+ * option letter ("-t:") that text is returned and nothing is consumed. | |
+ * Otherwise the next argument is the value: it must exist and must not | |
+ * start with '-', and *argcp / *argvp are stepped past the option word | |
+ * so that the value becomes the new argv[1]. | |
+ */ | |
+char * | |
+getoptarg(int *argcp, char ***argvp) | |
+{ | |
+	char **av = *argvp; | |
+	char *opt = av[1]; | |
+ | |
+	if(opt[2] != 0) | |
+		return opt + 2; | |
+	if(*argcp <= 2 || av[2][0] == '-') | |
+		error("incomplete option %s", opt); | |
+	--*argcp; | |
+	*argvp = av + 1; | |
+	return av[2]; | |
+} | |
+ | |
+/* | |
+ * Parse a -o list such as "0,1.2,2.10" into olistf[] (file selector) | |
+ * and olist[] (1-origin field number), leaving the entry count in no. | |
+ * Each designator is "0" (the join field) or "n.m" with n = 1 or 2; | |
+ * designators are separated by single commas. Anything else is | |
+ * reported through error(). At most 2*NFLD entries are stored. | |
+ */ | |
+void | |
+oparse(char *s) | |
+{ | |
+	for (no = 0; no<2*NFLD && *s; no++, s++) { | |
+		switch(*s) { | |
+		case 0: | |
+			return; | |
+		case '0': | |
+			olistf[no] = F0; | |
+			break; | |
+		case '1': | |
+		case '2': | |
+			if(s[1] == '.' && isdigit((uchar)s[2])) { | |
+				olistf[no] = *s=='1'? F1: F2; | |
+				olist[no] = atoi(s += 2); | |
+				/* | |
+				 * Step to the last digit of the field number so a | |
+				 * multi-digit number is consumed whole; otherwise | |
+				 * its remaining digits would be read as further | |
+				 * designators. | |
+				 */ | |
+				while(isdigit((uchar)s[1])) | |
+					s++; | |
+				break; | |
+			} /* fall thru */ | |
+		default: | |
+			error("invalid -o list", ""); | |
+		} | |
+		/* skip the comma; the loop increment then moves to the next designator */ | |
+		if(s[1] == ',') | |
+			s++; | |
+	} | |
+} | |
diff --git a/lib9/utf.h b/lib9/utf.h | |
@@ -11,7 +11,8 @@ enum | |
UTFmax = 3, /* maximum bytes per rune */ | |
Runesync = 0x80, /* cannot represent part of a U… | |
Runeself = 0x80, /* rune and UTF sequences are t… | |
- Runeerror = 0xFFFD /* decoding error in UTF */ | |
+ Runeerror = 0xFFFD, /* decoding error in UTF */ | |
+ Runemax = 0x10FFFF /* maximum rune value */ | |
}; | |
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */ | |
diff --git a/look/Makefile b/look/Makefile | |
@@ -0,0 +1,10 @@ | |
+# look - look unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = look | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/look/look.1 b/look/look.1 | |
@@ -0,0 +1,85 @@ | |
+.TH LOOK 1 | |
+.SH NAME | |
+look \- find lines in a sorted list | |
+.SH SYNOPSIS | |
+.B look | |
+[ | |
+.BI -dfnixt c | |
+] | |
+[ | |
+.I string | |
+] | |
+[ | |
+.I file | |
+] | |
+.SH DESCRIPTION | |
+.I Look | |
+consults a sorted | |
+.I file | |
+and prints all lines that begin with | |
+.IR string . | |
+It uses binary search. | |
+.PP | |
+The following options are recognized. | |
+Options | |
+.B dfnt | |
+affect comparisons as in | |
+.IR sort (1). | |
+.TP | |
+.B -i | |
+Interactive. | |
+There is no | |
+.I string | |
+argument; instead | |
+.I look | |
+takes lines from the standard input as strings to be looked up. | |
+.TP | |
+.B -x | |
+Exact. | |
+Print only lines of the file whose key matches | |
+.I string | |
+exactly. | |
+.TP | |
+.B -d | |
+`Directory' order: | |
+only letters, digits, | |
+tabs and blanks participate in comparisons. | |
+.TP | |
+.B -f | |
+Fold. | |
+Upper case letters compare equal to lower case. | |
+.TP | |
+.B -n | |
+Numeric comparison with initial string of digits, optional minus sign, | |
+and optional decimal point. | |
+.TP | |
+.BR -t [ \f2c\f1 ] | |
+Character | |
+.I c | |
+terminates the sort key in the | |
+.IR file . | |
+By default, tab terminates the key. If | |
+.I c | |
+is missing the entire line comprises the key. | |
+.PP | |
+If no | |
+.I file | |
+is specified, | |
+.B /lib/words | |
+is assumed, with collating sequence | |
+.BR df . | |
+.SH FILES | |
+.B /lib/words | |
+.SH SOURCE | |
+.B \*9/src/cmd/look.c | |
+.SH "SEE ALSO" | |
+.IR sort (1), | |
+.IR grep (1) | |
+.SH DIAGNOSTICS | |
+The exit status is | |
+.RB `` "not found" '' | |
+if no match is found, and | |
+.RB `` "no dictionary" '' | |
+if | |
+.I file | |
+or the default dictionary cannot be opened. | |
diff --git a/look/look.c b/look/look.c | |
@@ -0,0 +1,349 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+ /* Macros for Rune support of ctype.h-like functions */ | |
+ /* Each macro evaluates its argument more than once and only classifies ASCII. */ | |
+ | |
+#undef isupper | |
+#undef islower | |
+#undef isalpha | |
+#undef isdigit | |
+#undef isalnum | |
+#undef isspace | |
+#undef tolower | |
+#define isupper(r) ('A' <= (r) && (r) <= 'Z') | |
+#define islower(r) ('a' <= (r) && (r) <= 'z') | |
+#define isalpha(r) (isupper(r) || islower(r)) | |
+/* range covered by latin_fold_tab[] */ | |
+#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF) | |
+ | |
+#define isdigit(r) ('0' <= (r) && (r) <= '9') | |
+ | |
+#define isalnum(r) (isalpha(r) || isdigit(r)) | |
+ | |
+#define isspace(r) ((r) == ' ' || (r) == '\t' \ | |
+ || (0x0A <= (r) && (r) <= 0x0D)) | |
+ | |
+/* unconditional shift: only meaningful when isupper(r) holds */ | |
+#define tolower(r) ((r)-'A'+'a') | |
+ | |
+#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0)) | |
+ | |
+/* capacity, in runes, of entry[] and word[] */ | |
+#define WORDSIZ 4000 | |
+/* default dictionary; main() passes it through unsharp() */ | |
+char *filename = "#9/lib/words"; | |
+Biobuf *dfile; | |
+Biobuf bout; | |
+Biobuf bin; | |
+ | |
+/* option flags set in main(): -f, -d, -x, -i */ | |
+int fold; | |
+int direc; | |
+int exact; | |
+int iflag; | |
+int rev = 1; /*-1 for reverse-ordered file, not implemented*/ | |
+/* comparison in use: acomp by default, ncomp with -n */ | |
+int (*compare)(Rune*, Rune*); | |
+/* rune that ends the sort key in each line; changed by -t */ | |
+Rune tab = '\t'; | |
+/* entry: the line just read from the file; word: its canonical form from rcanon() */ | |
+Rune entry[WORDSIZ]; | |
+Rune word[WORDSIZ]; | |
+/* orig: the search string as given; key: its canonical form. Only 50 runes each. */ | |
+Rune key[50], orig[50]; | |
+Rune latin_fold_tab[] = | |
+{ | |
+/* Table to fold latin 1 characters to ASCII equivalents | |
+ based at Rune value 0xc0 | |
+ | |
+ À Á Â Ã Ä Å Æ Ç | |
+ È É Ê Ë Ì Í Î Ï | |
+ Ð Ñ Ò Ó Ô Õ Ö × | |
+ Ø Ù Ú Û Ü Ý Þ ß | |
+ à á â ã ä å æ ç | |
+ è é ê ë ì í î ï | |
+ ð ñ ò ó ô õ ö ÷ | |
+ ø ù ú û ü ý þ ÿ | |
+*/ | |
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', | |
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', | |
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , | |
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , | |
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', | |
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', | |
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , | |
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', | |
+}; | |
+ | |
+int locate(void); | |
+int acomp(Rune*, Rune*); | |
+int getword(Biobuf*, Rune *rp, int n); | |
+void torune(char*, Rune*); | |
+void rcanon(Rune*, Rune*); | |
+int ncomp(Rune*, Rune*); | |
+ | |
+/* | |
+ * look entry point: parse the options, pick the search string and the | |
+ * file, then print every line of the file that locate() and the scan | |
+ * below accept. With -i (or when no string is given) search strings | |
+ * are read from standard input, one per line, until end of input. | |
+ * Exits with "no dictionary" if the file cannot be opened and with | |
+ * "not found" if a command-line string has no match. | |
+ */ | |
+void | |
+main(int argc, char *argv[]) | |
+{ | |
+ int n; | |
+ | |
+ filename = unsharp(filename); | |
+ | |
+ Binit(&bin, 0, OREAD); | |
+ Binit(&bout, 1, OWRITE); | |
+ compare = acomp; | |
+ ARGBEGIN{ | |
+ case 'd': | |
+ direc++; | |
+ break; | |
+ case 'f': | |
+ fold++; | |
+ break; | |
+ case 'i': | |
+ iflag++; | |
+ break; | |
+ case 'n': | |
+ compare = ncomp; | |
+ break; | |
+ /* NOTE(review): the result of ARGF() is passed on unchecked; confirm what happens when -t has no argument */ | |
+ case 't': | |
+ chartorune(&tab,ARGF()); | |
+ break; | |
+ case 'x': | |
+ exact++; | |
+ break; | |
+ default: | |
+ fprint(2, "%s: bad option %c\n", argv0, ARGC()); | |
+ fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0… | |
+ exits("usage"); | |
+ } ARGEND | |
+ if(!iflag){ | |
+ /* first operand is the search string; torune() copies it into orig[50] without a length check */ | |
+ if(argc >= 1) { | |
+ torune(argv[0], orig); | |
+ argv++; | |
+ argc--; | |
+ } else | |
+ iflag++; | |
+ } | |
+ /* no file operand: use the default dictionary with -d and -f in effect */ | |
+ if(argc < 1) { | |
+ direc++; | |
+ fold++; | |
+ } else | |
+ filename = argv[0]; | |
+ if (!iflag) | |
+ rcanon(orig, key); | |
+ dfile = Bopen(filename, OREAD); | |
+ if(dfile == 0) { | |
+ fprint(2, "look: can't open %s\n", filename); | |
+ exits("no dictionary"); | |
+ } | |
+ if(!iflag) | |
+ if(!locate()) | |
+ exits("not found"); | |
+ do { | |
+ if(iflag) { | |
+ Bflush(&bout); | |
+ if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0]))) | |
+ exits(0); | |
+ rcanon(orig, key); | |
+ if(!locate()) | |
+ continue; | |
+ } | |
+ /* locate() left the first candidate in entry/word; NOTE(review): this test compares against key, the one in the loop below against orig */ | |
+ if (!exact || !acomp(word, key)) | |
+ Bprint(&bout, "%S\n", entry); | |
+ /* keep printing following lines while the key is a prefix of (or equal to) the line's key */ | |
+ while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { | |
+ rcanon(entry, word); | |
+ n = compare(key, word); | |
+ switch(n) { | |
+ case -1: | |
+ if(exact) | |
+ break; | |
+ /* without -x, falls through to case 0 */ | |
+ case 0: | |
+ if (!exact || !acomp(word, orig)) | |
+ Bprint(&bout, "%S\n", entry); | |
+ continue; | |
+ } | |
+ break; | |
+ } | |
+ } while(iflag); | |
+ exits(0); | |
+} | |
+ | |
+/* | |
+ * Position dfile at the first line whose canonical key is not before | |
+ * the search key. A binary search over byte offsets narrows the range | |
+ * first, then a linear scan from bot finds the line. | |
+ * Returns 1 with that line left in entry/word when the key equals it | |
+ * or (without -x) is a prefix of it; returns 0 otherwise. | |
+ */ | |
+int | |
+locate(void) | |
+{ | |
+ vlong top, bot, mid; | |
+ int c; | |
+ int n; | |
+ | |
+ bot = 0; | |
+ top = Bseek(dfile, 0L, 2); | |
+ for(;;) { | |
+ mid = (top+bot) / 2; | |
+ Bseek(dfile, mid, 0); | |
+ /* mid may fall inside a line: skip to the start of the next one */ | |
+ do | |
+ c = Bgetrune(dfile); | |
+ while(c>=0 && c!='\n'); | |
+ mid = Boffset(dfile); | |
+ if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) | |
+ break; | |
+ rcanon(entry, word); | |
+ n = compare(key, word); | |
+ switch(n) { | |
+ /* key is at or before this line: shrink from the top, stop once no progress is made */ | |
+ case -2: | |
+ case -1: | |
+ case 0: | |
+ if(top <= mid) | |
+ break; | |
+ top = mid; | |
+ continue; | |
+ /* key is after this line: raise the bottom */ | |
+ case 1: | |
+ case 2: | |
+ bot = mid; | |
+ continue; | |
+ } | |
+ break; | |
+ } | |
+ /* linear scan from the lower bound */ | |
+ Bseek(dfile, bot, 0); | |
+ while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { | |
+ rcanon(entry, word); | |
+ n = compare(key, word); | |
+ switch(n) { | |
+ case -2: | |
+ return 0; | |
+ case -1: | |
+ if(exact) | |
+ return 0; | |
+ /* without -x a prefix match counts: falls through */ | |
+ case 0: | |
+ return 1; | |
+ case 1: | |
+ case 2: | |
+ continue; | |
+ } | |
+ } | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * acomp(s, t) returns: | |
+ * -2 if s strictly precedes t | |
+ * -1 if s is a prefix of t | |
+ * 0 if s is the same as t | |
+ * 1 if t is a prefix of s | |
+ * 2 if t strictly precedes s | |
+ */ | |
+ | |
+/* | |
+ * Rune-by-rune comparison of two NUL-terminated strings, giving | |
+ * -2, -1, 0, 1 or 2: equal strings give 0, a proper prefix gives | |
+ * -1 (s shorter) or 1 (t shorter), and a difference at the first | |
+ * mismatching rune gives -2 (s smaller) or 2 (s larger). | |
+ */ | |
+int | |
+acomp(Rune *s, Rune *t) | |
+{ | |
+	int a, b; | |
+ | |
+	for(;; s++, t++) { | |
+		a = *s; | |
+		b = *t; | |
+		if(a != b) | |
+			break; | |
+		if(a == 0) | |
+			return 0; | |
+	} | |
+	if(a == 0) | |
+		return -1; | |
+	if(b == 0) | |
+		return 1; | |
+	return a < b ? -2 : 2; | |
+} | |
+ | |
+/* | |
+ * Decode the UTF string old into runes stored at new, including the | |
+ * terminating zero rune. No length is passed; new must be big enough. | |
+ */ | |
+void | |
+torune(char *old, Rune *new) | |
+{ | |
+	for(;;) { | |
+		old += chartorune(new, old); | |
+		if(*new++ == 0) | |
+			break; | |
+	} | |
+} | |
+ | |
+/* | |
+ * Build the canonical comparison key of old in new. Copying stops at | |
+ * the end of old or at the first tab rune (the key terminator). | |
+ * Latin-1 letters are replaced by their latin_fold_tab[] equivalent | |
+ * when the table has one; with direc set, runes other than letters, | |
+ * digits, blank and tab are dropped; with fold set, upper case ASCII | |
+ * is lowered. The result is always terminated. | |
+ */ | |
+void | |
+rcanon(Rune *old, Rune *new) | |
+{ | |
+	Rune r, folded; | |
+ | |
+	for(; (r = *old) != 0 && r != tab; old++) { | |
+		if(islatin1(r)) { | |
+			folded = latin_fold_tab[r-0xc0]; | |
+			if(folded != 0) | |
+				r = folded; | |
+		} | |
+		if(direc && !isalnum(r) && r != ' ' && r != '\t') | |
+			continue; | |
+		if(fold && isupper(r)) | |
+			r = tolower(r); | |
+		*new++ = r; | |
+	} | |
+	*new = 0; | |
+} | |
+ | |
+/* | |
+ * Numeric comparison used with -n. Each string is read as optional | |
+ * white space, an optional '-', a run of digits and an optional '.' | |
+ * followed by more digits. Every return below is 0, ssgn/tsgn or their | |
+ * negation, i.e. one of -2, 0, 2. | |
+ * NOTE(review): the sign convention appears to match acomp() (negative | |
+ * when s sorts before t) -- confirm against the callers' switch cases. | |
+ */ | |
+int | |
+ncomp(Rune *s, Rune *t) | |
+{ | |
+ Rune *is, *it, *js, *jt; | |
+ int a, b; | |
+ int ssgn, tsgn; | |
+ | |
+ while(isspace(*s)) | |
+ s++; | |
+ while(isspace(*t)) | |
+ t++; | |
+ /* -2*rev for a non-negative number, flipped below when a '-' is present */ | |
+ ssgn = tsgn = -2*rev; | |
+ if(*s == '-') { | |
+ s++; | |
+ ssgn = -ssgn; | |
+ } | |
+ if(*t == '-') { | |
+ t++; | |
+ tsgn = -tsgn; | |
+ } | |
+ /* is/it: one past the last integer digit of each number */ | |
+ for(is = s; isdigit(*is); is++) | |
+ ; | |
+ for(it = t; isdigit(*it); it++) | |
+ ; | |
+ js = is; | |
+ jt = it; | |
+ a = 0; | |
+ /* same sign: walk the integer digits from the right; a ends up holding the most significant difference */ | |
+ if(ssgn == tsgn) | |
+ while(it>t && is>s) | |
+ if(b = *--it - *--is) | |
+ a = b; | |
+ /* leftover leading digits: any non-zero one decides the result */ | |
+ while(is > s) | |
+ if(*--is != '0') | |
+ return -ssgn; | |
+ while(it > t) | |
+ if(*--it != '0') | |
+ return tsgn; | |
+ if(a) | |
+ return sgn(a)*ssgn; | |
+ /* integer parts are equal: compare the fraction digits left to right */ | |
+ if(*(s=js) == '.') | |
+ s++; | |
+ if(*(t=jt) == '.') | |
+ t++; | |
+ if(ssgn == tsgn) | |
+ while(isdigit(*s) && isdigit(*t)) | |
+ if(a = *t++ - *s++) | |
+ return sgn(a)*ssgn; | |
+ /* leftover fraction digits: any non-zero one decides the result */ | |
+ while(isdigit(*s)) | |
+ if(*s++ != '0') | |
+ return -ssgn; | |
+ while(isdigit(*t)) | |
+ if(*t++ != '0') | |
+ return tsgn; | |
+ return 0; | |
+} | |
+ | |
+/* | |
+ * Read one line of runes from f into rp, which has room for n runes. | |
+ * The newline is replaced by a terminator. Returns 1 when a complete | |
+ * line was stored, 0 when Bgetrune() reports end of input or an error | |
+ * before a newline, and 0 (after a diagnostic) when n runes were read | |
+ * without finding a newline. | |
+ */ | |
+int | |
+getword(Biobuf *f, Rune *rp, int n) | |
+{ | |
+	long r; | |
+	int i; | |
+ | |
+	for(i = 0; i < n; i++) { | |
+		r = Bgetrune(f); | |
+		if(r < 0) | |
+			return 0; | |
+		if(r == '\n') { | |
+			rp[i] = '\0'; | |
+			return 1; | |
+		} | |
+		rp[i] = r; | |
+	} | |
+	fprint(2, "Look: word too long. Bailing out.\n"); | |
+	return 0; | |
+} | |
diff --git a/pbd/Makefile b/pbd/Makefile | |
@@ -0,0 +1,10 @@ | |
+# pbd - pbd unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = pbd | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/pbd/pbd.1 b/pbd/pbd.1 | |
diff --git a/pbd/pbd.c b/pbd/pbd.c | |
@@ -0,0 +1,19 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+ | |
+/* | |
+ * Write the last component of the current directory name to file | |
+ * descriptor 1, without a trailing newline. The root directory is | |
+ * written as "/", and "???" is written when getwd() fails. | |
+ */ | |
+void | |
+main(void) | |
+{ | |
+	char cwd[512], *base; | |
+ | |
+	if(getwd(cwd, sizeof cwd) == nil) | |
+		base = "???"; | |
+	else { | |
+		base = strrchr(cwd, '/'); | |
+		if(base == nil) | |
+			base = cwd; | |
+		else if(base != cwd || base[1] != '\0') | |
+			base++;	/* step over the slash unless the path is exactly "/" */ | |
+	} | |
+	write(1, base, strlen(base)); | |
+	exits(0); | |
+} | |
diff --git a/rc/Makefile b/rc/Makefile | |
@@ -46,7 +46,7 @@ uninstall: | |
@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c | |
clean: | |
- rm -f ${OFILES} ${TARG} y.tab.c y.tab.h | |
+ rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h | |
${TARG}: ${OFILES} | |
@echo LD ${TARG} | |
diff --git a/split/Makefile b/split/Makefile | |
@@ -0,0 +1,10 @@ | |
+# split - split unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = split | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/split/split.1 b/split/split.1 | |
@@ -0,0 +1,82 @@ | |
+.TH SPLIT 1 | |
+.CT 1 files | |
+.SH NAME | |
+split \- split a file into pieces | |
+.SH SYNOPSIS | |
+.B split | |
+[ | |
+.I option ... | |
+] | |
+[ | |
+.I file | |
+] | |
+.SH DESCRIPTION | |
+.I Split | |
+reads | |
+.I file | |
+(standard input by default) | |
+and writes it in pieces of 1000 | |
+lines per output file. | |
+The names of the | |
+output files are | |
+.BR xaa , | |
+.BR xab , | |
+and so on to | |
+.BR xzz . | |
+The options are | |
+.TP | |
+.BI -n " n" | |
+Split into | |
+.IR n -line | |
+pieces. | |
+.TP | |
+.BI -l " n" | |
+Synonym for | |
+.B -n | |
+.IR n , | |
+a nod to Unix's syntax. | |
+.TP | |
+.BI -e " expression" | |
+File divisions occur at each line | |
+that matches a regular | |
+.IR expression ; | |
+see | |
+.IR regexp (7). | |
+Multiple | |
+.B -e | |
+options may appear. | |
+If a subexpression of | |
+.I expression | |
+is contained in parentheses | |
+.BR ( ... ) , | |
+the output file name is the portion of the | |
+line which matches the subexpression. | |
+.TP | |
+.BI -f " stem | |
+Use | |
+.I stem | |
+instead of | |
+.B x | |
+in output file names. | |
+.TP | |
+.BI -s " suffix | |
+Append | |
+.I suffix | |
+to names identified under | |
+.BR -e . | |
+.TP | |
+.B -x | |
+Exclude the matched input line from the output file. | |
+.TP | |
+.B -i | |
+Ignore case in option | |
+.BR -e ; | |
+force output file names (excluding the suffix) | |
+to lower case. | |
+.SH SOURCE | |
+.B \*9/src/cmd/split.c | |
+.SH SEE ALSO | |
+.IR sed (1), | |
+.IR awk (1), | |
+.IR grep (1), | |
+.IR regexp (7) | |
diff --git a/split/split.c b/split/split.c | |
@@ -0,0 +1,189 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+#include <ctype.h> | |
+#include <regexp.h> | |
+ | |
+/* not referenced anywhere else in this file */ | |
+char digit[] = "0123456789"; | |
+/* -s argument: appended to file names taken from a -e match */ | |
+char *suffix = ""; | |
+/* -f argument: prefix of the generated names (xaa, xab, ...) */ | |
+char *stem = "x"; | |
+/* two-letter counter advanced by nextfile() */ | |
+char suff[] = "aa"; | |
+/* name of the output file currently being created */ | |
+char name[200]; | |
+/* output always points at bout; openf() re-initialises it for each new file */ | |
+Biobuf bout; | |
+Biobuf *output = &bout; | |
+ | |
+extern int nextfile(void); | |
+extern int matchfile(Resub*); | |
+extern void openf(void); | |
+extern char *fold(char*,int); | |
+extern void usage(void); | |
+extern void badexp(void); | |
+ | |
+/* | |
+ * split entry point: parse the options, open the input (standard | |
+ * input when no file is named) and copy it to a series of output | |
+ * files. With -e a new file starts at each line matching the pattern; | |
+ * otherwise a new file starts every n lines (1000 by default). | |
+ */ | |
+void | |
+main(int argc, char *argv[]) | |
+{ | |
+ Reprog *exp; | |
+ char *pattern = 0; | |
+ int n = 1000; | |
+ char *line; | |
+ int xflag = 0; | |
+ int iflag = 0; | |
+ Biobuf bin; | |
+ Biobuf *b = &bin; | |
+ char buf[256]; | |
+ | |
+ ARGBEGIN { | |
+ case 'l': | |
+ case 'n': | |
+ n=atoi(EARGF(usage())); | |
+ break; | |
+ /* NOTE(review): each -e overwrites pattern, so only the last one is used although split.1 says several may appear */ | |
+ case 'e': | |
+ pattern = strdup(EARGF(usage())); | |
+ break; | |
+ case 'f': | |
+ stem = strdup(EARGF(usage())); | |
+ break; | |
+ case 's': | |
+ suffix = strdup(EARGF(usage())); | |
+ break; | |
+ case 'x': | |
+ xflag++; | |
+ break; | |
+ case 'i': | |
+ iflag++; | |
+ break; | |
+ default: | |
+ usage(); | |
+ break; | |
+ | |
+ } ARGEND; | |
+ | |
+ if(argc < 0 || argc > 1) | |
+ usage(); | |
+ | |
+ if(argc != 0) { | |
+ b = Bopen(argv[0], OREAD); | |
+ if(b == nil) { | |
+ fprint(2, "split: can't open %s: %r\n", argv[0]); | |
+ exits("open"); | |
+ } | |
+ } else | |
+ Binit(b, 0, OREAD); | |
+ | |
+ if(pattern) { | |
+ if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): patte… | |
+ badexp(); | |
+ while((line=Brdline(b,'\n')) != 0) { | |
+ Resub match[2]; | |
+ memset(match, 0, sizeof match); | |
+ /* replace the newline so the line can be used as a string */ | |
+ line[Blinelen(b)-1] = 0; | |
+ if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,mat… | |
+ /* a matching line starts a new file; with -x the line itself is not copied */ | |
+ if(matchfile(match) && xflag) | |
+ continue; | |
+ /* NOTE(review): output is initialised to &bout and never reassigned in this file, so this test looks unreachable */ | |
+ } else if(output == 0) | |
+ nextfile(); /* at most once */ | |
+ Bwrite(output, line, Blinelen(b)-1); | |
+ Bputc(output, '\n'); | |
+ } | |
+ } else { | |
+ /* start at n so that the first line read opens the first file */ | |
+ int linecnt = n; | |
+ | |
+ while((line=Brdline(b,'\n')) != 0) { | |
+ if(++linecnt > n) { | |
+ nextfile(); | |
+ linecnt = 1; | |
+ } | |
+ Bwrite(output, line, Blinelen(b)); | |
+ } | |
+ | |
+ /* | |
+ * in case we didn't end with a newline, tack whatever's | |
+ * left onto the last file | |
+ */ | |
+ while((n = Bread(b, buf, sizeof(buf))) > 0) | |
+ Bwrite(output, buf, n); | |
+ } | |
+ if(b != nil) | |
+ Bterm(b); | |
+ exits(0); | |
+} | |
+ | |
+/* | |
+ * Open the next output file named stem followed by the two-letter | |
+ * counter (aa, ab, ... zz) and advance the counter. Once the counter | |
+ * has run past "zz" no further file is opened, a warning is printed | |
+ * the first time, and 0 is returned; otherwise returns 1. | |
+ * Exits with a diagnostic if stem plus the counter would not fit in | |
+ * name[]. | |
+ */ | |
+int | |
+nextfile(void) | |
+{ | |
+	static int canopen = 1; | |
+ | |
+	if(suff[0] > 'z') { | |
+		if(canopen) | |
+			fprint(2, "split: file %szz not split\n",stem); | |
+		canopen = 0; | |
+	} else { | |
+		/* stem comes from the command line: refuse names that would overflow name[] */ | |
+		if(strlen(stem) + strlen(suff) >= sizeof name) { | |
+			fprint(2, "split: file name %s%s too long\n", stem, suff); | |
+			exits("name"); | |
+		} | |
+		strcpy(name, stem); | |
+		strcat(name, suff); | |
+		if(++suff[1] > 'z') | |
+			suff[1] = 'a', ++suff[0]; | |
+		openf(); | |
+	} | |
+	return canopen; | |
+} | |
+ | |
+/* | |
+ * Start a new output file for a line that matched the -e pattern. | |
+ * When the pattern has a parenthesised subexpression (match[1] is | |
+ * set), the file is named after the matched text plus suffix and 1 is | |
+ * returned; otherwise the next generated name is used and the result | |
+ * of nextfile() is returned. | |
+ * Exits with a diagnostic if the matched text plus suffix would not | |
+ * fit in name[]. | |
+ */ | |
+int | |
+matchfile(Resub *match) | |
+{ | |
+	int len; | |
+ | |
+	if(match[1].s.sp) { | |
+		len = match[1].e.ep - match[1].s.sp; | |
+		/* the matched text comes from the input: refuse names that would overflow name[] */ | |
+		if(len < 0 || len + strlen(suffix) >= sizeof name) { | |
+			fprint(2, "split: matched file name too long\n"); | |
+			exits("name"); | |
+		} | |
+		strncpy(name, match[1].s.sp, len); | |
+		strcpy(name+len, suffix);	/* also terminates the copied text */ | |
+		openf(); | |
+		return 1; | |
+	} | |
+	return nextfile(); | |
+} | |
+ | |
+/* | |
+ * Finish the current output file and create the one called name, then | |
+ * attach output to it. Exits with "create" if the file cannot be | |
+ * created. The descriptor of the previous file is kept in a static so | |
+ * it can be closed on the next call. | |
+ */ | |
+void | |
+openf(void) | |
+{ | |
+	static int fd = 0; | |
+ | |
+	Bflush(output); | |
+	Bterm(output); | |
+	if(fd > 0) | |
+		close(fd); | |
+	fd = create(name,OWRITE,0666); | |
+	if(fd < 0) { | |
+		/* report under this program's own name */ | |
+		fprint(2, "split: can't create %s: %r\n", name); | |
+		exits("create"); | |
+	} | |
+	Binit(output, fd, OWRITE); | |
+} | |
+ | |
+/* | |
+ * Return a lower-case copy of the NUL-terminated string s. n is the | |
+ * length of s and sizes the buffer; the copy itself runs up to the | |
+ * terminator of s, so s must not be longer than n. The result lives in | |
+ * a static buffer that is reused (and possibly moved) by the next | |
+ * call. Exits with a diagnostic if the buffer cannot be grown. | |
+ */ | |
+char * | |
+fold(char *s, int n) | |
+{ | |
+	static char *fline; | |
+	static int linesize = 0; | |
+	char *t, *grown; | |
+ | |
+	if(linesize < n+1){ | |
+		/* keep the old pointer until realloc is known to have succeeded */ | |
+		grown = realloc(fline,n+1); | |
+		if(grown == nil){ | |
+			fprint(2, "split: out of memory\n"); | |
+			exits("memory"); | |
+		} | |
+		fline = grown; | |
+		linesize = n+1; | |
+	} | |
+	for(t=fline; *t++ = tolower((uchar)*s++); ) | |
+		continue; | |
+	/* we assume the 'A'-'Z' only appear as themselves | |
+	 * in a utf encoding. | |
+	 */ | |
+	return fline; | |
+} | |
+ | |
+/* Print the usage line on file descriptor 2 and exit with status "usage". */ | |
+void | |
+usage(void) | |
+{ | |
+ fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i… | |
+ exits("usage"); | |
+} | |
+ | |
+/* | |
+ * Report that the -e pattern did not compile and exit; the same text | |
+ * is used for the message and for the exit status. | |
+ */ | |
+void | |
+badexp(void) | |
+{ | |
+	char *why = "bad regular expression"; | |
+ | |
+	fprint(2, "split: %s\n", why); | |
+	exits(why); | |
+} | |
diff --git a/strings/Makefile b/strings/Makefile | |
@@ -0,0 +1,10 @@ | |
+# strings - strings unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = strings | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/strings/strings.1 b/strings/strings.1 | |
@@ -0,0 +1,28 @@ | |
+.TH STRINGS 1 | |
+.SH NAME | |
+strings \- extract printable strings | |
+.SH SYNOPSIS | |
+.B strings | |
+[ | |
+.I file ... | |
+] | |
+.SH DESCRIPTION | |
+.I Strings | |
+finds and prints strings containing 6 or more | |
+consecutive printable UTF-encoded characters | |
+in a (typically) binary file, default | |
+standard input. | |
+Printable characters are taken to be | |
+.SM ASCII | |
+characters from blank through tilde (hexadecimal 20 through 7E), inclusive, | |
+and | |
+all other characters from value 00A0 to FFFF. | |
+Strings reports | |
+the decimal offset within the file at which the string starts and the text | |
+of the string. If the string is longer than 70 runes the line is | |
+terminated by three dots and the printing is resumed on the next | |
+line with the offset of the continuation line. | |
+.SH SOURCE | |
+.B \*9/src/cmd/strings.c | |
+.SH SEE ALSO | |
+.IR nm (1) | |
diff --git a/strings/strings.c b/strings/strings.c | |
@@ -0,0 +1,90 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+ | |
+/* fin: the file currently being scanned; fout: buffered output on file descriptor 1 */ | |
+Biobuf *fin; | |
+Biobuf fout; | |
+ | |
+#define MINSPAN 6 /* Min characters in st… | |
+ | |
+/* size of the rune buffer in stringit(); a line is flushed once BUFSIZE-1 runes are held */ | |
+#define BUFSIZE 70 | |
+ | |
+void stringit(char *); | |
+/* the local predicate is compiled under the name risprint */ | |
+#undef isprint | |
+#define isprint risprint | |
+int isprint(Rune); | |
+ | |
+/* | |
+ * strings entry point: scan each named file, or /dev/stdin when none | |
+ * is given. When more than one file is named, each file's output is | |
+ * preceded by a "name:" header line. | |
+ */ | |
+void | |
+main(int argc, char **argv) | |
+{ | |
+	int i; | |
+ | |
+	Binit(&fout, 1, OWRITE); | |
+	if(argc < 2) { | |
+		stringit("/dev/stdin"); | |
+		exits(0); | |
+	} | |
+ | |
+	for(i = 1; i < argc; i++) { | |
+		/* | |
+		 * The header goes through fout like the strings themselves: | |
+		 * an unbuffered print() could reach the output before the | |
+		 * buffered strings of the previous file. | |
+		 */ | |
+		if(argc > 2) | |
+			Bprint(&fout, "%s:\n", argv[i]); | |
+ | |
+		stringit(argv[i]); | |
+	} | |
+ | |
+	exits(0); | |
+} | |
+ | |
+/* | |
+ * Scan the file called str and print, on fout, every run of at least | |
+ * MINSPAN printable runes together with the byte offset at which the | |
+ * run starts. A run that fills the buffer is printed followed by | |
+ * " ..." and scanning continues with a fresh run. If the file cannot | |
+ * be opened a message is printed with perror() and nothing is scanned. | |
+ */ | |
+void | |
+stringit(char *str) | |
+{ | |
+	long posn, start; | |
+	int cnt = 0; | |
+	long c; | |
+ | |
+	Rune buf[BUFSIZE]; | |
+ | |
+	if ((fin = Bopen(str, OREAD)) == 0) { | |
+		perror("open"); | |
+		return; | |
+	} | |
+ | |
+	start = 0; | |
+	posn = Boffset(fin);	/* offset of the rune about to be read */ | |
+	while((c = Bgetrune(fin)) >= 0) { | |
+		if(isprint(c)) { | |
+			/* | |
+			 * An empty buffer marks the start of a run. Testing | |
+			 * start against 0 instead would misreport a run that | |
+			 * really begins at offset 0. | |
+			 */ | |
+			if(cnt == 0) | |
+				start = posn; | |
+			buf[cnt++] = c; | |
+			if(cnt == BUFSIZE-1) { | |
+				buf[cnt] = 0; | |
+				Bprint(&fout, "%8ld: %S ...\n", start, buf); | |
+				cnt = 0; | |
+			} | |
+		} else { | |
+			if(cnt >= MINSPAN) { | |
+				buf[cnt] = 0; | |
+				Bprint(&fout, "%8ld: %S\n", start, buf); | |
+			} | |
+			cnt = 0; | |
+		} | |
+		posn = Boffset(fin); | |
+	} | |
+ | |
+	/* a run that reaches the end of the file */ | |
+	if(cnt >= MINSPAN){ | |
+		buf[cnt] = 0; | |
+		Bprint(&fout, "%8ld: %S\n", start, buf); | |
+	} | |
+	Bterm(fin); | |
+} | |
+ | |
+/* | |
+ * Printable test used by stringit(): 1 for runes from blank up to but | |
+ * not including 0x7f and for runes above 0xA0, 0 for everything else. | |
+ */ | |
+int | |
+isprint(Rune r) | |
+{ | |
+	return (r >= ' ' && r < 0x7f) || r > 0xA0; | |
+} | |
diff --git a/unicode/Makefile b/unicode/Makefile | |
@@ -0,0 +1,10 @@ | |
+# unicode - unicode unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = unicode | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/unicode/unicode.1 b/unicode/unicode.1 | |
diff --git a/unicode/unicode.c b/unicode/unicode.c | |
@@ -0,0 +1,122 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+ | |
+char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] c… | |
+char hex[] = "0123456789abcdefABCDEF"; | |
+int numout = 0; | |
+int text = 0; | |
+char *err; | |
+Biobuf bout; | |
+ | |
+char *range(char*[]); | |
+char *nums(char*[]); | |
+char *chars(char*[]); | |
+ | |
+void | |
+main(int argc, char *argv[]) | |
+{ | |
+ ARGBEGIN{ | |
+ case 'n': | |
+ numout = 1; | |
+ break; | |
+ case 't': | |
+ text = 1; | |
+ break; | |
+ }ARGEND | |
+ Binit(&bout, 1, OWRITE); | |
+ if(argc == 0){ | |
+ fprint(2, "usage: %s\n", usage); | |
+ exits("usage"); | |
+ } | |
+ if(!numout && utfrune(argv[0], '-')) | |
+ exits(range(argv)); | |
+ if(numout || strchr(hex, argv[0][0])==0) | |
+ exits(nums(argv)); | |
+ exits(chars(argv)); | |
+} | |
+ | |
+char* | |
+range(char *argv[]) | |
+{ | |
+ char *q; | |
+ int min, max; | |
+ int i; | |
+ | |
+ while(*argv){ | |
+ q = *argv; | |
+ if(strchr(hex, q[0]) == 0){ | |
+ err: | |
+ fprint(2, "unicode: bad range %s\n", *argv); | |
+ return "bad range"; | |
+ } | |
+ min = strtoul(q, &q, 16); | |
+ if(min<0 || min>Runemax || *q!='-') | |
+ goto err; | |
+ q++; | |
+ if(strchr(hex, *q) == 0) | |
+ goto err; | |
+ max = strtoul(q, &q, 16); | |
+ if(max<0 || max>Runemax || max<min || *q!=0) | |
+ goto err; | |
+ i = 0; | |
+ do{ | |
+ Bprint(&bout, "%.4x %C", min, min); | |
+ i++; | |
+ if(min==max || (i&7)==0) | |
+ Bprint(&bout, "\n"); | |
+ else | |
+ Bprint(&bout, "\t"); | |
+ min++; | |
+ }while(min<=max); | |
+ argv++; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+char* | |
+nums(char *argv[]) | |
+{ | |
+ char *q; | |
+ Rune r; | |
+ int w; | |
+ | |
+ while(*argv){ | |
+ q = *argv; | |
+ while(*q){ | |
+ w = chartorune(&r, q); | |
+ if(r==0x80 && (q[0]&0xFF)!=0x80){ | |
+ fprint(2, "unicode: invalid utf string %s\n", … | |
+ return "bad utf"; | |
+ } | |
+ Bprint(&bout, "%.4x\n", r); | |
+ q += w; | |
+ } | |
+ argv++; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+char* | |
+chars(char *argv[]) | |
+{ | |
+ char *q; | |
+ int m; | |
+ | |
+ while(*argv){ | |
+ q = *argv; | |
+ if(strchr(hex, q[0]) == 0){ | |
+ err: | |
+ fprint(2, "unicode: bad unicode value %s\n", *argv); | |
+ return "bad char"; | |
+ } | |
+ m = strtoul(q, &q, 16); | |
+ if(m<0 || m>Runemax || *q!=0) | |
+ goto err; | |
+ Bprint(&bout, "%C", m); | |
+ if(!text) | |
+ Bprint(&bout, "\n"); | |
+ argv++; | |
+ } | |
+ return 0; | |
+} | |
diff --git a/unutf/Makefile b/unutf/Makefile | |
@@ -0,0 +1,10 @@ | |
+# unutf - unutf unix port from plan9 | |
+# Depends on ../lib9 | |
+ | |
+TARG = unutf | |
+ | |
+include ../std.mk | |
+ | |
+pre-uninstall: | |
+ | |
+post-install: | |
diff --git a/unutf/unutf.1 b/unutf/unutf.1 | |
diff --git a/unutf/unutf.c b/unutf/unutf.c | |
@@ -0,0 +1,20 @@ | |
+/* | |
+ * stupid little program to pipe unicode chars through | |
+ * when converting to non-utf compilers. | |
+ */ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+ | |
+Biobuf bin; | |
+ | |
+void | |
+main(void) | |
+{ | |
+ int c; | |
+ | |
+ Binit(&bin, 0, OREAD); | |
+ while((c = Bgetrune(&bin)) >= 0) | |
+ print("0x%ux\n", c); | |
+ exits(0); | |
+} |