Introduction
Introduction Statistics Contact Development Disclaimer Help
added commands as discussed with Uriel yesterday - 9base - revived minimalist p…
git clone git://git.suckless.org/9base
Log
Files
Refs
README
LICENSE
---
commit fa62640154da08c5fd229af50efde0d33871a0aa
parent 85bacddf7706d2c89c30c2433fb8c43cd794cdb5
Author: Anselm R Garbe <[email protected]>
Date: Fri, 28 May 2010 11:30:17 +0100
added commands as discussed with Uriel yesterday
Diffstat:
M Makefile | 53 +++++++++++++++++++++++++++++…
D TODO | 11 -----------
A ascii/Makefile | 10 ++++++++++
A ascii/ascii.1 | 160 +++++++++++++++++++++++++++++…
A ascii/ascii.c | 181 +++++++++++++++++++++++++++++…
A cmp/Makefile | 10 ++++++++++
A cmp/cmp.1 | 57 +++++++++++++++++++++++++++++…
A cmp/cmp.c | 112 +++++++++++++++++++++++++++++…
A dd/Makefile | 10 ++++++++++
A dd/dd.1 | 0
A dd/dd.c | 660 +++++++++++++++++++++++++++++…
A diff/Makefile | 35 +++++++++++++++++++++++++++++…
A diff/diff.1 | 163 +++++++++++++++++++++++++++++…
A diff/diff.h | 27 +++++++++++++++++++++++++++
A diff/diffdir.c | 113 +++++++++++++++++++++++++++++…
A diff/diffio.c | 387 +++++++++++++++++++++++++++++…
A diff/diffreg.c | 420 +++++++++++++++++++++++++++++…
A diff/main.c | 270 +++++++++++++++++++++++++++++…
A join/Makefile | 10 ++++++++++
A join/join.1 | 147 +++++++++++++++++++++++++++++…
A join/join.c | 369 ++++++++++++++++++++++++++++++
M lib9/utf.h | 3 ++-
A look/Makefile | 10 ++++++++++
A look/look.1 | 85 +++++++++++++++++++++++++++++…
A look/look.c | 349 +++++++++++++++++++++++++++++…
A pbd/Makefile | 10 ++++++++++
A pbd/pbd.1 | 0
A pbd/pbd.c | 19 +++++++++++++++++++
M rc/Makefile | 2 +-
A split/Makefile | 10 ++++++++++
A split/split.1 | 82 +++++++++++++++++++++++++++++…
A split/split.c | 189 +++++++++++++++++++++++++++++…
A strings/Makefile | 10 ++++++++++
A strings/strings.1 | 28 ++++++++++++++++++++++++++++
A strings/strings.c | 90 +++++++++++++++++++++++++++++…
A unicode/Makefile | 10 ++++++++++
A unicode/unicode.1 | 0
A unicode/unicode.c | 122 +++++++++++++++++++++++++++++…
A unutf/Makefile | 10 ++++++++++
A unutf/unutf.1 | 0
A unutf/unutf.c | 20 ++++++++++++++++++++
41 files changed, 4238 insertions(+), 16 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -2,9 +2,56 @@
include config.mk
-SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \
- factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \
- rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq
+SUBDIRS = lib9\
+ yacc\
+ ascii\
+ awk\
+ basename\
+ bc\
+ cal\
+ cat\
+ cleanname\
+ cmp\
+ date\
+ dc\
+ du\
+ dd\
+ diff\
+ echo\
+ ed\
+ factor\
+ fortune\
+ fmt\
+ freq\
+ getflags\
+ grep\
+ hoc\
+ join\
+ look\
+ ls\
+ mk\
+ mkdir\
+ mtime\
+ pbd\
+ primes\
+ rc\
+ read\
+ sha1sum\
+ sed\
+ seq\
+ sleep\
+ sort\
+ split\
+ strings\
+ tail\
+ tee\
+ test\
+ touch\
+ tr\
+ troff\
+ unicode\
+ uniq\
+ unutf\
all:
@echo 9base build options:
diff --git a/TODO b/TODO
@@ -1,11 +0,0 @@
-12:13 < uriel> garbeam: add dd and diff too
-12:13 < uriel> and split
-12:14 < uriel> (and join)
-12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but …
-12:15 < uriel> and tcs
-12:16 < uriel> and strings
-12:18 < uriel> oh, oh, I'm finding some great bits:
-12:18 < uriel> look(1), ascii(1) and unicode(1)
-12:19 < uriel> ok, and cmp(1) is missing too
-12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth …
-12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it…
diff --git a/ascii/Makefile b/ascii/Makefile
@@ -0,0 +1,10 @@
+# ascii - ascii unix port from plan9
+# Depends on ../lib9
+
+TARG = ascii
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/ascii/ascii.1 b/ascii/ascii.1
@@ -0,0 +1,160 @@
+.TH ASCII 1
+.SH NAME
+ascii, unicode \- interpret ASCII, Unicode characters
+.SH SYNOPSIS
+.B ascii
+[
+.B -8
+]
+[
+.BI -oxdb n
+]
+[
+.B -nct
+]
+[
+.I text
+]
+.PP
+.B unicode
+[
+.B -nt
+]
+.IB hexmin - hexmax
+.PP
+.B unicode
+[
+.B -t
+]
+.I hex
+[
+\&...
+]
+.PP
+.B unicode
+[
+.B -n
+]
+.I characters
+.PP
+.B look
+.I hex
+.B \*9/lib/unicode
+.SH DESCRIPTION
+.I Ascii
+prints the
+.SM ASCII
+values corresponding to characters and
+.I vice
+.IR versa ;
+under the
+.B -8
+option, the
+.SM ISO
+Latin-1 extensions (codes 0200-0377) are included.
+The values are interpreted in a settable numeric base;
+.B -o
+specifies octal,
+.B -d
+decimal,
+.B -x
+hexadecimal (the default), and
+.BI -b n
+base
+.IR n .
+.PP
+With no arguments,
+.I ascii
+prints a table of the character set in the specified base.
+Characters of
+.I text
+are converted to their
+.SM ASCII
+values, one per line. If, however, the first
+.I text
+argument is a valid number in the specified base, conversion
+goes the opposite way.
+Control characters are printed as two- or three-character mnemonics.
+Other options are:
+.TP
+.B -n
+Force numeric output.
+.TP
+.B -c
+Force character output.
+.TP
+.B -t
+Convert from numbers to running text; do not interpret
+control characters or insert newlines.
+.PP
+.I Unicode
+is similar; it converts between
+.SM UTF
+and character values from the Unicode Standard (see
+.IR utf (7)).
+If given a range of hexadecimal numbers,
+.I unicode
+prints a table of the specified Unicode characters \(em their values and
+.SM UTF
+representations.
+Otherwise it translates from
+.SM UTF
+to numeric value or vice versa,
+depending on the appearance of the supplied text;
+the
+.B -n
+option forces numeric output to avoid ambiguity with numeric characters.
+If converting to
+.SM UTF ,
+the characters are printed one per line unless the
+.B -t
+flag is set, in which case the output is a single string
+containing only the specified characters.
+Unlike
+.IR ascii ,
+.I unicode
+treats no characters specially.
+.PP
+The output of
+.I ascii
+and
+.I unicode
+may be unhelpful if the characters printed are not available in the current fo…
+.PP
+The file
+.B \*9/lib/unicode
+contains a
+table of characters and descriptions, sorted in hexadecimal order,
+suitable for
+.IR look (1)
+on the lower case
+.I hex
+values of characters.
+.SH EXAMPLES
+.TP
+.B "ascii -d"
+Print the
+.SM ASCII
+table base 10.
+.TP
+.B "unicode p"
+Print the hex value of `p'.
+.TP
+.B "unicode 2200-22f1"
+Print a table of miscellaneous mathematical symbols.
+.TP
+.B "look 039 \*9/lib/unicode"
+See the start of the Greek alphabet's encoding in the Unicode Standard.
+.SH FILES
+.TP
+.B \*9/lib/unicode
+table of characters and descriptions.
+.SH SOURCE
+.B \*9/src/cmd/ascii.c
+.br
+.B \*9/src/cmd/unicode.c
+.SH "SEE ALSO"
+.IR look (1),
+.IR tcs (1),
+.IR utf (7),
+.IR font (7)
diff --git a/ascii/ascii.c b/ascii/ascii.c
@@ -0,0 +1,181 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+#define MAXBASE 36
+
+void usage(void);
+void put(int);
+void putn(int, int);
+void puttext(char *);
+void putnum(char *);
+int btoi(char *);
+int value(int, int);
+int isnum(char *);
+
+char *str[256]={
+ "nul", "soh", "stx", "etx", "eot", …
+ "bs ", "ht ", "nl ", "vt ", "np ", …
+ "dle", "dc1", "dc2", "dc3", "dc4", …
+ "can", "em ", "sub", "esc", "fs ", …
+ "sp ", " ! ", " \" ", " # ", " $ ", …
+ " ( ", " ) ", " * ", " + ", " , ", …
+ " 0 ", " 1 ", " 2 ", " 3 ", " 4 ", …
+ " 8 ", " 9 ", " : ", " ; ", " < ", …
+ " @ ", " A ", " B ", " C ", " D ", …
+ " H ", " I ", " J ", " K ", " L ", …
+ " P ", " Q ", " R ", " S ", " T ", …
+ " X ", " Y ", " Z ", " [ ", " \\ ", …
+ " ` ", " a ", " b ", " c ", " d ", …
+ " h ", " i ", " j ", " k ", " l ", …
+ " p ", " q ", " r ", " s ", " t ", …
+ " x ", " y ", " z ", " { ", " | ", …
+ "x80", "x81", "x82", "x83", "x84", …
+ "x88", "x89", "x8a", "x8b", "x8c", …
+ "x90", "x91", "x92", "x93", "x94", …
+ "x98", "x99", "x9a", "x9b", "x9c", …
+ "xa0", " ¡ ", " ¢ ", " £ ", " ¤ ", …
+ " ¨ ", " © ", " ª ", " « ", " ¬ ", …
+ " ° ", " ± ", " ² ", " ³ ", " ´ ", …
+ " ¸ ", " ¹ ", " º ", " » ", " ¼ ", …
+ " À ", " Á ", " Â ", " Ã ", " Ä ", …
+ " È ", " É ", " Ê ", " Ë ", " Ì ", …
+ " Ð ", " Ñ ", " Ò ", " Ó ", " Ô ", …
+ " Ø ", " Ù ", " Ú ", " Û ", " Ü ", …
+ " à ", " á ", " â ", " ã ", " ä ", …
+ " è ", " é ", " ê ", " ë ", " ì ", …
+ " ð ", " ñ ", " ò ", " ó ", " ô ", …
+ " ø ", " ù ", " ú ", " û ", " ü ", …
+};
+
+char Ncol[]={
+ 0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,…
+};
+
+int nchars=128;
+int base=16;
+int ncol;
+int text=1;
+int strip=0;
+Biobuf bin;
+/*
+ * ascii: with no arguments, print a table of the first nchars character
+ * codes; otherwise convert every argument, either from a number in the
+ * current base to its str[] entry (puttext) or from characters to their
+ * numeric codes (putnum).
+ */
+void
+main(int argc, char **argv)
+{
+ int i;
+
+ Binit(&bin, 1, OWRITE); /* all output goes through bin on file descriptor 1 */
+ ARGBEGIN{
+ case '8':
+ nchars=256; break; /* table covers all 256 entries of str[] */
+ case 'x':
+ base=16; break;
+ case 'o':
+ base=8; break;
+ case 'd':
+ base=10; break;
+ case 'b':
+ base=strtoul(EARGF(usage()), 0, 0);
+ if(base<2||base>MAXBASE)
+ usage();
+ break;
+ case 'n':
+ text=0; break; /* 0 selects putnum below */
+ case 't':
+ strip=1; /* puttext then writes the raw byte instead of its name */
+ /* fall through */
+ case 'c':
+ text=2; break; /* nonzero selects puttext below */
+ default:
+ usage();
+ }ARGEND
+
+ ncol=Ncol[base]; /* digit count handed to putn for every value */
+ if(argc==0){
+ for(i=0;i<nchars;i++){
+ put(i);
+ if((i&7)==7) /* eight cells per output row */
+ Bprint(&bin, "|\n");
+ }
+ }else{
+ if(text==1) /* still at its initial value: the first argument decides the direction */
+ text=isnum(argv[0]);
+ while(argc--)
+ if(text)
+ puttext(*argv++);
+ else
+ putnum(*argv++);
+ }
+ Bputc(&bin, '\n');
+ exits(0);
+}
+/*
+ * Print the command synopsis on standard error and exit with status
+ * "usage".  The flags listed are the ones main's option switch accepts:
+ * -8, -x, -o, -d, -b base, -n, -c and -t.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: %s [-8] [-xod | -b n] [-nct] [--] [text]\n", argv0);
+	exits("usage");
+}
+/* Print one table cell: '|', the code i as ncol digits, a space, then str[i]. */
+void
+put(int i)
+{
+	char *name;
+
+	name = str[i];
+	Bputc(&bin, '|');
+	putn(i, ncol);
+	Bprint(&bin, " %s", name);
+}
+char dig[]="0123456789abcdefghijklmnopqrstuvwxyz";
+/*
+ * Write n in the current base using exactly ndig digits, most significant
+ * digit first; the recursion emits the higher-order digits before this one.
+ */
+void
+putn(int n, int ndig)
+{
+	if(ndig != 0){
+		putn(n/base, ndig-1);
+		Bputc(&bin, dig[n%base]);
+	}
+}
+/*
+ * Convert the number spelled by s (in the current base) to a character
+ * code, keep its low 8 bits, and print either the raw byte (strip set)
+ * or its str[] name followed by a newline.
+ */
+void
+puttext(char *s)
+{
+	int code;
+
+	code = btoi(s) & 0377;
+	if(!strip){
+		Bprint(&bin, "%s\n", str[code]);
+		return;
+	}
+	Bputc(&bin, code);
+}
+/* Print the code of every byte of s, one per line, as ncol digits in the current base. */
+void
+putnum(char *s)
+{
+	for(; *s != '\0'; s++){
+		putn(*s & 0377, ncol);
+		Bputc(&bin, '\n');
+	}
+}
+/*
+ * Convert the digit string s to an integer in the current base.
+ * value(..., 0) reports and exits on a character that is not a digit.
+ */
+int
+btoi(char *s)
+{
+	int acc;
+
+	for(acc = 0; *s != '\0'; s++)
+		acc = acc*base + value(*s, 0);
+	return acc;
+}
+/*
+ * Return the numeric value of digit character c in the current base.
+ * If c is not among the first `base` entries of dig[]: return -1 when f
+ * is nonzero, otherwise print a diagnostic and exit with status "bad".
+ */
+int
+value(int c, int f)
+{
+	int d;
+
+	for(d = 0; d < base; d++)
+		if(dig[d] == c)
+			return d;
+	if(f)
+		return -1;
+	fprint(2, "%s: bad input char %c\n", argv0, c);
+	exits("bad");
+	return 0;	/* only reached if exits returns; keeps the compiler quiet */
+}
+/* Return 1 if every character of s is a digit in the current base (also for an empty s), else 0. */
+int
+isnum(char *s)
+{
+	for(; *s != '\0'; s++)
+		if(value(*s, 1) == -1)
+			return 0;
+	return 1;
+}
diff --git a/cmp/Makefile b/cmp/Makefile
@@ -0,0 +1,10 @@
+# cmp - cmp unix port from plan9
+# Depends on ../lib9
+
+TARG = cmp
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/cmp/cmp.1 b/cmp/cmp.1
@@ -0,0 +1,57 @@
+.TH CMP 1
+.SH NAME
+cmp \- compare two files
+.SH SYNOPSIS
+.B cmp
+[
+.B -lsL
+]
+.I file1 file2
+[
+.I offset1
+[
+.I offset2
+]
+]
+.SH DESCRIPTION
+The two files are
+compared.
+A diagnostic results if the contents differ, otherwise
+there is no output.
+.PP
+The options are:
+.TP
+.B l
+Print the byte number (decimal) and the
+differing bytes (hexadecimal) for each difference.
+.TP
+.B s
+Print nothing for differing files,
+but set the exit status.
+.TP
+.B L
+Print the line number of the first differing byte.
+.PP
+If offsets are given,
+comparison starts at the designated byte position
+of the corresponding file.
+Offsets that begin with
+.B 0x
+are hexadecimal;
+with
+.BR 0 ,
+octal; with anything else, decimal.
+.SH SOURCE
+.B \*9/src/cmd/cmp.c
+.SH "SEE ALSO"
+.IR diff (1)
+.SH DIAGNOSTICS
+If a file is inaccessible or missing, the exit status is
+.LR open .
+If the files are the same, the exit status is empty (true).
+If they are the same except that one is longer than the other, the exit status…
+.LR EOF .
+Otherwise
+.I cmp
+reports the position of the first disagreeing byte and the exit status is
+.LR differ .
diff --git a/cmp/cmp.c b/cmp/cmp.c
@@ -0,0 +1,112 @@
+#include <u.h>
+#include <libc.h>
+
+#define BUF 65536
+
+int sflag = 0;
+int lflag = 0;
+int Lflag = 0;
+
+static void usage(void);
+/*
+ * cmp: compare two files byte by byte, optionally after seeking each
+ * file to a starting offset.  Without -l the first difference is
+ * reported and the program exits with status "differ"; with -l every
+ * differing byte is listed and the comparison continues.  Exits with
+ * "EOF" when one side runs out first and with an empty status otherwise.
+ * NOTE(review): under -l the final status does not reflect differences
+ * that were printed, because that path never calls exits("differ").
+ */
+void
+main(int argc, char *argv[])
+{
+ int n, i;
+ uchar *p, *q;
+ uchar buf1[BUF], buf2[BUF];
+ int f1, f2;
+ vlong nc = 1, o, l = 1; /* nc: number of the next byte, l: line number; both start at 1 */
+ char *name1, *name2;
+ uchar *b1s, *b1e, *b2s, *b2e; /* start/end of the unread data in buf1 and buf2 */
+
+ ARGBEGIN{
+ case 's': sflag = 1; break;
+ case 'l': lflag = 1; break;
+ case 'L': Lflag = 1; break;
+ default: usage();
+ }ARGEND
+ if(argc < 2)
+ usage();
+ if((f1 = open(name1 = *argv++, OREAD)) == -1){
+ if(!sflag) perror(name1);
+ exits("open");
+ }
+ if((f2 = open(name2 = *argv++, OREAD)) == -1){
+ if(!sflag) perror(name2);
+ exits("open");
+ }
+ if(*argv){ /* optional offset1; base 0 lets strtoll accept 0x and 0 prefixes */
+ o = strtoll(*argv++, 0, 0);
+ if(seek(f1, o, 0) < 0){
+ if(!sflag) perror("cmp: seek by offset1");
+ exits("seek 1");
+ }
+ }
+ if(*argv){ /* optional offset2 */
+ o = strtoll(*argv++, 0, 0);
+ if(seek(f2, o, 0) < 0){
+ if(!sflag) perror("cmp: seek by offset2");
+ exits("seek 2");
+ }
+ }
+ if(*argv)
+ usage();
+ b1s = b1e = buf1;
+ b2s = b2e = buf2;
+ for(;;){
+ if(b1s >= b1e){ /* buf1 fully consumed: read more, wrapping to the start when at the end */
+ if(b1s >= &buf1[BUF])
+ b1s = buf1;
+ n = read(f1, b1s, &buf1[BUF] - b1s);
+ b1e = b1s + n;
+ }
+ if(b2s >= b2e){ /* same refill for buf2 */
+ if(b2s >= &buf2[BUF])
+ b2s = buf2;
+ n = read(f2, b2s, &buf2[BUF] - b2s);
+ b2e = b2s + n;
+ }
+ n = b2e - b2s; /* n = the smaller of the two unread byte counts */
+ if(n > b1e - b1s)
+ n = b1e - b1s;
+ if(n <= 0) /* one side has nothing left; a read that returned -1 also ends up here */
+ break;
+ if(memcmp((void *)b1s, (void *)b2s, n) != 0){
+ if(sflag)
+ exits("differ");
+ for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) {
+ if(*p == '\n')
+ l++;
+ if(*p != *q){
+ if(!lflag){
+ print("%s %s differ: char %lld…
+ name1, name2, nc+i);
+ print(Lflag?" line %lld\n":"\n…
+ exits("differ");
+ }
+ print("%6lld 0x%.2x 0x%.2x\n", nc+i, *…
+ }
+ }
+ }
+ /* NOTE(review): this scan runs to b1e, not just over the n bytes compared, and repeats lines already counted above under -l */
+ if(Lflag)
+ for(p = b1s; p < b1e;)
+ if(*p++ == '\n')
+ l++;
+ nc += n;
+ b1s += n;
+ b2s += n;
+ }
+ if(b1e - b1s == b2e - b2s) /* neither side has leftover data */
+ exits((char *)0);
+ if(!sflag)
+ print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1);
+ exits("EOF");
+}
+
+/*
+ * Print the synopsis and exit with status "usage".  The message is
+ * written to file descriptor 2 so that it cannot be mistaken for
+ * comparison output by whatever is reading cmp's standard output.
+ */
+static void
+usage(void)
+{
+	fprint(2, "Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n");
+	exits("usage");
+}
diff --git a/dd/Makefile b/dd/Makefile
@@ -0,0 +1,10 @@
+# dd - dd unix port from plan9
+# Depends on ../lib9
+
+TARG = dd
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/dd/dd.1 b/dd/dd.1
diff --git a/dd/dd.c b/dd/dd.c
@@ -0,0 +1,660 @@
+#include <u.h>
+#include <libc.h>
+
+#define BIG 2147483647
+#define LCASE (1<<0)
+#define UCASE (1<<1)
+#define SWAB (1<<2)
+#define NERR (1<<3)
+#define SYNC (1<<4)
+int cflag;
+int fflag;
+char *string;
+char *ifile;
+char *ofile;
+char *ibuf;
+char *obuf;
+vlong skip;
+vlong oseekn;
+vlong iseekn;
+vlong count;
+long files = 1;
+long ibs = 512;
+long obs = 512;
+long bs;
+long cbs;
+long ibc;
+long obc;
+long cbc;
+long nifr;
+long nipr;
+long nofr;
+long nopr;
+long ntrunc;
+int dotrunc = 1;
+int ibf;
+int obf;
+char *op;
+int nspace;
+uchar etoa[256];
+uchar atoe[256];
+uchar atoibm[256];
+
+void flsh(void);
+int match(char *s);
+vlong number(long big);
+void cnull(int cc);
+void null(int c);
+void ascii(int cc);
+void unblock(int cc);
+void ebcdic(int cc);
+void ibm(int cc);
+void block(int cc);
+void term(void);
+void stats(void);
+
+#define iskey(s) ((key[0] == '-') && (strcmp(key+1, s) == 0))
+/*
+ * dd: parse "-key value" argument pairs, open the input and output,
+ * allocate the buffers, then copy input records to the output, passing
+ * each byte through the selected conversion routine unless the fast
+ * path (fflag) applies.  The copy loop only ends through term().
+ */
+void
+main(int argc, char *argv[])
+{
+ void (*conv)(int);
+ char *ip;
+ char *key;
+ int a, c;
+
+ conv = null;
+ for(c=1; c<argc; c++) {
+ key = argv[c++]; /* arguments come in pairs: key here, value in argv[c] */
+ if(c >= argc){
+ fprint(2, "dd: arg %s needs a value\n", key);
+ exits("arg");
+ }
+ string = argv[c]; /* number() and match() read their input from string */
+ if(iskey("ibs")) {
+ ibs = number(BIG);
+ continue;
+ }
+ if(iskey("obs")) {
+ obs = number(BIG);
+ continue;
+ }
+ if(iskey("cbs")) {
+ cbs = number(BIG);
+ continue;
+ }
+ if(iskey("bs")) {
+ bs = number(BIG);
+ continue;
+ }
+ if(iskey("if")) {
+ ifile = string;
+ continue;
+ }
+ if(iskey("of")) {
+ ofile = string;
+ continue;
+ }
+ if(iskey("trunc")) {
+ dotrunc = number(BIG);
+ continue;
+ }
+ if(iskey("skip")) {
+ skip = number(BIG);
+ continue;
+ }
+ if(iskey("seek") || iskey("oseek")) {
+ oseekn = number(BIG);
+ continue;
+ }
+ if(iskey("iseek")) {
+ iseekn = number(BIG);
+ continue;
+ }
+ if(iskey("count")) {
+ count = number(BIG);
+ continue;
+ }
+ if(iskey("files")) {
+ files = number(BIG);
+ continue;
+ }
+ if(iskey("conv")) {
+ cloop: /* consume the comma-separated conversion names one at a time */
+ if(match(","))
+ goto cloop;
+ if(*string == '\0')
+ continue;
+ if(match("ebcdic")) {
+ conv = ebcdic;
+ goto cloop;
+ }
+ if(match("ibm")) {
+ conv = ibm;
+ goto cloop;
+ }
+ if(match("ascii")) {
+ conv = ascii;
+ goto cloop;
+ }
+ if(match("block")) {
+ conv = block;
+ goto cloop;
+ }
+ if(match("unblock")) {
+ conv = unblock;
+ goto cloop;
+ }
+ if(match("lcase")) {
+ cflag |= LCASE;
+ goto cloop;
+ }
+ if(match("ucase")) {
+ cflag |= UCASE;
+ goto cloop;
+ }
+ if(match("swab")) {
+ cflag |= SWAB;
+ goto cloop;
+ }
+ if(match("noerror")) {
+ cflag |= NERR;
+ goto cloop;
+ }
+ if(match("sync")) {
+ cflag |= SYNC;
+ goto cloop;
+ }
+ } /* an unrecognised conversion name falls out here and is reported as a bad arg */
+ fprint(2, "dd: bad arg: %s\n", key);
+ exits("arg");
+ }
+ if(conv == null && cflag&(LCASE|UCASE)) /* plain copy with case folding: use cnull */
+ conv = cnull;
+ if(ifile)
+ ibf = open(ifile, 0);
+ else
+ ibf = dup(0, -1);
+ if(ibf < 0) {
+ fprint(2, "dd: open %s: %r\n", ifile);
+ exits("open");
+ }
+ if(ofile){
+ if(dotrunc)
+ obf = create(ofile, 1, 0664);
+ else
+ obf = open(ofile, 1);
+ if(obf < 0) {
+ fprint(2, "dd: create %s: %r\n", ofile);
+ exits("create");
+ }
+ }else{
+ obf = dup(1, -1);
+ if(obf < 0) {
+ fprint(2, "dd: can't dup file descriptor: %s: %r\n", o…
+ exits("dup");
+ }
+ }
+ if(bs) /* a bs value overrides both ibs and obs */
+ ibs = obs = bs;
+ if(ibs == obs && conv == null) /* fast path: obuf aliases ibuf and whole reads are written out */
+ fflag++;
+ if(ibs == 0 || obs == 0) {
+ fprint(2, "dd: counts: cannot be zero\n");
+ exits("counts");
+ }
+ ibuf = sbrk(ibs);
+ if(fflag)
+ obuf = ibuf;
+ else
+ obuf = sbrk(obs);
+ sbrk(64); /* For good measure */
+ if(ibuf == (char *)-1 || obuf == (char *)-1) {
+ fprint(2, "dd: not enough memory: %r\n");
+ exits("memory");
+ }
+ ibc = 0;
+ obc = 0;
+ cbc = 0;
+ op = obuf;
+
+/*
+ if(signal(SIGINT, SIG_IGN) != SIG_IGN)
+ signal(SIGINT, term);
+*/
+ seek(obf, obs*oseekn, 1);
+ seek(ibf, ibs*iseekn, 1);
+ while(skip) { /* skip input records by reading and discarding them; the read result is not checked */
+ read(ibf, ibuf, ibs);
+ skip--;
+ }
+
+ ip = 0;
+loop:
+ if(ibc-- == 0) { /* no unread input bytes left: fetch the next record */
+ ibc = 0;
+ if(count==0 || nifr+nipr!=count) { /* count of 0 means no limit */
+ if(cflag&(NERR|SYNC)) /* zero ibuf first so the code below sees zeros past the data read */
+ for(ip=ibuf+ibs; ip>ibuf;)
+ *--ip = 0;
+ ibc = read(ibf, ibuf, ibs);
+ }
+ if(ibc == -1) {
+ perror("read");
+ if((cflag&NERR) == 0) {
+ flsh();
+ term();
+ }
+ ibc = 0;
+ for(c=0; c<ibs; c++) /* noerror: set ibc to the index of the last nonzero byte in ibuf */
+ if(ibuf[c] != 0)
+ ibc = c;
+ stats();
+ }
+ if(ibc == 0 && --files<=0) { /* an empty read ends a file; stop after the last one */
+ flsh();
+ term();
+ }
+ if(ibc != ibs) { /* short read counts as a partial record */
+ nipr++;
+ if(cflag&SYNC)
+ ibc = ibs;
+ } else
+ nifr++;
+ ip = ibuf;
+ c = (ibc>>1) & ~1; /* half the byte count with the low bit cleared: iterations of the swap loop */
+ if(cflag&SWAB && c)
+ do { /* exchange the two bytes of each pair */
+ a = *ip++;
+ ip[-1] = *ip;
+ *ip++ = a;
+ } while(--c);
+ ip = ibuf;
+ if(fflag) { /* fast path: write the record as read, no per-byte conversion */
+ obc = ibc;
+ flsh();
+ ibc = 0;
+ }
+ goto loop;
+ }
+ c = 0;
+ c |= *ip++;
+ c &= 0377; /* keep the byte value in 0..255 whatever the signedness of char */
+ (*conv)(c);
+ goto loop;
+}
+
+/*
+ * Write the obc pending bytes of obuf to obf.  A short or failed write
+ * is reported with perror and ends the program through term().  A block
+ * of exactly obs bytes counts as a full output record, anything else as
+ * a partial one.
+ */
+void
+flsh(void)
+{
+	int nw;
+
+	if(obc == 0)
+		return;
+	nw = write(obf, obuf, obc);
+	if(nw != obc){
+		if(nw > 0)
+			nopr++;
+		perror("write");
+		term();
+	}
+	if(obc != obs)
+		nopr++;
+	else
+		nofr++;
+	obc = 0;
+}
+
+/*
+ * If the text at `string` begins with s, advance `string` past that
+ * prefix and return 1; otherwise leave `string` untouched and return 0.
+ */
+int
+match(char *s)
+{
+	char *cs;
+
+	for(cs = string; *s != '\0'; cs++, s++)
+		if(*cs != *s)
+			return 0;
+	string = cs;
+	return 1;
+}
+
+/*
+ * Parse the decimal number at `string` and apply any multiplier
+ * suffixes that follow it: k (times 1024), b (times 512) and x followed
+ * by another number (times that number).
+ * Exits with status "range" when the result is negative or >= big, and
+ * with status "number" when a character other than a known suffix
+ * follows the digits.  Such characters used to be skipped silently, so
+ * a value like "4m" was read as 4.
+ */
+vlong
+number(long big)
+{
+	char *cs;
+	vlong n;
+
+	cs = string;
+	n = 0;
+	while(*cs >= '0' && *cs <= '9')
+		n = n*10 + *cs++ - '0';
+	for(;;)
+		switch(*cs++) {
+		case 'k':
+			n *= 1024;
+			continue;
+
+		case 'b':
+			n *= 512;
+			continue;
+
+		case 'x':
+			string = cs;
+			n *= number(BIG);
+			/* fall through: the recursive call parsed the rest of the string */
+
+		case '\0':
+			if(n>=big || n<0) {
+				fprint(2, "dd: argument %lld out of range\n", n);
+				exits("range");
+			}
+			return n;
+
+		default:
+			fprint(2, "dd: bad number: %s\n", string);
+			exits("number");
+		}
+	/* never gets here */
+}
+
+/*
+ * Apply the ucase and lcase conversions requested in cflag (ucase
+ * first, then lcase) and hand the resulting byte to null().
+ */
+void
+cnull(int cc)
+{
+	if((cflag&UCASE) != 0 && 'a' <= cc && cc <= 'z')
+		cc -= 'a'-'A';
+	if((cflag&LCASE) != 0 && 'A' <= cc && cc <= 'Z')
+		cc += 'a'-'A';
+	null(cc);
+}
+
+/*
+ * Append byte c to the output buffer; once obs bytes are pending, flush
+ * them and start filling the buffer from the beginning again.
+ */
+void
+null(int c)
+{
+	*op++ = c;
+	obc++;
+	if(obc < obs)
+		return;
+	flsh();
+	op = obuf;
+}
+
+/*
+ * Translate byte cc through the etoa table and emit it.  With cbs set,
+ * input is treated as fixed-length records of cbs bytes: blanks are held
+ * back in nspace and only written when a non-blank follows, and a
+ * newline is emitted at the end of every record.
+ */
+void
+ascii(int cc)
+{
+	int c;
+
+	c = etoa[cc];
+	if(cbs == 0){
+		cnull(c);
+		return;
+	}
+	if(c != ' '){
+		/* release the blanks held back before this character */
+		for(; nspace > 0; nspace--)
+			null(' ');
+		cnull(c);
+	}else
+		nspace++;
+	if(++cbc >= cbs){
+		null('\n');
+		cbc = 0;
+		nspace = 0;
+	}
+}
+
+/*
+ * Emit byte cc without table translation.  With cbs set, input is
+ * treated as fixed-length records of cbs bytes: blanks are held back in
+ * nspace and only written when a non-blank follows, and a newline is
+ * emitted at the end of every record.
+ */
+void
+unblock(int cc)
+{
+	int c;
+
+	c = cc & 0377;
+	if(cbs == 0){
+		cnull(c);
+		return;
+	}
+	if(c != ' '){
+		/* release the blanks held back before this character */
+		for(; nspace > 0; nspace--)
+			null(' ');
+		cnull(c);
+	}else
+		nspace++;
+	if(++cbc >= cbs){
+		null('\n');
+		cbc = 0;
+		nspace = 0;
+	}
+}
+
+/*
+ * Case-fold cc as requested in cflag, translate it through the atoe
+ * table and emit it.  With cbs set, each input line becomes a record of
+ * cbs bytes: a newline pads the record with translated blanks, and
+ * bytes beyond cbs are dropped, counting the record once in ntrunc.
+ */
+void
+ebcdic(int cc)
+{
+	int c;
+
+	c = cc;
+	if((cflag&UCASE) && 'a' <= c && c <= 'z')
+		c -= 'a'-'A';
+	if((cflag&LCASE) && 'A' <= c && c <= 'Z')
+		c += 'a'-'A';
+	c = atoe[c];
+	if(cbs == 0){
+		null(c);
+		return;
+	}
+	if(cc != '\n'){
+		if(cbc == cbs)
+			ntrunc++;
+		if(++cbc <= cbs)
+			null(c);
+		return;
+	}
+	/* end of line: pad the record out to cbs bytes */
+	for(; cbc < cbs; cbc++)
+		null(atoe[' ']);
+	cbc = 0;
+}
+
+/*
+ * Case-fold cc as requested in cflag, translate it through the atoibm
+ * table and emit it.  With cbs set, each input line becomes a record of
+ * cbs bytes: a newline pads the record with translated blanks, and
+ * bytes beyond cbs are dropped, counting the record once in ntrunc.
+ */
+void
+ibm(int cc)
+{
+	int c;
+
+	c = cc;
+	if((cflag&UCASE) && 'a' <= c && c <= 'z')
+		c -= 'a'-'A';
+	if((cflag&LCASE) && 'A' <= c && c <= 'Z')
+		c += 'a'-'A';
+	c = atoibm[c] & 0377;
+	if(cbs == 0){
+		null(c);
+		return;
+	}
+	if(cc != '\n'){
+		if(cbc == cbs)
+			ntrunc++;
+		if(++cbc <= cbs)
+			null(c);
+		return;
+	}
+	/* end of line: pad the record out to cbs bytes */
+	for(; cbc < cbs; cbc++)
+		null(atoibm[' ']);
+	cbc = 0;
+}
+
+/*
+ * Case-fold cc as requested in cflag and emit it without table
+ * translation.  With cbs set, each input line becomes a record of cbs
+ * bytes: a newline pads the record with blanks, and bytes beyond cbs
+ * are dropped, counting the record once in ntrunc.
+ */
+void
+block(int cc)
+{
+	int c;
+
+	c = cc;
+	if((cflag&UCASE) && 'a' <= c && c <= 'z')
+		c -= 'a'-'A';
+	if((cflag&LCASE) && 'A' <= c && c <= 'Z')
+		c += 'a'-'A';
+	c &= 0377;
+	if(cbs == 0){
+		null(c);
+		return;
+	}
+	if(cc != '\n'){
+		if(cbc == cbs)
+			ntrunc++;
+		if(++cbc <= cbs)
+			null(c);
+		return;
+	}
+	/* end of line: pad the record out to cbs bytes */
+	for(; cbc < cbs; cbc++)
+		null(' ');
+	cbc = 0;
+}
+/* Finish the run: print the record counts through stats() and exit with status 0. */
+void
+term(void)
+{
+
+ stats();
+ exits(0);
+}
+
+/*
+ * Report on file descriptor 2 the full+partial record counts for input
+ * and output, plus the number of truncated records when there were any.
+ */
+void
+stats(void)
+{
+	fprint(2, "%lud+%lud records in\n", nifr, nipr);
+	fprint(2, "%lud+%lud records out\n", nofr, nopr);
+	if(ntrunc != 0)
+		fprint(2, "%lud truncated records\n", ntrunc);
+}
+
+uchar etoa[] =
+{
+ 0000,0001,0002,0003,0234,0011,0206,0177,
+ 0227,0215,0216,0013,0014,0015,0016,0017,
+ 0020,0021,0022,0023,0235,0205,0010,0207,
+ 0030,0031,0222,0217,0034,0035,0036,0037,
+ 0200,0201,0202,0203,0204,0012,0027,0033,
+ 0210,0211,0212,0213,0214,0005,0006,0007,
+ 0220,0221,0026,0223,0224,0225,0226,0004,
+ 0230,0231,0232,0233,0024,0025,0236,0032,
+ 0040,0240,0241,0242,0243,0244,0245,0246,
+ 0247,0250,0133,0056,0074,0050,0053,0041,
+ 0046,0251,0252,0253,0254,0255,0256,0257,
+ 0260,0261,0135,0044,0052,0051,0073,0136,
+ 0055,0057,0262,0263,0264,0265,0266,0267,
+ 0270,0271,0174,0054,0045,0137,0076,0077,
+ 0272,0273,0274,0275,0276,0277,0300,0301,
+ 0302,0140,0072,0043,0100,0047,0075,0042,
+ 0303,0141,0142,0143,0144,0145,0146,0147,
+ 0150,0151,0304,0305,0306,0307,0310,0311,
+ 0312,0152,0153,0154,0155,0156,0157,0160,
+ 0161,0162,0313,0314,0315,0316,0317,0320,
+ 0321,0176,0163,0164,0165,0166,0167,0170,
+ 0171,0172,0322,0323,0324,0325,0326,0327,
+ 0330,0331,0332,0333,0334,0335,0336,0337,
+ 0340,0341,0342,0343,0344,0345,0346,0347,
+ 0173,0101,0102,0103,0104,0105,0106,0107,
+ 0110,0111,0350,0351,0352,0353,0354,0355,
+ 0175,0112,0113,0114,0115,0116,0117,0120,
+ 0121,0122,0356,0357,0360,0361,0362,0363,
+ 0134,0237,0123,0124,0125,0126,0127,0130,
+ 0131,0132,0364,0365,0366,0367,0370,0371,
+ 0060,0061,0062,0063,0064,0065,0066,0067,
+ 0070,0071,0372,0373,0374,0375,0376,0377,
+};
+uchar atoe[] =
+{
+ 0000,0001,0002,0003,0067,0055,0056,0057,
+ 0026,0005,0045,0013,0014,0015,0016,0017,
+ 0020,0021,0022,0023,0074,0075,0062,0046,
+ 0030,0031,0077,0047,0034,0035,0036,0037,
+ 0100,0117,0177,0173,0133,0154,0120,0175,
+ 0115,0135,0134,0116,0153,0140,0113,0141,
+ 0360,0361,0362,0363,0364,0365,0366,0367,
+ 0370,0371,0172,0136,0114,0176,0156,0157,
+ 0174,0301,0302,0303,0304,0305,0306,0307,
+ 0310,0311,0321,0322,0323,0324,0325,0326,
+ 0327,0330,0331,0342,0343,0344,0345,0346,
+ 0347,0350,0351,0112,0340,0132,0137,0155,
+ 0171,0201,0202,0203,0204,0205,0206,0207,
+ 0210,0211,0221,0222,0223,0224,0225,0226,
+ 0227,0230,0231,0242,0243,0244,0245,0246,
+ 0247,0250,0251,0300,0152,0320,0241,0007,
+ 0040,0041,0042,0043,0044,0025,0006,0027,
+ 0050,0051,0052,0053,0054,0011,0012,0033,
+ 0060,0061,0032,0063,0064,0065,0066,0010,
+ 0070,0071,0072,0073,0004,0024,0076,0341,
+ 0101,0102,0103,0104,0105,0106,0107,0110,
+ 0111,0121,0122,0123,0124,0125,0126,0127,
+ 0130,0131,0142,0143,0144,0145,0146,0147,
+ 0150,0151,0160,0161,0162,0163,0164,0165,
+ 0166,0167,0170,0200,0212,0213,0214,0215,
+ 0216,0217,0220,0232,0233,0234,0235,0236,
+ 0237,0240,0252,0253,0254,0255,0256,0257,
+ 0260,0261,0262,0263,0264,0265,0266,0267,
+ 0270,0271,0272,0273,0274,0275,0276,0277,
+ 0312,0313,0314,0315,0316,0317,0332,0333,
+ 0334,0335,0336,0337,0352,0353,0354,0355,
+ 0356,0357,0372,0373,0374,0375,0376,0377,
+};
+uchar atoibm[] =
+{
+ 0000,0001,0002,0003,0067,0055,0056,0057,
+ 0026,0005,0045,0013,0014,0015,0016,0017,
+ 0020,0021,0022,0023,0074,0075,0062,0046,
+ 0030,0031,0077,0047,0034,0035,0036,0037,
+ 0100,0132,0177,0173,0133,0154,0120,0175,
+ 0115,0135,0134,0116,0153,0140,0113,0141,
+ 0360,0361,0362,0363,0364,0365,0366,0367,
+ 0370,0371,0172,0136,0114,0176,0156,0157,
+ 0174,0301,0302,0303,0304,0305,0306,0307,
+ 0310,0311,0321,0322,0323,0324,0325,0326,
+ 0327,0330,0331,0342,0343,0344,0345,0346,
+ 0347,0350,0351,0255,0340,0275,0137,0155,
+ 0171,0201,0202,0203,0204,0205,0206,0207,
+ 0210,0211,0221,0222,0223,0224,0225,0226,
+ 0227,0230,0231,0242,0243,0244,0245,0246,
+ 0247,0250,0251,0300,0117,0320,0241,0007,
+ 0040,0041,0042,0043,0044,0025,0006,0027,
+ 0050,0051,0052,0053,0054,0011,0012,0033,
+ 0060,0061,0032,0063,0064,0065,0066,0010,
+ 0070,0071,0072,0073,0004,0024,0076,0341,
+ 0101,0102,0103,0104,0105,0106,0107,0110,
+ 0111,0121,0122,0123,0124,0125,0126,0127,
+ 0130,0131,0142,0143,0144,0145,0146,0147,
+ 0150,0151,0160,0161,0162,0163,0164,0165,
+ 0166,0167,0170,0200,0212,0213,0214,0215,
+ 0216,0217,0220,0232,0233,0234,0235,0236,
+ 0237,0240,0252,0253,0254,0255,0256,0257,
+ 0260,0261,0262,0263,0264,0265,0266,0267,
+ 0270,0271,0272,0273,0274,0275,0276,0277,
+ 0312,0313,0314,0315,0316,0317,0332,0333,
+ 0334,0335,0336,0337,0352,0353,0354,0355,
+ 0356,0357,0372,0373,0374,0375,0376,0377,
+};
diff --git a/diff/Makefile b/diff/Makefile
@@ -0,0 +1,35 @@
+# diff - diff unix port from plan9
+# Depends on ../lib9
+
+TARG = diff
+OFILES = diffdir.o diffio.o diffreg.o main.o
+MANFILES = diff.1
+
+include ../config.mk
+
+all: ${TARG}
+ @strip ${TARG}
+ @echo built ${TARG}
+
+install: ${TARG}
+ @mkdir -p ${DESTDIR}${PREFIX}/bin
+ @cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/
+ @chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG}
+ @mkdir -p ${DESTDIR}${MANPREFIX}/man1
+ @cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1
+ @chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
+
+# Remove what install put in place: the binary under PREFIX/bin and the
+# manual page under MANPREFIX/man1 (the directory install copies it to).
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/${TARG}
+	rm -f ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
+
+.c.o:
+ @echo CC $*.c
+ @${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
+
+clean:
+ rm -f ${OFILES} ${TARG}
+
+${TARG}: ${OFILES}
+ @echo LD ${TARG}
+ @${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -…
diff --git a/diff/diff.1 b/diff/diff.1
@@ -0,0 +1,163 @@
+.TH DIFF 1
+.SH NAME
+diff \- differential file comparator
+.SH SYNOPSIS
+.B diff
+[
+.B -acefmnbwr
+] file1 ... file2
+.SH DESCRIPTION
+.I Diff
+tells what lines must be changed in two files to bring them
+into agreement.
+If one file
+is a directory,
+then a file in that directory with basename the same as that of
+the other file is used.
+If both files are directories, similarly named files in the
+two directories are compared by the method of
+.I diff
+for text
+files and
+.IR cmp (1)
+otherwise.
+If more than two file names are given, then each argument is compared
+to the last argument as above.
+The
+.B -r
+option causes
+.I diff
+to process similarly named subdirectories recursively.
+When processing more than one file,
+.I diff
+prefixes file differences with a single line
+listing the two differing files, in the form of
+a
+.I diff
+command line.
+The
+.B -m
+flag causes this behavior even when processing single files.
+.PP
+The normal output contains lines of these forms:
+.IP "" 5
+.I n1
+.B a
+.I n3,n4
+.br
+.I n1,n2
+.B d
+.I n3
+.br
+.I n1,n2
+.B c
+.I n3,n4
+.PP
+These lines resemble
+.I ed
+commands to convert
+.I file1
+into
+.IR file2 .
+The numbers after the letters pertain to
+.IR file2 .
+In fact, by exchanging `a' for `d' and reading backward
+one may ascertain equally how to convert
+.I file2
+into
+.IR file1 .
+As in
+.IR ed ,
+identical pairs where
+.I n1
+=
+.I n2
+or
+.I n3
+=
+.I n4
+are abbreviated as a single number.
+.PP
+Following each of these lines come all the lines that are
+affected in the first file flagged by `<',
+then all the lines that are affected in the second file
+flagged by `>'.
+.PP
+The
+.B -b
+option causes
+trailing blanks (spaces and tabs) to be ignored
+and other strings of blanks to compare equal.
+The
+.B -w
+option causes all white-space to be removed from input lines
+before applying the difference algorithm.
+.PP
+The
+.B -n
+option prefixes each range with
+.IB file : \fR
+and inserts a space around the
+.BR a ,
+.BR c ,
+and
+.B d
+verbs.
+The
+.B -e
+option produces a script of
+.I "a, c"
+and
+.I d
+commands for the editor
+.IR ed ,
+which will recreate
+.I file2
+from
+.IR file1 .
+The
+.B -f
+option produces a similar script,
+not useful with
+.IR ed ,
+in the opposite order. It may, however, be
+useful as input to a stream-oriented post-processor.
+.PP
+The
+.B -c
+option includes three lines of context around each
+change, merging changes whose contexts overlap.
+The
+.B -a
+flag displays the entire file as context.
+.PP
+Except in rare circumstances,
+.I diff
+finds a smallest sufficient set of file
+differences.
+.SH FILES
+.B /tmp/diff[12]
+.SH SOURCE
+.B \*9/src/cmd/diff
+.SH "SEE ALSO"
+.IR cmp (1),
+.IR comm (1),
+.IR ed (1)
+.SH DIAGNOSTICS
+Exit status is the empty string
+for no differences,
+.L some
+for some,
+and
+.L error
+for trouble.
+.SH BUGS
+Editing scripts produced under the
+.BR -e " or"
+.BR -f " option are naive about"
+creating lines consisting of a single `\fB.\fR'.
+.PP
+When running
+.I diff
+on directories, the notion of what is a text
+file is open to debate.
diff --git a/diff/diff.h b/diff/diff.h
@@ -0,0 +1,27 @@
+#define stdout bstdout
+
+char mode; /* '\0', 'e', 'f', 'h' */
+char bflag; /* ignore multiple and trailing blanks */
+char rflag; /* recurse down directory trees */
+char mflag; /* pseudo flag: doing multiple files, one d…
+int anychange;
+extern Biobuf stdout;
+extern int binary;
+
+#define MALLOC(t, n) ((t *)emalloc((n)*sizeof(t)))
+#define REALLOC(p, t, n) ((t *)erealloc((void *)(p), (n)*sizeof(t)))
+#define FREE(p) free((void *)(p))
+
+#define MAXPATHLEN 1024
+
+int mkpathname(char *, char *, char *);
+void *emalloc(unsigned);
+void *erealloc(void *, unsigned);
+void diff(char *, char *, int);
+void diffdir(char *, char *, int);
+void diffreg(char *, char *);
+Biobuf *prepare(int, char *);
+void panic(int, char *, ...);
+void check(Biobuf *, Biobuf *);
+void change(int, int, int, int);
+void flushchanges(void);
diff --git a/diff/diffdir.c b/diff/diffdir.c
@@ -0,0 +1,113 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+/* qsort comparison callback: order two char* array elements by strcmp of the strings they point to. */
+static int
+itemcmp(const void *v1, const void *v2)
+{
+	return strcmp(*(char *const *)v1, *(char *const *)v2);
+}
+
+/*
+ * Return a zero-terminated array, sorted with strcmp, of freshly
+ * allocated copies of the entry names in directory `name`.  When the
+ * directory cannot be opened, panic is called and nil is returned.
+ */
+static char **
+scandir(char *name)
+{
+	char **names;
+	Dir *ents;
+	int fd, nents, count, i;
+
+	fd = open(name, OREAD);
+	if (fd < 0){
+		panic(mflag ? 0 : 2, "can't open %s\n", name);
+		return nil;
+	}
+	names = 0;
+	count = 0;
+	nents = dirreadall(fd, &ents);
+	if(nents > 0){
+		/* copy the names, walking the listing from last entry to first */
+		for (i = nents-1; i >= 0; i--) {
+			names = REALLOC(names, char *, (count+1));
+			names[count] = MALLOC(char, strlen(ents[i].name)+1);
+			strcpy(names[count], ents[i].name);
+			count++;
+		}
+		free(ents);
+	}
+	names = REALLOC(names, char*, (count+1));
+	names[count] = 0;
+	close(fd);
+	qsort((char *)names, count, sizeof(char*), itemcmp);
+	return names;
+}
+
+/* Return 1 when p is exactly "." or "..", otherwise 0. */
+static int
+isdotordotdot(char *p)
+{
+	return strcmp(p, ".") == 0 || strcmp(p, "..") == 0;
+}
+
+/*
+ * Compare directories f and t entry by entry.  Both listings come back
+ * from scandir sorted with strcmp, so they are walked like two sorted
+ * lists: a name found on one side only is reported as "Only in ..."
+ * (in modes 0 and 'n'), a name found on both sides is handed to diff()
+ * one level deeper.  Entries "." and ".." are ignored.
+ */
+void
+diffdir(char *f, char *t, int level)
+{
+	char **df, **dt, **dirf, **dirt;
+	char *from, *to;
+	int res;
+	char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+
+	df = scandir(f);
+	dt = scandir(t);
+	dirf = df;
+	dirt = dt;
+	if(df == nil || dt == nil)
+		goto Out;
+	while (*df || *dt) {
+		from = *df;
+		to = *dt;
+		if (from && isdotordotdot(from)) {
+			df++;
+			continue;
+		}
+		if (to && isdotordotdot(to)) {
+			dt++;
+			continue;
+		}
+		/* an exhausted list sorts after every name on the other side */
+		if (!from)
+			res = 1;
+		else if (!to)
+			res = -1;
+		else
+			res = strcmp(from, to);
+		if (res < 0) {
+			if (mode == 0 || mode == 'n')
+				Bprint(&stdout, "Only in %s: %s\n", f, from);
+			df++;
+			continue;
+		}
+		if (res > 0) {
+			if (mode == 0 || mode == 'n')
+				Bprint(&stdout, "Only in %s: %s\n", t, to);
+			dt++;
+			continue;
+		}
+		/*
+		 * A nonzero return from mkpathname means this pair is not
+		 * compared.  Step past both entries before continuing;
+		 * otherwise the loop would revisit the same pair forever.
+		 */
+		if (mkpathname(fb, f, from) || mkpathname(tb, t, to)) {
+			df++; dt++;
+			continue;
+		}
+		diff(fb, tb, level+1);
+		df++; dt++;
+	}
+Out:
+	/* release every name and then the arrays; either list may be nil here */
+	for (df = dirf; df && *df; df++)
+		FREE(*df);
+	for (dt = dirt; dt && *dt; dt++)
+		FREE(*dt);
+	FREE(dirf);
+	FREE(dirt);
+}
diff --git a/diff/diffio.c b/diff/diffio.c
@@ -0,0 +1,387 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include "diff.h"
+
+/* one input line: its position and the hash of its text */
+struct line {
+ int serial; /* line position; renumbered by prune() in diffreg.c */
+ int value; /* hash returned by readhash(), stored by prepare() */
+};
+/* the following are defined in diffreg.c */
+extern struct line *file[2];
+extern int len[2];
+extern long *ixold, *ixnew;
+extern int *J;
+
+static Biobuf *input[2]; /* the two open inputs, set by prepare() */
+static char *file1, *file2; /* names of the two inputs, set by prepare() */
+static int firstchange; /* nonzero once change() has printed the "diff ..." header */
+
+#define MAXLINELEN 4096
+#define MIN(x, y) ((x) < (y) ? (x): (y))
+
+/*
+ * Read the next line from bp into buf (capacity MAXLINELEN) without
+ * its newline and NUL-terminate it.  At most MAXLINELEN-1 bytes are
+ * kept; the rest of a longer line is read and thrown away.
+ * Returns the number of bytes stored, or -1 when end of file is hit
+ * before any byte was read.
+ */
+static int
+readline(Biobuf *bp, char *buf)
+{
+	char *dst, *limit;
+	int ch;
+
+	dst = buf;
+	limit = buf + MAXLINELEN-1;
+	for(;;){
+		ch = Bgetc(bp);
+		if(ch < 0){
+			if(dst == buf)
+				return -1;
+			break;
+		}
+		if(ch == '\n')
+			break;
+		*dst++ = ch;
+		if(dst >= limit)
+			break;
+	}
+	*dst = 0;
+	/* buffer filled in mid-line: consume input up to the newline or EOF */
+	if(ch >= 0 && ch != '\n'){
+		do
+			ch = Bgetc(bp);
+		while(ch >= 0 && ch != '\n');
+	}
+	return dst - buf;
+}
+
+#define HALFLONG 16
+#define low(x) (x&((1L<<HALFLONG)-1))
+#define high(x) (x>>HALFLONG)
+
+/*
+ * hashing has the effect of
+ * arranging line in 7-bit bytes and then
+ * summing 1-s complement in 16-bit hunks
+ *
+ * Reads one line from bp into buf (via readline) and returns its hash.
+ * Returns 0 when readline reports end of file; prepare() uses that 0
+ * as its loop terminator.
+ * bflag selects the white-space treatment: 0 hashes every byte,
+ * 1 treats each run of white space between two hashed characters as
+ * one extra 7-bit step, anything else ignores white space entirely.
+ * NOTE(review): nothing visible here prevents a non-empty line from
+ * folding to 0, which prepare() would take for end of file - confirm.
+ */
+static int
+readhash(Biobuf *bp, char *buf)
+{
+ long sum;
+ unsigned shift;
+ char *p;
+ int len, space;
+
+ sum = 1;
+ shift = 0;
+ if ((len = readline(bp, buf)) == -1)
+ return 0;
+ p = buf;
+ switch(bflag) /* various types of white space handling */
+ {
+ case 0:
+ while (len--) {
+ /* shift is reduced modulo HALFLONG before each use */
+ sum += (long)*p++ << (shift &= (HALFLONG-1));
+ shift += 7;
+ }
+ break;
+ case 1:
+ /*
+ * coalesce multiple white-space
+ */
+ for (space = 0; len--; p++) {
+ if (isspace((uchar)*p)) {
+ space++;
+ continue;
+ }
+ if (space) {
+ /* a run of white space costs one shift step, whatever its length */
+ shift += 7;
+ space = 0;
+ }
+ sum += (long)*p << (shift &= (HALFLONG-1));
+ shift += 7;
+ }
+ break;
+ default:
+ /*
+ * strip all white-space
+ */
+ while (len--) {
+ if (isspace((uchar)*p)) {
+ p++;
+ continue;
+ }
+ sum += (long)*p++ << (shift &= (HALFLONG-1));
+ shift += 7;
+ }
+ break;
+ }
+ /* fold the sum into 16 bits, twice */
+ sum = low(sum) + high(sum);
+ return ((short)low(sum) + (short)high(sum));
+}
+
+/*
+ * Open arg as input number i (0 or 1) and, unless it looks binary,
+ * hash all of its lines.
+ * Returns the open Biobuf, or 0 if the file cannot be opened (the
+ * failure is reported through panic, which returns only when mflag
+ * is set).
+ * When the global `binary` is already set, or gets set by the sniffing
+ * below, the function returns early and file[i], len[i], input[i] and
+ * the file names are NOT updated.
+ * Otherwise file[i][1..len[i]] receive the line hashes (slot 0 is
+ * left unused here).
+ */
+Biobuf *
+prepare(int i, char *arg)
+{
+ struct line *p;
+ int j, h;
+ Biobuf *bp;
+ char *cp, buf[MAXLINELEN];
+ int nbytes;
+ Rune r;
+
+ bp = Bopen(arg, OREAD);
+ if (!bp) {
+ panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
+ return 0;
+ }
+ if (binary)
+ return bp;
+ /* sniff at most the first 1024 bytes */
+ nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
+ if (nbytes > 0) {
+ cp = buf;
+ /* stop UTFmax bytes short of the end of what was read */
+ while (cp < buf+nbytes-UTFmax) {
+ /*
+ * heuristic for a binary file in the
+ * brave new UNICODE world
+ */
+ cp += chartorune(&r, cp);
+ if (r == 0 || (r > 0x7f && r <= 0xa0)) {
+ binary++;
+ return bp;
+ }
+ }
+ Bseek(bp, 0, 0);
+ }
+ p = MALLOC(struct line, 3);
+ /*
+ * The body grows the array and advances j; the increment
+ * expression then stores the hash in p[j], so entries start at 1.
+ * The loop ends when readhash returns 0.
+ */
+ for (j = 0; h = readhash(bp, buf); p[j].value = h)
+ p = REALLOC(p, struct line, (++j+3));
+ len[i] = j;
+ file[i] = p;
+ input[i] = bp; /*fix*/
+ if (i == 0) { /*fix*/
+ file1 = arg;
+ firstchange = 0;
+ }
+ else
+ file2 = arg;
+ return bp;
+}
+
+/*
+ * Remove white space from buf in place and return the new length.
+ * With bflag == 1 every run of white space that is followed by a
+ * non-space character becomes a single ' '; with any other bflag the
+ * white space is dropped altogether.  Trailing white space is always
+ * dropped.
+ */
+static int
+squishspace(char *buf)
+{
+	char *src, *dst;
+	int pending;
+
+	pending = 0;
+	dst = buf;
+	for(src = buf; *src != '\0'; src++){
+		if(isspace((uchar)*src)){
+			pending++;
+			continue;
+		}
+		if(pending != 0 && bflag == 1){
+			*dst++ = ' ';
+			pending = 0;
+		}
+		*dst++ = *src;
+	}
+	*dst = 0;
+	return dst - buf;
+}
+
+/*
+ * need to fix up for unexpected EOF's
+ *
+ * Re-read both files line by line to
+ *  - fill ixold[] / ixnew[] with running byte offsets (each line
+ *    contributes the length readline returned plus 1), later used by
+ *    fetch() as seek positions, and
+ *  - compare the text of every pair of lines that J[] claims to
+ *    match, clearing J[f] when the texts differ (a hash collision).
+ * With bflag set the lines are compared after squishspace().
+ * NOTE(review): readline() keeps at most MAXLINELEN-1 bytes of a line,
+ * so the offsets look wrong for longer lines - confirm.
+ */
+void
+check(Biobuf *bf, Biobuf *bt)
+{
+ int f, t, flen, tlen;
+ char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
+
+ ixold[0] = ixnew[0] = 0;
+ for (f = t = 1; f < len[0]; f++) {
+ flen = readline(bf, fbuf);
+ ixold[f] = ixold[f-1] + flen + 1; /* ftell(bf) …
+ if (J[f] == 0)
+ continue;
+ /* read the new file up to and including line J[f] */
+ do {
+ tlen = readline(bt, tbuf);
+ ixnew[t] = ixnew[t-1] + tlen + 1; /* ftell(bt) …
+ } while (t++ < J[f]);
+ if (bflag) {
+ flen = squishspace(fbuf);
+ tlen = squishspace(tbuf);
+ }
+ if (flen != tlen || strcmp(fbuf, tbuf))
+ J[f] = 0;
+ }
+ /* index whatever is left of the new file */
+ while (t < len[1]) {
+ tlen = readline(bt, tbuf);
+ ixnew[t] = ixnew[t-1] + tlen + 1; /* fseek(bt) */
+ t++;
+ }
+}
+
+/*
+ * Print a line range on stdout: the smaller of a and b, followed by
+ * separator and b when a < b.
+ */
+static void
+range(int a, int b, char *separator)
+{
+	int first;
+
+	first = a;
+	if(b < a)
+		first = b;
+	Bprint(&stdout, "%d", first);
+	if(b > a)
+		Bprint(&stdout, "%s%d", separator, b);
+}
+
+/*
+ * Print lines a..b of the file open on bp, each prefixed with s.
+ * f is the offset table for that file (ixold or ixnew); f[a-1] is
+ * used as the seek position of line a.  The range is clipped to
+ * 1..len of the file; nothing is done when a lies past the end.
+ */
+static void
+fetch(long *f, int a, int b, Biobuf *bp, char *s)
+{
+	char text[MAXLINELEN];
+	int last, n;
+
+	if(a < 1)
+		a = 1;
+	last = (bp == input[0]) ? len[0] : len[1];
+	if(b > last)
+		b = last;
+	if(a > last)
+		return;
+	Bseek(bp, f[a-1], 0);
+	for(n = a; n <= b; n++){
+		readline(bp, text);
+		Bprint(&stdout, "%s%s\n", s, text);
+	}
+}
+
+/*
+ * One difference queued by change() for the 'c' and 'a' output modes:
+ * lines a..b of the old file against lines c..d of the new file.
+ */
+typedef struct Change Change;
+struct Change
+{
+ int a;
+ int b;
+ int c;
+ int d;
+};
+
+Change *changes; /* queue, grown 1024 entries at a time by change() */
+int nchanges; /* number of queued entries; reset by flushchanges() */
+
+/*
+ * Report one difference: lines a..b of the old file versus lines c..d
+ * of the new file.  An empty range is passed as first > last; if both
+ * ranges are empty there is nothing to report.
+ * Sets anychange, prints the "diff ..." header once per file pair
+ * when mflag is set, then formats the difference according to `mode`.
+ * In modes 'c' and 'a' the difference is only queued in changes[] and
+ * printed later by flushchanges().
+ */
+void
+change(int a, int b, int c, int d)
+{
+ char verb;
+ char buf[4];
+ Change *ch;
+
+ if (a > b && c > d)
+ return;
+ anychange = 1;
+ if (mflag && firstchange == 0) {
+ /* rebuild the mode flag ("-e ", "-c ", ...) for the header line */
+ if(mode) {
+ buf[0] = '-';
+ buf[1] = mode;
+ buf[2] = ' ';
+ buf[3] = '\0';
+ } else {
+ buf[0] = '\0';
+ }
+ Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
+ firstchange = 1;
+ }
+ /* empty old range: append; empty new range: delete; otherwise change */
+ verb = a > b ? 'a': c > d ? 'd': 'c';
+ switch(mode) {
+ case 'e':
+ range(a, b, ",");
+ Bputc(&stdout, verb);
+ break;
+ case 0:
+ range(a, b, ",");
+ Bputc(&stdout, verb);
+ range(c, d, ",");
+ break;
+ case 'n':
+ Bprint(&stdout, "%s:", file1);
+ range(a, b, ",");
+ Bprint(&stdout, " %c ", verb);
+ Bprint(&stdout, "%s:", file2);
+ range(c, d, ",");
+ break;
+ case 'f':
+ Bputc(&stdout, verb);
+ range(a, b, " ");
+ break;
+ case 'c':
+ case 'a':
+ /* queue only; the array grows whenever nchanges hits a multiple of 1024 */
+ if(nchanges%1024 == 0)
+ changes = erealloc(changes, (nchanges+1024)*sizeof(cha…
+ ch = &changes[nchanges++];
+ ch->a = a;
+ ch->b = b;
+ ch->c = c;
+ ch->d = d;
+ return;
+ }
+ Bputc(&stdout, '\n');
+ if (mode == 0 || mode == 'n') {
+ fetch(ixold, a, b, input[0], "< ");
+ if (a <= b && c <= d)
+ Bprint(&stdout, "---\n");
+ }
+ fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
+ /* modes other than default and 'n' end inserted text with a lone "." */
+ if (mode != 0 && mode != 'n' && c <= d)
+ Bprint(&stdout, ".\n");
+}
+
+enum
+{
+ Lines = 3 /* number of lines of context shown */
+};
+
+/*
+ * Starting at queued change i, return the index one past the last
+ * change that belongs to the same hunk.  A following change joins the
+ * hunk when it starts within 2*Lines lines of the end of the previous
+ * one.
+ * Returns nchanges when i is already past the queue.
+ */
+int
+changeset(int i)
+{
+	/*
+	 * Only compare against a successor that exists: changes[nchanges]
+	 * is not a queued entry and may lie outside the allocation.
+	 */
+	while(i+1 < nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
+		i++;
+	if(i < nchanges)
+		return i+1;
+	return nchanges;
+}
+
+/*
+ * Print the differences queued by change() (modes 'c' and 'a') and
+ * empty the queue.
+ * Queued changes are grouped with changeset(); each group is printed
+ * under a "file1:range - file2:range" header with up to Lines lines
+ * of surrounding text, clipped to the file bounds.  In mode 'a' a
+ * single group covering both files completely is printed instead.
+ */
+void
+flushchanges(void)
+{
+ int a, b, c, d, at;
+ int i, j;
+
+ if(nchanges == 0)
+ return;
+
+ for(i=0; i<nchanges; ){
+ j = changeset(i);
+ /* widen the group by Lines lines on either side, then clip */
+ a = changes[i].a-Lines;
+ b = changes[j-1].b+Lines;
+ c = changes[i].c-Lines;
+ d = changes[j-1].d+Lines;
+ if(a < 1)
+ a = 1;
+ if(c < 1)
+ c = 1;
+ if(b > len[0])
+ b = len[0];
+ if(d > len[1])
+ d = len[1];
+ if(mode == 'a'){
+ /* whole files, all queued changes in one group */
+ a = 1;
+ b = len[0];
+ c = 1;
+ d = len[1];
+ j = nchanges;
+ }
+ Bprint(&stdout, "%s:", file1);
+ range(a, b, ",");
+ Bprint(&stdout, " - ");
+ Bprint(&stdout, "%s:", file2);
+ range(c, d, ",");
+ Bputc(&stdout, '\n');
+ /* `at` is the next old-file line not yet printed */
+ at = a;
+ for(; i<j; i++){
+ fetch(ixold, at, changes[i].a-1, input[0], " ");
+ fetch(ixold, changes[i].a, changes[i].b, input[0], "- …
+ fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ …
+ at = changes[i].b+1;
+ }
+ fetch(ixold, at, b, input[0], " ");
+ }
+ nchanges = 0;
+}
diff --git a/diff/diffreg.c b/diff/diffreg.c
@@ -0,0 +1,420 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+/* diff - differential file comparison
+*
+* Uses an algorithm due to Harold Stone, which finds
+* a pair of longest identical subsequences in the two
+* files.
+*
+* The major goal is to generate the match vector J.
+* J[i] is the index of the line in file1 corresponding
+* to line i of file0. J[i] = 0 if there is no
+* such line in file1.
+*
+* Lines are hashed so as to work in core. All potential
+* matches are located by sorting the lines of each file
+* on the hash (called value). In particular, this
+* collects the equivalence classes in file1 together.
+* Subroutine equiv replaces the value of each line in
+* file0 by the index of the first element of its
+* matching equivalence in (the reordered) file1.
+* To save space equiv squeezes file1 into a single
+* array member in which the equivalence classes
+* are simply concatenated, except that their first
+* members are flagged by changing sign.
+*
+* Next the indices that point into member are unsorted into
+* array class according to the original order of file0.
+*
+* The cleverness lies in routine stone. This marches
+* through the lines of file0, developing a vector klist
+* of "k-candidates". At step i a k-candidate is a matched
+* pair of lines x,y (x in file0 y in file1) such that
+* there is a common subsequence of length k
+* between the first i lines of file0 and the first y
+* lines of file1, but there is no such subsequence for
+* any smaller y. x is the earliest possible mate to y
+* that occurs in such a subsequence.
+*
+* Whenever any of the members of the equivalence class of
+* lines in file1 matable to a line in file0 has serial number
+* less than the y of some k-candidate, that k-candidate
+* with the smallest such y is replaced. The new
+* k-candidate is chained (via pred) to the current
+* k-1 candidate so that the actual subsequence can
+* be recovered. When a member has serial number greater
+* than the y of all k-candidates, the klist is extended.
+* At the end, the longest subsequence is pulled out
+* and placed in the array J by unravel.
+*
+* With J in hand, the matches there recorded are
+* check'ed against reality to assure that no spurious
+* matches have crept in due to hashing. If they have,
+* they are broken, and "jackpot " is recorded--a harmless
+* matter except that a true match for a spuriously
+* mated line may now be unnecessarily reported as a change.
+*
+* Much of the complexity of the program comes simply
+* from trying to minimize core utilization and
+* maximize the range of doable problems by dynamically
+* allocating what is needed and reusing what is not.
+* The core requirements for problems larger than somewhat
+* are (in words) 2*length(file0) + length(file1) +
+* 3*(number of k-candidates installed), typically about
+* 6n words for files of length n.
+*/
+/* TIDY THIS UP */
+/*
+ * A k-candidate: line x of the pruned file0 paired with line y of the
+ * pruned file1.  pred is the index in clist of the candidate it was
+ * chained to by stone(); unravel() follows pred until it reaches the
+ * entry with y == 0.
+ */
+struct cand {
+ int x;
+ int y;
+ int pred;
+} cand;
+/* one input line: position and hash; arrays are filled by prepare() */
+struct line {
+ int serial; /* line position, renumbered by prune() */
+ int value; /* line hash; overwritten by equiv() for file0 */
+} *file[2], line;
+int len[2];
+int binary;
+struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/
+int slen[2];
+int pref, suff; /*length of prefix and suffix*/
+int *class; /*will be overlaid on file[0]*/
+int *member; /*will be overlaid on file[1]*/
+int *klist; /*will be overlaid on file[0] after class*/
+struct cand *clist; /* merely a free storage pot for candidates */
+int clen;
+int *J; /*will be overlaid on class*/
+long *ixold; /*will be overlaid on klist*/
+long *ixnew; /*will be overlaid on file[1]*/
+/* END OF SOME TIDYING */
+
+/*
+ * Sort a[1..n] in place into increasing (value, serial) order.
+ * Element a[0] is not part of the sorted range.
+ * The first loop picks the starting gap as one less than a power of
+ * two; the gap is halved after every pass.
+ */
+static void
+sort(struct line *a, int n) /*shellsort CACM #201*/
+{
+ int m;
+ struct line *ai, *aim, *j, *k;
+ struct line w;
+ int i;
+
+ m = 0;
+ for (i = 1; i <= n; i *= 2)
+ m = 2*i - 1;
+ for (m /= 2; m != 0; m /= 2) {
+ k = a+(n-m);
+ for (j = a+1; j <= k; j++) {
+ ai = j;
+ aim = ai+m;
+ do {
+ /* stop sinking once the pair is in order */
+ if (aim->value > ai->value ||
+ aim->value == ai->value &&
+ aim->serial > ai->serial)
+ break;
+ w = *ai;
+ *ai = *aim;
+ *aim = w;
+
+ aim = ai;
+ ai -= m;
+ } while (ai > a && aim >= ai);
+ }
+ }
+}
+
+/*
+ * Undo the sort: for every element f[1..l], store its value at the
+ * position given by its serial number, and deliver the result in
+ * b[1..l].  The values go through a scratch array first; diffreg()
+ * passes a b that shares storage with f.
+ */
+static void
+unsort(struct line *f, int l, int *b)
+{
+	int *byserial;
+	int n;
+
+	byserial = MALLOC(int, (l+1));
+	n = 1;
+	while(n <= l){
+		byserial[f[n].serial] = f[n].value;
+		n++;
+	}
+	n = 1;
+	while(n <= l){
+		b[n] = byserial[n];
+		n++;
+	}
+	FREE(byserial);
+}
+
+/*
+ * Set pref and suff to the number of leading and trailing lines whose
+ * hashes agree in both files, point sfile[]/slen[] at the part in
+ * between, and renumber the serial fields of that part from 0.
+ */
+static void
+prune(void)
+{
+	int which, n;
+
+	pref = 0;
+	while(pref < len[0] && pref < len[1]
+	&& file[0][pref+1].value == file[1][pref+1].value)
+		pref++;
+	suff = 0;
+	while(suff < len[0]-pref && suff < len[1]-pref
+	&& file[0][len[0]-suff].value == file[1][len[1]-suff].value)
+		suff++;
+	for(which = 0; which < 2; which++){
+		sfile[which] = file[which] + pref;
+		slen[which] = len[which] - pref - suff;
+		for(n = 0; n <= slen[which]; n++)
+			sfile[which][n].serial = n;
+	}
+}
+
+/*
+ * a[1..n] and b[1..m] are the two files sorted by hash value.
+ * On return
+ *  - a[i].value is the index in b of the first line with the same
+ *    hash, or 0 if b has no such line;
+ *  - c[1..m] holds the serial numbers of b's lines in sorted order,
+ *    negated for the first line of every run of equal hashes;
+ *  - the slot after the last run is set to -1 as a terminator.
+ * b[m+1].value is overwritten with 0 so that the run scan stops
+ * there.  diffreg() passes a c that shares storage with b.
+ */
+static void
+equiv(struct line *a, int n, struct line *b, int m, int *c)
+{
+ int i, j;
+
+ i = j = 1;
+ /* merge-style walk over the two sorted arrays */
+ while(i<=n && j<=m) {
+ if(a[i].value < b[j].value)
+ a[i++].value = 0;
+ else if(a[i].value == b[j].value)
+ a[i++].value = j;
+ else
+ j++;
+ }
+ while(i <= n)
+ a[i++].value = 0;
+ b[m+1].value = 0;
+ j = 0;
+ while(++j <= m) {
+ c[j] = -b[j].serial;
+ while(b[j+1].value == b[j].value) {
+ j++;
+ c[j] = b[j].serial;
+ }
+ }
+ c[j] = -1;
+}
+
+/*
+ * Append the candidate (x, y, pred) to clist, growing it by one
+ * entry, and return the index of the new entry.
+ */
+static int
+newcand(int x, int y, int pred)
+{
+	int slot;
+
+	clist = REALLOC(clist, struct cand, (clen+1));
+	slot = clen++;
+	clist[slot].x = x;
+	clist[slot].y = y;
+	clist[slot].pred = pred;
+	return slot;
+}
+
+/*
+ * c[0..k] are indices into clist.  Return k+1 when y is larger than
+ * the y of candidate c[k]; otherwise binary-search c[0..k] and return
+ * the position whose candidate has exactly this y, or else the
+ * position just above the point where the search interval closed.
+ * NOTE(review): the search presumes the y values of c[0..k] increase
+ * with the index - confirm against stone().
+ */
+static int
+search(int *c, int k, int y)
+{
+ int i, j, l;
+ int t;
+
+ if(clist[c[k]].y < y) /*quick look for typical case*/
+ return k+1;
+ i = 0;
+ j = k+1;
+ /* the interval is (i, j); stop when the midpoint equals i */
+ while((l=(i+j)/2) > i) {
+ t = clist[c[l]].y;
+ if(t > y)
+ j = l;
+ else if(t < y)
+ i = l;
+ else
+ return l;
+ }
+ return l+1;
+}
+
+/*
+ * Build the k-candidate list described in the comment at the top of
+ * this file.
+ * a[1..n]: for every line of file0, an index into b (0 = no match),
+ *          as produced by equiv() and unsort().
+ * b:       the member array from equiv(); a run starts with a negated
+ *          serial number and continues while the entries are positive.
+ * c:       klist; c[l] is the clist index of the current l-candidate.
+ * Returns k, the largest l for which a candidate exists.
+ */
+static int
+stone(int *a, int n, int *b, int *c)
+{
+ int i, k,y;
+ int j, l;
+ int oldc, tc;
+ int oldl;
+
+ k = 0;
+ c[0] = newcand(0,0,0);
+ for(i=1; i<=n; i++) {
+ j = a[i];
+ if(j==0)
+ continue;
+ y = -b[j];
+ oldl = 0;
+ oldc = c[0];
+ /* NB: `continue` inside do/while jumps to the loop condition, i.e. to the next member */
+ do {
+ if(y <= clist[oldc].y)
+ continue;
+ l = search(c, k, y);
+ if(l!=oldl+1)
+ oldc = c[l-1];
+ if(l<=k) {
+ if(clist[c[l]].y <= y)
+ continue;
+ tc = c[l];
+ c[l] = newcand(i,y,oldc);
+ oldc = tc;
+ oldl = l;
+ } else {
+ /* y lies beyond every candidate: extend the list */
+ c[l] = newcand(i,y,oldc);
+ k++;
+ break;
+ }
+ } while((y=b[++j]) > 0);
+ }
+ return k;
+}
+
+/*
+ * Fill J[0..len[0]].  Lines in the common prefix map to themselves,
+ * lines in the common suffix map to the line at the same distance
+ * from the end of file1, everything else starts out unmatched (0).
+ * Then the candidate chain starting at clist[p] is followed through
+ * pred until the entry with y == 0, recording each pair (shifted by
+ * pref, since candidates are numbered within the pruned files).
+ */
+static void
+unravel(int p)
+{
+	int line, idx;
+
+	for(line = 0; line <= len[0]; line++){
+		if(line <= pref)
+			J[line] = line;
+		else
+			J[line] = line > len[0]-suff ? line+len[1]-len[0] : 0;
+	}
+	for(idx = p; clist[idx].y != 0; idx = clist[idx].pred)
+		J[clist[idx].x+pref] = clist[idx].y+pref;
+}
+
+/*
+ * Turn the match vector J into calls to change(): every maximal run
+ * of old lines i0..i1 that is not part of the matched sequence is
+ * reported together with the new lines j0..j1 lying between the
+ * neighbouring matches.  J[0] and J[len[0]+1] are set up as
+ * sentinels.  In mode 'e' the file is walked from the end towards the
+ * start, in every other mode from the start.
+ * An empty old file is reported as one insertion of all new lines.
+ * Finally flushchanges() prints anything change() queued.
+ */
+static void
+output(void)
+{
+ int m, i0, i1, j0, j1;
+
+ m = len[0];
+ J[0] = 0;
+ J[m+1] = len[1]+1;
+ if (mode != 'e') {
+ for (i0 = 1; i0 <= m; i0 = i1+1) {
+ /* skip lines that continue the matched sequence */
+ while (i0 <= m && J[i0] == J[i0-1]+1)
+ i0++;
+ j0 = J[i0-1]+1;
+ i1 = i0-1;
+ /* extend over unmatched old lines */
+ while (i1 < m && J[i1+1] == 0)
+ i1++;
+ j1 = J[i1+1]-1;
+ J[i1] = j1;
+ change(i0, i1, j0, j1);
+ }
+ }
+ else {
+ for (i0 = m; i0 >= 1; i0 = i1-1) {
+ while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
+ i0--;
+ j0 = J[i0+1]-1;
+ i1 = i0+1;
+ while (i1 > 1 && J[i1-1] == 0)
+ i1--;
+ j1 = J[i1-1]+1;
+ J[i1] = j1;
+ change(i1 , i0, j1, j0);
+ }
+ }
+ if (m == 0)
+ change(1, 0, 1, len[1]);
+ flushchanges();
+}
+
+#define BUF 4096
+/*
+ * Byte-wise comparison of the two files behind b1 and b2, read from
+ * the start through their file descriptors (the Biobuf buffers are
+ * bypassed).  Returns 0 if the contents are identical, 1 otherwise.
+ * A read error on either file is reported as "different": equality
+ * cannot be established, and the error count must never be used to
+ * move a buffer pointer backwards.
+ */
+static int
+cmp(Biobuf* b1, Biobuf* b2)
+{
+	int n;
+	uchar buf1[BUF], buf2[BUF];
+	int f1, f2;
+	uchar *b1s, *b1e, *b2s, *b2e;
+
+	f1 = Bfildes(b1);
+	f2 = Bfildes(b2);
+	seek(f1, 0, 0);
+	seek(f2, 0, 0);
+	/* [b1s, b1e) and [b2s, b2e) are the bytes read but not yet compared */
+	b1s = b1e = buf1;
+	b2s = b2e = buf2;
+	for(;;){
+		if(b1s >= b1e){
+			if(b1s >= &buf1[BUF])
+				b1s = buf1;
+			n = read(f1, b1s, &buf1[BUF] - b1s);
+			if(n < 0)
+				return 1;
+			b1e = b1s + n;
+		}
+		if(b2s >= b2e){
+			if(b2s >= &buf2[BUF])
+				b2s = buf2;
+			n = read(f2, b2s, &buf2[BUF] - b2s);
+			if(n < 0)
+				return 1;
+			b2e = b2s + n;
+		}
+		/* compare as much as both sides currently hold */
+		n = b2e - b2s;
+		if(n > b1e - b1s)
+			n = b1e - b1s;
+		if(n <= 0)
+			break;
+		if(memcmp((void *)b1s, (void *)b2s, n) != 0){
+			return 1;
+		}
+		b1s += n;
+		b2s += n;
+	}
+	/* one side hit end of file: equal only if the other is drained too */
+	if(b1e - b1s == b2e - b2s)
+		return 0;
+	return 1;
+}
+
+/*
+ * Compare the regular files f and t and print their differences.
+ * If either file looks binary (see prepare) the two are only compared
+ * byte for byte and a one-line message is printed when they differ.
+ * Otherwise the match vector J is computed as described at the top of
+ * this file, verified by check(), and printed by output().
+ * All storage obtained here and in prepare() is released before
+ * returning.
+ */
+void
+diffreg(char *f, char *t)
+{
+	Biobuf *b0, *b1;
+	int k, hashed0;
+
+	binary = 0;
+	b0 = prepare(0, f);
+	if (!b0)
+		return;
+	/*
+	 * prepare() allocates file[0] only when it did not flag f as
+	 * binary; otherwise file[0] still holds whatever an earlier call
+	 * left there and must not be freed again.
+	 */
+	hashed0 = !binary;
+	b1 = prepare(1, t);
+	if (!b1) {
+		if (hashed0)
+			FREE(file[0]);
+		Bterm(b0);
+		return;
+	}
+	if (binary){
+		/* file[1] is never allocated on this path; file[0] may be */
+		if (hashed0)
+			FREE(file[0]);
+		/* could use b0 and b1 but this is simpler. */
+		/* go through the buffered stdout so the message stays in order */
+		if (cmp(b0, b1))
+			Bprint(&stdout, "binary files %s %s differ\n", f, t);
+		Bterm(b0);
+		Bterm(b1);
+		return;
+	}
+	clen = 0;
+	prune();
+	sort(sfile[0], slen[0]);
+	sort(sfile[1], slen[1]);
+
+	/* member and class reuse the storage of file[1] and file[0] */
+	member = (int *)file[1];
+	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
+	member = REALLOC(member, int, slen[1]+2);
+
+	class = (int *)file[0];
+	unsort(sfile[0], slen[0], class);
+	class = REALLOC(class, int, slen[0]+2);
+
+	klist = MALLOC(int, slen[0]+2);
+	clist = MALLOC(struct cand, 1);
+	k = stone(class, slen[0], member, klist);
+	FREE(member);
+	FREE(class);
+
+	J = MALLOC(int, len[0]+2);
+	unravel(klist[k]);
+	FREE(clist);
+	FREE(klist);
+
+	ixold = MALLOC(long, len[0]+2);
+	ixnew = MALLOC(long, len[1]+2);
+	Bseek(b0, 0, 0); Bseek(b1, 0, 0);
+	check(b0, b1);
+	output();
+	FREE(J); FREE(ixold); FREE(ixnew);
+	Bterm(b0); Bterm(b1);	/* ++++ */
+}
diff --git a/diff/main.c b/diff/main.c
@@ -0,0 +1,270 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+#define DIRECTORY(s) ((s)->qid.type&QTDIR)
+#define REGULAR_FILE(s) ((s)->type == 'M' && !DIRECTORY(…
+
+Biobuf stdout;
+
+static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"};
+static int whichtmp;
+static char *progname;
+static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n";
+
+/*
+ * Remove every temporary file handed out so far, most recent first,
+ * leaving whichtmp at 0.
+ */
+static void
+rmtmpfiles(void)
+{
+	while(whichtmp > 0)
+		remove(tmp[--whichtmp]);
+}
+
+/*
+ * Clean up the temporary files and exit.  status 0 exits with an
+ * empty exit string, 1 with "some", anything else with "error".
+ */
+void
+done(int status)
+{
+	rmtmpfiles();
+	if(status == 0)
+		exits("");
+	if(status == 1)
+		exits("some");
+	exits("error");
+	/*NOTREACHED*/
+}
+
+/*
+ * Print "progname: " followed by the formatted message on file
+ * descriptor 2.  Buffered standard output is flushed first.
+ * A non-zero status ends the program through done(status); with
+ * status 0 the function RETURNS and the caller carries on, so callers
+ * using a possibly-zero status must handle the failure themselves.
+ */
+void
+panic(int status, char *fmt, ...)
+{
+ va_list arg;
+
+ Bflush(&stdout);
+
+ fprint(2, "%s: ", progname);
+ va_start(arg, fmt);
+ vfprint(2, fmt, arg);
+ va_end(arg);
+ if (status)
+ done(status);
+ /* reached only when status is 0 */
+}
+
+/*
+ * Note handler installed by mktmpfile(): report the note text and
+ * exit through panic(2, ...), which removes the temporary files.
+ * The text is passed as an argument, never as the format, so a '%'
+ * in a note cannot be interpreted as a conversion.
+ */
+static int
+catch(void *a, char *msg)
+{
+	USED(a);
+	panic(2, "%s", msg);
+	return 1;
+}
+
+/*
+ * Store "path/name" in pathname, which must have room for
+ * MAXPATHLEN+1 bytes.  Returns 0 on success; if the result would not
+ * fit, a message is printed (non-fatally) and 1 is returned with
+ * pathname untouched.
+ */
+int
+mkpathname(char *pathname, char *path, char *name)
+{
+	/* the +1 accounts for the '/' placed between the two parts */
+	if (strlen(path) + 1 + strlen(name) > MAXPATHLEN) {
+		panic(0, "pathname %s/%s too long\n", path, name);
+		return 1;
+	}
+	sprint(pathname, "%s/%s", path, name);
+	return 0;
+}
+
+/*
+ * Copy everything readable from descriptor `input` into a new
+ * temporary file and return that file's name; *sb receives its Dir
+ * (to be freed by the caller).  On failure the problem is reported
+ * through panic, 0 is returned and *sb is nil.
+ * The name is entered in tmp[] so that rmtmpfiles() removes it.
+ */
+static char *
+mktmpfile(int input, Dir **sb)
+{
+	static char *pattern[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"};
+	static char name[2][sizeof "/tmp/diff1XXXXXXXXXXX"];
+	int fd, i;
+	char *p;
+	char buf[8192];
+
+	atnotify(catch, 1);
+	/*
+	 * mkstemp overwrites the X's of its argument, so it needs a
+	 * writable buffer holding a fresh pattern on every call; a string
+	 * literal may be read-only, and a name that was already filled in
+	 * has no X's left.
+	 */
+	p = name[whichtmp];
+	strcpy(p, pattern[whichtmp]);
+	tmp[whichtmp++] = p;
+	*sb = nil;
+	fd = mkstemp(p);
+	if (fd < 0) {
+		panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
+		return 0;
+	}
+	/* i ends up negative if a read or a write failed */
+	while ((i = read(input, buf, sizeof(buf))) > 0) {
+		if ((i = write(fd, buf, i)) < 0)
+			break;
+	}
+	if (i >= 0)
+		*sb = dirfstat(fd);
+	close(fd);
+	if (i < 0) {
+		panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
+		return 0;
+	}
+	if (*sb == nil) {
+		panic(mflag ? 0: 2, "cannot stat %s: %r\n", p);
+		return 0;
+	}
+	return p;
+}
+
+/*
+ * Resolve one command-line operand into something diff() can open by
+ * name.  Returns the name to use and stores its Dir in *sb (freed by
+ * the caller), or returns 0 after reporting the failure.
+ *  - a name that cannot be stat'ed is an error, except "-", which
+ *    stands for standard input and is copied to a temporary file;
+ *  - anything that is neither a regular file nor a directory is
+ *    copied to a temporary file as well;
+ *  - regular files and directories are used as they are.
+ */
+static char *
+statfile(char *file, Dir **sb)
+{
+ Dir *dir;
+ int input;
+
+ dir = dirstat(file);
+ if(dir == nil) {
+ /* only "-" with a stat-able descriptor 0 is acceptable here */
+ if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
+ panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
+ return 0;
+ }
+ free(dir);
+ return mktmpfile(0, sb);
+ }
+ else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
+ free(dir);
+ if ((input = open(file, OREAD)) == -1) {
+ panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
+ return 0;
+ }
+ /* mktmpfile returns 0 on failure, which is passed on below */
+ file = mktmpfile(input, sb);
+ close(input);
+ }
+ else
+ *sb = dir;
+ return file;
+}
+
+/*
+ * Compare f with t.
+ *  - two directories: compared entry by entry at level 0 or when
+ *    rflag is set, otherwise only mentioned as common subdirectories;
+ *  - two regular files: compared with diffreg();
+ *  - one file and one directory: the file is compared with the entry
+ *    of the same base name inside the directory.
+ * Temporary files created by statfile() are removed before returning.
+ */
+void
+diff(char *f, char *t, int level)
+{
+ char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+ Dir *fsb, *tsb;
+
+ if ((fp = statfile(f, &fsb)) == 0)
+ goto Return;
+ if ((tp = statfile(t, &tsb)) == 0){
+ free(fsb);
+ goto Return;
+ }
+ if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
+ if (rflag || level == 0)
+ diffdir(fp, tp, level);
+ else
+ Bprint(&stdout, "Common subdirectories: %s and %s\n",
+ fp, tp);
+ }
+ else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
+ diffreg(fp, tp);
+ else {
+ if (REGULAR_FILE(fsb)) {
+ /* f is the file: look for its base name inside directory t */
+ if ((p = utfrrune(f, '/')) == 0)
+ p = f;
+ else
+ p++;
+ if (mkpathname(tb, tp, p) == 0)
+ diffreg(fp, tb);
+ }
+ else {
+ /* otherwise look for the base name of t inside f */
+ if ((p = utfrrune(t, '/')) == 0)
+ p = t;
+ else
+ p++;
+ if (mkpathname(fb, fp, p) == 0)
+ diffreg(fb, tp);
+ }
+ }
+ free(fsb);
+ free(tsb);
+Return:
+ rmtmpfiles();
+}
+
+/*
+ * diff [ -acefmnbwr ] file1 ... file2
+ * Parse the option letters, then compare every operand but the last
+ * against the last one.  With more than two operands the last must be
+ * a directory.  mflag is switched on for -m and -r, for more than two
+ * operands, and when both of two operands are directories.
+ * The program exits through done(anychange).
+ */
+void
+main(int argc, char *argv[])
+{
+ char *p;
+ int i;
+ Dir *fsb, *tsb;
+ extern int _p9usepwlibrary;
+
+ _p9usepwlibrary = 0;
+ Binit(&stdout, 1, OWRITE);
+ progname = *argv;
+ /* options: leading arguments that start with '-' and are longer than "-" */
+ while (--argc && (*++argv)[0] == '-' && (*argv)[1]) {
+ for (p = *argv+1; *p; p++) {
+ switch (*p) {
+
+ case 'e':
+ case 'f':
+ case 'n':
+ case 'c':
+ case 'a':
+ mode = *p;
+ break;
+
+ case 'w':
+ bflag = 2;
+ break;
+
+ case 'b':
+ bflag = 1;
+ break;
+
+ case 'r':
+ rflag = 1;
+ mflag = 1;
+ break;
+
+ case 'm':
+ mflag = 1;
+ break;
+
+ case 'h':
+ default:
+ /* makes panic print "Usage: diff ..." */
+ progname = "Usage";
+ panic(2, usage);
+ }
+ }
+ }
+ /* from here on argv[0..argc-1] are the operands */
+ if (argc < 2)
+ panic(2, usage, progname);
+ if ((tsb = dirstat(argv[argc-1])) == nil)
+ panic(2, "can't stat %s\n", argv[argc-1]);
+ if (argc > 2) {
+ if (!DIRECTORY(tsb))
+ panic(2, usage, progname);
+ mflag = 1;
+ }
+ else {
+ if ((fsb = dirstat(argv[0])) == nil)
+ panic(2, "can't stat %s\n", argv[0]);
+ if (DIRECTORY(fsb) && DIRECTORY(tsb))
+ mflag = 1;
+ free(fsb);
+ }
+ free(tsb);
+ for (i = 0; i < argc-1; i++)
+ diff(argv[i], argv[argc-1], 0);
+ done(anychange);
+ /*NOTREACHED*/
+}
+
+static char noroom[] = "out of memory - try diff -h\n";
+
+/*
+ * malloc that does not return on failure: running out of memory is
+ * reported through panic(2, ...), which exits.
+ */
+void *
+emalloc(unsigned n)
+{
+	void *mem;
+
+	mem = malloc(n);
+	if(mem == 0)
+		panic(2, noroom);
+	return mem;
+}
+
+/*
+ * realloc that does not return on failure: running out of memory is
+ * reported through panic(2, ...), which exits.
+ */
+void *
+erealloc(void *p, unsigned n)
+{
+	void *mem;
+
+	mem = realloc(p, n);
+	if(mem == 0)
+		panic(2, noroom);
+	return mem;
+}
diff --git a/join/Makefile b/join/Makefile
@@ -0,0 +1,10 @@
+# join - join unix port from plan9
+# Depends on ../lib9
+
+TARG = join
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/join/join.1 b/join/join.1
@@ -0,0 +1,147 @@
+.TH JOIN 1
+.CT 1 files
+.SH NAME
+join \- relational database operator
+.SH SYNOPSIS
+.B join
+[
+.I options
+]
+.I file1 file2
+.SH DESCRIPTION
+.I Join
+forms, on the standard output,
+a join
+of the two relations specified by the lines of
+.I file1
+and
+.IR file2 .
+If one of the file names is
+.LR - ,
+the standard input is used.
+.PP
+.I File1
+and
+.I file2
+must be sorted in increasing
+.SM ASCII
+collating
+sequence on the fields
+on which they are to be joined,
+normally the first in each line.
+.PP
+There is one line in the output
+for each pair of lines in
+.I file1
+and
+.I file2
+that have identical join fields.
+The output line normally consists of the common field,
+then the rest of the line from
+.IR file1 ,
+then the rest of the line from
+.IR file2 .
+.PP
+Input fields are normally separated by spaces or tabs;
+output fields by space.
+In this case, multiple separators count as one, and
+leading separators are discarded.
+.PP
+The following options are recognized, with POSIX syntax.
+.TP
+.BI -a " n
+In addition to the normal output,
+produce a line for each unpairable line in file
+.IR n ,
+where
+.I n
+is 1 or 2.
+.TP
+.BI -v " n
+Like
+.BR -a ,
+omitting output for paired lines.
+.TP
+.BI -e " s
+Replace empty output fields by string
+.IR s .
+.TP
+.BI -1 " m
+.br
+.ns
+.TP
+.BI -2 " m
+Join on the
+.IR m th
+field of
+.I file1
+or
+.IR file2 .
+.TP
+.BI -j "n m"
+Archaic equivalent for
+.BI - n " m"\f1.
+.TP
+.BI -o fields
+Each output line comprises the designated fields.
+The comma-separated field designators are either
+.BR 0 ,
+meaning the join field, or have the form
+.IR n . m ,
+where
+.I n
+is a file number and
+.I m
+is a field number.
+Archaic usage allows separate arguments for field designators.
+.PP
+.TP
+.BI -t c
+Use character
+.I c
+as the only separator (tab character) on input and output.
+Every appearance of
+.I c
+in a line is significant.
+.SH EXAMPLES
+.TP
+.L
+sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays
+Add birthdays to the
+.B /etc/passwd
+file, leaving unknown
+birthdays empty.
+The layout of
+.B /etc/passwd
+is given in
+.IR passwd (5);
+.B bdays
+contains sorted lines like
+.LR "ken:Feb\ 4,\ 1953" .
+.TP
+.L
+tr : ' ' </etc/passwd | sort -k 3 3 >temp
+.br
+.ns
+.TP
+.L
+join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2'
+Print all pairs of users with identical userids.
+.SH SOURCE
+.B \*9/src/cmd/join.c
+.SH "SEE ALSO"
+.IR sort (1),
+.IR comm (1),
+.IR awk (1)
+.SH BUGS
+With default field separation,
+the collating sequence is that of
+.BI "sort -b"
+.BI -k y , y\f1;
+with
+.BR -t ,
+the sequence is that of
+.BI "sort -t" x
+.BI -k y , y\f1.
+.PP
+One of the files must be randomly accessible.
diff --git a/join/join.c b/join/join.c
@@ -0,0 +1,369 @@
+/* join F1 F2 on stuff */
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include <ctype.h>
+#define F1 0
+#define F2 1
+#define F0 3
+#define NFLD 100 /* max field per line */
+#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
+FILE *f[2];
+Rune buf[2][BUFSIZ]; /*input lines */
+Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */
+Rune *s1,*s2;
+#define j1 joinj1
+#define j2 joinj2
+
+int j1 = 1; /* join of this field of file 1 */
+int j2 = 1; /* join of this field of file 2 */
+int olist[2*NFLD]; /* output these fields */
+int olistf[2*NFLD]; /* from these files */
+int no; /* number of entries in olist */
+Rune sep1 = ' '; /* default field separator */
+Rune sep2 = '\t';
+char *sepstr=" ";
+int discard; /* count of truncated lines */
+Rune null[BUFSIZ]/* = L""*/;
+int a1;
+int a2;
+
+char *getoptarg(int*, char***);
+void output(int, int);
+int input(int);
+void oparse(char*);
+void error(char*, char*);
+void seek1(void), seek2(void);
+Rune *strtorune(Rune *, char *);
+
+
+/*
+ * join [options] file1 file2
+ * Parse the options by hand (argv[1] is always the argument under
+ * inspection), open the two inputs ("-" is standard input) and run
+ * the join with whichever file ftell() reports as seekable: file 2
+ * is preferred (seek2), else file 1 (seek1).
+ * NOTE(review): field numbers given with -1, -2 and -j are not range
+ * checked before they index ppi[][], and the -e string is copied into
+ * null[] by strtorune(), which takes no size - confirm the intended
+ * limits.
+ */
+void
+main(int argc, char **argv)
+{
+ int i;
+
+ while (argc > 1 && argv[1][0] == '-') {
+ if (argv[1][1] == '\0')
+ break;
+ switch (argv[1][1]) {
+ case '-':
+ argc--;
+ argv++;
+ goto proceed;
+ case 'a':
+ switch(*getoptarg(&argc, &argv)) {
+ case '1':
+ a1++;
+ break;
+ case '2':
+ a2++;
+ break;
+ default:
+ error("incomplete option -a","");
+ }
+ break;
+ case 'e':
+ strtorune(null, getoptarg(&argc, &argv));
+ break;
+ case 't':
+ sepstr=getoptarg(&argc, &argv);
+ chartorune(&sep1, sepstr);
+ sep2 = sep1;
+ break;
+ case 'o':
+ /* attached or comma-separated list: oparse; otherwise one designator per argument */
+ if(argv[1][2]!=0 ||
+ argc>2 && strchr(argv[2],',')!=0)
+ oparse(getoptarg(&argc, &argv));
+ else for (no = 0; no<2*NFLD && argc>2; no++){
+ if (argv[2][0] == '1' && argv[2][1] == '.') {
+ olistf[no] = F1;
+ olist[no] = atoi(&argv[2][2]);
+ } else if (argv[2][0] == '2' && argv[2][1] == …
+ olist[no] = atoi(&argv[2][2]);
+ olistf[no] = F2;
+ } else if (argv[2][0] == '0')
+ olistf[no] = F0;
+ else
+ break;
+ argc--;
+ argv++;
+ }
+ break;
+ case 'j':
+ /* -j1 m, -j2 m, or -j m for both files */
+ if(argc <= 2)
+ break;
+ if (argv[1][2] == '1')
+ j1 = atoi(argv[2]);
+ else if (argv[1][2] == '2')
+ j2 = atoi(argv[2]);
+ else
+ j1 = j2 = atoi(argv[2]);
+ argc--;
+ argv++;
+ break;
+ case '1':
+ j1 = atoi(getoptarg(&argc, &argv));
+ break;
+ case '2':
+ j2 = atoi(getoptarg(&argc, &argv));
+ break;
+ }
+ argc--;
+ argv++;
+ }
+proceed:
+ /* the test uses the 1-origin value; the decrement happens either way */
+ for (i = 0; i < no; i++)
+ if (olist[i]-- > NFLD) /* 0 origin */
+ error("field number too big in -o","");
+ if (argc != 3)
+ error("usage: join [-1 x -2 y] [-o list] file1 file2","");
+ j1--;
+ j2--; /* everyone else believes in 0 origin */
+ s1 = ppi[F1][j1];
+ s2 = ppi[F2][j2];
+ if (strcmp(argv[1], "-") == 0)
+ f[F1] = stdin;
+ else if ((f[F1] = fopen(argv[1], "r")) == 0)
+ error("can't open %s", argv[1]);
+ if(strcmp(argv[2], "-") == 0) {
+ f[F2] = stdin;
+ } else if ((f[F2] = fopen(argv[2], "r")) == 0)
+ error("can't open %s", argv[2]);
+
+ if(ftell(f[F2]) >= 0)
+ seek2();
+ else if(ftell(f[F1]) >= 0)
+ seek1();
+ else
+ error("neither file is randomly accessible","");
+ if (discard)
+ error("some input line was truncated", "");
+ exits("");
+}
+/*
+ * Compare two NUL-terminated rune strings: returns 0 if they are
+ * equal, -1 if a sorts before b, 1 otherwise.
+ */
+int
+runecmp(Rune *a, Rune *b)
+{
+	for(; *a == *b; a++, b++)
+		if(*a == '\0')
+			return 0;
+	return *a < *b ? -1 : 1;
+}
+/*
+ * Convert the NUL-terminated rune string r to UTF in buf and return
+ * buf.  The caller provides a buffer that is large enough.
+ */
+char *
+runetostr(char *buf, Rune *r)
+{
+	char *out;
+
+	out = buf;
+	while(*r){
+		out += runetochar(out, r);
+		r++;
+	}
+	*out = '\0';
+	return buf;
+}
+/*
+ * Convert the UTF string s to a NUL-terminated rune string in buf
+ * and return buf.  The caller provides a buffer that is large enough.
+ */
+Rune *
+strtorune(Rune *buf, char *s)
+{
+	Rune *out;
+
+	out = buf;
+	while(*s){
+		s += chartorune(out, s);
+		out++;
+	}
+	*out = '\0';
+	return buf;
+}
+/* lazy. there ought to be a clean way to combine seek1 & seek2 */
+#define get1() n1=input(F1)
+#define get2() n2=input(F2)
+/*
+ * Join driver used when file 2 is seekable.
+ * n1 and n2 are the field counts of the current line of each file
+ * (0 at end of file); comp() compares the two join fields.
+ * bot2 is the offset in file 2 where the current run of equal keys
+ * starts, top2 the offset just after the last line output for it.
+ * For every further line of file 1 with the same key, file 2 is
+ * rewound to bot2 so the run can be paired with it again.
+ * Unpaired lines are printed when a1 / a2 ask for them.
+ */
+void
+seek2(void)
+{
+ int n1, n2;
+ int top2=0;
+ int bot2 = ftell(f[F2]);
+ get1();
+ get2();
+ while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
+ if(n1>0 && n2>0 && comp()>0 || n1==0) {
+ /* line of file 2 has no partner */
+ if(a2) output(0, n2);
+ bot2 = ftell(f[F2]);
+ get2();
+ } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+ /* line of file 1 has no partner */
+ if(a1) output(n1, 0);
+ get1();
+ } else /*(n1>0 && n2>0 && comp()==0)*/ {
+ /* pair the current line of file 1 with the whole run in file 2 */
+ while(n2>0 && comp()==0) {
+ output(n1, n2);
+ top2 = ftell(f[F2]);
+ get2();
+ }
+ fseek(f[F2], bot2, 0);
+ get2();
+ get1();
+ for(;;) {
+ if(n1>0 && n2>0 && comp()==0) {
+ output(n1, n2);
+ get2();
+ } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+ /* run in file 2 exhausted: rewind it, next line of file 1 */
+ fseek(f[F2], bot2, 0);
+ get2();
+ get1();
+ } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
+ /* file 1 has moved on: continue after the run */
+ fseek(f[F2], top2, 0);
+ bot2 = top2;
+ get2();
+ break;
+ }
+ }
+ }
+ }
+}
+/*
+ * Join driver used when only file 1 is seekable; the mirror image of
+ * seek2().
+ * n1 and n2 are the field counts of the current line of each file
+ * (0 at end of file); comp() compares the two join fields.
+ * bot1 is the offset in file 1 where the current run of equal keys
+ * starts, top1 the offset just after the last line output for it.
+ * For every further line of file 2 with the same key, file 1 is
+ * rewound to bot1 so the run can be paired with it again.
+ * Unpaired lines are printed when a1 / a2 ask for them.
+ */
+void
+seek1(void)
+{
+	int n1, n2;
+	int top1=0;
+	int bot1 = ftell(f[F1]);
+	get1();
+	get2();
+	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
+		if(n1>0 && n2>0 && comp()>0 || n1==0) {
+			/* line of file 2 has no partner */
+			if(a2) output(0, n2);
+			get2();
+		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+			/* line of file 1 has no partner */
+			if(a1) output(n1, 0);
+			bot1 = ftell(f[F1]);
+			get1();
+		} else /*(n1>0 && n2>0 && comp()==0)*/ {
+			/*
+			 * Pair the current line of file 2 with the whole run
+			 * in file 1.  The loop advances file 1, so it must stop
+			 * on n1: at end of file input() leaves the old line in
+			 * place and comp() would keep reporting a match.
+			 */
+			while(n1>0 && comp()==0) {
+				output(n1, n2);
+				top1 = ftell(f[F1]);
+				get1();
+			}
+			fseek(f[F1], bot1, 0);
+			get2();
+			get1();
+			for(;;) {
+				if(n1>0 && n2>0 && comp()==0) {
+					output(n1, n2);
+					get1();
+				} else if(n1>0 && n2>0 && comp()>0 || n1==0) {
+					/* run in file 1 exhausted: rewind it, next line of file 2 */
+					fseek(f[F1], bot1, 0);
+					get2();
+					get1();
+				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
+					/* file 2 has moved on: continue after the run */
+					fseek(f[F1], top1, 0);
+					bot1 = top1;
+					get1();
+					break;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Read the next line of file n into buf[n] (as runes) and split it
+ * in place: ppi[n][0..] point at the fields, each terminated by a
+ * NUL written over its separator, and the pointer after the last
+ * field is set to 0.
+ * Returns the number of fields, or 0 at end of file (buf[n] and
+ * ppi[n] are then left as they were).
+ * With the default separator, runs of blanks and tabs count as one
+ * separator and leading ones are skipped.
+ * `discard` is incremented when splitting stopped on anything but a
+ * newline.
+ */
+int
+input(int n) /* get input line and split into fields */
+{
+ register int i, c;
+ Rune *bp;
+ Rune **pp;
+ char line[BUFSIZ];
+
+ bp = buf[n];
+ pp = ppi[n];
+ if (fgets(line, BUFSIZ, f[n]) == 0)
+ return(0);
+ strtorune(bp, line);
+ i = 0;
+ do {
+ i++;
+ if (sep1 == ' ') /* strip multiples */
+ while ((c = *bp) == sep1 || c == sep2)
+ bp++; /* skip blanks */
+ *pp++ = bp; /* record beginning */
+ while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0…
+ bp++;
+ *bp++ = '\0'; /* mark end by overwriting blank */
+ } while (c != '\n' && c != '\0' && i < NFLD-1);
+ /* c is the rune that ended the last field */
+ if (c != '\n')
+ discard++;
+
+ *pp = 0;
+ return(i);
+}
+
+/*
+ * Print one output line.  on1 and on2 are the field counts of the
+ * current lines of file 1 and file 2; 0 means that side takes no
+ * part (an unpaired line of the other file).
+ * Without an -o list: the join field first, then the remaining fields
+ * of file 1, then those of file 2.
+ * With an -o list: exactly the listed fields; a field that is missing
+ * or empty is replaced by the -e string (null).
+ */
+void
+output(int on1, int on2) /* print items from olist */
+{
+ int i;
+ Rune *temp;
+ char buf[BUFSIZ];
+
+ if (no <= 0) { /* default case */
+ printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
+ for (i = 0; i < on1; i++)
+ if (i != j1)
+ printf("%s%s", sepstr, runetostr(buf, ppi[F1][…
+ for (i = 0; i < on2; i++)
+ if (i != j2)
+ printf("%s%s", sepstr, runetostr(buf, ppi[F2][…
+ printf("\n");
+ } else {
+ for (i = 0; i < no; i++) {
+ /* designator 0: the join field, from whichever line has one */
+ if (olistf[i]==F0 && on1>j1)
+ temp = ppi[F1][j1];
+ else if (olistf[i]==F0 && on2>j2)
+ temp = ppi[F2][j2];
+ else {
+ temp = ppi[olistf[i]][olist[i]];
+ if(olistf[i]==F1 && on1<=olist[i] ||
+ olistf[i]==F2 && on2<=olist[i] ||
+ *temp==0)
+ temp = null;
+ }
+ printf("%s", runetostr(buf, temp));
+ if (i == no - 1)
+ printf("\n");
+ else
+ printf("%s", sepstr);
+ }
+ }
+}
+
+/*
+ * Print "join: " followed by the message on standard error and exit.
+ * s1 is used as a printf format with s2 as its only argument, and
+ * also as the exit string.
+ */
+void
+error(char *s1, char *s2)
+{
+	fputs("join: ", stderr);
+	fprintf(stderr, s1, s2);
+	fputc('\n', stderr);
+	exits(s1);
+}
+
+/*
+ * Return the argument of the option in argv[1]: the text attached to
+ * the option letter if there is any, otherwise the next argument, in
+ * which case *argcp / *argvp are stepped past the option so that the
+ * caller's usual argc--/argv++ consumes the argument.
+ * A missing argument, or one starting with '-', is an error.
+ */
+char *
+getoptarg(int *argcp, char ***argvp)
+{
+	char **args;
+
+	args = *argvp;
+	if(args[1][2] != '\0')
+		return args[1] + 2;
+	if(*argcp <= 2 || args[2][0] == '-')
+		error("incomplete option %s", args[1]);
+	*argcp -= 1;
+	*argvp = args + 1;
+	return args[2];
+}
+
+/*
+ * Parse a comma-separated -o list such as "0,1.2,2.10" into olistf[]
+ * (file: F0 for the join field, F1, F2) and olist[] (1-origin field
+ * number), and set `no` to the number of entries.
+ * At most 2*NFLD entries are taken.  A malformed designator is an
+ * error.
+ */
+void
+oparse(char *s)
+{
+	for (no = 0; no<2*NFLD && *s; no++, s++) {
+		switch(*s) {
+		case '0':
+			olistf[no] = F0;
+			break;
+		case '1':
+		case '2':
+			if(s[1] == '.' && isdigit((uchar)s[2])) {
+				olistf[no] = *s=='1'? F1: F2;
+				olist[no] = atoi(s += 2);
+				/*
+				 * atoi consumed every digit; leave s on the last
+				 * one so that the separator test below sees what
+				 * follows a multi-digit field number.
+				 */
+				while(isdigit((uchar)s[1]))
+					s++;
+				break;
+			}	/* fall thru */
+		default:
+			error("invalid -o list", "");
+		}
+		if(s[1] == ',')
+			s++;
+	}
+}
diff --git a/lib9/utf.h b/lib9/utf.h
@@ -11,7 +11,8 @@ enum
UTFmax = 3, /* maximum bytes per rune */
Runesync = 0x80, /* cannot represent part of a U…
Runeself = 0x80, /* rune and UTF sequences are t…
- Runeerror = 0xFFFD /* decoding error in UTF */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF /* maximum rune value */
};
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
diff --git a/look/Makefile b/look/Makefile
@@ -0,0 +1,10 @@
+# look - look unix port from plan9
+# Depends on ../lib9
+
+TARG = look
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/look/look.1 b/look/look.1
@@ -0,0 +1,85 @@
+.TH LOOK 1
+.SH NAME
+look \- find lines in a sorted list
+.SH SYNOPSIS
+.B look
+[
+.BI -dfnixt c
+]
+[
+.I string
+]
+[
+.I file
+]
+.SH DESCRIPTION
+.I Look
+consults a sorted
+.I file
+and prints all lines that begin with
+.IR string .
+It uses binary search.
+.PP
+The following options are recognized.
+Options
+.B dfnt
+affect comparisons as in
+.IR sort (1).
+.TP
+.B -i
+Interactive.
+There is no
+.I string
+argument; instead
+.I look
+takes lines from the standard input as strings to be looked up.
+.TP
+.B -x
+Exact.
+Print only lines of the file whose key matches
+.I string
+exactly.
+.TP
+.B -d
+`Directory' order:
+only letters, digits,
+tabs and blanks participate in comparisons.
+.TP
+.B -f
+Fold.
+Upper case letters compare equal to lower case.
+.TP
+.B -n
+Numeric comparison with initial string of digits, optional minus sign,
+and optional decimal point.
+.TP
+.BR -t [ \f2c\f1 ]
+Character
+.I c
+terminates the sort key in the
+.IR file .
+By default, tab terminates the key. If
+.I c
+is missing the entire line comprises the key.
+.PP
+If no
+.I file
+is specified,
+.B /lib/words
+is assumed, with collating sequence
+.BR df .
+.SH FILES
+.B /lib/words
+.SH SOURCE
+.B \*9/src/cmd/look.c
+.SH "SEE ALSO"
+.IR sort (1),
+.IR grep (1)
+.SH DIAGNOSTICS
+The exit status is
+.RB `` "not found" ''
+if no match is found, and
+.RB `` "no dictionary" ''
+if
+.I file
+or the default dictionary cannot be opened.
diff --git a/look/look.c b/look/look.c
@@ -0,0 +1,349 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+ /* Macros for Rune support of ctype.h-like functions */
+ /*
+  * Any definitions inherited from the headers are dropped first.
+  * The replacements below classify by plain range tests on the
+  * rune value; their arguments are evaluated more than once.
+  */
+
+#undef isupper
+#undef islower
+#undef isalpha
+#undef isdigit
+#undef isalnum
+#undef isspace
+#undef tolower
+#define isupper(r) ('A' <= (r) && (r) <= 'Z')
+#define islower(r) ('a' <= (r) && (r) <= 'z')
+#define isalpha(r) (isupper(r) || islower(r))
+ /* true for 0xC0..0xFF, the range indexed into latin_fold_tab */
+#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
+
+#define isdigit(r) ('0' <= (r) && (r) <= '9')
+
+#define isalnum(r) (isalpha(r) || isdigit(r))
+
+#define isspace(r) ((r) == ' ' || (r) == '\t' \
+ || (0x0A <= (r) && (r) <= 0x0D))
+
+ /* pure arithmetic, no range check: only meaningful when isupper(r) */
+#define tolower(r) ((r)-'A'+'a')
+
+ /* sign of v as -1, 0 or 1 */
+#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
+
+ /* capacity, in runes, of the entry and word line buffers */
+#define WORDSIZ 4000
+ /* default dictionary; main passes it through unsharp() before use */
+char *filename = "#9/lib/words";
+ /* dfile: the file being searched; bout: fd 1; bin: fd 0 (see main) */
+Biobuf *dfile;
+Biobuf bout;
+Biobuf bin;
+
+ /* option flags, counted up in main: -f, -d, -x and -i respectively */
+int fold;
+int direc;
+int exact;
+int iflag;
+int rev = 1; /*-1 for reverse-ordered file, not implemented*/
+ /* comparison in use: acomp by default, ncomp after -n */
+int (*compare)(Rune*, Rune*);
+ /* rune that ends the key within a line; replaced by -t */
+Rune tab = '\t';
+ /* entry: line as read from dfile; word: rcanon() form of entry */
+Rune entry[WORDSIZ];
+Rune word[WORDSIZ];
+ /* orig: string being looked up; key: rcanon() form of orig */
+Rune key[50], orig[50];
+ /* a 0 entry means "no folding": rcanon leaves that rune unchanged */
+Rune latin_fold_tab[] =
+{
+/* Table to fold latin 1 characters to ASCII equivalents
+ based at Rune value 0xc0
+
+ À Á Â Ã Ä Å Æ Ç
+ È É Ê Ë Ì Í Î Ï
+ Ð Ñ Ò Ó Ô Õ Ö ×
+ Ø Ù Ú Û Ü Ý Þ ß
+ à á â ã ä å æ ç
+ è é ê ë ì í î ï
+ ð ñ ò ó ô õ ö ÷
+ ø ù ú û ü ý þ ÿ
+*/
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
+};
+
+int locate(void);
+int acomp(Rune*, Rune*);
+int getword(Biobuf*, Rune *rp, int n);
+void torune(char*, Rune*);
+void rcanon(Rune*, Rune*);
+int ncomp(Rune*, Rune*);
+
+/*
+ * look: print the lines of a sorted file that begin with a string.
+ * After the flags, the first argument is the string (unless -i is
+ * given or there are no arguments, in which case strings are read
+ * from standard input, one per line) and the next is the file.
+ * For each string, locate() finds the first candidate line and the
+ * loop below prints it and the matching lines that follow.
+ * Exit status is "no dictionary" if the file cannot be opened and
+ * "not found" if a command-line string has no match.
+ */
+void
+main(int argc, char *argv[])
+{
+ int n;
+
+ filename = unsharp(filename);
+
+ Binit(&bin, 0, OREAD);
+ Binit(&bout, 1, OWRITE);
+ compare = acomp;
+ ARGBEGIN{
+ case 'd':
+ direc++;
+ break;
+ case 'f':
+ fold++;
+ break;
+ case 'i':
+ iflag++;
+ break;
+ case 'n':
+ compare = ncomp;
+ break;
+ case 't':
+ /* NOTE(review): the ARGF() result is used unchecked; presumably nil when -t has no argument -- confirm */
+ chartorune(&tab,ARGF());
+ break;
+ case 'x':
+ exact++;
+ break;
+ default:
+ fprint(2, "%s: bad option %c\n", argv0, ARGC());
+ fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0…
+ exits("usage");
+ } ARGEND
+ /* without -i the first argument is the string; none at all means interactive */
+ if(!iflag){
+ if(argc >= 1) {
+ torune(argv[0], orig);
+ argv++;
+ argc--;
+ } else
+ iflag++;
+ }
+ /* no file argument: keep the default dictionary and compare as with -d -f */
+ if(argc < 1) {
+ direc++;
+ fold++;
+ } else
+ filename = argv[0];
+ if (!iflag)
+ rcanon(orig, key);
+ dfile = Bopen(filename, OREAD);
+ if(dfile == 0) {
+ fprint(2, "look: can't open %s\n", filename);
+ exits("no dictionary");
+ }
+ if(!iflag)
+ if(!locate())
+ exits("not found");
+ /* runs once for a command-line string, until end of input with -i */
+ do {
+ if(iflag) {
+ Bflush(&bout);
+ if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
+ exits(0);
+ rcanon(orig, key);
+ if(!locate())
+ continue;
+ }
+ /* entry and word still hold the line on which locate() stopped */
+ if (!exact || !acomp(word, key))
+ Bprint(&bout, "%S\n", entry);
+ /* print following lines while key is still a prefix of, or equal to, them */
+ while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+ rcanon(entry, word);
+ n = compare(key, word);
+ switch(n) {
+ case -1:
+ if(exact)
+ break;
+ /* not exact: fall through and treat a prefix like a match */
+ case 0:
+ /* NOTE(review): this compares word with orig, the test before the loop uses key -- confirm which is intended */
+ if (!exact || !acomp(word, orig))
+ Bprint(&bout, "%S\n", entry);
+ continue;
+ }
+ /* any other result (or -1 with -x) ends the run of matches */
+ break;
+ }
+ } while(iflag);
+ exits(0);
+}
+
+/*
+ * locate: find the first line of dfile that matches key.
+ * A binary search over byte offsets narrows the range [bot, top];
+ * a sequential scan from bot then reads lines until one matches or
+ * key is known to sort before the line read.
+ * Returns 1 with entry/word holding the matching line and dfile
+ * positioned just after it; returns 0 if there is no match.
+ * A line matches when compare() yields 0, or -1 (key is a prefix of
+ * the line's key) and -x was not given.
+ */
+int
+locate(void)
+{
+ vlong top, bot, mid;
+ int c;
+ int n;
+
+ bot = 0;
+ /* seek relative to the end to obtain the file size */
+ top = Bseek(dfile, 0L, 2);
+ for(;;) {
+ mid = (top+bot) / 2;
+ Bseek(dfile, mid, 0);
+ /* mid probably falls inside a line: skip to the start of the next one */
+ do
+ c = Bgetrune(dfile);
+ while(c>=0 && c!='\n');
+ mid = Boffset(dfile);
+ if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
+ break;
+ rcanon(entry, word);
+ n = compare(key, word);
+ switch(n) {
+ case -2:
+ case -1:
+ case 0:
+ /* key is at or before this line: shrink from above, unless that makes no progress */
+ if(top <= mid)
+ break;
+ top = mid;
+ continue;
+ case 1:
+ case 2:
+ /* key is after this line: shrink from below */
+ bot = mid;
+ continue;
+ }
+ break;
+ }
+ /* sequential scan of the lines starting at bot */
+ Bseek(dfile, bot, 0);
+ while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+ rcanon(entry, word);
+ n = compare(key, word);
+ switch(n) {
+ case -2:
+ return 0;
+ case -1:
+ if(exact)
+ return 0;
+ /* not exact: a prefix counts as found, fall through */
+ case 0:
+ return 1;
+ case 1:
+ case 2:
+ continue;
+ }
+ }
+ return 0;
+}
+
+/*
+ * acomp(s, t) returns:
+ * -2 if s strictly precedes t
+ * -1 if s is a prefix of t
+ * 0 if s is the same as t
+ * 1 if t is a prefix of s
+ * 2 if t strictly precedes s
+ */
+
+int
+acomp(Rune *s, Rune *t)
+{
+	int a, b;
+
+	/* walk the common prefix; reaching the terminator means equality */
+	while(*s == *t){
+		if(*s == 0)
+			return 0;
+		s++;
+		t++;
+	}
+	/* first differing position decides */
+	a = *s;
+	b = *t;
+	if(a == 0)
+		return -1;
+	if(b == 0)
+		return 1;
+	return a < b ? -2 : 2;
+}
+
+/*
+ * Convert the UTF string old into runes stored at new, including
+ * the terminating zero rune.  The destination size is not known
+ * here, so the caller must supply a buffer that is large enough.
+ */
+void
+torune(char *old, Rune *new)
+{
+	for(;;){
+		old += chartorune(new, old);
+		if(*new++ == 0)
+			break;
+	}
+}
+
+/*
+ * Write the canonical comparison key of old into new.
+ * Only the part of old before the first tab rune (the global key
+ * terminator) is used.  Latin-1 letters with a non-zero entry in
+ * latin_fold_tab are replaced by that entry; with direc set, runes
+ * other than letters, digits, blank and tab are dropped; with fold
+ * set, upper case ASCII is mapped to lower case.
+ * new is always zero-terminated.
+ */
+void
+rcanon(Rune *old, Rune *new)
+{
+	Rune r;
+
+	for(; (r = *old) != 0 && r != tab; old++){
+		if(islatin1(r) && latin_fold_tab[r-0xc0] != 0)
+			r = latin_fold_tab[r-0xc0];
+		if(direc && !isalnum(r) && r != ' ' && r != '\t')
+			continue;
+		if(fold && isupper(r))
+			r = tolower(r);
+		*new++ = r;
+	}
+	*new = 0;
+}
+
+/*
+ * ncomp(s, t): numeric comparison, selected by -n.
+ * Each string is read as optional white space, an optional '-',
+ * a run of digits and an optional '.' followed by more digits.
+ * With rev == 1 it returns -2 if s is numerically less than t,
+ * 2 if it is greater and 0 if the two are equal.
+ */
+int
+ncomp(Rune *s, Rune *t)
+{
+ Rune *is, *it, *js, *jt;
+ int a, b;
+ int ssgn, tsgn;
+
+ while(isspace(*s))
+ s++;
+ while(isspace(*t))
+ t++;
+ /* ssgn/tsgn: -2*rev for a non-negative number, +2*rev after a '-' */
+ ssgn = tsgn = -2*rev;
+ if(*s == '-') {
+ s++;
+ ssgn = -ssgn;
+ }
+ if(*t == '-') {
+ t++;
+ tsgn = -tsgn;
+ }
+ /* is, it: one past the last integer digit of s and t */
+ for(is = s; isdigit(*is); is++)
+ ;
+ for(it = t; isdigit(*it); it++)
+ ;
+ js = is;
+ jt = it;
+ a = 0;
+ /* same sign: compare aligned digits right to left; a keeps the most significant difference */
+ if(ssgn == tsgn)
+ while(it>t && is>s)
+ if(b = *--it - *--is)
+ a = b;
+ /* leftover leading digits: any non-zero one makes that number larger in magnitude */
+ while(is > s)
+ if(*--is != '0')
+ return -ssgn;
+ while(it > t)
+ if(*--it != '0')
+ return tsgn;
+ if(a)
+ return sgn(a)*ssgn;
+ /* integer parts are equal: go on with the digits after the '.' */
+ if(*(s=js) == '.')
+ s++;
+ if(*(t=jt) == '.')
+ t++;
+ if(ssgn == tsgn)
+ while(isdigit(*s) && isdigit(*t))
+ if(a = *t++ - *s++)
+ return sgn(a)*ssgn;
+ /* one fraction is longer: it only matters if a non-zero digit remains */
+ while(isdigit(*s))
+ if(*s++ != '0')
+ return -ssgn;
+ while(isdigit(*t))
+ if(*t++ != '0')
+ return tsgn;
+ return 0;
+}
+
+/*
+ * Read one line from f into rp as a zero-terminated rune string,
+ * without the newline.  n is the capacity of rp in runes.
+ * Returns 1 when a complete line was stored.  Returns 0 when
+ * Bgetrune reports end of input or an error before a newline is
+ * seen, or, after a diagnostic on fd 2, when n runes were stored
+ * without meeting a newline (rp is then not terminated).
+ */
+int
+getword(Biobuf *f, Rune *rp, int n)
+{
+	long c;
+	int i;
+
+	for(i = 0; i < n; i++){
+		c = Bgetrune(f);
+		if(c < 0)
+			return 0;
+		if(c == '\n'){
+			rp[i] = '\0';
+			return 1;
+		}
+		rp[i] = c;
+	}
+	fprint(2, "Look: word too long. Bailing out.\n");
+	return 0;
+}
diff --git a/pbd/Makefile b/pbd/Makefile
@@ -0,0 +1,10 @@
+# pbd - pbd unix port from plan9
+# Depends on ../lib9
+
+TARG = pbd
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/pbd/pbd.1 b/pbd/pbd.1
diff --git a/pbd/pbd.c b/pbd/pbd.c
@@ -0,0 +1,19 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ * pbd: write the last path element of the working directory to
+ * fd 1, with no trailing newline.  If getwd fails, "???" is
+ * written; for the directory "/" the output is "/".
+ */
+void
+main(void)
+{
+	char buf[512], *base, *slash;
+
+	base = "???";
+	if(getwd(buf, sizeof buf)){
+		slash = strrchr(buf, '/');
+		if(slash == nil)
+			base = buf;
+		else if(slash > buf || slash[1] != '\0')
+			base = slash + 1;
+		else
+			base = slash;	/* buf is exactly "/" */
+	}
+	write(1, base, strlen(base));
+	exits(0);
+}
diff --git a/rc/Makefile b/rc/Makefile
@@ -46,7 +46,7 @@ uninstall:
@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
clean:
- rm -f ${OFILES} ${TARG} y.tab.c y.tab.h
+ rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h
${TARG}: ${OFILES}
@echo LD ${TARG}
diff --git a/split/Makefile b/split/Makefile
@@ -0,0 +1,10 @@
+# split - split unix port from plan9
+# Depends on ../lib9
+
+TARG = split
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/split/split.1 b/split/split.1
@@ -0,0 +1,82 @@
+.TH SPLIT 1
+.CT 1 files
+.SH NAME
+split \- split a file into pieces
+.SH SYNOPSIS
+.B split
+[
+.I option ...
+]
+[
+.I file
+]
+.SH DESCRIPTION
+.I Split
+reads
+.I file
+(standard input by default)
+and writes it in pieces of 1000
+lines per output file.
+The names of the
+output files are
+.BR xaa ,
+.BR xab ,
+and so on to
+.BR xzz .
+The options are
+.TP
+.BI -n " n"
+Split into
+.IR n -line
+pieces.
+.TP
+.BI -l " n"
+Synonym for
+.B -n
+.IR n ,
+a nod to Unix's syntax.
+.TP
+.BI -e " expression"
+File divisions occur at each line
+that matches a regular
+.IR expression ;
+see
+.IR regexp (7).
+Multiple
+.B -e
+options may appear.
+If a subexpression of
+.I expression
+is contained in parentheses
+.BR ( ... ) ,
+the output file name is the portion of the
+line which matches the subexpression.
+.TP
+.BI -f " stem"
+Use
+.I stem
+instead of
+.B x
+in output file names.
+.TP
+.BI -s " suffix"
+Append
+.I suffix
+to names identified under
+.BR -e .
+.TP
+.B -x
+Exclude the matched input line from the output file.
+.TP
+.B -i
+Ignore case in option
+.BR -e ;
+force output file names (excluding the suffix)
+to lower case.
+.SH SOURCE
+.B \*9/src/cmd/split.c
+.SH SEE ALSO
+.IR sed (1),
+.IR awk (1),
+.IR grep (1),
+.IR regexp (7)
diff --git a/split/split.c b/split/split.c
@@ -0,0 +1,189 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include <regexp.h>
+
+ /* NOTE(review): digit is not referenced anywhere else in this file -- confirm it can go */
+char digit[] = "0123456789";
+ /* text appended to file names taken from a -e subexpression (-s) */
+char *suffix = "";
+ /* prefix of the generated file names (-f) */
+char *stem = "x";
+ /* next two-letter suffix; nextfile() steps it from "aa" upwards */
+char suff[] = "aa";
+ /* name of the output file currently being created */
+char name[200];
+ /* the current output; openf() re-initializes it on each new file */
+Biobuf bout;
+Biobuf *output = &bout;
+
+extern int nextfile(void);
+extern int matchfile(Resub*);
+extern void openf(void);
+extern char *fold(char*,int);
+extern void usage(void);
+extern void badexp(void);
+
+/*
+ * split: copy the input (the file argument, else fd 0) into a
+ * series of output files.
+ * Without -e a new file is started every n lines (1000 unless -n or
+ * -l is given).  With -e a new file is started at every line that
+ * matches the regular expression; matchfile() chooses its name.
+ */
+void
+main(int argc, char *argv[])
+{
+ Reprog *exp;
+ char *pattern = 0;
+ int n = 1000;
+ char *line;
+ int xflag = 0;
+ int iflag = 0;
+ Biobuf bin;
+ Biobuf *b = &bin;
+ char buf[256];
+
+ ARGBEGIN {
+ case 'l':
+ case 'n':
+ n=atoi(EARGF(usage()));
+ break;
+ case 'e':
+ pattern = strdup(EARGF(usage()));
+ break;
+ case 'f':
+ stem = strdup(EARGF(usage()));
+ break;
+ case 's':
+ suffix = strdup(EARGF(usage()));
+ break;
+ case 'x':
+ xflag++;
+ break;
+ case 'i':
+ iflag++;
+ break;
+ default:
+ usage();
+ break;
+
+ } ARGEND;
+
+ /* at most one file argument is accepted */
+ if(argc < 0 || argc > 1)
+ usage();
+
+ if(argc != 0) {
+ b = Bopen(argv[0], OREAD);
+ if(b == nil) {
+ fprint(2, "split: can't open %s: %r\n", argv[0]);
+ exits("open");
+ }
+ } else
+ Binit(b, 0, OREAD);
+
+ if(pattern) {
+ /* with -i both the pattern and every line are lower-cased by fold() before matching */
+ if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): patte…
+ badexp();
+ while((line=Brdline(b,'\n')) != 0) {
+ Resub match[2];
+ memset(match, 0, sizeof match);
+ /* overwrite the trailing newline so the line is a C string */
+ line[Blinelen(b)-1] = 0;
+ if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,mat…
+ /* -x: the matching line itself is not copied */
+ if(matchfile(match) && xflag)
+ continue;
+ /* NOTE(review): output is initialized to &bout and never set to 0 in this file, so this test looks always false -- confirm */
+ } else if(output == 0)
+ nextfile(); /* at most once */
+ Bwrite(output, line, Blinelen(b)-1);
+ Bputc(output, '\n');
+ }
+ } else {
+ /* start "full" so that the first line opens the first file */
+ int linecnt = n;
+
+ while((line=Brdline(b,'\n')) != 0) {
+ if(++linecnt > n) {
+ nextfile();
+ linecnt = 1;
+ }
+ Bwrite(output, line, Blinelen(b));
+ }
+
+ /*
+ * in case we didn't end with a newline, tack whatever's
+ * left onto the last file
+ */
+ while((n = Bread(b, buf, sizeof(buf))) > 0)
+ Bwrite(output, buf, n);
+ }
+ if(b != nil)
+ Bterm(b);
+ exits(0);
+}
+
+/*
+ * Start the next sequentially named output file: stem followed by
+ * the two-letter suffix, which runs from "aa" to "zz".
+ * Returns 1 if a file was opened.  Once the suffixes are used up it
+ * prints one diagnostic, opens nothing (output stays on the last
+ * file) and returns 0 from then on.
+ * A stem too long for the name buffer is a fatal error rather than
+ * an overflow of name[].
+ */
+int
+nextfile(void)
+{
+	static int canopen = 1;
+
+	if(suff[0] > 'z') {
+		if(canopen)
+			fprint(2, "split: file %szz not split\n",stem);
+		canopen = 0;
+	} else {
+		/* stem comes from the command line: bound it before copying */
+		if(strlen(stem) + strlen(suff) >= sizeof name) {
+			fprint(2, "split: stem %s too long\n", stem);
+			exits("stem");
+		}
+		strcpy(name, stem);
+		strcat(name, suff);
+		/* advance the suffix: ...ay, az, ba, bb... */
+		if(++suff[1] > 'z')
+			suff[1] = 'a', ++suff[0];
+		openf();
+	}
+	return canopen;
+}
+
+/*
+ * Start a new output file for a line that matched the -e pattern.
+ * If the pattern had a parenthesized subexpression (match[1] is
+ * set) the file is named after the matched text plus suffix and 1
+ * is returned; otherwise the next sequential name is used and the
+ * result of nextfile() is returned.
+ * The matched text comes from the input, so its length is checked
+ * against the size of name[]; a name that does not fit is fatal.
+ */
+int
+matchfile(Resub *match)
+{
+	int len;
+
+	if(match[1].s.sp == nil)
+		return nextfile();
+	len = match[1].e.ep - match[1].s.sp;
+	if(len < 0 || len + strlen(suffix) >= sizeof name) {
+		fprint(2, "split: file name too long\n");
+		exits("name");
+	}
+	strncpy(name, match[1].s.sp, len);
+	/* strncpy does not terminate; copying the suffix does */
+	strcpy(name+len, suffix);
+	openf();
+	return 1;
+}
+
+/*
+ * Switch output to a newly created file called name.
+ * The previous output is flushed and terminated and its file
+ * descriptor closed.  Failure to create the file is fatal.
+ */
+void
+openf(void)
+{
+	/* descriptor of the file behind output; 0 until the first create */
+	static int fd = 0;
+
+	Bflush(output);
+	Bterm(output);
+	if(fd > 0)
+		close(fd);
+	fd = create(name,OWRITE,0666);
+	if(fd < 0) {
+		/* the message used to be labelled "grep:" */
+		fprint(2, "split: can't create %s: %r\n", name);
+		exits("create");
+	}
+	Binit(output, fd, OWRITE);
+}
+
+/*
+ * Return a lower-cased copy of the string s, whose length is n.
+ * The copy lives in a static buffer that grows as needed and is
+ * overwritten by the next call, so the result must not be freed or
+ * kept across calls.  At most n bytes of s are copied.
+ * Running out of memory is fatal.
+ */
+char *
+fold(char *s, int n)
+{
+	static char *fline;
+	static int linesize = 0;
+	char *t;
+	int i;
+
+	if(n < 0)
+		n = 0;
+	if(linesize < n+1){
+		/* keep the old buffer reachable until realloc has succeeded */
+		t = realloc(fline,n+1);
+		if(t == nil){
+			fprint(2, "split: out of memory\n");
+			exits("memory");
+		}
+		fline = t;
+		linesize = n+1;
+	}
+	for(i = 0; i < n && s[i] != '\0'; i++)
+		fline[i] = tolower((uchar)s[i]);
+	fline[i] = '\0';
+	/* we assume the 'A'-'Z' only appear as themselves
+	 * in a utf encoding.
+	 */
+	return fline;
+}
+
+/* Print the usage line on fd 2 and exit with status "usage". */
+void
+usage(void)
+{
+ fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i…
+ exits("usage");
+}
+
+/* Complain on fd 2 that the -e pattern did not compile, then exit. */
+void
+badexp(void)
+{
+	static char why[] = "bad regular expression";
+
+	fprint(2, "split: %s\n", why);
+	exits(why);
+}
diff --git a/strings/Makefile b/strings/Makefile
@@ -0,0 +1,10 @@
+# strings - strings unix port from plan9
+# Depends on ../lib9
+
+TARG = strings
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/strings/strings.1 b/strings/strings.1
@@ -0,0 +1,28 @@
+.TH STRINGS 1
+.SH NAME
+strings \- extract printable strings
+.SH SYNOPSIS
+.B strings
+[
+.I file ...
+]
+.SH DESCRIPTION
+.I Strings
+finds and prints strings containing 6 or more
+consecutive printable UTF-encoded characters
+in a (typically) binary file, default
+standard input.
+Printable characters are taken to be
+.SM ASCII
+characters from blank through tilde (hexadecimal 20 through 7E), inclusive,
+and
+all other characters from value 00A0 to FFFF.
+Strings reports
+the decimal offset within the file at which the string starts and the text
+of the string. If the string is longer than 70 runes the line is
+terminated by three dots and the printing is resumed on the next
+line with the offset of the continuation line.
+.SH SOURCE
+.B \*9/src/cmd/strings.c
+.SH SEE ALSO
+.IR nm (1)
diff --git a/strings/strings.c b/strings/strings.c
@@ -0,0 +1,90 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+ /* fin: the file being scanned (opened in stringit); fout: fd 1 */
+Biobuf *fin;
+Biobuf fout;
+
+#define MINSPAN 6 /* Min characters in st…
+
+ /* capacity of the rune buffer; a line is emitted once BUFSIZE-1 runes are held */
+#define BUFSIZE 70
+
+void stringit(char *);
+ /* the predicate below is written as isprint but compiled under the name risprint */
+#undef isprint
+#define isprint risprint
+int isprint(Rune);
+
+/*
+ * strings: print the printable strings found in each file argument,
+ * or in /dev/stdin when there is none.  With more than one file
+ * each report is preceded by a "name:" line.
+ * The heading goes through the same buffered stream as the strings
+ * so that it appears in front of them, and the stream is flushed
+ * explicitly before exiting.
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+
+	Binit(&fout, 1, OWRITE);
+	if(argc < 2) {
+		stringit("/dev/stdin");
+		Bterm(&fout);
+		exits(0);
+	}
+
+	for(i = 1; i < argc; i++) {
+		if(argc > 2)
+			Bprint(&fout, "%s:\n", argv[i]);
+
+		stringit(argv[i]);
+	}
+
+	Bterm(&fout);
+	exits(0);
+}
+
+/*
+ * Scan the file str and print, on fout, every run of at least
+ * MINSPAN printable runes, preceded by the file offset at which
+ * the run starts.  A run that reaches BUFSIZE-1 runes is printed
+ * followed by " ..." and continues on a new line with its own
+ * offset.  If the file cannot be opened a message is printed with
+ * perror and nothing else is done.
+ */
+void
+stringit(char *str)
+{
+	long posn, start;
+	int cnt = 0;
+	long c;
+
+	Rune buf[BUFSIZE];
+
+	if ((fin = Bopen(str, OREAD)) == 0) {
+		perror("open");
+		return;
+	}
+
+	start = 0;
+	posn = Boffset(fin);
+	while((c = Bgetrune(fin)) >= 0) {
+		if(isprint(c)) {
+			/*
+			 * An empty buffer marks the start of a run.  Testing
+			 * start against 0 instead would misreport a run that
+			 * begins at offset 0 of the file.
+			 */
+			if(cnt == 0)
+				start = posn;
+			buf[cnt++] = c;
+			if(cnt == BUFSIZE-1) {
+				buf[cnt] = 0;
+				Bprint(&fout, "%8ld: %S ...\n", start, buf);
+				cnt = 0;
+			}
+		} else {
+			if(cnt >= MINSPAN) {
+				buf[cnt] = 0;
+				Bprint(&fout, "%8ld: %S\n", start, buf);
+			}
+			cnt = 0;
+		}
+		/* offset of the rune the next iteration will read */
+		posn = Boffset(fin);
+	}
+
+	/* a run may extend to the end of the file */
+	if(cnt >= MINSPAN){
+		buf[cnt] = 0;
+		Bprint(&fout, "%8ld: %S\n", start, buf);
+	}
+	Bterm(fin);
+}
+
+/*
+ * Return 1 if r counts as printable here: blank through tilde
+ * (0x20..0x7E) or any rune above 0xA0; otherwise 0.
+ */
+int
+isprint(Rune r)
+{
+	return (r >= ' ' && r < 0x7f) || r > 0xA0;
+}
diff --git a/unicode/Makefile b/unicode/Makefile
@@ -0,0 +1,10 @@
+# unicode - unicode unix port from plan9
+# Depends on ../lib9
+
+TARG = unicode
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/unicode/unicode.1 b/unicode/unicode.1
diff --git a/unicode/unicode.c b/unicode/unicode.c
@@ -0,0 +1,122 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+ /* usage text printed by main when there are no arguments */
+char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] c…
+ /* characters accepted as the first character of a hexadecimal argument */
+char hex[] = "0123456789abcdefABCDEF";
+ /* set by -n */
+int numout = 0;
+ /* set by -t: chars() then prints no newline after each character */
+int text = 0;
+ /* NOTE(review): err is not referenced as a variable in the visible code; the err: labels are unrelated -- confirm */
+char *err;
+ /* buffered output on fd 1, initialized in main */
+Biobuf bout;
+
+char *range(char*[]);
+char *nums(char*[]);
+char *chars(char*[]);
+
+/*
+ * unicode: choose the conversion from the first argument.
+ * Without -n, an argument containing '-' selects range().
+ * Otherwise -n, or a first character that is not in hex[], selects
+ * nums(); anything else goes to chars().
+ * The string returned by the chosen routine is the exit status.
+ */
+void
+main(int argc, char *argv[])
+{
+	char *status;
+
+	ARGBEGIN{
+	case 'n':
+		numout = 1;
+		break;
+	case 't':
+		text = 1;
+		break;
+	}ARGEND
+	Binit(&bout, 1, OWRITE);
+	if(argc == 0){
+		fprint(2, "usage: %s\n", usage);
+		exits("usage");
+	}
+	if(!numout && utfrune(argv[0], '-'))
+		status = range(argv);
+	else if(numout || strchr(hex, argv[0][0])==0)
+		status = nums(argv);
+	else
+		status = chars(argv);
+	exits(status);
+}
+
+/*
+ * Print every character of each "hexmin-hexmax" argument as
+ * "<hex value> <character>", eight to a line separated by tabs.
+ * Both bounds must lie in 0..Runemax with min <= max.
+ * Returns 0 on success; on the first malformed argument a message
+ * is printed on fd 2 and "bad range" is returned.
+ */
+char*
+range(char *argv[])
+{
+	char *p;
+	int lo, hi;
+	int col;
+
+	for(; *argv; argv++){
+		p = *argv;
+		if(strchr(hex, p[0]) == 0)
+			goto bad;
+		lo = strtoul(p, &p, 16);
+		if(lo<0 || lo>Runemax || *p!='-')
+			goto bad;
+		p++;
+		if(strchr(hex, *p) == 0)
+			goto bad;
+		hi = strtoul(p, &p, 16);
+		if(hi<0 || hi>Runemax || hi<lo || *p!=0)
+			goto bad;
+		/* col counts the entries printed so far for this argument */
+		for(col = 1; ; col++, lo++){
+			Bprint(&bout, "%.4x %C", lo, lo);
+			if(lo == hi){
+				Bprint(&bout, "\n");
+				break;
+			}
+			if((col&7) == 0)
+				Bprint(&bout, "\n");
+			else
+				Bprint(&bout, "\t");
+		}
+	}
+	return 0;
+
+bad:
+	fprint(2, "unicode: bad range %s\n", *argv);
+	return "bad range";
+}
+
+/*
+ * For every rune of every argument, print its value in hexadecimal
+ * on a line of its own.
+ * Returns 0 on success, or "bad utf" after a message on fd 2 when
+ * the check below rejects a rune.
+ */
+char*
+nums(char *argv[])
+{
+ char *q;
+ Rune r;
+ int w;
+
+ while(*argv){
+ q = *argv;
+ while(*q){
+ /* w is the number of bytes chartorune consumed for r */
+ w = chartorune(&r, q);
+ /* NOTE(review): this treats 0x80 as the decoding-error rune, but utf.h in this commit has Runeerror = 0xFFFD -- confirm the check still detects bad UTF */
+ if(r==0x80 && (q[0]&0xFF)!=0x80){
+ fprint(2, "unicode: invalid utf string %s\n", …
+ return "bad utf";
+ }
+ Bprint(&bout, "%.4x\n", r);
+ q += w;
+ }
+ argv++;
+ }
+ return 0;
+}
+
+/*
+ * Print the character for each hexadecimal argument, followed by a
+ * newline unless -t (text) was given.
+ * An argument must start with a character from hex[], consist of a
+ * single hexadecimal number and lie in 0..Runemax.
+ * Returns 0 on success; on the first bad argument a message is
+ * printed on fd 2 and "bad char" is returned.
+ */
+char*
+chars(char *argv[])
+{
+	char *end;
+	int v;
+
+	for(; *argv; argv++){
+		if(strchr(hex, (*argv)[0]) == 0)
+			goto bad;
+		v = strtoul(*argv, &end, 16);
+		if(v<0 || v>Runemax || *end!=0)
+			goto bad;
+		Bprint(&bout, "%C", v);
+		if(!text)
+			Bprint(&bout, "\n");
+	}
+	return 0;
+
+bad:
+	fprint(2, "unicode: bad unicode value %s\n", *argv);
+	return "bad char";
+}
diff --git a/unutf/Makefile b/unutf/Makefile
@@ -0,0 +1,10 @@
+# unutf - unutf unix port from plan9
+# Depends on ../lib9
+
+TARG = unutf
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/unutf/unutf.1 b/unutf/unutf.1
diff --git a/unutf/unutf.c b/unutf/unutf.c
@@ -0,0 +1,20 @@
+/*
+ * stupid little program to pipe unicode chars through
+ * when converting to non-utf compilers.
+ */
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+Biobuf bin;
+
+/*
+ * Read runes from fd 0 until Bgetrune reports end of input or an
+ * error, printing each one as a hexadecimal constant on its own line.
+ */
+void
+main(void)
+{
+	int r;
+
+	Binit(&bin, 0, OREAD);
+	for(;;){
+		r = Bgetrune(&bin);
+		if(r < 0)
+			break;
+		print("0x%ux\n", r);
+	}
+	exits(0);
+}
You are viewing proxied material from suckless.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.