toops - plan9port - [fork] Plan 9 from user space | |
git clone git://src.adamsgaard.dk/plan9port | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 30d57c74899f4825349441e2b1e7a0bb5995c21f | |
parent 3e0d8fb3ea83b2b65a6425c65beda887140f9349 | |
Author: rsc <devnull@localhost> | |
Date: Tue, 27 Dec 2005 23:16:48 +0000 | |
oops | |
Diffstat: | |
A src/cmd/tcs/html.c | 330 +++++++++++++++++++++++++++++… | |
1 file changed, 330 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/src/cmd/tcs/html.c b/src/cmd/tcs/html.c | |
t@@ -0,0 +1,330 @@ | |
+#include <u.h> | |
+#include <libc.h> | |
+#include <bio.h> | |
+#include "hdr.h" | |
+#include "conv.h" | |
+ | |
+typedef struct Hchar Hchar; | |
+struct Hchar | |
+{ | |
+ char *s; | |
+ Rune r; | |
+}; | |
+ | |
+/* <, >, ", & intentionally omitted */ | |
+ | |
+static Hchar byname[] = | |
+{ | |
+ {"AElig", 198}, | |
+ {"Aacute", 193}, | |
+ {"Acirc", 194}, | |
+ {"Agrave", 192}, | |
+ {"Aring", 197}, | |
+ {"Atilde", 195}, | |
+ {"Auml", 196}, | |
+ {"Ccedil", 199}, | |
+ {"ETH", 208}, | |
+ {"Eacute", 201}, | |
+ {"Ecirc", 202}, | |
+ {"Egrave", 200}, | |
+ {"Euml", 203}, | |
+ {"Iacute", 205}, | |
+ {"Icirc", 206}, | |
+ {"Igrave", 204}, | |
+ {"Iuml", 207}, | |
+ {"Ntilde", 209}, | |
+ {"Oacute", 211}, | |
+ {"Ocirc", 212}, | |
+ {"Ograve", 210}, | |
+ {"Oslash", 216}, | |
+ {"Otilde", 213}, | |
+ {"Ouml", 214}, | |
+ {"THORN", 222}, | |
+ {"Uacute", 218}, | |
+ {"Ucirc", 219}, | |
+ {"Ugrave", 217}, | |
+ {"Uuml", 220}, | |
+ {"Yacute", 221}, | |
+ {"aacute", 225}, | |
+ {"acirc", 226}, | |
+ {"acute", 180}, | |
+ {"aelig", 230}, | |
+ {"agrave", 224}, | |
+ {"alpha", 945}, | |
+ {"aring", 229}, | |
+ {"atilde", 227}, | |
+ {"auml", 228}, | |
+ {"beta", 946}, | |
+ {"brvbar", 166}, | |
+ {"ccedil", 231}, | |
+ {"cdots", 8943}, | |
+ {"cedil", 184}, | |
+ {"cent", 162}, | |
+ {"chi", 967}, | |
+ {"copy", 169}, | |
+ {"curren", 164}, | |
+ {"ddots", 8945}, | |
+ {"deg", 176}, | |
+ {"delta", 948}, | |
+ {"divide", 247}, | |
+ {"eacute", 233}, | |
+ {"ecirc", 234}, | |
+ {"egrave", 232}, | |
+ {"emdash", 8212}, /* non-standard but commonly used */ | |
+ {"emsp", 8195}, | |
+ {"endash", 8211}, /* non-standard but commonly used */ | |
+ {"ensp", 8194}, | |
+ {"epsilon", 949}, | |
+ {"eta", 951}, | |
+ {"eth", 240}, | |
+ {"euml", 235}, | |
+ {"frac12", 189}, | |
+ {"frac14", 188}, | |
+ {"frac34", 190}, | |
+ {"gamma", 947}, | |
+ {"iacute", 237}, | |
+ {"icirc", 238}, | |
+ {"iexcl", 161}, | |
+ {"igrave", 236}, | |
+ {"iota", 953}, | |
+ {"iquest", 191}, | |
+ {"iuml", 239}, | |
+ {"kappa", 954}, | |
+ {"lambda", 955}, | |
+ {"laquo", 171}, | |
+ {"ldquo", 8220}, | |
+ {"ldots", 8230}, | |
+ {"lsquo", 8216}, | |
+ {"macr", 175}, | |
+ {"mdash", 8212}, | |
+ {"micro", 181}, | |
+ {"middot", 183}, | |
+ {"mu", 956}, | |
+ {"nbsp", 160}, | |
+ {"ndash", 8211}, | |
+ {"not", 172}, | |
+ {"ntilde", 241}, | |
+ {"nu", 957}, | |
+ {"oacute", 243}, | |
+ {"ocirc", 244}, | |
+ {"ograve", 242}, | |
+ {"omega", 969}, | |
+ {"omicron", 959}, | |
+ {"ordf", 170}, | |
+ {"ordm", 186}, | |
+ {"oslash", 248}, | |
+ {"otilde", 245}, | |
+ {"ouml", 246}, | |
+ {"para", 182}, | |
+ {"phi", 966}, | |
+ {"pi", 960}, | |
+ {"plusmn", 177}, | |
+ {"pound", 163}, | |
+ {"psi", 968}, | |
+ {"quad", 8193}, | |
+ {"raquo", 187}, | |
+ {"rdquo", 8221}, | |
+ {"reg", 174}, | |
+ {"rho", 961}, | |
+ {"rsquo", 8217}, | |
+ {"sect", 167}, | |
+ {"shy", 173}, | |
+ {"sigma", 963}, | |
+ {"sp", 8194}, | |
+ {"sup1", 185}, | |
+ {"sup2", 178}, | |
+ {"sup3", 179}, | |
+ {"szlig", 223}, | |
+ {"tau", 964}, | |
+ {"theta", 952}, | |
+ {"thinsp", 8201}, | |
+ {"thorn", 254}, | |
+ {"times", 215}, | |
+ {"trade", 8482}, | |
+ {"uacute", 250}, | |
+ {"ucirc", 251}, | |
+ {"ugrave", 249}, | |
+ {"uml", 168}, | |
+ {"upsilon", 965}, | |
+ {"uuml", 252}, | |
+ {"varepsilon", 8712}, | |
+ {"varphi", 981}, | |
+ {"varpi", 982}, | |
+ {"varrho", 1009}, | |
+ {"vdots", 8942}, | |
+ {"vsigma", 962}, | |
+ {"vtheta", 977}, | |
+ {"xi", 958}, | |
+ {"yacute", 253}, | |
+ {"yen", 165}, | |
+ {"yuml", 255}, | |
+ {"zeta", 950} | |
+}; | |
+ | |
+static Hchar byrune[nelem(byname)]; | |
+ | |
+static int | |
+hnamecmp(const void *va, const void *vb) | |
+{ | |
+ Hchar *a, *b; | |
+ | |
+ a = (Hchar*)va; | |
+ b = (Hchar*)vb; | |
+ return strcmp(a->s, b->s); | |
+} | |
+ | |
+static int | |
+hrunecmp(const void *va, const void *vb) | |
+{ | |
+ Hchar *a, *b; | |
+ | |
+ a = (Hchar*)va; | |
+ b = (Hchar*)vb; | |
+ return a->r - b->r; | |
+} | |
+ | |
+static void | |
+html_init(void) | |
+{ | |
+ static int init; | |
+ | |
+ if(init) | |
+ return; | |
+ init = 1; | |
+ memmove(byrune, byname, sizeof byrune); | |
+ qsort(byname, nelem(byname), sizeof byname[0], hnamecmp); | |
+ qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp); | |
+} | |
+ | |
+static Rune | |
+findbyname(char *s) | |
+{ | |
+ Hchar *h; | |
+ int n, m, x; | |
+ | |
+ h = byname; | |
+ n = nelem(byname); | |
+ while(n > 0){ | |
+ m = n/2; | |
+ x = strcmp(h[m].s, s); | |
+ if(x == 0) | |
+ return h[m].r; | |
+ if(x < 0){ | |
+ h += m+1; | |
+ n -= m+1; | |
+ }else | |
+ n = m; | |
+ } | |
+ return Runeerror; | |
+} | |
+ | |
+static char* | |
+findbyrune(Rune r) | |
+{ | |
+ Hchar *h; | |
+ int n, m; | |
+ | |
+ h = byrune; | |
+ n = nelem(byrune); | |
+ while(n > 0){ | |
+ m = n/2; | |
+ if(h[m].r == r) | |
+ return h[m].s; | |
+ if(h[m].r < r){ | |
+ h += m+1; | |
+ n -= m+1; | |
+ }else | |
+ n = m; | |
+ } | |
+ return nil; | |
+} | |
+ | |
+void | |
+html_in(int fd, long *x, struct convert *out) | |
+{ | |
+ char buf[100], *p; | |
+ Biobuf b; | |
+ Rune rbuf[N]; | |
+ Rune *r, *er; | |
+ int c, i; | |
+ | |
+ USED(x); | |
+ | |
+ html_init(); | |
+ r = rbuf; | |
+ er = rbuf+N; | |
+ Binit(&b, fd, OREAD); | |
+ while((c = Bgetrune(&b)) != Beof){ | |
+ if(r >= er){ | |
+ OUT(out, rbuf, r-rbuf); | |
+ r = rbuf; | |
+ } | |
+ if(c == '&'){ | |
+ buf[0] = c; | |
+ for(i=1; i<nelem(buf)-1;){ | |
+ c = Bgetc(&b); | |
+ if(c == Beof) | |
+ break; | |
+ buf[i++] = c; | |
+ if(strchr("; \t\r\n", c)) | |
+ break; | |
+ } | |
+ buf[i] = 0; | |
+ if(buf[i-1] == ';'){ | |
+ buf[i-1] = 0; | |
+ if((c = findbyname(buf+1)) != Runeerror){ | |
+ *r++ = c; | |
+ continue; | |
+ } | |
+ buf[i-1] = ';'; | |
+ if(buf[1] == '#'){ | |
+ if(buf[2] == 'x') | |
+ c = strtol(buf+3, &p, 16); | |
+ else | |
+ c = strtol(buf+2, &p, 10); | |
+ if(*p != ';' || c >= NRUNE || c < 0) | |
+ goto bad; | |
+ *r++ = c; | |
+ continue; | |
+ } | |
+ } | |
+ bad: | |
+ for(p=buf; p<buf+i; ){ | |
+ p += chartorune(r++, p); | |
+ if(r >= er){ | |
+ OUT(out, rbuf, r-rbuf); | |
+ r = rbuf; | |
+ } | |
+ } | |
+ continue; | |
+ } | |
+ *r++ = c; | |
+ } | |
+ if(r > rbuf) | |
+ OUT(out, rbuf, r-rbuf); | |
+} | |
+ | |
+/* | |
+ * use biobuf because can use more than UTFmax bytes per rune | |
+ */ | |
+void | |
+html_out(Rune *r, int n, long *x) | |
+{ | |
+ char *s; | |
+ Biobuf b; | |
+ Rune *er; | |
+ | |
+ html_init(); | |
+ Binit(&b, 1, OWRITE); | |
+ er = r+n; | |
+ for(; r<er; r++){ | |
+ if(*r < Runeself) | |
+ Bputrune(&b, *r); | |
+ else if((s = findbyrune(*r)) != nil) | |
+ Bprint(&b, "&%s;", s); | |
+ else | |
+ Bprint(&b, "&#%04x;", *r); | |
+ } | |
+ Bflush(&b); | |
+} | |
+ |