import from plan9 - plan9port - [fork] Plan 9 from user space | |
git clone git://src.adamsgaard.dk/plan9port | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 536f9b83c0bed9986800d806c74ae4d225628fe3 | |
parent 44fc56d8c3cc534bf903133c63a9c9ecb42e5b63 | |
Author: rsc <devnull@localhost> | |
Date: Sun, 21 May 2006 18:57:51 +0000 | |
import from plan9 | |
Diffstat: | |
M src/cmd/tcs/conv.h | 2 ++ | |
M src/cmd/tcs/conv_big5.c | 1 + | |
M src/cmd/tcs/conv_gb.c | 1 + | |
M src/cmd/tcs/conv_jis.c | 1 + | |
M src/cmd/tcs/conv_ksc.c | 1 + | |
M src/cmd/tcs/html.c | 128 +++++++++++++++++++++++++++++… | |
M src/cmd/tcs/mkfile | 6 +++++- | |
M src/cmd/tcs/tcs.c | 212 ++++++++++++++++++++++++-----… | |
M src/cmd/tcs/utf.c | 30 ++++++++++++++++-------------- | |
9 files changed, 320 insertions(+), 62 deletions(-) | |
--- | |
diff --git a/src/cmd/tcs/conv.h b/src/cmd/tcs/conv.h | |
@@ -13,6 +13,8 @@ void uksc_in(int fd, long *notused, struct convert *out); | |
void uksc_out(Rune *base, int n, long *notused); | |
void html_in(int fd, long *notused, struct convert *out); | |
void html_out(Rune *base, int n, long *notused); | |
+void tune_in(int fd, long *notused, struct convert *out); | |
+void tune_out(Rune *base, int n, long *notused); | |
#define emit(x) *(*r)++ = (x) | |
#define NRUNE 65536 | |
diff --git a/src/cmd/tcs/conv_big5.c b/src/cmd/tcs/conv_big5.c | |
@@ -110,6 +110,7 @@ big5_in(int fd, long *notused, struct convert *out) | |
big5proc(-1, &r, nin); | |
if(r > ob) | |
OUT(out, ob, r-ob); | |
+ OUT(out, ob, 0); | |
} | |
void | |
diff --git a/src/cmd/tcs/conv_gb.c b/src/cmd/tcs/conv_gb.c | |
@@ -88,6 +88,7 @@ gb_in(int fd, long *notused, struct convert *out) | |
gbproc(-1, &r, nin); | |
if(r > ob) | |
OUT(out, ob, r-ob); | |
+ OUT(out, ob, 0); | |
} | |
void | |
diff --git a/src/cmd/tcs/conv_jis.c b/src/cmd/tcs/conv_jis.c | |
@@ -363,6 +363,7 @@ do_in(int fd, void (*procfn)(int, Rune **, long), struct c… | |
(*procfn)(-1, &r, nin); | |
if(r > ob) | |
OUT(out, ob, r-ob); | |
+ OUT(out, ob, 0); | |
} | |
void | |
diff --git a/src/cmd/tcs/conv_ksc.c b/src/cmd/tcs/conv_ksc.c | |
@@ -109,6 +109,7 @@ uksc_in(int fd, long *notused, struct convert *out) | |
ukscproc(-1, &r, nin); | |
if(r > ob) | |
OUT(out, ob, r-ob); | |
+ OUT(out, ob, 0); | |
} | |
void | |
diff --git a/src/cmd/tcs/html.c b/src/cmd/tcs/html.c | |
@@ -19,132 +19,251 @@ static Hchar byname[] = | |
{"Aacute", 193}, | |
{"Acirc", 194}, | |
{"Agrave", 192}, | |
+ {"Alpha", 913}, | |
{"Aring", 197}, | |
{"Atilde", 195}, | |
{"Auml", 196}, | |
+ {"Beta", 914}, | |
{"Ccedil", 199}, | |
+ {"Chi", 935}, | |
+ {"Dagger", 8225}, | |
+ {"Delta", 916}, | |
{"ETH", 208}, | |
{"Eacute", 201}, | |
{"Ecirc", 202}, | |
{"Egrave", 200}, | |
+ {"Epsilon", 917}, | |
+ {"Eta", 919}, | |
{"Euml", 203}, | |
+ {"Gamma", 915}, | |
{"Iacute", 205}, | |
{"Icirc", 206}, | |
{"Igrave", 204}, | |
+ {"Iota", 921}, | |
{"Iuml", 207}, | |
+ {"Kappa", 922}, | |
+ {"Lambda", 923}, | |
+ {"Mu", 924}, | |
{"Ntilde", 209}, | |
+ {"Nu", 925}, | |
+ {"OElig", 338}, | |
{"Oacute", 211}, | |
{"Ocirc", 212}, | |
{"Ograve", 210}, | |
+ {"Omega", 937}, | |
+ {"Omicron", 927}, | |
{"Oslash", 216}, | |
{"Otilde", 213}, | |
{"Ouml", 214}, | |
+ {"Phi", 934}, | |
+ {"Pi", 928}, | |
+ {"Prime", 8243}, | |
+ {"Psi", 936}, | |
+ {"Rho", 929}, | |
+ {"Scaron", 352}, | |
+ {"Sigma", 931}, | |
{"THORN", 222}, | |
+ {"Tau", 932}, | |
+ {"Theta", 920}, | |
{"Uacute", 218}, | |
{"Ucirc", 219}, | |
{"Ugrave", 217}, | |
+ {"Upsilon", 933}, | |
{"Uuml", 220}, | |
+ {"Xi", 926}, | |
{"Yacute", 221}, | |
+ {"Yuml", 376}, | |
+ {"Zeta", 918}, | |
{"aacute", 225}, | |
{"acirc", 226}, | |
{"acute", 180}, | |
{"aelig", 230}, | |
{"agrave", 224}, | |
+ {"alefsym", 8501}, | |
{"alpha", 945}, | |
+ {"amp", 38}, | |
+ {"and", 8743}, | |
+ {"ang", 8736}, | |
{"aring", 229}, | |
+ {"asymp", 8776}, | |
{"atilde", 227}, | |
{"auml", 228}, | |
+ {"bdquo", 8222}, | |
{"beta", 946}, | |
{"brvbar", 166}, | |
+ {"bull", 8226}, | |
+ {"cap", 8745}, | |
{"ccedil", 231}, | |
{"cdots", 8943}, | |
{"cedil", 184}, | |
{"cent", 162}, | |
{"chi", 967}, | |
+ {"circ", 710}, | |
+ {"clubs", 9827}, | |
+ {"cong", 8773}, | |
{"copy", 169}, | |
+ {"crarr", 8629}, | |
+ {"cup", 8746}, | |
{"curren", 164}, | |
+ {"dArr", 8659}, | |
+ {"dagger", 8224}, | |
+ {"darr", 8595}, | |
{"ddots", 8945}, | |
{"deg", 176}, | |
{"delta", 948}, | |
+ {"diams", 9830}, | |
{"divide", 247}, | |
{"eacute", 233}, | |
{"ecirc", 234}, | |
{"egrave", 232}, | |
{"emdash", 8212}, /* non-standard but commonly used */ | |
+ {"empty", 8709}, | |
{"emsp", 8195}, | |
{"endash", 8211}, /* non-standard but commonly used */ | |
{"ensp", 8194}, | |
{"epsilon", 949}, | |
+ {"equiv", 8801}, | |
{"eta", 951}, | |
{"eth", 240}, | |
{"euml", 235}, | |
+ {"euro", 8364}, | |
+ {"exist", 8707}, | |
+ {"fnof", 402}, | |
+ {"forall", 8704}, | |
{"frac12", 189}, | |
{"frac14", 188}, | |
{"frac34", 190}, | |
+ {"frasl", 8260}, | |
{"gamma", 947}, | |
+ {"ge", 8805}, | |
+ {"gt", 62}, | |
+ {"hArr", 8660}, | |
+ {"harr", 8596}, | |
+ {"hearts", 9829}, | |
+ {"hellip", 8230}, | |
{"iacute", 237}, | |
{"icirc", 238}, | |
{"iexcl", 161}, | |
{"igrave", 236}, | |
+ {"image", 8465}, | |
+ {"infin", 8734}, | |
+ {"int", 8747}, | |
{"iota", 953}, | |
{"iquest", 191}, | |
+ {"isin", 8712}, | |
{"iuml", 239}, | |
{"kappa", 954}, | |
+ {"lArr", 8656}, | |
{"lambda", 955}, | |
+ {"lang", 9001}, | |
{"laquo", 171}, | |
- {"ldquo", 8220}, | |
+ {"larr", 8592}, | |
+ {"lceil", 8968}, | |
{"ldots", 8230}, | |
+ {"ldquo", 8220}, | |
+ {"le", 8804}, | |
+ {"lfloor", 8970}, | |
+ {"lowast", 8727}, | |
+ {"loz", 9674}, | |
+ {"lrm", 8206}, | |
+ {"lsaquo", 8249}, | |
{"lsquo", 8216}, | |
+ {"lt", 60}, | |
{"macr", 175}, | |
{"mdash", 8212}, | |
{"micro", 181}, | |
{"middot", 183}, | |
+ {"minus", 8722}, | |
{"mu", 956}, | |
+ {"nabla", 8711}, | |
{"nbsp", 160}, | |
{"ndash", 8211}, | |
+ {"ne", 8800}, | |
+ {"ni", 8715}, | |
{"not", 172}, | |
+ {"notin", 8713}, | |
+ {"nsub", 8836}, | |
{"ntilde", 241}, | |
{"nu", 957}, | |
{"oacute", 243}, | |
{"ocirc", 244}, | |
+ {"oelig", 339}, | |
{"ograve", 242}, | |
+ {"oline", 8254}, | |
{"omega", 969}, | |
{"omicron", 959}, | |
+ {"oplus", 8853}, | |
+ {"or", 8744}, | |
{"ordf", 170}, | |
{"ordm", 186}, | |
{"oslash", 248}, | |
{"otilde", 245}, | |
+ {"otimes", 8855}, | |
{"ouml", 246}, | |
{"para", 182}, | |
+ {"part", 8706}, | |
+ {"permil", 8240}, | |
+ {"perp", 8869}, | |
{"phi", 966}, | |
{"pi", 960}, | |
+ {"piv", 982}, | |
{"plusmn", 177}, | |
{"pound", 163}, | |
+ {"prime", 8242}, | |
+ {"prod", 8719}, | |
+ {"prop", 8733}, | |
{"psi", 968}, | |
{"quad", 8193}, | |
+ {"quot", 34}, | |
+ {"rArr", 8658}, | |
+ {"radic", 8730}, | |
+ {"rang", 9002}, | |
{"raquo", 187}, | |
+ {"rarr", 8594}, | |
+ {"rceil", 8969}, | |
{"rdquo", 8221}, | |
+ {"real", 8476}, | |
{"reg", 174}, | |
+ {"rfloor", 8971}, | |
{"rho", 961}, | |
+ {"rlm", 8207}, | |
+ {"rsaquo", 8250}, | |
{"rsquo", 8217}, | |
+ {"sbquo", 8218}, | |
+ {"scaron", 353}, | |
+ {"sdot", 8901}, | |
{"sect", 167}, | |
{"shy", 173}, | |
{"sigma", 963}, | |
+ {"sigmaf", 962}, | |
+ {"sim", 8764}, | |
{"sp", 8194}, | |
+ {"spades", 9824}, | |
+ {"sub", 8834}, | |
+ {"sube", 8838}, | |
+ {"sum", 8721}, | |
+ {"sup", 8835}, | |
{"sup1", 185}, | |
{"sup2", 178}, | |
{"sup3", 179}, | |
+ {"supe", 8839}, | |
{"szlig", 223}, | |
{"tau", 964}, | |
+ {"there4", 8756}, | |
{"theta", 952}, | |
+ {"thetasym", 977}, | |
{"thinsp", 8201}, | |
{"thorn", 254}, | |
+ {"tilde", 732}, | |
{"times", 215}, | |
{"trade", 8482}, | |
+ {"uArr", 8657}, | |
{"uacute", 250}, | |
+ {"uarr", 8593}, | |
{"ucirc", 251}, | |
{"ugrave", 249}, | |
{"uml", 168}, | |
+ {"upsih", 978}, | |
{"upsilon", 965}, | |
{"uuml", 252}, | |
{"varepsilon", 8712}, | |
@@ -154,11 +273,14 @@ static Hchar byname[] = | |
{"vdots", 8942}, | |
{"vsigma", 962}, | |
{"vtheta", 977}, | |
+ {"weierp", 8472}, | |
{"xi", 958}, | |
{"yacute", 253}, | |
{"yen", 165}, | |
{"yuml", 255}, | |
- {"zeta", 950} | |
+ {"zeta", 950}, | |
+ {"zwj", 8205}, | |
+ {"zwnj", 8204} | |
}; | |
static Hchar byrune[nelem(byname)]; | |
@@ -302,6 +424,7 @@ html_in(int fd, long *x, struct convert *out) | |
} | |
if(r > rbuf) | |
OUT(out, rbuf, r-rbuf); | |
+ OUT(out, rbuf, 0); | |
} | |
/* | |
@@ -314,6 +437,7 @@ html_out(Rune *r, int n, long *x) | |
Biobuf b; | |
Rune *er; | |
+ USED(x); | |
html_init(); | |
Binit(&b, 1, OWRITE); | |
er = r+n; | |
diff --git a/src/cmd/tcs/mkfile b/src/cmd/tcs/mkfile | |
@@ -11,7 +11,8 @@ OFILES=tcs.$O\ | |
kuten208.$O\ | |
gb.$O\ | |
ksc.$O\ | |
- big5.$O | |
+ big5.$O\ | |
+ tune.$O\ | |
<$PLAN9/src/mkone | |
CFLAGS= -DPLAN9 $CFLAGS | |
@@ -23,6 +24,9 @@ tcs.$O big5.$O: big5.h | |
tcs.$O gb.$O: gb.h | |
tcs.$O: cyrillic.h | |
tcs.$O: conv.h | |
+tcs.$O: 8859.h | |
+tcs.$O: ms.h | |
+tcs.$O: misc.h | |
conv%.$O: conv.h | |
conv_ksc.$O: ksc.h | |
diff --git a/src/cmd/tcs/tcs.c b/src/cmd/tcs/tcs.c | |
@@ -54,7 +54,7 @@ main(int argc, char **argv) | |
clean = 1; | |
break; | |
case 'f': | |
- from = ARGF(); | |
+ from = EARGF(usage()); | |
break; | |
case 'l': | |
listem = 1; | |
@@ -63,7 +63,7 @@ main(int argc, char **argv) | |
squawk = 0; | |
break; | |
case 't': | |
- to = ARGF(); | |
+ to = EARGF(usage()); | |
break; | |
case 'v': | |
verbose = 1; | |
@@ -160,7 +160,7 @@ conv(char *name, int from) | |
struct convert *c; | |
for(c = convert; c->name; c++){ | |
- if(strcmp(c->name, name) != 0) | |
+ if(cistrcmp(c->name, name) != 0) | |
continue; | |
if(c->flags&Table) | |
return(c); | |
@@ -208,23 +208,79 @@ unicode_in(int fd, long *notused, struct convert *out) | |
} | |
while((n = read(fd, (char *)buf, 2*N)) > 0){ | |
ninput += n; | |
+ if(swabme) | |
+ swab2((char *)buf, n); | |
if(n&1){ | |
if(squawk) | |
EPR "%s: odd byte count in %s\n", argv0, file); | |
nerrors++; | |
if(clean) | |
n--; | |
- else { | |
- n++; | |
- buf[n/2] = Runeerror; | |
- if(swabme) /* swab so later swab undoes… | |
- swab2((char *)&buf[n/2], 2); | |
- } | |
+ else | |
+ buf[n++/2] = Runeerror; | |
+ } | |
+ OUT(out, buf, n/2); | |
+ } | |
+} | |
+ | |
+void | |
+unicode_in_be(int fd, long *notused, struct convert *out) | |
+{ | |
+ int i, n; | |
+ Rune buf[N], r; | |
+ uchar *p; | |
+ | |
+ USED(notused); | |
+ while((n = read(fd, (char *)buf, 2*N)) > 0){ | |
+ ninput += n; | |
+ p = (uchar*)buf; | |
+ for(i=0; i<n/2; i++){ | |
+ r = *p++<<8; | |
+ r |= *p++; | |
+ buf[i] = r; | |
+ } | |
+ if(n&1){ | |
+ if(squawk) | |
+ EPR "%s: odd byte count in %s\n", argv0, file); | |
+ nerrors++; | |
+ if(clean) | |
+ n--; | |
+ else | |
+ buf[n++/2] = Runeerror; | |
} | |
- if(swabme) | |
- swab2((char *)buf, n); | |
OUT(out, buf, n/2); | |
} | |
+ OUT(out, buf, 0); | |
+} | |
+ | |
+void | |
+unicode_in_le(int fd, long *notused, struct convert *out) | |
+{ | |
+ int i, n; | |
+ Rune buf[N], r; | |
+ uchar *p; | |
+ | |
+ USED(notused); | |
+ while((n = read(fd, (char *)buf, 2*N)) > 0){ | |
+ ninput += n; | |
+ p = (uchar*)buf; | |
+ for(i=0; i<n/2; i++){ | |
+ r = *p++; | |
+ r |= *p++<<8; | |
+ buf[i] = r; | |
+ } | |
+ if(n&1){ | |
+ if(squawk) | |
+ EPR "%s: odd byte count in %s\n", argv0, file); | |
+ nerrors++; | |
+ if(clean) | |
+ n--; | |
+ else | |
+ buf[n++/2] = Runeerror; | |
+ } | |
+ OUT(out, buf, n/2); | |
+ } | |
+ OUT(out, buf, 0); | |
} | |
void | |
@@ -245,6 +301,44 @@ unicode_out(Rune *base, int n, long *notused) | |
} | |
void | |
+unicode_out_be(Rune *base, int n, long *notused) | |
+{ | |
+ int i; | |
+ uchar *p; | |
+ Rune r; | |
+ | |
+ USED(notused); | |
+ p = (uchar*)base; | |
+ for(i=0; i<n; i++){ | |
+ r = base[i]; | |
+ *p++ = r>>8; | |
+ *p++ = r; | |
+ } | |
+ nrunes += n; | |
+ noutput += 2*n; | |
+ write(1, (char *)base, 2*n); | |
+} | |
+ | |
+void | |
+unicode_out_le(Rune *base, int n, long *notused) | |
+{ | |
+ int i; | |
+ uchar *p; | |
+ Rune r; | |
+ | |
+ USED(notused); | |
+ p = (uchar*)base; | |
+ for(i=0; i<n; i++){ | |
+ r = base[i]; | |
+ *p++ = r; | |
+ *p++ = r>>8; | |
+ } | |
+ nrunes += n; | |
+ noutput += 2*n; | |
+ write(1, (char *)base, 2*n); | |
+} | |
+ | |
+void | |
intable(int fd, long *table, struct convert *out) | |
{ | |
uchar buf[N]; | |
@@ -270,6 +364,7 @@ intable(int fd, long *table, struct convert *out) | |
} | |
OUT(out, runes, r-runes); | |
} | |
+ OUT(out, runes, 0); | |
if(n < 0){ | |
#ifdef PLAN9 | |
EPR "%s: input read: %r\n", argv0); | |
@@ -403,64 +498,91 @@ struct convert convert[] = | |
{ "av", "Alternativnyj Variant", Table, (void *)tabav }, | |
{ "big5", "Big 5 (HKU)", From|Func, 0, (Fnptr)big5_in }, | |
{ "big5", "Big 5 (HKU)", Func, 0, (Fnptr)big5_out }, | |
- { "cp437", "Code Page 437 (US)", Table, (void*)tabcp437 }, | |
- { "cp720", "Code Page 720 (Arabic)", Table, (void*)tabcp720 }, | |
- { "cp737", "Code Page 737 (Greek)", Table, (void*)tabcp737 }, | |
- { "cp775", "Code Page 775 (Baltic)", Table, (void*)tabcp775 }, | |
- { "cp850", "Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp… | |
- { "cp852", "Code Page 852 (Latin II)", Table, (void*)tabcp852 }, | |
- { "cp855", "Code Page 855 (Cyrillic)", Table, (void*)tabcp855 }, | |
- { "cp857", "Code Page 857 (Turkish)", Table, (void*)tabcp857 }, | |
- { "cp858", "Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)… | |
- { "cp862", "Code Page 862 (Hebrew)", Table, (void*)tabcp862 }, | |
- { "cp866", "Code Page 866 (Russian)", Table, (void*)tabcp866 }, | |
- { "cp874", "Code Page 874 (Thai)", Table, (void*)tabcp874 }, | |
- { "cp1250", "Code Page 1250 (Central Europe)", Table, (void *)tabcp125… | |
- { "cp1251", "Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 }, | |
- { "cp1252", "Code Page 1252 (Latin I)", Table, (void *)tabcp1252 }, | |
- { "cp1253", "Code Page 1253 (Greek)", Table, (void *)tabcp1253 }, | |
- { "cp1254", "Code Page 1254 (Turkish)", Table, (void *)tabcp1254 }, | |
- { "cp1255", "Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 }, | |
- { "cp1256", "Code Page 1256 (Arabic)", Table, (void *)tabcp1256 }, | |
- { "cp1257", "Code Page 1257 (Baltic)", Table, (void *)tabcp1257 }, | |
- { "cp1258", "Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 }, | |
{ "ebcdic", "EBCDIC", Table, (void *)tabebcdic }, /* 6f is reco… | |
{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uk… | |
{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_ou… | |
- { "gb", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in }, | |
- { "gb", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out }, | |
+ { "gb2312", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in }, | |
+ { "gb2312", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out }, | |
{ "html", "HTML", From|Func, 0, (Fnptr)html_in }, | |
{ "html", "HTML", Func, 0, (Fnptr)html_out }, | |
+ { "ibm437", "IBM Code Page 437 (US)", Table, (void*)tabcp437 }, | |
+ { "ibm720", "IBM Code Page 720 (Arabic)", Table, (void*)tabcp720 }, | |
+ { "ibm737", "IBM Code Page 737 (Greek)", Table, (void*)tabcp737 }, | |
+ { "ibm775", "IBM Code Page 775 (Baltic)", Table, (void*)tabcp775 }, | |
+ { "ibm850", "IBM Code Page 850 (Multilingual Latin I)", Table, (void*)… | |
+ { "ibm852", "IBM Code Page 852 (Latin II)", Table, (void*)tabcp852 }, | |
+ { "ibm855", "IBM Code Page 855 (Cyrillic)", Table, (void*)tabcp855 }, | |
+ { "ibm857", "IBM Code Page 857 (Turkish)", Table, (void*)tabcp857 }, | |
+ { "ibm858", "IBM Code Page 858 (Multilingual Latin I+Euro)", Table, (v… | |
+ { "ibm862", "IBM Code Page 862 (Hebrew)", Table, (void*)tabcp862 }, | |
+ { "ibm866", "IBM Code Page 866 (Russian)", Table, (void*)tabcp866 }, | |
+ { "ibm874", "IBM Code Page 874 (Thai)", Table, (void*)tabcp874 }, | |
+ { "iso-2022-jp", "alias for jis-kanji (MIME)", From|Func, 0, (Fnptr)ji… | |
+ { "iso-2022-jp", "alias for jis-kanji (MIME)", Func, 0, (Fnptr)jisjis_… | |
+ { "iso-8859-1", "alias for 8859-1 (MIME)", Table, (void *)tab8859_1 }, | |
+ { "iso-8859-2", "alias for 8859-2 (MIME)", Table, (void *)tab8859_2 }, | |
+ { "iso-8859-3", "alias for 8859-3 (MIME)", Table, (void *)tab8859_3 }, | |
+ { "iso-8859-4", "alias for 8859-4 (MIME)", Table, (void *)tab8859_4 }, | |
+ { "iso-8859-5", "alias for 8859-5 (MIME)", Table, (void *)tab8859_5 }, | |
+ { "iso-8859-6", "alias for 8859-6 (MIME)", Table, (void *)tab8859_6 }, | |
+ { "iso-8859-7", "alias for 8859-7 (MIME)", Table, (void *)tab8859_7 }, | |
+ { "iso-8859-8", "alias for 8859-8 (MIME)", Table, (void *)tab8859_8 }, | |
+ { "iso-8859-9", "alias for 8859-9 (MIME)", Table, (void *)tab8859_9 }, | |
+ { "iso-8859-10", "alias for 8859-10 (MIME)", Table, (void *)tab8859_10… | |
+ { "iso-8859-15", "alias for 8859-15 (MIME)", Table, (void *)tab8859_15… | |
{ "jis", "guesses at the JIS encoding", From|Func, 0, (Fnptr)jis_in }, | |
{ "jis-kanji", "ISO 2022-JP (Japanese)", From|Func, 0, (Fnptr)jisjis_i… | |
{ "jis-kanji", "ISO 2022-JP (Japanese)", Func, 0, (Fnptr)jisjis_out }, | |
{ "koi8", "KOI-8 (GOST 19769-74)", Table, (void *)tabkoi8 }, | |
- { "latin1", "ISO 8859-1", Table, (void *)tab8859_1 }, | |
+ { "koi8-r", "alias for koi8 (MIME)", Table, (void *)tabkoi8 }, | |
+ { "latin1", "alias for 8859-1", Table, (void *)tab8859_1 }, | |
{ "macrom", "Macintosh Standard Roman character set", Table, (void *)t… | |
- { "microsoft", "Windows (CP 1252)", Table, (void *)tabcp1252 }, | |
- { "msdos", "IBM PC (CP 437)", Table, (void *)tabcp437 }, | |
- { "msdos2", "IBM PC (CP 437 with graphics in C0)", Table, (void *)tabm… | |
+ { "microsoft", "alias for windows1252", Table, (void *)tabcp1252 }, | |
{ "ms-kanji", "Microsoft, or Shift-JIS", From|Func, 0, (Fnptr)msjis_in… | |
{ "ms-kanji", "Microsoft, or Shift-JIS", Func, 0, (Fnptr)msjis_out }, | |
+ { "msdos", "IBM PC (alias for ibm437)", Table, (void *)tabcp437 }, | |
+ { "msdos2", "IBM PC (ibm437 with graphics in C0)", Table, (void *)tabm… | |
{ "next", "NEXTSTEP character set", Table, (void *)tabnextstep }, | |
{ "ov", "Osnovnoj Variant", Table, (void *)tabov }, | |
- { "ps2", "IBM PS/2: (CP 850)", Table, (void *)tabcp850 }, | |
+ { "ps2", "IBM PS/2: (alias for ibm850)", Table, (void *)tabcp850 }, | |
{ "sf1", "ISO-646: Finnish/Swedish SF-1 variant", Table, (void *)tabsf… | |
{ "sf2", "ISO-646: Finnish/Swedish SF-2 variant (recommended)", Table,… | |
- { "tis", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 }, | |
+ { "tis-620", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 }, | |
+ { "tune", "TUNE (Tamil)", From|Func, 0, (Fnptr)tune_in }, | |
+ { "tune", "TUNE (Tamil)", Func, 0, (Fnptr)tune_out }, | |
{ "ucode", "Russian U-code", Table, (void *)tabucode }, | |
{ "ujis", "EUC-JX: JIS 0208", From|Func, 0, (Fnptr)ujis_in }, | |
{ "ujis", "EUC-JX: JIS 0208", Func, 0, (Fnptr)ujis_out }, | |
{ "unicode", "Unicode 1.1", From|Func, 0, (Fnptr)unicode_in }, | |
{ "unicode", "Unicode 1.1", Func, 0, (Fnptr)unicode_out }, | |
- { "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in … | |
- { "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out }, | |
+ { "unicode-be", "Unicode 1.1 big-endian", From|Func, 0, (Fnptr)unicode… | |
+ { "unicode-be", "Unicode 1.1 big-endian", Func, 0, (Fnptr)unicode_out_… | |
+ { "unicode-le", "Unicode 1.1 little-endian", From|Func, 0, (Fnptr)unic… | |
+ { "unicode-le", "Unicode 1.1 little-endian", Func, 0, (Fnptr)unicode_o… | |
+ { "us-ascii", "alias for ascii (MIME)", Table, (void *)tabascii }, | |
{ "utf", "FSS-UTF a.k.a. UTF-8", From|Func, 0, (Fnptr)utf_in }, | |
{ "utf", "FSS-UTF a.k.a. UTF-8", Func, 0, (Fnptr)utf_out }, | |
- { "utf-l2", "from", From|Func, 0, (Fnptr)utf_in }, | |
- { "utf-l2", "to", Func, 0, (Fnptr)utf_out }, | |
+ { "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in … | |
+ { "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out }, | |
+ { "utf-8", "alias for utf (MIME)", From|Func, 0, (Fnptr)utf_in }, | |
+ { "utf-8", "alias for utf (MIME)", Func, 0, (Fnptr)utf_out }, | |
+ { "utf-16", "alias for unicode (MIME)", From|Func, 0, (Fnptr)unicode_i… | |
+ { "utf-16", "alias for unicode (MIME)", Func, 0, (Fnptr)unicode_out }, | |
+ { "utf-16be", "alias for unicode-be (MIME)", From|Func, 0, (Fnptr)unic… | |
+ { "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_o… | |
+ { "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unic… | |
+ { "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_o… | |
{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 }, | |
{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 }, | |
- { "viscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii }, | |
+ { "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii }, | |
+ { "windows-1250", "Windows Code Page 1250 (Central Europe)", Table, (v… | |
+ { "windows-1251", "Windows Code Page 1251 (Cyrillic)", Table, (void *)… | |
+ { "windows-1252", "Windows Code Page 1252 (Latin I)", Table, (void *)t… | |
+ { "windows-1253", "Windows Code Page 1253 (Greek)", Table, (void *)tab… | |
+ { "windows-1254", "Windows Code Page 1254 (Turkish)", Table, (void *)t… | |
+ { "windows-1255", "Windows Code Page 1255 (Hebrew)", Table, (void *)ta… | |
+ { "windows-1256", "Windows Code Page 1256 (Arabic)", Table, (void *)ta… | |
+ { "windows-1257", "Windows Code Page 1257 (Baltic)", Table, (void *)ta… | |
+ { "windows-1258", "Windows Code Page 1258 (Vietnam)", Table, (void *)t… | |
{ 0 } | |
}; | |
diff --git a/src/cmd/tcs/utf.c b/src/cmd/tcs/utf.c | |
@@ -45,15 +45,15 @@ utf_in(int fd, long *notused, struct convert *out) | |
tot = 0; | |
while((n = read(fd, buf+tot, N-tot)) >= 0){ | |
tot += n; | |
- for(i=j=0; i<tot; ){ | |
+ for(i=j=0; i<tot-UTFmax || (n==0 && i<tot); ){ | |
c = our_mbtowc(&l, buf+i, tot-i); | |
- if(c == -2) | |
- break; | |
if(c == -1){ | |
if(squawk) | |
EPR "%s: bad UTF sequence near byte %l… | |
- if(clean) | |
+ if(clean){ | |
+ i++; | |
continue; | |
+ } | |
nerrors++; | |
l = Runeerror; | |
c = 1; | |
@@ -69,6 +69,7 @@ utf_in(int fd, long *notused, struct convert *out) | |
if(n == 0) | |
break; | |
} | |
+ OUT(out, runes, 0); | |
} | |
void | |
@@ -100,11 +101,13 @@ isoutf_in(int fd, long *notused, struct convert *out) | |
if(!fullisorune(buf+i, tot-i)) | |
break; | |
c = isochartorune(&runes[j], buf+i); | |
- if(runes[j] == Runeerror){ | |
+ if(runes[j] == Runeerror && c == 1){ | |
if(squawk) | |
EPR "%s: bad UTF sequence near byte %l… | |
- if(clean) | |
+ if(clean){ | |
+ i++; | |
continue; | |
+ } | |
nerrors++; | |
} | |
j++; | |
@@ -118,6 +121,7 @@ isoutf_in(int fd, long *notused, struct convert *out) | |
if(n == 0) | |
break; | |
} | |
+ OUT(out, runes, 0); | |
} | |
void | |
@@ -393,19 +397,19 @@ our_mbtowc(unsigned long *p, char *s, unsigned n) | |
return 0; /* no shift states */ | |
if(n < 1) | |
- goto badlen; | |
+ goto bad; | |
us = (uchar*)s; | |
c0 = us[0]; | |
if(c0 >= T3) { | |
if(n < 3) | |
- goto badlen; | |
+ goto bad; | |
c1 = us[1] ^ Tx; | |
c2 = us[2] ^ Tx; | |
if((c1|c2) & T2) | |
goto bad; | |
if(c0 >= T5) { | |
if(n < 5) | |
- goto badlen; | |
+ goto bad; | |
c3 = us[3] ^ Tx; | |
c4 = us[4] ^ Tx; | |
if((c3|c4) & T2) | |
@@ -413,7 +417,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n) | |
if(c0 >= T6) { | |
/* 6 bytes */ | |
if(n < 6) | |
- goto badlen; | |
+ goto bad; | |
c5 = us[5] ^ Tx; | |
if(c5 & T2) | |
goto bad; | |
@@ -437,7 +441,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n) | |
if(c0 >= T4) { | |
/* 4 bytes */ | |
if(n < 4) | |
- goto badlen; | |
+ goto bad; | |
c3 = us[3] ^ Tx; | |
if(c3 & T2) | |
goto bad; | |
@@ -460,7 +464,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n) | |
if(c0 >= T2) { | |
/* 2 bytes */ | |
if(n < 2) | |
- goto badlen; | |
+ goto bad; | |
c1 = us[1] ^ Tx; | |
if(c1 & T2) | |
goto bad; | |
@@ -480,6 +484,4 @@ our_mbtowc(unsigned long *p, char *s, unsigned n) | |
bad: | |
errno = EILSEQ; | |
return -1; | |
-badlen: | |
- return -2; | |
} |