GopherProxy

	More & names and numbers. - plan9port - [fork] Plan 9 from user space
	git clone git://src.adamsgaard.dk/plan9port
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 431e32de9b41c230b0791fb9f2f293859d189e59
	parent 01a1c31a7d99fd24ba134ddc33fb3df95a668f3a
	Author: rsc <devnull@localhost>
	Date: Fri, 30 Sep 2005 17:45:40 +0000

	More & names and numbers.

	Diffstat:
	M src/libhtml/lex.c | 214 +++++++++++++++++------------…

	1 file changed, 118 insertions(+), 96 deletions(-)
	---
	diff --git a/src/libhtml/lex.c b/src/libhtml/lex.c
	@@ -333,7 +333,9 @@ AsciiInt _chartab[] = {
	{"kappa", 954},
	{"lambda", 955},
	{"laquo", 171},
	+ {"ldquo", 8220},
	{"ldots", 8230},
	+ {"lsquo", 8216},
	{"lt", 60},
	{"macr", 175},
	{"mdash", 8212},
	@@ -364,8 +366,10 @@ AsciiInt _chartab[] = {
	{"quad", 8193},
	{"quot", 34},
	{"raquo", 187},
	+ {"rdquo", 8221},
	{"reg", 174},
	{"rho", 961},
	+ {"rsquo", 8217},
	{"sect", 167},
	{"shy", 173},
	{"sigma", 963},
	@@ -492,9 +496,9 @@ _gettoks(uchar* data, int datalen, int chset, int mtype, i…
	ai = 0;
	if(dbglex)
	fprint(2, "_gettoks starts, ts.i=%d, ts.edata=%d\n", ts->i, ts…
	- if(ts->mtype == TextHtml) {
	- for(;;) {
	- if(ai == alen) {
	+ if(ts->mtype == TextHtml){
	+ for(;;){
	+ if(ai == alen){
	a = (Token)erealloc(a, (alen+ToksChunk)sizeo…
	alen += ToksChunk;
	}
	@@ -502,9 +506,9 @@ _gettoks(uchar* data, int datalen, int chset, int mtype, i…
	c = getchar(ts);
	if(c < 0)
	break;
	- if(c == '<') {
	+ if(c == '<'){
	tag = gettag(ts, starti, a, &ai);
	- if(tag == Tscript) {
	+ if(tag == Tscript){
	// special rules for getting Data afte…
	starti = ts->i;
	c = getchar(ts);
	@@ -521,8 +525,8 @@ _gettoks(uchar* data, int datalen, int chset, int mtype, i…
	}
	else {
	// plain text (non-html) tokens
	- for(;;) {
	- if(ai == alen) {
	+ for(;;){
	+ if(ai == alen){
	a = (Token)erealloc(a, (alen+ToksChunk)sizeo…
	alen += ToksChunk;
	}
	@@ -560,14 +564,14 @@ getplaindata(TokenSource* ts, Token* a, int* pai)
	s = nil;
	j = 0;
	starti = ts->i;
	- for(c = getchar(ts); c >= 0; c = getchar(ts)) {
	- if(c < ' ') {
	- if(isspace(c)) {
	- if(c == '\r') {
	+ for(c = getchar(ts); c >= 0; c = getchar(ts)){
	+ if(c < ' '){
	+ if(isspace(c)){
	+ if(c == '\r'){
	// ignore it unless no following '\n',
	// in which case treat it like '\n'
	c = getchar(ts);
	- if(c != '\n') {
	+ if(c != '\n'){
	if(c >= 0)
	ungetchar(ts, c);
	c = '\n';
	@@ -577,9 +581,9 @@ getplaindata(TokenSource* ts, Token* a, int* pai)
	else
	c = 0;
	}
	- if(c != 0) {
	+ if(c != 0){
	buf[j++] = c;
	- if(j == sizeof(buf)-1) {
	+ if(j == sizeof(buf)-1){
	s = buftostr(s, buf, j);
	j = 0;
	}
	@@ -627,19 +631,19 @@ getdata(TokenSource* ts, int firstc, int starti, Token* …
	s = nil;
	j = 0;
	c = firstc;
	- while(c >= 0) {
	- if(c == '&') {
	+ while(c >= 0){
	+ if(c == '&'){
	c = ampersand(ts);
	if(c < 0)
	break;
	}
	- else if(c < ' ') {
	- if(isspace(c)) {
	- if(c == '\r') {
	+ else if(c < ' '){
	+ if(isspace(c)){
	+ if(c == '\r'){
	// ignore it unless no following '\n',
	// in which case treat it like '\n'
	c = getchar(ts);
	- if(c != '\n') {
	+ if(c != '\n'){
	if(c >= 0)
	ungetchar(ts, c);
	c = '\n';
	@@ -652,13 +656,13 @@ getdata(TokenSource* ts, int firstc, int starti, Token* …
	c = 0;
	}
	}
	- else if(c == '<') {
	+ else if(c == '<'){
	ungetchar(ts, c);
	break;
	}
	- if(c != 0) {
	+ if(c != 0){
	buf[j++] = c;
	- if(j == BIGBUFSIZE-1) {
	+ if(j == BIGBUFSIZE-1){
	s = buftostr(s, buf, j);
	j = 0;
	}
	@@ -696,12 +700,12 @@ getscriptdata(TokenSource* ts, int firstc, int starti, T…
	tstarti = starti;
	c = firstc;
	done = 0;
	- while(c >= 0) {
	- if(c == '<') {
	+ while(c >= 0){
	+ if(c == '<'){
	// other browsers ignore stuff to end of line after <!
	savei = ts->i;
	c = getchar(ts);
	- if(c == '!') {
	+ if(c == '!'){
	while(c >= 0 && c != '\n' && c != '\r')
	c = getchar(ts);
	if(c == '\r')
	@@ -709,7 +713,7 @@ getscriptdata(TokenSource* ts, int firstc, int starti, Tok…
	if(c == '\n')
	c = getchar(ts);
	}
	- else if(c >= 0) {
	+ else if(c >= 0){
	backup(ts, savei);
	tag = gettag(ts, tstarti, a, pai);
	if(tag == -1)
	@@ -717,7 +721,7 @@ getscriptdata(TokenSource* ts, int firstc, int starti, Tok…
	if(tag != Comment)
	(*pai)--;
	backup(ts, tstarti);
	- if(tag == Tscript + RBRA) {
	+ if(tag == Tscript + RBRA){
	done = 1;
	break;
	}
	@@ -727,9 +731,9 @@ getscriptdata(TokenSource* ts, int firstc, int starti, Tok…
	}
	if(c < 0)
	break;
	- if(c != 0) {
	+ if(c != 0){
	buf[j++] = c;
	- if(j == BIGBUFSIZE-1) {
	+ if(j == BIGBUFSIZE-1){
	s = buftostr(s, buf, j);
	j = 0;
	}
	@@ -737,7 +741,7 @@ getscriptdata(TokenSource* ts, int firstc, int starti, Tok…
	tstarti = ts->i;
	c = getchar(ts);
	}
	- if(done \|\| ts->i == ts->edata) {
	+ if(done \|\| ts->i == ts->edata){
	s = buftostr(s, buf, j);
	tok = &a[(*pai)++];
	tok->tag = Data;
	@@ -784,15 +788,15 @@ gettag(TokenSource* ts, int starti, Token* a, int* pai)
	tok->attr = nil;
	tok->starti = starti;
	c = getchar(ts);
	- if(c == '/') {
	+ if(c == '/'){
	rbra = RBRA;
	c = getchar(ts);
	}
	if(c < 0)
	goto eob_done;
	- if(c >= 256 \|\| !isalpha(c)) {
	+ if(c >= 256 \|\| !isalpha(c)){
	// not a tag
	- if(c == '!') {
	+ if(c == '!'){
	ans = comment(ts);
	if(ans != -1)
	return ans;
	@@ -809,7 +813,7 @@ gettag(TokenSource* ts, int starti, Token* a, int* pai)
	// c starts a tagname
	buf[0] = c;
	i = 1;
	- while(1) {
	+ for(;;){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	@@ -826,34 +830,34 @@ gettag(TokenSource* ts, int starti, Token* a, int* pai)

	// attribute gathering loop
	al = nil;
	- while(1) {
	+ for(;;){
	// look for "ws name" or "ws name ws = ws val" (ws=whitespace)
	// skip whitespace
	attrloop_continue:
	- while(c < 256 && isspace(c)) {
	+ while(c < 256 && isspace(c)){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	}
	if(c == '>')
	goto attrloop_done;
	- if(c == '<') {
	+ if(c == '<'){
	if(warn)
	fprint(2, "warning: unclosed tag\n");
	ungetchar(ts, c);
	goto attrloop_done;
	}
	- if(c >= 256 \|\| !isalpha(c)) {
	+ if(c >= 256 \|\| !isalpha(c)){
	if(warn)
	fprint(2, "warning: expected attribute name\n"…
	// skipt to next attribute name
	- while(1) {
	+ for(;;){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	if(c < 256 && isalpha(c))
	goto attrloop_continue;
	- if(c == '<') {
	+ if(c == '<'){
	if(warn)
	fprint(2, "warning: unclosed t…
	ungetchar(ts, 60);
	@@ -866,7 +870,7 @@ attrloop_continue:
	// gather attribute name
	buf[0] = c;
	i = 1;
	- while(1) {
	+ for(;;){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	@@ -876,23 +880,23 @@ attrloop_continue:
	buf[i++] = c;
	}
	afnd = _lookup(attrtable, Numattrs, buf, i, &attid);
	- if(warn && !afnd) {
	+ if(warn && !afnd){
	buf[i] = 0;
	fprint(2, "warning: unknown attribute name %S\n", buf);
	}
	// skip whitespace
	- while(c < 256 && isspace(c)) {
	+ while(c < 256 && isspace(c)){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	}
	- if(c != '=') {
	+ if(c != '='){
	if(afnd)
	al = newattr(attid, nil, al);
	goto attrloop_continue;
	}
	//# c is '=' here; skip whitespace
	- while(1) {
	+ for(;;){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	@@ -900,7 +904,7 @@ attrloop_continue:
	break;
	}
	quote = 0;
	- if(c == '\'' \|\| c == '"') {
	+ if(c == '\'' \|\| c == '"'){
	quote = c;
	c = getchar(ts);
	if(c < 0)
	@@ -908,31 +912,31 @@ attrloop_continue:
	}
	val = nil;
	nv = 0;
	- while(1) {
	+ for(;;){
	valloop_continue:
	if(c < 0)
	goto eob_done;
	- if(c == '>') {
	- if(quote) {
	+ if(c == '>'){
	+ if(quote){
	// c might be part of string (though n…
	// but if line ends before close quote…
	// there was an unmatched quote
	ti = ts->i;
	- while(1) {
	+ for(;;){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	- if(c == quote) {
	+ if(c == quote){
	backup(ts, ti);
	buf[nv++] = '>';
	- if(nv == BIGBUFSIZE-1)…
	+ if(nv == BIGBUFSIZE-1){
	val = buftostr…
	nv = 0;
	}
	c = getchar(ts);
	goto valloop_continue;
	}
	- if(c == '\n') {
	+ if(c == '\n'){
	if(warn)
	fprint(2, "war…
	backup(ts, ti);
	@@ -944,14 +948,14 @@ valloop_continue:
	else
	goto valloop_done;
	}
	- if(quote) {
	- if(c == quote) {
	+ if(quote){
	+ if(c == quote){
	c = getchar(ts);
	if(c < 0)
	goto eob_done;
	goto valloop_done;
	}
	- if(c == '\r') {
	+ if(c == '\r'){
	c = getchar(ts);
	goto valloop_continue;
	}
	@@ -962,20 +966,20 @@ valloop_continue:
	if(c < 256 && isspace(c))
	goto valloop_done;
	}
	- if(c == '&') {
	+ if(c == '&'){
	c = ampersand(ts);
	if(c == -1)
	goto eob_done;
	}
	buf[nv++] = c;
	- if(nv == BIGBUFSIZE-1) {
	+ if(nv == BIGBUFSIZE-1){
	val = buftostr(val, buf, nv);
	nv = 0;
	}
	c = getchar(ts);
	}
	valloop_done:
	- if(afnd) {
	+ if(afnd){
	val = buftostr(val, buf, nv);
	al = newattr(attid, val, al);
	}
	@@ -1017,19 +1021,19 @@ comment(TokenSource* ts)
	nexti = ts->i;
	havecomment = 0;
	c = getchar(ts);
	- if(c == '-') {
	+ if(c == '-'){
	c = getchar(ts);
	- if(c == '-') {
	+ if(c == '-'){
	if(findstr(ts, L(Larrow)))
	havecomment = 1;
	else
	backup(ts, nexti);
	}
	}
	- if(!havecomment) {
	+ if(!havecomment){
	if(c == '>')
	havecomment = 1;
	- else if(c >= 0) {
	+ else if(c >= 0){
	if(findstr(ts, L(Lgt)))
	havecomment = 1;
	}
	@@ -1053,15 +1057,15 @@ findstr(TokenSource* ts, Rune* s)

	c0 = s[0];
	n = runestrlen(s);
	- while(1) {
	+ for(;;){
	c = getchar(ts);
	if(c < 0)
	break;
	- if(c == c0) {
	+ if(c == c0){
	if(n == 1)
	return 1;
	nexti = ts->i;
	- for(i = 1; i < n; i++) {
	+ for(i = 1; i < n; i++){
	c = getchar(ts);
	if(c < 0)
	goto mainloop_done;
	@@ -1077,6 +1081,18 @@ mainloop_done:
	return 0;
	}

	+static int	// xdigit: numeric value (0-15) of hex digit c, or -1 if c is not a hex digit
	+xdigit(int c)
	+{
	+ if('0' <= c && c <= '9')	// decimal digits map to 0-9
	+ return c-'0';
	+ if('a' <= c && c <= 'f')	// lower-case a-f map to 10-15
	+ return c-'a'+10;
	+ if('A' <= c && c <= 'F')	// upper-case A-F map to 10-15
	+ return c-'A'+10;
	+ return -1;	// anything else, including negative c, is not a hex digit
	+}
	+
	// We've just read an '&'; look for an entity reference
	// name, and if found, return translated char.
	// if there is a complete entity name but it isn't known,
	@@ -1100,36 +1116,42 @@ ampersand(TokenSource* ts)
	c = getchar(ts);
	fnd = 0;
	ans = -1;
	- if(c == '#') {
	+ if(c == '#'){
	c = getchar(ts);
	v = 0;
	- while(c >= 0) {
	- if(!(c < 256 && isdigit(c)))
	- break;
	- v = v*10 + c - 48;
	+ if(c == 'x'){
	c = getchar(ts);
	+ while((i=xdigit(c)) != -1){
	+ v = v*16 + i;
	+ c = getchar(ts);
	+ }
	+ }else{
	+ while('0' <= c && c <= '9'){
	+ v = v*10 + c - '0';
	+ c = getchar(ts);
	+ }
	}
	- if(c >= 0) {
	+ if(c >= 0){
	if(!(c == ';' \|\| c == '\n' \|\| c == '\r'))
	ungetchar(ts, c);
	c = v;
	if(c == 160)
	c = 160;
	- if(c >= Winstart && c <= Winend) {
	+ if(c >= Winstart && c <= Winend){
	c = winchars[c - Winstart];
	}
	ans = c;
	fnd = 1;
	}
	}
	- else if(c < 256 && isalpha(c)) {
	+ else if(c < 256 && isalpha(c)){
	buf[0] = c;
	k = 1;
	- while(1) {
	+ for(;;){
	c = getchar(ts);
	if(c < 0)
	break;
	- if(ISNAMCHAR(c)) {
	+ if(ISNAMCHAR(c)){
	if(k < SMALLBUFSIZE-1)
	buf[k++] = c;
	}
	@@ -1139,17 +1161,17 @@ ampersand(TokenSource* ts)
	break;
	}
	}
	- if(c >= 0) {
	+ if(c >= 0){
	fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
	- if(!fnd) {
	+ if(!fnd){
	// Try prefixes of s
	if(c == ';' \|\| c == '\n' \|\| c == '\r')
	ungetchar(ts, c);
	i = k;
	- while(--k > 0) {
	+ while(--k > 0){
	fnd = _lookup(chartab, NCHARTAB, buf, …
	- if(fnd) {
	- while(i > k) {
	+ if(fnd){
	+ while(i > k){
	i--;
	ungetchar(ts, buf[i]);
	}
	@@ -1159,7 +1181,7 @@ ampersand(TokenSource* ts)
	}
	}
	}
	- if(!fnd) {
	+ if(!fnd){
	backup(ts, savei);
	ans = '&';
	}
	@@ -1181,14 +1203,14 @@ getchar(TokenSource* ts)
	return -1;
	buf = ts->data;
	c = buf[ts->i];
	- switch(ts->chset) {
	+ switch(ts->chset){
	case ISO_8859_1:
	if(c >= Winstart && c <= Winend)
	c = winchars[c - Winstart];
	ts->i++;
	break;
	case US_Ascii:
	- if(c > 127) {
	+ if(c > 127){
	if(warn)
	fprint(2, "non-ascii char (%x) when US-ASCII s…
	}
	@@ -1197,7 +1219,7 @@ getchar(TokenSource* ts)
	case UTF_8:
	ok = fullrune((char*)(buf+ts->i), ts->edata-ts->i);
	n = chartorune(&r, (char*)(buf+ts->i));
	- if(ok) {
	+ if(ok){
	if(warn && c == 0x80)
	fprint(2, "warning: invalid utf-8 sequence (st…
	ts->i += n;
	@@ -1210,7 +1232,7 @@ getchar(TokenSource* ts)
	}
	break;
	case Unicode:
	- if(ts->i < ts->edata - 1) {
	+ if(ts->i < ts->edata - 1){
	//standards say most-significant byte first
	c = (c << 8)\|(buf[ts->i + 1]);
	ts->i += 2;
	@@ -1235,9 +1257,9 @@ ungetchar(TokenSource* ts, int c)
	char a[UTFmax];

	n = 1;
	- switch(ts->chset) {
	+ switch(ts->chset){
	case UTF_8:
	- if(c >= 128) {
	+ if(c >= 128){
	r = c;
	n = runetochar(a, &r);
	}
	@@ -1273,8 +1295,8 @@ _tokaval(Token* t, int attid, Rune** pans, int xfer)
	Attr* attr;

	attr = t->attr;
	- while(attr != nil) {
	- if(attr->attid == attid) {
	+ while(attr != nil){
	+ if(attr->attid == attid){
	if(pans != nil)
	*pans = attr->value;
	if(xfer)
	@@ -1308,12 +1330,12 @@ Tconv(Fmt *f)
	if(dbglex > 1)
	i = snprint(buf, sizeof(buf), "[%d]", t->starti);
	tag = t->tag;
	- if(tag == Data) {
	+ if(tag == Data){
	i += snprint(buf+i, sizeof(buf)-i-1, "'%S'", t->text);
	}
	else {
	srbra = "";
	- if(tag >= RBRA) {
	+ if(tag >= RBRA){
	tag -= RBRA;
	srbra = "/";
	}
	@@ -1321,7 +1343,7 @@ Tconv(Fmt *f)
	if(tag == Notfound)
	tname = L(Lquestion);
	i += snprint(buf+i, sizeof(buf)-i-1, "<%s%S", srbra, t…
	- for(a = t->attr; a != nil; a = a->next) {
	+ for(a = t->attr; a != nil; a = a->next){
	aname = attrnames[a->attid];
	i += snprint(buf+i, sizeof(buf)-i-1, " %S", an…
	if(a->value != nil)
	@@ -1356,7 +1378,7 @@ freeattrs(Attr* ahead)
	Attr* nexta;

	a = ahead;
	- while(a != nil) {
	+ while(a != nil){
	nexta = a->next;
	free(a->value);
	free(a);
	@@ -1377,7 +1399,7 @@ _freetokens(Token* tarray, int n)

	if(tarray == nil)
	return;
	- for(i = 0; i < n; i++) {
	+ for(i = 0; i < n; i++){
	t = &tarray[i];
	free(t->text);
	freeattrs(t->attr);