GopherProxy

	XML tag parse improvements for PI and end tags - tscrape - twitter scraper
	git clone git://git.codemadness.org/tscrape
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 0fac9621c44b76c38d911438b1966d665e3b8134
	parent 24fad792de3bab17f1cf485450435761fb3b8657
	Author: Hiltjo Posthuma <[email protected]>
	Date: Mon, 17 Dec 2018 18:25:08 +0100

	XML tag parse improvements for PI and end tags

	- Stricter parsing of tags, no whitespace stripping after <.
	- For end tags the "internal" context x->tag used to be "/sometag". Make sure
	x->tag now matches the tag parameter exactly.
	- Reset tagname after parsing an end tag.
	- Make end tag handling more consistent.
	- Remove temporary variable taglen.

	Diffstat:
	M xml.c | 52 +++++++++++++++++------------…

	1 file changed, 29 insertions(+), 23 deletions(-)
	---
	diff --git a/xml.c b/xml.c
	@@ -334,8 +334,8 @@ xml_entitytostr(const char *e, char *buf, size_t bufsiz)
	void
	xml_parse(XMLParser *x)
	{
	- int c, ispi;
	- size_t datalen, tagdatalen, taglen;
	+ size_t datalen, tagdatalen;
	+ int c, isend;

	if (!x->getnext)
	return;
	@@ -367,30 +367,32 @@ xml_parse(XMLParser *x)
	}
	}
	} else {
	- x->tag[0] = '\0';
	- x->taglen = 0;
	-
	/* normal tag (open, short open, close), proce…
	- if (isspace(c))
	- while ((c = x->getnext()) != EOF && is…
	- ;
	- if (c == EOF)
	- return;
	x->tag[0] = c;
	- ispi = (c == '?') ? 1 : 0;
	- x->isshorttag = ispi;
	- taglen = 1;
	+ x->taglen = 1;
	+ x->isshorttag = isend = 0;
	+
	+ /* treat processing instruction as shorttag, d…
	+ if (c == '?') {
	+ x->isshorttag = 1;
	+ } else if (c == '/') {
	+ if ((c = x->getnext()) == EOF)
	+ return;
	+ x->tag[0] = c;
	+ isend = 1;
	+ }
	+
	while ((c = x->getnext()) != EOF) {
	if (c == '/')
	x->isshorttag = 1; /* short ta…
	else if (c == '>' || isspace(c)) {
	- x->tag[taglen] = '\0';
	- if (x->tag[0] == '/') { /* end…
	- x->taglen = --taglen; …
	- if (taglen && x->xmlta…
	- x->xmltagend(x…
	+ x->tag[x->taglen] = '\0';
	+ if (isend) { /* end tag, start…
	+ if (x->xmltagend)
	+ x->xmltagend(x…
	+ x->tag[0] = '\0';
	+ x->taglen = 0;
	} else {
	- x->taglen = taglen;
	/* start tag */
	if (x->xmltagstart)
	x->xmltagstart…
	@@ -400,11 +402,15 @@ xml_parse(XMLParser *x)
	x->xmltagstart…
	}
	/* call tagend for shortform o…
	- if ((x->isshorttag || ispi) &&…
	- x->xmltagend(x, x->tag…
	+ if (x->isshorttag) {
	+ if (x->xmltagend)
	+ x->xmltagend(x…
	+ x->tag[0] = '\0';
	+ x->taglen = 0;
	+ }
	break;
	- } else if (taglen < sizeof(x->tag) - 1)
	- x->tag[taglen++] = c; /* NOTE:…
	+ } else if (x->taglen < sizeof(x->tag) …
	+ x->tag[x->taglen++] = c; /* NO…
	}
	}
	} else {