GopherProxy

	sync XML parser and some improvements - sub - subscene.com subtitle search
	git clone git://git.codemadness.org/sub
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 919b13a33a111b5f946652c2e2ce0a07200a3fe3
	parent 6ef7f7e85bfb08f37166b9c8c450afb43bc7fc50
	Author: Hiltjo Posthuma <[email protected]>
	Date: Sun, 11 Mar 2018 18:51:49 +0100

	sync XML parser and some improvements

	Diffstat:
	M sub.c | 8 +++++---
	M xml.c | 439 ++++++++++++++++++++---------…
	M xml.h | 79 +++++++++++++++--------------…

	3 files changed, 322 insertions(+), 204 deletions(-)
	---
	diff --git a/sub.c b/sub.c
	@@ -1,3 +1,5 @@
	+#include <sys/types.h>
	+
	#include <ctype.h>
	#include <errno.h>
	#include <stdio.h>
	@@ -165,14 +167,14 @@ main(void)
	return 1;
	}

	- xmlparser_init(&parser, stdin);
	-
	parser.xmltagstart = xml_handler_start_element;
	parser.xmltagend = xml_handler_end_element;
	parser.xmlattr = xml_handler_attr;
	parser.xmldata = xml_handler_data;

	- xmlparser_parse(&parser);
	+ parser.getnext = getchar;
	+
	+ xml_parse(&parser);

	return 0;
	}
	diff --git a/xml.c b/xml.c
	@@ -1,110 +1,104 @@
	+#include <sys/types.h>
	+
	+#include <ctype.h>
	+#include <errno.h>
	+#include <limits.h>
	+#include <stdint.h>
	#include <stdio.h>
	-#include <string.h>
	#include <stdlib.h>
	-#include <ctype.h>
	+#include <string.h>

	#include "xml.h"

	-static __inline__ int /* like getc(), but do some smart buffering */
	-xmlparser_getnext(XMLParser *x) {
	- return fgetc(x->fp);
	-#if 0
	- if(x->readoffset >= x->readlastbytes) {
	- x->readoffset = 0;
	- if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf…
	- return EOF; /* 0 bytes read, assume EOF */
	- }
	- return (int)x->readbuf[x->readoffset++];
	-#endif
	-}
	-
	-static __inline__ void
	-xmlparser_parseattrs(XMLParser *x) {
	+static void
	+xml_parseattrs(XMLParser *x)
	+{
	size_t namelen = 0, valuelen;
	int c, endsep, endname = 0;

	- while((c = xmlparser_getnext(x)) != EOF) {
	- if(isspace(c)) { /* TODO: simplify endname ? */
	- if(namelen)
	+ while ((c = x->getnext()) != EOF) {
	+ if (isspace(c)) { /* TODO: simplify endname ? */
	+ if (namelen)
	endname = 1;
	continue;
	}
	- if(c == '?')
	+ if (c == '?')
	; /* ignore */
	- else if(c == '=') {
	+ else if (c == '=') {
	x->name[namelen] = '\0';
	- } else if(namelen && ((endname && isalpha(c)) || (c == '>' || …
	+ } else if (namelen && ((endname && isalpha(c)) || (c == '>' ||…
	/* attribute without value */
	x->name[namelen] = '\0';
	- if(x->xmlattrstart)
	+ if (x->xmlattrstart)
	x->xmlattrstart(x, x->tag, x->taglen, x->name,…
	- if(x->xmlattr)
	+ if (x->xmlattr)
	x->xmlattr(x, x->tag, x->taglen, x->name, name…
	- if(x->xmlattrend)
	+ if (x->xmlattrend)
	x->xmlattrend(x, x->tag, x->taglen, x->name, n…
	endname = 0;
	x->name[0] = c;
	namelen = 1;
	- } else if(namelen && (c == '\'' || c == '"')) {
	+ } else if (namelen && (c == '\'' || c == '"')) {
	/* attribute with value */
	endsep = c; /* c is end separator */
	- if(x->xmlattrstart)
	+ if (x->xmlattrstart)
	x->xmlattrstart(x, x->tag, x->taglen, x->name,…
	- for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
	- if(c == '&' && x->xmlattrentity) { /* entities…
	+ for (valuelen = 0; (c = x->getnext()) != EOF;) {
	+ if (c == '&') { /* entities */
	x->data[valuelen] = '\0';
	/* call data function with data before…
	- if(valuelen && x->xmlattr)
	+ if (valuelen && x->xmlattr)
	x->xmlattr(x, x->tag, x->tagle…
	x->data[0] = c;
	valuelen = 1;
	- while((c = xmlparser_getnext(x)) != EO…
	- if(c == endsep)
	+ while ((c = x->getnext()) != EOF) {
	+ if (c == endsep)
	break;
	- if(valuelen < sizeof(x->data) …
	+ if (valuelen < sizeof(x->data)…
	x->data[valuelen++] = …
	else {
	/* TODO: entity too lo…
	x->data[valuelen] = '\…
	- if(x->xmlattr)
	+ if (x->xmlattr)
	x->xmlattr(x, …
	valuelen = 0;
	break;
	}
	- if(c == ';') {
	+ if (c == ';') {
	x->data[valuelen] = '\…
	- x->xmlattrentity(x, x-…
	+ if (x->xmlattrentity)
	+ x->xmlattrenti…
	valuelen = 0;
	break;
	}
	}
	- } else if(c != endsep) {
	- if(valuelen < sizeof(x->data) - 1) {
	+ } else if (c != endsep) {
	+ if (valuelen < sizeof(x->data) - 1) {
	x->data[valuelen++] = c;
	} else {
	x->data[valuelen] = '\0';
	- if(x->xmlattr)
	+ if (x->xmlattr)
	x->xmlattr(x, x->tag, …
	x->data[0] = c;
	valuelen = 1;
	}
	}
	- if(c == endsep) {
	+ if (c == endsep) {
	x->data[valuelen] = '\0';
	- if(x->xmlattr)
	+ if (x->xmlattr)
	x->xmlattr(x, x->tag, x->tagle…
	- if(x->xmlattrend)
	+ if (x->xmlattrend)
	x->xmlattrend(x, x->tag, x->ta…
	break;
	}
	}
	- namelen = 0;
	- endname = 0;
	- } else if(namelen < sizeof(x->name) - 1)
	+ namelen = endname = 0;
	+ } else if (namelen < sizeof(x->name) - 1) {
	x->name[namelen++] = c;
	- if(c == '>') {
	+ }
	+ if (c == '>') {
	break;
	- } else if(c == '/') {
	+ } else if (c == '/') {
	x->isshorttag = 1;
	namelen = 0;
	x->name[0] = '\0';
	@@ -112,37 +106,48 @@ xmlparser_parseattrs(XMLParser *x) {
	}
	}

	-static __inline__ void
	-xmlparser_parsecomment(XMLParser *x) {
	+static void
	+xml_parsecomment(XMLParser *x)
	+{
	size_t datalen = 0, i = 0;
	int c;

	- if(x->xmlcommentstart)
	+ if (x->xmlcommentstart)
	x->xmlcommentstart(x);
	- while((c = xmlparser_getnext(x)) != EOF) {
	- if(c == '-' && i < 2)
	- i++;
	- else if(c == '>') {
	- if(i == 2) { /* -- */
	- if(datalen >= 2) {
	- datalen -= 2;
	- x->data[datalen] = '\0';
	- if(x->xmlcomment)
	- x->xmlcomment(x, x->data, data…
	- }
	- if(x->xmlcommentend)
	- x->xmlcommentend(x);
	- break;
	+ while ((c = x->getnext()) != EOF) {
	+ if (c == '-' || c == '>') {
	+ if (x->xmlcomment) {
	+ x->data[datalen] = '\0';
	+ x->xmlcomment(x, x->data, datalen);
	+ datalen = 0;
	+ }
	+ }
	+
	+ if (c == '-') {
	+ if (++i > 2) {
	+ if (x->xmlcomment)
	+ for (; i > 2; i--)
	+ x->xmlcomment(x, "-", 1);
	+ i = 2;
	+ }
	+ continue;
	+ } else if (c == '>' && i == 2) {
	+ if (x->xmlcommentend)
	+ x->xmlcommentend(x);
	+ return;
	+ } else if (i) {
	+ if (x->xmlcomment) {
	+ for (; i > 0; i--)
	+ x->xmlcomment(x, "-", 1);
	}
	i = 0;
	}
	- /* || (c == '-' && d >= sizeof(x->data) - 4)) { */
	- /* TODO: what if the end has --, and it's cut on the boundary,…
	- if(datalen < sizeof(x->data) - 1)
	+
	+ if (datalen < sizeof(x->data) - 1) {
	x->data[datalen++] = c;
	- else {
	+ } else {
	x->data[datalen] = '\0';
	- if(x->xmlcomment)
	+ if (x->xmlcomment)
	x->xmlcomment(x, x->data, datalen);
	x->data[0] = c;
	datalen = 1;
	@@ -150,43 +155,47 @@ xmlparser_parsecomment(XMLParser *x) {
	}
	}

	-/* TODO:
	- * <test><![CDATA[1234567dddd8]]]>
	- *
	- * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
	- * test comment function too for similar bug?
	- *
	- */
	-static __inline__ void
	-xmlparser_parsecdata(XMLParser *x) {
	+static void
	+xml_parsecdata(XMLParser *x)
	+{
	size_t datalen = 0, i = 0;
	int c;

	- if(x->xmlcdatastart)
	+ if (x->xmlcdatastart)
	x->xmlcdatastart(x);
	- while((c = xmlparser_getnext(x)) != EOF) {
	- if(c == ']' && i < 2) {
	- i++;
	- } else if(c == '>') {
	- if(i == 2) { /* ]] */
	- if(datalen >= 2) {
	- datalen -= 2;
	- x->data[datalen] = '\0';
	- if(x->xmlcdata && datalen)
	- x->xmlcdata(x, x->data, datale…
	- }
	- if(x->xmlcdataend)
	- x->xmlcdataend(x);
	- break;
	+ while ((c = x->getnext()) != EOF) {
	+ if (c == ']' || c == '>') {
	+ if (x->xmlcdata) {
	+ x->data[datalen] = '\0';
	+ x->xmlcdata(x, x->data, datalen);
	+ datalen = 0;
	}
	+ }
	+
	+ if (c == ']') {
	+ if (++i > 2) {
	+ if (x->xmlcdata)
	+ for (; i > 2; i--)
	+ x->xmlcdata(x, "]", 1);
	+ i = 2;
	+ }
	+ continue;
	+ } else if (c == '>' && i == 2) {
	+ if (x->xmlcdataend)
	+ x->xmlcdataend(x);
	+ return;
	+ } else if (i) {
	+ if (x->xmlcdata)
	+ for (; i > 0; i--)
	+ x->xmlcdata(x, "]", 1);
	i = 0;
	}
	- /* TODO: what if the end has ]>, and it's cut on the boundary …
	- if(datalen < sizeof(x->data) - 1) {
	+
	+ if (datalen < sizeof(x->data) - 1) {
	x->data[datalen++] = c;
	} else {
	x->data[datalen] = '\0';
	- if(x->xmlcdata)
	+ if (x->xmlcdata)
	x->xmlcdata(x, x->data, datalen);
	x->data[0] = c;
	datalen = 1;
	@@ -194,128 +203,240 @@ xmlparser_parsecdata(XMLParser *x) {
	}
	}

	-void
	-xmlparser_init(XMLParser *x, FILE *fp) {
	- memset(x, 0, sizeof(XMLParser));
	- x->fp = fp;
	+int
	+xml_codepointtoutf8(uint32_t cp, uint32_t *utf)
	+{
	+ if (cp >= 0x10000) {
	+ /* 4 bytes */
	+ *utf = 0xf0808080 | ((cp & 0xfc0000) << 6) |
	+ ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
	+ (cp & 0x3f);
	+ return 4;
	+ } else if (cp >= 0x00800) {
	+ /* 3 bytes */
	+ *utf = 0xe08080 |
	+ ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
	+ (cp & 0x3f);
	+ return 3;
	+ } else if (cp >= 0x80) {
	+ /* 2 bytes */
	+ *utf = 0xc080 |
	+ ((cp & 0xfc0) << 2) | (cp & 0x3f);
	+ return 2;
	+ }
	+ *utf = cp & 0xff;
	+ return *utf ? 1 : 0; /* 1 byte */
	+}
	+
	+ssize_t
	+xml_namedentitytostr(const char *e, char *buf, size_t bufsiz)
	+{
	+ static const struct {
	+ char *entity;
	+ int c;
	+ } entities[] = {
	+ { .entity = "&amp;", .c = '&' },
	+ { .entity = "&lt;", .c = '<' },
	+ { .entity = "&gt;", .c = '>' },
	+ { .entity = "&apos;", .c = '\'' },
	+ { .entity = "&quot;", .c = '"' },
	+ { .entity = "&AMP;", .c = '&' },
	+ { .entity = "&LT;", .c = '<' },
	+ { .entity = "&GT;", .c = '>' },
	+ { .entity = "&APOS;", .c = '\'' },
	+ { .entity = "&QUOT;", .c = '"' }
	+ };
	+ size_t i;
	+
	+ /* buffer is too small */
	+ if (bufsiz < 2)
	+ return -1;
	+
	+ /* doesn't start with &: can't match */
	+ if (*e != '&')
	+ return 0;
	+
	+ for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
	+ if (!strcmp(e, entities[i].entity)) {
	+ buf[0] = entities[i].c;
	+ buf[1] = '\0';
	+ return 1;
	+ }
	+ }
	+ return 0;
	+}
	+
	+ssize_t
	+xml_numericentitytostr(const char *e, char *buf, size_t bufsiz)
	+{
	+ uint32_t l = 0, cp = 0;
	+ size_t b, len;
	+ char *end;
	+
	+ /* buffer is too small */
	+ if (bufsiz < 5)
	+ return -1;
	+
	+ /* not a numeric entity */
	+ if (e[0] != '&' || e[1] != '#')
	+ return 0;
	+
	+ /* e[1] == '#', numeric / hexadecimal entity */
	+ e += 2; /* skip "&#" */
	+ errno = 0;
	+ /* hex (16) or decimal (10) */
	+ if (*e == 'x')
	+ l = strtoul(e + 1, &end, 16);
	+ else
	+ l = strtoul(e, &end, 10);
	+ /* invalid value or not a well-formed entity */
	+ if (errno || *end != ';')
	+ return 0;
	+ len = xml_codepointtoutf8(l, &cp);
	+ /* make string */
	+ for (b = 0; b < len; b++)
	+ buf[b] = (cp >> (8 * (len - 1 - b))) & 0xff;
	+ buf[len] = '\0';
	+
	+ return (ssize_t)len;
	+}
	+
	+/* convert named- or numeric entity string to buffer string
	+ * returns byte-length of string. */
	+ssize_t
	+xml_entitytostr(const char *e, char *buf, size_t bufsiz)
	+{
	+ /* buffer is too small */
	+ if (bufsiz < 5)
	+ return -1;
	+ /* doesn't start with & */
	+ if (e[0] != '&')
	+ return 0;
	+ /* named entity */
	+ if (e[1] != '#')
	+ return xml_namedentitytostr(e, buf, bufsiz);
	+ else /* numeric entity */
	+ return xml_numericentitytostr(e, buf, bufsiz);
	}

	void
	-xmlparser_parse(XMLParser *x) {
	+xml_parse(XMLParser *x)
	+{
	int c, ispi;
	size_t datalen, tagdatalen, taglen;

	- while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < …
	+ if (!x->getnext)
	+ return;
	+ while ((c = x->getnext()) != EOF && c != '<')
	+ ; /* skip until < */

	- while(c != EOF) {
	- if(c == '<') { /* parse tag */
	- if((c = xmlparser_getnext(x)) == EOF)
	+ while (c != EOF) {
	+ if (c == '<') { /* parse tag */
	+ if ((c = x->getnext()) == EOF)
	return;
	x->tag[0] = '\0';
	x->taglen = 0;
	- if(c == '!') { /* cdata and comments */
	- for(tagdatalen = 0; (c = xmlparser_getnext(x))…
	- if(tagdatalen <= strlen("[CDATA[")) /*…
	+ if (c == '!') { /* cdata and comments */
	+ for (tagdatalen = 0; (c = x->getnext()) != EOF…
	+ if (tagdatalen <= sizeof("[CDATA[") - …
	x->data[tagdatalen++] = c; /* …
	- if(c == '>')
	+ if (c == '>')
	break;
	- else if(c == '-' && tagdatalen == strl…
	- (x->data[0] == '-')) {…
	- xmlparser_parsecomment(x);
	+ else if (c == '-' && tagdatalen == siz…
	+ (x->data[0] == '-')) {
	+ xml_parsecomment(x);
	break;
	- } else if(c == '[') {
	- if(tagdatalen == strlen("[CDAT…
	- x->data[1] == 'C' && x…
	- x->data[3] == 'A' && x…
	- x->data[5] == 'A' && x…
	- xmlparser_parsecdata(x…
	+ } else if (c == '[') {
	+ if (tagdatalen == sizeof("[CDA…
	+ !strncmp(x->data, "[CDATA[…
	+ xml_parsecdata(x);
	break;
	- #if 0
	- } else {
	- /* TODO ? */
	- /* markup declaration …
	- while((c = xmlparser_g…
	- #endif
	}
	}
	}
	- } else { /* normal tag (open, short open, close), proc…
	- if(isspace(c))
	- while((c = xmlparser_getnext(x)) != EO…
	- if(c == EOF)
	+ } else {
	+ /* normal tag (open, short open, close), proce…
	+ if (isspace(c))
	+ while ((c = x->getnext()) != EOF && is…
	+ ;
	+ if (c == EOF)
	return;
	x->tag[0] = c;
	ispi = (c == '?') ? 1 : 0;
	x->isshorttag = ispi;
	taglen = 1;
	- while((c = xmlparser_getnext(x)) != EOF) {
	- if(c == '/') /* TODO: simplify short t…
	+ while ((c = x->getnext()) != EOF) {
	+ if (c == '/') /* TODO: simplify short …
	x->isshorttag = 1; /* short ta…
	- else if(c == '>' || isspace(c)) {
	+ else if (c == '>' || isspace(c)) {
	x->tag[taglen] = '\0';
	- if(x->tag[0] == '/') { /* end …
	+ if (x->tag[0] == '/') { /* end…
	x->taglen = --taglen; …
	- if(taglen && x->xmltag…
	+ if (taglen && x->xmlta…
	x->xmltagend(x…
	} else {
	x->taglen = taglen;
	- if(x->xmltagstart)
	- x->xmltagstart…
	- if(isspace(c))
	- xmlparser_pars…
	- if(x->xmltagstartparse…
	+ /* start tag */
	+ if (x->xmltagstart)
	+ x->xmltagstart…
	+ if (isspace(c))
	+ xml_parseattrs…
	+ if (x->xmltagstartpars…
	x->xmltagstart…
	}
	- if((x->isshorttag || ispi) && …
	+ /* call tagend for shortform o…
	+ if ((x->isshorttag || ispi) &&…
	x->xmltagend(x, x->tag…
	break;
	- } else if(taglen < sizeof(x->tag) - 1)
	+ } else if (taglen < sizeof(x->tag) - 1)
	x->tag[taglen++] = c;
	}
	}
	} else {
	- /* parse data */
	+ /* parse tag data */
	datalen = 0;
	- if(x->xmldatastart)
	+ if (x->xmldatastart)
	x->xmldatastart(x);
	- while((c = xmlparser_getnext(x)) != EOF) {
	- if(c == '&' && x->xmldataentity) {
	- if(datalen) {
	+ while ((c = x->getnext()) != EOF) {
	+ if (c == '&') {
	+ if (datalen) {
	x->data[datalen] = '\0';
	- x->xmldata(x, x->data, datalen…
	+ if (x->xmldata)
	+ x->xmldata(x, x->data,…
	}
	x->data[0] = c;
	datalen = 1;
	- while((c = xmlparser_getnext(x)) != EO…
	- if(c == '<')
	+ while ((c = x->getnext()) != EOF) {
	+ if (c == '<')
	break;
	- if(datalen < sizeof(x->data) -…
	+ if (datalen < sizeof(x->data) …
	x->data[datalen++] = c;
	- if(isspace(c))
	+ if (isspace(c))
	break;
	- else if(c == ';') {
	+ else if (c == ';') {
	x->data[datalen] = '\0…
	- x->xmldataentity(x, x-…
	+ if (x->xmldataentity)
	+ x->xmldataenti…
	datalen = 0;
	break;
	}
	}
	- } else if(c != '<') {
	- if(datalen < sizeof(x->data) - 1) {
	+ } else if (c != '<') {
	+ if (datalen < sizeof(x->data) - 1) {
	x->data[datalen++] = c;
	} else {
	x->data[datalen] = '\0';
	- if(x->xmldata)
	+ if (x->xmldata)
	x->xmldata(x, x->data,…
	x->data[0] = c;
	datalen = 1;
	}
	}
	- if(c == '<') {
	+ if (c == '<') {
	x->data[datalen] = '\0';
	- if(x->xmldata && datalen)
	+ if (x->xmldata && datalen)
	x->xmldata(x, x->data, datalen…
	- if(x->xmldataend)
	+ if (x->xmldataend)
	x->xmldataend(x);
	break;
	}
	diff --git a/xml.h b/xml.h
	@@ -1,49 +1,44 @@
	-#include <stdio.h>
	-#include <string.h>
	-#include <stdlib.h>
	-
	typedef struct xmlparser {
	/* handlers */
	- void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t tagle…
	- void (*xmltagstartparsed)(struct xmlparser *p, const char *tag,
	- size_t taglen, int isshort);
	- void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen,
	- int isshort);
	- void (*xmldatastart)(struct xmlparser *p);
	- void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen);
	- void (*xmldataend)(struct xmlparser *p);
	- void (*xmldataentity)(struct xmlparser *p, const char *data,
	- size_t datalen);
	- void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t tagl…
	- const char *name, size_t namelen);
	- void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen,
	- const char *name, size_t namelen, const char *value,
	- size_t valuelen);
	- void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen,
	- const char *name, size_t namelen);
	- void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t tag…
	- const char *name, size_t namelen, const char *value,
	- size_t valuelen);
	- void (*xmlcdatastart)(struct xmlparser *p);
	- void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen…
	- void (*xmlcdataend)(struct xmlparser *p);
	- void (*xmlcommentstart)(struct xmlparser *p);
	- void (*xmlcomment)(struct xmlparser *p, const char *comment,
	- size_t commentlen);
	- void (*xmlcommentend)(struct xmlparser *p);
	+ void (*xmlattr)(struct xmlparser *, const char *, size_t,
	+ const char *, size_t, const char *, size_t);
	+ void (*xmlattrend)(struct xmlparser *, const char *, size_t,
	+ const char *, size_t);
	+ void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
	+ const char *, size_t);
	+ void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
	+ const char *, size_t, const char *, size_t);
	+ void (*xmlcdatastart)(struct xmlparser *);
	+ void (*xmlcdata)(struct xmlparser *, const char *, size_t);
	+ void (*xmlcdataend)(struct xmlparser *);
	+ void (*xmlcommentstart)(struct xmlparser *);
	+ void (*xmlcomment)(struct xmlparser *, const char *, size_t);
	+ void (*xmlcommentend)(struct xmlparser *);
	+ void (*xmldata)(struct xmlparser *, const char *, size_t);
	+ void (*xmldataend)(struct xmlparser *);
	+ void (*xmldataentity)(struct xmlparser *, const char *, size_t);
	+ void (*xmldatastart)(struct xmlparser *);
	+ void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
	+ void (*xmltagstart)(struct xmlparser *, const char *, size_t);
	+ void (*xmltagstartparsed)(struct xmlparser *, const char *,
	+ size_t, int);

	- FILE *fp; /* file stream to read from */
	+ int (*getnext)(void);

	- /* private; internal state */
	- char tag[1024]; /* current tag */
	- int isshorttag; /* current tag is in short form ? */
	+ /* current tag */
	+ char tag[1024];
	size_t taglen;
	- char name[256]; /* current attribute name */
	- char data[BUFSIZ]; /* data buffer used for tag and attribute data */
	- size_t readoffset;
	- size_t readlastbytes;
	- unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnex…
	+ /* current tag is in short form ? <tag /> */
	+ int isshorttag;
	+ /* current attribute name */
	+ char name[256];
	+ /* data buffer used for tag data, cdata and attribute data */
	+ char data[BUFSIZ];
	} XMLParser;

	-void xmlparser_init(XMLParser *x, FILE *fp);
	-void xmlparser_parse(XMLParser *x);
	+int xml_codepointtoutf8(uint32_t, uint32_t *);
	+ssize_t xml_entitytostr(const char *, char *, size_t);
	+ssize_t xml_namedentitytostr(const char *, char *, size_t);
	+ssize_t xml_numericentitytostr(const char *, char *, size_t);
	+
	+void xml_parse(XMLParser *);