sync XML parser and some improvements - sub - subscene.com subtitle search | |
git clone git://git.codemadness.org/sub | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 919b13a33a111b5f946652c2e2ce0a07200a3fe3 | |
parent 6ef7f7e85bfb08f37166b9c8c450afb43bc7fc50 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sun, 11 Mar 2018 18:51:49 +0100 | |
sync XML parser and some improvements | |
Diffstat: | |
M sub.c | 8 +++++--- | |
M xml.c | 439 ++++++++++++++++++++---------… | |
M xml.h | 79 +++++++++++++++--------------… | |
3 files changed, 322 insertions(+), 204 deletions(-) | |
--- | |
diff --git a/sub.c b/sub.c | |
@@ -1,3 +1,5 @@ | |
+#include <sys/types.h> | |
+ | |
#include <ctype.h> | |
#include <errno.h> | |
#include <stdio.h> | |
@@ -165,14 +167,14 @@ main(void) | |
return 1; | |
} | |
- xmlparser_init(&parser, stdin); | |
- | |
parser.xmltagstart = xml_handler_start_element; | |
parser.xmltagend = xml_handler_end_element; | |
parser.xmlattr = xml_handler_attr; | |
parser.xmldata = xml_handler_data; | |
- xmlparser_parse(&parser); | |
+ parser.getnext = getchar; | |
+ | |
+ xml_parse(&parser); | |
return 0; | |
} | |
diff --git a/xml.c b/xml.c | |
@@ -1,110 +1,104 @@ | |
+#include <sys/types.h> | |
+ | |
+#include <ctype.h> | |
+#include <errno.h> | |
+#include <limits.h> | |
+#include <stdint.h> | |
#include <stdio.h> | |
-#include <string.h> | |
#include <stdlib.h> | |
-#include <ctype.h> | |
+#include <string.h> | |
#include "xml.h" | |
-static __inline__ int /* like getc(), but do some smart buffering */ | |
-xmlparser_getnext(XMLParser *x) { | |
- return fgetc(x->fp); | |
-#if 0 | |
- if(x->readoffset >= x->readlastbytes) { | |
- x->readoffset = 0; | |
- if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf… | |
- return EOF; /* 0 bytes read, assume EOF */ | |
- } | |
- return (int)x->readbuf[x->readoffset++]; | |
-#endif | |
-} | |
- | |
-static __inline__ void | |
-xmlparser_parseattrs(XMLParser *x) { | |
+static void | |
+xml_parseattrs(XMLParser *x) | |
+{ | |
size_t namelen = 0, valuelen; | |
int c, endsep, endname = 0; | |
- while((c = xmlparser_getnext(x)) != EOF) { | |
- if(isspace(c)) { /* TODO: simplify endname ? */ | |
- if(namelen) | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (isspace(c)) { /* TODO: simplify endname ? */ | |
+ if (namelen) | |
endname = 1; | |
continue; | |
} | |
- if(c == '?') | |
+ if (c == '?') | |
; /* ignore */ | |
- else if(c == '=') { | |
+ else if (c == '=') { | |
x->name[namelen] = '\0'; | |
- } else if(namelen && ((endname && isalpha(c)) || (c == '>' || … | |
+ } else if (namelen && ((endname && isalpha(c)) || (c == '>' ||… | |
/* attribute without value */ | |
x->name[namelen] = '\0'; | |
- if(x->xmlattrstart) | |
+ if (x->xmlattrstart) | |
x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
- if(x->xmlattr) | |
+ if (x->xmlattr) | |
x->xmlattr(x, x->tag, x->taglen, x->name, name… | |
- if(x->xmlattrend) | |
+ if (x->xmlattrend) | |
x->xmlattrend(x, x->tag, x->taglen, x->name, n… | |
endname = 0; | |
x->name[0] = c; | |
namelen = 1; | |
- } else if(namelen && (c == '\'' || c == '"')) { | |
+ } else if (namelen && (c == '\'' || c == '"')) { | |
/* attribute with value */ | |
endsep = c; /* c is end separator */ | |
- if(x->xmlattrstart) | |
+ if (x->xmlattrstart) | |
x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
- for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) { | |
- if(c == '&' && x->xmlattrentity) { /* entities… | |
+ for (valuelen = 0; (c = x->getnext()) != EOF;) { | |
+ if (c == '&') { /* entities */ | |
x->data[valuelen] = '\0'; | |
/* call data function with data before… | |
- if(valuelen && x->xmlattr) | |
+ if (valuelen && x->xmlattr) | |
x->xmlattr(x, x->tag, x->tagle… | |
x->data[0] = c; | |
valuelen = 1; | |
- while((c = xmlparser_getnext(x)) != EO… | |
- if(c == endsep) | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == endsep) | |
break; | |
- if(valuelen < sizeof(x->data) … | |
+ if (valuelen < sizeof(x->data)… | |
x->data[valuelen++] = … | |
else { | |
/* TODO: entity too lo… | |
x->data[valuelen] = '\… | |
- if(x->xmlattr) | |
+ if (x->xmlattr) | |
x->xmlattr(x, … | |
valuelen = 0; | |
break; | |
} | |
- if(c == ';') { | |
+ if (c == ';') { | |
x->data[valuelen] = '\… | |
- x->xmlattrentity(x, x-… | |
+ if (x->xmlattrentity) | |
+ x->xmlattrenti… | |
valuelen = 0; | |
break; | |
} | |
} | |
- } else if(c != endsep) { | |
- if(valuelen < sizeof(x->data) - 1) { | |
+ } else if (c != endsep) { | |
+ if (valuelen < sizeof(x->data) - 1) { | |
x->data[valuelen++] = c; | |
} else { | |
x->data[valuelen] = '\0'; | |
- if(x->xmlattr) | |
+ if (x->xmlattr) | |
x->xmlattr(x, x->tag, … | |
x->data[0] = c; | |
valuelen = 1; | |
} | |
} | |
- if(c == endsep) { | |
+ if (c == endsep) { | |
x->data[valuelen] = '\0'; | |
- if(x->xmlattr) | |
+ if (x->xmlattr) | |
x->xmlattr(x, x->tag, x->tagle… | |
- if(x->xmlattrend) | |
+ if (x->xmlattrend) | |
x->xmlattrend(x, x->tag, x->ta… | |
break; | |
} | |
} | |
- namelen = 0; | |
- endname = 0; | |
- } else if(namelen < sizeof(x->name) - 1) | |
+ namelen = endname = 0; | |
+ } else if (namelen < sizeof(x->name) - 1) { | |
x->name[namelen++] = c; | |
- if(c == '>') { | |
+ } | |
+ if (c == '>') { | |
break; | |
- } else if(c == '/') { | |
+ } else if (c == '/') { | |
x->isshorttag = 1; | |
namelen = 0; | |
x->name[0] = '\0'; | |
@@ -112,37 +106,48 @@ xmlparser_parseattrs(XMLParser *x) { | |
} | |
} | |
-static __inline__ void | |
-xmlparser_parsecomment(XMLParser *x) { | |
+static void | |
+xml_parsecomment(XMLParser *x) | |
+{ | |
size_t datalen = 0, i = 0; | |
int c; | |
- if(x->xmlcommentstart) | |
+ if (x->xmlcommentstart) | |
x->xmlcommentstart(x); | |
- while((c = xmlparser_getnext(x)) != EOF) { | |
- if(c == '-' && i < 2) | |
- i++; | |
- else if(c == '>') { | |
- if(i == 2) { /* -- */ | |
- if(datalen >= 2) { | |
- datalen -= 2; | |
- x->data[datalen] = '\0'; | |
- if(x->xmlcomment) | |
- x->xmlcomment(x, x->data, data… | |
- } | |
- if(x->xmlcommentend) | |
- x->xmlcommentend(x); | |
- break; | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == '-' || c == '>') { | |
+ if (x->xmlcomment) { | |
+ x->data[datalen] = '\0'; | |
+ x->xmlcomment(x, x->data, datalen); | |
+ datalen = 0; | |
+ } | |
+ } | |
+ | |
+ if (c == '-') { | |
+ if (++i > 2) { | |
+ if (x->xmlcomment) | |
+ for (; i > 2; i--) | |
+ x->xmlcomment(x, "-", 1); | |
+ i = 2; | |
+ } | |
+ continue; | |
+ } else if (c == '>' && i == 2) { | |
+ if (x->xmlcommentend) | |
+ x->xmlcommentend(x); | |
+ return; | |
+ } else if (i) { | |
+ if (x->xmlcomment) { | |
+ for (; i > 0; i--) | |
+ x->xmlcomment(x, "-", 1); | |
} | |
i = 0; | |
} | |
- /* || (c == '-' && d >= sizeof(x->data) - 4)) { */ | |
- /* TODO: what if the end has --, and it's cut on the boundary,… | |
- if(datalen < sizeof(x->data) - 1) | |
+ | |
+ if (datalen < sizeof(x->data) - 1) { | |
x->data[datalen++] = c; | |
- else { | |
+ } else { | |
x->data[datalen] = '\0'; | |
- if(x->xmlcomment) | |
+ if (x->xmlcomment) | |
x->xmlcomment(x, x->data, datalen); | |
x->data[0] = c; | |
datalen = 1; | |
@@ -150,43 +155,47 @@ xmlparser_parsecomment(XMLParser *x) { | |
} | |
} | |
-/* TODO: | |
- * <test><![CDATA[1234567dddd8]]]> | |
- * | |
- * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1 | |
- * test comment function too for similar bug? | |
- * | |
- */ | |
-static __inline__ void | |
-xmlparser_parsecdata(XMLParser *x) { | |
+static void | |
+xml_parsecdata(XMLParser *x) | |
+{ | |
size_t datalen = 0, i = 0; | |
int c; | |
- if(x->xmlcdatastart) | |
+ if (x->xmlcdatastart) | |
x->xmlcdatastart(x); | |
- while((c = xmlparser_getnext(x)) != EOF) { | |
- if(c == ']' && i < 2) { | |
- i++; | |
- } else if(c == '>') { | |
- if(i == 2) { /* ]] */ | |
- if(datalen >= 2) { | |
- datalen -= 2; | |
- x->data[datalen] = '\0'; | |
- if(x->xmlcdata && datalen) | |
- x->xmlcdata(x, x->data, datale… | |
- } | |
- if(x->xmlcdataend) | |
- x->xmlcdataend(x); | |
- break; | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == ']' || c == '>') { | |
+ if (x->xmlcdata) { | |
+ x->data[datalen] = '\0'; | |
+ x->xmlcdata(x, x->data, datalen); | |
+ datalen = 0; | |
} | |
+ } | |
+ | |
+ if (c == ']') { | |
+ if (++i > 2) { | |
+ if (x->xmlcdata) | |
+ for (; i > 2; i--) | |
+ x->xmlcdata(x, "]", 1); | |
+ i = 2; | |
+ } | |
+ continue; | |
+ } else if (c == '>' && i == 2) { | |
+ if (x->xmlcdataend) | |
+ x->xmlcdataend(x); | |
+ return; | |
+ } else if (i) { | |
+ if (x->xmlcdata) | |
+ for (; i > 0; i--) | |
+ x->xmlcdata(x, "]", 1); | |
i = 0; | |
} | |
- /* TODO: what if the end has ]>, and it's cut on the boundary … | |
- if(datalen < sizeof(x->data) - 1) { | |
+ | |
+ if (datalen < sizeof(x->data) - 1) { | |
x->data[datalen++] = c; | |
} else { | |
x->data[datalen] = '\0'; | |
- if(x->xmlcdata) | |
+ if (x->xmlcdata) | |
x->xmlcdata(x, x->data, datalen); | |
x->data[0] = c; | |
datalen = 1; | |
@@ -194,128 +203,240 @@ xmlparser_parsecdata(XMLParser *x) { | |
} | |
} | |
-void | |
-xmlparser_init(XMLParser *x, FILE *fp) { | |
- memset(x, 0, sizeof(XMLParser)); | |
- x->fp = fp; | |
+int | |
+xml_codepointtoutf8(uint32_t cp, uint32_t *utf) | |
+{ | |
+ if (cp >= 0x10000) { | |
+ /* 4 bytes */ | |
+ *utf = 0xf0808080 | ((cp & 0xfc0000) << 6) | | |
+ ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) | | |
+ (cp & 0x3f); | |
+ return 4; | |
+ } else if (cp >= 0x00800) { | |
+ /* 3 bytes */ | |
+ *utf = 0xe08080 | | |
+ ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) | | |
+ (cp & 0x3f); | |
+ return 3; | |
+ } else if (cp >= 0x80) { | |
+ /* 2 bytes */ | |
+ *utf = 0xc080 | | |
+ ((cp & 0xfc0) << 2) | (cp & 0x3f); | |
+ return 2; | |
+ } | |
+ *utf = cp & 0xff; | |
+ return *utf ? 1 : 0; /* 1 byte */ | |
+} | |
+ | |
+ssize_t | |
+xml_namedentitytostr(const char *e, char *buf, size_t bufsiz) | |
+{ | |
+ static const struct { | |
+ char *entity; | |
+ int c; | |
+ } entities[] = { | |
+ { .entity = "&amp;", .c = '&' }, | |
+ { .entity = "&lt;", .c = '<' }, | |
+ { .entity = "&gt;", .c = '>' }, | |
+ { .entity = "&apos;", .c = '\'' }, | |
+ { .entity = "&quot;", .c = '"' }, | |
+ { .entity = "&AMP;", .c = '&' }, | |
+ { .entity = "&LT;", .c = '<' }, | |
+ { .entity = "&GT;", .c = '>' }, | |
+ { .entity = "&APOS;", .c = '\'' }, | |
+ { .entity = "&QUOT;", .c = '"' } | |
+ }; | |
+ size_t i; | |
+ | |
+ /* buffer is too small */ | |
+ if (bufsiz < 2) | |
+ return -1; | |
+ | |
+ /* doesn't start with &: can't match */ | |
+ if (*e != '&') | |
+ return 0; | |
+ | |
+ for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) { | |
+ if (!strcmp(e, entities[i].entity)) { | |
+ buf[0] = entities[i].c; | |
+ buf[1] = '\0'; | |
+ return 1; | |
+ } | |
+ } | |
+ return 0; | |
+} | |
+ | |
+ssize_t | |
+xml_numericentitytostr(const char *e, char *buf, size_t bufsiz) | |
+{ | |
+ uint32_t l = 0, cp = 0; | |
+ size_t b, len; | |
+ char *end; | |
+ | |
+ /* buffer is too small */ | |
+ if (bufsiz < 5) | |
+ return -1; | |
+ | |
+ /* not a numeric entity */ | |
+ if (e[0] != '&' || e[1] != '#') | |
+ return 0; | |
+ | |
+ /* e[1] == '#', numeric / hexadecimal entity */ | |
+ e += 2; /* skip "&#" */ | |
+ errno = 0; | |
+ /* hex (16) or decimal (10) */ | |
+ if (*e == 'x') | |
+ l = strtoul(e + 1, &end, 16); | |
+ else | |
+ l = strtoul(e, &end, 10); | |
+ /* invalid value or not a well-formed entity */ | |
+ if (errno || *end != ';') | |
+ return 0; | |
+ len = xml_codepointtoutf8(l, &cp); | |
+ /* make string */ | |
+ for (b = 0; b < len; b++) | |
+ buf[b] = (cp >> (8 * (len - 1 - b))) & 0xff; | |
+ buf[len] = '\0'; | |
+ | |
+ return (ssize_t)len; | |
+} | |
+ | |
+/* convert named- or numeric entity string to buffer string | |
+ * returns byte-length of string. */ | |
+ssize_t | |
+xml_entitytostr(const char *e, char *buf, size_t bufsiz) | |
+{ | |
+ /* buffer is too small */ | |
+ if (bufsiz < 5) | |
+ return -1; | |
+ /* doesn't start with & */ | |
+ if (e[0] != '&') | |
+ return 0; | |
+ /* named entity */ | |
+ if (e[1] != '#') | |
+ return xml_namedentitytostr(e, buf, bufsiz); | |
+ else /* numeric entity */ | |
+ return xml_numericentitytostr(e, buf, bufsiz); | |
} | |
void | |
-xmlparser_parse(XMLParser *x) { | |
+xml_parse(XMLParser *x) | |
+{ | |
int c, ispi; | |
size_t datalen, tagdatalen, taglen; | |
- while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < … | |
+ if (!x->getnext) | |
+ return; | |
+ while ((c = x->getnext()) != EOF && c != '<') | |
+ ; /* skip until < */ | |
- while(c != EOF) { | |
- if(c == '<') { /* parse tag */ | |
- if((c = xmlparser_getnext(x)) == EOF) | |
+ while (c != EOF) { | |
+ if (c == '<') { /* parse tag */ | |
+ if ((c = x->getnext()) == EOF) | |
return; | |
x->tag[0] = '\0'; | |
x->taglen = 0; | |
- if(c == '!') { /* cdata and comments */ | |
- for(tagdatalen = 0; (c = xmlparser_getnext(x))… | |
- if(tagdatalen <= strlen("[CDATA[")) /*… | |
+ if (c == '!') { /* cdata and comments */ | |
+ for (tagdatalen = 0; (c = x->getnext()) != EOF… | |
+ if (tagdatalen <= sizeof("[CDATA[") - … | |
x->data[tagdatalen++] = c; /* … | |
- if(c == '>') | |
+ if (c == '>') | |
break; | |
- else if(c == '-' && tagdatalen == strl… | |
- (x->data[0] == '-')) {… | |
- xmlparser_parsecomment(x); | |
+ else if (c == '-' && tagdatalen == siz… | |
+ (x->data[0] == '-')) { | |
+ xml_parsecomment(x); | |
break; | |
- } else if(c == '[') { | |
- if(tagdatalen == strlen("[CDAT… | |
- x->data[1] == 'C' && x… | |
- x->data[3] == 'A' && x… | |
- x->data[5] == 'A' && x… | |
- xmlparser_parsecdata(x… | |
+ } else if (c == '[') { | |
+ if (tagdatalen == sizeof("[CDA… | |
+ !strncmp(x->data, "[CDATA[… | |
+ xml_parsecdata(x); | |
break; | |
- #if 0 | |
- } else { | |
- /* TODO ? */ | |
- /* markup declaration … | |
- while((c = xmlparser_g… | |
- #endif | |
} | |
} | |
} | |
- } else { /* normal tag (open, short open, close), proc… | |
- if(isspace(c)) | |
- while((c = xmlparser_getnext(x)) != EO… | |
- if(c == EOF) | |
+ } else { | |
+ /* normal tag (open, short open, close), proce… | |
+ if (isspace(c)) | |
+ while ((c = x->getnext()) != EOF && is… | |
+ ; | |
+ if (c == EOF) | |
return; | |
x->tag[0] = c; | |
ispi = (c == '?') ? 1 : 0; | |
x->isshorttag = ispi; | |
taglen = 1; | |
- while((c = xmlparser_getnext(x)) != EOF) { | |
- if(c == '/') /* TODO: simplify short t… | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == '/') /* TODO: simplify short … | |
x->isshorttag = 1; /* short ta… | |
- else if(c == '>' || isspace(c)) { | |
+ else if (c == '>' || isspace(c)) { | |
x->tag[taglen] = '\0'; | |
- if(x->tag[0] == '/') { /* end … | |
+ if (x->tag[0] == '/') { /* end… | |
x->taglen = --taglen; … | |
- if(taglen && x->xmltag… | |
+ if (taglen && x->xmlta… | |
x->xmltagend(x… | |
} else { | |
x->taglen = taglen; | |
- if(x->xmltagstart) | |
- x->xmltagstart… | |
- if(isspace(c)) | |
- xmlparser_pars… | |
- if(x->xmltagstartparse… | |
+ /* start tag */ | |
+ if (x->xmltagstart) | |
+ x->xmltagstart… | |
+ if (isspace(c)) | |
+ xml_parseattrs… | |
+ if (x->xmltagstartpars… | |
x->xmltagstart… | |
} | |
- if((x->isshorttag || ispi) && … | |
+ /* call tagend for shortform o… | |
+ if ((x->isshorttag || ispi) &&… | |
x->xmltagend(x, x->tag… | |
break; | |
- } else if(taglen < sizeof(x->tag) - 1) | |
+ } else if (taglen < sizeof(x->tag) - 1) | |
x->tag[taglen++] = c; | |
} | |
} | |
} else { | |
- /* parse data */ | |
+ /* parse tag data */ | |
datalen = 0; | |
- if(x->xmldatastart) | |
+ if (x->xmldatastart) | |
x->xmldatastart(x); | |
- while((c = xmlparser_getnext(x)) != EOF) { | |
- if(c == '&' && x->xmldataentity) { | |
- if(datalen) { | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == '&') { | |
+ if (datalen) { | |
x->data[datalen] = '\0'; | |
- x->xmldata(x, x->data, datalen… | |
+ if (x->xmldata) | |
+ x->xmldata(x, x->data,… | |
} | |
x->data[0] = c; | |
datalen = 1; | |
- while((c = xmlparser_getnext(x)) != EO… | |
- if(c == '<') | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == '<') | |
break; | |
- if(datalen < sizeof(x->data) -… | |
+ if (datalen < sizeof(x->data) … | |
x->data[datalen++] = c; | |
- if(isspace(c)) | |
+ if (isspace(c)) | |
break; | |
- else if(c == ';') { | |
+ else if (c == ';') { | |
x->data[datalen] = '\0… | |
- x->xmldataentity(x, x-… | |
+ if (x->xmldataentity) | |
+ x->xmldataenti… | |
datalen = 0; | |
break; | |
} | |
} | |
- } else if(c != '<') { | |
- if(datalen < sizeof(x->data) - 1) { | |
+ } else if (c != '<') { | |
+ if (datalen < sizeof(x->data) - 1) { | |
x->data[datalen++] = c; | |
} else { | |
x->data[datalen] = '\0'; | |
- if(x->xmldata) | |
+ if (x->xmldata) | |
x->xmldata(x, x->data,… | |
x->data[0] = c; | |
datalen = 1; | |
} | |
} | |
- if(c == '<') { | |
+ if (c == '<') { | |
x->data[datalen] = '\0'; | |
- if(x->xmldata && datalen) | |
+ if (x->xmldata && datalen) | |
x->xmldata(x, x->data, datalen… | |
- if(x->xmldataend) | |
+ if (x->xmldataend) | |
x->xmldataend(x); | |
break; | |
} | |
diff --git a/xml.h b/xml.h | |
@@ -1,49 +1,44 @@ | |
-#include <stdio.h> | |
-#include <string.h> | |
-#include <stdlib.h> | |
- | |
typedef struct xmlparser { | |
/* handlers */ | |
- void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t tagle… | |
- void (*xmltagstartparsed)(struct xmlparser *p, const char *tag, | |
- size_t taglen, int isshort); | |
- void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen, | |
- int isshort); | |
- void (*xmldatastart)(struct xmlparser *p); | |
- void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen); | |
- void (*xmldataend)(struct xmlparser *p); | |
- void (*xmldataentity)(struct xmlparser *p, const char *data, | |
- size_t datalen); | |
- void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t tagl… | |
- const char *name, size_t namelen); | |
- void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen, | |
- const char *name, size_t namelen, const char *value, | |
- size_t valuelen); | |
- void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen, | |
- const char *name, size_t namelen); | |
- void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t tag… | |
- const char *name, size_t namelen, const char *value, | |
- size_t valuelen); | |
- void (*xmlcdatastart)(struct xmlparser *p); | |
- void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen… | |
- void (*xmlcdataend)(struct xmlparser *p); | |
- void (*xmlcommentstart)(struct xmlparser *p); | |
- void (*xmlcomment)(struct xmlparser *p, const char *comment, | |
- size_t commentlen); | |
- void (*xmlcommentend)(struct xmlparser *p); | |
+ void (*xmlattr)(struct xmlparser *, const char *, size_t, | |
+ const char *, size_t, const char *, size_t); | |
+ void (*xmlattrend)(struct xmlparser *, const char *, size_t, | |
+ const char *, size_t); | |
+ void (*xmlattrstart)(struct xmlparser *, const char *, size_t, | |
+ const char *, size_t); | |
+ void (*xmlattrentity)(struct xmlparser *, const char *, size_t, | |
+ const char *, size_t, const char *, size_t); | |
+ void (*xmlcdatastart)(struct xmlparser *); | |
+ void (*xmlcdata)(struct xmlparser *, const char *, size_t); | |
+ void (*xmlcdataend)(struct xmlparser *); | |
+ void (*xmlcommentstart)(struct xmlparser *); | |
+ void (*xmlcomment)(struct xmlparser *, const char *, size_t); | |
+ void (*xmlcommentend)(struct xmlparser *); | |
+ void (*xmldata)(struct xmlparser *, const char *, size_t); | |
+ void (*xmldataend)(struct xmlparser *); | |
+ void (*xmldataentity)(struct xmlparser *, const char *, size_t); | |
+ void (*xmldatastart)(struct xmlparser *); | |
+ void (*xmltagend)(struct xmlparser *, const char *, size_t, int); | |
+ void (*xmltagstart)(struct xmlparser *, const char *, size_t); | |
+ void (*xmltagstartparsed)(struct xmlparser *, const char *, | |
+ size_t, int); | |
- FILE *fp; /* file stream to read from */ | |
+ int (*getnext)(void); | |
- /* private; internal state */ | |
- char tag[1024]; /* current tag */ | |
- int isshorttag; /* current tag is in short form ? */ | |
+ /* current tag */ | |
+ char tag[1024]; | |
size_t taglen; | |
- char name[256]; /* current attribute name */ | |
- char data[BUFSIZ]; /* data buffer used for tag and attribute data */ | |
- size_t readoffset; | |
- size_t readlastbytes; | |
- unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnex… | |
+ /* current tag is in short form ? <tag /> */ | |
+ int isshorttag; | |
+ /* current attribute name */ | |
+ char name[256]; | |
+ /* data buffer used for tag data, cdata and attribute data */ | |
+ char data[BUFSIZ]; | |
} XMLParser; | |
-void xmlparser_init(XMLParser *x, FILE *fp); | |
-void xmlparser_parse(XMLParser *x); | |
+int xml_codepointtoutf8(uint32_t, uint32_t *); | |
+ssize_t xml_entitytostr(const char *, char *, size_t); | |
+ssize_t xml_namedentitytostr(const char *, char *, size_t); | |
+ssize_t xml_numericentitytostr(const char *, char *, size_t); | |
+ | |
+void xml_parse(XMLParser *); |