Introduction
Introduction Statistics Contact Development Disclaimer Help
sync XML improvements (from sfeed) - xmlparser - XML parser
git clone git://git.codemadness.org/xmlparser
Log
Files
Refs
README
LICENSE
---
commit 908a3c3d0c612673b32c2714d9f46bc723c7a38b
parent b2078dbb866bea46507ebb9d3d4c12c93c4f39f8
Author: Hiltjo Posthuma <[email protected]>
Date: Sun, 16 Jun 2019 22:19:31 +0200
sync XML improvements (from sfeed)
Diffstat:
M README | 10 +++++++++-
M skeleton.c | 2 ++
M xml.c | 125 ++++++++++++++---------------…
M xml.h | 7 +++++++
4 files changed, 76 insertions(+), 68 deletions(-)
---
diff --git a/README b/README
@@ -5,7 +5,7 @@ XML parser
Dependencies
------------
-- C compiler (C99 expected).
+- C compiler (C99).
Features
@@ -36,6 +36,8 @@ Caveats
-------
- It is not a compliant XML parser.
+- Performance: data is buffered even if a handler is not set; to make parsing
+ faster, change this code in xml.c.
- The XML is not checked for errors, so parsing continues on malformed XML data;
this is by design.
- Internally fixed-size buffers are used, callbacks like XMLParser.xmldata are
@@ -59,6 +61,12 @@ Interface / API
Should be trivial, see xml.c and xml.h and the examples below.
+Examples
+--------
+
+See skeleton.c for a base program to get started quickly.
+
+
License
-------
diff --git a/skeleton.c b/skeleton.c
@@ -114,7 +114,9 @@ main(void)
x.xmltagstart = xmltagstart;
x.xmltagstartparsed = xmltagstartparsed;
+#ifndef GETNEXT
x.getnext = getchar;
+#endif
xml_parse(&x);
diff --git a/xml.c b/xml.c
@@ -15,7 +15,7 @@ xml_parseattrs(XMLParser *x)
size_t namelen = 0, valuelen;
int c, endsep, endname = 0, valuestart = 0;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (isspace(c)) {
if (namelen)
endname = 1;
@@ -51,7 +51,7 @@ xml_parseattrs(XMLParser *x)
goto startvalue;
}
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
startvalue:
if (c == '&') { /* entities */
x->data[valuelen] = '\0';
@@ -60,7 +60,7 @@ startvalue:
x->xmlattr(x, x->tag, x->tagle…
x->data[0] = c;
valuelen = 1;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == endsep || (endsep == …
break;
if (valuelen < sizeof(x->data)…
@@ -124,9 +124,9 @@ xml_parsecomment(XMLParser *x)
if (x->xmlcommentstart)
x->xmlcommentstart(x);
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '-' || c == '>') {
- if (x->xmlcomment) {
+ if (x->xmlcomment && datalen) {
x->data[datalen] = '\0';
x->xmlcomment(x, x->data, datalen);
datalen = 0;
@@ -173,9 +173,9 @@ xml_parsecdata(XMLParser *x)
if (x->xmlcdatastart)
x->xmlcdatastart(x);
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == ']' || c == '>') {
- if (x->xmlcdata) {
+ if (x->xmlcdata && datalen) {
x->data[datalen] = '\0';
x->xmlcdata(x, x->data, datalen);
datalen = 0;
@@ -247,19 +247,19 @@ static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
static const struct {
- char *entity;
+ const char *entity;
int c;
} entities[] = {
- { "&amp;", '&' },
- { "&lt;", '<' },
- { "&gt;", '>' },
- { "&apos;", '\'' },
- { "&quot;", '"' },
- { "&AMP;", '&' },
- { "&LT;", '<' },
- { "&GT;", '>' },
- { "&APOS;", '\'' },
- { "&QUOT;", '"' }
+ { "amp;", '&' },
+ { "lt;", '<' },
+ { "gt;", '>' },
+ { "apos;", '\'' },
+ { "quot;", '"' },
+ { "AMP;", '&' },
+ { "LT;", '<' },
+ { "GT;", '>' },
+ { "APOS;", '\'' },
+ { "QUOT;", '"' }
};
size_t i;
@@ -267,10 +267,6 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz)
if (bufsiz < 2)
return -1;
- /* doesn't start with &: can't match */
- if (*e != '&')
- return 0;
-
for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
if (!strcmp(e, entities[i].entity)) {
buf[0] = entities[i].c;
@@ -292,12 +288,6 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
if (bufsiz < 5)
return -1;
- /* not a numeric entity */
- if (e[0] != '&' || e[1] != '#')
- return 0;
-
- /* e[1] == '#', numeric / hexadecimal entity */
- e += 2; /* skip "&#" */
errno = 0;
/* hex (16) or decimal (10) */
if (*e == 'x')
@@ -318,37 +308,32 @@ numericentitytostr(const char *e, char *buf, size_t bufsi…
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
- /* buffer is too small */
- if (bufsiz < 5)
- return -1;
/* doesn't start with & */
if (e[0] != '&')
return 0;
- /* named entity */
- if (e[1] != '#')
- return namedentitytostr(e, buf, bufsiz);
- else /* numeric entity */
- return numericentitytostr(e, buf, bufsiz);
+ /* numeric entity */
+ if (e[1] == '#')
+ return numericentitytostr(e + 2, buf, bufsiz);
+ else /* named entity */
+ return namedentitytostr(e + 1, buf, bufsiz);
}
void
xml_parse(XMLParser *x)
{
- int c, ispi;
- size_t datalen, tagdatalen, taglen;
+ size_t datalen, tagdatalen;
+ int c, isend;
- if (!x->getnext)
- return;
- while ((c = x->getnext()) != EOF && c != '<')
+ while ((c = GETNEXT()) != EOF && c != '<')
; /* skip until < */
while (c != EOF) {
if (c == '<') { /* parse tag */
- if ((c = x->getnext()) == EOF)
+ if ((c = GETNEXT()) == EOF)
return;
if (c == '!') { /* cdata and comments */
- for (tagdatalen = 0; (c = x->getnext()) != EOF…
+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
/* NOTE: sizeof(x->data) must be atlea…
if (tagdatalen <= sizeof("[CDATA[") - …
x->data[tagdatalen++] = c;
@@ -367,30 +352,32 @@ xml_parse(XMLParser *x)
}
}
} else {
- x->tag[0] = '\0';
- x->taglen = 0;
-
/* normal tag (open, short open, close), proce…
- if (isspace(c))
- while ((c = x->getnext()) != EOF && is…
- ;
- if (c == EOF)
- return;
x->tag[0] = c;
- ispi = (c == '?') ? 1 : 0;
- x->isshorttag = ispi;
- taglen = 1;
- while ((c = x->getnext()) != EOF) {
+ x->taglen = 1;
+ x->isshorttag = isend = 0;
+
+ /* treat processing instruction as shorttag, d…
+ if (c == '?') {
+ x->isshorttag = 1;
+ } else if (c == '/') {
+ if ((c = GETNEXT()) == EOF)
+ return;
+ x->tag[0] = c;
+ isend = 1;
+ }
+
+ while ((c = GETNEXT()) != EOF) {
if (c == '/')
x->isshorttag = 1; /* short ta…
else if (c == '>' || isspace(c)) {
- x->tag[taglen] = '\0';
- if (x->tag[0] == '/') { /* end…
- x->taglen = --taglen; …
- if (taglen && x->xmlta…
- x->xmltagend(x…
+ x->tag[x->taglen] = '\0';
+ if (isend) { /* end tag, start…
+ if (x->xmltagend)
+ x->xmltagend(x…
+ x->tag[0] = '\0';
+ x->taglen = 0;
} else {
- x->taglen = taglen;
/* start tag */
if (x->xmltagstart)
x->xmltagstart…
@@ -400,11 +387,15 @@ xml_parse(XMLParser *x)
x->xmltagstart…
}
/* call tagend for shortform o…
- if ((x->isshorttag || ispi) &&…
- x->xmltagend(x, x->tag…
+ if (x->isshorttag) {
+ if (x->xmltagend)
+ x->xmltagend(x…
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ }
break;
- } else if (taglen < sizeof(x->tag) - 1)
- x->tag[taglen++] = c; /* NOTE:…
+ } else if (x->taglen < sizeof(x->tag) …
+ x->tag[x->taglen++] = c; /* NO…
}
}
} else {
@@ -412,7 +403,7 @@ xml_parse(XMLParser *x)
datalen = 0;
if (x->xmldatastart)
x->xmldatastart(x);
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '&') {
if (datalen) {
x->data[datalen] = '\0';
@@ -421,7 +412,7 @@ xml_parse(XMLParser *x)
}
x->data[0] = c;
datalen = 1;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '<')
break;
if (datalen < sizeof(x->data) …
diff --git a/xml.h b/xml.h
@@ -1,3 +1,6 @@
+#ifndef _XML_H
+#define _XML_H
+
typedef struct xmlparser {
/* handlers */
void (*xmlattr)(struct xmlparser *, const char *, size_t,
@@ -23,7 +26,10 @@ typedef struct xmlparser {
void (*xmltagstartparsed)(struct xmlparser *, const char *,
size_t, int);
+#ifndef GETNEXT
+ #define GETNEXT (x)->getnext
int (*getnext)(void);
+#endif
/* current tag */
char tag[1024];
@@ -38,3 +44,4 @@ typedef struct xmlparser {
int xml_entitytostr(const char *, char *, size_t);
void xml_parse(XMLParser *);
+#endif
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.