Introduction
Introduction Statistics Contact Development Disclaimer Help
XML tag parse improvements for PI and end tags - tscrape - twitter scraper
git clone git://git.codemadness.org/tscrape
Log
Files
Refs
README
LICENSE
---
commit 0fac9621c44b76c38d911438b1966d665e3b8134
parent 24fad792de3bab17f1cf485450435761fb3b8657
Author: Hiltjo Posthuma <[email protected]>
Date: Mon, 17 Dec 2018 18:25:08 +0100
XML tag parse improvements for PI and end tags
- Stricter parsing of tags: whitespace after "<" is no longer stripped.
- For end tags the "internal" context x->tag used to be "/sometag". Make sure
it now matches the tag parameter exactly.
- Reset tagname after parsing an end tag.
- Make end tag handling more consistent.
- Remove temporary variable taglen.
Diffstat:
M xml.c | 52 +++++++++++++++++------------…
1 file changed, 29 insertions(+), 23 deletions(-)
---
diff --git a/xml.c b/xml.c
@@ -334,8 +334,8 @@ xml_entitytostr(const char *e, char *buf, size_t bufsiz)
void
xml_parse(XMLParser *x)
{
- int c, ispi;
- size_t datalen, tagdatalen, taglen;
+ size_t datalen, tagdatalen;
+ int c, isend;
if (!x->getnext)
return;
@@ -367,30 +367,32 @@ xml_parse(XMLParser *x)
}
}
} else {
- x->tag[0] = '\0';
- x->taglen = 0;
-
/* normal tag (open, short open, close), proce…
- if (isspace(c))
- while ((c = x->getnext()) != EOF && is…
- ;
- if (c == EOF)
- return;
x->tag[0] = c;
- ispi = (c == '?') ? 1 : 0;
- x->isshorttag = ispi;
- taglen = 1;
+ x->taglen = 1;
+ x->isshorttag = isend = 0;
+
+ /* treat processing instruction as shorttag, d…
+ if (c == '?') {
+ x->isshorttag = 1;
+ } else if (c == '/') {
+ if ((c = x->getnext()) == EOF)
+ return;
+ x->tag[0] = c;
+ isend = 1;
+ }
+
while ((c = x->getnext()) != EOF) {
if (c == '/')
x->isshorttag = 1; /* short ta…
else if (c == '>' || isspace(c)) {
- x->tag[taglen] = '\0';
- if (x->tag[0] == '/') { /* end…
- x->taglen = --taglen; …
- if (taglen && x->xmlta…
- x->xmltagend(x…
+ x->tag[x->taglen] = '\0';
+ if (isend) { /* end tag, start…
+ if (x->xmltagend)
+ x->xmltagend(x…
+ x->tag[0] = '\0';
+ x->taglen = 0;
} else {
- x->taglen = taglen;
/* start tag */
if (x->xmltagstart)
x->xmltagstart…
@@ -400,11 +402,15 @@ xml_parse(XMLParser *x)
x->xmltagstart…
}
/* call tagend for shortform o…
- if ((x->isshorttag || ispi) &&…
- x->xmltagend(x, x->tag…
+ if (x->isshorttag) {
+ if (x->xmltagend)
+ x->xmltagend(x…
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ }
break;
- } else if (taglen < sizeof(x->tag) - 1)
- x->tag[taglen++] = c; /* NOTE:…
+ } else if (x->taglen < sizeof(x->tag) …
+ x->tag[x->taglen++] = c; /* NO…
}
}
} else {
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.