sync XML improvements - tscrape - twitter scraper | |
git clone git://git.codemadness.org/tscrape | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 2872a29d4f44afbfa4f439ba1f3d84c22114b0d4 | |
parent bd299de160e8f56d6f88538d9d4d4ded4775038d | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Mon, 22 Apr 2019 14:46:30 +0200 | |
sync XML improvements | |
Diffstat: | |
M xml.c | 26 ++++++++++++-------------- | |
M xml.h | 7 +++++++ | |
2 files changed, 19 insertions(+), 14 deletions(-) | |
--- | |
diff --git a/xml.c b/xml.c | |
@@ -15,7 +15,7 @@ xml_parseattrs(XMLParser *x) | |
size_t namelen = 0, valuelen; | |
int c, endsep, endname = 0, valuestart = 0; | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
if (isspace(c)) { | |
if (namelen) | |
endname = 1; | |
@@ -51,7 +51,7 @@ xml_parseattrs(XMLParser *x) | |
goto startvalue; | |
} | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
startvalue: | |
if (c == '&') { /* entities */ | |
x->data[valuelen] = '\0'; | |
@@ -60,7 +60,7 @@ startvalue: | |
x->xmlattr(x, x->tag, x->tagle… | |
x->data[0] = c; | |
valuelen = 1; | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
if (c == endsep || (endsep == … | |
break; | |
if (valuelen < sizeof(x->data)… | |
@@ -124,7 +124,7 @@ xml_parsecomment(XMLParser *x) | |
if (x->xmlcommentstart) | |
x->xmlcommentstart(x); | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
if (c == '-' || c == '>') { | |
if (x->xmlcomment) { | |
x->data[datalen] = '\0'; | |
@@ -173,7 +173,7 @@ xml_parsecdata(XMLParser *x) | |
if (x->xmlcdatastart) | |
x->xmlcdatastart(x); | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
if (c == ']' || c == '>') { | |
if (x->xmlcdata) { | |
x->data[datalen] = '\0'; | |
@@ -324,18 +324,16 @@ xml_parse(XMLParser *x) | |
size_t datalen, tagdatalen; | |
int c, isend; | |
- if (!x->getnext) | |
- return; | |
- while ((c = x->getnext()) != EOF && c != '<') | |
+ while ((c = GETNEXT()) != EOF && c != '<') | |
; /* skip until < */ | |
while (c != EOF) { | |
if (c == '<') { /* parse tag */ | |
- if ((c = x->getnext()) == EOF) | |
+ if ((c = GETNEXT()) == EOF) | |
return; | |
if (c == '!') { /* cdata and comments */ | |
- for (tagdatalen = 0; (c = x->getnext()) != EOF… | |
+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) { | |
/* NOTE: sizeof(x->data) must be atlea… | |
if (tagdatalen <= sizeof("[CDATA[") - … | |
x->data[tagdatalen++] = c; | |
@@ -363,13 +361,13 @@ xml_parse(XMLParser *x) | |
if (c == '?') { | |
x->isshorttag = 1; | |
} else if (c == '/') { | |
- if ((c = x->getnext()) == EOF) | |
+ if ((c = GETNEXT()) == EOF) | |
return; | |
x->tag[0] = c; | |
isend = 1; | |
} | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
if (c == '/') | |
x->isshorttag = 1; /* short ta… | |
else if (c == '>' || isspace(c)) { | |
@@ -405,7 +403,7 @@ xml_parse(XMLParser *x) | |
datalen = 0; | |
if (x->xmldatastart) | |
x->xmldatastart(x); | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
if (c == '&') { | |
if (datalen) { | |
x->data[datalen] = '\0'; | |
@@ -414,7 +412,7 @@ xml_parse(XMLParser *x) | |
} | |
x->data[0] = c; | |
datalen = 1; | |
- while ((c = x->getnext()) != EOF) { | |
+ while ((c = GETNEXT()) != EOF) { | |
if (c == '<') | |
break; | |
if (datalen < sizeof(x->data) … | |
diff --git a/xml.h b/xml.h | |
@@ -1,3 +1,6 @@ | |
+#ifndef _XML_H | |
+#define _XML_H | |
+ | |
typedef struct xmlparser { | |
/* handlers */ | |
void (*xmlattr)(struct xmlparser *, const char *, size_t, | |
@@ -23,7 +26,10 @@ typedef struct xmlparser { | |
void (*xmltagstartparsed)(struct xmlparser *, const char *, | |
size_t, int); | |
+#ifndef GETNEXT | |
+ #define GETNEXT (x)->getnext | |
int (*getnext)(void); | |
+#endif | |
/* current tag */ | |
char tag[1024]; | |
@@ -38,3 +44,4 @@ typedef struct xmlparser { | |
int xml_entitytostr(const char *, char *, size_t); | |
void xml_parse(XMLParser *); | |
+#endif |