remove unneeded code, handle &nbsp; - grabtitle - stupid HTML title grabber | |
git clone git://git.codemadness.org/grabtitle | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 16cc59c155068e6de1fd5cfa8720d6d765db6548 | |
parent 16dfed456fd96d1c483eb515594019d7a5febc86 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sun, 22 Sep 2019 19:50:21 +0200 | |
remove unneeded code, handle &nbsp; | |
Diffstat: | |
M xml.c | 136 ++++-------------------------… | |
M xml.h | 19 ------------------- | |
2 files changed, 18 insertions(+), 137 deletions(-) | |
--- | |
diff --git a/xml.c b/xml.c | |
@@ -12,7 +12,7 @@ | |
static void | |
xml_parseattrs(XMLParser *x) | |
{ | |
- size_t namelen = 0, valuelen; | |
+ size_t namelen = 0; | |
int c, endsep, endname = 0, valuestart = 0; | |
while ((c = x->getnext()) != EOF) { | |
@@ -23,94 +23,33 @@ xml_parseattrs(XMLParser *x) | |
} else if (c == '?') | |
; /* ignore */ | |
else if (c == '=') { | |
- x->name[namelen] = '\0'; | |
valuestart = 1; | |
endname = 1; | |
} else if (namelen && ((endname && !valuestart && isalpha(c)) … | |
- /* attribute without value */ | |
- x->name[namelen] = '\0'; | |
- if (x->xmlattrstart) | |
- x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
- if (x->xmlattr) | |
- x->xmlattr(x, x->tag, x->taglen, x->name, name… | |
- if (x->xmlattrend) | |
- x->xmlattrend(x, x->tag, x->taglen, x->name, n… | |
endname = 0; | |
- x->name[0] = c; | |
namelen = 1; | |
} else if (namelen && valuestart) { | |
/* attribute with value */ | |
- if (x->xmlattrstart) | |
- x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
- | |
- valuelen = 0; | |
if (c == '\'' || c == '"') { | |
endsep = c; | |
- } else { | |
- endsep = ' '; /* isspace() */ | |
- goto startvalue; | |
- } | |
- | |
- while ((c = x->getnext()) != EOF) { | |
-startvalue: | |
- if (c == '&') { /* entities */ | |
- x->data[valuelen] = '\0'; | |
- /* call data function with data before… | |
- if (valuelen && x->xmlattr) | |
- x->xmlattr(x, x->tag, x->tagle… | |
- x->data[0] = c; | |
- valuelen = 1; | |
- while ((c = x->getnext()) != EOF) { | |
- if (c == endsep || (endsep == … | |
- break; | |
- if (valuelen < sizeof(x->data)… | |
- x->data[valuelen++] = … | |
- else { | |
- /* entity too long for… | |
- x->data[valuelen] = '\… | |
- if (x->xmlattr) | |
- x->xmlattr(x, … | |
- x->data[0] = c; | |
- valuelen = 1; | |
- break; | |
- } | |
- if (c == ';') { | |
- x->data[valuelen] = '\… | |
- if (x->xmlattrentity) | |
- x->xmlattrenti… | |
- valuelen = 0; | |
- break; | |
- } | |
- } | |
- } else if (c != endsep && !(endsep == ' ' && (… | |
- if (valuelen < sizeof(x->data) - 1) { | |
- x->data[valuelen++] = c; | |
- } else { | |
- x->data[valuelen] = '\0'; | |
- if (x->xmlattr) | |
- x->xmlattr(x, x->tag, … | |
- x->data[0] = c; | |
- valuelen = 1; | |
- } | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == endsep) | |
+ break; | |
} | |
- if (c == endsep || (endsep == ' ' && (c == '>'… | |
- x->data[valuelen] = '\0'; | |
- if (x->xmlattr) | |
- x->xmlattr(x, x->tag, x->tagle… | |
- if (x->xmlattrend) | |
- x->xmlattrend(x, x->tag, x->ta… | |
- break; | |
+ } else { | |
+ while ((c = x->getnext()) != EOF) { | |
+ if (c == '>' || isspace(c)) | |
+ break; | |
} | |
} | |
namelen = endname = valuestart = 0; | |
- } else if (namelen < sizeof(x->name) - 1) { | |
- x->name[namelen++] = c; | |
+ } else { | |
+ namelen = 1; | |
} | |
if (c == '>') { | |
break; | |
} else if (c == '/') { | |
x->isshorttag = 1; | |
- x->name[0] = '\0'; | |
namelen = 0; | |
} | |
} | |
@@ -119,48 +58,17 @@ startvalue: | |
static void | |
xml_parsecomment(XMLParser *x) | |
{ | |
- size_t datalen = 0, i = 0; | |
+ size_t i = 0; | |
int c; | |
- if (x->xmlcommentstart) | |
- x->xmlcommentstart(x); | |
while ((c = x->getnext()) != EOF) { | |
- if (c == '-' || c == '>') { | |
- if (x->xmlcomment) { | |
- x->data[datalen] = '\0'; | |
- x->xmlcomment(x, x->data, datalen); | |
- datalen = 0; | |
- } | |
- } | |
- | |
if (c == '-') { | |
- if (++i > 2) { | |
- if (x->xmlcomment) | |
- for (; i > 2; i--) | |
- x->xmlcomment(x, "-", 1); | |
- i = 2; | |
- } | |
- continue; | |
+ if (i < 2) | |
+ i++; | |
} else if (c == '>' && i == 2) { | |
- if (x->xmlcommentend) | |
- x->xmlcommentend(x); | |
return; | |
- } else if (i) { | |
- if (x->xmlcomment) { | |
- for (; i > 0; i--) | |
- x->xmlcomment(x, "-", 1); | |
- } | |
- i = 0; | |
- } | |
- | |
- if (datalen < sizeof(x->data) - 1) { | |
- x->data[datalen++] = c; | |
} else { | |
- x->data[datalen] = '\0'; | |
- if (x->xmlcomment) | |
- x->xmlcomment(x, x->data, datalen); | |
- x->data[0] = c; | |
- datalen = 1; | |
+ i = 0; | |
} | |
} | |
} | |
@@ -171,8 +79,6 @@ xml_parsecdata(XMLParser *x) | |
size_t datalen = 0, i = 0; | |
int c; | |
- if (x->xmlcdatastart) | |
- x->xmlcdatastart(x); | |
while ((c = x->getnext()) != EOF) { | |
if (c == ']' || c == '>') { | |
if (x->xmlcdata) { | |
@@ -191,10 +97,8 @@ xml_parsecdata(XMLParser *x) | |
} | |
continue; | |
} else if (c == '>' && i == 2) { | |
- if (x->xmlcdataend) | |
- x->xmlcdataend(x); | |
return; | |
- } else if (i) { | |
+ } else { | |
if (x->xmlcdata) | |
for (; i > 0; i--) | |
x->xmlcdata(x, "]", 1); | |
@@ -255,11 +159,13 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz) | |
{ "&gt;", '>' }, | |
{ "&apos;", '\'' }, | |
{ "&quot;", '"' }, | |
+ { "&nbsp;", ' ' }, | |
{ "&AMP;", '&' }, | |
{ "&LT;", '<' }, | |
{ "&GT;", '>' }, | |
{ "&APOS;", '\'' }, | |
- { "&QUOT;", '"' } | |
+ { "&QUOT;", '"' }, | |
+ { "&NBSP;", ' ' }, | |
}; | |
size_t i; | |
@@ -398,8 +304,6 @@ xml_parse(XMLParser *x) | |
x->xmltagstart… | |
if (isspace(c)) | |
xml_parseattrs… | |
- if (x->xmltagstartpars… | |
- x->xmltagstart… | |
} | |
/* call tagend for shortform o… | |
if (x->isshorttag) { | |
@@ -416,8 +320,6 @@ xml_parse(XMLParser *x) | |
} else { | |
/* parse tag data */ | |
datalen = 0; | |
- if (x->xmldatastart) | |
- x->xmldatastart(x); | |
while ((c = x->getnext()) != EOF) { | |
if (c == '&') { | |
if (datalen) { | |
@@ -464,8 +366,6 @@ xml_parse(XMLParser *x) | |
x->data[datalen] = '\0'; | |
if (x->xmldata && datalen) | |
x->xmldata(x, x->data, datalen… | |
- if (x->xmldataend) | |
- x->xmldataend(x); | |
break; | |
} | |
} | |
diff --git a/xml.h b/xml.h | |
@@ -1,27 +1,10 @@ | |
typedef struct xmlparser { | |
/* handlers */ | |
- void (*xmlattr)(struct xmlparser *, const char *, size_t, | |
- const char *, size_t, const char *, size_t); | |
- void (*xmlattrend)(struct xmlparser *, const char *, size_t, | |
- const char *, size_t); | |
- void (*xmlattrstart)(struct xmlparser *, const char *, size_t, | |
- const char *, size_t); | |
- void (*xmlattrentity)(struct xmlparser *, const char *, size_t, | |
- const char *, size_t, const char *, size_t); | |
- void (*xmlcdatastart)(struct xmlparser *); | |
void (*xmlcdata)(struct xmlparser *, const char *, size_t); | |
- void (*xmlcdataend)(struct xmlparser *); | |
- void (*xmlcommentstart)(struct xmlparser *); | |
- void (*xmlcomment)(struct xmlparser *, const char *, size_t); | |
- void (*xmlcommentend)(struct xmlparser *); | |
void (*xmldata)(struct xmlparser *, const char *, size_t); | |
- void (*xmldataend)(struct xmlparser *); | |
void (*xmldataentity)(struct xmlparser *, const char *, size_t); | |
- void (*xmldatastart)(struct xmlparser *); | |
void (*xmltagend)(struct xmlparser *, const char *, size_t, int); | |
void (*xmltagstart)(struct xmlparser *, const char *, size_t); | |
- void (*xmltagstartparsed)(struct xmlparser *, const char *, | |
- size_t, int); | |
int (*getnext)(void); | |
@@ -30,8 +13,6 @@ typedef struct xmlparser { | |
size_t taglen; | |
/* current tag is in short form ? <tag /> */ | |
int isshorttag; | |
- /* current attribute name */ | |
- char name[1024]; | |
/* data buffer used for tag data, cdata and attribute data */ | |
char data[BUFSIZ]; | |
} XMLParser; |