Introduction
Introduction Statistics Contact Development Disclaimer Help
sync xml.{c,h} - grabtitle - stupid HTML title grabber
git clone git://git.codemadness.org/grabtitle
Log
Files
Refs
README
LICENSE
---
commit 8e2bee7e85c6a6fbdb2b9ef84c69f8f74ab5b77c
parent 0ffe161701f6f9ecde66204f5784e6709d647a1e
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 30 May 2020 13:36:43 +0200
sync xml.{c,h}
Diffstat:
M xml.c | 113 ++++++++++++++---------------…
M xml.h | 5 +++++
2 files changed, 55 insertions(+), 63 deletions(-)
---
diff --git a/xml.c b/xml.c
@@ -15,7 +15,7 @@ xml_parseattrs(XMLParser *x)
size_t namelen = 0;
int c, endsep, endname = 0, valuestart = 0;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (isspace(c)) {
if (namelen)
endname = 1;
@@ -32,12 +32,12 @@ xml_parseattrs(XMLParser *x)
/* attribute with value */
if (c == '\'' || c == '"') {
endsep = c;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == endsep)
break;
}
} else {
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '>' || isspace(c))
break;
}
@@ -61,7 +61,7 @@ xml_parsecomment(XMLParser *x)
size_t i = 0;
int c;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '-') {
if (i < 2)
i++;
@@ -79,7 +79,7 @@ xml_parsecdata(XMLParser *x)
size_t datalen = 0, i = 0;
int c;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == ']' || c == '>') {
if (x->xmlcdata) {
x->data[datalen] = '\0';
@@ -147,44 +147,42 @@ codepointtoutf8(long r, char *s)
}
}
+struct namedentity {
+ const char *entity;
+ long cp;
+};
+
+int
+namedentitycmp(const void *v1, const void *v2)
+{
+ struct namedentity *n1 = (struct namedentity *)v1;
+ struct namedentity *n2 = (struct namedentity *)v2;
+
+ return strcmp(n1->entity, n2->entity);
+}
+
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
- static const struct {
- char *entity;
- int c;
- } entities[] = {
- { "&amp;", '&' },
- { "&lt;", '<' },
- { "&gt;", '>' },
- { "&apos;", '\'' },
- { "&quot;", '"' },
- { "&nbsp;", ' ' },
- { "&AMP;", '&' },
- { "&LT;", '<' },
- { "&GT;", '>' },
- { "&APOS;", '\'' },
- { "&QUOT;", '"' },
- { "&NBSP;", ' ' },
+ static const struct namedentity entities[] = {
+#include "namedentities.h"
};
+ struct namedentity find, *found;
size_t i;
/* buffer is too small */
- if (bufsiz < 2)
+ if (bufsiz < 5)
return -1;
- /* doesn't start with &: can't match */
- if (*e != '&')
- return 0;
-
- for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
- if (!strcmp(e, entities[i].entity)) {
- buf[0] = entities[i].c;
- buf[1] = '\0';
- return 1;
- }
+ find.entity = e;
+ found = bsearch(&find, entities, sizeof(entities) / sizeof(*entities),
+ sizeof(*entities), namedentitycmp);
+ if (found) {
+ i = codepointtoutf8(found->cp, buf);
+ buf[i] = '\0';
+ return i;
}
- return 0;
+ return -1;
}
static int
@@ -198,21 +196,15 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
if (bufsiz < 5)
return -1;
- /* not a numeric entity */
- if (e[0] != '&' || e[1] != '#')
- return 0;
-
- /* e[1] == '#', numeric / hexadecimal entity */
- e += 2; /* skip "&#" */
errno = 0;
/* hex (16) or decimal (10) */
if (*e == 'x')
- l = strtoul(e + 1, &end, 16);
+ l = strtol(++e, &end, 16);
else
- l = strtoul(e, &end, 10);
- /* invalid value or not a well-formed entity or too high codepoint */
- if (errno || *end != ';' || l > 0x10FFFF)
- return 0;
+ l = strtol(e, &end, 10);
+ /* invalid value or not a well-formed entity or invalid codepoint */
+ if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
+ return -1;
len = codepointtoutf8(l, buf);
buf[len] = '\0';
@@ -220,21 +212,18 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
}
/* convert named- or numeric entity string to buffer string
- * returns byte-length of string. */
+ * returns byte-length of string or -1 on failure. */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
- /* buffer is too small */
- if (bufsiz < 5)
- return -1;
/* doesn't start with & */
if (e[0] != '&')
- return 0;
- /* named entity */
- if (e[1] != '#')
- return namedentitytostr(e, buf, bufsiz);
- else /* numeric entity */
- return numericentitytostr(e, buf, bufsiz);
+ return -1;
+ /* numeric entity */
+ if (e[1] == '#')
+ return numericentitytostr(e + 2, buf, bufsiz);
+ else /* named entity */
+ return namedentitytostr(e + 1, buf, bufsiz);
}
void
@@ -243,18 +232,16 @@ xml_parse(XMLParser *x)
size_t datalen, tagdatalen;
int c, isend;
- if (!x->getnext)
- return;
- while ((c = x->getnext()) != EOF && c != '<')
+ while ((c = GETNEXT()) != EOF && c != '<')
; /* skip until < */
while (c != EOF) {
if (c == '<') { /* parse tag */
- if ((c = x->getnext()) == EOF)
+ if ((c = GETNEXT()) == EOF)
return;
if (c == '!') { /* cdata and comments */
- for (tagdatalen = 0; (c = x->getnext()) != EOF;) {
+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
/* NOTE: sizeof(x->data) must be atlea…
if (tagdatalen <= sizeof("[CDATA[") - …
x->data[tagdatalen++] = c;
@@ -282,13 +269,13 @@ xml_parse(XMLParser *x)
if (c == '?') {
x->isshorttag = 1;
} else if (c == '/') {
- if ((c = x->getnext()) == EOF)
+ if ((c = GETNEXT()) == EOF)
return;
x->tag[0] = c;
isend = 1;
}
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '/')
x->isshorttag = 1; /* short ta…
else if (c == '>' || isspace(c)) {
@@ -320,7 +307,7 @@ xml_parse(XMLParser *x)
} else {
/* parse tag data */
datalen = 0;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '&') {
if (datalen) {
x->data[datalen] = '\0';
@@ -329,7 +316,7 @@ xml_parse(XMLParser *x)
}
x->data[0] = c;
datalen = 1;
- while ((c = x->getnext()) != EOF) {
+ while ((c = GETNEXT()) != EOF) {
if (c == '<')
break;
if (datalen < sizeof(x->data) …
diff --git a/xml.h b/xml.h
@@ -1,3 +1,6 @@
+#ifndef _XML_H
+#define _XML_H
+
typedef struct xmlparser {
/* handlers */
void (*xmlcdata)(struct xmlparser *, const char *, size_t);
@@ -6,6 +9,7 @@ typedef struct xmlparser {
void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
void (*xmltagstart)(struct xmlparser *, const char *, size_t);
+#define GETNEXT (x)->getnext
int (*getnext)(void);
/* current tag */
@@ -19,3 +23,4 @@ typedef struct xmlparser {
int xml_entitytostr(const char *, char *, size_t);
void xml_parse(XMLParser *);
+#endif
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.