sync XML improvements - tscrape - twitter scraper | |
git clone git://git.codemadness.org/tscrape | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 5df58d27f557292778cdc5dee306f18db8c980f7 | |
parent f8629e681a16fc3af086355a44c942df57291b4b | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sat, 1 Feb 2020 15:02:27 +0100 | |
sync XML improvements | |
Diffstat: | |
M tscrape.c | 8 ++++---- | |
M xml.c | 24 ++++++++---------------- | |
M xml.h | 2 ++ | |
3 files changed, 14 insertions(+), 20 deletions(-) | |
--- | |
diff --git a/tscrape.c b/tscrape.c | |
@@ -107,10 +107,10 @@ isclassmatch(const char *classes, const char *clss, size_… | |
} | |
/* convert XML and some HTML entities */ | |
-static ssize_t | |
+static int | |
html_entitytostr(const char *s, char *buf, size_t bufsiz) | |
{ | |
- ssize_t len; | |
+ int len; | |
if ((len = xml_entitytostr(s, buf, bufsiz)) > 0) | |
return len; | |
@@ -244,7 +244,7 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, const… | |
const char *v, size_t vl) | |
{ | |
char buf[16]; | |
- ssize_t len; | |
+ int len; | |
if (!state) | |
return; | |
@@ -267,7 +267,7 @@ static void | |
xmldataentity(XMLParser *x, const char *d, size_t dl) | |
{ | |
char buf[16]; | |
- ssize_t len; | |
+ int len; | |
if (!(state & Text)) | |
return; | |
diff --git a/xml.c b/xml.c | |
@@ -1,8 +1,5 @@ | |
-#include <sys/types.h> | |
- | |
#include <ctype.h> | |
#include <errno.h> | |
-#include <limits.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
@@ -255,11 +252,6 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz) | |
{ "gt;", '>' }, | |
{ "apos;", '\'' }, | |
{ "quot;", '"' }, | |
- { "AMP;", '&' }, | |
- { "LT;", '<' }, | |
- { "GT;", '>' }, | |
- { "APOS;", '\'' }, | |
- { "QUOT;", '"' } | |
}; | |
size_t i; | |
@@ -274,7 +266,7 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz) | |
return 1; | |
} | |
} | |
- return 0; | |
+ return -1; | |
} | |
static int | |
@@ -291,12 +283,12 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz) | |
errno = 0; | |
/* hex (16) or decimal (10) */ | |
if (*e == 'x') | |
- l = strtoul(e + 1, &end, 16); | |
+ l = strtol(++e, &end, 16); | |
else | |
- l = strtoul(e, &end, 10); | |
- /* invalid value or not a well-formed entity or too high codepoint */ | |
- if (errno || *end != ';' || l > 0x10FFFF) | |
- return 0; | |
+ l = strtol(e, &end, 10); | |
+ /* invalid value or not a well-formed entity or invalid codepoint */ | |
+ if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff) | |
+ return -1; | |
len = codepointtoutf8(l, buf); | |
buf[len] = '\0'; | |
@@ -304,13 +296,13 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz) | |
} | |
/* convert named- or numeric entity string to buffer string | |
- * returns byte-length of string. */ | |
+ * returns byte-length of string or -1 on failure. */ | |
int | |
xml_entitytostr(const char *e, char *buf, size_t bufsiz) | |
{ | |
/* doesn't start with & */ | |
if (e[0] != '&') | |
- return 0; | |
+ return -1; | |
/* numeric entity */ | |
if (e[1] == '#') | |
return numericentitytostr(e + 2, buf, bufsiz); | |
diff --git a/xml.h b/xml.h | |
@@ -1,6 +1,8 @@ | |
#ifndef _XML_H | |
#define _XML_H | |
+#include <stdio.h> | |
+ | |
typedef struct xmlparser { | |
/* handlers */ | |
void (*xmlattr)(struct xmlparser *, const char *, size_t, |