Introduction
Introduction Statistics Contact Development Disclaimer Help
sync XML improvements - tscrape - twitter scraper
git clone git://git.codemadness.org/tscrape
Log
Files
Refs
README
LICENSE
---
commit 5df58d27f557292778cdc5dee306f18db8c980f7
parent f8629e681a16fc3af086355a44c942df57291b4b
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 1 Feb 2020 15:02:27 +0100
sync XML improvements
Diffstat:
M tscrape.c | 8 ++++----
M xml.c | 24 ++++++++----------------
M xml.h | 2 ++
3 files changed, 14 insertions(+), 20 deletions(-)
---
diff --git a/tscrape.c b/tscrape.c
@@ -107,10 +107,10 @@ isclassmatch(const char *classes, const char *clss, size_…
}
/* convert XML and some HTML entities */
-static ssize_t
+static int
html_entitytostr(const char *s, char *buf, size_t bufsiz)
{
- ssize_t len;
+ int len;
if ((len = xml_entitytostr(s, buf, bufsiz)) > 0)
return len;
@@ -244,7 +244,7 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, const…
const char *v, size_t vl)
{
char buf[16];
- ssize_t len;
+ int len;
if (!state)
return;
@@ -267,7 +267,7 @@ static void
xmldataentity(XMLParser *x, const char *d, size_t dl)
{
char buf[16];
- ssize_t len;
+ int len;
if (!(state & Text))
return;
diff --git a/xml.c b/xml.c
@@ -1,8 +1,5 @@
-#include <sys/types.h>
-
#include <ctype.h>
#include <errno.h>
-#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -255,11 +252,6 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz)
{ "gt;", '>' },
{ "apos;", '\'' },
{ "quot;", '"' },
- { "AMP;", '&' },
- { "LT;", '<' },
- { "GT;", '>' },
- { "APOS;", '\'' },
- { "QUOT;", '"' }
};
size_t i;
@@ -274,7 +266,7 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz)
return 1;
}
}
- return 0;
+ return -1;
}
static int
@@ -291,12 +283,12 @@ numericentitytostr(const char *e, char *buf, size_t bufsi…
errno = 0;
/* hex (16) or decimal (10) */
if (*e == 'x')
- l = strtoul(e + 1, &end, 16);
+ l = strtol(++e, &end, 16);
else
- l = strtoul(e, &end, 10);
- /* invalid value or not a well-formed entity or too high codepoint */
- if (errno || *end != ';' || l > 0x10FFFF)
- return 0;
+ l = strtol(e, &end, 10);
+ /* invalid value or not a well-formed entity or invalid codepoint */
+ if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
+ return -1;
len = codepointtoutf8(l, buf);
buf[len] = '\0';
@@ -304,13 +296,13 @@ numericentitytostr(const char *e, char *buf, size_t bufsi…
}
/* convert named- or numeric entity string to buffer string
- * returns byte-length of string. */
+ * returns byte-length of string or -1 on failure. */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
/* doesn't start with & */
if (e[0] != '&')
- return 0;
+ return -1;
/* numeric entity */
if (e[1] == '#')
return numericentitytostr(e + 2, buf, bufsiz);
diff --git a/xml.h b/xml.h
@@ -1,6 +1,8 @@
#ifndef _XML_H
#define _XML_H
+#include <stdio.h>
+
typedef struct xmlparser {
/* handlers */
void (*xmlattr)(struct xmlparser *, const char *, size_t,
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.