GopherProxy

	sync xml.{c,h} - grabtitle - stupid HTML title grabber
	git clone git://git.codemadness.org/grabtitle
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 8e2bee7e85c6a6fbdb2b9ef84c69f8f74ab5b77c
	parent 0ffe161701f6f9ecde66204f5784e6709d647a1e
	Author: Hiltjo Posthuma <[email protected]>
	Date: Sat, 30 May 2020 13:36:43 +0200

	sync xml.{c,h}

	Diffstat:
	M xml.c | 113 ++++++++++++++---------------…
	M xml.h | 5 +++++

	2 files changed, 55 insertions(+), 63 deletions(-)
	---
	diff --git a/xml.c b/xml.c
	@@ -15,7 +15,7 @@ xml_parseattrs(XMLParser *x)
	size_t namelen = 0;
	int c, endsep, endname = 0, valuestart = 0;

	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (isspace(c)) {
	if (namelen)
	endname = 1;
	@@ -32,12 +32,12 @@ xml_parseattrs(XMLParser *x)
	/* attribute with value */
	if (c == '\'' || c == '"') {
	endsep = c;
	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (c == endsep)
	break;
	}
	} else {
	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (c == '>' || isspace(c))
	break;
	}
	@@ -61,7 +61,7 @@ xml_parsecomment(XMLParser *x)
	size_t i = 0;
	int c;

	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (c == '-') {
	if (i < 2)
	i++;
	@@ -79,7 +79,7 @@ xml_parsecdata(XMLParser *x)
	size_t datalen = 0, i = 0;
	int c;

	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (c == ']' || c == '>') {
	if (x->xmlcdata) {
	x->data[datalen] = '\0';
	@@ -147,44 +147,42 @@ codepointtoutf8(long r, char *s)
	}
	}

	+struct namedentity {
	+ const char *entity;
	+ long cp;
	+};
	+
	+int
	+namedentitycmp(const void *v1, const void *v2)
	+{
	+ struct namedentity *n1 = (struct namedentity *)v1;
	+ struct namedentity *n2 = (struct namedentity *)v2;
	+
	+ return strcmp(n1->entity, n2->entity);
	+}
	+
	static int
	namedentitytostr(const char *e, char *buf, size_t bufsiz)
	{
	- static const struct {
	- char *entity;
	- int c;
	- } entities[] = {
	- { "&amp;", '&' },
	- { "&lt;", '<' },
	- { "&gt;", '>' },
	- { "&apos;", '\'' },
	- { "&quot;", '"' },
	- { "&nbsp;", ' ' },
	- { "&AMP;", '&' },
	- { "&LT;", '<' },
	- { "&GT;", '>' },
	- { "&APOS;", '\'' },
	- { "&QUOT;", '"' },
	- { "&NBSP;", ' ' },
	+ static const struct namedentity entities[] = {
	+#include "namedentities.h"
	};
	+ struct namedentity find, *found;
	size_t i;

	/* buffer is too small */
	- if (bufsiz < 2)
	+ if (bufsiz < 5)
	return -1;

	- /* doesn't start with &: can't match */
	- if (*e != '&')
	- return 0;
	-
	- for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
	- if (!strcmp(e, entities[i].entity)) {
	- buf[0] = entities[i].c;
	- buf[1] = '\0';
	- return 1;
	- }
	+ find.entity = e;
	+ found = bsearch(&find, entities, sizeof(entities) / sizeof(*entities),
	+ sizeof(*entities), namedentitycmp);
	+ if (found) {
	+ i = codepointtoutf8(found->cp, buf);
	+ buf[i] = '\0';
	+ return i;
	}
	- return 0;
	+ return -1;
	}

	static int
	@@ -198,21 +196,15 @@ numericentitytostr(const char *e, char *buf, size_t bufsi…
	if (bufsiz < 5)
	return -1;

	- /* not a numeric entity */
	- if (e[0] != '&' || e[1] != '#')
	- return 0;
	-
	- /* e[1] == '#', numeric / hexadecimal entity */
	- e += 2; /* skip "&#" */
	errno = 0;
	/* hex (16) or decimal (10) */
	if (*e == 'x')
	- l = strtoul(e + 1, &end, 16);
	+ l = strtol(++e, &end, 16);
	else
	- l = strtoul(e, &end, 10);
	- /* invalid value or not a well-formed entity or too high codepoint */
	- if (errno || *end != ';' || l > 0x10FFFF)
	- return 0;
	+ l = strtol(e, &end, 10);
	+ /* invalid value or not a well-formed entity or invalid codepoint */
	+ if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
	+ return -1;
	len = codepointtoutf8(l, buf);
	buf[len] = '\0';

	@@ -220,21 +212,18 @@ numericentitytostr(const char *e, char *buf, size_t bufsi…
	}

	/* convert named- or numeric entity string to buffer string
	- * returns byte-length of string. */
	+ * returns byte-length of string or -1 on failure. */
	int
	xml_entitytostr(const char *e, char *buf, size_t bufsiz)
	{
	- /* buffer is too small */
	- if (bufsiz < 5)
	- return -1;
	/* doesn't start with & */
	if (e[0] != '&')
	- return 0;
	- /* named entity */
	- if (e[1] != '#')
	- return namedentitytostr(e, buf, bufsiz);
	- else /* numeric entity */
	- return numericentitytostr(e, buf, bufsiz);
	+ return -1;
	+ /* numeric entity */
	+ if (e[1] == '#')
	+ return numericentitytostr(e + 2, buf, bufsiz);
	+ else /* named entity */
	+ return namedentitytostr(e + 1, buf, bufsiz);
	}

	void
	@@ -243,18 +232,16 @@ xml_parse(XMLParser *x)
	size_t datalen, tagdatalen;
	int c, isend;

	- if (!x->getnext)
	- return;
	- while ((c = x->getnext()) != EOF && c != '<')
	+ while ((c = GETNEXT()) != EOF && c != '<')
	; /* skip until < */

	while (c != EOF) {
	if (c == '<') { /* parse tag */
	- if ((c = x->getnext()) == EOF)
	+ if ((c = GETNEXT()) == EOF)
	return;

	if (c == '!') { /* cdata and comments */
	- for (tagdatalen = 0; (c = x->getnext()) != EOF…
	+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
	/* NOTE: sizeof(x->data) must be atlea…
	if (tagdatalen <= sizeof("[CDATA[") - …
	x->data[tagdatalen++] = c;
	@@ -282,13 +269,13 @@ xml_parse(XMLParser *x)
	if (c == '?') {
	x->isshorttag = 1;
	} else if (c == '/') {
	- if ((c = x->getnext()) == EOF)
	+ if ((c = GETNEXT()) == EOF)
	return;
	x->tag[0] = c;
	isend = 1;
	}

	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (c == '/')
	x->isshorttag = 1; /* short ta…
	else if (c == '>' || isspace(c)) {
	@@ -320,7 +307,7 @@ xml_parse(XMLParser *x)
	} else {
	/* parse tag data */
	datalen = 0;
	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (c == '&') {
	if (datalen) {
	x->data[datalen] = '\0';
	@@ -329,7 +316,7 @@ xml_parse(XMLParser *x)
	}
	x->data[0] = c;
	datalen = 1;
	- while ((c = x->getnext()) != EOF) {
	+ while ((c = GETNEXT()) != EOF) {
	if (c == '<')
	break;
	if (datalen < sizeof(x->data) …
	diff --git a/xml.h b/xml.h
	@@ -1,3 +1,6 @@
	+#ifndef _XML_H
	+#define _XML_H
	+
	typedef struct xmlparser {
	/* handlers */
	void (*xmlcdata)(struct xmlparser *, const char *, size_t);
	@@ -6,6 +9,7 @@ typedef struct xmlparser {
	void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
	void (*xmltagstart)(struct xmlparser *, const char *, size_t);

	+#define GETNEXT (x)->getnext
	int (*getnext)(void);

	/* current tag */
	@@ -19,3 +23,4 @@ typedef struct xmlparser {

	int xml_entitytostr(const char *, char *, size_t);
	void xml_parse(XMLParser *);
	+#endif