Introduction
Introduction Statistics Contact Development Disclaimer Help
initial import - pubsubhubbubblub - pubsubhubbub client implementation
git clone git://git.codemadness.org/pubsubhubbubblub
Log
Files
Refs
README
LICENSE
---
commit a9f9a229d5be860a5fdab051fbda7ece66d2dd64
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 28 May 2022 12:09:41 +0200
initial import
Diffstat:
A LICENSE | 15 +++++++++++++++
A Makefile | 17 +++++++++++++++++
A README | 116 ++++++++++++++++++++++++++++++
A hmac_sha1.c | 63 +++++++++++++++++++++++++++++…
A hmac_sha1.h | 4 ++++
A pubsub_cgi.c | 463 +++++++++++++++++++++++++++++…
A pubsub_gethub.c | 149 +++++++++++++++++++++++++++++…
A pubsub_setup | 133 +++++++++++++++++++++++++++++…
A sha1.c | 145 +++++++++++++++++++++++++++++…
A sha1.h | 13 +++++++++++++
A strlcat.c | 54 +++++++++++++++++++++++++++++…
A xml.c | 415 ++++++++++++++++++++++++++++++
A xml.h | 43 ++++++++++++++++++++++++++++++
13 files changed, 1630 insertions(+), 0 deletions(-)
---
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2022 Hiltjo Posthuma <[email protected]>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,17 @@
+.POSIX:
+
+# NOTE(review): PREFIX and CGIDIR are defined but not referenced by any rule below.
+PREFIX = /usr/local
+CGIDIR = /var/www/cgi-bin
+
+# Default target: always starts with "clean", compiles every source file and
+# links the two programs pubsub_cgi and pubsub_gethub.
+build: clean
+ ${CC} -c sha1.c ${CFLAGS} ${CPPFLAGS}
+ ${CC} -c hmac_sha1.c ${CFLAGS} ${CPPFLAGS}
+ ${CC} -c strlcat.c xml.c ${CFLAGS} ${CPPFLAGS}
+ ${CC} -c pubsub_cgi.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE
+ ${CC} -c pubsub_gethub.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE
+ # link
+ ${CC} -o pubsub_cgi hmac_sha1.o sha1.o pubsub_cgi.o ${LDFLAGS} -static…
+ ${CC} -o pubsub_gethub strlcat.o xml.o pubsub_gethub.o ${LDFLAGS}
+
+# Remove object files and both built programs.
+clean:
+ rm -f *.o pubsub_cgi pubsub_gethub
diff --git a/README b/README
@@ -0,0 +1,116 @@
+pubsubhubbubblub
+----------------
+
+Generic pubsubhubbub client implementation.
+Helper scripts to use it with sfeed.
+
+
+What is it
+----------
+
+pubsubhubbub is a publisher/subscriber technology used to push updates in a we…
+This allows to push content updates, instead of polling for news in an interva…
+
+
+Features
+--------
+
+- Not many dependencies.
+- Uses pledge and unveil on OpenBSD.
+- Signatures (hub.secret) support, Pubsubhub 0.4 core SHA1 only.
+
+
+Dependencies
+------------
+
+- C compiler
+
+
+Files
+-----
+
+pubsub_cgi.c - Small stupid PubSubHubBub implementation as a CGI program.
+pubsub_gethub - Helper program extract a hub and feed URL from a RSS or At…
+pubsub_setup - Helper script that sets up the directory structure for
+ processing the feed for the CGI program. It has an
+ -s option to subscribe and an -u option to unsubscribe at …
+
+
+How to install
+--------------
+
+For the CGI program:
+
+OpenBSD httpd and slowcgi, httpd.conf:
+
+ location "/pubsub/**" {
+ request strip 1
+ root "/cgi-bin/pubsub"
+ fastcgi socket "/run/slowcgi.sock"
+ }
+
+Compile pubsub_cgi.c statically and copy the resulting binary to /var/www/cgi-bin/pubsub
+
+- Create a directory with write-access for the pubsub CGI program
+ /var/www/pubsub-data/feedname. The setup_feed.sh script can be used to crea…
+ the directories.
+- Make sure to set the proper permissions for the CGI program (slowcgi) and
+ HTTPd.
+- The base name of the CGI script can be changed in the pubsub_setup script.
+
+
+How does it work
+----------------
+
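+The CGI program is reached through a per-feed callback URL made of the base
+path, the feed name and the secret token, for example:
+https://codemadness.org/pubsub/slashdot/secrettoken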
+
+
+Directory structure:
+
+/pubsub-data/config/feedname/ - Directory with metadata about the feed.
+/pubsub-data/config/feedname/hub - The hub URL, for example http://pubsubhu…
+/pubsub-data/config/feedname/topic - hub.topic, the feed URL.
+/pubsub-data/config/feedname/secret - hub.secret for calculating the message d…
+ see Section 8 of Pubsubhubbub core 0.4.
+/pubsub-data/config/feedname/token - File containing a line with a secret tok…
+ is not easy guessable (by different hubs…
+/pubsub-data/feeds/feedname/ - Directory containing processed messages.
+/pubsub-data/tmp/feedname/ - Temporary directory to process messages.
+ Moves to the feeds/feedname directory on…
+/pubsub-data/log - Log file, TAB-separated.
+
+
+Example
+-------
+
+Get the hub and feed URL:
+
+ curl -s http://rss.slashdot.org/Slashdot/slashdot | pubsub_gethub
+
+ http://rss.slashdot.org/Slashdot/slashdot self
+ http://pubsubhubbub.appspot.com/ hub
+
+Setup the feed for the CGI program:
+ cd /var/www/pubsub-data
+ pubsub_setup -s 'slashdot' 'http://pubsubhubbub.appspot.com/' 'http://…
+
+
+Monitor script example
+----------------------
+
+This monitors the log file using tail(1) and uses sfeed and sfeed_plain to wri…
+This can then be piped to the suckless ii(1) program for IRC notifications for…
+It uses sfeed for parsing RSS and Atom content and formats it to a plain-text …
+
+ #!/bin/sh
+ cd /var/www/pubsub-data
+ tail -f log | \
+ LC_ALL=C awk '{ print $2 "\t" $3; fflush(); }' | \
+ while IFS=" " read -r feed file; do sfeed < "feeds/${fe…
+ sfeed_plain
+
+
+References
+----------
+
+Pubsubhubbub core 0.4: https://pubsubhubbub.github.io/PubSubHubbub/pubsubhubbu…
diff --git a/hmac_sha1.c b/hmac_sha1.c
@@ -0,0 +1,63 @@
+/* Adapted from RFC2104 hmac_md5, some code-style changes and data streaming s…
+
+#include <string.h>
+#include <stdio.h>
+
+#include "hmac_sha1.h"
+
+/*
+ * Begin a streaming HMAC-SHA1 computation (construction from RFC 2104).
+ *
+ * ctx       - SHA1 context; on return it has absorbed the inner pad and is
+ *             ready to be fed the message with SHA1_Update().
+ * key       - secret key of key_len bytes; keys longer than 64 bytes are
+ *             replaced by their SHA1 digest first.
+ * k_opad    - caller-owned buffer that receives the key XORed with 0x5c,
+ *             to be handed to hmac_sha1_final() later.
+ * k_opadlen - size of k_opad; the whole buffer is cleared and its first
+ *             64 bytes are then written.
+ */
+void
+hmac_sha1_init(SHA_CTX *ctx, const unsigned char *key, size_t key_len,
+unsigned char *k_opad, size_t k_opadlen)
+{
+	enum { BLOCKLEN = 64, HASHLEN = 20 };
+	unsigned char innerpad[BLOCKLEN + 1]; /* key XORed with 0x36 */
+	unsigned char hashedkey[HASHLEN];
+	SHA_CTX keyctx;
+	int n;
+
+	/* a key that does not fit in one block is replaced by SHA1(key) */
+	if (key_len > BLOCKLEN) {
+		SHA1_Init(&keyctx);
+		SHA1_Update(&keyctx, key, key_len);
+		SHA1_Final(hashedkey, &keyctx);
+
+		key_len = HASHLEN;
+		key = hashedkey;
+	}
+
+	/* zero-fill both pads, then overlay the key at the front of each */
+	memset(innerpad, 0, sizeof(innerpad));
+	memset(k_opad, 0, k_opadlen);
+	memcpy(innerpad, key, key_len);
+	memcpy(k_opad, key, key_len);
+
+	/* apply the ipad (0x36) and opad (0x5c) masks over one full block */
+	for (n = 0; n < BLOCKLEN; n++) {
+		innerpad[n] ^= 0x36;
+		k_opad[n] ^= 0x5c;
+	}
+
+	/* the inner hash starts with the inner pad; the message follows it */
+	SHA1_Init(ctx);
+	SHA1_Update(ctx, innerpad, BLOCKLEN);
+}
+
+/*
+ * Finish an HMAC-SHA1 computation started with hmac_sha1_init().
+ *
+ * ctx    - context that has hashed the inner pad and the message.
+ * k_opad - outer pad produced by hmac_sha1_init(); 64 bytes are read.
+ * digest - receives the result; it is first used as scratch space for the
+ *          20-byte inner hash and then overwritten with the final value.
+ */
+void
+hmac_sha1_final(SHA_CTX *ctx, const unsigned char *k_opad, unsigned char *dige…
+{
+ SHA1_Final(digest, ctx); /* finish up 1st pass */
+ /* perform outer SHA1 */
+ SHA1_Init(ctx); /* init context for 2nd pass */
+ SHA1_Update(ctx, k_opad, 64); /* start with outer pad */
+ SHA1_Update(ctx, digest, 20); /* then results of 1st hash */
+ SHA1_Final(digest, ctx); /* finish up 2nd pass */
+}
diff --git a/hmac_sha1.h b/hmac_sha1.h
@@ -0,0 +1,4 @@
+#include "sha1.h"
+
+void hmac_sha1_init(SHA_CTX *, const unsigned char *, size_t, unsigned char *,…
+void hmac_sha1_final(SHA_CTX *, const unsigned char *, unsigned char *);
diff --git a/pubsub_cgi.c b/pubsub_cgi.c
@@ -0,0 +1,463 @@
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#ifdef __OpenBSD__
+#include <unistd.h>
+#else
+#define pledge(p1,p2) 0
+#define unveil(p1,p2) 0
+#endif
+
+#include "hmac_sha1.h"
+
+static const char *relpath = "/pubsub/";
+
+#define DATADIR "/pubsub-data"
+
+static const char *configdir = DATADIR "/config";
+static const char *datadir = DATADIR "/feeds";
+static const char *tmpdir = DATADIR "/tmp";
+static const char *logfile = DATADIR "/log";
+static time_t now;
+
+/*
+ * Read the first line of the file at `path`.
+ *
+ * Returns a pointer to a static buffer holding the line (at most 255 bytes)
+ * with everything from the first newline on removed, or NULL when the file
+ * cannot be opened or nothing could be read. The buffer is reused, so the
+ * result is only valid until the next call.
+ */
+char *
+readfile(const char *path)
+{
+	static char buf[256];
+	FILE *fp;
+	char *line;
+
+	fp = fopen(path, "rb");
+	if (fp == NULL)
+		return NULL;
+
+	line = fgets(buf, sizeof(buf), fp);
+	fclose(fp);
+	if (line == NULL)
+		return NULL;
+
+	buf[strcspn(buf, "\n")] = '\0';
+	return buf;
+}
+
+/*
+ * Numeric value (0-15) of the hexadecimal digit `c`, accepting both upper
+ * and lower case. Any other character yields 0, so callers that need to
+ * tell "0" apart from invalid input must validate with isxdigit() first.
+ */
+int
+hexdigit(int c)
+{
+	if ('0' <= c && c <= '9')
+		return c - '0';
+	if ('a' <= c && c <= 'f')
+		return 10 + (c - 'a');
+	if ('A' <= c && c <= 'F')
+		return 10 + (c - 'A');
+
+	return 0;
+}
+
+/*
+ * URL-decode `s` into `buf` until the end of the string or the first
+ * occurrence of the character `end` (which is not consumed).
+ *
+ * "%XX" is replaced by the byte with hexadecimal value XX and '+' by a
+ * space; every other character is copied unchanged. The result is always
+ * NUL-terminated on success.
+ *
+ * Returns the number of bytes stored (excluding the terminating NUL), or -1
+ * when bufsiz is 0, the output does not fit, or a '%' is not followed by
+ * two hexadecimal digits.
+ */
+int
+decodeparamuntilend(char *buf, size_t bufsiz, const char *s, int end)
+{
+	size_t i;
+
+	if (!bufsiz)
+		return -1;
+
+	for (i = 0; *s && *s != end; s++) {
+		/* each input token produces exactly one output byte, so one
+		 * free byte plus room for the NUL is all that is required;
+		 * a "%XX" escape consumes three input bytes, not three
+		 * output bytes */
+		if (i + 1 >= bufsiz)
+			return -1;
+
+		switch (*s) {
+		case '%':
+			if (!isxdigit((unsigned char)*(s+1)) ||
+			    !isxdigit((unsigned char)*(s+2)))
+				return -1;
+			buf[i++] = hexdigit(*(s+1)) * 16 + hexdigit(*(s+2));
+			s += 2;
+			break;
+		case '+':
+			buf[i++] = ' ';
+			break;
+		default:
+			buf[i++] = *s;
+			break;
+		}
+	}
+	buf[i] = '\0';
+
+	return i;
+}
+
+/*
+ * Decode a single query-string value: identical to decodeparamuntilend()
+ * with '&' as the terminating character. Returns what that function returns.
+ */
+int
+decodeparam(char *buf, size_t bufsiz, const char *s)
+{
+ return decodeparamuntilend(buf, bufsiz, s, '&');
+}
+
+/*
+ * Locate the parameter named `s` in the raw query string `query`.
+ *
+ * A candidate only counts when the name is directly followed by '=' and it
+ * starts at the beginning of the query or right after a separator such as
+ * '&'. When the name occurs more than once the last occurrence wins.
+ *
+ * Returns a pointer into `query` just past the '=' (the value is still
+ * encoded and not NUL-terminated at its end), or NULL when not found.
+ */
+char *
+getparam(const char *query, const char *s)
+{
+ const char *p, *last = NULL;
+ size_t len;
+
+ len = strlen(s);
+ for (p = query; (p = strstr(p, s)); p += len) {
+ if (p[len] == '=' && (p == query || p[-1] == '&' || p[-1] == '…
+ last = p + len + 1;
+ }
+
+ return (char *)last;
+}
+
+/*
+ * Map an HTTP status code to its "code reason" text.
+ * Returns NULL for codes this program does not use.
+ */
+const char *
+httpstatusmsg(int code)
+{
+	static const struct {
+		int code;
+		const char *msg;
+	} statuses[] = {
+		{ 200, "200 OK" },
+		{ 202, "202 Accepted" },
+		{ 400, "400 Bad Request" },
+		{ 403, "403 Forbidden" },
+		{ 404, "404 Not Found" },
+		{ 500, "500 Internal Server Error" },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(statuses) / sizeof(statuses[0]); i++) {
+		if (statuses[i].code == code)
+			return statuses[i].msg;
+	}
+	return NULL;
+}
+
+/*
+ * Print the CGI "Status:" header for `code` on stdout.
+ * Nothing is printed for a code httpstatusmsg() does not know.
+ */
+void
+httpstatus(int code)
+{
+	const char *msg = httpstatusmsg(code);
+
+	if (msg == NULL)
+		return;
+	printf("Status: %s\r\n", msg);
+}
+
+/*
+ * Send a complete plain-text error response and terminate the process.
+ *
+ * code - HTTP status code; expected to be one known to httpstatusmsg().
+ * s    - optional detail appended after the status text, may be NULL.
+ *
+ * Never returns: the process exits with status 0 once the response has
+ * been written.
+ */
+void
+httperror(int code, const char *s)
+{
+	const char *msg;
+
+	httpstatus(code);
+	fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
+	fputs("\r\n", stdout);
+
+	msg = httpstatusmsg(code);
+	if (s == NULL)
+		printf("%s\r\n", msg);
+	else
+		printf("%s: %s\r\n", msg, s);
+
+	exit(0);
+}
+
+/* Respond with "400 Bad Request" plus the detail `s` via httperror(). */
+void
+badrequest(const char *s)
+{
+ httperror(400, s);
+}
+
+/* Respond with "403 Forbidden" plus the detail `s` via httperror(). */
+void
+forbidden(const char *s)
+{
+ httperror(403, s);
+}
+
+/* Respond with "404 Not Found" plus the detail `s` via httperror(). */
+void
+notfound(const char *s)
+{
+ httperror(404, s);
+}
+
+/* Respond with "500 Internal Server Error" plus the detail `s` via httperror(). */
+void
+servererror(const char *s)
+{
+ httperror(500, s);
+}
+
+/*
+ * Append one record for a stored message to the log file.
+ *
+ * Record format, one line per message:
+ *   timestamp TAB feedname TAB data-filename TAB signature
+ * The timestamp is the file-level `now` value. When the log file cannot be
+ * opened the request is failed through servererror().
+ */
+void
+logrequest(const char *feedname, const char *filename, const char *signature)
+{
+	const char *fields[3];
+	const char *seps[3] = { "\t", "\t", "\n" };
+	FILE *fp;
+	int k;
+
+	fp = fopen(logfile, "a");
+	if (fp == NULL)
+		servererror("cannot write data");
+
+	fields[0] = feedname;
+	fields[1] = filename;
+	fields[2] = signature;
+
+	fprintf(fp, "%lld\t", (long long)now);
+	for (k = 0; k < 3; k++) {
+		fputs(fields[k], fp);
+		fputs(seps[k], fp);
+	}
+	fclose(fp);
+}
+
+/*
+ * Choose the file extension used for a stored message.
+ * The content type `s` is currently ignored: "xml" is always returned.
+ */
+char *
+contenttypetoext(const char *s)
+{
+ return "xml"; /* for now just support XML, for RSS and Atom */
+}
+
+/*
+ * CGI entry point.
+ *
+ * Request handling, in order:
+ *  - restrict the process with unveil()/pledge() (no-ops outside OpenBSD);
+ *  - parse an optional X-Hub-Signature header into a binary SHA1 digest;
+ *  - split the decoded request path below `relpath` into feed name and token;
+ *  - require the feed's config directory to exist and, when a token file
+ *    is present, the token in the path to match it;
+ *  - POST: stream the body to a temporary file (hashing it when the feed
+ *    has a secret), check Content-Length and the HMAC digest, move the file
+ *    into the feed directory, answer 200 and append a log record;
+ *  - otherwise: answer a hub.mode=subscribe/unsubscribe verification by
+ *    echoing hub.challenge with status 202, or print a status message.
+ *
+ * Error paths respond through badrequest()/forbidden()/notfound()/
+ * servererror(), which terminate the process.
+ */
+int
+main(void)
+{
+	FILE *fpdata;
+	/* challenge starts out empty so that a request without a
+	 * hub.challenge parameter is detected instead of reading
+	 * indeterminate stack contents */
+	char challenge[256] = "", mode[32] = "", signature[128] = "";
+	char requesturi[4096], requesturidecoded[4096];
+	char feedname[256], token[256] = "";
+	char filename[PATH_MAX], tmpfilename[PATH_MAX];
+	char configpath[PATH_MAX], feedpath[PATH_MAX], secretpath[PATH_MAX];
+	char tokenpath[PATH_MAX];
+	char *contentlength = "", *contenttype = "", *method = "GET", *query =…
+	char *p, *fileext, *tmp;
+	char buf[4096];
+	size_t n, total;
+	long long ll;
+	int i, j, fd, r;
+	/* HMAC */
+	SHA_CTX ctx;
+	unsigned char key_opad[65]; /* outer padding - key XORd with opad */
+	unsigned char *key;
+	size_t key_len;
+	unsigned char digest[SHA_DIGEST_LENGTH];
+	unsigned char inputdigest[SHA_DIGEST_LENGTH];
+
+	if (unveil(DATADIR, "rwc") == -1)
+		err(1, "unveil");
+	if (pledge("stdio rpath wpath cpath fattr", NULL) == -1)
+		err(1, "pledge");
+
+	if ((tmp = getenv("CONTENT_TYPE")))
+		contenttype = tmp;
+	if ((tmp = getenv("CONTENT_LENGTH")))
+		contentlength = tmp;
+	if ((tmp = getenv("REQUEST_METHOD")))
+		method = tmp;
+	if ((tmp = getenv("QUERY_STRING")))
+		query = tmp;
+
+	/* "8. Authenticated Content Distribution" */
+	if ((p = getenv("HTTP_X_HUB_SIGNATURE"))) {
+		r = snprintf(signature, sizeof(signature), "%s", p);
+		if (r < 0 || (size_t)r >= sizeof(signature))
+			badrequest("invalid signature (truncated)");
+
+		/* accept sha1=digest or sha=digest */
+		if ((tmp = strstr(signature, "sha1=")))
+			tmp += sizeof("sha1=") - 1;
+		else if ((tmp = strstr(signature, "sha=")))
+			tmp += sizeof("sha=") - 1;
+		if (tmp) {
+			/* count the leading hex digits; p stops at the first
+			 * non-hex character or at the end of the string */
+			for (p = tmp, i = 0; *p; p++, i++) {
+				if (!isxdigit((unsigned char)*p))
+					break;
+			}
+		}
+		/* the digest must be exactly 40 hex digits with nothing after it */
+		if (tmp && !*p && i == (SHA_DIGEST_LENGTH * 2)) {
+			for (i = 0, j = 0, p = tmp; i < SHA_DIGEST_LENGTH; i++…
+				inputdigest[i] = (hexdigit(p[j]) << 4) |
+				    hexdigit(p[j + 1]);
+			}
+		} else {
+			badrequest("invalid hash format");
+		}
+	}
+
+	if (!(p = getenv("REQUEST_URI")))
+		p = "";
+	snprintf(requesturi, sizeof(requesturi), "%s", p);
+	if ((p = strchr(requesturi, '?')))
+		*p = '\0'; /* remove query string */
+
+	if (decodeparamuntilend(requesturidecoded, sizeof(requesturidecoded), …
+		badrequest("request URI");
+
+	p = requesturidecoded;
+	if (strncmp(p, relpath, strlen(relpath)))
+		forbidden("invalid relative path");
+	p += strlen(relpath);
+
+	/* first part of path of request URI is the feedname, last part is the…
+	if ((tmp = strchr(p, '/'))) {
+		*tmp = '\0'; /* temporary NUL terminate */
+
+		r = snprintf(feedname, sizeof(feedname), "%s", p);
+		if (r < 0 || (size_t)r >= sizeof(feedname))
+			servererror("path truncated");
+
+		r = snprintf(token, sizeof(token), "%s", tmp + 1);
+		if (r < 0 || (size_t)r >= sizeof(token))
+			servererror("path truncated");
+
+		*tmp = '/'; /* restore NUL byte to '/' */
+	} else {
+		r = snprintf(feedname, sizeof(feedname), "%s", p);
+		if (r < 0 || (size_t)r >= sizeof(feedname))
+			servererror("path truncated");
+	}
+	/* the feed name becomes part of file system paths below */
+	if (strstr(feedname, ".."))
+		badrequest("invalid feed name");
+
+	/* check if configdir of feedname exists, else skip request and return…
+	r = snprintf(configpath, sizeof(configpath), "%s/%s", configdir, feedn…
+	if (r < 0 || (size_t)r >= sizeof(configpath))
+		servererror("path truncated");
+	if (access(configpath, X_OK) == -1)
+		notfound("feed entrypoint does not exist");
+
+	r = snprintf(tokenpath, sizeof(tokenpath), "%s/%s/token", configdir, f…
+	if (r < 0 || (size_t)r >= sizeof(tokenpath))
+		servererror("path truncated");
+	/* the token is only enforced when the feed has a readable token file */
+	if ((tmp = readfile(tokenpath))) {
+		if (strcmp(tmp, token))
+			forbidden("missing or incorrect token in path");
+	}
+
+	if (!strcasecmp(method, "POST")) {
+		if (!feedname[0])
+			badrequest("feed name part of path is missing");
+
+		/* read secret, initialize for HMAC and data signature verific…
+		r = snprintf(secretpath, sizeof(secretpath), "%s/%s/secret", c…
+		if (r < 0 || (size_t)r >= sizeof(secretpath))
+			servererror("path truncated");
+		/* readfile() returns char *; the HMAC code works on bytes */
+		key = (unsigned char *)readfile(secretpath);
+		if (key && !signature[0])
+			forbidden("requires signature header X-Hub-Signature");
+
+		if (key) {
+			key_len = strlen((const char *)key);
+			hmac_sha1_init(&ctx, key, key_len, key_opad, sizeof(ke…
+		}
+
+		/* temporary file with random characters */
+		if ((now = time(NULL)) == (time_t)-1)
+			servererror("cannot get current time");
+		r = snprintf(tmpfilename, sizeof(tmpfilename), "%s/%s/%lld.XXX…
+		if (r < 0 || (size_t)r >= sizeof(tmpfilename))
+			servererror("path truncated");
+
+		if ((fd = mkstemp(tmpfilename)) == -1)
+			servererror("cannot create tmpfilename");
+		if (!(fpdata = fdopen(fd, "wb")))
+			servererror(tmpfilename);
+
+		/* copy stdin to the temporary file in full buffers; the
+		 * final short read (if any) is handled after the loop */
+		total = 0;
+		while ((n = fread(buf, 1, sizeof(buf), stdin)) == sizeof(buf))…
+			if (fwrite(buf, 1, n, fpdata) != n)
+				break;
+			if (key)
+				SHA1_Update(&ctx, buf, n); /* hash data for si…
+			total += n;
+		}
+		if (n) {
+			fwrite(buf, 1, n, fpdata);
+			if (key)
+				SHA1_Update(&ctx, buf, n);
+			total += n;
+		}
+		if (ferror(stdin)) {
+			fclose(fpdata);
+			unlink(tmpfilename);
+			servererror("cannot process POST message: read error");
+		}
+		if (fflush(fpdata) || ferror(fpdata)) {
+			fclose(fpdata);
+			unlink(tmpfilename);
+			servererror("cannot process POST message: write error"…
+		}
+		fclose(fpdata);
+		chmod(tmpfilename, 0644);
+
+		/* if Content-Length is set then check if it matches */
+		if (contentlength[0]) {
+			ll = strtoll(contentlength, NULL, 10);
+			if (ll < 0 || (size_t)ll != total) {
+				unlink(tmpfilename);
+				badrequest("Content-Length does not match");
+			}
+		}
+
+		if (key) {
+			/* finalize signature digest */
+			hmac_sha1_final(&ctx, key_opad, digest);
+
+			/* compare every byte regardless of where the first
+			 * difference is, so the time taken does not reveal
+			 * how much of a forged signature was correct */
+			for (i = 0, r = 0; i < SHA_DIGEST_LENGTH; i++)
+				r |= inputdigest[i] ^ digest[i];
+			if (r) {
+				unlink(tmpfilename);
+				forbidden("invalid digest for data");
+			}
+		}
+
+		/* use part of basename of the random temp file as the filenam…
+		if (!(tmp = strrchr(tmpfilename, '/')))
+			servererror("invalid path"); /* cannot happen */
+		r = snprintf(feedpath, sizeof(feedpath), "%s/%s", datadir, fee…
+		if (r < 0 || (size_t)r >= sizeof(feedpath))
+			servererror("path truncated");
+		fileext = contenttypetoext(contenttype);
+		r = snprintf(filename, sizeof(filename), "%s/%s%s%s", feedpath…
+		    fileext[0] ? "." : "", fileext);
+		if (r < 0 || (size_t)r >= sizeof(filename))
+			servererror("path truncated");
+
+		if ((r = rename(tmpfilename, filename)) != 0) {
+			unlink(filename);
+			unlink(tmpfilename);
+			servererror("cannot process POST message: failed to re…
+		}
+		chmod(filename, 0644);
+
+		httpstatus(200);
+		fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
+		fputs("\r\n", stdout);
+
+		/* output stored file: feedname, basename of the file */
+		if ((tmp = strrchr(filename, '/')))
+			tmp++;
+		else
+			tmp = "";
+		printf("%s/%s\n", feedname, tmp);
+
+		/* write to a log file, this could be a pipe or used with tail…
+		logrequest(feedname, tmp, signature);
+
+		return 0;
+	}
+
+	if ((p = getparam(query, "hub.mode"))) {
+		if (decodeparam(mode, sizeof(mode), p) == -1)
+			badrequest("hub.mode");
+	}
+
+	if (!strcmp(mode, "subscribe") || !strcmp(mode, "unsubscribe")) {
+		if ((p = getparam(query, "hub.challenge"))) {
+			if (decodeparam(challenge, sizeof(challenge), p) == -1)
+				badrequest("hub.challenge");
+		}
+		if (!challenge[0])
+			badrequest("hub.challenge is required, but is missing"…
+
+		/* verification of intent: echo the challenge back to the hub */
+		httpstatus(202);
+		fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
+		fputs("\r\n", stdout);
+		printf("%s\r\n", challenge);
+		return 0;
+	} else if (mode[0]) {
+		badrequest("hub.mode: only subscribe or unsubscribe is support…
+	}
+
+	httpstatus(200);
+	fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
+	fputs("\r\n", stdout);
+	printf("pubsubhubbubblub running perfectly and flapping graciously in …
+
+	return 0;
+}
diff --git a/pubsub_gethub.c b/pubsub_gethub.c
@@ -0,0 +1,149 @@
+#include <err.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+
+#undef strlcat
+size_t strlcat(char *, const char *, size_t);
+
+#include "xml.h"
+
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? ((c) | 32) : (c))
+
+/* string and size */
+#define STRP(s) s,sizeof(s)-1
+
+static XMLParser parser;
+static int islinktag, ishrefattr, isrelattr;
+static char linkhref[4096], linkrel[256];
+
+/*
+ * Case-insensitive substring search, included for portability.
+ * Returns a pointer to the first position in `h` where `n` matches when
+ * ASCII letters are compared without regard to case, `h` itself when `n`
+ * is empty, or NULL when there is no match.
+ */
+char *
+strcasestr(const char *h, const char *n)
+{
+	const char *start;
+	size_t k;
+
+	if (n[0] == '\0')
+		return (char *)h;
+
+	for (start = h; *start != '\0'; start++) {
+		/* advance k while needle and haystack agree; the haystack's
+		 * NUL can never equal a needle character, so the scan stops
+		 * at the end of either string */
+		k = 0;
+		while (n[k] != '\0' &&
+		    TOLOWER((unsigned char)n[k]) == TOLOWER((unsigned char)start[k]))
+			k++;
+		if (n[k] == '\0')
+			return (char *)start;
+	}
+
+	return NULL;
+}
+
+/* Write `s` to stdout, leaving out control characters (see ISCNTRL). */
+static void
+printvalue(const char *s)
+{
+	const char *c;
+
+	for (c = s; *c != '\0'; c++) {
+		if (ISCNTRL((unsigned char)*c))
+			continue;
+		putchar(*c);
+	}
+}
+
+/*
+ * Parser callback for the start of a tag.
+ * Marks the tag as a link tag when its name is "link" or ends in ":link"
+ * (compared case-insensitively) and clears the collected href/rel values.
+ */
+static void
+xmltagstart(XMLParser *p, const char *t, size_t tl)
+{
+	const char *suffix;
+
+	islinktag = 0;
+
+	if (strcasecmp(t, "link") != 0) {
+		/* not a plain "link": accept a namespaced name whose first
+		 * ":link" occurrence is also the end of the name */
+		suffix = strcasestr(t, ":link");
+		if (suffix == NULL || strcasecmp(suffix, ":link") != 0)
+			return;
+	}
+
+	islinktag = 1;
+	linkhref[0] = '\0';
+	linkrel[0] = '\0';
+}
+
+/*
+ * Parser callback invoked once a start tag and its attributes are parsed.
+ * For a link tag whose rel value begins with "hub" or "self" (a
+ * case-insensitive prefix comparison) one line is printed:
+ *   href TAB rel NEWLINE
+ */
+static void
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+	int ishub, isself;
+
+	if (!islinktag)
+		return;
+
+	ishub = !strncasecmp(linkrel, STRP("hub"));
+	isself = !strncasecmp(linkrel, STRP("self"));
+	if (!ishub && !isself)
+		return;
+
+	printvalue(linkhref);
+	putchar('\t');
+	printvalue(linkrel);
+	putchar('\n');
+}
+
+/*
+ * Parser callback for the start of an attribute.
+ * Inside a link tag it records whether the attribute is "href" or "rel"
+ * (case-insensitive) and empties the matching value buffer.
+ */
+static void
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al)
+{
+	ishrefattr = 0;
+	isrelattr = 0;
+
+	if (!islinktag)
+		return;
+
+	if (strcasecmp(a, "href") == 0) {
+		ishrefattr = 1;
+		linkhref[0] = '\0';
+	} else if (strcasecmp(a, "rel") == 0) {
+		isrelattr = 1;
+		linkrel[0] = '\0';
+	}
+}
+
+/*
+ * Parser callback for a piece of attribute value.
+ * Appends `v` to the href or rel buffer of the current link tag; strlcat()
+ * bounds the copy to the buffer size.
+ */
+static void
+xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
+	const char *v, size_t vl)
+{
+	if (!islinktag)
+		return;
+
+	if (ishrefattr)
+		strlcat(linkhref, v, sizeof(linkhref));
+	else if (isrelattr)
+		strlcat(linkrel, v, sizeof(linkrel));
+}
+
+/*
+ * Parser callback for an entity inside an attribute value.
+ * Only acts for href/rel attributes: the entity is translated with
+ * xml_entitytostr() and the result handed to xmlattr(); when translation
+ * yields nothing (return value <= 0) the raw entity text is passed on.
+ */
+static void
+xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al,
+	const char *v, size_t vl)
+{
+	char decoded[16];
+	int n;
+
+	if (!(ishrefattr || isrelattr))
+		return;
+
+	n = xml_entitytostr(v, decoded, sizeof(decoded));
+	if (n > 0) {
+		xmlattr(p, t, tl, a, al, decoded, (size_t)n);
+		return;
+	}
+
+	xmlattr(p, t, tl, a, al, v, vl);
+}
+
+/*
+ * Read an RSS/Atom document from stdin and print the hub and self links
+ * found by the parser callbacks.
+ *
+ * Returns 0 on success and 1 when reading stdin or writing stdout failed,
+ * so that callers in a pipeline can detect incomplete output.
+ */
+int
+main(void)
+{
+	int status = 0;
+
+#ifdef __OpenBSD__
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+#endif
+
+	parser.xmlattr = xmlattr;
+	parser.xmlattrentity = xmlattrentity;
+	parser.xmlattrstart = xmlattrstart;
+	parser.xmltagstart = xmltagstart;
+	parser.xmltagstartparsed = xmltagstartparsed;
+
+	/* NOTE: getnext is defined in xml.h for inline optimization */
+	xml_parse(&parser);
+
+	if (ferror(stdin)) {
+		fputs("read error: <stdin>\n", stderr);
+		status = 1;
+	}
+	if (fflush(stdout) || ferror(stdout)) {
+		fputs("write error: <stdout>\n", stderr);
+		status = 1;
+	}
+
+	return status;
+}
diff --git a/pubsub_setup b/pubsub_setup
@@ -0,0 +1,133 @@
+#!/bin/sh
+
+# Option parsing: -s subscribes at the hub, -u unsubscribes at the hub.
+# NOTE(review): the option string also accepts "-c <arg>", but no case
+# below handles it.
+while getopts "c:su" opt; do
+	case "${opt}" in
+	s) dosubscribe=1 ;;
+	u) dounsubscribe=1 ;;
+	esac
+done
+shift $((OPTIND - 1))
+
+# Base URL that is prepended to "<feedname>/<token>" to form the callback.
+base="https://codemadness.org/pubsub/"
+
+# Find a SHA-256 tool: sha256sum (Linux) first, then sha256 (BSD).
+shacmd="$(command -v sha256sum)"
+if test "${shacmd}" = ""; then
+	shacmd="$(command -v sha256)"
+fi
+if test "${shacmd}" = ""; then
+	echo "No sha256 or sha256sum tool found" >&2
+	exit 1
+fi
+
+# sha(): hash stdin and print only the first space-separated output field.
+sha() {
+	${shacmd} | cut -d ' ' -f 1
+}
+
+# log(s): print the message s on stderr.
+log() {
+	echo "$1" >&2
+}
+
+# subscribe(feedname, hub, topic, callback, mode, secret)
+#
+# POST a subscription request to "<hub>/subscribe". mode defaults to
+# "subscribe"; pass "unsubscribe" to cancel a subscription.
+# Returns 0 when curl reports success and 1 otherwise; the outcome is also
+# logged on stderr.
+#
+# Every value is sent with --data-urlencode so that characters with a meaning
+# in a form-encoded body ('&', '=', '+', '%', ...) inside the callback or
+# topic URL cannot corrupt the request.
+subscribe() {
+	feedname="$1"
+	hub="$2"
+	topic="$3"
+	callback="$4"
+	mode="${5:-subscribe}"
+	secret="$6"
+	verify="async" # or "sync"
+	lease_seconds=""
+
+	# -v is kept for debugging; replace it with -s for quiet operation.
+	if curl -v -f -H 'User-Agent:' -m 15 \
+		-L --max-redirs 3 \
+		--data-urlencode "hub.callback=${callback}" \
+		--data-urlencode "hub.lease_seconds=${lease_seconds}" \
+		--data-urlencode "hub.mode=${mode}" \
+		--data-urlencode "hub.secret=${secret}" \
+		--data-urlencode "hub.topic=${topic}" \
+		--data-urlencode "hub.verify=${verify}" \
+		"${hub}/subscribe"; then
+		log "${mode} OK"
+		return 0
+	else
+		log "${mode} FAIL"
+		return 1
+	fi
+}
+
+# positional arguments: all three are required.
+feedname="$1"
+hub="$2"
+topic="$3"
+if test "$1" = "" -o "$2" = "" -o "$3" = ""; then
+ echo "usage: $0 [-s] [-u] <feedname> <hub> <topic>" >&2
+ exit 1
+fi
+
+# remember whether the feed's config directory already existed before the
+# directories are created below.
+isnew=1
+test -d "config/${feedname}" && isnew=0
+
+# directory layout, relative to the current working directory.
+mkdir -p "config/${feedname}"
+mkdir -p "feeds/${feedname}"
+mkdir -p "tmp/${feedname}"
+
+# general log
+touch "log"
+
+# refuse to subscribe again when a hub file was already saved for this feed.
+if test "${dosubscribe}" = "1"; then
+ f="config/${feedname}/hub"
+ if test -f "${f}"; then
+ echo "already registered? file exists: ${f}, skipping subscrib…
+ exit 1
+ fi
+fi
+
+# generate random token if it does not exist.
+# An existing token must never be replaced: it is part of the callback URL
+# that was registered at the hub, and unsubscribing needs the same URL.
+f="config/${feedname}/token"
+if ! test -f "${f}"; then
+	token="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
+	echo "${token}" > "${f}"
+fi
+
+# generate random secret if it does not exist.
+# An existing secret is kept as well: the hub signs messages with the secret
+# it was given when subscribing.
+f="config/${feedname}/secret"
+if ! test -f "${f}"; then
+	secret="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
+	echo "${secret}" > "${f}"
+fi
+
+# read config.
+f="config/${feedname}/token"
+token=$(cat "${f}" 2>/dev/null)
+f="config/${feedname}/secret"
+secret=$(cat "${f}" 2>/dev/null)
+
+# callback URL handed to the hub: <base><feedname>/<token>
+callback="${base}${feedname}/${token}"
+
+# -s: subscribe at the hub.
+if test "${dosubscribe}" = "1"; then
+ f="config/${feedname}/hub"
+ if test -f "${f}"; then
+ echo "already registered? file exists: ${f}, skipping subscrib…
+ exit 1
+ fi
+
+ # register at hub. save state when successfully registered.
+ if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "subscrib…
+ printf '%s\n' "${callback}" > "config/${feedname}/callback"
+ printf '%s\n' "${hub}" > "config/${feedname}/hub"
+ printf '%s\n' "${topic}" > "config/${feedname}/topic"
+ fi
+fi
+
+# -u: unsubscribe at the hub.
+if test "${dounsubscribe}" = "1"; then
+ # unregister at hub. remove state when successfully unregistered.
+ if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "unsubscr…
+ rm -f "config/${feedname}/callback"
+ rm -f "config/${feedname}/hub"
+ rm -f "config/${feedname}/topic"
+ fi
+fi
diff --git a/sha1.c b/sha1.c
@@ -0,0 +1,145 @@
+/* Public domain SHA1 implementation based on RFC3174 and libtomcrypt
+ Modified to make function prototypes compatible with OpenSSL / LibreSSL. */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "sha1.h"
+
+/* Rotate the 32-bit value n left by k bits; only called with k = 1, 5 and 30
+ * in this file (k = 0 would shift by the full width). */
+static uint32_t rol(uint32_t n, int k) { return (n << k) | (n >> (32-k)); }
+/* Round functions: F0 for rounds 0-19, F1 for 20-39, F2 for 40-59 and
+ * F3 (same expression as F1) for 60-79. */
+#define F0(b,c,d) (d ^ (b & (c ^ d)))
+#define F1(b,c,d) (b ^ c ^ d)
+#define F2(b,c,d) ((b & c) | (d & (b | c)))
+#define F3(b,c,d) (b ^ c ^ d)
+/* One round each: updates e and rotates b in place. The callers rotate the
+ * argument order instead of moving values between the five variables. */
+#define G0(a,b,c,d,e,i) e += rol(a,5)+F0(b,c,d)+W[i]+0x5A827999; b = rol(b,30)
+#define G1(a,b,c,d,e,i) e += rol(a,5)+F1(b,c,d)+W[i]+0x6ED9EBA1; b = rol(b,30)
+#define G2(a,b,c,d,e,i) e += rol(a,5)+F2(b,c,d)+W[i]+0x8F1BBCDC; b = rol(b,30)
+#define G3(a,b,c,d,e,i) e += rol(a,5)+F3(b,c,d)+W[i]+0xCA62C1D6; b = rol(b,30)
+
+/*
+ * Mix one 64-byte block `buf` into the hash state s->h.
+ * Only s->h is read and written; the length and buffer fields are untouched.
+ */
+static void
+processblock(SHA_CTX *s, const unsigned char *buf)
+{
+ uint32_t W[80], a, b, c, d, e;
+ int i;
+
+ /* load the block as 16 big-endian 32-bit words */
+ for (i = 0; i < 16; i++) {
+ W[i] = (uint32_t)buf[4*i]<<24;
+ W[i] |= (uint32_t)buf[4*i+1]<<16;
+ W[i] |= (uint32_t)buf[4*i+2]<<8;
+ W[i] |= buf[4*i+3];
+ }
+ /* expand to the 80-word message schedule */
+ for (; i < 80; i++)
+ W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
+ a = s->h[0];
+ b = s->h[1];
+ c = s->h[2];
+ d = s->h[3];
+ e = s->h[4];
+ /* 80 rounds in four groups of 20; each loop body performs five rounds
+ * and i is advanced by the macro arguments */
+ for (i = 0; i < 20; ) {
+ G0(a,b,c,d,e,i++);
+ G0(e,a,b,c,d,i++);
+ G0(d,e,a,b,c,i++);
+ G0(c,d,e,a,b,i++);
+ G0(b,c,d,e,a,i++);
+ }
+ while (i < 40) {
+ G1(a,b,c,d,e,i++);
+ G1(e,a,b,c,d,i++);
+ G1(d,e,a,b,c,i++);
+ G1(c,d,e,a,b,i++);
+ G1(b,c,d,e,a,i++);
+ }
+ while (i < 60) {
+ G2(a,b,c,d,e,i++);
+ G2(e,a,b,c,d,i++);
+ G2(d,e,a,b,c,i++);
+ G2(c,d,e,a,b,i++);
+ G2(b,c,d,e,a,i++);
+ }
+ while (i < 80) {
+ G3(a,b,c,d,e,i++);
+ G3(e,a,b,c,d,i++);
+ G3(d,e,a,b,c,i++);
+ G3(c,d,e,a,b,i++);
+ G3(b,c,d,e,a,i++);
+ }
+ /* fold the working variables back into the state */
+ s->h[0] += a;
+ s->h[1] += b;
+ s->h[2] += c;
+ s->h[3] += d;
+ s->h[4] += e;
+}
+
+/*
+ * Append the final padding to the buffered data and process it:
+ * a 0x80 byte, zero bytes up to offset 56 of the last block, then the
+ * message length in bits as a 64-bit big-endian value.
+ * Modifies c->len (converted from bytes to bits), so the context cannot be
+ * fed more data afterwards without SHA1_Init().
+ */
+static void
+pad(SHA_CTX *c)
+{
+ unsigned r = c->len % 64;
+
+ c->buf[r++] = 0x80;
+ /* no room left for the 8-byte length: fill up and flush this block */
+ if (r > 56) {
+ memset(c->buf + r, 0, 64 - r);
+ r = 0;
+ processblock(c, c->buf);
+ }
+ memset(c->buf + r, 0, 56 - r);
+ c->len *= 8;
+ c->buf[56] = c->len >> 56;
+ c->buf[57] = c->len >> 48;
+ c->buf[58] = c->len >> 40;
+ c->buf[59] = c->len >> 32;
+ c->buf[60] = c->len >> 24;
+ c->buf[61] = c->len >> 16;
+ c->buf[62] = c->len >> 8;
+ c->buf[63] = c->len;
+ processblock(c, c->buf);
+}
+
+/*
+ * Reset `c` for a new message: zero length and the five initial SHA1 state
+ * words. Always returns 1.
+ */
+int
+SHA1_Init(SHA_CTX *c)
+{
+	static const uint32_t initstate[5] = {
+		0x67452301,
+		0xEFCDAB89,
+		0x98BADCFE,
+		0x10325476,
+		0xC3D2E1F0
+	};
+	int i;
+
+	c->len = 0;
+	for (i = 0; i < 5; i++)
+		c->h[i] = initstate[i];
+
+	return 1;
+}
+
+/*
+ * Feed `len` bytes at `m` into the hash.
+ * Full 64-byte blocks are processed immediately; a trailing partial block
+ * is kept in c->buf until more data arrives or the hash is finalized.
+ * Always returns 1.
+ */
+int
+SHA1_Update(SHA_CTX *c, const void *m, size_t len)
+{
+	const uint8_t *src = m;
+	unsigned used = c->len % 64; /* bytes already waiting in c->buf */
+	unsigned room;
+
+	c->len += len;
+
+	if (used != 0) {
+		room = 64 - used;
+		if (len < room) {
+			/* still not a full block: just buffer the input */
+			memcpy(c->buf + used, src, len);
+			return 1;
+		}
+		/* complete the buffered block and process it */
+		memcpy(c->buf + used, src, room);
+		src += room;
+		len -= room;
+		processblock(c, c->buf);
+	}
+
+	/* process whole blocks straight from the input */
+	while (len >= 64) {
+		processblock(c, src);
+		src += 64;
+		len -= 64;
+	}
+
+	/* keep the remainder for later */
+	memcpy(c->buf, src, len);
+	return 1;
+}
+
+/*
+ * Finish the hash: pad the message and store the 20-byte digest in `md`,
+ * each state word written most significant byte first. Always returns 1.
+ */
+int
+SHA1_Final(unsigned char *md, SHA_CTX *c)
+{
+	unsigned char *out = md;
+	uint32_t word;
+	int i;
+
+	pad(c);
+	for (i = 0; i < 5; i++) {
+		word = c->h[i];
+		*out++ = word >> 24;
+		*out++ = word >> 16;
+		*out++ = word >> 8;
+		*out++ = word;
+	}
+
+	return 1;
+}
diff --git a/sha1.h b/sha1.h
@@ -0,0 +1,13 @@
+#ifndef SHA1_H
+#define SHA1_H
+
+/* size_t is used by SHA1_Update(); include it here so the header does not
+ * depend on what the including file happened to include before it */
+#include <stddef.h>
+#include <stdint.h>
+
+/* SHA1 hashing state, named to match the OpenSSL / LibreSSL type. */
+typedef struct sha1 {
+	uint64_t len;    /* processed message length */
+	uint32_t h[5];   /* hash state */
+	uint8_t buf[64]; /* message block buffer */
+} SHA_CTX;
+
+/* size of a SHA1 digest in bytes */
+#define SHA_DIGEST_LENGTH 20
+
+/* All three functions return 1. */
+int SHA1_Init(SHA_CTX *);
+int SHA1_Update(SHA_CTX *, const void *, size_t);
+int SHA1_Final(unsigned char *, SHA_CTX *);
+
+#endif /* SHA1_H */
diff --git a/strlcat.c b/strlcat.c
@@ -0,0 +1,54 @@
+/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ …
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string.h>
+
+/*
+ * Append src to the string in dst, where dsize is the full size of the dst
+ * buffer (not the space remaining, unlike strncat). The result never holds
+ * more than dsize-1 characters and is NUL-terminated, except when dst
+ * contains no NUL within its first dsize bytes, in which case dst is left
+ * untouched.
+ * Returns strlen(src) + MIN(dsize, strlen(initial dst)); a return value
+ * >= dsize means the result was truncated.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t dsize)
+{
+	size_t dlen = 0; /* length of the initial dst, capped at dsize */
+	size_t slen = 0; /* number of src characters seen */
+	size_t room;     /* characters that still fit before the NUL */
+
+	/* Measure dst without reading past the end of the buffer. */
+	while (dlen < dsize && dst[dlen] != '\0')
+		dlen++;
+
+	/* No terminator inside the buffer: nothing can be appended. */
+	if (dlen == dsize)
+		return(dlen + strlen(src));
+
+	room = dsize - dlen - 1;
+	for (; src[slen] != '\0'; slen++) {
+		if (slen < room)
+			dst[dlen + slen] = src[slen];
+	}
+	dst[dlen + (slen < room ? slen : room)] = '\0';
+
+	return(dlen + slen); /* count does not include NUL */
+}
diff --git a/xml.c b/xml.c
@@ -0,0 +1,415 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xml.h"
+
+/* ASCII-only character classification, independent of the locale.
+ * The argument is converted to unsigned so that values below the start of
+ * the range wrap around and fail the single "<" comparison.
+ * ISALPHA: folds case with "| 32", then tests for 'a'..'z'.
+ * ISSPACE: ' ' or one of the 5 characters '\t'..'\r' (\t \n \v \f \r).
+ * The argument is fully parenthesized so any expression can be passed;
+ * note that ISSPACE evaluates its argument twice. */
+#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
+
+/* Parse the attributes of the current tag.
+ * Reads characters with GETNEXT() until a '>' or EOF and reports what it
+ * finds through the handlers that are set in x: xmlattrstart, xmlattr,
+ * xmlattrentity and xmlattrend. The attribute name is collected in x->name,
+ * the value in x->data; a value that does not fit in x->data is passed to
+ * xmlattr in several pieces. A '/' marks the tag as being in short form. */
+static void
+xml_parseattrs(XMLParser *x)
+{
+ size_t namelen = 0, valuelen;
+ int c, endsep, endname = 0, valuestart = 0;
+
+ while ((c = GETNEXT()) != EOF) {
+ /* whitespace after at least one name character ends the name */
+ if (ISSPACE(c)) {
+ if (namelen)
+ endname = 1;
+ continue;
+ } else if (c == '?')
+ ; /* ignore */
+ else if (c == '=') {
+ x->name[namelen] = '\0';
+ valuestart = 1;
+ endname = 1;
+ } else if (namelen && ((endname && !valuestart && ISALPHA(c)) …
+ /* attribute without value */
+ x->name[namelen] = '\0';
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,…
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, name…
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, n…
+ endname = 0;
+ /* the current character starts the next attribute name */
+ x->name[0] = c;
+ namelen = 1;
+ } else if (namelen && valuestart) {
+ /* attribute with value */
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,…
+
+ valuelen = 0;
+ /* a quoted value ends at the matching quote; an unquoted value
+ * uses ' ' as a marker and its first character (c) is already
+ * part of the value, hence the jump into the loop */
+ if (c == '\'' || c == '"') {
+ endsep = c;
+ } else {
+ endsep = ' '; /* ISSPACE() */
+ goto startvalue;
+ }
+
+ while ((c = GETNEXT()) != EOF) {
+startvalue:
+ if (c == '&') { /* entities */
+ x->data[valuelen] = '\0';
+ /* call data function with data before…
+ if (valuelen && x->xmlattr)
+ x->xmlattr(x, x->tag, x->tagle…
+ x->data[0] = c;
+ valuelen = 1;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == endsep || (endsep == …
+ break;
+ if (valuelen < sizeof(x->data)…
+ x->data[valuelen++] = …
+ else {
+ /* entity too long for…
+ x->data[valuelen] = '\…
+ if (x->xmlattr)
+ x->xmlattr(x, …
+ x->data[0] = c;
+ valuelen = 1;
+ break;
+ }
+ if (c == ';') {
+ x->data[valuelen] = '\…
+ if (x->xmlattrentity)
+ x->xmlattrenti…
+ valuelen = 0;
+ break;
+ }
+ }
+ } else if (c != endsep && !(endsep == ' ' && (…
+ if (valuelen < sizeof(x->data) - 1) {
+ x->data[valuelen++] = c;
+ } else {
+ /* buffer full: pass this piece on and start a new one */
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, …
+ x->data[0] = c;
+ valuelen = 1;
+ }
+ }
+ if (c == endsep || (endsep == ' ' && (c == '>'…
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->tagle…
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->ta…
+ break;
+ }
+ }
+ namelen = endname = valuestart = 0;
+ } else if (namelen < sizeof(x->name) - 1) {
+ /* name characters that do not fit in x->name are dropped */
+ x->name[namelen++] = c;
+ }
+ if (c == '>') {
+ break;
+ } else if (c == '/') {
+ /* short form: flag it and discard the name collected so far */
+ x->isshorttag = 1;
+ x->name[0] = '\0';
+ namelen = 0;
+ }
+ }
+}
+
+/* Skip the remainder of a comment: consume input with GETNEXT() until the
+ * terminating "-->" has been read or EOF is reached. No handler is called. */
+static void
+xml_parsecomment(XMLParser *x)
+{
+	int ch, dashes;
+
+	/* dashes: number of consecutive '-' just read, capped at 2 */
+	for (dashes = 0; (ch = GETNEXT()) != EOF; ) {
+		if (ch == '-') {
+			if (dashes < 2)
+				dashes++;
+		} else if (ch == '>' && dashes == 2) {
+			return;
+		} else {
+			dashes = 0;
+		}
+	}
+}
+
+/* Read the contents of a CDATA section with GETNEXT() until the terminating
+ * "]]>" or EOF. The text is collected in x->data and handed to the xmlcdata
+ * handler (if set) in pieces: whenever a ']' or '>' is read, whenever the
+ * buffer is full, and as separate 1-byte "]" strings for brackets that turn
+ * out not to be part of the terminator. */
+static void
+xml_parsecdata(XMLParser *x)
+{
+	size_t len = 0, brackets = 0;
+	int ch;
+
+	while ((ch = GETNEXT()) != EOF) {
+		/* possible terminator character: pass on the pending text */
+		if ((ch == ']' || ch == '>') && x->xmlcdata && len) {
+			x->data[len] = '\0';
+			x->xmlcdata(x, x->data, len);
+			len = 0;
+		}
+
+		if (ch == ']') {
+			if (brackets < 2) {
+				brackets++;
+			} else if (x->xmlcdata) {
+				/* third ']' in a row: the oldest one is literal */
+				x->xmlcdata(x, "]", 1);
+			}
+			continue;
+		}
+		if (ch == '>' && brackets == 2)
+			return; /* "]]>" read: end of section */
+
+		/* pending ']' were not a terminator: emit them literally */
+		if (x->xmlcdata) {
+			for (; brackets > 0; brackets--)
+				x->xmlcdata(x, "]", 1);
+		}
+		brackets = 0;
+
+		/* buffer full: pass it on before storing the character */
+		if (len >= sizeof(x->data) - 1) {
+			x->data[len] = '\0';
+			if (x->xmlcdata)
+				x->xmlcdata(x, x->data, len);
+			len = 0;
+		}
+		x->data[len++] = ch;
+	}
+}
+
+/* Encode code point r as UTF-8 into s (not NUL-terminated).
+ * Returns the number of bytes written (1 to 4), or 0 without writing
+ * anything when r is 0. The value is not validated here; s must have room
+ * for 4 bytes. */
+static int
+codepointtoutf8(long r, char *s)
+{
+	if (r == 0)
+		return 0; /* NUL byte */
+
+	if (r <= 0x7F) {
+		/* 1 byte: 0aaaaaaa */
+		s[0] = r;
+		return 1;
+	}
+
+	if (r <= 0x07FF) {
+		/* 2 bytes: 110aaaaa 10bbbbbb */
+		s[0] = 0xC0 | ((r >> 6) & 0x1F);
+		s[1] = 0x80 | (r & 0x3F);
+		return 2;
+	}
+
+	if (r <= 0xFFFF) {
+		/* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
+		s[0] = 0xE0 | ((r >> 12) & 0x0F);
+		s[1] = 0x80 | ((r >> 6) & 0x3F);
+		s[2] = 0x80 | (r & 0x3F);
+		return 3;
+	}
+
+	/* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
+	s[0] = 0xF0 | ((r >> 18) & 0x07);
+	s[1] = 0x80 | ((r >> 12) & 0x3F);
+	s[2] = 0x80 | ((r >> 6) & 0x3F);
+	s[3] = 0x80 | (r & 0x3F);
+	return 4;
+}
+
+/* Convert the body of a named entity (the text after '&', including the
+ * trailing ';', e.g. "amp;") to a NUL-terminated 1-character string in buf.
+ * Only the five entities in the table below are known.
+ * Returns 1 on success, or -1 when bufsiz is below 2 or the name is not
+ * in the table. */
+static int
+namedentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	static const struct {
+		const char *entity;
+		int c;
+	} entities[] = {
+		{ "amp;", '&' },
+		{ "lt;", '<' },
+		{ "gt;", '>' },
+		{ "apos;", '\'' },
+		{ "quot;", '"' },
+	};
+	const size_t count = sizeof(entities) / sizeof(entities[0]);
+	size_t idx = 0;
+
+	/* need room for the character and the NUL byte */
+	if (bufsiz < 2)
+		return -1;
+
+	/* linear search for an exact match */
+	while (idx < count && strcmp(e, entities[idx].entity) != 0)
+		idx++;
+	if (idx == count)
+		return -1;
+
+	buf[0] = entities[idx].c;
+	buf[1] = '\0';
+	return 1;
+}
+
+/* Convert the body of a numeric character reference (the text after "&#",
+ * e.g. "65;" or "x41;") to a NUL-terminated UTF-8 string in buf.
+ * Only the forms <decimal digits>; and x<hex digits>; are accepted.
+ * bufsiz must be at least 5 (up to 4 UTF-8 bytes plus the NUL byte).
+ * Returns the byte length of the string (0 for code point 0), or -1 when
+ * the buffer is too small, the reference is malformed or the value is not
+ * a valid code point (above 0x10ffff or a surrogate). */
+static int
+numericentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	const char *digits = "0123456789";
+	size_t ndigits;
+	long l;
+	int len, base = 10;
+	char *end;
+
+	/* buffer is too small */
+	if (bufsiz < 5)
+		return -1;
+
+	/* hex (16) or decimal (10) */
+	if (*e == 'x') {
+		digits = "0123456789abcdefABCDEF";
+		base = 16;
+		e++;
+	}
+
+	/* strtol() on its own also skips leading whitespace and accepts a
+	 * sign and, for base 16, a "0x" prefix. None of these may appear in
+	 * a character reference, so require plain digits up to the ';'. */
+	ndigits = strspn(e, digits);
+	if (ndigits == 0 || e[ndigits] != ';')
+		return -1;
+
+	errno = 0;
+	l = strtol(e, &end, base);
+	/* value out of range, not fully consumed or invalid code point */
+	if (errno || end != e + ndigits || l < 0 || l > 0x10ffff ||
+	    (l >= 0xd800 && l <= 0xdfff))
+		return -1;
+	len = codepointtoutf8(l, buf);
+	buf[len] = '\0';
+
+	return len;
+}
+
+/* Convert an entity string, starting at its '&', to its text in buf.
+ * "&#..." is handled as a numeric entity, anything else as a named entity.
+ * Returns the byte-length of the resulting string or -1 on failure. */
+int
+xml_entitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	/* an entity always starts with & */
+	if (e[0] != '&')
+		return -1;
+
+	return (e[1] == '#')
+	    ? numericentitytostr(e + 2, buf, bufsiz) /* numeric entity */
+	    : namedentitytostr(e + 1, buf, bufsiz);  /* named entity */
+}
+
+/* Parse XML read with GETNEXT() until EOF, calling the handlers set in x.
+ * Input before the first '<' is skipped. After that the loop alternates
+ * between parsing a tag (comment, CDATA section, start tag, end tag, short
+ * tag or processing instruction) and parsing the character data that
+ * follows it up to the next '<'. Tag names are collected in x->tag, data
+ * and entities in x->data; data that does not fit in x->data is passed to
+ * the xmldata handler in several pieces. */
+void
+xml_parse(XMLParser *x)
+{
+ size_t datalen, tagdatalen;
+ int c, isend;
+
+ while ((c = GETNEXT()) != EOF && c != '<')
+ ; /* skip until < */
+
+ while (c != EOF) {
+ if (c == '<') { /* parse tag */
+ if ((c = GETNEXT()) == EOF)
+ return;
+
+ if (c == '!') { /* cdata and comments */
+ /* x->data temporarily holds the first characters after "<!" */
+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
+ /* NOTE: sizeof(x->data) must be at le…
+ if (tagdatalen <= sizeof("[CDATA[") - …
+ x->data[tagdatalen++] = c;
+ if (c == '>')
+ break;
+ else if (c == '-' && tagdatalen == siz…
+ (x->data[0] == '-')) {
+ xml_parsecomment(x);
+ break;
+ } else if (c == '[') {
+ if (tagdatalen == sizeof("[CDA…
+ !strncmp(x->data, "[CDATA[…
+ xml_parsecdata(x);
+ break;
+ }
+ }
+ }
+ } else {
+ /* normal tag (open, short open, close), proce…
+ x->tag[0] = c;
+ x->taglen = 1;
+ x->isshorttag = isend = 0;
+
+ /* treat processing instruction as shorttag, d…
+ if (c == '?') {
+ x->isshorttag = 1;
+ } else if (c == '/') {
+ /* end tag: the name starts at the next character */
+ if ((c = GETNEXT()) == EOF)
+ return;
+ x->tag[0] = c;
+ isend = 1;
+ }
+
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '/')
+ x->isshorttag = 1; /* short ta…
+ else if (c == '>' || ISSPACE(c)) {
+ x->tag[x->taglen] = '\0';
+ if (isend) { /* end tag, start…
+ if (x->xmltagend)
+ x->xmltagend(x…
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ } else {
+ /* start tag */
+ if (x->xmltagstart)
+ x->xmltagstart…
+ if (ISSPACE(c))
+ xml_parseattrs…
+ if (x->xmltagstartpars…
+ x->xmltagstart…
+ }
+ /* call tagend for shortform o…
+ if (x->isshorttag) {
+ if (x->xmltagend)
+ x->xmltagend(x…
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ }
+ break;
+ } else if (x->taglen < sizeof(x->tag) …
+ x->tag[x->taglen++] = c; /* NO…
+ }
+ }
+ } else {
+ /* parse tag data */
+ datalen = 0;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '&') {
+ /* pass on the data read before the entity */
+ if (datalen) {
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data,…
+ }
+ x->data[0] = c;
+ datalen = 1;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '<')
+ break;
+ if (datalen < sizeof(x->data) …
+ x->data[datalen++] = c;
+ else {
+ /* entity too long for…
+ x->data[datalen] = '\0…
+ if (x->xmldata)
+ x->xmldata(x, …
+ x->data[0] = c;
+ datalen = 1;
+ break;
+ }
+ if (c == ';') {
+ x->data[datalen] = '\0…
+ if (x->xmldataentity)
+ x->xmldataenti…
+ datalen = 0;
+ break;
+ }
+ }
+ } else if (c != '<') {
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ /* buffer full: pass this piece on and start a new one */
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data,…
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+ /* start of the next tag: pass on the remaining data */
+ if (c == '<') {
+ x->data[datalen] = '\0';
+ if (x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen…
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/xml.h b/xml.h
@@ -0,0 +1,43 @@
+#ifndef _XML_H_
+#define _XML_H_
+
+#include <stdio.h>
+
+/* Parser context: the callbacks to invoke and the buffers the parser works
+ * in. Every handler receives the parser itself as its first argument. */
+typedef struct xmlparser {
+ /* handlers */
+ /* Handlers are optional: the parser checks each pointer before calling
+ * it. NOTE(review): judging by the calls in xml.c the (const char *,
+ * size_t) pairs are a string and its length, in the order tag, attribute
+ * name, value - confirm, several call sites are not fully visible. */
+ void (*xmlattr)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t);
+ void (*xmlattrend)(struct xmlparser *, const char *, size_t,
+ const char *, size_t);
+ void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
+ const char *, size_t);
+ void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t);
+ /* CDATA text and its length; may be called several times per section */
+ void (*xmlcdata)(struct xmlparser *, const char *, size_t);
+ /* character data and its length; may be called several times */
+ void (*xmldata)(struct xmlparser *, const char *, size_t);
+ void (*xmldataentity)(struct xmlparser *, const char *, size_t);
+ void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
+ void (*xmltagstart)(struct xmlparser *, const char *, size_t);
+ void (*xmltagstartparsed)(struct xmlparser *, const char *,
+ size_t, int);
+
+ /* GETNEXT is the function or macro the parser calls to read the next
+ * character; it is compared against EOF. It defaults to getchar and can
+ * be defined before this header is included to read from elsewhere. */
+#ifndef GETNEXT
+ /* GETNEXT overridden to reduce function call overhead and further
+ context optimizations. */
+ #define GETNEXT getchar
+#endif
+
+ /* current tag */
+ char tag[1024];
+ size_t taglen;
+ /* current tag is in short form ? <tag /> */
+ int isshorttag;
+ /* current attribute name */
+ char name[1024];
+ /* data buffer used for tag data, cdata and attribute data */
+ char data[BUFSIZ];
+} XMLParser;
+
+/* Convert a named or numeric entity string (starting with '&') to its text
+ * in the buffer of the given size. Returns the byte-length of the string
+ * or -1 on failure. */
+int xml_entitytostr(const char *, char *, size_t);
+/* Parse XML read with GETNEXT() until EOF, calling the handlers that are
+ * set in the parser context. */
+void xml_parse(XMLParser *);
+#endif
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.