initial import - pubsubhubbubblub - pubsubhubbub client implementation | |
git clone git://git.codemadness.org/pubsubhubbubblub | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit a9f9a229d5be860a5fdab051fbda7ece66d2dd64 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sat, 28 May 2022 12:09:41 +0200 | |
initial import | |
Diffstat: | |
A LICENSE | 15 +++++++++++++++ | |
A Makefile | 17 +++++++++++++++++ | |
A README | 116 ++++++++++++++++++++++++++++++ | |
A hmac_sha1.c | 63 +++++++++++++++++++++++++++++… | |
A hmac_sha1.h | 4 ++++ | |
A pubsub_cgi.c | 463 +++++++++++++++++++++++++++++… | |
A pubsub_gethub.c | 149 +++++++++++++++++++++++++++++… | |
A pubsub_setup | 133 +++++++++++++++++++++++++++++… | |
A sha1.c | 145 +++++++++++++++++++++++++++++… | |
A sha1.h | 13 +++++++++++++ | |
A strlcat.c | 54 +++++++++++++++++++++++++++++… | |
A xml.c | 415 ++++++++++++++++++++++++++++++ | |
A xml.h | 43 ++++++++++++++++++++++++++++++ | |
13 files changed, 1630 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/LICENSE b/LICENSE | |
@@ -0,0 +1,15 @@ | |
+ISC License | |
+ | |
+Copyright (c) 2022 Hiltjo Posthuma <[email protected]> | |
+ | |
+Permission to use, copy, modify, and/or distribute this software for any | |
+purpose with or without fee is hereby granted, provided that the above | |
+copyright notice and this permission notice appear in all copies. | |
+ | |
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
diff --git a/Makefile b/Makefile | |
@@ -0,0 +1,17 @@ | |
+.POSIX: | |
+ | |
+PREFIX = /usr/local | |
+CGIDIR = /var/www/cgi-bin | |
+ | |
+build: clean | |
+ ${CC} -c sha1.c ${CFLAGS} ${CPPFLAGS} | |
+ ${CC} -c hmac_sha1.c ${CFLAGS} ${CPPFLAGS} | |
+ ${CC} -c strlcat.c xml.c ${CFLAGS} ${CPPFLAGS} | |
+ ${CC} -c pubsub_cgi.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE | |
+ ${CC} -c pubsub_gethub.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE | |
+ # link | |
+ ${CC} -o pubsub_cgi hmac_sha1.o sha1.o pubsub_cgi.o ${LDFLAGS} -static… | |
+ ${CC} -o pubsub_gethub strlcat.o xml.o pubsub_gethub.o ${LDFLAGS} | |
+ | |
+clean: | |
+ rm -f *.o pubsub_cgi pubsub_gethub | |
diff --git a/README b/README | |
@@ -0,0 +1,116 @@ | |
+pubsubhubbubblub | |
+---------------- | |
+ | |
+Generic pubsubhubbub client implementation. | |
+Helper scripts to use it with sfeed. | |
+ | |
+ | |
+What is it | |
+---------- | |
+ | |
+pubsubhubbub is a publisher/subscriber technology used to push updates in a we… | |
+This allows to push content updates, instead of polling for news in an interva… | |
+ | |
+ | |
+Features | |
+-------- | |
+ | |
+- Not many dependencies. | |
+- Uses pledge and unveil on OpenBSD. | |
+- Signatures (hub.secret) support, Pubsubhub 0.4 core SHA1 only. | |
+ | |
+ | |
+Dependencies | |
+------------ | |
+ | |
+- C compiler | |
+ | |
+ | |
+Files | |
+----- | |
+ | |
+pubsub_cgi.c - Small stupid PubSubHubBub implementation as a CGI program. | |
+pubsub_gethub - Helper program extract a hub and feed URL from a RSS or At… | |
+pubsub_setup - Helper script that sets up the directory structure for | |
+ processing the feed for the CGI program. It has an | |
+ -s option to subscribe and an -u option to unsubscribe at … | |
+ | |
+ | |
+How to install | |
+-------------- | |
+ | |
+For the CGI program: | |
+ | |
+OpenBSD httpd and slowcgi, httpd.conf: | |
+ | |
+ location "/pubsub/**" { | |
+ request strip 1 | |
+ root "/cgi-bin/pubsub" | |
+ fastcgi socket "/run/slowcgi.sock" | |
+ } | |
+ | |
+Compile pubsub_cgi.c statically and copy the resulting binary to /var/www/cgi-bin/pubsub | |
+ | |
+- Create a directory with write-access for the pubsub CGI program | |
+ /var/www/pubsub-data/feedname. The setup_feed.sh script can be used to crea… | |
+ the directories. | |
+- Make sure to set the proper permissions for the CGI program (slowcgi) and | |
+ HTTPd. | |
+- The base URL of the CGI program can be changed in the pubsub_setup script. | |
+ | |
+ | |
+How does it work | |
+---------------- | |
+ | |
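+The CGI program is reached through a callback URL of the form | |
+<base URL>/<feedname>/<token>, for example: | |
+https://codemadness.org/pubsub/slashdot/secrettoken | |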
+ | |
+ | |
+Directory structure: | |
+ | |
+/pubsub-data/config/feedname/ - Directory with metadata about the feed. | |
+/pubsub-data/config/feedname/hub - The hub URL, for example http://pubsubhu… | |
+/pubsub-data/config/feedname/topic - hub.topic, the feed URL. | |
+/pubsub-data/config/feedname/secret - hub.secret for calculating the message d… | |
+ see Section 8 of Pubsubhubbub core 0.4. | |
+/pubsub-data/config/feedname/token - File containing a line with a secret tok… | |
+ is not easy guessable (by different hubs… | |
+/pubsub-data/feeds/feedname/ - Directory containing processed messages. | |
+/pubsub-data/tmp/feedname/ - Temporary directory to process messages. | |
+ Moves to the feeds/feedname directory on… | |
+/pubsub-data/log - Log file, TAB-separated. | |
+ | |
+ | |
+Example | |
+------- | |
+ | |
+Get the hub and feed URL: | |
+ | |
+ curl -s http://rss.slashdot.org/Slashdot/slashdot | pubsub_gethub | |
+ | |
+ http://rss.slashdot.org/Slashdot/slashdot self | |
+ http://pubsubhubbub.appspot.com/ hub | |
+ | |
+Set up the feed for the CGI program: | |
+ cd /var/www/pubsub-data | |
+ pubsub_setup -s 'slashdot' 'http://pubsubhubbub.appspot.com/' 'http://… | |
+ | |
+ | |
+Monitor script example | |
+---------------------- | |
+ | |
+This monitors the log file using tail(1) and uses sfeed and sfeed_plain to wri… | |
+This can then be piped to the suckless ii(1) program for IRC notifications for… | |
+It uses sfeed for parsing RSS and Atom content and formats it to a plain-text … | |
+ | |
+ #!/bin/sh | |
+ cd /var/www/pubsub-data | |
+ tail -f log | \ | |
+ LC_ALL=C awk '{ print $2 "\t" $3; fflush(); }' | \ | |
+ while IFS=" " read -r feed file; do sfeed < "feeds/${fe… | |
+ sfeed_plain | |
+ | |
+ | |
+References | |
+---------- | |
+ | |
+Pubsubhubbub core 0.4: https://pubsubhubbub.github.io/PubSubHubbub/pubsubhubbu… | |
diff --git a/hmac_sha1.c b/hmac_sha1.c | |
@@ -0,0 +1,63 @@ | |
+/* Adapted from RFC2104 hmac_md5, some code-style changes and data streaming s… | |
+ | |
+#include <string.h> | |
+#include <stdio.h> | |
+ | |
+#include "hmac_sha1.h" | |
+ | |
+void | |
+hmac_sha1_init(SHA_CTX *ctx, const unsigned char *key, size_t key_len, | |
+unsigned char *k_opad, size_t k_opadlen) | |
+{ | |
+ SHA_CTX tctx; | |
+ unsigned char k_ipad[65]; /* inner padding - key XORd with ipad */ | |
+ unsigned char tk[20]; | |
+ int i; | |
+ | |
+ /* if key is longer than 64 bytes reset it to key=SHA1(key) */ | |
+ if (key_len > 64) { | |
+ SHA1_Init(&tctx); | |
+ SHA1_Update(&tctx, key, key_len); | |
+ SHA1_Final(tk, &tctx); | |
+ | |
+ key = tk; | |
+ key_len = 20; | |
+ } | |
+ | |
+ /* | |
+ * the HMAC_SHA1 transform looks like: | |
+ * | |
+ * SHA1(K XOR opad, SHA1(K XOR ipad, text)) | |
+ * | |
+ * where K is an n byte key | |
+ * ipad is the byte 0x36 repeated 64 times | |
+ * opad is the byte 0x5c repeated 64 times | |
+ * and text is the data being protected | |
+ */ | |
+ | |
+ /* start out by storing key in pads */ | |
+ memset(k_ipad, 0, sizeof(k_ipad)); | |
+ memset(k_opad, 0, k_opadlen); | |
+ memcpy(k_ipad, key, key_len); | |
+ memcpy(k_opad, key, key_len); | |
+ | |
+ /* XOR key with ipad and opad values */ | |
+ for (i = 0; i < 64; i++) { | |
+ k_ipad[i] ^= 0x36; | |
+ k_opad[i] ^= 0x5c; | |
+ } | |
+ /* perform inner SHA1 */ | |
+ SHA1_Init(ctx); /* init context for 1st pass */ | |
+ SHA1_Update(ctx, k_ipad, 64); /* start with inner pad */ | |
+} | |
+ | |
+void | |
+hmac_sha1_final(SHA_CTX *ctx, const unsigned char *k_opad, unsigned char *dige… | |
+{ | |
+ SHA1_Final(digest, ctx); /* finish up 1st pass */ | |
+ /* perform outer SHA1 */ | |
+ SHA1_Init(ctx); /* init context for 2nd pass */ | |
+ SHA1_Update(ctx, k_opad, 64); /* start with outer pad */ | |
+ SHA1_Update(ctx, digest, 20); /* then results of 1st hash */ | |
+ SHA1_Final(digest, ctx); /* finish up 2nd pass */ | |
+} | |
diff --git a/hmac_sha1.h b/hmac_sha1.h | |
@@ -0,0 +1,4 @@ | |
+#include "sha1.h" | |
+ | |
+void hmac_sha1_init(SHA_CTX *, const unsigned char *, size_t, unsigned char *,… | |
+void hmac_sha1_final(SHA_CTX *, const unsigned char *, unsigned char *); | |
diff --git a/pubsub_cgi.c b/pubsub_cgi.c | |
@@ -0,0 +1,463 @@ | |
+#include <sys/stat.h> | |
+ | |
+#include <ctype.h> | |
+#include <err.h> | |
+#include <errno.h> | |
+#include <limits.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <time.h> | |
+#include <unistd.h> | |
+ | |
+#ifdef __OpenBSD__ | |
+#include <unistd.h> | |
+#else | |
+#define pledge(p1,p2) 0 | |
+#define unveil(p1,p2) 0 | |
+#endif | |
+ | |
+#include "hmac_sha1.h" | |
+ | |
+static const char *relpath = "/pubsub/"; | |
+ | |
+#define DATADIR "/pubsub-data" | |
+ | |
+static const char *configdir = DATADIR "/config"; | |
+static const char *datadir = DATADIR "/feeds"; | |
+static const char *tmpdir = DATADIR "/tmp"; | |
+static const char *logfile = DATADIR "/log"; | |
+static time_t now; | |
+ | |
/*
 * Read the first line of the file at `path`.
 *
 * Returns a pointer to a static buffer holding the line with the trailing
 * newline removed, or NULL when the file cannot be opened or nothing can be
 * read. At most sizeof(buffer) - 1 bytes of the line are kept. The buffer is
 * reused, so the next call overwrites the previous result.
 */
char *
readfile(const char *path)
{
	static char line[256];
	FILE *in;
	char *got;

	in = fopen(path, "rb");
	if (in == NULL)
		return NULL;

	got = fgets(line, sizeof(line), in);
	fclose(in);
	if (got == NULL)
		return NULL;

	/* cut at the first newline, if there is one */
	line[strcspn(line, "\n")] = '\0';
	return line;
}
+ | |
/*
 * Value of the hexadecimal digit `c` (0-15).
 * Returns 0 for anything that is not a hex digit, so callers that need to
 * tell "0" from "invalid" must check with isxdigit() first.
 */
int
hexdigit(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	else if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	else if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	return 0;
}

/*
 * URL-decode `s` into `buf`, stopping at the NUL terminator or at the first
 * occurrence of the separator byte `end`.
 *
 * "%XX" becomes the byte with hex value XX, '+' becomes a space, everything
 * else is copied unchanged. The result is always NUL-terminated.
 *
 * Returns the number of decoded bytes (not counting the NUL), or -1 when
 * bufsiz is 0, the output does not fit in bufsiz bytes, or a '%' is not
 * followed by two hex digits.
 */
int
decodeparamuntilend(char *buf, size_t bufsiz, const char *s, int end)
{
	size_t i;

	if (!bufsiz)
		return -1;

	for (i = 0; *s && *s != end; s++) {
		/*
		 * Every input unit ("%XX", '+' or a plain byte) produces exactly
		 * one output byte, so one free slot plus room for the NUL is
		 * enough. The "%XX" case used to demand three free slots and
		 * rejected input that would have fit.
		 */
		if (i + 1 >= bufsiz)
			return -1;

		switch (*s) {
		case '%':
			/* s[2] is only inspected when s[1] is a hex digit, so
			 * this never reads past the terminating NUL */
			if (!isxdigit((unsigned char)s[1]) ||
			    !isxdigit((unsigned char)s[2]))
				return -1;
			buf[i++] = hexdigit(s[1]) * 16 + hexdigit(s[2]);
			s += 2;
			break;
		case '+':
			buf[i++] = ' ';
			break;
		default:
			buf[i++] = *s;
			break;
		}
	}
	buf[i] = '\0';

	return (int)i;
}

/*
 * URL-decode one query-string value: like decodeparamuntilend() with '&'
 * as the separator.
 */
int
decodeparam(char *buf, size_t bufsiz, const char *s)
{
	return decodeparamuntilend(buf, bufsiz, s, '&');
}
+ | |
+char * | |
+getparam(const char *query, const char *s) | |
+{ | |
+ const char *p, *last = NULL; | |
+ size_t len; | |
+ | |
+ len = strlen(s); | |
+ for (p = query; (p = strstr(p, s)); p += len) { | |
+ if (p[len] == '=' && (p == query || p[-1] == '&' || p[-1] == '… | |
+ last = p + len + 1; | |
+ } | |
+ | |
+ return (char *)last; | |
+} | |
+ | |
/*
 * Map an HTTP status code to its "<code> <reason>" text.
 * Returns NULL for codes that are not in the table.
 */
const char *
httpstatusmsg(int code)
{
	static const struct {
		int code;
		const char *text;
	} known[] = {
		{ 200, "200 OK" },
		{ 202, "202 Accepted" },
		{ 400, "400 Bad Request" },
		{ 403, "403 Forbidden" },
		{ 404, "404 Not Found" },
		{ 500, "500 Internal Server Error" },
	};
	size_t k;

	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
		if (known[k].code == code)
			return known[k].text;
	}
	return NULL;
}

/*
 * Print the CGI "Status:" header for `code` to stdout.
 * Prints nothing when the code is unknown to httpstatusmsg().
 */
void
httpstatus(int code)
{
	const char *text = httpstatusmsg(code);

	if (text != NULL)
		printf("Status: %s\r\n", text);
}
+ | |
/*
 * Send a complete plain-text error response and terminate the program.
 *
 * code - HTTP status code. A code that httpstatusmsg() does not know is
 *        reported as 500, so a Status header is always sent and a NULL
 *        pointer is never handed to printf("%s").
 * s    - optional detail text appended after the status line, may be NULL.
 *
 * Does not return: calls exit(0) after writing the response.
 */
void
httperror(int code, const char *s)
{
	const char *msg;

	msg = httpstatusmsg(code);
	if (!msg) {
		code = 500;
		msg = httpstatusmsg(code);
	}

	httpstatus(code);
	fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
	fputs("\r\n", stdout);
	if (s)
		printf("%s: %s\r\n", msg, s);
	else
		printf("%s\r\n", msg);
	exit(0);
}

/* Respond with "400 Bad Request" and exit. */
void
badrequest(const char *s)
{
	httperror(400, s);
}

/* Respond with "403 Forbidden" and exit. */
void
forbidden(const char *s)
{
	httperror(403, s);
}

/* Respond with "404 Not Found" and exit. */
void
notfound(const char *s)
{
	httperror(404, s);
}

/* Respond with "500 Internal Server Error" and exit. */
void
servererror(const char *s)
{
	httperror(500, s);
}
+ | |
+void | |
+logrequest(const char *feedname, const char *filename, const char *signature) | |
+{ | |
+ FILE *fp; | |
+ | |
+ /* file format: timestamp TAB feedname TAB data-filename */ | |
+ if (!(fp = fopen(logfile, "a"))) | |
+ servererror("cannot write data"); | |
+ fprintf(fp, "%lld\t", (long long)now); | |
+ fputs(feedname, fp); | |
+ fputs("\t", fp); | |
+ fputs(filename, fp); | |
+ fputs("\t", fp); | |
+ fputs(signature, fp); | |
+ fputs("\n", fp); | |
+ fclose(fp); | |
+} | |
+ | |
/*
 * File extension to use for a message with Content-Type `s`.
 * The argument is currently ignored: only XML (RSS and Atom) is supported,
 * so the result is always "xml".
 */
char *
contenttypetoext(const char *s)
{
	static char ext[] = "xml";

	(void)s;
	return ext;
}
+ | |
+int | |
+main(void) | |
+{ | |
+ FILE *fpdata; | |
+ char challenge[256], mode[32] = "", signature[128] = ""; | |
+ char requesturi[4096], requesturidecoded[4096]; | |
+ char feedname[256], token[256] = ""; | |
+ char filename[PATH_MAX], tmpfilename[PATH_MAX]; | |
+ char configpath[PATH_MAX], feedpath[PATH_MAX], secretpath[PATH_MAX]; | |
+ char tokenpath[PATH_MAX]; | |
+ char *contentlength = "", *contenttype = "", *method = "GET", *query =… | |
+ char *p, *fileext, *tmp; | |
+ char buf[4096]; | |
+ size_t n, total; | |
+ long long ll; | |
+ int i, j, fd, r; | |
+ /* HMAC */ | |
+ SHA_CTX ctx; | |
+ unsigned char key_opad[65]; /* outer padding - key XORd with opad */ | |
+ unsigned char *key; | |
+ size_t key_len; | |
+ unsigned char digest[SHA_DIGEST_LENGTH]; | |
+ unsigned char inputdigest[SHA_DIGEST_LENGTH]; | |
+ | |
+ if (unveil(DATADIR, "rwc") == -1) | |
+ err(1, "unveil"); | |
+ if (pledge("stdio rpath wpath cpath fattr", NULL) == -1) | |
+ err(1, "pledge"); | |
+ | |
+ if ((tmp = getenv("CONTENT_TYPE"))) | |
+ contenttype = tmp; | |
+ if ((tmp = getenv("CONTENT_LENGTH"))) | |
+ contentlength = tmp; | |
+ if ((tmp = getenv("REQUEST_METHOD"))) | |
+ method = tmp; | |
+ if ((tmp = getenv("QUERY_STRING"))) | |
+ query = tmp; | |
+ | |
+ /* "8. Authenticated Content Distribution" */ | |
+ if ((p = getenv("HTTP_X_HUB_SIGNATURE"))) { | |
+ r = snprintf(signature, sizeof(signature), "%s", p); | |
+ if (r < 0 || (size_t)r >= sizeof(signature)) | |
+ badrequest("invalid signature (truncated)"); | |
+ | |
+ /* accept sha1=digest or sha=digest */ | |
+ if ((tmp = strstr(signature, "sha1="))) | |
+ tmp += sizeof("sha1=") - 1; | |
+ else if ((tmp = strstr(signature, "sha="))) | |
+ tmp += sizeof("sha=") - 1; | |
+ if (tmp) { | |
+ for (p = tmp, i = 0; *p; p++, i++) { | |
+ if (!isxdigit((unsigned char)*p)) | |
+ break; | |
+ } | |
+ } | |
+ if (tmp && !*p && i == (SHA_DIGEST_LENGTH * 2)) { | |
+ for (i = 0, j = 0, p = tmp; i < SHA_DIGEST_LENGTH; i++… | |
+ inputdigest[i] = (hexdigit(p[j]) << 4) | | |
+ hexdigit(p[j + 1]); | |
+ } | |
+ } else { | |
+ badrequest("invalid hash format"); | |
+ } | |
+ } | |
+ | |
+ if (!(p = getenv("REQUEST_URI"))) | |
+ p = ""; | |
+ snprintf(requesturi, sizeof(requesturi), "%s", p); | |
+ if ((p = strchr(requesturi, '?'))) | |
+ *p = '\0'; /* remove query string */ | |
+ | |
+ if (decodeparamuntilend(requesturidecoded, sizeof(requesturidecoded), … | |
+ badrequest("request URI"); | |
+ | |
+ p = requesturidecoded; | |
+ if (strncmp(p, relpath, strlen(relpath))) | |
+ forbidden("invalid relative path"); | |
+ p += strlen(relpath); | |
+ | |
+ /* first part of path of request URI is the feedname, last part is the… | |
+ if ((tmp = strchr(p, '/'))) { | |
+ *tmp = '\0'; /* temporary NUL terminate */ | |
+ | |
+ r = snprintf(feedname, sizeof(feedname), "%s", p); | |
+ if (r < 0 || (size_t)r >= sizeof(feedname)) | |
+ servererror("path truncated"); | |
+ | |
+ r = snprintf(token, sizeof(token), "%s", tmp + 1); | |
+ if (r < 0 || (size_t)r >= sizeof(token)) | |
+ servererror("path truncated"); | |
+ | |
+ *tmp = '/'; /* restore NUL byte to '/' */ | |
+ } else { | |
+ r = snprintf(feedname, sizeof(feedname), "%s", p); | |
+ if (r < 0 || (size_t)r >= sizeof(feedname)) | |
+ servererror("path truncated"); | |
+ } | |
+ if (strstr(feedname, "..")) | |
+ badrequest("invalid feed name"); | |
+ | |
+ /* check if configdir of feedname exists, else skip request and return… | |
+ r = snprintf(configpath, sizeof(configpath), "%s/%s", configdir, feedn… | |
+ if (r < 0 || (size_t)r >= sizeof(configpath)) | |
+ servererror("path truncated"); | |
+ if (access(configpath, X_OK) == -1) | |
+ notfound("feed entrypoint does not exist"); | |
+ | |
+ r = snprintf(tokenpath, sizeof(tokenpath), "%s/%s/token", configdir, f… | |
+ if (r < 0 || (size_t)r >= sizeof(tokenpath)) | |
+ servererror("path truncated"); | |
+ if ((tmp = readfile(tokenpath))) { | |
+ if (strcmp(tmp, token)) | |
+ forbidden("missing or incorrect token in path"); | |
+ } | |
+ | |
+ if (!strcasecmp(method, "POST")) { | |
+ if (!feedname[0]) | |
+ badrequest("feed name part of path is missing"); | |
+ | |
+ /* read secret, initialize for HMAC and data signature verific… | |
+ r = snprintf(secretpath, sizeof(secretpath), "%s/%s/secret", c… | |
+ if (r < 0 || (size_t)r >= sizeof(secretpath)) | |
+ servererror("path truncated"); | |
+ key = readfile(secretpath); | |
+ if (key && !signature[0]) | |
+ forbidden("requires signature header X-Hub-Signature"); | |
+ | |
+ if (key) { | |
+ key_len = strlen(key); | |
+ hmac_sha1_init(&ctx, key, key_len, key_opad, sizeof(ke… | |
+ } | |
+ | |
+ /* temporary file with random characters */ | |
+ if ((now = time(NULL)) == (time_t)-1) | |
+ servererror("cannot get current time"); | |
+ r = snprintf(tmpfilename, sizeof(tmpfilename), "%s/%s/%lld.XXX… | |
+ if (r < 0 || (size_t)r >= sizeof(tmpfilename)) | |
+ servererror("path truncated"); | |
+ | |
+ if ((fd = mkstemp(tmpfilename)) == -1) | |
+ servererror("cannot create tmpfilename"); | |
+ if (!(fpdata = fdopen(fd, "wb"))) | |
+ servererror(tmpfilename); | |
+ | |
+ total = 0; | |
+ while ((n = fread(buf, 1, sizeof(buf), stdin)) == sizeof(buf))… | |
+ if (fwrite(buf, 1, n, fpdata) != n) | |
+ break; | |
+ if (key) | |
+ SHA1_Update(&ctx, buf, n); /* hash data for si… | |
+ total += n; | |
+ } | |
+ if (n) { | |
+ fwrite(buf, 1, n, fpdata); | |
+ if (key) | |
+ SHA1_Update(&ctx, buf, n); | |
+ total += n; | |
+ } | |
+ if (ferror(stdin)) { | |
+ fclose(fpdata); | |
+ unlink(tmpfilename); | |
+ servererror("cannot process POST message: read error"); | |
+ } | |
+ if (fflush(fpdata) || ferror(fpdata)) { | |
+ fclose(fpdata); | |
+ unlink(tmpfilename); | |
+ servererror("cannot process POST message: write error"… | |
+ } | |
+ fclose(fpdata); | |
+ chmod(tmpfilename, 0644); | |
+ | |
+ /* if Content-Length is set then check if it matches */ | |
+ if (contentlength[0]) { | |
+ ll = strtoll(contentlength, NULL, 10); | |
+ if (ll < 0 || (size_t)ll != total) { | |
+ unlink(tmpfilename); | |
+ badrequest("Content-Length does not match"); | |
+ } | |
+ } | |
+ | |
+ if (key) { | |
+ /* finalize signature digest */ | |
+ hmac_sha1_final(&ctx, key_opad, digest); | |
+ | |
+ /* compare digest */ | |
+ if (memcmp(inputdigest, digest, sizeof(digest))) { | |
+ unlink(tmpfilename); | |
+ forbidden("invalid digest for data"); | |
+ } | |
+ } | |
+ | |
+ /* use part of basename of the random temp file as the filenam… | |
+ if (!(tmp = strrchr(tmpfilename, '/'))) | |
+ servererror("invalid path"); /* cannot happen */ | |
+ r = snprintf(feedpath, sizeof(feedpath), "%s/%s", datadir, fee… | |
+ if (r < 0 || (size_t)r >= sizeof(feedpath)) | |
+ servererror("path truncated"); | |
+ fileext = contenttypetoext(contenttype); | |
+ r = snprintf(filename, sizeof(filename), "%s/%s%s%s", feedpath… | |
+ fileext[0] ? "." : "", fileext); | |
+ if (r < 0 || (size_t)r >= sizeof(filename)) | |
+ servererror("path truncated"); | |
+ | |
+ if ((r = rename(tmpfilename, filename)) != 0) { | |
+ unlink(filename); | |
+ unlink(tmpfilename); | |
+ servererror("cannot process POST message: failed to re… | |
+ } | |
+ chmod(filename, 0644); | |
+ | |
+ httpstatus(200); | |
+ fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout); | |
+ fputs("\r\n", stdout); | |
+ | |
+ /* output stored file: feedname, basename of the file */ | |
+ if ((tmp = strrchr(filename, '/'))) | |
+ tmp++; | |
+ else | |
+ tmp = ""; | |
+ printf("%s/%s\n", feedname, tmp); | |
+ | |
+ /* write to a log file, this could be a pipe or used with tail… | |
+ logrequest(feedname, tmp, signature); | |
+ | |
+ return 0; | |
+ } | |
+ | |
+ if ((p = getparam(query, "hub.mode"))) { | |
+ if (decodeparam(mode, sizeof(mode), p) == -1) | |
+ badrequest("hub.mode"); | |
+ } | |
+ | |
+ if (!strcmp(mode, "subscribe") || !strcmp(mode, "unsubscribe")) { | |
+ if ((p = getparam(query, "hub.challenge"))) { | |
+ if (decodeparam(challenge, sizeof(challenge), p) == -1) | |
+ badrequest("hub.challenge"); | |
+ } | |
+ if (!challenge[0]) | |
+ badrequest("hub.challenge is required, but is missing"… | |
+ | |
+ httpstatus(202); | |
+ fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout); | |
+ fputs("\r\n", stdout); | |
+ printf("%s\r\n", challenge); | |
+ return 0; | |
+ } else if (mode[0]) { | |
+ badrequest("hub.mode: only subscribe or unsubscribe is support… | |
+ } | |
+ | |
+ httpstatus(200); | |
+ fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout); | |
+ fputs("\r\n", stdout); | |
+ printf("pubsubhubbubblub running perfectly and flapping graciously in … | |
+ | |
+ return 0; | |
+} | |
diff --git a/pubsub_gethub.c b/pubsub_gethub.c | |
@@ -0,0 +1,149 @@ | |
+#include <err.h> | |
+#include <stdio.h> | |
+#include <strings.h> | |
+#include <unistd.h> | |
+ | |
+#undef strlcat | |
+size_t strlcat(char *, const char *, size_t); | |
+ | |
+#include "xml.h" | |
+ | |
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f) | |
+#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? ((c) | 32) : (c)) | |
+ | |
+/* string and size */ | |
+#define STRP(s) s,sizeof(s)-1 | |
+ | |
+static XMLParser parser; | |
+static int islinktag, ishrefattr, isrelattr; | |
+static char linkhref[4096], linkrel[256]; | |
+ | |
/*
 * Case-insensitive strstr(), included for portability.
 *
 * Returns a pointer to the first position in `h` where `n` occurs, or NULL
 * if there is none. An empty `n` matches at the start of `h`. Only the
 * ASCII letters A-Z are folded to lower case for the comparison.
 */
char *
strcasestr(const char *h, const char *n)
{
	const char *start;
	size_t k;
	int a, b;

	if (n[0] == '\0')
		return (char *)h;

	for (start = h; *start != '\0'; start++) {
		for (k = 0; n[k] != '\0'; k++) {
			a = (unsigned char)n[k];
			b = (unsigned char)start[k];
			if (a >= 'A' && a <= 'Z')
				a |= 32;
			if (b >= 'A' && b <= 'Z')
				b |= 32;
			/* also stops at the end of the haystack: its NUL
			 * cannot equal a non-NUL needle byte */
			if (a != b)
				break;
		}
		if (n[k] == '\0')
			return (char *)start;
	}

	return NULL;
}
+ | |
/*
 * Write `s` to stdout, skipping control characters (bytes below 0x20 and
 * 0x7f) so that a value cannot inject TABs or newlines into the output.
 */
static void
printvalue(const char *s)
{
	unsigned char c;

	while (*s != '\0') {
		c = (unsigned char)*s;
		if (c >= ' ' && c != 0x7f)
			putchar(*s);
		s++;
	}
}
+ | |
+static void | |
+xmltagstart(XMLParser *p, const char *t, size_t tl) | |
+{ | |
+ islinktag = 0; | |
+ char *l; | |
+ | |
+ if (((l = strcasestr(t, ":link")) && !strcasecmp(l, ":link")) || | |
+ !strcasecmp(t, "link")) { | |
+ islinktag = 1; | |
+ linkhref[0] = '\0'; | |
+ linkrel[0] = '\0'; | |
+ } | |
+} | |
+ | |
+static void | |
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) | |
+{ | |
+ if (!islinktag) | |
+ return; | |
+ | |
+ if (strncasecmp(linkrel, STRP("hub")) && | |
+ strncasecmp(linkrel, STRP("self"))) | |
+ return; | |
+ | |
+ printvalue(linkhref); | |
+ putchar('\t'); | |
+ printvalue(linkrel); | |
+ putchar('\n'); | |
+} | |
+ | |
+static void | |
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al) | |
+{ | |
+ ishrefattr = isrelattr = 0; | |
+ | |
+ if (!islinktag) | |
+ return; | |
+ | |
+ if (!strcasecmp(a, "href")) { | |
+ ishrefattr = 1; | |
+ linkhref[0] = '\0'; | |
+ } else if (!strcasecmp(a, "rel")) { | |
+ isrelattr = 1; | |
+ linkrel[0] = '\0'; | |
+ } | |
+} | |
+ | |
+static void | |
+xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, | |
+ const char *v, size_t vl) | |
+{ | |
+ if (islinktag) { | |
+ if (ishrefattr) | |
+ strlcat(linkhref, v, sizeof(linkhref)); | |
+ else if (isrelattr) | |
+ strlcat(linkrel, v, sizeof(linkrel)); | |
+ } | |
+} | |
+ | |
+static void | |
+xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al, | |
+ const char *v, size_t vl) | |
+{ | |
+ char buf[16]; | |
+ int len; | |
+ | |
+ if (!ishrefattr && !isrelattr) | |
+ return; | |
+ | |
+ /* try to translate entity, else just pass as data to | |
+ * xmlattr handler. */ | |
+ if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0) | |
+ xmlattr(p, t, tl, a, al, buf, (size_t)len); | |
+ else | |
+ xmlattr(p, t, tl, a, al, v, vl); | |
+} | |
+ | |
+int | |
+main(void) | |
+{ | |
+#ifdef __OpenBSD__ | |
+ if (pledge("stdio", NULL) == -1) | |
+ err(1, "pledge"); | |
+#endif | |
+ | |
+ parser.xmlattr = xmlattr; | |
+ parser.xmlattrentity = xmlattrentity; | |
+ parser.xmlattrstart = xmlattrstart; | |
+ parser.xmltagstart = xmltagstart; | |
+ parser.xmltagstartparsed = xmltagstartparsed; | |
+ | |
+ /* NOTE: getnext is defined in xml.h for inline optimization */ | |
+ xml_parse(&parser); | |
+ | |
+ if (ferror(stdin)) | |
+ fputs("read error: <stdin>\n", stderr);; | |
+ if (fflush(stdout) || ferror(stdout)) | |
+ fputs("write error: <stdout>\n", stderr); | |
+ | |
+ return 0; | |
+} | |
diff --git a/pubsub_setup b/pubsub_setup | |
@@ -0,0 +1,133 @@ | |
+#!/bin/sh | |
+ | |
+while getopts "c:su" f; do | |
+ case "${f}" in | |
+ s) dosubscribe=1;; | |
+ u) dounsubscribe=1;; | |
+ esac | |
+done | |
+shift $(expr ${OPTIND} - 1) | |
+ | |
+base="https://codemadness.org/pubsub/" | |
+ | |
+# Linux | |
+shacmd="$(command -v sha256sum)" | |
+# BSD | |
+test "${shacmd}" = "" && shacmd=$(command -v sha256) | |
+if test "${shacmd}" = ""; then | |
+ echo "No sha256 or sha256sum tool found" >&2 | |
+ exit 1 | |
+fi | |
+ | |
+# sha() | |
+sha() { | |
+ ${shacmd} | cut -f 1 -d ' ' | |
+} | |
+ | |
+# log(s) | |
+log() { | |
+ echo "$1" >&2 | |
+} | |
+ | |
# subscribe(feedname, hub, topic, callback, mode, secret)
#
# Send a (un)subscription request to "${hub}/subscribe".
#   mode defaults to "subscribe"; pass "unsubscribe" to cancel.
#   hub.verify is always "async" and hub.lease_seconds is sent empty.
# Returns 0 when curl reports success, 1 otherwise, and logs the outcome
# to stderr.
#
# The form values are passed with --data-urlencode so that a callback, topic
# or secret containing characters such as '&', '=', '+' or '%' (for example
# a topic URL with a query string) reaches the hub intact instead of
# corrupting the form body.
#
# The function assigns plain shell variables, which are global in sh.
subscribe() {
	feedname="$1"
	hub="$2"
	topic="$3"
	callback="$4"
	mode="${5:-subscribe}"
	secret="$6"
	verify="async" # or "sync"
	lease_seconds=""

#	if curl -s -f -H 'User-Agent:' -m 15 \
	# DEBUG
	if curl -v -f -H 'User-Agent:' -m 15 \
		-L --max-redirs 3 \
		--data-urlencode "hub.callback=${callback}" \
		--data-urlencode "hub.lease_seconds=${lease_seconds}" \
		--data-urlencode "hub.mode=${mode}" \
		--data-urlencode "hub.secret=${secret}" \
		--data-urlencode "hub.topic=${topic}" \
		--data-urlencode "hub.verify=${verify}" \
		"${hub}/subscribe"; then
		log "${mode} OK"
		return 0
	else
		log "${mode} FAIL"
		return 1
	fi
}
+ | |
+feedname="$1" | |
+hub="$2" | |
+topic="$3" | |
+if test "$1" = "" -o "$2" = "" -o "$3" = ""; then | |
+ echo "usage: $0 [-s] [-u] <feedname> <hub> <topic>" >&2 | |
+ exit 1 | |
+fi | |
+ | |
+isnew=1 | |
+test -d "config/${feedname}" && isnew=0 | |
+ | |
+mkdir -p "config/${feedname}" | |
+mkdir -p "feeds/${feedname}" | |
+mkdir -p "tmp/${feedname}" | |
+ | |
+# general log | |
+touch "log" | |
+ | |
+if test "${dosubscribe}" = "1"; then | |
+ f="config/${feedname}/hub" | |
+ if test -f "${f}"; then | |
+ echo "already registered? file exists: ${f}, skipping subscrib… | |
+ exit 1 | |
+ fi | |
+fi | |
+ | |
# generate random token if it does not exist.
# Only the existence of the file decides: an existing token must survive a
# rerun (for example with -u), otherwise the callback URL built below no
# longer matches the one registered at the hub.
f="config/${feedname}/token"
if ! test -f "${f}"; then
	token="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
	echo "${token}" > "${f}"
fi

# generate random secret if it does not exist.
# Same rule as for the token: never overwrite a secret the hub already knows.
f="config/${feedname}/secret"
if ! test -f "${f}"; then
	secret="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
	echo "${secret}" > "${f}"
fi

# read config.
f="config/${feedname}/token"
token=$(cat "${f}" 2>/dev/null)
f="config/${feedname}/secret"
secret=$(cat "${f}" 2>/dev/null)

# callback URL given to the hub: <base><feedname>/<token>
callback="${base}${feedname}/${token}"
+ | |
+if test "${dosubscribe}" = "1"; then | |
+ f="config/${feedname}/hub" | |
+ if test -f "${f}"; then | |
+ echo "already registered? file exists: ${f}, skipping subscrib… | |
+ exit 1 | |
+ fi | |
+ | |
+ # register at hub. save state when successfully registered. | |
+ if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "subscrib… | |
+ printf '%s\n' "${callback}" > "config/${feedname}/callback" | |
+ printf '%s\n' "${hub}" > "config/${feedname}/hub" | |
+ printf '%s\n' "${topic}" > "config/${feedname}/topic" | |
+ fi | |
+fi | |
+ | |
+if test "${dounsubscribe}" = "1"; then | |
+ # unregister at hub. remove state when successfully unregistered. | |
+ if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "unsubscr… | |
+ rm -f "config/${feedname}/callback" | |
+ rm -f "config/${feedname}/hub" | |
+ rm -f "config/${feedname}/topic" | |
+ fi | |
+fi | |
diff --git a/sha1.c b/sha1.c | |
@@ -0,0 +1,145 @@ | |
+/* Public domain SHA1 implementation based on RFC3174 and libtomcrypt | |
+ Modified to make function prototypes compatible with OpenSSL / LibreSSL. */ | |
+ | |
+#include <stdint.h> | |
+#include <string.h> | |
+ | |
+#include "sha1.h" | |
+ | |
+static uint32_t rol(uint32_t n, int k) { return (n << k) | (n >> (32-k)); } | |
+#define F0(b,c,d) (d ^ (b & (c ^ d))) | |
+#define F1(b,c,d) (b ^ c ^ d) | |
+#define F2(b,c,d) ((b & c) | (d & (b | c))) | |
+#define F3(b,c,d) (b ^ c ^ d) | |
+#define G0(a,b,c,d,e,i) e += rol(a,5)+F0(b,c,d)+W[i]+0x5A827999; b = rol(b,30) | |
+#define G1(a,b,c,d,e,i) e += rol(a,5)+F1(b,c,d)+W[i]+0x6ED9EBA1; b = rol(b,30) | |
+#define G2(a,b,c,d,e,i) e += rol(a,5)+F2(b,c,d)+W[i]+0x8F1BBCDC; b = rol(b,30) | |
+#define G3(a,b,c,d,e,i) e += rol(a,5)+F3(b,c,d)+W[i]+0xCA62C1D6; b = rol(b,30) | |
+ | |
+static void | |
+processblock(SHA_CTX *s, const unsigned char *buf) | |
+{ | |
+ uint32_t W[80], a, b, c, d, e; | |
+ int i; | |
+ | |
+ for (i = 0; i < 16; i++) { | |
+ W[i] = (uint32_t)buf[4*i]<<24; | |
+ W[i] |= (uint32_t)buf[4*i+1]<<16; | |
+ W[i] |= (uint32_t)buf[4*i+2]<<8; | |
+ W[i] |= buf[4*i+3]; | |
+ } | |
+ for (; i < 80; i++) | |
+ W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1); | |
+ a = s->h[0]; | |
+ b = s->h[1]; | |
+ c = s->h[2]; | |
+ d = s->h[3]; | |
+ e = s->h[4]; | |
+ for (i = 0; i < 20; ) { | |
+ G0(a,b,c,d,e,i++); | |
+ G0(e,a,b,c,d,i++); | |
+ G0(d,e,a,b,c,i++); | |
+ G0(c,d,e,a,b,i++); | |
+ G0(b,c,d,e,a,i++); | |
+ } | |
+ while (i < 40) { | |
+ G1(a,b,c,d,e,i++); | |
+ G1(e,a,b,c,d,i++); | |
+ G1(d,e,a,b,c,i++); | |
+ G1(c,d,e,a,b,i++); | |
+ G1(b,c,d,e,a,i++); | |
+ } | |
+ while (i < 60) { | |
+ G2(a,b,c,d,e,i++); | |
+ G2(e,a,b,c,d,i++); | |
+ G2(d,e,a,b,c,i++); | |
+ G2(c,d,e,a,b,i++); | |
+ G2(b,c,d,e,a,i++); | |
+ } | |
+ while (i < 80) { | |
+ G3(a,b,c,d,e,i++); | |
+ G3(e,a,b,c,d,i++); | |
+ G3(d,e,a,b,c,i++); | |
+ G3(c,d,e,a,b,i++); | |
+ G3(b,c,d,e,a,i++); | |
+ } | |
+ s->h[0] += a; | |
+ s->h[1] += b; | |
+ s->h[2] += c; | |
+ s->h[3] += d; | |
+ s->h[4] += e; | |
+} | |
+ | |
+static void | |
+pad(SHA_CTX *c) | |
+{ | |
+ unsigned r = c->len % 64; | |
+ | |
+ c->buf[r++] = 0x80; | |
+ if (r > 56) { | |
+ memset(c->buf + r, 0, 64 - r); | |
+ r = 0; | |
+ processblock(c, c->buf); | |
+ } | |
+ memset(c->buf + r, 0, 56 - r); | |
+ c->len *= 8; | |
+ c->buf[56] = c->len >> 56; | |
+ c->buf[57] = c->len >> 48; | |
+ c->buf[58] = c->len >> 40; | |
+ c->buf[59] = c->len >> 32; | |
+ c->buf[60] = c->len >> 24; | |
+ c->buf[61] = c->len >> 16; | |
+ c->buf[62] = c->len >> 8; | |
+ c->buf[63] = c->len; | |
+ processblock(c, c->buf); | |
+} | |
+ | |
+int | |
+SHA1_Init(SHA_CTX *c) | |
+{ | |
+ c->len = 0; | |
+ c->h[0] = 0x67452301; | |
+ c->h[1] = 0xEFCDAB89; | |
+ c->h[2] = 0x98BADCFE; | |
+ c->h[3] = 0x10325476; | |
+ c->h[4] = 0xC3D2E1F0; | |
+ return 1; | |
+} | |
+ | |
+int | |
+SHA1_Update(SHA_CTX *c, const void *m, size_t len) | |
+{ | |
+ const uint8_t *p = m; | |
+ unsigned r = c->len % 64; | |
+ | |
+ c->len += len; | |
+ if (r) { | |
+ if (len < 64 - r) { | |
+ memcpy(c->buf + r, p, len); | |
+ return 1; | |
+ } | |
+ memcpy(c->buf + r, p, 64 - r); | |
+ len -= 64 - r; | |
+ p += 64 - r; | |
+ processblock(c, c->buf); | |
+ } | |
+ for (; len >= 64; len -= 64, p += 64) | |
+ processblock(c, p); | |
+ memcpy(c->buf, p, len); | |
+ return 1; | |
+} | |
+ | |
+int | |
+SHA1_Final(unsigned char *md, SHA_CTX *c) | |
+{ | |
+ int i; | |
+ | |
+ pad(c); | |
+ for (i = 0; i < 5; i++) { | |
+ md[4 * i] = c->h[i] >> 24; | |
+ md[4 * i + 1] = c->h[i] >> 16; | |
+ md[4 * i + 2] = c->h[i] >> 8; | |
+ md[4 * i + 3] = c->h[i]; | |
+ } | |
+ return 1; | |
+} | |
diff --git a/sha1.h b/sha1.h | |
@@ -0,0 +1,13 @@ | |
#ifndef SHA1_H
#define SHA1_H

#include <stddef.h> /* size_t, used by SHA1_Update() */
#include <stdint.h>

/* SHA-1 hashing context, set up by SHA1_Init() */
typedef struct sha1 {
	uint64_t len;    /* processed message length */
	uint32_t h[5];   /* hash state */
	uint8_t buf[64]; /* message block buffer */
} SHA_CTX;

/* size in bytes of the digest written by SHA1_Final() */
#define SHA_DIGEST_LENGTH 20

int SHA1_Init(SHA_CTX *);
int SHA1_Update(SHA_CTX *, const void *, size_t);
int SHA1_Final(unsigned char *, SHA_CTX *);

#endif /* SHA1_H */
diff --git a/strlcat.c b/strlcat.c | |
@@ -0,0 +1,54 @@ | |
+/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 1998, 2015 Todd C. Miller <[email protected]> | |
+ * | |
+ * Permission to use, copy, modify, and distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ */ | |
+ | |
+#include <string.h> | |
+ | |
/*
 * Append the string src to the string stored in dst, where dsize is the
 * total size of the dst buffer (not the remaining space, as with strncat).
 * No more than dsize-1 bytes end up in dst and the result is NUL
 * terminated, except when dst holds no NUL within its first dsize bytes;
 * in that case dst is left untouched.
 * Returns strlen(src) + MIN(dsize, strlen(initial dst)); a return value
 * >= dsize means the result was truncated.
 */
size_t
strlcat(char *dst, const char *src, size_t dsize)
{
	size_t dlen = 0, slen = 0, room;

	/* length of the existing string, never looking past dsize bytes */
	while (dlen < dsize && dst[dlen] != '\0')
		dlen++;

	/* no terminator inside the buffer: nothing can be appended */
	if (dlen == dsize)
		return dlen + strlen(src);

	/* bytes that may still be copied, one is reserved for the NUL */
	room = dsize - dlen - 1;

	/* walk all of src for the return value, copy only while it fits */
	for (; src[slen] != '\0'; slen++) {
		if (slen < room)
			dst[dlen + slen] = src[slen];
	}
	dst[dlen + (slen < room ? slen : room)] = '\0';

	return dlen + slen; /* count does not include NUL */
}
diff --git a/xml.c b/xml.c | |
@@ -0,0 +1,415 @@ | |
+#include <errno.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+ | |
+#include "xml.h" | |
+ | |
/* ASCII-only character classes, independent of the locale. The argument is
 * converted to unsigned so that every value below the start of the range
 * (including negative ones such as EOF) wraps around and fails the
 * comparison. The argument is parenthesized inside the cast so that
 * operator expressions are classified as a whole. */
/* true for 'A'-'Z' and 'a'-'z': OR-ing 32 folds upper case onto lower case */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
/* true for ' ' and the five characters '\t', '\n', '\v', '\f', '\r' */
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
+ | |
/*
 * Parse the attribute list of the tag currently held in x->tag.
 *
 * Characters are read with GETNEXT() until '>' or EOF. The attribute name
 * is collected in x->name (characters beyond sizeof(x->name) - 1 are
 * dropped), the value is collected in x->data. For every attribute the
 * handlers that are set in x are invoked: xmlattrstart, then xmlattr for
 * the value text, then xmlattrend. Value text that does not fit in x->data
 * is delivered through several xmlattr calls. Inside a value, '&' starts an
 * entity: text buffered before it is handed to xmlattr first, and the
 * entity up to and including ';' is reported separately. An attribute name
 * that is followed by another name instead of '=' is reported as an
 * attribute without value. A '/' marks the tag as short (x->isshorttag)
 * and discards the pending name.
 *
 * NOTE(review): lines ending in an ellipsis are cut off in this listing;
 * the handler arguments, and the entity handler name (presumably
 * xmlattrentity), are inferred from the visible prefix only -- confirm
 * against the complete source.
 */
+static void | |
+xml_parseattrs(XMLParser *x) | |
+{ | |
+ size_t namelen = 0, valuelen; | |
+ int c, endsep, endname = 0, valuestart = 0; | |
+ | |
+ while ((c = GETNEXT()) != EOF) { | |
/* whitespace after at least one name character ends the name */
+ if (ISSPACE(c)) { | |
+ if (namelen) | |
+ endname = 1; | |
+ continue; | |
+ } else if (c == '?') | |
+ ; /* ignore */ | |
+ else if (c == '=') { | |
+ x->name[namelen] = '\0'; | |
+ valuestart = 1; | |
+ endname = 1; | |
+ } else if (namelen && ((endname && !valuestart && ISALPHA(c)) … | |
+ /* attribute without value */ | |
+ x->name[namelen] = '\0'; | |
+ if (x->xmlattrstart) | |
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
+ if (x->xmlattr) | |
+ x->xmlattr(x, x->tag, x->taglen, x->name, name… | |
+ if (x->xmlattrend) | |
+ x->xmlattrend(x, x->tag, x->taglen, x->name, n… | |
+ endname = 0; | |
/* c is already the first character of the next attribute name */
+ x->name[0] = c; | |
+ namelen = 1; | |
+ } else if (namelen && valuestart) { | |
+ /* attribute with value */ | |
+ if (x->xmlattrstart) | |
+ x->xmlattrstart(x, x->tag, x->taglen, x->name,… | |
+ | |
+ valuelen = 0; | |
/* a quote opens a value that ends at the same quote character; otherwise
 * the value is unquoted, c is already its first character and ' ' is
 * stored as the end marker */
+ if (c == '\'' || c == '"') { | |
+ endsep = c; | |
+ } else { | |
+ endsep = ' '; /* ISSPACE() */ | |
+ goto startvalue; | |
+ } | |
+ | |
+ while ((c = GETNEXT()) != EOF) { | |
+startvalue: | |
+ if (c == '&') { /* entities */ | |
+ x->data[valuelen] = '\0'; | |
+ /* call data function with data before… | |
+ if (valuelen && x->xmlattr) | |
+ x->xmlattr(x, x->tag, x->tagle… | |
+ x->data[0] = c; | |
+ valuelen = 1; | |
+ while ((c = GETNEXT()) != EOF) { | |
+ if (c == endsep || (endsep == … | |
+ break; | |
+ if (valuelen < sizeof(x->data)… | |
+ x->data[valuelen++] = … | |
+ else { | |
+ /* entity too long for… | |
+ x->data[valuelen] = '\… | |
+ if (x->xmlattr) | |
+ x->xmlattr(x, … | |
+ x->data[0] = c; | |
+ valuelen = 1; | |
+ break; | |
+ } | |
+ if (c == ';') { | |
+ x->data[valuelen] = '\… | |
+ if (x->xmlattrentity) | |
+ x->xmlattrenti… | |
+ valuelen = 0; | |
+ break; | |
+ } | |
+ } | |
+ } else if (c != endsep && !(endsep == ' ' && (… | |
+ if (valuelen < sizeof(x->data) - 1) { | |
+ x->data[valuelen++] = c; | |
+ } else { | |
+ x->data[valuelen] = '\0'; | |
+ if (x->xmlattr) | |
+ x->xmlattr(x, x->tag, … | |
+ x->data[0] = c; | |
+ valuelen = 1; | |
+ } | |
+ } | |
+ if (c == endsep || (endsep == ' ' && (c == '>'… | |
+ x->data[valuelen] = '\0'; | |
+ if (x->xmlattr) | |
+ x->xmlattr(x, x->tag, x->tagle… | |
+ if (x->xmlattrend) | |
+ x->xmlattrend(x, x->tag, x->ta… | |
+ break; | |
+ } | |
+ } | |
+ namelen = endname = valuestart = 0; | |
+ } else if (namelen < sizeof(x->name) - 1) { | |
+ x->name[namelen++] = c; | |
+ } | |
+ if (c == '>') { | |
+ break; | |
+ } else if (c == '/') { | |
+ x->isshorttag = 1; | |
+ x->name[0] = '\0'; | |
+ namelen = 0; | |
+ } | |
+ } | |
+} | |
+ | |
+static void | |
+xml_parsecomment(XMLParser *x) | |
+{ | |
+ int c, i = 0; | |
+ | |
+ while ((c = GETNEXT()) != EOF) { | |
+ if (c == '-') { | |
+ if (++i > 2) | |
+ i = 2; | |
+ continue; | |
+ } else if (c == '>' && i == 2) { | |
+ return; | |
+ } else if (i) { | |
+ i = 0; | |
+ } | |
+ } | |
+} | |
+ | |
+static void | |
+xml_parsecdata(XMLParser *x) | |
+{ | |
+ size_t datalen = 0, i = 0; | |
+ int c; | |
+ | |
+ while ((c = GETNEXT()) != EOF) { | |
+ if (c == ']' || c == '>') { | |
+ if (x->xmlcdata && datalen) { | |
+ x->data[datalen] = '\0'; | |
+ x->xmlcdata(x, x->data, datalen); | |
+ datalen = 0; | |
+ } | |
+ } | |
+ | |
+ if (c == ']') { | |
+ if (++i > 2) { | |
+ if (x->xmlcdata) | |
+ for (; i > 2; i--) | |
+ x->xmlcdata(x, "]", 1); | |
+ i = 2; | |
+ } | |
+ continue; | |
+ } else if (c == '>' && i == 2) { | |
+ return; | |
+ } else if (i) { | |
+ if (x->xmlcdata) | |
+ for (; i > 0; i--) | |
+ x->xmlcdata(x, "]", 1); | |
+ i = 0; | |
+ } | |
+ | |
+ if (datalen < sizeof(x->data) - 1) { | |
+ x->data[datalen++] = c; | |
+ } else { | |
+ x->data[datalen] = '\0'; | |
+ if (x->xmlcdata) | |
+ x->xmlcdata(x, x->data, datalen); | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ } | |
+ } | |
+} | |
+ | |
/*
 * Encode the code point r as UTF-8 into s and return the number of bytes
 * written (1 to 4). Returns 0 and writes nothing when r is 0.
 * s is not NUL terminated; the value of r is not validated here.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r == 0)
		return 0; /* NUL byte */

	if (r <= 0x7F) {
		/* 0aaaaaaa */
		s[0] = r;
		return 1;
	}

	if (r <= 0x07FF) {
		/* 110aaaaa 10bbbbbb */
		s[0] = 0xC0 | ((r >> 6) & 0x1F);
		s[1] = 0x80 | (r & 0x3F);
		return 2;
	}

	if (r <= 0xFFFF) {
		/* 1110aaaa 10bbbbbb 10cccccc */
		s[0] = 0xE0 | ((r >> 12) & 0x0F);
		s[1] = 0x80 | ((r >> 6) & 0x3F);
		s[2] = 0x80 | (r & 0x3F);
		return 3;
	}

	/* 11110aaa 10bbbbbb 10cccccc 10dddddd */
	s[0] = 0xF0 | ((r >> 18) & 0x07);
	s[1] = 0x80 | ((r >> 12) & 0x3F);
	s[2] = 0x80 | ((r >> 6) & 0x3F);
	s[3] = 0x80 | (r & 0x3F);
	return 4;
}
+ | |
/*
 * Translate a named entity to its character.
 * e is the entity text without the leading '&' but including the closing
 * ';' (for example "amp;"). On a match the character and a terminating NUL
 * are stored in buf and 1 is returned. Returns -1 when the name is not one
 * of amp, lt, gt, apos, quot or when bufsiz is smaller than 2.
 */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const char *const names[] = {
		"amp;", "lt;", "gt;", "apos;", "quot;"
	};
	static const char chars[] = {
		'&', '<', '>', '\'', '"'
	};
	const size_t count = sizeof(names) / sizeof(names[0]);
	size_t k;

	/* room is needed for one character plus the NUL */
	if (bufsiz < 2)
		return -1;

	for (k = 0; k < count; k++) {
		if (strcmp(e, names[k]) != 0)
			continue;
		buf[0] = chars[k];
		buf[1] = '\0';
		return 1;
	}

	return -1;
}
+ | |
/*
 * Translate a numeric character reference to UTF-8.
 * e is the text following "&#": decimal digits, or 'x' followed by
 * hexadecimal digits, terminated by ';'. The encoded character and a
 * terminating NUL are stored in buf and the number of encoded bytes is
 * returned. Returns -1 when bufsiz is smaller than 5, when the number
 * cannot be parsed or is not followed by ';', or when the value is
 * negative, above 0x10ffff or in the surrogate range 0xd800-0xdfff.
 * errno is reset as part of the conversion.
 */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	const char *digits = e;
	char *stop;
	long cp;
	int base = 10, n;

	/* up to 4 bytes of UTF-8 plus the NUL */
	if (bufsiz < 5)
		return -1;

	errno = 0;
	if (*digits == 'x') {
		base = 16;
		digits++;
	}
	cp = strtol(digits, &stop, base);

	/* conversion error, no digits at all, or not closed by ';' */
	if (errno)
		return -1;
	if (stop == digits || *stop != ';')
		return -1;
	/* outside the code point range, or a surrogate */
	if (cp < 0 || cp > 0x10ffff)
		return -1;
	if (cp >= 0xd800 && cp <= 0xdfff)
		return -1;

	n = codepointtoutf8(cp, buf);
	buf[n] = '\0';

	return n;
}
+ | |
/* Convert an entity string (named such as "&amp;" or numeric such as
 * "&#38;" / "&#x26;") to the string it stands for, stored in buf.
 * Returns the byte-length of the result or -1 on failure, which includes
 * input that does not start with '&'. */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	if (e[0] == '&') {
		/* "&#..." is numeric, everything else is looked up by name */
		if (e[1] == '#')
			return numericentitytostr(e + 2, buf, bufsiz);
		return namedentitytostr(e + 1, buf, bufsiz);
	}

	return -1;
}
+ | |
/*
 * Parse an XML document read with GETNEXT() and report its parts through
 * the handlers set in x. Handlers that are not set are skipped. Parsing
 * stops at EOF.
 *
 * Input before the first '<' is skipped. After that the loop alternates
 * between two states, selected by the current character:
 *
 * - '<' starts markup. "<!" introduces a comment or a CDATA section: the
 *   first characters are kept in x->data to recognise the "--" and
 *   "[CDATA[" prefixes, and xml_parsecomment() or xml_parsecdata() reads
 *   the rest; other "<!" markup is skipped up to '>'. Anything else is a
 *   tag: a leading '?' (processing instruction) is treated as a short tag,
 *   a leading '/' marks an end tag. The tag name is collected in x->tag
 *   until '>' or whitespace. End tags invoke xmltagend. Start tags invoke
 *   xmltagstart, then xml_parseattrs() when whitespace followed the name,
 *   then a second start handler (presumably xmltagstartparsed); short tags
 *   additionally invoke xmltagend.
 *
 * - any other character starts tag data, which is read up to the next '<'
 *   and passed to xmldata, in several pieces when it does not fit in
 *   x->data. '&' starts an entity: text buffered before it is passed to
 *   xmldata first, and the entity up to and including ';' is reported
 *   separately (presumably through xmldataentity).
 *
 * NOTE(review): lines ending in an ellipsis are cut off in this listing;
 * handler arguments and the handler names marked "presumably" are inferred
 * from the visible prefix only -- confirm against the complete source.
 */
+void | |
+xml_parse(XMLParser *x) | |
+{ | |
+ size_t datalen, tagdatalen; | |
+ int c, isend; | |
+ | |
+ while ((c = GETNEXT()) != EOF && c != '<') | |
+ ; /* skip until < */ | |
+ | |
+ while (c != EOF) { | |
+ if (c == '<') { /* parse tag */ | |
/* the character following '<' decides what kind of markup this is */
+ if ((c = GETNEXT()) == EOF) | |
+ return; | |
+ | |
+ if (c == '!') { /* cdata and comments */ | |
+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) { | |
+ /* NOTE: sizeof(x->data) must be at le… | |
+ if (tagdatalen <= sizeof("[CDATA[") - … | |
+ x->data[tagdatalen++] = c; | |
+ if (c == '>') | |
+ break; | |
+ else if (c == '-' && tagdatalen == siz… | |
+ (x->data[0] == '-')) { | |
+ xml_parsecomment(x); | |
+ break; | |
+ } else if (c == '[') { | |
+ if (tagdatalen == sizeof("[CDA… | |
+ !strncmp(x->data, "[CDATA[… | |
+ xml_parsecdata(x); | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ } else { | |
+ /* normal tag (open, short open, close), proce… | |
+ x->tag[0] = c; | |
+ x->taglen = 1; | |
+ x->isshorttag = isend = 0; | |
+ | |
+ /* treat processing instruction as shorttag, d… | |
+ if (c == '?') { | |
+ x->isshorttag = 1; | |
+ } else if (c == '/') { | |
+ if ((c = GETNEXT()) == EOF) | |
+ return; | |
+ x->tag[0] = c; | |
+ isend = 1; | |
+ } | |
+ | |
+ while ((c = GETNEXT()) != EOF) { | |
+ if (c == '/') | |
+ x->isshorttag = 1; /* short ta… | |
+ else if (c == '>' || ISSPACE(c)) { | |
+ x->tag[x->taglen] = '\0'; | |
+ if (isend) { /* end tag, start… | |
+ if (x->xmltagend) | |
+ x->xmltagend(x… | |
+ x->tag[0] = '\0'; | |
+ x->taglen = 0; | |
+ } else { | |
+ /* start tag */ | |
+ if (x->xmltagstart) | |
+ x->xmltagstart… | |
+ if (ISSPACE(c)) | |
+ xml_parseattrs… | |
+ if (x->xmltagstartpars… | |
+ x->xmltagstart… | |
+ } | |
+ /* call tagend for shortform o… | |
+ if (x->isshorttag) { | |
+ if (x->xmltagend) | |
+ x->xmltagend(x… | |
+ x->tag[0] = '\0'; | |
+ x->taglen = 0; | |
+ } | |
+ break; | |
+ } else if (x->taglen < sizeof(x->tag) … | |
+ x->tag[x->taglen++] = c; /* NO… | |
+ } | |
+ } | |
+ } else { | |
+ /* parse tag data */ | |
+ datalen = 0; | |
+ while ((c = GETNEXT()) != EOF) { | |
+ if (c == '&') { | |
+ if (datalen) { | |
+ x->data[datalen] = '\0'; | |
+ if (x->xmldata) | |
+ x->xmldata(x, x->data,… | |
+ } | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ while ((c = GETNEXT()) != EOF) { | |
+ if (c == '<') | |
+ break; | |
+ if (datalen < sizeof(x->data) … | |
+ x->data[datalen++] = c; | |
+ else { | |
+ /* entity too long for… | |
+ x->data[datalen] = '\0… | |
+ if (x->xmldata) | |
+ x->xmldata(x, … | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ break; | |
+ } | |
+ if (c == ';') { | |
+ x->data[datalen] = '\0… | |
+ if (x->xmldataentity) | |
+ x->xmldataenti… | |
+ datalen = 0; | |
+ break; | |
+ } | |
+ } | |
+ } else if (c != '<') { | |
+ if (datalen < sizeof(x->data) - 1) { | |
+ x->data[datalen++] = c; | |
+ } else { | |
+ x->data[datalen] = '\0'; | |
+ if (x->xmldata) | |
+ x->xmldata(x, x->data,… | |
+ x->data[0] = c; | |
+ datalen = 1; | |
+ } | |
+ } | |
+ if (c == '<') { | |
+ x->data[datalen] = '\0'; | |
+ if (x->xmldata && datalen) | |
+ x->xmldata(x, x->data, datalen… | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ } | |
+} | |
diff --git a/xml.h b/xml.h | |
@@ -0,0 +1,43 @@ | |
#ifndef _XML_H_
#define _XML_H_

#include <stdio.h>

/* Function (or macro) the parser calls to fetch the next input character.
 * Define GETNEXT before including this header to read from another source,
 * for example to avoid function call overhead; the default is getchar. */
#ifndef GETNEXT
#define GETNEXT getchar
#endif

/* Parser state and the handlers invoked while parsing. */
typedef struct xmlparser {
	/* handlers */
	void (*xmlattr)(struct xmlparser *, const char *, size_t,
	      const char *, size_t, const char *, size_t);
	void (*xmlattrend)(struct xmlparser *, const char *, size_t,
	      const char *, size_t);
	void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
	      const char *, size_t);
	void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
	      const char *, size_t, const char *, size_t);
	void (*xmlcdata)(struct xmlparser *, const char *, size_t);
	void (*xmldata)(struct xmlparser *, const char *, size_t);
	void (*xmldataentity)(struct xmlparser *, const char *, size_t);
	void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
	void (*xmltagstart)(struct xmlparser *, const char *, size_t);
	void (*xmltagstartparsed)(struct xmlparser *, const char *,
	      size_t, int);

	/* name of the tag being parsed and its length */
	char tag[1024];
	size_t taglen;
	/* set when the tag being parsed is in short form: <tag /> */
	int isshorttag;
	/* name of the attribute being parsed */
	char name[1024];
	/* shared buffer for tag data, cdata and attribute data */
	char data[BUFSIZ];
} XMLParser;

int xml_entitytostr(const char *, char *, size_t);
void xml_parse(XMLParser *);

#endif