git clone git://git.codemadness.org/jfconvert
---
commit 6dd24b7a0e38fe5d01726178ac4b5cc5f325cd59
parent 469bc51805a16876507da21e3145e05bb3c57e72
Author: Hiltjo Posthuma <[email protected]>
Date: Tue, 4 Apr 2023 18:39:13 +0200
add jf2sfeed: convert JSON Feed to sfeed(5)
Diffstat:
M Makefile | 16 ++++++++--------
M README | 7 +++++--
A jf2sfeed.1 | 39 +++++++++++++++++++++++++++++…
A jf2sfeed.c | 538 +++++++++++++++++++++++++++++…
4 files changed, 590 insertions(+), 10 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
.POSIX:
-NAME = jf2atom
+NAME = jfconvert
VERSION = 0.1
# paths
@@ -11,14 +11,14 @@ DOCPREFIX = ${PREFIX}/share/doc/${NAME}
RANLIB = ranlib
# use system flags.
-JFA_CFLAGS = ${CFLAGS}
-JFA_LDFLAGS = ${LDFLAGS}
-JFA_CPPFLAGS = -D_DEFAULT_SOURCE
+JFCONVERT_CFLAGS = ${CFLAGS}
+JFCONVERT_LDFLAGS = ${LDFLAGS}
+JFCONVERT_CPPFLAGS = -D_DEFAULT_SOURCE
# uncomment for conservative locked I/O.
-#JFA_CPPFLAGS = -D_DEFAULT_SOURCE -DGETNEXT=getchar
+#JFCONVERT_CPPFLAGS = -D_DEFAULT_SOURCE -DGETNEXT=getchar
-BIN = ${NAME}
+BIN = jf2atom jf2sfeed
SRC = ${BIN:=.c}
HDR = json.h
MAN1 = ${BIN:=.1}
@@ -41,10 +41,10 @@ OBJ = ${SRC:.c=.o} ${LIBJSONOBJ}
${OBJ}: ${HDR}
.o:
- ${CC} ${JFA_LDFLAGS} -o $@ $< ${LIB}
+ ${CC} ${JFCONVERT_LDFLAGS} -o $@ $< ${LIB}
.c.o:
- ${CC} ${JFA_CFLAGS} ${JFA_CPPFLAGS} -o $@ -c $<
+ ${CC} ${JFCONVERT_CFLAGS} ${JFCONVERT_CPPFLAGS} -o $@ -c $<
${LIBJSON}: ${LIBJSONOBJ}
${AR} -rc $@ $?
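
json.h itself is not part of this diff, but the commented-out -DGETNEXT=getchar line above hints at the input convention the parser presumably follows: a GETNEXT macro that defaults to the faster, unlocked stdio call (likely the reason _DEFAULT_SOURCE is set). A minimal sketch of that assumed pattern, not taken from this repository:

    #include <stdio.h>

    /* assumed convention: GETNEXT defaults to the unlocked variant and can be
     * overridden from the Makefile with -DGETNEXT=getchar for locked I/O */
    #ifndef GETNEXT
    #define GETNEXT getchar_unlocked
    #endif

    int
    main(void)
    {
        long n = 0;
        int c;

        while ((c = GETNEXT()) != EOF)  /* read stdin one byte at a time */
            n++;
        printf("read %ld bytes\n", n);
        return 0;
    }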
diff --git a/README b/README
@@ -1,9 +1,12 @@
-jf2atom
+jfconvert
-------
+JSON Feed (subset) to sfeed(5) converter.
JSON Feed (subset) to Atom converter.
JSON Feed specification: https://www.jsonfeed.org/version/1/
+
+sfeed: https://codemadness.org/sfeed.html
Atom specification: https://datatracker.ietf.org/doc/html/rfc4287
@@ -31,7 +34,7 @@ Optional dependencies
Examples and documentation
--------------------------
-See the man page.
+See the man pages.
License
diff --git a/jf2sfeed.1 b/jf2sfeed.1
@@ -0,0 +1,39 @@
+.Dd April 4, 2023
+.Dt JF2SFEED 1
+.Os
+.Sh NAME
+.Nm jf2sfeed
+.Nd convert JSON Feed to sfeed
+.Sh SYNOPSIS
+.Nm
+.Sh DESCRIPTION
+.Nm
+reads JSON data from stdin.
+It writes sfeed TSV data to stdout.
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+.Bd -literal
+jf2sfeed < input.json
+.Ed
+.Pp
+An example to support JSON Feed in sfeed:
+.Bd -literal
+curl -s 'https://codemadness.org/jsonfeed_content.json' | jf2sfeed | sfeed_curses
+.Ed
+.Sh SEE ALSO
+.Xr awk 1 ,
+.Xr curl 1 ,
+.Xr sfeed 1
+.Sh STANDARDS
+.Rs
+.%T The Atom Syndication Format
+.%R RFC 4287
+.Re
+.Rs
+.%T JSON Feed Version 1.1
+.%U https://www.jsonfeed.org/version/1.1/
+.%D Nov, 2022
+.Re
+.Sh AUTHORS
+.An Hiltjo Posthuma Aq Mt [email protected]
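
For orientation: the sfeed(5) output this man page refers to is one line per item with 9 TAB-separated fields, written in the order used by printfields() in jf2sfeed.c below: UNIX timestamp, title, link, content, content-type (plain or html), id, author, enclosure, category. A hypothetical item titled "Hello" with id "1", link https://example.org/post, plain-text content "Hi." and publication date 2023-04-04T18:39:13+02:00 would come out roughly as (<TAB> marks a literal TAB; author, enclosure and category are empty here):

    1680626353<TAB>Hello<TAB>https://example.org/post<TAB>Hi.<TAB>plain<TAB>1<TAB><TAB><TAB>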
diff --git a/jf2sfeed.c b/jf2sfeed.c
@@ -0,0 +1,538 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __OpenBSD__
+#include <unistd.h>
+#else
+#define pledge(a,b) 0
+#endif
+
+#include "json.h"
+
+/* hint for compilers and static analyzers that a function exits */
+#ifndef __dead
+#define __dead
+#endif
+
+/* ctype-like macros, but always compatible with ASCII / UTF-8 */
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+
+/* compare attributes case-sensitively */
+#define attrcmp strcmp
+
+enum {
+ FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
+ FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
+ FeedFieldLast
+};
+
+enum ContentType {
+ ContentTypeNone = 0,
+ ContentTypePlain = 1,
+ ContentTypeHTML = 2
+};
+static const char *contenttypes[] = { "", "plain", "html" };
+
+/* String data / memory pool */
+typedef struct string {
+ char *data; /* data */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+static String fields[FeedFieldLast]; /* data for current item */
+static enum ContentType contenttype; /* content-type for item */
+static int itemisopen = 0;
+
+static const int FieldSeparator = '\t';
+/* separator for multiple values in a field, separator should be 1 byte */
+static const char FieldMultiSeparator[] = "|";
+
+/* print to stderr, print error message of errno and exit().
+ Unlike BSD err() it does not prefix __progname */
+__dead void
+err(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+ int saved_errno;
+
+ saved_errno = errno;
+
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputs(": ", stderr);
+ }
+ fprintf(stderr, "%s\n", strerror(saved_errno));
+
+ exit(exitstatus);
+}
+
+/* print to stderr and exit().
+ Unlike BSD errx() it does not prefix __progname */
+__dead void
+errx(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+ fputs("\n", stderr);
+
+ exit(exitstatus);
+}
+
+
+/* Convert time fields. Returns a UNIX timestamp. */
+static long long
+datetounix(long long year, int mon, int day, int hour, int min, int sec)
+{
+ static const long secs_through_month[] = {
+ 0, 31 * 86400, 59 * 86400, 90 * 86400,
+ 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
+ 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
+ int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
+ long long t;
+
+ if (year - 2ULL <= 136) {
+ leaps = (year - 68) >> 2;
+ if (!((year - 68) & 3)) {
+ leaps--;
+ is_leap = 1;
+ } else {
+ is_leap = 0;
+ }
+ t = 31536000 * (year - 70) + 86400 * leaps;
+ } else {
+ cycles = (year - 100) / 400;
+ rem = (year - 100) % 400;
+ if (rem < 0) {
+ cycles--;
+ rem += 400;
+ }
+ if (!rem) {
+ is_leap = 1;
+ } else {
+ if (rem >= 300)
+ centuries = 3, rem -= 300;
+ else if (rem >= 200)
+ centuries = 2, rem -= 200;
+ else if (rem >= 100)
+ centuries = 1, rem -= 100;
+ if (rem) {
+ leaps = rem / 4U;
+ rem %= 4U;
+ is_leap = !rem;
+ }
+ }
+ leaps += 97 * cycles + 24 * centuries - is_leap;
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200;
+ }
+ t += secs_through_month[mon];
+ if (is_leap && mon >= 2)
+ t += 86400;
+ t += 86400LL * (day - 1);
+ t += 3600LL * hour;
+ t += 60LL * min;
+ t += sec;
+
+ return t;
+}
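
The year argument is years since 1900 and the month is zero-based, matching how parsetime() calls it below. A hypothetical sanity check (not part of this patch) that could be compiled into jf2sfeed.c, where the static function is visible:

    #include <assert.h>

    static void
    test_datetounix(void)
    {
        /* the UNIX epoch, 1970-01-01T00:00:00Z */
        assert(datetounix(70, 0, 1, 0, 0, 0) == 0);
        /* 2000-01-01T00:00:00Z */
        assert(datetounix(100, 0, 1, 0, 0, 0) == 946684800LL);
        /* the commit date above in UTC: 2023-04-04T16:39:13Z */
        assert(datetounix(123, 3, 4, 16, 39, 13) == 1680626353LL);
    }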
+
+/* Get timezone from string, return time offset in seconds from UTC. */
+static long
+gettzoffset(const char *s)
+{
+ const char *p;
+ long tzhour = 0, tzmin = 0;
+ size_t i;
+
+ for (; ISSPACE((unsigned char)*s); s++)
+ ;
+ switch (*s) {
+ case '-': /* offset */
+ case '+':
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzhour = (tzhour * 10) + (*p - '0');
+ if (*p == ':')
+ p++;
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzmin = (tzmin * 10) + (*p - '0');
+ return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
+ default: /* timezone name */
+ break;
+ }
+ return 0;
+}
+
+/* Parse time string `s` into the UNIX timestamp `tp`.
+ Returns 0 on success or -1 on failure. */
+static int
+parsetime(const char *s, long long *tp)
+{
+ int va[6] = { 0 }, i, v, vi;
+
+ for (; ISSPACE((unsigned char)*s); s++)
+ ;
+
+ if (!ISDIGIT((unsigned char)s[0]) ||
+ !ISDIGIT((unsigned char)s[1]) ||
+ !ISDIGIT((unsigned char)s[2]) ||
+ !ISDIGIT((unsigned char)s[3]))
+ return -1;
+
+ /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
+ vi = 0;
+
+ /* parse time parts (and possibly remaining date parts) */
+ for (; *s && vi < 6; vi++) {
+ for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
+ ISDIGIT((unsigned char)*s); s++, i++) {
+ v = (v * 10) + (*s - '0');
+ }
+ va[vi] = v;
+
+ if ((vi < 2 && *s == '-') ||
+ (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
+ (vi > 2 && *s == ':'))
+ s++;
+ }
+
+ /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
+ if (*s == '.') {
+ for (s++; ISDIGIT((unsigned char)*s); s++)
+ ;
+ }
+
+ /* invalid range */
+ if (va[0] < 0 || va[0] > 9999 ||
+ va[1] < 1 || va[1] > 12 ||
+ va[2] < 1 || va[2] > 31 ||
+ va[3] < 0 || va[3] > 23 ||
+ va[4] < 0 || va[4] > 59 ||
+ va[5] < 0 || va[5] > 60) /* allow leap second */
+ return -1;
+
+ *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
+ gettzoffset(s);
+
+ return 0;
+}
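
parsetime() accepts the ISO-style date formats listed in the comment above and subtracts the offset that gettzoffset() parses from whatever follows the seconds (and optional milliseconds). A hypothetical check, again assuming it is placed inside jf2sfeed.c where these static functions are visible:

    static void
    test_parsetime(void)
    {
        long long t;

        /* "+02:00" is parsed by gettzoffset() as 7200 seconds east of UTC */
        if (parsetime("2023-04-04T18:39:13+02:00", &t) == 0 && t == 1680626353LL)
            puts("ok");
    }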
+
+/* Handle read or write errors for a FILE * stream */
+static void
+checkfileerror(FILE *fp, const char *name, int mode)
+{
+ if (mode == 'r' && ferror(fp))
+ errx(1, "read error: %s", name);
+ else if (mode == 'w' && (fflush(fp) || ferror(fp)))
+ errx(1, "write error: %s", name);
+}
+
+/* Clear string only; don't free, prevents unnecessary reallocation. */
+static void
+string_clear(String *s)
+{
+ if (s->data)
+ s->data[0] = '\0';
+ s->len = 0;
+}
+
+static void
+string_buffer_realloc(String *s, size_t newlen)
+{
+ size_t alloclen;
+
+ if (newlen > SIZE_MAX / 2) {
+ alloclen = SIZE_MAX;
+ } else {
+ for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
+ ;
+ }
+ if (!(s->data = realloc(s->data, alloclen)))
+ err(1, "realloc");
+ s->bufsiz = alloclen;
+}
+
+/* Append data to String, s->data and data may not overlap. */
+static void
+string_append(String *s, const char *data, size_t len)
+{
+ if (!len)
+ return;
+
+ if (s->len >= SIZE_MAX - len) {
+ errno = EOVERFLOW;
+ err(1, "realloc");
+ }
+
+ /* check if allocation is necessary, never shrink the buffer. */
+ if (s->len + len >= s->bufsiz)
+ string_buffer_realloc(s, s->len + len + 1);
+ memcpy(s->data + s->len, data, len);
+ s->len += len;
+ s->data[s->len] = '\0';
+}
+
+/* Clear and append string */
+static void
+string_set(String *s, const char *data, size_t len)
+{
+ string_clear(s);
+ string_append(s, data, len);
+}
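
The String helpers above form a small grow-only pool: string_buffer_realloc() doubles from a 64-byte floor and string_clear() resets the length without freeing, so the per-field buffers are reused across items. A hypothetical illustration of the resulting behaviour:

    String s = { 0 };              /* hypothetical local, for illustration only */

    string_set(&s, "hello", 5);    /* allocates 64 bytes, len == 5 */
    string_append(&s, " world", 6);/* fits in the buffer, no reallocation: len == 11 */
    string_clear(&s);              /* len == 0, the 64-byte buffer is kept */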
+
+/* Print text, encode TABs, newlines and '\';
+ * other control characters are removed. */
+static void
+string_print_encoded(String *s)
+{
+ const char *p, *e;
+
+ if (!s->data || !s->len)
+ return;
+
+ p = s->data;
+ e = p + s->len;
+
+ for (; *p && p != e; p++) {
+ switch (*p) {
+ case '\n': putchar('\\'); putchar('n'); break;
+ case '\\': putchar('\\'); putchar('\\'); break;
+ case '\t': putchar('\\'); putchar('t'); break;
+ default:
+ /* ignore control chars */
+ if (!ISCNTRL((unsigned char)*p))
+ putchar(*p);
+ break;
+ }
+ }
+}
+
+/* Print text, replace TABs, carriage return and other whitespace with ' '.
+ * Other control chars are removed. */
+static void
+string_print(String *s)
+{
+ const char *p, *e;
+
+ if (!s->data || !s->len)
+ return;
+
+ p = s->data;
+ e = s->data + s->len;
+ for (; *p && p != e; p++) {
+ if (ISSPACE((unsigned char)*p))
+ putchar(' '); /* any whitespace to space */
+ else if (!ISCNTRL((unsigned char)*p))
+ /* ignore other control chars */
+ putchar(*p);
+ }
+}
+
+/* Print as UNIX timestamp, print nothing if the time is empty or invalid. */
+static void
+string_print_timestamp(String *s)
+{
+ long long t;
+
+ if (!s->data || !s->len)
+ return;
+
+ if (parsetime(s->data, &t) != -1)
+ printf("%lld", t);
+}
+
+static void
+printfields(void)
+{
+ string_print_timestamp(&fields[FeedFieldTime]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldTitle]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldLink]);
+ putchar(FieldSeparator);
+ string_print_encoded(&fields[FeedFieldContent]);
+ putchar(FieldSeparator);
+ fputs(contenttypes[contenttype], stdout);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldId]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldAuthor]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldEnclosure]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldCategory]);
+ putchar('\n');
+
+ if (ferror(stdout)) /* check for errors but do not flush */
+ checkfileerror(stdout, "<stdout>", 'w');
+}
+
+static void
+newitem(void)
+{
+ size_t i;
+
+ contenttype = ContentTypeNone;
+ for (i = 0; i < FeedFieldLast; i++)
+ string_clear(&fields[i]);
+
+}
+
+static void
+processnode(struct json_node *nodes, size_t depth, const char *value, size_t valuelen)
+{
+ /* item */
+ if (depth == 3) {
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ !attrcmp(nodes[1].name, "items")) {
+ if (itemisopen)
+ printfields();
+ newitem();
+ itemisopen = 1;
+ }
+ }
+
+ /* item attributes */
+ if (depth == 4) {
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ !attrcmp(nodes[1].name, "items")) {
+ if (!attrcmp(nodes[3].name, "content_html")) {
+ string_set(&fields[FeedFieldContent], value, valuelen);
+ contenttype = ContentTypeHTML;
+ } else if (!attrcmp(nodes[3].name, "content_text")) {
+ /* prefer HTML, if summary text is set overrid…
+ if (!fields[FeedFieldContent].len && contentty…
+ string_set(&fields[FeedFieldContent], …
+ contenttype = ContentTypePlain;
+ }
+ } else if (!attrcmp(nodes[3].name, "date_published")) {
+ /* published has higher priority than updated …
+ string_set(&fields[FeedFieldTime], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "date_modified")) {
+ if (!fields[FeedFieldTime].len)
+ string_append(&fields[FeedFieldTime], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "id")) {
+ if (!fields[FeedFieldId].len)
+ string_append(&fields[FeedFieldId], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "summary")) {
+ /* only if content_html or content_text is not set */
+ if (!fields[FeedFieldContent].len) {
+ string_append(&fields[FeedFieldContent], value, valuelen);
+ contenttype = ContentTypePlain;
+ }
+ } else if (!attrcmp(nodes[3].name, "title")) {
+ if (!fields[FeedFieldTitle].len)
+ string_set(&fields[FeedFieldTitle], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "url")) {
+ if (!fields[FeedFieldLink].len)
+ string_append(&fields[FeedFieldLink], value, valuelen);
+ }
+ }
+ }
+
+ if (depth == 5) {
+ /* 1.0 author name */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_OBJECT &&
+ nodes[4].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "author") &&
+ !attrcmp(nodes[4].name, "name")) {
+ if (!fields[FeedFieldAuthor].len)
+ string_append(&fields[FeedFieldAuthor], value, valuelen);
+ }
+
+ /* tags / categories */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "tags")) {
+ if (fields[FeedFieldCategory].len)
+ string_append(&fields[FeedFieldCategory], FieldMultiSeparator,
+ sizeof(FieldMultiSeparator) - 1);
+ string_append(&fields[FeedFieldCategory], value, valuelen);
+ }
+ }
+
+ if (depth == 6) {
+ /* 1.1 author name */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_OBJECT &&
+ nodes[5].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "authors") &&
+ !attrcmp(nodes[5].name, "name")) {
+ if (!fields[FeedFieldAuthor].len)
+ string_append(&fields[FeedFieldAuthor], value, valuelen);
+ }
+
+ /* enclosure attributes */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_OBJECT &&
+ (nodes[5].type == JSON_TYPE_STRING || nodes[5].type == JSO…
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "attachments") &&
+ !attrcmp(nodes[5].name, "url")) {
+ if (!fields[FeedFieldEnclosure].len)
+ string_append(&fields[FeedFieldEnclosure], value, valuelen);
+ }
+ }
+
+ if (ferror(stdout)) {
+ fprintf(stderr, "write error: <stdout>\n");
+ exit(2);
+ }
+}
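
processnode() never builds a document tree; it matches items purely on nesting depth and the names recorded in nodes[] by the json.h callback. For a hypothetical feed the relevant depths look like this:

    /*
     * { "items": [ {                        depth 3: nodes[2] is a new item
     *     "title": "Hello",                 depth 4: nodes[3].name == "title"
     *     "author": { "name": "J" },        depth 5: JSON Feed 1.0 author name
     *     "authors": [ { "name": "J" } ],   depth 6: JSON Feed 1.1 authors list
     *     "tags": [ "unix" ],               depth 5: nodes[3].name == "tags"
     *     "attachments": [ { "url": "" } ]  depth 6: enclosure URL
     * } ] }
     */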
+
+int
+main(int argc, char *argv[])
+{
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
+ switch (parsejson(processnode)) {
+ case JSON_ERROR_MEM:
+ errx(2, "error: cannot allocate enough memory");
+ case JSON_ERROR_INVALID:
+ errx(1, "error: invalid JSON");
+ }
+
+ /* print the last item if it is still open after parsing */
+ if (itemisopen)
+ printfields();
+
+ if (ferror(stdin))
+ errx(2, "read error: <stdin>");
+ if (fflush(stdout) || ferror(stdout))
+ errx(2, "write error: <stdout>");
+
+ return 0;
+}