git clone git://git.codemadness.org/jfconvert
---
commit 6dd24b7a0e38fe5d01726178ac4b5cc5f325cd59
parent 469bc51805a16876507da21e3145e05bb3c57e72
Author: Hiltjo Posthuma <[email protected]>
Date: Tue, 4 Apr 2023 18:39:13 +0200
add jf2sfeed: convert JSON Feed to sfeed(5)
Diffstat:
M Makefile | 16 ++++++++--------
M README | 7 +++++--
A jf2sfeed.1 | 39 +++++++++++++++++++++++++++++…
A jf2sfeed.c | 538 +++++++++++++++++++++++++++++…
4 files changed, 590 insertions(+), 10 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
.POSIX:
-NAME = jf2atom
+NAME = jfconvert
VERSION = 0.1
# paths
@@ -11,14 +11,14 @@ DOCPREFIX = ${PREFIX}/share/doc/${NAME}
RANLIB = ranlib
# use system flags.
-JFA_CFLAGS = ${CFLAGS}
-JFA_LDFLAGS = ${LDFLAGS}
-JFA_CPPFLAGS = -D_DEFAULT_SOURCE
+JFCONVERT_CFLAGS = ${CFLAGS}
+JFCONVERT_LDFLAGS = ${LDFLAGS}
+JFCONVERT_CPPFLAGS = -D_DEFAULT_SOURCE
# uncomment for conservative locked I/O.
-#JFA_CPPFLAGS = -D_DEFAULT_SOURCE -DGETNEXT=getchar
+#JFCONVERT_CPPFLAGS = -D_DEFAULT_SOURCE -DGETNEXT=getchar
-BIN = ${NAME}
+BIN = jf2atom jf2sfeed
SRC = ${BIN:=.c}
HDR = json.h
MAN1 = ${BIN:=.1}
@@ -41,10 +41,10 @@ OBJ = ${SRC:.c=.o} ${LIBJSONOBJ}
${OBJ}: ${HDR}
.o:
- ${CC} ${JFA_LDFLAGS} -o $@ $< ${LIB}
+ ${CC} ${JFCONVERT_LDFLAGS} -o $@ $< ${LIB}
.c.o:
- ${CC} ${JFA_CFLAGS} ${JFA_CPPFLAGS} -o $@ -c $<
+ ${CC} ${JFCONVERT_CFLAGS} ${JFCONVERT_CPPFLAGS} -o $@ -c $<
${LIBJSON}: ${LIBJSONOBJ}
${AR} -rc $@ $?
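
json.h itself is not part of this diff, but the commented-out -DGETNEXT=getchar line above hints at the input convention the parser presumably follows: a GETNEXT macro that defaults to the faster, unlocked stdio call (likely the reason _DEFAULT_SOURCE is set). A minimal sketch of that assumed pattern, not taken from this repository:

    #include <stdio.h>

    /* assumed convention: GETNEXT defaults to the unlocked variant and can be
     * overridden from the Makefile with -DGETNEXT=getchar for locked I/O */
    #ifndef GETNEXT
    #define GETNEXT getchar_unlocked
    #endif

    int
    main(void)
    {
        long n = 0;
        int c;

        while ((c = GETNEXT()) != EOF)  /* read stdin one byte at a time */
            n++;
        printf("read %ld bytes\n", n);
        return 0;
    }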
diff --git a/README b/README
@@ -1,9 +1,12 @@
-jf2atom
+jfconvert
-------
+JSON Feed (subset) to sfeed(5) converter.
JSON Feed (subset) to Atom converter.
JSON Feed specification: https://www.jsonfeed.org/version/1/
+
+sfeed: https://codemadness.org/sfeed.html
Atom specification: https://datatracker.ietf.org/doc/html/rfc4287
@@ -31,7 +34,7 @@ Optional dependencies
Examples and documentation
--------------------------
-See the man page.
+See the man pages.
License
diff --git a/jf2sfeed.1 b/jf2sfeed.1
@@ -0,0 +1,39 @@
+.Dd April 4, 2023
+.Dt JF2SFEED 1
+.Os
+.Sh NAME
+.Nm jf2sfeed
+.Nd convert JSON Feed to sfeed
+.Sh SYNOPSIS
+.Nm
+.Sh DESCRIPTION
+.Nm
+reads JSON data from stdin.
+It writes sfeed TSV data to stdout.
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+.Bd -literal
+jf2sfeed < input.json
+.Ed
+.Pp
+An example to support JSON Feed in sfeed:
+.Bd -literal
+curl -s 'https://codemadness.org/jsonfeed_content.json' | jf2sfeed | sfeed_curses
+.Ed
+.Sh SEE ALSO
+.Xr awk 1 ,
+.Xr curl 1 ,
+.Xr sfeed 1
+.Sh STANDARDS
+.Rs
+.%T The Atom Syndication Format
+.%R RFC 4287
+.Re
+.Rs
+.%T JSON Feed Version 1.1
+.%U https://www.jsonfeed.org/version/1.1/
+.%D Nov, 2022
+.Re
+.Sh AUTHORS
+.An Hiltjo Posthuma Aq Mt [email protected]
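
For orientation: the sfeed(5) output this man page refers to is one line per item with 9 TAB-separated fields, written in the order used by printfields() in jf2sfeed.c below: UNIX timestamp, title, link, content, content-type (plain or html), id, author, enclosure, category. A hypothetical item titled "Hello" with id "1", link https://example.org/post, plain-text content "Hi." and publication date 2023-04-04T18:39:13+02:00 would come out roughly as (<TAB> marks a literal TAB; author, enclosure and category are empty here):

    1680626353<TAB>Hello<TAB>https://example.org/post<TAB>Hi.<TAB>plain<TAB>1<TAB><TAB><TAB>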
diff --git a/jf2sfeed.c b/jf2sfeed.c
@@ -0,0 +1,538 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __OpenBSD__
+#include <unistd.h>
+#else
+#define pledge(a,b) 0
+#endif
+
+#include "json.h"
+
+/* hint for compilers and static analyzers that a function exits */
+#ifndef __dead
+#define __dead
+#endif
+
+/* ctype-like macros, but always compatible with ASCII / UTF-8 */
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+
+/* compare attributes case-sensitively */
+#define attrcmp strcmp
+
+enum {
+ FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
+ FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
+ FeedFieldLast
+};
+
+enum ContentType {
+ ContentTypeNone = 0,
+ ContentTypePlain = 1,
+ ContentTypeHTML = 2
+};
+static const char *contenttypes[] = { "", "plain", "html" };
+
+/* String data / memory pool */
+typedef struct string {
+ char *data; /* data */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+static String fields[FeedFieldLast]; /* data for current item */
+static enum ContentType contenttype; /* content-type for item */
+static int itemisopen = 0;
+
+static const int FieldSeparator = '\t';
+/* separator for multiple values in a field, separator should be 1 byte */
+static const char FieldMultiSeparator[] = "|";
+
+/* print to stderr, print error message of errno and exit().
+ Unlike BSD err() it does not prefix __progname */
+__dead void
+err(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+ int saved_errno;
+
+ saved_errno = errno;
+
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputs(": ", stderr);
+ }
+ fprintf(stderr, "%s\n", strerror(saved_errno));
+
+ exit(exitstatus);
+}
+
+/* print to stderr and exit().
+ Unlike BSD errx() it does not prefix __progname */
+__dead void
+errx(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+ fputs("\n", stderr);
+
+ exit(exitstatus);
+}
+
+
+/* Convert time fields. Returns a UNIX timestamp. */
+static long long
+datetounix(long long year, int mon, int day, int hour, int min, int sec)
+{
+ static const long secs_through_month[] = {
+ 0, 31 * 86400, 59 * 86400, 90 * 86400,
+ 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
+ 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
+ int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
+ long long t;
+
+ if (year - 2ULL <= 136) {
+ leaps = (year - 68) >> 2;
+ if (!((year - 68) & 3)) {
+ leaps--;
+ is_leap = 1;
+ } else {
+ is_leap = 0;
+ }
+ t = 31536000 * (year - 70) + 86400 * leaps;
+ } else {
+ cycles = (year - 100) / 400;
+ rem = (year - 100) % 400;
+ if (rem < 0) {
+ cycles--;
+ rem += 400;
+ }
+ if (!rem) {
+ is_leap = 1;
+ } else {
+ if (rem >= 300)
+ centuries = 3, rem -= 300;
+ else if (rem >= 200)
+ centuries = 2, rem -= 200;
+ else if (rem >= 100)
+ centuries = 1, rem -= 100;
+ if (rem) {
+ leaps = rem / 4U;
+ rem %= 4U;
+ is_leap = !rem;
+ }
+ }
+ leaps += 97 * cycles + 24 * centuries - is_leap;
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200;
+ }
+ t += secs_through_month[mon];
+ if (is_leap && mon >= 2)
+ t += 86400;
+ t += 86400LL * (day - 1);
+ t += 3600LL * hour;
+ t += 60LL * min;
+ t += sec;
+
+ return t;
+}
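
The year argument is years since 1900 and the month is zero-based, matching how parsetime() calls it below. A hypothetical sanity check (not part of this patch) that could be compiled into jf2sfeed.c, where the static function is visible:

    #include <assert.h>

    static void
    test_datetounix(void)
    {
        /* the UNIX epoch, 1970-01-01T00:00:00Z */
        assert(datetounix(70, 0, 1, 0, 0, 0) == 0);
        /* 2000-01-01T00:00:00Z */
        assert(datetounix(100, 0, 1, 0, 0, 0) == 946684800LL);
        /* the commit date above in UTC: 2023-04-04T16:39:13Z */
        assert(datetounix(123, 3, 4, 16, 39, 13) == 1680626353LL);
    }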
+
+/* Get timezone from string, return time offset in seconds from UTC. */
+static long
+gettzoffset(const char *s)
+{
+ const char *p;
+ long tzhour = 0, tzmin = 0;
+ size_t i;
+
+ for (; ISSPACE((unsigned char)*s); s++)
+ ;
+ switch (*s) {
+ case '-': /* offset */
+ case '+':
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzhour = (tzhour * 10) + (*p - '0');
+ if (*p == ':')
+ p++;
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzmin = (tzmin * 10) + (*p - '0');
+ return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
+ default: /* timezone name */
+ break;
+ }
+ return 0;
+}
+
+/* Parse time string `s` into the UNIX timestamp `tp`.
+ Returns 0 on success or -1 on failure. */
+static int
+parsetime(const char *s, long long *tp)
+{
+ int va[6] = { 0 }, i, v, vi;
+
+ for (; ISSPACE((unsigned char)*s); s++)
+ ;
+
+ if (!ISDIGIT((unsigned char)s[0]) ||
+ !ISDIGIT((unsigned char)s[1]) ||
+ !ISDIGIT((unsigned char)s[2]) ||
+ !ISDIGIT((unsigned char)s[3]))
+ return -1;
+
+ /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
+ vi = 0;
+
+ /* parse time parts (and possibly remaining date parts) */
+ for (; *s && vi < 6; vi++) {
+ for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
+ ISDIGIT((unsigned char)*s); s++, i++) {
+ v = (v * 10) + (*s - '0');
+ }
+ va[vi] = v;
+
+ if ((vi < 2 && *s == '-') ||
+ (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
+ (vi > 2 && *s == ':'))
+ s++;
+ }
+
+ /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
+ if (*s == '.') {
+ for (s++; ISDIGIT((unsigned char)*s); s++)
+ ;
+ }
+
+ /* invalid range */
+ if (va[0] < 0 || va[0] > 9999 ||
+ va[1] < 1 || va[1] > 12 ||
+ va[2] < 1 || va[2] > 31 ||
+ va[3] < 0 || va[3] > 23 ||
+ va[4] < 0 || va[4] > 59 ||
+ va[5] < 0 || va[5] > 60) /* allow leap second */
+ return -1;
+
+ *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
+ gettzoffset(s);
+
+ return 0;
+}
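
parsetime() accepts the ISO-style date formats listed in the comment above and subtracts the offset that gettzoffset() parses from whatever follows the seconds (and optional milliseconds). A hypothetical check, again assuming it is placed inside jf2sfeed.c where these static functions are visible:

    static void
    test_parsetime(void)
    {
        long long t;

        /* "+02:00" is parsed by gettzoffset() as 7200 seconds east of UTC */
        if (parsetime("2023-04-04T18:39:13+02:00", &t) == 0 && t == 1680626353LL)
            puts("ok");
    }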
+
+/* Handle read or write errors for a FILE * stream */
+static void
+checkfileerror(FILE *fp, const char *name, int mode)
+{
+ if (mode == 'r' && ferror(fp))
+ errx(1, "read error: %s", name);
+ else if (mode == 'w' && (fflush(fp) || ferror(fp)))
+ errx(1, "write error: %s", name);
+}
+
+/* Clear string only; don't free, prevents unnecessary reallocation. */
+static void
+string_clear(String *s)
+{
+ if (s->data)
+ s->data[0] = '\0';
+ s->len = 0;
+}
+
+static void
+string_buffer_realloc(String *s, size_t newlen)
+{
+ size_t alloclen;
+
+ if (newlen > SIZE_MAX / 2) {
+ alloclen = SIZE_MAX;
+ } else {
+ for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
+ ;
+ }
+ if (!(s->data = realloc(s->data, alloclen)))
+ err(1, "realloc");
+ s->bufsiz = alloclen;
+}
+
+/* Append data to String, s->data and data may not overlap. */
+static void
+string_append(String *s, const char *data, size_t len)
+{
+ if (!len)
+ return;
+
+ if (s->len >= SIZE_MAX - len) {
+ errno = EOVERFLOW;
+ err(1, "realloc");
+ }
+
+ /* check if allocation is necessary, never shrink the buffer. */
+ if (s->len + len >= s->bufsiz)
+ string_buffer_realloc(s, s->len + len + 1);
+ memcpy(s->data + s->len, data, len);
+ s->len += len;
+ s->data[s->len] = '\0';
+}
+
+/* Clear and append string */
+static void
+string_set(String *s, const char *data, size_t len)
+{
+ string_clear(s);
+ string_append(s, data, len);
+}
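
The String helpers above form a small grow-only pool: string_buffer_realloc() doubles from a 64-byte floor and string_clear() resets the length without freeing, so the per-field buffers are reused across items. A hypothetical illustration of the resulting behaviour:

    String s = { 0 };              /* hypothetical local, for illustration only */

    string_set(&s, "hello", 5);    /* allocates 64 bytes, len == 5 */
    string_append(&s, " world", 6);/* fits in the buffer, no reallocation: len == 11 */
    string_clear(&s);              /* len == 0, the 64-byte buffer is kept */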
+
+/* Print text, encode TABs, newlines and '\';
+ * other control characters are removed. */
+static void
+string_print_encoded(String *s)
+{
+ const char *p, *e;
+
+ if (!s->data || !s->len)
+ return;
+
+ p = s->data;
+ e = p + s->len;
+
+ for (; *p && p != e; p++) {
+ switch (*p) {
+ case '\n': putchar('\\'); putchar('n'); break;
+ case '\\': putchar('\\'); putchar('\\'); break;
+ case '\t': putchar('\\'); putchar('t'); break;
+ default:
+ /* ignore control chars */
+ if (!ISCNTRL((unsigned char)*p))
+ putchar(*p);
+ break;
+ }
+ }
+}
+
+/* Print text, replace TABs, carriage return and other whitespace with ' '.
+ * Other control chars are removed. */
+static void
+string_print(String *s)
+{
+ const char *p, *e;
+
+ if (!s->data || !s->len)
+ return;
+
+ p = s->data;
+ e = s->data + s->len;
+ for (; *p && p != e; p++) {
+ if (ISSPACE((unsigned char)*p))
+ putchar(' '); /* any whitespace to space */
+ else if (!ISCNTRL((unsigned char)*p))
+ /* ignore other control chars */
+ putchar(*p);
+ }
+}
+
+/* Print as UNIX timestamp, print nothing if the time is empty or invalid. */
+static void
+string_print_timestamp(String *s)
+{
+ long long t;
+
+ if (!s->data || !s->len)
+ return;
+
+ if (parsetime(s->data, &t) != -1)
+ printf("%lld", t);
+}
+
+static void
+printfields(void)
+{
+ string_print_timestamp(&fields[FeedFieldTime]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldTitle]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldLink]);
+ putchar(FieldSeparator);
+ string_print_encoded(&fields[FeedFieldContent]);
+ putchar(FieldSeparator);
+ fputs(contenttypes[contenttype], stdout);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldId]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldAuthor]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldEnclosure]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldCategory]);
+ putchar('\n');
+
+ if (ferror(stdout)) /* check for errors but do not flush */
+ checkfileerror(stdout, "<stdout>", 'w');
+}
+
+static void
+newitem(void)
+{
+ size_t i;
+
+ contenttype = ContentTypeNone;
+ for (i = 0; i < FeedFieldLast; i++)
+ string_clear(&fields[i]);
+
+}
+
+static void
+processnode(struct json_node *nodes, size_t depth, const char *value, size_t valuelen)
+{
+ /* item */
+ if (depth == 3) {
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ !attrcmp(nodes[1].name, "items")) {
+ if (itemisopen)
+ printfields();
+ newitem();
+ itemisopen = 1;
+ }
+ }
+
+ /* item attributes */
+ if (depth == 4) {
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ !attrcmp(nodes[1].name, "items")) {
+ if (!attrcmp(nodes[3].name, "content_html")) {
+ string_set(&fields[FeedFieldContent], value, valuelen);
+ contenttype = ContentTypeHTML;
+ } else if (!attrcmp(nodes[3].name, "content_text")) {
+ /* prefer HTML, if summary text is set overrid…
+ if (!fields[FeedFieldContent].len && contentty…
+ string_set(&fields[FeedFieldContent], …
+ contenttype = ContentTypePlain;
+ }
+ } else if (!attrcmp(nodes[3].name, "date_published")) {
+ /* published has higher priority than updated …
+ string_set(&fields[FeedFieldTime], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "date_modified")) {
+ if (!fields[FeedFieldTime].len)
+ string_append(&fields[FeedFieldTime], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "id")) {
+ if (!fields[FeedFieldId].len)
+ string_append(&fields[FeedFieldId], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "summary")) {
+ /* only if content_html or content_text is not set */
+ if (!fields[FeedFieldContent].len) {
+ string_append(&fields[FeedFieldContent], value, valuelen);
+ contenttype = ContentTypePlain;
+ }
+ } else if (!attrcmp(nodes[3].name, "title")) {
+ if (!fields[FeedFieldTitle].len)
+ string_set(&fields[FeedFieldTitle], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "url")) {
+ if (!fields[FeedFieldLink].len)
+ string_append(&fields[FeedFieldLink], value, valuelen);
+ }
+ }
+ }
+
+ if (depth == 5) {
+ /* 1.0 author name */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_OBJECT &&
+ nodes[4].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "author") &&
+ !attrcmp(nodes[4].name, "name")) {
+ if (!fields[FeedFieldAuthor].len)
+ string_append(&fields[FeedFieldAuthor], value, valuelen);
+ }
+
+ /* tags / categories */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "tags")) {
+ if (fields[FeedFieldCategory].len)
+ string_append(&fields[FeedFieldCategory], FieldMultiSeparator,
+ sizeof(FieldMultiSeparator) - 1);
+ string_append(&fields[FeedFieldCategory], value, valuelen);
+ }
+ }
+
+ if (depth == 6) {
+ /* 1.1 author name */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_OBJECT &&
+ nodes[5].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "authors") &&
+ !attrcmp(nodes[5].name, "name")) {
+ if (!fields[FeedFieldAuthor].len)
+ string_append(&fields[FeedFieldAuthor], value, valuelen);
+ }
+
+ /* enclosure attributes */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_OBJECT &&
+ (nodes[5].type == JSON_TYPE_STRING || nodes[5].type == JSO…
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "attachments") &&
+ !attrcmp(nodes[5].name, "url")) {
+ if (!fields[FeedFieldEnclosure].len)
+ string_append(&fields[FeedFieldEnclosure], value, valuelen);
+ }
+ }
+
+ if (ferror(stdout)) {
+ fprintf(stderr, "write error: <stdout>\n");
+ exit(2);
+ }
+}
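
processnode() never builds a document tree; it matches items purely on nesting depth and the names recorded in nodes[] by the json.h callback. For a hypothetical feed the relevant depths look like this:

    /*
     * { "items": [ {                        depth 3: nodes[2] is a new item
     *     "title": "Hello",                 depth 4: nodes[3].name == "title"
     *     "author": { "name": "J" },        depth 5: JSON Feed 1.0 author name
     *     "authors": [ { "name": "J" } ],   depth 6: JSON Feed 1.1 authors list
     *     "tags": [ "unix" ],               depth 5: nodes[3].name == "tags"
     *     "attachments": [ { "url": "" } ]  depth 6: enclosure URL
     * } ] }
     */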
+
+int
+main(int argc, char *argv[])
+{
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
+ switch (parsejson(processnode)) {
+ case JSON_ERROR_MEM:
+ errx(2, "error: cannot allocate enough memory");
+ case JSON_ERROR_INVALID:
+ errx(1, "error: invalid JSON");
+ }
+
+ /* print the last item if it is still open after parsing */
+ if (itemisopen)
+ printfields();
+
+ if (ferror(stdin))
+ errx(2, "read error: <stdin>");
+ if (fflush(stdout) || ferror(stdout))
+ errx(2, "write error: <stdout>");
+
+ return 0;
+}