GopherProxy

	jf2sfeed.c - jfconvert - JSON Feed (subset) to sfeed or Atom converter
	git clone git://git.codemadness.org/jfconvert
	Log
	Files
	Refs
	README
	LICENSE
	---
	jf2sfeed.c (14382B)
	---
	1 #include <errno.h>
	2 #include <stdarg.h>
	3 #include <stdint.h>
	4 #include <stdio.h>
	5 #include <stdlib.h>
	6 #include <string.h>
	7
	8 #ifdef __OpenBSD__
	9 #include <unistd.h>
	10 #else
	11 #define pledge(a,b) 0
	12 #endif
	13
	14 #include "json.h"
	15
	/* hint for compilers and static analyzers that a function exits */
	#ifndef __dead
	#define __dead
	#endif

	/* ctype-like macros, but always compatible with ASCII / UTF-8.
	   The argument is fully parenthesized so that compound expressions are
	   converted as a whole. ISCNTRL and ISSPACE evaluate their argument more
	   than once: do not pass expressions with side effects. Callers in this
	   file pass a value cast to unsigned char. */
	#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
	#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
	#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
	/* space, or one of the 5 consecutive characters starting at '\t' */
	#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))

	/* compare attributes case-sensitively */
	#define attrcmp strcmp
	29
	/* Indices into the per-item `fields` array, one per stored field. */
	enum {
		FeedFieldTime = 0,
		FeedFieldTitle,
		FeedFieldLink,
		FeedFieldContent,
		FeedFieldId,
		FeedFieldAuthor,
		FeedFieldEnclosure,
		FeedFieldCategory,
		FeedFieldLast /* number of fields, used as the array size */
	};
	35
	/* Kind of content stored for the current item. */
	enum ContentType {
		ContentTypeNone  = 0,
		ContentTypePlain = 1,
		ContentTypeHTML  = 2
	};
	/* Text printed for each ContentType, indexed by the enum value. */
	static const char *contenttypes[] = {
		[ContentTypeNone]  = "",
		[ContentTypePlain] = "plain",
		[ContentTypeHTML]  = "html"
	};
	42
	/* Growable string buffer. The allocation is kept and reused when the
	   string is cleared. */
	typedef struct string {
		char   *data;   /* NUL-terminated contents, NULL while nothing is allocated */
		size_t  len;    /* string length, excluding the terminating NUL */
		size_t  bufsiz; /* allocated size of data in bytes */
	} String;
	49
	50 static String fields[FeedFieldLast]; /* data for current item */
	51 static enum ContentType contenttype; /* content-type for item */
	52 static int itemisopen = 0;
	53
	54 static const int FieldSeparator = '\t';
	55 /* separator for multiple values in a field, separator should be 1 byte …
	56 static const char FieldMultiSeparator[] = "\|";
	57
	58 /* print to stderr, print error message of errno and exit().
	59 Unlike BSD err() it does not prefix __progname */
	60 __dead void
	61 err(int exitstatus, const char *fmt, ...)
	62 {
	63 va_list ap;
	64 int saved_errno;
	65
	66 saved_errno = errno;
	67
	68 if (fmt) {
	69 va_start(ap, fmt);
	70 vfprintf(stderr, fmt, ap);
	71 va_end(ap);
	72 fputs(": ", stderr);
	73 }
	74 fprintf(stderr, "%s\n", strerror(saved_errno));
	75
	76 exit(exitstatus);
	77 }
	78
	79 /* print to stderr and exit().
	80 Unlike BSD errx() it does not prefix __progname */
	81 __dead void
	82 errx(int exitstatus, const char *fmt, ...)
	83 {
	84 va_list ap;
	85
	86 if (fmt) {
	87 va_start(ap, fmt);
	88 vfprintf(stderr, fmt, ap);
	89 va_end(ap);
	90 }
	91 fputs("\n", stderr);
	92
	93 exit(exitstatus);
	94 }
	95
	/* Convert date and time fields to a UNIX timestamp (signed, at least
	   64-bit). The fields follow the struct tm conventions:
	   year = year - 1900 and mon = month - 1.
	   `mon` indexes a 12-entry table and is not range-checked here. */
	static long long
	datetounix(long long year, int mon, int day, int hour, int min, int sec)
	{
		/* seconds elapsed before the start of each month in a non-leap year */
		static const long month_offset[] = {
			  0 * 86400,  31 * 86400,  59 * 86400,  90 * 86400,
			120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
			243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
		int leapyear = 0, leapdays = 0, centuries = 0, cycles, rem;
		long long ts;

		if (year - 2ULL <= 136) {
			/* fast path for the common range 1902 up to and including 2038:
			   leap days relative to 1970, one every 4 years */
			leapdays = (year - 68) >> 2;
			leapyear = !((year - 68) & 3);
			/* the leap day of the year itself is only added after February,
			   see below */
			if (leapyear)
				leapdays--;
			ts = 31536000 * (year - 70) + (86400 * leapdays); /* 365 days */
		} else {
			/* general rule, relative to the year 2000: a leap year every
			   4 years, skipped every 100 years unless divisible by 400 */
			cycles = (year - 100) / 400;
			rem = (year - 100) % 400;
			if (rem < 0) {
				cycles--;
				rem += 400;
			}
			if (rem == 0) {
				leapyear = 1;
			} else {
				centuries = rem / 100;
				rem %= 100;
				/* rem == 0 is a century year that is not a leap year */
				if (rem != 0) {
					leapdays = rem / 4;
					leapyear = (rem % 4) == 0;
				}
			}
			leapdays += (97 * cycles) + (24 * centuries) - leapyear;

			/* offset of the year 2000 including the 8 leap days since 1970:
			   ((30 * 365) + 8) * 86400 = 946771200 */
			ts = ((year - 100) * 31536000LL) + (leapdays * 86400LL) + 946771200LL;
		}

		ts += month_offset[mon];
		if (leapyear && mon >= 2)
			ts += 86400; /* past February in a leap year */
		ts += 86400LL * (day - 1) + 3600LL * hour + 60LL * min + sec;

		return ts;
	}
	166
	/* Read a numeric timezone offset from `s`: optional leading whitespace,
	   a '+' or '-' sign, up to 2 hour digits, an optional ':' and up to
	   2 minute digits. Returns the offset from UTC in seconds, or 0 when the
	   string does not start with a sign (for example a timezone name). */
	static long
	gettzoffset(const char *s)
	{
		long hours = 0, minutes = 0, sign;
		const char *p;
		size_t n;

		while (ISSPACE((unsigned char)*s))
			s++;

		if (*s != '+' && *s != '-')
			return 0; /* timezone name or nothing: no offset */
		sign = (*s == '-') ? -1 : 1;

		p = s + 1;
		for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
			hours = (hours * 10) + (*p - '0');
		if (*p == ':')
			p++;
		for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
			minutes = (minutes * 10) + (*p - '0');

		return ((hours * 3600) + (minutes * 60)) * sign;
	}
	192
	/* Parse the time string `s` and store the UNIX timestamp in `*tp`.
	   Accepted layouts are "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" and the
	   same digits without separators; fractional seconds are skipped and the
	   remainder is read as a timezone offset.
	   Returns 0 on success or -1 on failure (`*tp` is then left untouched). */
	static int
	parsetime(const char *s, long long *tp)
	{
		/* inclusive limits for year, month, day, hour, minute, second;
		   60 seconds allows a leap second */
		static const int lowest[6]  = {    0,  1,  1,  0,  0,  0 };
		static const int highest[6] = { 9999, 12, 31, 23, 59, 60 };
		int parts[6] = { 0 };
		int idx, ndigits, maxdigits, num;

		while (ISSPACE((unsigned char)*s))
			s++;

		/* the string has to start with a 4-digit year */
		for (idx = 0; idx < 4; idx++) {
			if (!ISDIGIT((unsigned char)s[idx]))
				return -1;
		}

		/* read up to 6 numbers: 4 digits for the year, 2 for the others */
		for (idx = 0; *s && idx < 6; idx++) {
			maxdigits = (idx == 0) ? 4 : 2;
			num = 0;
			for (ndigits = 0; ndigits < maxdigits &&
			     ISDIGIT((unsigned char)*s); ndigits++, s++) {
				num = (num * 10) + (*s - '0');
			}
			parts[idx] = num;

			/* skip at most one separator that fits the position */
			if (idx < 2) {
				if (*s == '-')
					s++;
			} else if (idx == 2) {
				if (*s == 'T' || ISSPACE((unsigned char)*s))
					s++;
			} else if (*s == ':') {
				s++;
			}
		}

		/* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
		if (*s == '.') {
			s++;
			while (ISDIGIT((unsigned char)*s))
				s++;
		}

		/* invalid range */
		for (idx = 0; idx < 6; idx++) {
			if (parts[idx] < lowest[idx] || parts[idx] > highest[idx])
				return -1;
		}

		*tp = datetounix(parts[0] - 1900, parts[1] - 1, parts[2],
		                 parts[3], parts[4], parts[5]) -
		      gettzoffset(s);

		return 0;
	}
	246
	/* Exit with status 1 and a message naming `name` when the stream has
	   failed. `mode` selects the check: 'r' tests the error indicator only,
	   'w' flushes the stream first. Any other mode does nothing. */
	static void
	checkfileerror(FILE *fp, const char *name, int mode)
	{
		switch (mode) {
		case 'r':
			if (ferror(fp))
				errx(1, "read error: %s", name);
			break;
		case 'w':
			if (fflush(fp) || ferror(fp))
				errx(1, "write error: %s", name);
			break;
		default:
			break;
		}
	}
	256
	257 /* Clear string only; don't free, prevents unnecessary reallocation. */
	258 static void
	259 string_clear(String *s)
	260 {
	261 if (s->data)
	262 s->data[0] = '\0';
	263 s->len = 0;
	264 }
	265
	266 static void
	267 string_buffer_realloc(String *s, size_t newlen)
	268 {
	269 size_t alloclen;
	270
	271 if (newlen > SIZE_MAX / 2) {
	272 alloclen = SIZE_MAX;
	273 } else {
	274 for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
	275 ;
	276 }
	277 if (!(s->data = realloc(s->data, alloclen)))
	278 err(1, "realloc");
	279 s->bufsiz = alloclen;
	280 }
	281
	282 /* Append data to String, s->data and data may not overlap. */
	283 static void
	284 string_append(String s, const char data, size_t len)
	285 {
	286 if (!len)
	287 return;
	288
	289 if (s->len >= SIZE_MAX - len) {
	290 errno = ENOMEM;
	291 err(1, "realloc");
	292 }
	293
	294 /* check if allocation is necessary, never shrink the buffer. */
	295 if (s->len + len >= s->bufsiz)
	296 string_buffer_realloc(s, s->len + len + 1);
	297 memcpy(s->data + s->len, data, len);
	298 s->len += len;
	299 s->data[s->len] = '\0';
	300 }
	301
	302 /* Clear and append string */
	303 static void
	304 string_set(String s, const char data, size_t len)
	305 {
	306 string_clear(s);
	307 string_append(s, data, len);
	308 }
	309
	310 /* Print text, encode TABs, newlines and '\', remove other whitespace.
	311 * Remove leading and trailing whitespace. */
	312 static void
	313 string_print_encoded(String *s)
	314 {
	315 const char p, e;
	316
	317 if (!s->data \|\| !s->len)
	318 return;
	319
	320 p = s->data;
	321 e = p + s->len;
	322
	323 for (; *p && p != e; p++) {
	324 switch (*p) {
	325 case '\n': putchar('\\'); putchar('n'); break;
	326 case '\\': putchar('\\'); putchar('\\'); break;
	327 case '\t': putchar('\\'); putchar('t'); break;
	328 default:
	329 /* ignore control chars */
	330 if (!ISCNTRL((unsigned char)*p))
	331 putchar(*p);
	332 break;
	333 }
	334 }
	335 }
	336
	337 /* Print text, replace TABs, carriage return and other whitespace with '…
	338 * Other control chars are removed. Remove leading and trailing whitespa…
	339 static void
	340 string_print(String *s)
	341 {
	342 const char p, e;
	343
	344 if (!s->data \|\| !s->len)
	345 return;
	346
	347 p = s->data;
	348 e = s->data + s->len;
	349 for (; *p && p != e; p++) {
	350 if (ISSPACE((unsigned char)*p))
	351 putchar(' '); /* any whitespace to space */
	352 else if (!ISCNTRL((unsigned char)*p))
	353 /* ignore other control chars */
	354 putchar(*p);
	355 }
	356 }
	357
	358 /* Print as UNIX timestamp, print nothing if the time is empty or invali…
	359 static void
	360 string_print_timestamp(String *s)
	361 {
	362 long long t;
	363
	364 if (!s->data \|\| !s->len)
	365 return;
	366
	367 if (parsetime(s->data, &t) != -1)
	368 printf("%lld", t);
	369 }
	370
	371 static void
	372 printfields(void)
	373 {
	374 string_print_timestamp(&fields[FeedFieldTime]);
	375 putchar(FieldSeparator);
	376 string_print(&fields[FeedFieldTitle]);
	377 putchar(FieldSeparator);
	378 string_print(&fields[FeedFieldLink]);
	379 putchar(FieldSeparator);
	380 string_print_encoded(&fields[FeedFieldContent]);
	381 putchar(FieldSeparator);
	382 fputs(contenttypes[contenttype], stdout);
	383 putchar(FieldSeparator);
	384 string_print(&fields[FeedFieldId]);
	385 putchar(FieldSeparator);
	386 string_print(&fields[FeedFieldAuthor]);
	387 putchar(FieldSeparator);
	388 string_print(&fields[FeedFieldEnclosure]);
	389 putchar(FieldSeparator);
	390 string_print(&fields[FeedFieldCategory]);
	391 putchar('\n');
	392
	393 if (ferror(stdout)) /* check for errors but do not flush */
	394 checkfileerror(stdout, "<stdout>", 'w');
	395 }
	396
	397 static void
	398 newitem(void)
	399 {
	400 size_t i;
	401
	402 contenttype = ContentTypeNone;
	403 for (i = 0; i < FeedFieldLast; i++)
	404 string_clear(&fields[i]);
	405
	406 }
	407
	/* JSON node callback that main() hands to parsejson(). It dispatches on
	   `depth` and on the types and names of nodes[0] .. nodes[depth - 1]:
	   - depth 3: an object inside the top-level "items" array starts a new
	     item; an item that was already open is printed first.
	   - depth 4: attributes of the item (content, dates, id, summary, title,
	     url) are stored in `fields`; HTML content sets ContentTypeHTML, text
	     content and the summary set ContentTypePlain.
	   - depth 5: "author" -> "name" (JSON Feed 1.0) and the "tags" entries;
	     a separator is appended before every tag after the first one.
	   - depth 6: "authors" -> "name" (JSON Feed 1.1) and "attachments" ->
	     "url" for the enclosure.
	   Most fields are only stored while they are still empty, so the first
	   value wins. After handling the node a write error on stdout prints a
	   message to stderr and exits with status 2.
	   NOTE(review): several lines of this function are cut off in this
	   listing, so the exact arguments and some conditions could not be
	   confirmed here. */
	408 static void
	409 processnode(struct json_node nodes, size_t depth, const char value, si…
	410 {
	411 /* item */
	412 if (depth == 3) {
	413 if (nodes[0].type == JSON_TYPE_OBJECT &&
	414 nodes[1].type == JSON_TYPE_ARRAY &&
	415 nodes[2].type == JSON_TYPE_OBJECT &&
	416 !attrcmp(nodes[1].name, "items")) {
	417 if (itemisopen)
	418 printfields();
	419 newitem();
	420 itemisopen = 1;
	421 }
	422 }
	423
	424 /* item attributes */
	425 if (depth == 4) {
	426 if (nodes[0].type == JSON_TYPE_OBJECT &&
	427 nodes[1].type == JSON_TYPE_ARRAY &&
	428 nodes[2].type == JSON_TYPE_OBJECT &&
	429 !attrcmp(nodes[1].name, "items")) {
	430 if (!attrcmp(nodes[3].name, "content_html")) {
	431 string_set(&fields[FeedFieldContent], va…
	432 contenttype = ContentTypeHTML;
	433 } else if (!attrcmp(nodes[3].name, "content_text…
	434 /* prefer HTML, if summary text is set o…
	435 if (!fields[FeedFieldContent].len && con…
	436 string_set(&fields[FeedFieldCont…
	437 contenttype = ContentTypePlain;
	438 }
	439 } else if (!attrcmp(nodes[3].name, "date_publish…
	440 /* published has higher priority than up…
	441 string_set(&fields[FeedFieldTime], value…
	442 } else if (!attrcmp(nodes[3].name, "date_modifie…
	443 if (!fields[FeedFieldTime].len)
	444 string_append(&fields[FeedFieldT…
	445 } else if (!attrcmp(nodes[3].name, "id")) {
	446 if (!fields[FeedFieldId].len)
	447 string_append(&fields[FeedFieldI…
	448 } else if (!attrcmp(nodes[3].name, "summary")) {
	449 /* only if content_html or content_text …
	450 if (!fields[FeedFieldContent].len) {
	451 string_append(&fields[FeedFieldC…
	452 contenttype = ContentTypePlain;
	453 }
	454 } else if (!attrcmp(nodes[3].name, "title")) {
	455 if (!fields[FeedFieldTitle].len)
	456 string_set(&fields[FeedFieldTitl…
	457 } else if (!attrcmp(nodes[3].name, "url")) {
	458 if (!fields[FeedFieldLink].len)
	459 string_append(&fields[FeedFieldL…
	460 }
	461 }
	462 }
	463
	464 if (depth == 5) {
	465 /* 1.0 author name */
	466 if (nodes[0].type == JSON_TYPE_OBJECT &&
	467 nodes[1].type == JSON_TYPE_ARRAY &&
	468 nodes[2].type == JSON_TYPE_OBJECT &&
	469 nodes[3].type == JSON_TYPE_OBJECT &&
	470 nodes[4].type == JSON_TYPE_STRING &&
	471 !attrcmp(nodes[1].name, "items") &&
	472 !attrcmp(nodes[3].name, "author") &&
	473 !attrcmp(nodes[4].name, "name")) {
	474 if (!fields[FeedFieldAuthor].len)
	475 string_append(&fields[FeedFieldAuthor], …
	476 }
	477
	478 /* tags / categories */
	479 if (nodes[0].type == JSON_TYPE_OBJECT &&
	480 nodes[1].type == JSON_TYPE_ARRAY &&
	481 nodes[2].type == JSON_TYPE_OBJECT &&
	482 nodes[3].type == JSON_TYPE_ARRAY &&
	483 nodes[4].type == JSON_TYPE_STRING &&
	484 !attrcmp(nodes[1].name, "items") &&
	485 !attrcmp(nodes[3].name, "tags")) {
	486 if (fields[FeedFieldCategory].len)
	487 string_append(&fields[FeedFieldCategory]…
	488 sizeof(FieldMultiSeparator…
	489 string_append(&fields[FeedFieldCategory], value,…
	490 }
	491 }
	492
	493 if (depth == 6) {
	494 /* 1.1 author name */
	495 if (nodes[0].type == JSON_TYPE_OBJECT &&
	496 nodes[1].type == JSON_TYPE_ARRAY &&
	497 nodes[2].type == JSON_TYPE_OBJECT &&
	498 nodes[3].type == JSON_TYPE_ARRAY &&
	499 nodes[4].type == JSON_TYPE_OBJECT &&
	500 nodes[5].type == JSON_TYPE_STRING &&
	501 !attrcmp(nodes[1].name, "items") &&
	502 !attrcmp(nodes[3].name, "authors") &&
	503 !attrcmp(nodes[5].name, "name")) {
	504 if (!fields[FeedFieldAuthor].len)
	505 string_append(&fields[FeedFieldAuthor], …
	506 }
	507
	508 /* enclosure attributes */
	509 if (nodes[0].type == JSON_TYPE_OBJECT &&
	510 nodes[1].type == JSON_TYPE_ARRAY &&
	511 nodes[2].type == JSON_TYPE_OBJECT &&
	512 nodes[3].type == JSON_TYPE_ARRAY &&
	513 nodes[4].type == JSON_TYPE_OBJECT &&
	514 (nodes[5].type == JSON_TYPE_STRING \|\| nodes[5].type …
	515 !attrcmp(nodes[1].name, "items") &&
	516 !attrcmp(nodes[3].name, "attachments") &&
	517 !attrcmp(nodes[5].name, "url")) {
	518 if (!fields[FeedFieldEnclosure].len)
	519 string_append(&fields[FeedFieldEnclosure…
	520 }
	521 }
	522
	523 if (ferror(stdout)) {
	524 fprintf(stderr, "write error: <stdout>\n");
	525 exit(2);
	526 }
	527 }
	528
	529 int
	530 main(int argc, char *argv[])
	531 {
	532 if (pledge("stdio", NULL) == -1)
	533 err(1, "pledge");
	534
	535 switch (parsejson(processnode)) {
	536 case JSON_ERROR_MEM:
	537 errx(2, "error: cannot allocate enough memory");
	538 case JSON_ERROR_INVALID:
	539 errx(1, "error: invalid JSON");
	540 }
	541
	542 if (itemisopen)
	543 printfields();
	544
	545 if (ferror(stdin))
	546 errx(2, "read error: <stdin>");
	547 if (fflush(stdout) \|\| ferror(stdout))
	548 errx(2, "write error: <stdout>");
	549
	550 return 0;
	551 }