Introduction
Introduction Statistics Contact Development Disclaimer Help
jf2sfeed.c - jfconvert - JSON Feed (subset) to sfeed or Atom converter
git clone git://git.codemadness.org/jfconvert
Log
Files
Refs
README
LICENSE
---
jf2sfeed.c (14382B)
---
1 #include <errno.h>
2 #include <stdarg.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
/* On OpenBSD <unistd.h> is included for pledge(); everywhere else pledge()
   is replaced by a macro that ignores its arguments and evaluates to 0 */
#if defined(__OpenBSD__)
#include <unistd.h>
#else
#define pledge(a,b) 0
#endif
13
14 #include "json.h"
15
/* hint for compilers and static analyzers that a function exits;
   defined to nothing when the platform does not already provide it */
#ifndef __dead
#define __dead
#endif

/* ctype-like macros, but always compatible with ASCII / UTF-8.
   The argument is fully parenthesized before it is cast, so any expression
   (not only a plain variable or a cast) is classified as a whole.
   ISCNTRL and ISSPACE evaluate their argument twice: do not pass
   expressions with side effects. Callers in this file pass the value cast
   to (unsigned char), which keeps ISCNTRL from seeing negative values. */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))

/* compare attributes case-sensitively */
#define attrcmp strcmp
29
/* Indices of the per-item fields, used to index the fields[] array.
   FeedFieldLast is the final enumerator and serves as the array size. */
30 enum {
31 FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldConte…
32 FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCateg…
33 FeedFieldLast
34 };
35
/* Kind of content stored for the current item; the numeric value is also
   the index into contenttypes[] */
enum ContentType {
	ContentTypeNone,  /* 0 */
	ContentTypePlain, /* 1 */
	ContentTypeHTML   /* 2 */
};
/* text written to the content-type column, indexed by enum ContentType */
static const char *contenttypes[] = {
	"",
	"plain",
	"html"
};
42
/* Growable string buffer: data is kept NUL-terminated by the string_*
   helpers and may be NULL while nothing has been allocated yet */
typedef struct string {
	char   *data;   /* buffer holding the text */
	size_t  len;    /* length of the text, excluding the terminating NUL */
	size_t  bufsiz; /* number of bytes allocated for data */
} String;
49
50 static String fields[FeedFieldLast]; /* data for current item */
51 static enum ContentType contenttype; /* content-type for item */
52 static int itemisopen = 0;
53
54 static const int FieldSeparator = '\t';
55 /* separator for multiple values in a field, separator should be 1 byte …
56 static const char FieldMultiSeparator[] = "|";
57
58 /* print to stderr, print error message of errno and exit().
59 Unlike BSD err() it does not prefix __progname */
60 __dead void
61 err(int exitstatus, const char *fmt, ...)
62 {
63 va_list ap;
64 int saved_errno;
65
66 saved_errno = errno;
67
68 if (fmt) {
69 va_start(ap, fmt);
70 vfprintf(stderr, fmt, ap);
71 va_end(ap);
72 fputs(": ", stderr);
73 }
74 fprintf(stderr, "%s\n", strerror(saved_errno));
75
76 exit(exitstatus);
77 }
78
79 /* print to stderr and exit().
80 Unlike BSD errx() it does not prefix __progname */
81 __dead void
82 errx(int exitstatus, const char *fmt, ...)
83 {
84 va_list ap;
85
86 if (fmt) {
87 va_start(ap, fmt);
88 vfprintf(stderr, fmt, ap);
89 va_end(ap);
90 }
91 fputs("\n", stderr);
92
93 exit(exitstatus);
94 }
95
/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
   Parameters should be passed as they are in a struct tm:
   that is: year = year - 1900, month = month - 1.
   mon indexes a 12-entry table and is not checked here: the caller must
   pass a value in the range 0-11. The other fields are simply scaled and
   added, so they are not range-checked either.
   The leap day bookkeeping is done in long long, like year itself, so
   distant years do not overflow an intermediate int. */
static long long
datetounix(long long year, int mon, int day, int hour, int min, int sec)
{
	/* seconds elapsed before the start of each month in a regular
	   (non-leap) year */
	static const long secs_through_month[] = {
		0, 31 * 86400, 59 * 86400, 90 * 86400,
		120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
		243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
	int is_leap = 0, centuries = 0, rem;
	long long cycles, leaps = 0, t;

	/* optimization: handle common range year 1902 up to and including 2038
	   (the unsigned comparison also rejects every year below 1902) */
	if (year - 2ULL <= 136) {
		/* amount of leap days relative to 1970: every 4 years */
		leaps = (year - 68) >> 2;
		if (!((year - 68) & 3)) {
			/* the leap day of the year itself is added per month below */
			leaps--;
			is_leap = 1;
		} else {
			is_leap = 0;
		}
		t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
	} else {
		/* general leap year calculation:
		   leap years occur mostly every 4 years but every 100 years
		   a leap year is skipped unless the year is divisible by 400 */
		cycles = (year - 100) / 400;
		rem = (year - 100) % 400;
		if (rem < 0) {
			/* C division truncates toward zero: move to floor semantics */
			cycles--;
			rem += 400;
		}
		if (!rem) {
			is_leap = 1;
		} else {
			if (rem >= 300) {
				centuries = 3;
				rem -= 300;
			} else if (rem >= 200) {
				centuries = 2;
				rem -= 200;
			} else if (rem >= 100) {
				centuries = 1;
				rem -= 100;
			}
			if (rem) {
				leaps = rem / 4U;
				rem %= 4U;
				is_leap = !rem;
			}
		}
		/* 97 leap days per 400 years, 24 per (non-400) century */
		leaps += (97 * cycles) + (24 * centuries) - is_leap;

		/* adjust 8 leap days from 1970 up to and including 2000:
		   ((30 * 365) + 8) * 86400 = 946771200 */
		t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
	}
	t += secs_through_month[mon];
	/* from March onwards a leap year is one day further along */
	if (is_leap && mon >= 2)
		t += 86400;
	t += 86400LL * (day - 1);
	t += 3600LL * hour;
	t += 60LL * min;
	t += sec;

	return t;
}
166
/* Parse a timezone designation at the start of `s` (after optional
   whitespace). Only numeric offsets introduced by '+' or '-' are
   interpreted; anything else, including an empty string, yields 0. */
167 /* Get timezone from string, return time offset in seconds from UTC. */
168 static long
169 gettzoffset(const char *s)
170 {
171 const char *p;
172 long tzhour = 0, tzmin = 0;
173 size_t i;
174
/* skip leading whitespace */
175 for (; ISSPACE((unsigned char)*s); s++)
176 ;
177 switch (*s) {
178 case '-': /* offset */
179 case '+':
/* at most two digits are read for the hour part */
180 for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*…
181 tzhour = (tzhour * 10) + (*p - '0');
/* a ':' between hours and minutes is optional */
182 if (*p == ':')
183 p++;
/* at most two digits are read for the minute part */
184 for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p+…
185 tzmin = (tzmin * 10) + (*p - '0');
/* the result is scaled by a factor chosen from the leading sign character */
186 return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ?…
187 default: /* timezone name */
188 break;
189 }
190 return 0;
191 }
192
/* Up to six numeric parts are read into va[]: year (up to 4 digits), then
   month, day, hour, minute and second (up to 2 digits each). Parts that are
   missing stay 0, so a string without month or day fails the range check.
   `*tp` is only written on success. */
193 /* Parse time string `s` into the UNIX timestamp `tp`.
194 Returns 0 on success or -1 on failure. */
195 static int
196 parsetime(const char *s, long long *tp)
197 {
198 int va[6] = { 0 }, i, v, vi;
199
/* skip leading whitespace */
200 for (; ISSPACE((unsigned char)*s); s++)
201 ;
202
/* the string must start with four digits (the year) */
203 if (!ISDIGIT((unsigned char)s[0]) ||
204 !ISDIGIT((unsigned char)s[1]) ||
205 !ISDIGIT((unsigned char)s[2]) ||
206 !ISDIGIT((unsigned char)s[3]))
207 return -1;
208
209 /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H…
210 vi = 0;
211
212 /* parse time parts (and possibly remaining date parts) */
213 for (; *s && vi < 6; vi++) {
214 for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
215 ISDIGIT((unsigned char)*s); s++, i++)…
216 v = (v * 10) + (*s - '0');
217 }
218 va[vi] = v;
219
/* skip one separator, if present: '-' after year and month, 'T' or
   whitespace after the day, ':' after the later parts */
220 if ((vi < 2 && *s == '-') ||
221 (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s)…
222 (vi > 2 && *s == ':'))
223 s++;
224 }
225
226 /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
227 if (*s == '.') {
228 for (s++; ISDIGIT((unsigned char)*s); s++)
229 ;
230 }
231
232 /* invalid range */
233 if (va[0] < 0 || va[0] > 9999 ||
234 va[1] < 1 || va[1] > 12 ||
235 va[2] < 1 || va[2] > 31 ||
236 va[3] < 0 || va[3] > 23 ||
237 va[4] < 0 || va[4] > 59 ||
238 va[5] < 0 || va[5] > 60) /* allow leap second */
239 return -1;
240
/* year and month are converted to struct tm conventions for datetounix();
   the rest of the string is handed to gettzoffset().
   NOTE(review): the operator joining both calls is cut off in this listing;
   presumably the offset is subtracted to obtain UTC - confirm. */
241 *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], v…
242 gettzoffset(s);
243
244 return 0;
245 }
246
/* Exit with status 1 and a message naming `name` when the stream is in an
   error state. mode 'r' only tests the error indicator; mode 'w' flushes
   the stream first and also treats a failed flush as a write error.
   Any other mode does nothing. */
static void
checkfileerror(FILE *fp, const char *name, int mode)
{
	switch (mode) {
	case 'r':
		if (ferror(fp))
			errx(1, "read error: %s", name);
		break;
	case 'w':
		if (fflush(fp) || ferror(fp))
			errx(1, "write error: %s", name);
		break;
	default:
		break;
	}
}
256
257 /* Clear string only; don't free, prevents unnecessary reallocation. */
258 static void
259 string_clear(String *s)
260 {
261 if (s->data)
262 s->data[0] = '\0';
263 s->len = 0;
264 }
265
266 static void
267 string_buffer_realloc(String *s, size_t newlen)
268 {
269 size_t alloclen;
270
271 if (newlen > SIZE_MAX / 2) {
272 alloclen = SIZE_MAX;
273 } else {
274 for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
275 ;
276 }
277 if (!(s->data = realloc(s->data, alloclen)))
278 err(1, "realloc");
279 s->bufsiz = alloclen;
280 }
281
282 /* Append data to String, s->data and data may not overlap. */
283 static void
284 string_append(String *s, const char *data, size_t len)
285 {
286 if (!len)
287 return;
288
289 if (s->len >= SIZE_MAX - len) {
290 errno = ENOMEM;
291 err(1, "realloc");
292 }
293
294 /* check if allocation is necessary, never shrink the buffer. */
295 if (s->len + len >= s->bufsiz)
296 string_buffer_realloc(s, s->len + len + 1);
297 memcpy(s->data + s->len, data, len);
298 s->len += len;
299 s->data[s->len] = '\0';
300 }
301
302 /* Clear and append string */
303 static void
304 string_set(String *s, const char *data, size_t len)
305 {
306 string_clear(s);
307 string_append(s, data, len);
308 }
309
310 /* Print text, encode TABs, newlines and '\', remove other whitespace.
311 * Remove leading and trailing whitespace. */
312 static void
313 string_print_encoded(String *s)
314 {
315 const char *p, *e;
316
317 if (!s->data || !s->len)
318 return;
319
320 p = s->data;
321 e = p + s->len;
322
323 for (; *p && p != e; p++) {
324 switch (*p) {
325 case '\n': putchar('\\'); putchar('n'); break;
326 case '\\': putchar('\\'); putchar('\\'); break;
327 case '\t': putchar('\\'); putchar('t'); break;
328 default:
329 /* ignore control chars */
330 if (!ISCNTRL((unsigned char)*p))
331 putchar(*p);
332 break;
333 }
334 }
335 }
336
337 /* Print text, replace TABs, carriage return and other whitespace with '…
338 * Other control chars are removed. Remove leading and trailing whitespa…
339 static void
340 string_print(String *s)
341 {
342 const char *p, *e;
343
344 if (!s->data || !s->len)
345 return;
346
347 p = s->data;
348 e = s->data + s->len;
349 for (; *p && p != e; p++) {
350 if (ISSPACE((unsigned char)*p))
351 putchar(' '); /* any whitespace to space */
352 else if (!ISCNTRL((unsigned char)*p))
353 /* ignore other control chars */
354 putchar(*p);
355 }
356 }
357
358 /* Print as UNIX timestamp, print nothing if the time is empty or invali…
359 static void
360 string_print_timestamp(String *s)
361 {
362 long long t;
363
364 if (!s->data || !s->len)
365 return;
366
367 if (parsetime(s->data, &t) != -1)
368 printf("%lld", t);
369 }
370
371 static void
372 printfields(void)
373 {
374 string_print_timestamp(&fields[FeedFieldTime]);
375 putchar(FieldSeparator);
376 string_print(&fields[FeedFieldTitle]);
377 putchar(FieldSeparator);
378 string_print(&fields[FeedFieldLink]);
379 putchar(FieldSeparator);
380 string_print_encoded(&fields[FeedFieldContent]);
381 putchar(FieldSeparator);
382 fputs(contenttypes[contenttype], stdout);
383 putchar(FieldSeparator);
384 string_print(&fields[FeedFieldId]);
385 putchar(FieldSeparator);
386 string_print(&fields[FeedFieldAuthor]);
387 putchar(FieldSeparator);
388 string_print(&fields[FeedFieldEnclosure]);
389 putchar(FieldSeparator);
390 string_print(&fields[FeedFieldCategory]);
391 putchar('\n');
392
393 if (ferror(stdout)) /* check for errors but do not flush */
394 checkfileerror(stdout, "<stdout>", 'w');
395 }
396
397 static void
398 newitem(void)
399 {
400 size_t i;
401
402 contenttype = ContentTypeNone;
403 for (i = 0; i < FeedFieldLast; i++)
404 string_clear(&fields[i]);
405
406 }
407
/* Callback handed to parsejson() in main(). `nodes` describes the path to
   the current JSON node (type and name per level) and `depth` is the number
   of levels in that path; `value` carries the node's data.
   NOTE(review): the end of the parameter list is cut off in this listing;
   presumably the last parameter is the length of `value` - confirm.
   Only nodes below the top-level "items" array are looked at:
   depth 3 starts a new item, depth 4 holds the plain item attributes,
   depth 5 holds author.name and the tags, depth 6 holds authors[].name and
   attachments[].url. A previously collected item is printed when the next
   item starts; the final item is left for the caller to print. */
408 static void
409 processnode(struct json_node *nodes, size_t depth, const char *value, si…
410 {
411 /* item */
412 if (depth == 3) {
413 if (nodes[0].type == JSON_TYPE_OBJECT &&
414 nodes[1].type == JSON_TYPE_ARRAY &&
415 nodes[2].type == JSON_TYPE_OBJECT &&
416 !attrcmp(nodes[1].name, "items")) {
/* flush the item collected so far, then start with empty fields */
417 if (itemisopen)
418 printfields();
419 newitem();
420 itemisopen = 1;
421 }
422 }
423
424 /* item attributes */
425 if (depth == 4) {
426 if (nodes[0].type == JSON_TYPE_OBJECT &&
427 nodes[1].type == JSON_TYPE_ARRAY &&
428 nodes[2].type == JSON_TYPE_OBJECT &&
429 !attrcmp(nodes[1].name, "items")) {
430 if (!attrcmp(nodes[3].name, "content_html")) {
431 string_set(&fields[FeedFieldContent], va…
432 contenttype = ContentTypeHTML;
433 } else if (!attrcmp(nodes[3].name, "content_text…
434 /* prefer HTML, if summary text is set o…
435 if (!fields[FeedFieldContent].len && con…
436 string_set(&fields[FeedFieldCont…
437 contenttype = ContentTypePlain;
438 }
439 } else if (!attrcmp(nodes[3].name, "date_publish…
440 /* published has higher priority than up…
441 string_set(&fields[FeedFieldTime], value…
442 } else if (!attrcmp(nodes[3].name, "date_modifie…
443 if (!fields[FeedFieldTime].len)
444 string_append(&fields[FeedFieldT…
445 } else if (!attrcmp(nodes[3].name, "id")) {
446 if (!fields[FeedFieldId].len)
447 string_append(&fields[FeedFieldI…
448 } else if (!attrcmp(nodes[3].name, "summary")) {
449 /* only if content_html or content_text …
450 if (!fields[FeedFieldContent].len) {
451 string_append(&fields[FeedFieldC…
452 contenttype = ContentTypePlain;
453 }
454 } else if (!attrcmp(nodes[3].name, "title")) {
455 if (!fields[FeedFieldTitle].len)
456 string_set(&fields[FeedFieldTitl…
457 } else if (!attrcmp(nodes[3].name, "url")) {
458 if (!fields[FeedFieldLink].len)
459 string_append(&fields[FeedFieldL…
460 }
461 }
462 }
463
464 if (depth == 5) {
465 /* 1.0 author name */
466 if (nodes[0].type == JSON_TYPE_OBJECT &&
467 nodes[1].type == JSON_TYPE_ARRAY &&
468 nodes[2].type == JSON_TYPE_OBJECT &&
469 nodes[3].type == JSON_TYPE_OBJECT &&
470 nodes[4].type == JSON_TYPE_STRING &&
471 !attrcmp(nodes[1].name, "items") &&
472 !attrcmp(nodes[3].name, "author") &&
473 !attrcmp(nodes[4].name, "name")) {
/* the author field is only filled while it is still empty */
474 if (!fields[FeedFieldAuthor].len)
475 string_append(&fields[FeedFieldAuthor], …
476 }
477
478 /* tags / categories */
479 if (nodes[0].type == JSON_TYPE_OBJECT &&
480 nodes[1].type == JSON_TYPE_ARRAY &&
481 nodes[2].type == JSON_TYPE_OBJECT &&
482 nodes[3].type == JSON_TYPE_ARRAY &&
483 nodes[4].type == JSON_TYPE_STRING &&
484 !attrcmp(nodes[1].name, "items") &&
485 !attrcmp(nodes[3].name, "tags")) {
/* every tag is appended; FieldMultiSeparator goes between tags */
486 if (fields[FeedFieldCategory].len)
487 string_append(&fields[FeedFieldCategory]…
488 sizeof(FieldMultiSeparator…
489 string_append(&fields[FeedFieldCategory], value,…
490 }
491 }
492
493 if (depth == 6) {
494 /* 1.1 author name */
495 if (nodes[0].type == JSON_TYPE_OBJECT &&
496 nodes[1].type == JSON_TYPE_ARRAY &&
497 nodes[2].type == JSON_TYPE_OBJECT &&
498 nodes[3].type == JSON_TYPE_ARRAY &&
499 nodes[4].type == JSON_TYPE_OBJECT &&
500 nodes[5].type == JSON_TYPE_STRING &&
501 !attrcmp(nodes[1].name, "items") &&
502 !attrcmp(nodes[3].name, "authors") &&
503 !attrcmp(nodes[5].name, "name")) {
/* the author field is only filled while it is still empty */
504 if (!fields[FeedFieldAuthor].len)
505 string_append(&fields[FeedFieldAuthor], …
506 }
507
508 /* enclosure attributes */
509 if (nodes[0].type == JSON_TYPE_OBJECT &&
510 nodes[1].type == JSON_TYPE_ARRAY &&
511 nodes[2].type == JSON_TYPE_OBJECT &&
512 nodes[3].type == JSON_TYPE_ARRAY &&
513 nodes[4].type == JSON_TYPE_OBJECT &&
514 (nodes[5].type == JSON_TYPE_STRING || nodes[5].type …
515 !attrcmp(nodes[1].name, "items") &&
516 !attrcmp(nodes[3].name, "attachments") &&
517 !attrcmp(nodes[5].name, "url")) {
/* the enclosure field is only filled while it is still empty */
518 if (!fields[FeedFieldEnclosure].len)
519 string_append(&fields[FeedFieldEnclosure…
520 }
521 }
522
/* stop with exit status 2 as soon as stdout is in an error state */
523 if (ferror(stdout)) {
524 fprintf(stderr, "write error: <stdout>\n");
525 exit(2);
526 }
527 }
528
529 int
530 main(int argc, char *argv[])
531 {
532 if (pledge("stdio", NULL) == -1)
533 err(1, "pledge");
534
535 switch (parsejson(processnode)) {
536 case JSON_ERROR_MEM:
537 errx(2, "error: cannot allocate enough memory");
538 case JSON_ERROR_INVALID:
539 errx(1, "error: invalid JSON");
540 }
541
542 if (itemisopen)
543 printfields();
544
545 if (ferror(stdin))
546 errx(2, "read error: <stdin>");
547 if (fflush(stdout) || ferror(stdout))
548 errx(2, "write error: <stdout>");
549
550 return 0;
551 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.