feed.c - frontends - front-ends for some sites (experiment) | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
feed.c (29885B) | |
--- | |
1 #include <err.h> | |
2 #include <errno.h> | |
3 #include <stdint.h> | |
4 #include <stdio.h> | |
5 #include <stdlib.h> | |
6 #include <string.h> | |
7 #include <strings.h> | |
8 #include <time.h> | |
9 #include <unistd.h> | |
10 | |
11 #include "https.h" | |
12 #include "util.h" | |
13 #include "youtube.h" | |
14 #include "xml.h" | |
15 | |
16 #define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag)) | |
17 #define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag) | |
18 | |
19 /* string and byte-length */ | |
20 #define STRP(s) s,sizeof(s)-1 | |
21 | |
22 enum FeedType { | |
23 FeedTypeNone = 0, | |
24 FeedTypeAtom = 2 | |
25 }; | |
26 | |
27 /* String data / memory pool */ | |
28 typedef struct string { | |
29 char *data; /* data */ | |
30 size_t len; /* string length */ | |
31 size_t bufsiz; /* allocated size */ | |
32 } String; | |
33 | |
/* NOTE: the order of these tag IDs defines their priority when several tags
 * map to the same field: a tag with a higher ID replaces a lower one. */
36 enum TagId { | |
37 TagUnknown = 0, | |
38 /* Atom */ | |
39 /* creation date has higher priority */ | |
40 AtomTagPublished, | |
41 AtomTagTitle, | |
42 AtomTagMediaDescription, | |
43 AtomTagId, | |
44 AtomTagLink, | |
45 AtomTagLinkAlternate, | |
46 AtomTagAuthor, AtomTagAuthorName, | |
47 TagYoutubeVideoId, | |
48 TagLast | |
49 }; | |
50 | |
51 typedef struct feedtag { | |
52 char *name; /* name of tag to match */ | |
53 size_t len; /* len of `name` */ | |
54 enum TagId id; /* unique ID */ | |
55 } FeedTag; | |
56 | |
57 typedef struct field { | |
58 String str; | |
59 enum TagId tagid; /* tagid set previously, used for tag priority… | |
60 } FeedField; | |
61 | |
62 enum { | |
63 /* sfeed fields */ | |
64 FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldConte… | |
65 FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCateg… | |
66 FeedFieldYoutubeId, /* yt:videoId */ | |
67 FeedFieldLast | |
68 }; | |
69 | |
70 typedef struct feedcontext { | |
71 String *field; /* current FeedItem field String … | |
72 FeedField fields[FeedFieldLast]; /* data for current item… | |
73 FeedTag tag; /* unique current parsed tag */ | |
74 int iscontent; /* in content data */ | |
75 int iscontenttag; /* in content tag */ | |
76 enum FeedType feedtype; | |
77 } FeedContext; | |
78 | |
79 static long long datetounix(long long, int, int, int, int, int); | |
80 static FeedTag * gettag(enum FeedType, const char *, size_t); | |
81 static long gettzoffset(const char *); | |
82 static int isattr(const char *, size_t, const char *, size_t); | |
83 static int istag(const char *, size_t, const char *, size_t); | |
84 static int parsetime(const char *, long long *); | |
85 | |
86 static void atom_header(void); | |
87 static void atom_item(void); | |
88 static void atom_footer(void); | |
89 static void gph_header(void); | |
90 static void gph_footer(void); | |
91 static void html_header(void); | |
92 static void html_footer(void); | |
93 static void json_header(void); | |
94 static void json_item(void); | |
95 static void json_footer(void); | |
96 static void sfeed_item(void); /* TSV / sfeed */ | |
97 static void twtxt_item(void); | |
98 | |
99 static void string_append(String *, const char *, size_t); | |
100 static void string_buffer_realloc(String *, size_t); | |
101 static void string_clear(String *); | |
102 static void string_print_encoded(String *); | |
103 static void string_print_timestamp(String *); | |
104 static void string_print(String *); | |
105 static void xmlattr(XMLParser *, const char *, size_t, const char *, siz… | |
106 const char *, size_t); | |
107 static void xmlattrentity(XMLParser *, const char *, size_t, const char … | |
108 size_t, const char *, size_t); | |
109 static void xmlattrstart(XMLParser *, const char *, size_t, const char *, | |
110 size_t); | |
111 static void xmldata(XMLParser *, const char *, size_t); | |
112 static void xmldataentity(XMLParser *, const char *, size_t); | |
113 static void xmltagend(XMLParser *, const char *, size_t, int); | |
114 static void xmltagstart(XMLParser *, const char *, size_t); | |
115 static void xmltagstartparsed(XMLParser *, const char *, size_t, int); | |
116 | |
117 /* Atom, must be alphabetical order */ | |
118 static const FeedTag atomtags[] = { | |
119 { STRP("author"), AtomTagAuthor }, | |
120 { STRP("id"), AtomTagId }, | |
121 /* Atom: <link href="" />, RSS has <link></link> */ | |
122 { STRP("link"), AtomTagLink }, | |
123 { STRP("media:description"), AtomTagMediaDescription }, | |
124 { STRP("published"), AtomTagPublished }, | |
125 { STRP("title"), AtomTagTitle }, | |
126 { STRP("yt:videoId"), TagYoutubeVideoId } | |
127 }; | |
128 | |
129 /* special case: nested <author><name> */ | |
130 static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor }; | |
131 static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorNa… | |
132 | |
133 /* reference to no / unknown tag */ | |
134 static const FeedTag notag = { STRP(""), TagUnknown }; | |
135 | |
136 /* map TagId type to RSS/Atom field, all tags must be defined */ | |
137 static const int fieldmap[TagLast] = { | |
138 [TagUnknown] = -1, | |
139 /* Atom */ | |
140 [AtomTagPublished] = FeedFieldTime, | |
141 [AtomTagTitle] = FeedFieldTitle, | |
142 [AtomTagMediaDescription] = FeedFieldContent, | |
143 [AtomTagId] = FeedFieldId, | |
144 [AtomTagLink] = -1, | |
145 [AtomTagLinkAlternate] = FeedFieldLink, | |
146 [AtomTagAuthor] = -1, | |
147 [AtomTagAuthorName] = FeedFieldAuthor, | |
148 [TagYoutubeVideoId] = FeedFieldYoutubeId | |
149 }; | |
150 | |
151 static const int FieldSeparator = '\t'; | |
152 | |
153 static FeedContext ctx; | |
154 static XMLParser parser; /* XML parser state */ | |
155 static String attrrel, tmpstr; | |
156 | |
157 static struct search_response *search_res = NULL; | |
158 static void (*printfields)(void) = sfeed_item; | |
159 static int cgimode = 0, godmode = 0; | |
160 static const char *server_name = "127.0.0.1", *server_port = "70"; | |
161 | |
162 static int | |
163 tagcmp(const void *v1, const void *v2) | |
164 { | |
165 return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name); | |
166 } | |
167 | |
168 /* Unique tagid for parsed tag name. */ | |
169 static FeedTag * | |
170 gettag(enum FeedType feedtype, const char *name, size_t namelen) | |
171 { | |
172 FeedTag f, *r = NULL; | |
173 | |
174 f.name = (char *)name; | |
175 | |
176 switch (feedtype) { | |
177 case FeedTypeAtom: | |
178 r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atom… | |
179 sizeof(atomtags[0]), tagcmp); | |
180 break; | |
181 default: | |
182 break; | |
183 } | |
184 | |
185 return r; | |
186 } | |
187 | |
188 /* Clear string only; don't free, prevents unnecessary reallocation. */ | |
189 static void | |
190 string_clear(String *s) | |
191 { | |
192 if (s->data) | |
193 s->data[0] = '\0'; | |
194 s->len = 0; | |
195 } | |
196 | |
197 static void | |
198 string_buffer_realloc(String *s, size_t newlen) | |
199 { | |
200 size_t alloclen; | |
201 | |
202 if (newlen > SIZE_MAX / 2) { | |
203 alloclen = SIZE_MAX; | |
204 } else { | |
205 for (alloclen = 64; alloclen <= newlen; alloclen *= 2) | |
206 ; | |
207 } | |
208 if (!(s->data = realloc(s->data, alloclen))) | |
209 err(1, "realloc"); | |
210 s->bufsiz = alloclen; | |
211 } | |
212 | |
213 /* Append data to String, s->data and data may not overlap. */ | |
214 static void | |
215 string_append(String *s, const char *data, size_t len) | |
216 { | |
217 if (!len) | |
218 return; | |
219 | |
220 if (s->len >= SIZE_MAX - len) { | |
221 errno = ENOMEM; | |
222 err(1, "realloc"); | |
223 } | |
224 | |
225 /* check if allocation is necessary, never shrink the buffer. */ | |
226 if (s->len + len >= s->bufsiz) | |
227 string_buffer_realloc(s, s->len + len + 1); | |
228 memcpy(s->data + s->len, data, len); | |
229 s->len += len; | |
230 s->data[s->len] = '\0'; | |
231 } | |
232 | |
233 /* Print text, encode TABs, newlines and '\', remove other whitespace. | |
234 * Remove leading and trailing whitespace. */ | |
235 static void | |
236 string_print_encoded(String *s) | |
237 { | |
238 const char *p, *e; | |
239 | |
240 if (!s->data || !s->len) | |
241 return; | |
242 | |
243 p = s->data; | |
244 e = p + strlen(p); | |
245 | |
246 for (; *p && p != e; p++) { | |
247 switch (*p) { | |
248 case '\n': putchar('\\'); putchar('n'); break; | |
249 case '\\': putchar('\\'); putchar('\\'); break; | |
250 case '\t': putchar('\\'); putchar('t'); break; | |
251 default: | |
252 /* ignore control chars */ | |
253 if (!ISCNTRL((unsigned char)*p)) | |
254 putchar(*p); | |
255 break; | |
256 } | |
257 } | |
258 } | |
259 | |
260 /* Print text, replace TABs, carriage return and other whitespace with '… | |
261 * Other control chars are removed. Remove leading and trailing whitespa… | |
262 static void | |
263 string_print(String *s) | |
264 { | |
265 char *p, *e; | |
266 | |
267 if (!s->data || !s->len) | |
268 return; | |
269 | |
270 p = s->data; | |
271 e = p + s->len; | |
272 for (; *p && p != e; p++) { | |
273 if (ISSPACE((unsigned char)*p)) | |
274 putchar(' '); /* any whitespace to space */ | |
275 else if (!ISCNTRL((unsigned char)*p)) | |
276 /* ignore other control chars */ | |
277 putchar(*p); | |
278 } | |
279 } | |
280 | |
281 /* Print as UNIX timestamp, print nothing if the time is empty or invali… | |
282 static void | |
283 string_print_timestamp(String *s) | |
284 { | |
285 long long t; | |
286 | |
287 if (!s->data || !s->len) | |
288 return; | |
289 | |
290 if (parsetime(s->data, &t) != -1) | |
291 printf("%lld", t); | |
292 } | |
293 | |
/* Convert broken-down time fields to a signed (at least) 64-bit UNIX
   timestamp. Parameters follow struct tm conventions:
   year = calendar year - 1900, mon = calendar month - 1 (0..11).
   `mon` indexes a 12-entry table and is not range-checked here. */
static long long
datetounix(long long year, int mon, int day, int hour, int min, int sec)
{
	/* seconds elapsed before the start of each month, non-leap year */
	static const long month_start[] = {
		0, 31 * 86400, 59 * 86400, 90 * 86400,
		120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
		243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
	int leapyear = 0, leapdays = 0, cents = 0, quads, rest;
	long long secs;

	if (year - 2ULL <= 136) {
		/* fast path for 1902..2038: in this range a plain "every fourth
		   year" rule is exact (2000 is a leap year) */
		leapdays = (year - 68) >> 2;
		leapyear = !((year - 68) & 3);
		if (leapyear)
			leapdays--; /* this year's leap day is added with the month */
		secs = 31536000 * (year - 70) + (86400 * leapdays); /* 365 days */
	} else {
		/* general Gregorian rule, counted in 400-year cycles from 2000 */
		quads = (year - 100) / 400;
		rest = (year - 100) % 400;
		if (rest < 0) {
			quads--;
			rest += 400;
		}
		if (rest == 0) {
			leapyear = 1; /* divisible by 400 */
		} else {
			/* rest is 1..399 here: split off whole centuries */
			cents = rest / 100;
			rest %= 100;
			if (rest != 0) {
				leapdays = rest / 4;
				leapyear = (rest % 4) == 0;
			}
		}
		leapdays += (97 * quads) + (24 * cents) - leapyear;

		/* 1970-01-01 up to 2001-01-01 offset:
		   ((30 * 365) + 8) * 86400 = 946771200 */
		secs = ((year - 100) * 31536000LL) + (leapdays * 86400LL) +
		       946771200LL;
	}

	secs += month_start[mon];
	if (leapyear && mon >= 2)
		secs += 86400; /* past February in a leap year */
	secs += 86400LL * (day - 1);
	secs += 3600LL * hour;
	secs += 60LL * min;
	secs += sec;

	return secs;
}
360 | |
/* Read a numeric timezone offset ("+HH:MM", "-HHMM", ...) at the start of
   `s` and return it in seconds relative to UTC. Anything that does not start
   with '+' or '-' (for example a timezone name) yields 0. */
static long
gettzoffset(const char *s)
{
	const char *p;
	long hours = 0, mins = 0, total;
	int n;

	if (*s != '+' && *s != '-')
		return 0;

	/* at most two digits of hours, an optional ':', two digits of minutes */
	p = s + 1;
	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
		hours = (hours * 10) + (*p - '0');
	if (*p == ':')
		p++;
	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
		mins = (mins * 10) + (*p - '0');

	total = (hours * 3600) + (mins * 60);
	return (*s == '-') ? -total : total;
}
384 | |
/* Parse the time string `s` into the UNIX timestamp `*tp`.
   Accepted layouts are a 4-digit year followed by 2-digit month, day, hour,
   minute and second, optionally separated by '-', 'T' or whitespace, and ':'
   (for example "2024-02-29T12:00:00+01:00"). An optional fractional-seconds
   part (".123") is skipped, and a trailing numeric timezone offset is
   applied.
   Returns 0 on success or -1 on failure; `*tp` is only written on success. */
static int
parsetime(const char *s, long long *tp)
{
	int va[6] = { 0 }, i, v, vi;

	/* the string has to start with a 4-digit year */
	if (!ISDIGIT((unsigned char)s[0]) ||
	    !ISDIGIT((unsigned char)s[1]) ||
	    !ISDIGIT((unsigned char)s[2]) ||
	    !ISDIGIT((unsigned char)s[3]))
		return -1;

	/* read up to six numeric parts: year (4 digits), then 2 digits each */
	for (vi = 0; *s && vi < 6; vi++) {
		for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
		                   ISDIGIT((unsigned char)*s); s++, i++) {
			v = (v * 10) + (*s - '0');
		}
		va[vi] = v;

		/* skip the separator that is allowed after this part */
		if ((vi < 2 && *s == '-') ||
		    (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
		    (vi > 2 && *s == ':'))
			s++;
	}

	/* skip fractional seconds ("%Y-%m-%dT%H:%M:%S.000+02:00"); otherwise
	   the '.' hides the timezone offset from gettzoffset() and the offset
	   is silently dropped */
	if (*s == '.') {
		for (s++; ISDIGIT((unsigned char)*s); s++)
			;
	}

	/* invalid range */
	if (va[0] < 0 || va[0] > 9999 ||
	    va[1] < 1 || va[1] > 12 ||
	    va[2] < 1 || va[2] > 31 ||
	    va[3] < 0 || va[3] > 23 ||
	    va[4] < 0 || va[4] > 59 ||
	    va[5] < 0 || va[5] > 60) /* allow leap second */
		return -1;

	/* datetounix() takes struct tm style year/month; local time minus the
	   zone offset gives UTC */
	*tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
	      gettzoffset(s);

	return 0;
}
427 | |
/* Write the XML declaration and the opening <feed> element with its fixed
   title to stdout. */
static void
atom_header(void)
{
	fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", stdout);
	fputs("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n", stdout);
	fputs("\t<title>Newsfeed</title>\n", stdout);
}
435 | |
/* Close the <feed> element opened by atom_header(). */
static void
atom_footer(void)
{
	static const char closing[] = "</feed>\n";

	fputs(closing, stdout);
}
441 | |
442 static void | |
443 atom_item(void) | |
444 { | |
445 struct item *v, *found = NULL; | |
446 size_t i; | |
447 | |
448 /* must have a video id */ | |
449 if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
450 return; | |
451 | |
452 for (i = 0; i < search_res->nitems; i++) { | |
453 v = &(search_res->items[i]); | |
454 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->… | |
455 found = v; | |
456 } | |
457 /* Only print the video if it was found in the feed aswell. | |
458 This way it filters away shorts too. */ | |
459 if (!found) | |
460 return; | |
461 | |
462 fputs("<entry>\n\t<title>", stdout); | |
463 xmlencode(ctx.fields[FeedFieldTitle].str.data); | |
464 if (found->duration[0]) { | |
465 fputs(" [", stdout); | |
466 xmlencode(found->duration); | |
467 fputs("]", stdout); | |
468 } | |
469 fputs("</title>\n", stdout); | |
470 if (ctx.fields[FeedFieldLink].str.len) { | |
471 fputs("\t<link rel=\"alternate\" href=\"", stdout); | |
472 xmlencode(ctx.fields[FeedFieldLink].str.data); | |
473 fputs("\" />\n", stdout); | |
474 } | |
475 /* prefer link over id for Atom <id>. */ | |
476 fputs("\t<id>", stdout); | |
477 if (ctx.fields[FeedFieldLink].str.len) | |
478 xmlencode(ctx.fields[FeedFieldLink].str.data); | |
479 else if (ctx.fields[FeedFieldId].str.len) | |
480 xmlencode(ctx.fields[FeedFieldId].str.data); | |
481 fputs("</id>\n", stdout); | |
482 | |
483 /* just print the original timestamp, it should conform */ | |
484 fputs("\t<updated>", stdout); | |
485 string_print(&ctx.fields[FeedFieldTime].str); | |
486 fputs("</updated>\n", stdout); | |
487 | |
488 if (ctx.fields[FeedFieldAuthor].str.len) { | |
489 fputs("\t<author><name>", stdout); | |
490 xmlencode(ctx.fields[FeedFieldAuthor].str.data); | |
491 fputs("</name></author>\n", stdout); | |
492 } | |
493 if (ctx.fields[FeedFieldContent].str.len) { | |
494 fputs("\t<content>", stdout); | |
495 xmlencode(ctx.fields[FeedFieldContent].str.data); | |
496 fputs("</content>\n", stdout); | |
497 } | |
498 fputs("</entry>\n", stdout); | |
499 } | |
500 | |
501 | |
502 static void | |
503 html_header(void) | |
504 { | |
505 fputs("<!DOCTYPE HTML>\n" | |
506 "<html>\n" | |
507 "<head>\n" | |
508 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=… | |
509 "</head>\n" | |
510 "<body><pre>\n", stdout); | |
511 } | |
512 | |
/* Close the <pre>, <body> and <html> elements of the HTML output. */
static void
html_footer(void)
{
	fputs("</pre></body>\n", stdout);
	fputs("</html>\n", stdout);
}
518 | |
519 static void | |
520 html_item(void) | |
521 { | |
522 struct item *v, *found = NULL; | |
523 size_t i; | |
524 | |
525 /* must have a video id */ | |
526 if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
527 return; | |
528 | |
529 for (i = 0; i < search_res->nitems; i++) { | |
530 v = &(search_res->items[i]); | |
531 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->… | |
532 found = v; | |
533 } | |
534 /* Only print the video if it was found in the feed aswell. | |
535 This way it filters away shorts too. */ | |
536 if (!found) | |
537 return; | |
538 | |
539 /* just print the original timestamp, it should conform */ | |
540 xmlencode(ctx.fields[FeedFieldTime].str.data); | |
541 fputs(" ", stdout); | |
542 | |
543 if (ctx.fields[FeedFieldLink].str.len) { | |
544 fputs("<a href=\"", stdout); | |
545 xmlencode(ctx.fields[FeedFieldLink].str.data); | |
546 fputs("\">", stdout); | |
547 } | |
548 | |
549 xmlencode(ctx.fields[FeedFieldTitle].str.data); | |
550 | |
551 if (found->duration[0]) { | |
552 fputs(" [", stdout); | |
553 xmlencode(found->duration); | |
554 fputs("]", stdout); | |
555 } | |
556 if (ctx.fields[FeedFieldLink].str.len) { | |
557 fputs("</a>", stdout); | |
558 } | |
559 fputs("\n", stdout); | |
560 } | |
561 | |
/* Write the NUL-terminated string `s` to stdout through gophertext().
   A NULL `s` prints nothing: callers pass String data pointers, which stay
   NULL until something was appended to the field. */
static void
gphencode(const char *s)
{
	if (s == NULL)
		return;

	gophertext(stdout, s, strlen(s));
}
567 | |
/* The gopher output has no header: intentionally prints nothing. */
static void
gph_header(void)
{
	/* nothing to do */
}
572 | |
/* Terminate the gopher listing with a line that holds a single '.'. */
static void
gph_footer(void)
{
	static const char lastline[] = ".\r\n";

	fputs(lastline, stdout);
}
578 | |
579 static void | |
580 gph_item(void) | |
581 { | |
582 struct item *v, *found = NULL; | |
583 size_t i; | |
584 | |
585 /* must have a video id */ | |
586 if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
587 return; | |
588 | |
589 for (i = 0; i < search_res->nitems; i++) { | |
590 v = &(search_res->items[i]); | |
591 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->… | |
592 found = v; | |
593 } | |
594 /* Only print the video if it was found in the feed aswell. | |
595 This way it filters away shorts too. */ | |
596 if (!found) | |
597 return; | |
598 | |
599 fputs("h", stdout); | |
600 /* just print the original timestamp, it should conform */ | |
601 gphencode(ctx.fields[FeedFieldTime].str.data); | |
602 fputs(" ", stdout); | |
603 gphencode(ctx.fields[FeedFieldTitle].str.data); | |
604 if (found->duration[0]) { | |
605 fputs(" [", stdout); | |
606 gphencode(found->duration); | |
607 fputs("]", stdout); | |
608 } | |
609 fputs("\t", stdout); | |
610 if (ctx.fields[FeedFieldLink].str.len) { | |
611 fputs("URL:", stdout); | |
612 gphencode(ctx.fields[FeedFieldLink].str.data); | |
613 } | |
614 printf("\t%s\t%s\r\n", server_name, server_port); | |
615 } | |
616 | |
/* Open the JSON Feed document: version, fixed title and the start of the
   "items" array. */
static void
json_header(void)
{
	fputs("{\n", stdout);
	fputs("\"version\": \"https://jsonfeed.org/version/1.1\",\n", stdout);
	fputs("\"title\": \"Newsfeed\",\n", stdout);
	fputs("\"items\": [\n", stdout);
}
625 | |
/* Close the "items" array and the top-level object opened by json_header(). */
static void
json_footer(void)
{
	fputs("]\n", stdout);
	fputs("}\n", stdout);
}
631 | |
/* Write `s` to stdout as the inside of a JSON string: '\' and '"' are
   backslash-escaped, control characters are written as \u00XX and every
   other byte is copied unchanged.
   A NULL `s` prints nothing: callers pass String data pointers, which stay
   NULL until something was appended to the field. */
static void
json_printfield(const char *s)
{
	if (s == NULL)
		return;

	for (; *s; s++) {
		if (*s == '\\')
			fputs("\\\\", stdout);
		else if (*s == '"')
			fputs("\\\"", stdout);
		else if (ISCNTRL((unsigned char)*s))
			printf("\\u00%02x", (unsigned char)*s);
		else
			putchar(*s);
	}
}
646 | |
647 static void | |
648 json_item(void) | |
649 { | |
650 static int json_firstitem = 1; | |
651 struct item *v, *found = NULL; | |
652 size_t i; | |
653 | |
654 /* must have a video id */ | |
655 if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
656 return; | |
657 | |
658 for (i = 0; i < search_res->nitems; i++) { | |
659 v = &(search_res->items[i]); | |
660 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->… | |
661 found = v; | |
662 } | |
663 /* Only print the video if it was found in the feed aswell. | |
664 This way it filters away shorts too. */ | |
665 if (!found) | |
666 return; | |
667 | |
668 if (!json_firstitem) | |
669 fputs(",\n", stdout); | |
670 json_firstitem = 0; | |
671 | |
672 fputs("{\n\t\"id\": \"", stdout); | |
673 json_printfield(ctx.fields[FeedFieldId].str.data); | |
674 fputs("\"", stdout); | |
675 | |
676 /* just print the original timestamp, it should conform */ | |
677 fputs(",\n\t\"date_published\": \"", stdout); | |
678 string_print(&ctx.fields[FeedFieldTime].str); | |
679 fputs("\"", stdout); | |
680 | |
681 fputs(",\n\t\"title\": \"", stdout); | |
682 json_printfield(ctx.fields[FeedFieldTitle].str.data); | |
683 if (found->duration[0]) { | |
684 fputs(" [", stdout); | |
685 json_printfield(found->duration); | |
686 fputs("]", stdout); | |
687 } | |
688 fputs("\"", stdout); | |
689 | |
690 if (ctx.fields[FeedFieldLink].str.len) { | |
691 fputs(",\n\t\"url\": \"", stdout); | |
692 json_printfield(ctx.fields[FeedFieldLink].str.data); | |
693 fputs("\"", stdout); | |
694 } | |
695 | |
696 if (ctx.fields[FeedFieldAuthor].str.len) { | |
697 fputs(",\n\t\"authors\": [{\"name\": \"", stdout); | |
698 json_printfield(ctx.fields[FeedFieldAuthor].str.data); | |
699 fputs("\"}]", stdout); | |
700 } | |
701 | |
702 fputs(",\n\t\"content_text\": \"", stdout); | |
703 json_printfield(ctx.fields[FeedFieldContent].str.data); | |
704 fputs("\"\n}", stdout); | |
705 } | |
706 | |
707 static void | |
708 sfeed_item(void) | |
709 { | |
710 struct item *v, *found = NULL; | |
711 size_t i; | |
712 | |
713 /* must have a video id */ | |
714 if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
715 return; | |
716 | |
717 for (i = 0; i < search_res->nitems; i++) { | |
718 v = &(search_res->items[i]); | |
719 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->… | |
720 found = v; | |
721 } | |
722 /* Only print the video if it was found in the feed aswell. | |
723 This way it filters away shorts too. */ | |
724 if (!found) | |
725 return; | |
726 | |
727 string_print_timestamp(&ctx.fields[FeedFieldTime].str); | |
728 putchar(FieldSeparator); | |
729 string_print(&ctx.fields[FeedFieldTitle].str); | |
730 if (found->duration[0]) { | |
731 fputs(" [", stdout); | |
732 fputs(found->duration, stdout); | |
733 fputs("]", stdout); | |
734 } | |
735 putchar(FieldSeparator); | |
736 string_print(&ctx.fields[FeedFieldLink].str); | |
737 putchar(FieldSeparator); | |
738 string_print_encoded(&ctx.fields[FeedFieldContent].str); | |
739 putchar(FieldSeparator); | |
740 fputs("plain", stdout); | |
741 putchar(FieldSeparator); | |
742 string_print(&ctx.fields[FeedFieldId].str); | |
743 putchar(FieldSeparator); | |
744 string_print(&ctx.fields[FeedFieldAuthor].str); | |
745 putchar(FieldSeparator); | |
746 /* no/empty enclosure */ | |
747 putchar(FieldSeparator); | |
748 /* empty category */ | |
749 putchar('\n'); | |
750 } | |
751 | |
752 static void | |
753 twtxt_item(void) | |
754 { | |
755 struct item *v, *found = NULL; | |
756 size_t i; | |
757 | |
758 /* must have a video id */ | |
759 if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
760 return; | |
761 | |
762 for (i = 0; i < search_res->nitems; i++) { | |
763 v = &(search_res->items[i]); | |
764 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->… | |
765 found = v; | |
766 } | |
767 /* Only print the video if it was found in the feed aswell. | |
768 This way it filters away shorts too. */ | |
769 if (!found) | |
770 return; | |
771 | |
772 string_print(&ctx.fields[FeedFieldTime].str); | |
773 putchar(FieldSeparator); | |
774 string_print(&ctx.fields[FeedFieldTitle].str); | |
775 if (found->duration[0]) { | |
776 fputs(" [", stdout); | |
777 fputs(found->duration, stdout); | |
778 fputs("]", stdout); | |
779 } | |
780 fputs(": ", stdout); | |
781 string_print(&ctx.fields[FeedFieldLink].str); | |
782 putchar('\n'); | |
783 } | |
784 | |
/* Return 1 when the two tag names have the same length and are equal
   ignoring case, else 0. Both names must be NUL-terminated. */
static int
istag(const char *name, size_t len, const char *name2, size_t len2)
{
	if (len != len2)
		return 0;

	return strcasecmp(name, name2) == 0;
}
790 | |
/* Return 1 when the two attribute names have the same length and are equal
   ignoring case, else 0. Both names must be NUL-terminated. */
static int
isattr(const char *name, size_t len, const char *name2, size_t len2)
{
	return (len != len2) ? 0 : (strcasecmp(name, name2) == 0);
}
796 | |
797 static void | |
798 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, | |
799 const char *v, size_t vl) | |
800 { | |
801 if (ISINCONTENT(ctx)) | |
802 return; | |
803 | |
804 if (!ctx.tag.id) | |
805 return; | |
806 | |
807 if (ISCONTENTTAG(ctx)) | |
808 return; | |
809 | |
810 if (ctx.tag.id == AtomTagLink) { | |
811 if (isattr(n, nl, STRP("rel"))) { | |
812 string_append(&attrrel, v, vl); | |
813 } else if (isattr(n, nl, STRP("href"))) { | |
814 string_append(&tmpstr, v, vl); | |
815 } | |
816 } | |
817 } | |
818 | |
819 static void | |
820 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, siz… | |
821 const char *data, size_t datalen) | |
822 { | |
823 char buf[8]; | |
824 int len; | |
825 | |
826 if (ISINCONTENT(ctx)) | |
827 return; | |
828 | |
829 if (!ctx.tag.id) | |
830 return; | |
831 | |
832 /* try to translate entity, else just pass as data to | |
833 * xmlattr handler. */ | |
834 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) | |
835 xmlattr(p, t, tl, n, nl, buf, (size_t)len); | |
836 else | |
837 xmlattr(p, t, tl, n, nl, data, datalen); | |
838 } | |
839 | |
840 static void | |
841 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size… | |
842 { | |
843 if (ISINCONTENT(ctx)) | |
844 return; | |
845 | |
846 if (attrrel.len && isattr(n, nl, STRP("rel"))) | |
847 string_clear(&attrrel); | |
848 else if (tmpstr.len && | |
849 (isattr(n, nl, STRP("href")) || | |
850 isattr(n, nl, STRP("url")))) | |
851 string_clear(&tmpstr); /* use the last value for multipl… | |
852 } | |
853 | |
854 static void | |
855 xmldata(XMLParser *p, const char *s, size_t len) | |
856 { | |
857 if (!ctx.field) | |
858 return; | |
859 | |
860 string_append(ctx.field, s, len); | |
861 } | |
862 | |
863 static void | |
864 xmldataentity(XMLParser *p, const char *data, size_t datalen) | |
865 { | |
866 char buf[8]; | |
867 int len; | |
868 | |
869 if (!ctx.field) | |
870 return; | |
871 | |
872 /* try to translate entity, else just pass as data to | |
873 * xmldata handler. */ | |
874 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) | |
875 xmldata(p, buf, (size_t)len); | |
876 else | |
877 xmldata(p, data, datalen); | |
878 } | |
879 | |
880 static void | |
881 xmltagstart(XMLParser *p, const char *t, size_t tl) | |
882 { | |
883 const FeedTag *f; | |
884 | |
885 if (ISINCONTENT(ctx)) | |
886 return; | |
887 | |
888 /* start of RSS or Atom item / entry */ | |
889 if (ctx.feedtype == FeedTypeNone) { | |
890 if (istag(t, tl, STRP("entry"))) | |
891 ctx.feedtype = FeedTypeAtom; | |
892 return; | |
893 } | |
894 | |
895 /* field tagid already set or nested tags. */ | |
896 if (ctx.tag.id) { | |
897 /* nested <author><name> for Atom */ | |
898 if (ctx.tag.id == AtomTagAuthor && | |
899 istag(t, tl, STRP("name"))) { | |
900 memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ct… | |
901 } else { | |
902 return; /* other nested tags are not allowed: re… | |
903 } | |
904 } | |
905 | |
906 /* in item */ | |
907 if (ctx.tag.id == TagUnknown) { | |
908 if (!(f = gettag(ctx.feedtype, t, tl))) | |
909 f = ¬ag; | |
910 memcpy(&(ctx.tag), f, sizeof(ctx.tag)); | |
911 } | |
912 | |
913 ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent); | |
914 string_clear(&attrrel); | |
915 } | |
916 | |
917 static void | |
918 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) | |
919 { | |
920 enum TagId tagid; | |
921 | |
922 if (ISINCONTENT(ctx)) | |
923 return; | |
924 | |
925 /* set tag type based on its attribute value */ | |
926 if (ctx.tag.id == AtomTagLink) { | |
927 /* empty or "alternate": other types could be | |
928 "enclosure", "related", "self" or "via" */ | |
929 if (!attrrel.len || isattr(attrrel.data, attrrel.len, ST… | |
930 ctx.tag.id = AtomTagLinkAlternate; | |
931 else | |
932 ctx.tag.id = AtomTagLink; /* unknown */ | |
933 } | |
934 | |
935 tagid = ctx.tag.id; | |
936 | |
937 /* map tag type to field: unknown or lesser priority is ignored, | |
938 when tags of the same type are repeated only the first is use… | |
939 if (fieldmap[tagid] == -1 || | |
940 tagid <= ctx.fields[fieldmap[tagid]].tagid) { | |
941 return; | |
942 } | |
943 | |
944 if (ctx.iscontenttag) { | |
945 ctx.iscontent = 1; | |
946 ctx.iscontenttag = 0; | |
947 } | |
948 | |
949 ctx.field = &(ctx.fields[fieldmap[tagid]].str); | |
950 ctx.fields[fieldmap[tagid]].tagid = tagid; | |
951 | |
952 /* clear field if it is overwritten (with a priority order) for … | |
953 value, if the field can have multiple values then do not clea… | |
954 string_clear(ctx.field); | |
955 } | |
956 | |
957 static void | |
958 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) | |
959 { | |
960 size_t i; | |
961 | |
962 if (ctx.feedtype == FeedTypeNone) | |
963 return; | |
964 | |
965 if (ISINCONTENT(ctx)) { | |
966 /* not a closed content field */ | |
967 if (!istag(ctx.tag.name, ctx.tag.len, t, tl)) | |
968 return; | |
969 } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)… | |
970 /* matched tag end: close it */ | |
971 } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom && | |
972 istag(t, tl, STRP("entry"))))) /* Atom */ | |
973 { | |
974 /* end of Atom entry */ | |
975 printfields(); | |
976 | |
977 /* clear strings */ | |
978 for (i = 0; i < FeedFieldLast; i++) { | |
979 string_clear(&ctx.fields[i].str); | |
980 ctx.fields[i].tagid = TagUnknown; | |
981 } | |
982 /* allow parsing of Atom and RSS concatenated in one XML… | |
983 ctx.feedtype = FeedTypeNone; | |
984 } else { | |
985 return; /* not end of field */ | |
986 } | |
987 | |
988 /* temporary string: for fields that cannot be processed | |
989 directly and need more context, for example by its tag | |
990 attributes, like the Atom link rel="alternate|enclosure". */ | |
991 if (tmpstr.len && ctx.field) { | |
992 string_clear(ctx.field); | |
993 string_append(ctx.field, tmpstr.data, tmpstr.len); | |
994 } | |
995 | |
996 /* close field */ | |
997 string_clear(&tmpstr); /* reuse and clear temporary string */ | |
998 | |
999 if (ctx.tag.id == AtomTagAuthorName) | |
1000 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* … | |
1001 else | |
1002 memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); | |
1003 | |
1004 ctx.iscontent = 0; | |
1005 ctx.field = NULL; | |
1006 } | |
1007 | |
1008 static char * | |
1009 request_channel_feed(const char *channelid) | |
1010 { | |
1011 char path[2048]; | |
1012 int r; | |
1013 | |
1014 r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%… | |
1015 /* check if request is too long (truncation) */ | |
1016 if (r < 0 || (size_t)r >= sizeof(path)) | |
1017 return NULL; | |
1018 | |
1019 return request("www.youtube.com", path, ""); | |
1020 } | |
1021 | |
/* Return 1 when `s` consists of exactly 24 characters that are each a
   letter, a digit, '-' or '_'; otherwise return 0. */
int
isvalidchannel(const char *s)
{
	size_t n;

	for (n = 0; s[n] != '\0'; n++) {
		if (!ISALPHA((unsigned char)s[n]) &&
		    !ISDIGIT((unsigned char)s[n]) &&
		    s[n] != '-' && s[n] != '_')
			return 0;
	}

	return n == 24;
}
1037 | |
/* Report a bad request or bad invocation and terminate the process.
 * Every path ends in exit(), so this function never returns.
 *
 * When cgimode is set:
 *  - with godmode set, the two message lines are printed as tab-separated
 *    records that start with the digit 3 and end in CRLF, using server_name
 *    (presumably Gopher error items -- confirm; the remaining printf
 *    arguments are cut off in this listing);
 *  - otherwise a Status: 400 Bad Request header and a text/plain
 *    Content-Type header are written to stdout, followed by both message
 *    lines.
 *  In both cases the exit status is 0.
 *
 * When cgimode is not set, a command-line usage text and an example
 * invocation are written with fputs() and the exit status is 1.
 *
 * NOTE(review): several string literals and the fputs() stream arguments
 * are truncated in this listing; consult the upstream file for the exact
 * text. */
1038 void | |
1039 usage(void) | |
1040 { | |
1041 const char *line1 = "Bad Request, path should be the channel id … | |
1042 const char *line2 = "Supported extensions are: [atom|gph|html|js… | |
1043 | |
1044 if (cgimode) { | |
1045 if (godmode) { | |
1046 printf("3%s\tErr\t%s\t%s\r\n", line1, server_nam… | |
1047 printf("3%s\tErr\t%s\t%s\r\n", line2, server_nam… | |
1048 } else { | |
1049 fputs("Status: 400 Bad Request\r\n", stdout); | |
1050 fputs("Content-Type: text/plain; charset=utf-8\r… | |
1051 printf("400 %s\n", line1); | |
1052 printf("\n%s", line2); | |
1053 } | |
1054 exit(0); | |
1055 } else { | |
1056 fputs("usage: feed <channelid> [atom|gph|html|json|tsv|t… | |
1057 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n"… | |
1058 exit(1); | |
1059 } | |
1060 } | |
1061 | |
/* Program entry point: fetch a channel feed and print it in the requested
 * output format.
 *
 * Steps, in order:
 *  1. Restrict the process with pledge() to the promises needed for the
 *     network phase; failure is fatal via err().
 *  2. Pick the request path from the REQUEST_URI environment variable, or
 *     from REQUEST when that is unset. If either is present, CGI mode is
 *     enabled: SERVER_NAME and SERVER_PORT override the globals, and godmode
 *     is set when SERVER_PROTOCOL contains a marker substring (the literal is
 *     cut off in this listing). The path is copied into a 256-byte buffer
 *     with strlcpy() (the return value is not checked, so longer paths are
 *     cut). The channel id is the text after the last slash; a trailing
 *     .ext suffix is split off and used as the format name.
 *  3. Without a request path, the channel id is argv[1] and the optional
 *     format is argv[2]. The default format is tsv.
 *  4. A missing or invalid channel id, a path without a slash, or an unknown
 *     format name ends the program through usage().
 *  5. The per-item output callback printfields is selected by format name
 *     (atom/xml, gph, html, json, tsv/sfeed, txt/twtxt).
 *  6. youtube_channel_videos() fills search_res; a NULL result or zero items
 *     returns 0 without printing anything.
 *  7. request_channel_feed() fetches the feed data; NULL returns 1.
 *  8. pledge() is narrowed to stdio only before any parsing happens.
 *  9. The XML parser is set up: input buffer, callbacks (CDATA shares the
 *     xmldata handler), and every field string is appended to and then
 *     cleared so that it has a value.
 * 10. In CGI mode without godmode, a Status: 200 OK header and a
 *     format-dependent Content-Type header are written.
 * 11. The format header is printed, xml_parse() runs, and the format footer
 *     is printed. Only atom/xml, gph, html and json have header and footer.
 *
 * Returns 0 on success or when there is nothing to print, 1 when the feed
 * request yields no data.
 *
 * NOTE(review): the token that follows the comma in both memcpy() calls on
 * ctx.tag in this file appears entity-mangled in this listing (most likely
 * the address of notag) -- confirm against the upstream file. Several
 * string literals below are also truncated by the listing. */
1062 int | |
1063 main(int argc, char *argv[]) | |
1064 { | |
1065 char buf[256]; | |
1066 const char *channelid = NULL; | |
1067 char *data, *format = "tsv", *p, *path = NULL, *tmp; | |
1068 size_t i; | |
1069 | |
1070 if (pledge("stdio dns inet rpath unveil", NULL) == -1) | |
1071 err(1, "pledge"); | |
1072 | |
1073 if ((tmp = getenv("REQUEST_URI"))) | |
1074 path = tmp; | |
1075 else if ((tmp = getenv("REQUEST"))) | |
1076 path = tmp; | |
1077 | |
1078 if (path) { | |
1079 cgimode = 1; | |
1080 | |
1081 if ((tmp = getenv("SERVER_NAME"))) | |
1082 server_name = tmp; | |
1083 if ((tmp = getenv("SERVER_PORT"))) | |
1084 server_port = tmp; | |
1085 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "go… | |
1086 godmode = 1; | |
1087 | |
1088 strlcpy(buf, path, sizeof(buf)); | |
1089 path = buf; | |
1090 | |
1091 if (!(p = strrchr(path, '/'))) | |
1092 usage(); | |
1093 | |
1094 channelid = p + 1; | |
1095 if ((p = strrchr(channelid, '.'))) { | |
1096 *p = '\0'; /* NULL terminate */ | |
1097 format = p + 1; | |
1098 } | |
1099 } else { | |
1100 if (argc <= 1) | |
1101 usage(); | |
1102 | |
1103 channelid = argv[1]; | |
1104 if (argc > 2) | |
1105 format = argv[2]; | |
1106 } | |
1107 if (!channelid || !isvalidchannel(channelid)) | |
1108 usage(); | |
1109 | |
1110 if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
1111 printfields = atom_item; | |
1112 else if (!strcmp(format, "gph")) | |
1113 printfields = gph_item; | |
1114 else if (!strcmp(format, "html")) | |
1115 printfields = html_item; | |
1116 else if (!strcmp(format, "json")) | |
1117 printfields = json_item; | |
1118 else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed")) | |
1119 printfields = sfeed_item; | |
1120 else if (!strcmp(format, "txt") || !strcmp(format, "twtxt")) | |
1121 printfields = twtxt_item; | |
1122 else | |
1123 usage(); | |
1124 | |
1125 search_res = youtube_channel_videos(channelid); | |
1126 if (!search_res || search_res->nitems == 0) { | |
1127 /* error or no videos found */ | |
1128 return 0; | |
1129 } | |
1130 | |
1131 if (!(data = request_channel_feed(channelid))) | |
1132 return 1; /* error, no data at all */ | |
1133 | |
1134 if (pledge("stdio", NULL) == -1) | |
1135 err(1, "pledge"); | |
1136 | |
1137 setxmldata(data, strlen(data)); | |
1138 | |
1139 memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); | |
1140 | |
1141 parser.xmlattr = xmlattr; | |
1142 parser.xmlattrentity = xmlattrentity; | |
1143 parser.xmlattrstart = xmlattrstart; | |
1144 parser.xmlcdata = xmldata; | |
1145 parser.xmldata = xmldata; | |
1146 parser.xmldataentity = xmldataentity; | |
1147 parser.xmltagend = xmltagend; | |
1148 parser.xmltagstart = xmltagstart; | |
1149 parser.xmltagstartparsed = xmltagstartparsed; | |
1150 | |
1151 /* init all fields, make sure it has a value */ | |
1152 for (i = 0; i < FeedFieldLast; i++) { | |
1153 string_append(&(ctx.fields[i].str), " ", 1); | |
1154 string_clear(&(ctx.fields[i].str)); | |
1155 } | |
1156 | |
1157 if (cgimode && !godmode) { | |
1158 fputs("Status: 200 OK\r\n", stdout); | |
1159 if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
1160 fputs("Content-Type: text/xml; charset=utf-8\r\n… | |
1161 else if (!strcmp(format, "html")) | |
1162 fputs("Content-Type: text/html; charset=utf-8\r\… | |
1163 else if (!strcmp(format, "json")) | |
1164 fputs("Content-Type: application/json; charset=u… | |
1165 else | |
1166 fputs("Content-Type: text/plain; charset=utf-8\r… | |
1167 } | |
1168 | |
1169 if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
1170 atom_header(); | |
1171 else if (!strcmp(format, "gph")) | |
1172 gph_header(); | |
1173 else if (!strcmp(format, "html")) | |
1174 html_header(); | |
1175 else if (!strcmp(format, "json")) | |
1176 json_header(); | |
1177 | |
1178 /* NOTE: getnext is defined in xml.h for inline optimization */ | |
1179 xml_parse(&parser); | |
1180 | |
1181 if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
1182 atom_footer(); | |
1183 else if (!strcmp(format, "gph")) | |
1184 gph_footer(); | |
1185 else if (!strcmp(format, "html")) | |
1186 html_footer(); | |
1187 else if (!strcmp(format, "json")) | |
1188 json_footer(); | |
1189 | |
1190 return 0; | |
1191 } | |