Introduction
Introduction Statistics Contact Development Disclaimer Help
feed.c - frontends - front-ends for some sites (experiment)
Log
Files
Refs
README
LICENSE
---
feed.c (29885B)
---
1 #include <err.h>
2 #include <errno.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <strings.h>
8 #include <time.h>
9 #include <unistd.h>
10
11 #include "https.h"
12 #include "util.h"
13 #include "youtube.h"
14 #include "xml.h"
15
16 #define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag))
17 #define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)
18
19 /* string and byte-length */
20 #define STRP(s) s,sizeof(s)-1
21
22 enum FeedType {
23 FeedTypeNone = 0,
24 FeedTypeAtom = 2
25 };
26
27 /* String data / memory pool */
28 typedef struct string {
29 char *data; /* data */
30 size_t len; /* string length */
31 size_t bufsiz; /* allocated size */
32 } String;
33
34 /* NOTE: the order of these fields (content, date, author) indicate the
35 * priority to use them, from least important to high. */
36 enum TagId {
37 TagUnknown = 0,
38 /* Atom */
39 /* creation date has higher priority */
40 AtomTagPublished,
41 AtomTagTitle,
42 AtomTagMediaDescription,
43 AtomTagId,
44 AtomTagLink,
45 AtomTagLinkAlternate,
46 AtomTagAuthor, AtomTagAuthorName,
47 TagYoutubeVideoId,
48 TagLast
49 };
50
51 typedef struct feedtag {
52 char *name; /* name of tag to match */
53 size_t len; /* len of `name` */
54 enum TagId id; /* unique ID */
55 } FeedTag;
56
57 typedef struct field {
58 String str;
59 enum TagId tagid; /* tagid set previously, used for tag priority…
60 } FeedField;
61
62 enum {
63 /* sfeed fields */
64 FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldConte…
65 FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCateg…
66 FeedFieldYoutubeId, /* yt:videoId */
67 FeedFieldLast
68 };
69
70 typedef struct feedcontext {
71 String *field; /* current FeedItem field String …
72 FeedField fields[FeedFieldLast]; /* data for current item…
73 FeedTag tag; /* unique current parsed tag */
74 int iscontent; /* in content data */
75 int iscontenttag; /* in content tag */
76 enum FeedType feedtype;
77 } FeedContext;
78
79 static long long datetounix(long long, int, int, int, int, int);
80 static FeedTag * gettag(enum FeedType, const char *, size_t);
81 static long gettzoffset(const char *);
82 static int isattr(const char *, size_t, const char *, size_t);
83 static int istag(const char *, size_t, const char *, size_t);
84 static int parsetime(const char *, long long *);
85
86 static void atom_header(void);
87 static void atom_item(void);
88 static void atom_footer(void);
89 static void gph_header(void);
90 static void gph_footer(void);
91 static void html_header(void);
92 static void html_footer(void);
93 static void json_header(void);
94 static void json_item(void);
95 static void json_footer(void);
96 static void sfeed_item(void); /* TSV / sfeed */
97 static void twtxt_item(void);
98
99 static void string_append(String *, const char *, size_t);
100 static void string_buffer_realloc(String *, size_t);
101 static void string_clear(String *);
102 static void string_print_encoded(String *);
103 static void string_print_timestamp(String *);
104 static void string_print(String *);
105 static void xmlattr(XMLParser *, const char *, size_t, const char *, siz…
106 const char *, size_t);
107 static void xmlattrentity(XMLParser *, const char *, size_t, const char …
108 size_t, const char *, size_t);
109 static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
110 size_t);
111 static void xmldata(XMLParser *, const char *, size_t);
112 static void xmldataentity(XMLParser *, const char *, size_t);
113 static void xmltagend(XMLParser *, const char *, size_t, int);
114 static void xmltagstart(XMLParser *, const char *, size_t);
115 static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
116
117 /* Atom, must be alphabetical order */
118 static const FeedTag atomtags[] = {
119 { STRP("author"), AtomTagAuthor },
120 { STRP("id"), AtomTagId },
121 /* Atom: <link href="" />, RSS has <link></link> */
122 { STRP("link"), AtomTagLink },
123 { STRP("media:description"), AtomTagMediaDescription },
124 { STRP("published"), AtomTagPublished },
125 { STRP("title"), AtomTagTitle },
126 { STRP("yt:videoId"), TagYoutubeVideoId }
127 };
128
129 /* special case: nested <author><name> */
130 static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
131 static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorNa…
132
133 /* reference to no / unknown tag */
134 static const FeedTag notag = { STRP(""), TagUnknown };
135
136 /* map TagId type to RSS/Atom field, all tags must be defined */
137 static const int fieldmap[TagLast] = {
138 [TagUnknown] = -1,
139 /* Atom */
140 [AtomTagPublished] = FeedFieldTime,
141 [AtomTagTitle] = FeedFieldTitle,
142 [AtomTagMediaDescription] = FeedFieldContent,
143 [AtomTagId] = FeedFieldId,
144 [AtomTagLink] = -1,
145 [AtomTagLinkAlternate] = FeedFieldLink,
146 [AtomTagAuthor] = -1,
147 [AtomTagAuthorName] = FeedFieldAuthor,
148 [TagYoutubeVideoId] = FeedFieldYoutubeId
149 };
150
151 static const int FieldSeparator = '\t';
152
153 static FeedContext ctx;
154 static XMLParser parser; /* XML parser state */
155 static String attrrel, tmpstr;
156
157 static struct search_response *search_res = NULL;
158 static void (*printfields)(void) = sfeed_item;
159 static int cgimode = 0, godmode = 0;
160 static const char *server_name = "127.0.0.1", *server_port = "70";
161
162 static int
163 tagcmp(const void *v1, const void *v2)
164 {
165 return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name);
166 }
167
168 /* Unique tagid for parsed tag name. */
169 static FeedTag *
170 gettag(enum FeedType feedtype, const char *name, size_t namelen)
171 {
172 FeedTag f, *r = NULL;
173
174 f.name = (char *)name;
175
176 switch (feedtype) {
177 case FeedTypeAtom:
178 r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atom…
179 sizeof(atomtags[0]), tagcmp);
180 break;
181 default:
182 break;
183 }
184
185 return r;
186 }
187
188 /* Clear string only; don't free, prevents unnecessary reallocation. */
189 static void
190 string_clear(String *s)
191 {
192 if (s->data)
193 s->data[0] = '\0';
194 s->len = 0;
195 }
196
197 static void
198 string_buffer_realloc(String *s, size_t newlen)
199 {
200 size_t alloclen;
201
202 if (newlen > SIZE_MAX / 2) {
203 alloclen = SIZE_MAX;
204 } else {
205 for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
206 ;
207 }
208 if (!(s->data = realloc(s->data, alloclen)))
209 err(1, "realloc");
210 s->bufsiz = alloclen;
211 }
212
213 /* Append data to String, s->data and data may not overlap. */
214 static void
215 string_append(String *s, const char *data, size_t len)
216 {
217 if (!len)
218 return;
219
220 if (s->len >= SIZE_MAX - len) {
221 errno = ENOMEM;
222 err(1, "realloc");
223 }
224
225 /* check if allocation is necessary, never shrink the buffer. */
226 if (s->len + len >= s->bufsiz)
227 string_buffer_realloc(s, s->len + len + 1);
228 memcpy(s->data + s->len, data, len);
229 s->len += len;
230 s->data[s->len] = '\0';
231 }
232
233 /* Print text, encode TABs, newlines and '\', remove other whitespace.
234 * Remove leading and trailing whitespace. */
235 static void
236 string_print_encoded(String *s)
237 {
238 const char *p, *e;
239
240 if (!s->data || !s->len)
241 return;
242
243 p = s->data;
244 e = p + strlen(p);
245
246 for (; *p && p != e; p++) {
247 switch (*p) {
248 case '\n': putchar('\\'); putchar('n'); break;
249 case '\\': putchar('\\'); putchar('\\'); break;
250 case '\t': putchar('\\'); putchar('t'); break;
251 default:
252 /* ignore control chars */
253 if (!ISCNTRL((unsigned char)*p))
254 putchar(*p);
255 break;
256 }
257 }
258 }
259
260 /* Print text, replace TABs, carriage return and other whitespace with '…
261 * Other control chars are removed. Remove leading and trailing whitespa…
262 static void
263 string_print(String *s)
264 {
265 char *p, *e;
266
267 if (!s->data || !s->len)
268 return;
269
270 p = s->data;
271 e = p + s->len;
272 for (; *p && p != e; p++) {
273 if (ISSPACE((unsigned char)*p))
274 putchar(' '); /* any whitespace to space */
275 else if (!ISCNTRL((unsigned char)*p))
276 /* ignore other control chars */
277 putchar(*p);
278 }
279 }
280
281 /* Print as UNIX timestamp, print nothing if the time is empty or invali…
282 static void
283 string_print_timestamp(String *s)
284 {
285 long long t;
286
287 if (!s->data || !s->len)
288 return;
289
290 if (parsetime(s->data, &t) != -1)
291 printf("%lld", t);
292 }
293
/* Convert broken-down UTC time fields to a signed (at least) 64-bit UNIX
   timestamp. The fields follow struct tm conventions: `year` is the year
   minus 1900 and `mon` is zero-based (0 = January); `day` is the day of the
   month starting at 1. `mon` indexes a 12-entry table without validation,
   so the caller has to pass 0..11. */
static long long
datetounix(long long year, int mon, int day, int hour, int min, int sec)
{
	/* seconds elapsed before the start of each month in a non-leap year */
	static const long month_start[] = {
		0,           31 * 86400,  59 * 86400,  90 * 86400,
		120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
		243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400
	};
	int leapyear = 0, leapdays = 0, centuries = 0, cycles, rem;
	long long t;

	if (year - 2ULL <= 136) {
		/* fast path for the common range 1902 up to and including 2038:
		   here every 4th year is a leap year, counted relative to 1970 */
		leapdays = (year - 68) >> 2;
		leapyear = ((year - 68) & 3) == 0;
		if (leapyear)
			leapdays--; /* this year's leap day is added per month below */
		t = 31536000 * (year - 70) + (86400 * leapdays); /* 365 * 86400 */
	} else {
		/* general rule, counted from the year 2000 (year == 100): a leap
		   year occurs every 4 years, is skipped every 100 years unless the
		   year is divisible by 400 */
		cycles = (year - 100) / 400;
		rem = (year - 100) % 400;
		if (rem < 0) {
			cycles--;
			rem += 400;
		}
		if (rem == 0) {
			leapyear = 1;
		} else {
			/* rem is 1..399 here: split off whole centuries */
			centuries = rem / 100;
			rem %= 100;
			if (rem != 0) {
				leapdays = rem / 4;
				leapyear = (rem % 4) == 0;
			}
		}
		/* 97 leap days per 400-year cycle, 24 per century */
		leapdays += (97 * cycles) + (24 * centuries) - leapyear;

		/* adjust 8 leap days from 1970 up to and including 2000:
		   ((30 * 365) + 8) * 86400 = 946771200 */
		t = ((year - 100) * 31536000LL) + (leapdays * 86400LL) + 946771200LL;
	}

	t += month_start[mon];
	if (leapyear && mon >= 2)
		t += 86400; /* past February in a leap year */

	t += 86400LL * (day - 1);
	t += 3600LL * hour;
	t += 60LL * min;
	t += sec;

	return t;
}
360
/* Parse a numeric UTC offset such as "+01:00", "-0530" or "+02" at the start
   of `s` and return it in seconds, negative for '-'. Input that does not
   start with '+' or '-' (for example a timezone name) yields 0. */
static long
gettzoffset(const char *s)
{
	const char *p;
	long hours = 0, minutes = 0, sign;
	size_t n;

	if (*s != '+' && *s != '-')
		return 0; /* timezone name or nothing: no offset */

	sign = (*s == '-') ? -1 : 1;
	p = s + 1;

	/* at most two hour digits */
	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
		hours = (hours * 10) + (*p - '0');

	/* optional separator */
	if (*p == ':')
		p++;

	/* at most two minute digits */
	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
		minutes = (minutes * 10) + (*p - '0');

	return ((hours * 3600) + (minutes * 60)) * sign;
}
384
/* Parse the time string `s` into the UNIX timestamp `*tp`.
   Accepted layouts are "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" and
   "%Y%m%d%H%M%S", optionally followed by fractional seconds (".123", which
   are ignored) and then a numeric UTC offset handled by gettzoffset().
   Returns 0 on success or -1 on failure; `*tp` is only written on success. */
static int
parsetime(const char *s, long long *tp)
{
	int va[6] = { 0 }, i, v, vi;

	/* the string has to start with a four digit year */
	if (!ISDIGIT((unsigned char)s[0]) ||
	    !ISDIGIT((unsigned char)s[1]) ||
	    !ISDIGIT((unsigned char)s[2]) ||
	    !ISDIGIT((unsigned char)s[3]))
		return -1;

	/* parse the six fields: four digits for the year, two for the rest */
	for (vi = 0; *s && vi < 6; vi++) {
		for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
		                   ISDIGIT((unsigned char)*s); s++, i++) {
			v = (v * 10) + (*s - '0');
		}
		va[vi] = v;

		/* skip the separator that belongs after this field, if present */
		if ((vi < 2 && *s == '-') ||
		    (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
		    (vi > 2 && *s == ':'))
			s++;
	}

	/* skip fractional seconds, for example the ".000" in
	   "%Y-%m-%dT%H:%M:%S.000+01:00": otherwise gettzoffset() would see
	   the '.' and the UTC offset after it would be ignored */
	if (*s == '.') {
		for (s++; ISDIGIT((unsigned char)*s); s++)
			;
	}

	/* invalid range */
	if (va[0] < 0 || va[0] > 9999 ||
	    va[1] < 1 || va[1] > 12 ||
	    va[2] < 1 || va[2] > 31 ||
	    va[3] < 0 || va[3] > 23 ||
	    va[4] < 0 || va[4] > 59 ||
	    va[5] < 0 || va[5] > 60) /* allow leap second */
		return -1;

	/* the parsed fields are local to the offset: subtract it to get UTC */
	*tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
	      gettzoffset(s);

	return 0;
}
427
/* Write the XML declaration and the opening of the Atom <feed> element. */
static void
atom_header(void)
{
	static const char header[] =
		"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
		"<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"
		"\t<title>Newsfeed</title>\n";

	fwrite(header, 1, sizeof(header) - 1, stdout);
}
435
/* Close the Atom <feed> element opened by atom_header(). */
static void
atom_footer(void)
{
	static const char footer[] = "</feed>\n";

	fwrite(footer, 1, sizeof(footer) - 1, stdout);
}
441
442 static void
443 atom_item(void)
444 {
445 struct item *v, *found = NULL;
446 size_t i;
447
448 /* must have a video id */
449 if (!ctx.fields[FeedFieldYoutubeId].str.len)
450 return;
451
452 for (i = 0; i < search_res->nitems; i++) {
453 v = &(search_res->items[i]);
454 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
455 found = v;
456 }
457 /* Only print the video if it was found in the feed aswell.
458 This way it filters away shorts too. */
459 if (!found)
460 return;
461
462 fputs("<entry>\n\t<title>", stdout);
463 xmlencode(ctx.fields[FeedFieldTitle].str.data);
464 if (found->duration[0]) {
465 fputs(" [", stdout);
466 xmlencode(found->duration);
467 fputs("]", stdout);
468 }
469 fputs("</title>\n", stdout);
470 if (ctx.fields[FeedFieldLink].str.len) {
471 fputs("\t<link rel=\"alternate\" href=\"", stdout);
472 xmlencode(ctx.fields[FeedFieldLink].str.data);
473 fputs("\" />\n", stdout);
474 }
475 /* prefer link over id for Atom <id>. */
476 fputs("\t<id>", stdout);
477 if (ctx.fields[FeedFieldLink].str.len)
478 xmlencode(ctx.fields[FeedFieldLink].str.data);
479 else if (ctx.fields[FeedFieldId].str.len)
480 xmlencode(ctx.fields[FeedFieldId].str.data);
481 fputs("</id>\n", stdout);
482
483 /* just print the original timestamp, it should conform */
484 fputs("\t<updated>", stdout);
485 string_print(&ctx.fields[FeedFieldTime].str);
486 fputs("</updated>\n", stdout);
487
488 if (ctx.fields[FeedFieldAuthor].str.len) {
489 fputs("\t<author><name>", stdout);
490 xmlencode(ctx.fields[FeedFieldAuthor].str.data);
491 fputs("</name></author>\n", stdout);
492 }
493 if (ctx.fields[FeedFieldContent].str.len) {
494 fputs("\t<content>", stdout);
495 xmlencode(ctx.fields[FeedFieldContent].str.data);
496 fputs("</content>\n", stdout);
497 }
498 fputs("</entry>\n", stdout);
499 }
500
501
502 static void
503 html_header(void)
504 {
505 fputs("<!DOCTYPE HTML>\n"
506 "<html>\n"
507 "<head>\n"
508 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=…
509 "</head>\n"
510 "<body><pre>\n", stdout);
511 }
512
/* Close the <pre>, <body> and <html> elements of the HTML listing. */
static void
html_footer(void)
{
	static const char footer[] = "</pre></body>\n</html>\n";

	fwrite(footer, 1, sizeof(footer) - 1, stdout);
}
518
519 static void
520 html_item(void)
521 {
522 struct item *v, *found = NULL;
523 size_t i;
524
525 /* must have a video id */
526 if (!ctx.fields[FeedFieldYoutubeId].str.len)
527 return;
528
529 for (i = 0; i < search_res->nitems; i++) {
530 v = &(search_res->items[i]);
531 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
532 found = v;
533 }
534 /* Only print the video if it was found in the feed aswell.
535 This way it filters away shorts too. */
536 if (!found)
537 return;
538
539 /* just print the original timestamp, it should conform */
540 xmlencode(ctx.fields[FeedFieldTime].str.data);
541 fputs("&nbsp;", stdout);
542
543 if (ctx.fields[FeedFieldLink].str.len) {
544 fputs("<a href=\"", stdout);
545 xmlencode(ctx.fields[FeedFieldLink].str.data);
546 fputs("\">", stdout);
547 }
548
549 xmlencode(ctx.fields[FeedFieldTitle].str.data);
550
551 if (found->duration[0]) {
552 fputs(" [", stdout);
553 xmlencode(found->duration);
554 fputs("]", stdout);
555 }
556 if (ctx.fields[FeedFieldLink].str.len) {
557 fputs("</a>", stdout);
558 }
559 fputs("\n", stdout);
560 }
561
/* Write the NUL-terminated string `s` to stdout through gophertext(). */
static void
gphencode(const char *s)
{
	size_t len = strlen(s);

	gophertext(stdout, s, len);
}
567
568 static void
569 gph_header(void)
570 {
571 }
572
/* Write the line that ends the gopher listing: a single "." and CRLF. */
static void
gph_footer(void)
{
	static const char footer[] = ".\r\n";

	fwrite(footer, 1, sizeof(footer) - 1, stdout);
}
578
579 static void
580 gph_item(void)
581 {
582 struct item *v, *found = NULL;
583 size_t i;
584
585 /* must have a video id */
586 if (!ctx.fields[FeedFieldYoutubeId].str.len)
587 return;
588
589 for (i = 0; i < search_res->nitems; i++) {
590 v = &(search_res->items[i]);
591 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
592 found = v;
593 }
594 /* Only print the video if it was found in the feed aswell.
595 This way it filters away shorts too. */
596 if (!found)
597 return;
598
599 fputs("h", stdout);
600 /* just print the original timestamp, it should conform */
601 gphencode(ctx.fields[FeedFieldTime].str.data);
602 fputs(" ", stdout);
603 gphencode(ctx.fields[FeedFieldTitle].str.data);
604 if (found->duration[0]) {
605 fputs(" [", stdout);
606 gphencode(found->duration);
607 fputs("]", stdout);
608 }
609 fputs("\t", stdout);
610 if (ctx.fields[FeedFieldLink].str.len) {
611 fputs("URL:", stdout);
612 gphencode(ctx.fields[FeedFieldLink].str.data);
613 }
614 printf("\t%s\t%s\r\n", server_name, server_port);
615 }
616
/* Write the opening of the JSON Feed object up to and including the start
   of the "items" array. */
static void
json_header(void)
{
	static const char header[] =
		"{\n"
		"\"version\": \"https://jsonfeed.org/version/1.1\",\n"
		"\"title\": \"Newsfeed\",\n"
		"\"items\": [\n";

	fwrite(header, 1, sizeof(header) - 1, stdout);
}
625
/* Close the "items" array and the JSON Feed object opened by json_header(). */
static void
json_footer(void)
{
	static const char footer[] = "]\n}\n";

	fwrite(footer, 1, sizeof(footer) - 1, stdout);
}
631
/* Print `s` as the inside of a JSON string: backslash and double quote are
   backslash-escaped, control characters are written as \u00XX and all other
   bytes are copied unchanged. The surrounding quotes are not printed. */
static void
json_printfield(const char *s)
{
	unsigned char c;

	while ((c = (unsigned char)*s++) != '\0') {
		switch (c) {
		case '\\':
			fputs("\\\\", stdout);
			break;
		case '"':
			fputs("\\\"", stdout);
			break;
		default:
			if (ISCNTRL(c))
				printf("\\u00%02x", c);
			else
				putchar(c);
			break;
		}
	}
}
646
647 static void
648 json_item(void)
649 {
650 static int json_firstitem = 1;
651 struct item *v, *found = NULL;
652 size_t i;
653
654 /* must have a video id */
655 if (!ctx.fields[FeedFieldYoutubeId].str.len)
656 return;
657
658 for (i = 0; i < search_res->nitems; i++) {
659 v = &(search_res->items[i]);
660 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
661 found = v;
662 }
663 /* Only print the video if it was found in the feed aswell.
664 This way it filters away shorts too. */
665 if (!found)
666 return;
667
668 if (!json_firstitem)
669 fputs(",\n", stdout);
670 json_firstitem = 0;
671
672 fputs("{\n\t\"id\": \"", stdout);
673 json_printfield(ctx.fields[FeedFieldId].str.data);
674 fputs("\"", stdout);
675
676 /* just print the original timestamp, it should conform */
677 fputs(",\n\t\"date_published\": \"", stdout);
678 string_print(&ctx.fields[FeedFieldTime].str);
679 fputs("\"", stdout);
680
681 fputs(",\n\t\"title\": \"", stdout);
682 json_printfield(ctx.fields[FeedFieldTitle].str.data);
683 if (found->duration[0]) {
684 fputs(" [", stdout);
685 json_printfield(found->duration);
686 fputs("]", stdout);
687 }
688 fputs("\"", stdout);
689
690 if (ctx.fields[FeedFieldLink].str.len) {
691 fputs(",\n\t\"url\": \"", stdout);
692 json_printfield(ctx.fields[FeedFieldLink].str.data);
693 fputs("\"", stdout);
694 }
695
696 if (ctx.fields[FeedFieldAuthor].str.len) {
697 fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
698 json_printfield(ctx.fields[FeedFieldAuthor].str.data);
699 fputs("\"}]", stdout);
700 }
701
702 fputs(",\n\t\"content_text\": \"", stdout);
703 json_printfield(ctx.fields[FeedFieldContent].str.data);
704 fputs("\"\n}", stdout);
705 }
706
707 static void
708 sfeed_item(void)
709 {
710 struct item *v, *found = NULL;
711 size_t i;
712
713 /* must have a video id */
714 if (!ctx.fields[FeedFieldYoutubeId].str.len)
715 return;
716
717 for (i = 0; i < search_res->nitems; i++) {
718 v = &(search_res->items[i]);
719 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
720 found = v;
721 }
722 /* Only print the video if it was found in the feed aswell.
723 This way it filters away shorts too. */
724 if (!found)
725 return;
726
727 string_print_timestamp(&ctx.fields[FeedFieldTime].str);
728 putchar(FieldSeparator);
729 string_print(&ctx.fields[FeedFieldTitle].str);
730 if (found->duration[0]) {
731 fputs(" [", stdout);
732 fputs(found->duration, stdout);
733 fputs("]", stdout);
734 }
735 putchar(FieldSeparator);
736 string_print(&ctx.fields[FeedFieldLink].str);
737 putchar(FieldSeparator);
738 string_print_encoded(&ctx.fields[FeedFieldContent].str);
739 putchar(FieldSeparator);
740 fputs("plain", stdout);
741 putchar(FieldSeparator);
742 string_print(&ctx.fields[FeedFieldId].str);
743 putchar(FieldSeparator);
744 string_print(&ctx.fields[FeedFieldAuthor].str);
745 putchar(FieldSeparator);
746 /* no/empty enclosure */
747 putchar(FieldSeparator);
748 /* empty category */
749 putchar('\n');
750 }
751
752 static void
753 twtxt_item(void)
754 {
755 struct item *v, *found = NULL;
756 size_t i;
757
758 /* must have a video id */
759 if (!ctx.fields[FeedFieldYoutubeId].str.len)
760 return;
761
762 for (i = 0; i < search_res->nitems; i++) {
763 v = &(search_res->items[i]);
764 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->…
765 found = v;
766 }
767 /* Only print the video if it was found in the feed aswell.
768 This way it filters away shorts too. */
769 if (!found)
770 return;
771
772 string_print(&ctx.fields[FeedFieldTime].str);
773 putchar(FieldSeparator);
774 string_print(&ctx.fields[FeedFieldTitle].str);
775 if (found->duration[0]) {
776 fputs(" [", stdout);
777 fputs(found->duration, stdout);
778 fputs("]", stdout);
779 }
780 fputs(": ", stdout);
781 string_print(&ctx.fields[FeedFieldLink].str);
782 putchar('\n');
783 }
784
/* Return 1 when the two tag names are equal ignoring case, else 0.
   A length mismatch rejects early; the comparison itself is done on the
   NUL-terminated strings. */
static int
istag(const char *name, size_t len, const char *name2, size_t len2)
{
	if (len != len2)
		return 0;

	return strcasecmp(name, name2) == 0;
}
790
/* Return 1 when the two attribute names are equal ignoring case, else 0.
   A length mismatch rejects early; the comparison itself is done on the
   NUL-terminated strings. */
static int
isattr(const char *name, size_t len, const char *name2, size_t len2)
{
	if (len != len2)
		return 0;

	return strcasecmp(name, name2) == 0;
}
796
797 static void
798 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
799 const char *v, size_t vl)
800 {
801 if (ISINCONTENT(ctx))
802 return;
803
804 if (!ctx.tag.id)
805 return;
806
807 if (ISCONTENTTAG(ctx))
808 return;
809
810 if (ctx.tag.id == AtomTagLink) {
811 if (isattr(n, nl, STRP("rel"))) {
812 string_append(&attrrel, v, vl);
813 } else if (isattr(n, nl, STRP("href"))) {
814 string_append(&tmpstr, v, vl);
815 }
816 }
817 }
818
819 static void
820 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, siz…
821 const char *data, size_t datalen)
822 {
823 char buf[8];
824 int len;
825
826 if (ISINCONTENT(ctx))
827 return;
828
829 if (!ctx.tag.id)
830 return;
831
832 /* try to translate entity, else just pass as data to
833 * xmlattr handler. */
834 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
835 xmlattr(p, t, tl, n, nl, buf, (size_t)len);
836 else
837 xmlattr(p, t, tl, n, nl, data, datalen);
838 }
839
840 static void
841 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size…
842 {
843 if (ISINCONTENT(ctx))
844 return;
845
846 if (attrrel.len && isattr(n, nl, STRP("rel")))
847 string_clear(&attrrel);
848 else if (tmpstr.len &&
849 (isattr(n, nl, STRP("href")) ||
850 isattr(n, nl, STRP("url"))))
851 string_clear(&tmpstr); /* use the last value for multipl…
852 }
853
854 static void
855 xmldata(XMLParser *p, const char *s, size_t len)
856 {
857 if (!ctx.field)
858 return;
859
860 string_append(ctx.field, s, len);
861 }
862
863 static void
864 xmldataentity(XMLParser *p, const char *data, size_t datalen)
865 {
866 char buf[8];
867 int len;
868
869 if (!ctx.field)
870 return;
871
872 /* try to translate entity, else just pass as data to
873 * xmldata handler. */
874 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
875 xmldata(p, buf, (size_t)len);
876 else
877 xmldata(p, data, datalen);
878 }
879
880 static void
881 xmltagstart(XMLParser *p, const char *t, size_t tl)
882 {
883 const FeedTag *f;
884
885 if (ISINCONTENT(ctx))
886 return;
887
888 /* start of RSS or Atom item / entry */
889 if (ctx.feedtype == FeedTypeNone) {
890 if (istag(t, tl, STRP("entry")))
891 ctx.feedtype = FeedTypeAtom;
892 return;
893 }
894
895 /* field tagid already set or nested tags. */
896 if (ctx.tag.id) {
897 /* nested <author><name> for Atom */
898 if (ctx.tag.id == AtomTagAuthor &&
899 istag(t, tl, STRP("name"))) {
900 memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ct…
901 } else {
902 return; /* other nested tags are not allowed: re…
903 }
904 }
905
906 /* in item */
907 if (ctx.tag.id == TagUnknown) {
908 if (!(f = gettag(ctx.feedtype, t, tl)))
909 f = &notag;
910 memcpy(&(ctx.tag), f, sizeof(ctx.tag));
911 }
912
913 ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
914 string_clear(&attrrel);
915 }
916
917 static void
918 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
919 {
920 enum TagId tagid;
921
922 if (ISINCONTENT(ctx))
923 return;
924
925 /* set tag type based on its attribute value */
926 if (ctx.tag.id == AtomTagLink) {
927 /* empty or "alternate": other types could be
928 "enclosure", "related", "self" or "via" */
929 if (!attrrel.len || isattr(attrrel.data, attrrel.len, ST…
930 ctx.tag.id = AtomTagLinkAlternate;
931 else
932 ctx.tag.id = AtomTagLink; /* unknown */
933 }
934
935 tagid = ctx.tag.id;
936
937 /* map tag type to field: unknown or lesser priority is ignored,
938 when tags of the same type are repeated only the first is use…
939 if (fieldmap[tagid] == -1 ||
940 tagid <= ctx.fields[fieldmap[tagid]].tagid) {
941 return;
942 }
943
944 if (ctx.iscontenttag) {
945 ctx.iscontent = 1;
946 ctx.iscontenttag = 0;
947 }
948
949 ctx.field = &(ctx.fields[fieldmap[tagid]].str);
950 ctx.fields[fieldmap[tagid]].tagid = tagid;
951
952 /* clear field if it is overwritten (with a priority order) for …
953 value, if the field can have multiple values then do not clea…
954 string_clear(ctx.field);
955 }
956
957 static void
958 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
959 {
960 size_t i;
961
962 if (ctx.feedtype == FeedTypeNone)
963 return;
964
965 if (ISINCONTENT(ctx)) {
966 /* not a closed content field */
967 if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
968 return;
969 } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)…
970 /* matched tag end: close it */
971 } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
972 istag(t, tl, STRP("entry"))))) /* Atom */
973 {
974 /* end of Atom entry */
975 printfields();
976
977 /* clear strings */
978 for (i = 0; i < FeedFieldLast; i++) {
979 string_clear(&ctx.fields[i].str);
980 ctx.fields[i].tagid = TagUnknown;
981 }
982 /* allow parsing of Atom and RSS concatenated in one XML…
983 ctx.feedtype = FeedTypeNone;
984 } else {
985 return; /* not end of field */
986 }
987
988 /* temporary string: for fields that cannot be processed
989 directly and need more context, for example by its tag
990 attributes, like the Atom link rel="alternate|enclosure". */
991 if (tmpstr.len && ctx.field) {
992 string_clear(ctx.field);
993 string_append(ctx.field, tmpstr.data, tmpstr.len);
994 }
995
996 /* close field */
997 string_clear(&tmpstr); /* reuse and clear temporary string */
998
999 if (ctx.tag.id == AtomTagAuthorName)
1000 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* …
1001 else
1002 memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
1003
1004 ctx.iscontent = 0;
1005 ctx.field = NULL;
1006 }
1007
/* Request the videos feed of the channel `channelid` from www.youtube.com.
   Returns what request() returns, or NULL when the request path does not
   fit in the local buffer. */
static char *
request_channel_feed(const char *channelid)
{
	char path[2048];
	int n;

	n = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s",
	             channelid);

	/* formatting error or truncation: the request would be too long */
	if (n < 0)
		return NULL;
	if ((size_t)n >= sizeof(path))
		return NULL;

	return request("www.youtube.com", path, "");
}
1021
/* Return 1 when `s` looks like a channel id: exactly 24 characters, each of
   them a letter, a digit, '-' or '_'. Return 0 otherwise. */
int
isvalidchannel(const char *s)
{
	const char *p;

	for (p = s; *p != '\0'; p++) {
		if (!(ISALPHA((unsigned char)*p) ||
		      ISDIGIT((unsigned char)*p) ||
		      *p == '-' || *p == '_'))
			return 0;
	}

	/* every character was valid: only the length is left to check */
	return (p - s) == 24;
}
1037
1038 void
1039 usage(void)
1040 {
1041 const char *line1 = "Bad Request, path should be the channel id …
1042 const char *line2 = "Supported extensions are: [atom|gph|html|js…
1043
1044 if (cgimode) {
1045 if (godmode) {
1046 printf("3%s\tErr\t%s\t%s\r\n", line1, server_nam…
1047 printf("3%s\tErr\t%s\t%s\r\n", line2, server_nam…
1048 } else {
1049 fputs("Status: 400 Bad Request\r\n", stdout);
1050 fputs("Content-Type: text/plain; charset=utf-8\r…
1051 printf("400 %s\n", line1);
1052 printf("\n%s", line2);
1053 }
1054 exit(0);
1055 } else {
1056 fputs("usage: feed <channelid> [atom|gph|html|json|tsv|t…
1057 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n"…
1058 exit(1);
1059 }
1060 }
1061
1062 int
1063 main(int argc, char *argv[])
1064 {
1065 char buf[256];
1066 const char *channelid = NULL;
1067 char *data, *format = "tsv", *p, *path = NULL, *tmp;
1068 size_t i;
1069
1070 if (pledge("stdio dns inet rpath unveil", NULL) == -1)
1071 err(1, "pledge");
1072
1073 if ((tmp = getenv("REQUEST_URI")))
1074 path = tmp;
1075 else if ((tmp = getenv("REQUEST")))
1076 path = tmp;
1077
1078 if (path) {
1079 cgimode = 1;
1080
1081 if ((tmp = getenv("SERVER_NAME")))
1082 server_name = tmp;
1083 if ((tmp = getenv("SERVER_PORT")))
1084 server_port = tmp;
1085 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "go…
1086 godmode = 1;
1087
1088 strlcpy(buf, path, sizeof(buf));
1089 path = buf;
1090
1091 if (!(p = strrchr(path, '/')))
1092 usage();
1093
1094 channelid = p + 1;
1095 if ((p = strrchr(channelid, '.'))) {
1096 *p = '\0'; /* NULL terminate */
1097 format = p + 1;
1098 }
1099 } else {
1100 if (argc <= 1)
1101 usage();
1102
1103 channelid = argv[1];
1104 if (argc > 2)
1105 format = argv[2];
1106 }
1107 if (!channelid || !isvalidchannel(channelid))
1108 usage();
1109
1110 if (!strcmp(format, "atom") || !strcmp(format, "xml"))
1111 printfields = atom_item;
1112 else if (!strcmp(format, "gph"))
1113 printfields = gph_item;
1114 else if (!strcmp(format, "html"))
1115 printfields = html_item;
1116 else if (!strcmp(format, "json"))
1117 printfields = json_item;
1118 else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed"))
1119 printfields = sfeed_item;
1120 else if (!strcmp(format, "txt") || !strcmp(format, "twtxt"))
1121 printfields = twtxt_item;
1122 else
1123 usage();
1124
1125 search_res = youtube_channel_videos(channelid);
1126 if (!search_res || search_res->nitems == 0) {
1127 /* error or no videos found */
1128 return 0;
1129 }
1130
1131 if (!(data = request_channel_feed(channelid)))
1132 return 1; /* error, no data at all */
1133
1134 if (pledge("stdio", NULL) == -1)
1135 err(1, "pledge");
1136
1137 setxmldata(data, strlen(data));
1138
1139 memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
1140
1141 parser.xmlattr = xmlattr;
1142 parser.xmlattrentity = xmlattrentity;
1143 parser.xmlattrstart = xmlattrstart;
1144 parser.xmlcdata = xmldata;
1145 parser.xmldata = xmldata;
1146 parser.xmldataentity = xmldataentity;
1147 parser.xmltagend = xmltagend;
1148 parser.xmltagstart = xmltagstart;
1149 parser.xmltagstartparsed = xmltagstartparsed;
1150
1151 /* init all fields, make sure it has a value */
1152 for (i = 0; i < FeedFieldLast; i++) {
1153 string_append(&(ctx.fields[i].str), " ", 1);
1154 string_clear(&(ctx.fields[i].str));
1155 }
1156
1157 if (cgimode && !godmode) {
1158 fputs("Status: 200 OK\r\n", stdout);
1159 if (!strcmp(format, "atom") || !strcmp(format, "xml"))
1160 fputs("Content-Type: text/xml; charset=utf-8\r\n…
1161 else if (!strcmp(format, "html"))
1162 fputs("Content-Type: text/html; charset=utf-8\r\…
1163 else if (!strcmp(format, "json"))
1164 fputs("Content-Type: application/json; charset=u…
1165 else
1166 fputs("Content-Type: text/plain; charset=utf-8\r…
1167 }
1168
1169 if (!strcmp(format, "atom") || !strcmp(format, "xml"))
1170 atom_header();
1171 else if (!strcmp(format, "gph"))
1172 gph_header();
1173 else if (!strcmp(format, "html"))
1174 html_header();
1175 else if (!strcmp(format, "json"))
1176 json_header();
1177
1178 /* NOTE: getnext is defined in xml.h for inline optimization */
1179 xml_parse(&parser);
1180
1181 if (!strcmp(format, "atom") || !strcmp(format, "xml"))
1182 atom_footer();
1183 else if (!strcmp(format, "gph"))
1184 gph_footer();
1185 else if (!strcmp(format, "html"))
1186 html_footer();
1187 else if (!strcmp(format, "json"))
1188 json_footer();
1189
1190 return 0;
1191 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.