commit f5a6863b5397d1cc3ad31de291be11fae6256b5f | |
parent 7b18c287f2fcf98227ff2ec1fdd4eeb8050e8166 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Wed, 10 May 2023 01:10:51 +0200 | |
add initial version of youtube/feed | |
This fetches the Youtube Atom feed and the channel videos and combines the data. | |
It can output: | |
- Atom | |
- sfeed(5) | |
- JSON / JSON Feed | |
It can run in command-line and CGI mode. | |
For now it only adds the video duration to the title and filters out Youtube shorts.
The Atom parser is based on sfeed. | |
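
For reference, a hedged sketch of how this would be invoked (channel id and host are placeholders): in command-line mode the channel id (24 characters of [A-Za-z0-9_-]) is the first argument and an optional second argument picks the format, one of atom/xml, json or tsv/sfeed (the default); in CGI mode the channel id comes from the last path component of REQUEST_URI and the format from its suffix.

	./youtube/feed UCxxxxxxxxxxxxxxxxxxxxxx atom
	https://example.org/cgi-bin/feed/UCxxxxxxxxxxxxxxxxxxxxxx.json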
Diffstat: | |
M Makefile | 4 ++++ | |
M util.h | 7 +++++++ | |
A youtube/feed.c | 1001 +++++++++++++++++++++++++++++… | |
3 files changed, 1012 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -22,6 +22,7 @@ LIBTLS_LDFLAGS_STATIC = -ltls -lssl -lcrypto -static | |
BIN = \ | |
youtube/cgi \ | |
youtube/cli \ | |
+ youtube/feed \ | |
youtube/gopher | |
SRC = ${BIN:=.c} \ | |
@@ -68,6 +69,9 @@ youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o | |
youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o | |
${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
+youtube/feed: ${LIB} youtube/youtube.o youtube/feed.o | |
+ ${CC} -o $@ youtube/feed.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
+ | |
youtube/gopher: ${LIB} youtube/youtube.o youtube/gopher.o | |
${CC} -o $@ youtube/gopher.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
diff --git a/util.h b/util.h | |
@@ -3,6 +3,13 @@ | |
#define unveil(p1,p2) 0 | |
#endif | |
+/* ctype-like macros, but always compatible with ASCII / UTF-8 */ | |
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) | |
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f) | |
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10) | |
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) | |
+#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? ((c) | 32) : (c)) | |
+ | |
#undef strlcat | |
size_t strlcat(char *, const char *, size_t); | |
#undef strlcpy | |
diff --git a/youtube/feed.c b/youtube/feed.c | |
@@ -0,0 +1,1001 @@ | |
+#include <err.h> | |
+#include <errno.h> | |
+#include <stdint.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <strings.h> | |
+#include <time.h> | |
+ | |
+#include "https.h" | |
+#include "util.h" | |
+#include "youtube.h" | |
+#include "xml.h" | |
+ | |
+#define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag)) | |
+#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag) | |
+ | |
+/* string and byte-length */ | |
+#define STRP(s) s,sizeof(s)-1 | |
+ | |
+enum FeedType { | |
+ FeedTypeNone = 0, | |
+ FeedTypeAtom = 2 | |
+}; | |
+ | |
+/* String data / memory pool */ | |
+typedef struct string { | |
+ char *data; /* data */ | |
+ size_t len; /* string length */ | |
+ size_t bufsiz; /* allocated size */ | |
+} String; | |
+ | |
+/* NOTE: the order of these fields (content, date, author) indicates the
+ * priority in which to use them, from least to most important. */
+enum TagId { | |
+ TagUnknown = 0, | |
+ /* Atom */ | |
+ /* creation date has higher priority */ | |
+ AtomTagPublished, | |
+ AtomTagTitle, | |
+ AtomTagMediaDescription, | |
+ AtomTagId, | |
+ AtomTagLink, | |
+ AtomTagLinkAlternate, | |
+ AtomTagAuthor, AtomTagAuthorName, | |
+ TagYoutubeVideoId, | |
+ TagLast | |
+}; | |
+ | |
+typedef struct feedtag { | |
+ char *name; /* name of tag to match */ | |
+ size_t len; /* len of `name` */ | |
+ enum TagId id; /* unique ID */ | |
+} FeedTag; | |
+ | |
+typedef struct field { | |
+ String str; | |
+ enum TagId tagid; /* tagid set previously, used for tag priority */ | |
+} FeedField; | |
+ | |
+enum { | |
+ /* sfeed fields */ | |
+ FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent, | |
+ FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory, | |
+ FeedFieldYoutubeId, /* yt:videoId */ | |
+ FeedFieldLast | |
+}; | |
+ | |
+typedef struct feedcontext { | |
+ String *field; /* current FeedItem field String */ | |
+ FeedField fields[FeedFieldLast]; /* data for current item */ | |
+ FeedTag tag; /* unique current parsed tag */ | |
+ int iscontent; /* in content data */ | |
+ int iscontenttag; /* in content tag */ | |
+ enum FeedType feedtype; | |
+} FeedContext; | |
+ | |
+static long long datetounix(long long, int, int, int, int, int); | |
+static FeedTag * gettag(enum FeedType, const char *, size_t); | |
+static long gettzoffset(const char *); | |
+static int isattr(const char *, size_t, const char *, size_t); | |
+static int istag(const char *, size_t, const char *, size_t); | |
+static int parsetime(const char *, long long *); | |
+ | |
+static void atom_header(void); | |
+static void atom_item(void); | |
+static void atom_footer(void); | |
+static void json_header(void); | |
+static void json_item(void); | |
+static void json_footer(void); | |
+static void sfeed_item(void); /* TSV / sfeed */ | |
+ | |
+static void string_append(String *, const char *, size_t); | |
+static void string_buffer_realloc(String *, size_t); | |
+static void string_clear(String *); | |
+static void string_print_encoded(String *); | |
+static void string_print_timestamp(String *); | |
+static void string_print(String *); | |
+static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t, | |
+ const char *, size_t); | |
+static void xmlattrentity(XMLParser *, const char *, size_t, const char *, | |
+ size_t, const char *, size_t); | |
+static void xmlattrstart(XMLParser *, const char *, size_t, const char *, | |
+ size_t); | |
+static void xmldata(XMLParser *, const char *, size_t); | |
+static void xmldataentity(XMLParser *, const char *, size_t); | |
+static void xmltagend(XMLParser *, const char *, size_t, int); | |
+static void xmltagstart(XMLParser *, const char *, size_t); | |
+static void xmltagstartparsed(XMLParser *, const char *, size_t, int); | |
+ | |
+/* Atom, must be alphabetical order */ | |
+static const FeedTag atomtags[] = { | |
+ { STRP("author"), AtomTagAuthor }, | |
+ { STRP("id"), AtomTagId }, | |
+ /* Atom: <link href="" />, RSS has <link></link> */ | |
+ { STRP("link"), AtomTagLink }, | |
+ { STRP("media:description"), AtomTagMediaDescription }, | |
+ { STRP("published"), AtomTagPublished }, | |
+ { STRP("title"), AtomTagTitle }, | |
+ { STRP("yt:videoId"), TagYoutubeVideoId } | |
+}; | |
+ | |
+/* special case: nested <author><name> */ | |
+static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor }; | |
+static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName }; | |
+ | |
+/* reference to no / unknown tag */ | |
+static const FeedTag notag = { STRP(""), TagUnknown }; | |
+ | |
+/* map TagId type to RSS/Atom field, all tags must be defined */ | |
+static const int fieldmap[TagLast] = { | |
+ [TagUnknown] = -1, | |
+ /* Atom */ | |
+ [AtomTagPublished] = FeedFieldTime, | |
+ [AtomTagTitle] = FeedFieldTitle, | |
+ [AtomTagMediaDescription] = FeedFieldContent, | |
+ [AtomTagId] = FeedFieldId, | |
+ [AtomTagLink] = -1, | |
+ [AtomTagLinkAlternate] = FeedFieldLink, | |
+ [AtomTagAuthor] = -1, | |
+ [AtomTagAuthorName] = FeedFieldAuthor, | |
+ [TagYoutubeVideoId] = FeedFieldYoutubeId | |
+}; | |
+ | |
+static const int FieldSeparator = '\t'; | |
+ | |
+static FeedContext ctx; | |
+static XMLParser parser; /* XML parser state */ | |
+static String attrrel, tmpstr; | |
+ | |
+static struct search_response *search_res = NULL; | |
+static void (*printfields)(void) = sfeed_item; | |
+static int cgimode = 0; | |
+ | |
+static int | |
+tagcmp(const void *v1, const void *v2) | |
+{ | |
+ return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name); | |
+} | |
+ | |
+/* Unique tagid for parsed tag name. */ | |
+static FeedTag * | |
+gettag(enum FeedType feedtype, const char *name, size_t namelen) | |
+{ | |
+ FeedTag f, *r = NULL; | |
+ | |
+ f.name = (char *)name; | |
+ | |
+ switch (feedtype) { | |
+ case FeedTypeAtom: | |
+ r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
+ sizeof(atomtags[0]), tagcmp); | |
+ break; | |
+ default: | |
+ break; | |
+ } | |
+ | |
+ return r; | |
+} | |
+ | |
+/* Clear string only; don't free, prevents unnecessary reallocation. */ | |
+static void | |
+string_clear(String *s) | |
+{ | |
+ if (s->data) | |
+ s->data[0] = '\0'; | |
+ s->len = 0; | |
+} | |
+ | |
+static void | |
+string_buffer_realloc(String *s, size_t newlen) | |
+{ | |
+ size_t alloclen; | |
+ | |
+ if (newlen > SIZE_MAX / 2) { | |
+ alloclen = SIZE_MAX; | |
+ } else { | |
+ for (alloclen = 64; alloclen <= newlen; alloclen *= 2) | |
+ ; | |
+ } | |
+ if (!(s->data = realloc(s->data, alloclen))) | |
+ err(1, "realloc"); | |
+ s->bufsiz = alloclen; | |
+} | |
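
A side note on the growth policy here, since it is easy to misread: the allocation is rounded up to the next power of two starting at 64 bytes, and because the loop condition is <= an exact power-of-two request is doubled as well. A hypothetical check of just that rounding loop (not part of this commit):

	#include <assert.h>
	#include <stddef.h>

	/* same rounding loop as string_buffer_realloc() above */
	static size_t
	roundup(size_t newlen)
	{
		size_t alloclen;

		for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
			;
		return alloclen;
	}

	int
	main(void)
	{
		assert(roundup(0) == 64 && roundup(63) == 64);
		assert(roundup(64) == 128);   /* an exact fit still grows */
		assert(roundup(100) == 128 && roundup(128) == 256);
		return 0;
	}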
+ | |
+/* Append data to String, s->data and data may not overlap. */ | |
+static void | |
+string_append(String *s, const char *data, size_t len) | |
+{ | |
+ if (!len) | |
+ return; | |
+ | |
+ if (s->len >= SIZE_MAX - len) { | |
+ errno = ENOMEM; | |
+ err(1, "realloc"); | |
+ } | |
+ | |
+ /* check if allocation is necessary, never shrink the buffer. */ | |
+ if (s->len + len >= s->bufsiz) | |
+ string_buffer_realloc(s, s->len + len + 1); | |
+ memcpy(s->data + s->len, data, len); | |
+ s->len += len; | |
+ s->data[s->len] = '\0'; | |
+} | |
+ | |
+/* Print text: encode TABs, newlines and '\', skip other control
+ * characters. */
+static void | |
+string_print_encoded(String *s) | |
+{ | |
+ const char *p, *e; | |
+ | |
+ if (!s->data || !s->len) | |
+ return; | |
+ | |
+ p = s->data; | |
+ e = p + strlen(p); | |
+ | |
+ for (; *p && p != e; p++) { | |
+ switch (*p) { | |
+ case '\n': putchar('\\'); putchar('n'); break; | |
+ case '\\': putchar('\\'); putchar('\\'); break; | |
+ case '\t': putchar('\\'); putchar('t'); break; | |
+ default: | |
+ /* ignore control chars */ | |
+ if (!ISCNTRL((unsigned char)*p)) | |
+ putchar(*p); | |
+ break; | |
+ } | |
+ } | |
+} | |
+ | |
+/* Print text: replace TABs, carriage returns and other whitespace with ' ',
+ * skip other control characters. */
+static void | |
+string_print(String *s) | |
+{ | |
+ char *p, *e; | |
+ | |
+ if (!s->data || !s->len) | |
+ return; | |
+ | |
+ p = s->data; | |
+ e = p + s->len; | |
+ for (; *p && p != e; p++) { | |
+ if (ISSPACE((unsigned char)*p)) | |
+ putchar(' '); /* any whitespace to space */ | |
+ else if (!ISCNTRL((unsigned char)*p)) | |
+ /* ignore other control chars */ | |
+ putchar(*p); | |
+ } | |
+} | |
+ | |
+/* Print as UNIX timestamp, print nothing if the time is empty or invalid. */ | |
+static void | |
+string_print_timestamp(String *s) | |
+{ | |
+ long long t; | |
+ | |
+ if (!s->data || !s->len) | |
+ return; | |
+ | |
+ if (parsetime(s->data, &t) != -1) | |
+ printf("%lld", t); | |
+} | |
+ | |
+/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp. | |
+ Parameters should be passed as they are in a struct tm: | |
+ that is: year = year - 1900, month = month - 1. */ | |
+static long long | |
+datetounix(long long year, int mon, int day, int hour, int min, int sec) | |
+{ | |
+ /* seconds in a month in a regular (non-leap) year */ | |
+ static const long secs_through_month[] = { | |
+ 0, 31 * 86400, 59 * 86400, 90 * 86400, | |
+ 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400, | |
+ 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 }; | |
+ int is_leap = 0, cycles, centuries = 0, leaps = 0, rem; | |
+ long long t; | |
+ | |
+ /* optimization: handle common range year 1902 up to and including 2038 */
+ if (year - 2ULL <= 136) { | |
+ /* amount of leap days relative to 1970: every 4 years */ | |
+ leaps = (year - 68) >> 2; | |
+ if (!((year - 68) & 3)) { | |
+ leaps--; | |
+ is_leap = 1; | |
+ } else { | |
+ is_leap = 0; | |
+ } | |
+ t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
+ } else { | |
+ /* general leap year calculation: | |
+ leap years occur mostly every 4 years but every 100 years | |
+ a leap year is skipped unless the year is divisible by 400 */
+ cycles = (year - 100) / 400; | |
+ rem = (year - 100) % 400; | |
+ if (rem < 0) { | |
+ cycles--; | |
+ rem += 400; | |
+ } | |
+ if (!rem) { | |
+ is_leap = 1; | |
+ } else { | |
+ if (rem >= 300) | |
+ centuries = 3, rem -= 300; | |
+ else if (rem >= 200) | |
+ centuries = 2, rem -= 200; | |
+ else if (rem >= 100) | |
+ centuries = 1, rem -= 100; | |
+ if (rem) { | |
+ leaps = rem / 4U; | |
+ rem %= 4U; | |
+ is_leap = !rem; | |
+ } | |
+ } | |
+ leaps += (97 * cycles) + (24 * centuries) - is_leap; | |
+ | |
+ /* adjust 8 leap days from 1970 up to and including 2000: | |
+ ((30 * 365) + 8) * 86400 = 946771200 */ | |
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
+ } | |
+ t += secs_through_month[mon]; | |
+ if (is_leap && mon >= 2) | |
+ t += 86400; | |
+ t += 86400LL * (day - 1); | |
+ t += 3600LL * hour; | |
+ t += 60LL * min; | |
+ t += sec; | |
+ | |
+ return t; | |
+} | |
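
To make the fast path above concrete: for years 1902 through 2038 only the simple 4-year leap cycle relative to 1970 is counted and the century rules never apply. The hypothetical standalone check below duplicates that branch and verifies it against the epoch and against this commit's own date, 2023-05-10 01:10:51 +0200, i.e. 2023-05-09 23:10:51 UTC = 1683673851 (the timezone subtraction is done by parsetime() via gettzoffset(), not here):

	#include <assert.h>

	/* mirrors the 1902..2038 branch of datetounix(); tm-style input */
	static long long
	fastdate(long long year, int mon, int day, int hour, int min, int sec)
	{
		static const long secs_through_month[] = {
			0, 31 * 86400, 59 * 86400, 90 * 86400,
			120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
			243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
		long long leaps = (year - 68) >> 2, t;
		int is_leap = 0;

		if (!((year - 68) & 3)) {
			leaps--;
			is_leap = 1;
		}
		t = 31536000 * (year - 70) + (86400 * leaps);
		t += secs_through_month[mon];
		if (is_leap && mon >= 2)
			t += 86400;
		return t + 86400LL * (day - 1) + 3600LL * hour + 60LL * min + sec;
	}

	int
	main(void)
	{
		assert(fastdate(1970 - 1900, 1 - 1, 1, 0, 0, 0) == 0);
		assert(fastdate(2023 - 1900, 5 - 1, 9, 23, 10, 51) == 1683673851LL);
		return 0;
	}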
+ | |
+/* Get timezone from string, return time offset in seconds from UTC. | |
+ * NOTE: only parses timezones in RFC-822, many other timezone names are | |
+ * ambiguous anyway. | |
+ * ANSI and military zones are defined wrong in RFC822 and are unsupported, | |
+ * see note on RFC2822 4.3 page 32. */ | |
+static long | |
+gettzoffset(const char *s) | |
+{ | |
+ const char *p; | |
+ long tzhour = 0, tzmin = 0; | |
+ size_t i; | |
+ | |
+ switch (*s) { | |
+ case '-': /* offset */ | |
+ case '+': | |
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzhour = (tzhour * 10) + (*p - '0'); | |
+ if (*p == ':') | |
+ p++; | |
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) | |
+ tzmin = (tzmin * 10) + (*p - '0'); | |
+ return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : … | |
+ default: /* timezone name */ | |
+ break; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+/* Parse time string `s` into the UNIX timestamp `tp`. | |
+ Returns 0 on success or -1 on failure. */ | |
+static int | |
+parsetime(const char *s, long long *tp) | |
+{ | |
+ int va[6] = { 0 }, i, v, vi; | |
+ | |
+ /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" … | |
+ if (!ISDIGIT((unsigned char)s[0]) || | |
+ !ISDIGIT((unsigned char)s[1]) || | |
+ !ISDIGIT((unsigned char)s[2]) || | |
+ !ISDIGIT((unsigned char)s[3])) | |
+ return -1; | |
+ | |
+ /* parse time parts (and possibly remaining date parts) */ | |
+ for (vi = 0; *s && vi < 6; vi++) { | |
+ for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) && | |
+ ISDIGIT((unsigned char)*s); s++, i++) { | |
+ v = (v * 10) + (*s - '0'); | |
+ } | |
+ va[vi] = v; | |
+ | |
+ if ((vi < 2 && *s == '-') || | |
+ (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) || | |
+ (vi > 2 && *s == ':')) | |
+ s++; | |
+ } | |
+ | |
+ /* invalid range */ | |
+ if (va[0] < 0 || va[0] > 9999 || | |
+ va[1] < 1 || va[1] > 12 || | |
+ va[2] < 1 || va[2] > 31 || | |
+ va[3] < 0 || va[3] > 23 || | |
+ va[4] < 0 || va[4] > 59 || | |
+ va[5] < 0 || va[5] > 60) /* allow leap second */ | |
+ return -1; | |
+ | |
+ *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) - | |
+ gettzoffset(s); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static void | |
+atom_header(void) | |
+{ | |
+ fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" | |
+ "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n" | |
+ "\t<title>Newsfeed</title>\n", stdout); | |
+} | |
+ | |
+static void | |
+atom_footer(void) | |
+{ | |
+ fputs("</feed>\n", stdout); | |
+} | |
+ | |
+static void | |
+atom_item(void) | |
+{ | |
+ struct item *v, *found = NULL; | |
+ size_t i; | |
+ | |
+ /* must have a video id */ | |
+ if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
+ return; | |
+ | |
+ for (i = 0; i < search_res->nitems; i++) { | |
+ v = &(search_res->items[i]); | |
+ if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) | |
+ found = v; | |
+ } | |
+ /* Only print the video if it was found in the feed as well.
+ This way it also filters out shorts. */
+ if (!found) | |
+ return; | |
+ | |
+ fputs("<entry>\n\t<title>", stdout); | |
+ xmlencode(ctx.fields[FeedFieldTitle].str.data); | |
+ if (found->duration[0]) { | |
+ fputs(" [", stdout); | |
+ xmlencode(found->duration); | |
+ fputs("]", stdout); | |
+ } | |
+ fputs("</title>\n", stdout); | |
+ if (ctx.fields[FeedFieldLink].str.len) { | |
+ fputs("\t<link rel=\"alternate\" href=\"", stdout); | |
+ xmlencode(ctx.fields[FeedFieldLink].str.data); | |
+ fputs("\" />\n", stdout); | |
+ } | |
+ /* prefer link over id for Atom <id>. */ | |
+ fputs("\t<id>", stdout); | |
+ if (ctx.fields[FeedFieldLink].str.len) | |
+ xmlencode(ctx.fields[FeedFieldLink].str.data); | |
+ else if (ctx.fields[FeedFieldId].str.len) | |
+ xmlencode(ctx.fields[FeedFieldId].str.data); | |
+ fputs("</id>\n", stdout); | |
+ | |
+ /* just print the original timestamp, it should conform */ | |
+ fputs("\t<updated>", stdout); | |
+ string_print(&ctx.fields[FeedFieldTime].str); | |
+ fputs("</updated>\n", stdout); | |
+ | |
+ if (ctx.fields[FeedFieldAuthor].str.len) { | |
+ fputs("\t<author><name>", stdout); | |
+ xmlencode(ctx.fields[FeedFieldAuthor].str.data); | |
+ fputs("</name></author>\n", stdout); | |
+ } | |
+ if (ctx.fields[FeedFieldContent].str.len) { | |
+ fputs("\t<content>", stdout); | |
+ xmlencode(ctx.fields[FeedFieldContent].str.data); | |
+ fputs("</content>\n", stdout); | |
+ } | |
+ fputs("</entry>\n", stdout); | |
+} | |
+ | |
+static void | |
+json_header(void) | |
+{ | |
+ fputs("{\n" | |
+ "\"version\": \"https://jsonfeed.org/version/1.1\",\n" | |
+ "\"title\": \"Newsfeed\",\n" | |
+ "\"items\": [\n", stdout); | |
+} | |
+ | |
+static void | |
+json_footer(void) | |
+{ | |
+ fputs("]\n}\n", stdout); | |
+} | |
+ | |
+static void | |
+json_printfield(const char *s) | |
+{ | |
+ for (; *s; s++) { | |
+ if (*s == '\\') | |
+ fputs("\\\\", stdout); | |
+ else if (*s == '"') | |
+ fputs("\\\"", stdout); | |
+ else if (ISCNTRL((unsigned char)*s)) | |
+ printf("\\u00%02x", (unsigned char)*s); | |
+ else | |
+ putchar(*s); | |
+ } | |
+} | |
+ | |
+static void | |
+json_item(void) | |
+{ | |
+ static int json_firstitem = 1; | |
+ struct item *v, *found = NULL; | |
+ size_t i; | |
+ | |
+ /* must have a video id */ | |
+ if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
+ return; | |
+ | |
+ for (i = 0; i < search_res->nitems; i++) { | |
+ v = &(search_res->items[i]); | |
+ if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) | |
+ found = v; | |
+ } | |
+ /* Only print the video if it was found in the feed as well.
+ This way it also filters out shorts. */
+ if (!found) | |
+ return; | |
+ | |
+ if (!json_firstitem) | |
+ fputs(",\n", stdout); | |
+ json_firstitem = 0; | |
+ | |
+ fputs("{\n\t\"id\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldId].str.data); | |
+ fputs("\"", stdout); | |
+ | |
+ /* just print the original timestamp, it should conform */ | |
+ fputs(",\n\t\"date_published\": \"", stdout); | |
+ string_print(&ctx.fields[FeedFieldTime].str); | |
+ fputs("\"", stdout); | |
+ | |
+ fputs(",\n\t\"title\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldTitle].str.data); | |
+ if (found->duration[0]) { | |
+ fputs(" [", stdout); | |
+ json_printfield(found->duration); | |
+ fputs("]", stdout); | |
+ } | |
+ fputs("\"", stdout); | |
+ | |
+ if (ctx.fields[FeedFieldLink].str.len) { | |
+ fputs(",\n\t\"url\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldLink].str.data); | |
+ fputs("\"", stdout); | |
+ } | |
+ | |
+ if (ctx.fields[FeedFieldAuthor].str.len) { | |
+ fputs(",\n\t\"authors\": [{\"name\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldAuthor].str.data); | |
+ fputs("\"}]", stdout); | |
+ } | |
+ | |
+ fputs(",\n\t\"content_text\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldContent].str.data); | |
+ fputs("\"\n}", stdout); | |
+} | |
+ | |
+static void | |
+sfeed_item(void) | |
+{ | |
+ struct item *v, *found = NULL; | |
+ size_t i; | |
+ | |
+ /* must have a video id */ | |
+ if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
+ return; | |
+ | |
+ for (i = 0; i < search_res->nitems; i++) { | |
+ v = &(search_res->items[i]); | |
+ if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) | |
+ found = v; | |
+ } | |
+ /* Only print the video if it was found in the feed as well.
+ This way it also filters out shorts. */
+ if (!found) | |
+ return; | |
+ | |
+ string_print_timestamp(&ctx.fields[FeedFieldTime].str); | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldTitle].str); | |
+ if (found->duration[0]) { | |
+ fputs(" [", stdout); | |
+ fputs(found->duration, stdout); | |
+ fputs("]", stdout); | |
+ } | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldLink].str); | |
+ putchar(FieldSeparator); | |
+ string_print_encoded(&ctx.fields[FeedFieldContent].str); | |
+ putchar(FieldSeparator); | |
+ fputs("plain", stdout); | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldId].str); | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldAuthor].str); | |
+ putchar(FieldSeparator); | |
+ /* no/empty enclosure */ | |
+ putchar(FieldSeparator); | |
+ /* empty category */ | |
+ putchar('\n'); | |
+} | |
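
Taken together, sfeed_item() writes one TAB-separated record per matched video in sfeed(5) field order: UNIX timestamp, title (with the duration appended), link, content, content-type (always "plain" here), id, author, and finally the enclosure and category fields, which are left empty. A hypothetical record with placeholder values, TABs written as <TAB>:

	1683673851<TAB>Example title [12:34]<TAB>https://www.youtube.com/watch?v=XXXXXXXXXXX<TAB>Example description<TAB>plain<TAB>example-entry-id<TAB>Example channel<TAB><TAB>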
+ | |
+static int | |
+istag(const char *name, size_t len, const char *name2, size_t len2) | |
+{ | |
+ return (len == len2 && !strcasecmp(name, name2)); | |
+} | |
+ | |
+static int | |
+isattr(const char *name, size_t len, const char *name2, size_t len2) | |
+{ | |
+ return (len == len2 && !strcasecmp(name, name2)); | |
+} | |
+ | |
+static void | |
+xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, | |
+ const char *v, size_t vl) | |
+{ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ if (!ctx.tag.id) | |
+ return; | |
+ | |
+ if (ISCONTENTTAG(ctx)) | |
+ return; | |
+ | |
+ if (ctx.tag.id == AtomTagLink) { | |
+ if (isattr(n, nl, STRP("rel"))) { | |
+ string_append(&attrrel, v, vl); | |
+ } else if (isattr(n, nl, STRP("href"))) { | |
+ string_append(&tmpstr, v, vl); | |
+ } | |
+ } | |
+} | |
+ | |
+static void | |
+xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, | |
+ const char *data, size_t datalen) | |
+{ | |
+ char buf[8]; | |
+ int len; | |
+ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ if (!ctx.tag.id) | |
+ return; | |
+ | |
+ /* try to translate entity, else just pass as data to | |
+ * xmlattr handler. */ | |
+ if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) | |
+ xmlattr(p, t, tl, n, nl, buf, (size_t)len); | |
+ else | |
+ xmlattr(p, t, tl, n, nl, data, datalen); | |
+} | |
+ | |
+static void | |
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl) | |
+{ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ if (attrrel.len && isattr(n, nl, STRP("rel"))) | |
+ string_clear(&attrrel); | |
+ else if (tmpstr.len && | |
+ (isattr(n, nl, STRP("href")) || | |
+ isattr(n, nl, STRP("url")))) | |
+ string_clear(&tmpstr); /* use the last value for multiple attributes */
+} | |
+ | |
+static void | |
+xmldata(XMLParser *p, const char *s, size_t len) | |
+{ | |
+ if (!ctx.field) | |
+ return; | |
+ | |
+ string_append(ctx.field, s, len); | |
+} | |
+ | |
+static void | |
+xmldataentity(XMLParser *p, const char *data, size_t datalen) | |
+{ | |
+ char buf[8]; | |
+ int len; | |
+ | |
+ if (!ctx.field) | |
+ return; | |
+ | |
+ /* try to translate entity, else just pass as data to | |
+ * xmldata handler. */ | |
+ if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) | |
+ xmldata(p, buf, (size_t)len); | |
+ else | |
+ xmldata(p, data, datalen); | |
+} | |
+ | |
+static void | |
+xmltagstart(XMLParser *p, const char *t, size_t tl) | |
+{ | |
+ const FeedTag *f; | |
+ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ /* start of RSS or Atom item / entry */ | |
+ if (ctx.feedtype == FeedTypeNone) { | |
+ if (istag(t, tl, STRP("entry"))) | |
+ ctx.feedtype = FeedTypeAtom; | |
+ return; | |
+ } | |
+ | |
+ /* field tagid already set or nested tags. */ | |
+ if (ctx.tag.id) { | |
+ /* nested <author><name> for Atom */ | |
+ if (ctx.tag.id == AtomTagAuthor && | |
+ istag(t, tl, STRP("name"))) { | |
+ memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
+ } else { | |
+ return; /* other nested tags are not allowed: return */ | |
+ } | |
+ } | |
+ | |
+ /* in item */ | |
+ if (ctx.tag.id == TagUnknown) { | |
+ if (!(f = gettag(ctx.feedtype, t, tl))) | |
+ f = ¬ag; | |
+ memcpy(&(ctx.tag), f, sizeof(ctx.tag)); | |
+ } | |
+ | |
+ ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent); | |
+ string_clear(&attrrel); | |
+} | |
+ | |
+static void | |
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) | |
+{ | |
+ enum TagId tagid; | |
+ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ /* set tag type based on its attribute value */ | |
+ if (ctx.tag.id == AtomTagLink) { | |
+ /* empty or "alternate": other types could be | |
+ "enclosure", "related", "self" or "via" */ | |
+ if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate")))
+ ctx.tag.id = AtomTagLinkAlternate; | |
+ else | |
+ ctx.tag.id = AtomTagLink; /* unknown */ | |
+ } | |
+ | |
+ tagid = ctx.tag.id; | |
+ | |
+ /* map tag type to field: unknown or lesser priority is ignored, | |
+ when tags of the same type are repeated only the first is used. */ | |
+ if (fieldmap[tagid] == -1 || | |
+ tagid <= ctx.fields[fieldmap[tagid]].tagid) { | |
+ return; | |
+ } | |
+ | |
+ if (ctx.iscontenttag) { | |
+ ctx.iscontent = 1; | |
+ ctx.iscontenttag = 0; | |
+ } | |
+ | |
+ ctx.field = &(ctx.fields[fieldmap[tagid]].str); | |
+ ctx.fields[fieldmap[tagid]].tagid = tagid; | |
+ | |
+ /* clear field if it is overwritten (with a priority order) for the new | |
+ value, if the field can have multiple values then do not clear it. */
+ string_clear(ctx.field); | |
+} | |
+ | |
+static void | |
+xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) | |
+{ | |
+ size_t i; | |
+ | |
+ if (ctx.feedtype == FeedTypeNone) | |
+ return; | |
+ | |
+ if (ISINCONTENT(ctx)) { | |
+ /* not a closed content field */ | |
+ if (!istag(ctx.tag.name, ctx.tag.len, t, tl)) | |
+ return; | |
+ } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) { | |
+ /* matched tag end: close it */ | |
+ } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom && | |
+ istag(t, tl, STRP("entry"))))) /* Atom */ | |
+ { | |
+ /* end of Atom entry */ | |
+ printfields(); | |
+ | |
+ /* clear strings */ | |
+ for (i = 0; i < FeedFieldLast; i++) { | |
+ string_clear(&ctx.fields[i].str); | |
+ ctx.fields[i].tagid = TagUnknown; | |
+ } | |
+ /* allow parsing of Atom and RSS concatenated in one XML stream. */
+ ctx.feedtype = FeedTypeNone; | |
+ } else { | |
+ return; /* not end of field */ | |
+ } | |
+ | |
+ /* temporary string: for fields that cannot be processed | |
+ directly and need more context, for example by its tag | |
+ attributes, like the Atom link rel="alternate|enclosure". */ | |
+ if (tmpstr.len && ctx.field) { | |
+ string_clear(ctx.field); | |
+ string_append(ctx.field, tmpstr.data, tmpstr.len); | |
+ } | |
+ | |
+ /* close field */ | |
+ string_clear(&tmpstr); /* reuse and clear temporary string */ | |
+ | |
+ if (ctx.tag.id == AtomTagAuthorName) | |
+ memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer author tag */
+ else | |
+ memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); | |
+ | |
+ ctx.iscontent = 0; | |
+ ctx.field = NULL; | |
+} | |
+ | |
+static char * | |
+request_channel_feed(const char *channelid) | |
+{ | |
+ char path[2048]; | |
+ int r; | |
+ | |
+ r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid);
+ /* check if request is too long (truncation) */ | |
+ if (r < 0 || (size_t)r >= sizeof(path)) | |
+ return NULL; | |
+ | |
+ return request("www.youtube.com", path, ""); | |
+} | |
+ | |
+int | |
+isvalidchannel(const char *s) | |
+{ | |
+ size_t len; | |
+ | |
+ for (len = 0; *s; s++, len++) { | |
+ if (ISALPHA((unsigned char)*s) || | |
+ ISDIGIT((unsigned char)*s) || | |
+ *s == '-' || *s == '_') | |
+ continue; | |
+ return 0; | |
+ } | |
+ | |
+ return *s == '\0' && len == 24; | |
+} | |
+ | |
+void | |
+usage(void) | |
+{ | |
+ if (cgimode) { | |
+ fputs("Status: 400 Bad Request\r\n", stdout); | |
+ fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdou… | |
+ fputs("400 Bad Request\n", stdout); | |
+ exit(0); | |
+ } else { | |
+ fputs("usage: feed <channelid> [atom|json|tsv]\n", stderr); | |
+ exit(1); | |
+ } | |
+} | |
+ | |
+int | |
+main(int argc, char *argv[]) | |
+{ | |
+ char buf[256]; | |
+ const char *channelid = NULL; | |
+ char *data, *format = "tsv", *p, *requesturi, *tmp; | |
+ size_t i; | |
+ | |
+ if (pledge("stdio dns inet rpath unveil", NULL) == -1) | |
+ err(1, "pledge"); | |
+ | |
+ if ((tmp = getenv("REQUEST_URI"))) { | |
+ cgimode = 1; | |
+ | |
+ strlcpy(buf, tmp, sizeof(buf)); | |
+ requesturi = buf; | |
+ | |
+ if (!(p = strrchr(requesturi, '/'))) | |
+ usage(); | |
+ | |
+ channelid = p + 1; | |
+ if ((p = strrchr(channelid, '.'))) { | |
+ *p = '\0'; /* NULL terminate */ | |
+ format = p + 1; | |
+ } | |
+ } else { | |
+ if (argc <= 1) | |
+ usage(); | |
+ | |
+ channelid = argv[1]; | |
+ if (argc > 2) | |
+ format = argv[2]; | |
+ } | |
+ if (!channelid || !isvalidchannel(channelid)) | |
+ usage(); | |
+ | |
+ if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
+ printfields = atom_item; | |
+ else if (!strcmp(format, "json")) | |
+ printfields = json_item; | |
+ else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed")) | |
+ printfields = sfeed_item; | |
+ else | |
+ usage(); | |
+ | |
+ search_res = youtube_channel_videos(channelid); | |
+ if (!search_res || search_res->nitems == 0) { | |
+ /* error or no videos found */ | |
+ return 0; | |
+ } | |
+ | |
+ if (!(data = request_channel_feed(channelid))) | |
+ return 1; /* error, no data at all */ | |
+ | |
+ if (pledge("stdio", NULL) == -1) | |
+ err(1, "pledge"); | |
+ | |
+ setxmldata(data, strlen(data)); | |
+ | |
+ memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); | |
+ | |
+ parser.xmlattr = xmlattr; | |
+ parser.xmlattrentity = xmlattrentity; | |
+ parser.xmlattrstart = xmlattrstart; | |
+ parser.xmlcdata = xmldata; | |
+ parser.xmldata = xmldata; | |
+ parser.xmldataentity = xmldataentity; | |
+ parser.xmltagend = xmltagend; | |
+ parser.xmltagstart = xmltagstart; | |
+ parser.xmltagstartparsed = xmltagstartparsed; | |
+ | |
+ /* init all fields, make sure each has a value */
+ for (i = 0; i < FeedFieldLast; i++) { | |
+ string_append(&(ctx.fields[i].str), " ", 1); | |
+ string_clear(&(ctx.fields[i].str)); | |
+ } | |
+ | |
+ if (cgimode) { | |
+ fputs("Status: 200 OK\r\n", stdout); | |
+ if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
+ fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n",… | |
+ else if (!strcmp(format, "json")) | |
+ fputs("Content-Type: application/json; charset=utf-8\r… | |
+ else | |
+ fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n… | |
+ } | |
+ | |
+ if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
+ atom_header(); | |
+ else if (!strcmp(format, "json")) | |
+ json_header(); | |
+ | |
+ /* NOTE: getnext is defined in xml.h for inline optimization */ | |
+ xml_parse(&parser); | |
+ | |
+ if (!strcmp(format, "atom")) | |
+ atom_footer(); | |
+ else if (!strcmp(format, "json")) | |
+ json_footer(); | |
+ | |
+ return 0; | |
+} |