Introduction
Introduction Statistics Contact Development Disclaimer Help
add initial version of youtube/feed - frontends - front-ends for some sites (ex…
Log
Files
Refs
README
LICENSE
---
commit f5a6863b5397d1cc3ad31de291be11fae6256b5f
parent 7b18c287f2fcf98227ff2ec1fdd4eeb8050e8166
Author: Hiltjo Posthuma <[email protected]>
Date: Wed, 10 May 2023 01:10:51 +0200
add initial version of youtube/feed
This fetches the Youtube Atom feed and the channel videos and combines the data.
It can output:
- Atom
- sfeed(5)
- JSON / JSON Feed
It can run in command-line and CGI mode.
For now it only adds the video duration to the title and filters out YouTube
Shorts.
The Atom parser is based on sfeed.
Diffstat:
M Makefile | 4 ++++
M util.h | 7 +++++++
A youtube/feed.c | 1001 +++++++++++++++++++++++++++++…
3 files changed, 1012 insertions(+), 0 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -22,6 +22,7 @@ LIBTLS_LDFLAGS_STATIC = -ltls -lssl -lcrypto -static
BIN = \
youtube/cgi \
youtube/cli \
+ youtube/feed \
youtube/gopher
SRC = ${BIN:=.c} \
@@ -68,6 +69,9 @@ youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o
youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o
${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS…
+# youtube/feed: links the feed front-end against the shared youtube.o and ${LIB}
+youtube/feed: ${LIB} youtube/youtube.o youtube/feed.o
+ ${CC} -o $@ youtube/feed.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTL…
+
youtube/gopher: ${LIB} youtube/youtube.o youtube/gopher.o
${CC} -o $@ youtube/gopher.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIB…
diff --git a/util.h b/util.h
@@ -3,6 +3,13 @@
#define unveil(p1,p2) 0
#endif
+/* ctype-like macros, but always compatible with ASCII / UTF-8.
+ * The argument is parenthesized everywhere (also inside the cast) so that
+ * an expression argument is converted as a whole.
+ * NOTE: ISCNTRL, ISSPACE and TOLOWER evaluate their argument more than once:
+ * do not pass expressions with side effects. Callers are expected to pass an
+ * unsigned char value, otherwise ISCNTRL is true for negative char values. */
+#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
+#define TOLOWER(c) ((((unsigned)(c)) - 'A' < 26) ? ((c) | 32) : (c))
+
#undef strlcat
size_t strlcat(char *, const char *, size_t);
#undef strlcpy
diff --git a/youtube/feed.c b/youtube/feed.c
@@ -0,0 +1,1001 @@
+#include <err.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+
+#include "https.h"
+#include "util.h"
+#include "youtube.h"
+#include "xml.h"
+
+/* True while inside the data of a content field: the content flag is set and
+ * the opening content tag has been fully parsed. */
+#define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag))
+/* True while the opening tag of a content field is still being parsed. */
+#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)
+
+/* string and byte-length: expands to TWO comma separated arguments, the
+ * string itself and its size without the terminating NUL. Because sizeof is
+ * used it only gives the length for string literals / arrays, not pointers. */
+#define STRP(s) s,sizeof(s)-1
+
+/* Type of the feed item currently being parsed. */
+enum FeedType {
+ FeedTypeNone = 0, /* not inside an item / entry */
+ FeedTypeAtom = 2 /* inside an Atom <entry> */
+};
+
+/* String data / memory pool: a growable, NUL-terminated buffer.
+ * The buffer is reused between items: it is cleared, not freed. */
+typedef struct string {
+ char *data; /* data */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+/* NOTE: the order of these fields (content, date, author) indicate the
+ * priority to use them, from least important to high.
+ * The numeric value is compared when a field is filled: a tag only replaces
+ * the value of a field when its id is higher than the id stored for it. */
+enum TagId {
+ TagUnknown = 0,
+ /* Atom */
+ /* creation date has higher priority */
+ AtomTagPublished,
+ AtomTagTitle,
+ AtomTagMediaDescription,
+ AtomTagId,
+ AtomTagLink,
+ AtomTagLinkAlternate,
+ AtomTagAuthor, AtomTagAuthorName,
+ TagYoutubeVideoId,
+ TagLast /* number of tag ids: size of the fieldmap table */
+};
+
+/* A tag name the parser knows about and the id it maps to.
+ * The member order matters: the tables are initialized positionally with
+ * STRP(), which supplies `name` and `len` together. */
+typedef struct feedtag {
+ char *name; /* name of tag to match */
+ size_t len; /* len of `name` */
+ enum TagId id; /* unique ID */
+} FeedTag;
+
+/* Value of one output field of the current item. */
+typedef struct field {
+ String str; /* collected text of the field */
+ enum TagId tagid; /* tagid set previously, used for tag priority */
+} FeedField;
+
+/* Indices into FeedContext.fields. */
+enum {
+ /* sfeed fields */
+ FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
+ FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
+ FeedFieldYoutubeId, /* yt:videoId */
+ FeedFieldLast /* number of fields */
+};
+
+/* Parser state shared by the XML callbacks. */
+typedef struct feedcontext {
+ String *field; /* current FeedItem field String */
+ FeedField fields[FeedFieldLast]; /* data for current item */
+ FeedTag tag; /* unique current parsed tag */
+ int iscontent; /* in content data */
+ int iscontenttag; /* in content tag */
+ enum FeedType feedtype; /* FeedTypeNone while outside of an entry */
+} FeedContext;
+
+static long long datetounix(long long, int, int, int, int, int);
+static FeedTag * gettag(enum FeedType, const char *, size_t);
+static long gettzoffset(const char *);
+static int isattr(const char *, size_t, const char *, size_t);
+static int istag(const char *, size_t, const char *, size_t);
+static int parsetime(const char *, long long *);
+
+static void atom_header(void);
+static void atom_item(void);
+static void atom_footer(void);
+static void json_header(void);
+static void json_item(void);
+static void json_footer(void);
+static void sfeed_item(void); /* TSV / sfeed */
+
+static void string_append(String *, const char *, size_t);
+static void string_buffer_realloc(String *, size_t);
+static void string_clear(String *);
+static void string_print_encoded(String *);
+static void string_print_timestamp(String *);
+static void string_print(String *);
+static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t,
+ const char *, size_t);
+static void xmlattrentity(XMLParser *, const char *, size_t, const char *,
+ size_t, const char *, size_t);
+static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
+ size_t);
+static void xmldata(XMLParser *, const char *, size_t);
+static void xmldataentity(XMLParser *, const char *, size_t);
+static void xmltagend(XMLParser *, const char *, size_t, int);
+static void xmltagstart(XMLParser *, const char *, size_t);
+static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
+
+/* Atom, must be alphabetical order: the table is searched with bsearch(3)
+ * using a case-insensitive comparison of the names. */
+static const FeedTag atomtags[] = {
+ { STRP("author"), AtomTagAuthor },
+ { STRP("id"), AtomTagId },
+ /* Atom: <link href="" />, RSS has <link></link> */
+ { STRP("link"), AtomTagLink },
+ { STRP("media:description"), AtomTagMediaDescription },
+ { STRP("published"), AtomTagPublished },
+ { STRP("title"), AtomTagTitle },
+ { STRP("yt:videoId"), TagYoutubeVideoId }
+};
+
+/* special case: nested <author><name>.
+ * The current tag is switched to atomtagauthorname when <name> opens inside
+ * <author> and switched back to atomtagauthor when that <name> is closed. */
+static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
+static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
+
+/* reference to no / unknown tag: copied into the context whenever no known
+ * tag is being parsed. */
+static const FeedTag notag = { STRP(""), TagUnknown };
+
+/* map TagId type to RSS/Atom field, all tags must be defined.
+ * -1 means the tag has no field of its own and its data is not stored. */
+static const int fieldmap[TagLast] = {
+ [TagUnknown] = -1,
+ /* Atom */
+ [AtomTagPublished] = FeedFieldTime,
+ [AtomTagTitle] = FeedFieldTitle,
+ [AtomTagMediaDescription] = FeedFieldContent,
+ [AtomTagId] = FeedFieldId,
+ [AtomTagLink] = -1,
+ [AtomTagLinkAlternate] = FeedFieldLink,
+ [AtomTagAuthor] = -1,
+ [AtomTagAuthorName] = FeedFieldAuthor,
+ [TagYoutubeVideoId] = FeedFieldYoutubeId
+};
+
+/* separator between the fields of a TSV / sfeed line */
+static const int FieldSeparator = '\t';
+
+static FeedContext ctx; /* state of the item being parsed */
+static XMLParser parser; /* XML parser state */
+/* attrrel: value of the rel attribute of the current tag.
+ * tmpstr: value of the href attribute of a <link>, kept until the tag ends. */
+static String attrrel, tmpstr;
+
+/* channel video list, each parsed entry is looked up in it by video id */
+static struct search_response *search_res = NULL;
+/* output function called at the end of each entry, default is TSV / sfeed */
+static void (*printfields)(void) = sfeed_item;
+/* set when the REQUEST_URI environment variable is present */
+static int cgimode = 0;
+
+/* bsearch(3) comparison function: orders two FeedTag entries by their name,
+ * ignoring case. */
+static int
+tagcmp(const void *v1, const void *v2)
+{
+	const FeedTag *lhs = v1, *rhs = v2;
+
+	return strcasecmp(lhs->name, rhs->name);
+}
+
+/* Unique tagid for parsed tag name.
+ * Looks up `name` in the tag table of `feedtype` and returns the matching
+ * entry or NULL when the name or the feed type is unknown.
+ * `namelen` is not used: the comparison works on the NUL-terminated name. */
+static FeedTag *
+gettag(enum FeedType feedtype, const char *name, size_t namelen)
+{
+ FeedTag f, *r = NULL;
+
+ /* only the name of the search key is read by tagcmp() */
+ f.name = (char *)name;
+
+ switch (feedtype) {
+ case FeedTypeAtom:
+ r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0…
+ sizeof(atomtags[0]), tagcmp);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/* Reset the string to empty. The buffer is kept allocated so it can be
+ * reused without another allocation. */
+static void
+string_clear(String *s)
+{
+	s->len = 0;
+	if (s->data != NULL)
+		*s->data = '\0';
+}
+
+/* Grow the buffer of `s` so it can hold more than `newlen` bytes.
+ * The capacity is the first power of two (starting at 64) that is larger than
+ * `newlen`, or SIZE_MAX when doubling could overflow.
+ * Exits the program when the allocation fails. */
+static void
+string_buffer_realloc(String *s, size_t newlen)
+{
+	size_t cap = SIZE_MAX;
+	char *grown;
+
+	if (newlen <= SIZE_MAX / 2) {
+		cap = 64;
+		while (cap <= newlen)
+			cap *= 2;
+	}
+
+	grown = realloc(s->data, cap);
+	if (grown == NULL)
+		err(1, "realloc");
+
+	s->data = grown;
+	s->bufsiz = cap;
+}
+
+/* Append `len` bytes of `data` to the String and keep it NUL-terminated.
+ * s->data and data may not overlap. Appending zero bytes does nothing.
+ * Exits the program when the new length would overflow or when memory
+ * cannot be allocated. */
+static void
+string_append(String *s, const char *data, size_t len)
+{
+	size_t total;
+
+	if (len == 0)
+		return;
+
+	/* the new length plus the terminating NUL must fit in a size_t */
+	if (s->len >= SIZE_MAX - len) {
+		errno = ENOMEM;
+		err(1, "realloc");
+	}
+	total = s->len + len;
+
+	/* grow only when needed, the buffer is never shrunk */
+	if (total >= s->bufsiz)
+		string_buffer_realloc(s, total + 1);
+
+	memcpy(s->data + s->len, data, len);
+	s->data[total] = '\0';
+	s->len = total;
+}
+
+/* Print text for a TSV field: TAB, newline and '\' are written as the escape
+ * sequences \t, \n and \\, all other control characters are dropped.
+ * Whitespace is NOT trimmed. Printing stops at the first NUL byte or after
+ * s->len bytes, whichever comes first. Nothing is printed for an empty
+ * string. */
+static void
+string_print_encoded(String *s)
+{
+	const char *p, *e;
+
+	if (!s->data || !s->len)
+		return;
+
+	/* use the tracked length like string_print(): no need to rescan */
+	for (p = s->data, e = p + s->len; p != e && *p; p++) {
+		switch (*p) {
+		case '\n': putchar('\\'); putchar('n'); break;
+		case '\\': putchar('\\'); putchar('\\'); break;
+		case '\t': putchar('\\'); putchar('t'); break;
+		default:
+			/* ignore control chars */
+			if (!ISCNTRL((unsigned char)*p))
+				putchar(*p);
+			break;
+		}
+	}
+}
+
+/* Print text on one line: every whitespace character (TAB, newline, carriage
+ * return, ...) is written as a single ' ', other control characters are
+ * dropped. Nothing is trimmed. Printing stops at the first NUL byte or after
+ * s->len bytes. */
+static void
+string_print(String *s)
+{
+	const char *cur, *end;
+	unsigned char ch;
+
+	if (!s->data || !s->len)
+		return;
+
+	end = s->data + s->len;
+	for (cur = s->data; *cur && cur != end; cur++) {
+		ch = (unsigned char)*cur;
+		if (ISSPACE(ch))
+			putchar(' '); /* any whitespace to space */
+		else if (!ISCNTRL(ch))
+			putchar(*cur); /* other control chars are skipped */
+	}
+}
+
+/* Parse the string as a date and print it as a UNIX timestamp.
+ * Prints nothing when the string is empty or cannot be parsed. */
+static void
+string_print_timestamp(String *s)
+{
+	long long unixtime;
+
+	if (s->data == NULL || s->len == 0)
+		return;
+	if (parsetime(s->data, &unixtime) == -1)
+		return;
+
+	printf("%lld", unixtime);
+}
+
+/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
+ Parameters should be passed as they are in a struct tm:
+ that is: year = year - 1900, month = month - 1.
+ `mon` is used as an index into a 12 entry table and is not checked here:
+ the caller must pass a value in the range 0-11. */
+static long long
+datetounix(long long year, int mon, int day, int hour, int min, int sec)
+{
+ /* seconds elapsed before the start of each month in a regular
+ (non-leap) year */
+ static const long secs_through_month[] = {
+ 0, 31 * 86400, 59 * 86400, 90 * 86400,
+ 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
+ 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
+ int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
+ long long t;
+
+ /* optimization: handle common range year 1902 up to and including 2038.
+ The unsigned subtraction makes years below 2 (1902) wrap around to a
+ huge value, so one comparison checks both bounds. */
+ if (year - 2ULL <= 136) {
+ /* amount of leap days relative to 1970: every 4 years */
+ leaps = (year - 68) >> 2;
+ if (!((year - 68) & 3)) {
+ /* the leap day of the current year is added further below,
+ and only from March on */
+ leaps--;
+ is_leap = 1;
+ } else {
+ is_leap = 0;
+ }
+ t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
+ } else {
+ /* general leap year calculation:
+ leap years occur mostly every 4 years but every 100 years
+ a leap year is skipped unless the year is divisible by 400 */
+ cycles = (year - 100) / 400;
+ rem = (year - 100) % 400;
+ /* make the remainder positive for years before 2000 */
+ if (rem < 0) {
+ cycles--;
+ rem += 400;
+ }
+ if (!rem) {
+ is_leap = 1;
+ } else {
+ if (rem >= 300)
+ centuries = 3, rem -= 300;
+ else if (rem >= 200)
+ centuries = 2, rem -= 200;
+ else if (rem >= 100)
+ centuries = 1, rem -= 100;
+ if (rem) {
+ leaps = rem / 4U;
+ rem %= 4U;
+ is_leap = !rem;
+ }
+ }
+ /* 97 leap days per 400 years, 24 per 100 years */
+ leaps += (97 * cycles) + (24 * centuries) - is_leap;
+
+ /* adjust 8 leap days from 1970 up to and including 2000:
+ ((30 * 365) + 8) * 86400 = 946771200 */
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 94677120…
+ }
+ t += secs_through_month[mon];
+ /* in a leap year the months from March on are one day later */
+ if (is_leap && mon >= 2)
+ t += 86400;
+ t += 86400LL * (day - 1);
+ t += 3600LL * hour;
+ t += 60LL * min;
+ t += sec;
+
+ return t;
+}
+
+/* Get timezone from string, return time offset in seconds from UTC.
+ * Only a numeric offset is parsed: a '+' or '-' sign, up to 2 hour digits,
+ * an optional ':' and up to 2 minute digits.
+ * Anything else, including timezone names, gives an offset of 0. */
+static long
+gettzoffset(const char *s)
+{
+ const char *p;
+ long tzhour = 0, tzmin = 0;
+ size_t i;
+
+ switch (*s) {
+ case '-': /* offset */
+ case '+':
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i+…
+ tzhour = (tzhour * 10) + (*p - '0');
+ if (*p == ':')
+ p++;
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzmin = (tzmin * 10) + (*p - '0');
+ return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : …
+ default: /* timezone name */
+ break;
+ }
+ return 0;
+}
+
+/* Parse time string `s` into the UNIX timestamp `tp`.
+ Returns 0 on success or -1 on failure.
+ `tp` is only written on success. Whatever follows the parsed time fields
+ is passed to gettzoffset() and the offset is subtracted from the result. */
+static int
+parsetime(const char *s, long long *tp)
+{
+ /* va: year, month, day, hour, minute, second */
+ int va[6] = { 0 }, i, v, vi;
+
+ /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S":
+ the string must start with a 4 digit year */
+ if (!ISDIGIT((unsigned char)s[0]) ||
+ !ISDIGIT((unsigned char)s[1]) ||
+ !ISDIGIT((unsigned char)s[2]) ||
+ !ISDIGIT((unsigned char)s[3]))
+ return -1;
+
+ /* parse time parts (and possibly remaining date parts) */
+ for (vi = 0; *s && vi < 6; vi++) {
+ /* the year has up to 4 digits, all other parts up to 2 */
+ for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
+ ISDIGIT((unsigned char)*s); s++, i++) {
+ v = (v * 10) + (*s - '0');
+ }
+ va[vi] = v;
+
+ /* skip one separator: '-' in the date, 'T' or whitespace between
+ date and time, ':' in the time */
+ if ((vi < 2 && *s == '-') ||
+ (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
+ (vi > 2 && *s == ':'))
+ s++;
+ }
+
+ /* invalid range */
+ if (va[0] < 0 || va[0] > 9999 ||
+ va[1] < 1 || va[1] > 12 ||
+ va[2] < 1 || va[2] > 31 ||
+ va[3] < 0 || va[3] > 23 ||
+ va[4] < 0 || va[4] > 59 ||
+ va[5] < 0 || va[5] > 60) /* allow leap second */
+ return -1;
+
+ /* datetounix() takes the year and month as in a struct tm */
+ *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
+ gettzoffset(s);
+
+ return 0;
+}
+
+/* Print the start of the Atom document: XML declaration, the opening <feed>
+ * element and the feed title. */
+static void
+atom_header(void)
+{
+	static const char header[] =
+		"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+		"<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"
+		"\t<title>Newsfeed</title>\n";
+
+	fputs(header, stdout);
+}
+
+/* Print the end of the Atom document: closes the <feed> element. */
+static void
+atom_footer(void)
+{
+	/* puts() adds the newline */
+	puts("</feed>");
+}
+
+/* Print the current item as an Atom <entry>.
+ * The item is skipped when it has no video id or when its video id is not
+ * in the channel video list (search_res). The duration from the video list
+ * is added to the title. All values are XML-encoded. */
+static void
+atom_item(void)
+{
+	struct item *v, *found = NULL;
+	size_t i;
+
+	/* must have a video id */
+	if (!ctx.fields[FeedFieldYoutubeId].str.len)
+		return;
+
+	for (i = 0; i < search_res->nitems; i++) {
+		v = &(search_res->items[i]);
+		if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
+			found = v;
+	}
+	/* Only print the video if it is also in the channel video list.
+	   This way it filters away shorts too. */
+	if (!found)
+		return;
+
+	fputs("<entry>\n\t<title>", stdout);
+	xmlencode(ctx.fields[FeedFieldTitle].str.data);
+	if (found->duration[0]) {
+		fputs(" [", stdout);
+		xmlencode(found->duration);
+		fputs("]", stdout);
+	}
+	fputs("</title>\n", stdout);
+	if (ctx.fields[FeedFieldLink].str.len) {
+		fputs("\t<link rel=\"alternate\" href=\"", stdout);
+		xmlencode(ctx.fields[FeedFieldLink].str.data);
+		fputs("\" />\n", stdout);
+	}
+	/* prefer link over id for Atom <id>. */
+	fputs("\t<id>", stdout);
+	if (ctx.fields[FeedFieldLink].str.len)
+		xmlencode(ctx.fields[FeedFieldLink].str.data);
+	else if (ctx.fields[FeedFieldId].str.len)
+		xmlencode(ctx.fields[FeedFieldId].str.data);
+	fputs("</id>\n", stdout);
+
+	/* Print the original timestamp, it should conform. It is remote data
+	   like the other fields, so it is XML-encoded too: a '<' or '&' in it
+	   must not end up as markup in the output. */
+	fputs("\t<updated>", stdout);
+	if (ctx.fields[FeedFieldTime].str.len)
+		xmlencode(ctx.fields[FeedFieldTime].str.data);
+	fputs("</updated>\n", stdout);
+
+	if (ctx.fields[FeedFieldAuthor].str.len) {
+		fputs("\t<author><name>", stdout);
+		xmlencode(ctx.fields[FeedFieldAuthor].str.data);
+		fputs("</name></author>\n", stdout);
+	}
+	if (ctx.fields[FeedFieldContent].str.len) {
+		fputs("\t<content>", stdout);
+		xmlencode(ctx.fields[FeedFieldContent].str.data);
+		fputs("</content>\n", stdout);
+	}
+	fputs("</entry>\n", stdout);
+}
+
+/* Print the start of the JSON Feed document up to and including the opening
+ * of the "items" array. */
+static void
+json_header(void)
+{
+	static const char header[] =
+		"{\n"
+		"\"version\": \"https://jsonfeed.org/version/1.1\",\n"
+		"\"title\": \"Newsfeed\",\n"
+		"\"items\": [\n";
+
+	fputs(header, stdout);
+}
+
+/* Print the end of the JSON Feed document: closes the "items" array and the
+ * top-level object. */
+static void
+json_footer(void)
+{
+	/* puts() adds the final newline */
+	puts("]\n}");
+}
+
+/* Print the NUL-terminated string `s` as the contents of a JSON string
+ * (without the surrounding quotes): '\' and '"' are escaped with a backslash,
+ * control characters are written as \u00XX, all other bytes are printed
+ * as-is. */
+static void
+json_printfield(const char *s)
+{
+	unsigned char c;
+
+	while ((c = (unsigned char)*s++) != '\0') {
+		switch (c) {
+		case '\\':
+			fputs("\\\\", stdout);
+			break;
+		case '"':
+			fputs("\\\"", stdout);
+			break;
+		default:
+			if (ISCNTRL(c))
+				printf("\\u00%02x", c);
+			else
+				putchar(c);
+			break;
+		}
+	}
+}
+
+/* Print the current item as an object of the JSON Feed "items" array.
+ * The item is skipped when it has no video id or when its video id is not
+ * in the channel video list (search_res). The duration from the video list
+ * is added to the title. A comma is printed between the items, not before
+ * the first one. All values are escaped for use in a JSON string. */
+static void
+json_item(void)
+{
+	static int json_firstitem = 1;
+	struct item *v, *found = NULL;
+	size_t i;
+
+	/* must have a video id */
+	if (!ctx.fields[FeedFieldYoutubeId].str.len)
+		return;
+
+	for (i = 0; i < search_res->nitems; i++) {
+		v = &(search_res->items[i]);
+		if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
+			found = v;
+	}
+	/* Only print the video if it is also in the channel video list.
+	   This way it filters away shorts too. */
+	if (!found)
+		return;
+
+	if (!json_firstitem)
+		fputs(",\n", stdout);
+	json_firstitem = 0;
+
+	fputs("{\n\t\"id\": \"", stdout);
+	json_printfield(ctx.fields[FeedFieldId].str.data);
+	fputs("\"", stdout);
+
+	/* Print the original timestamp, it should conform. It is remote data
+	   like the other fields, so it is escaped too: a '"' or '\' in it must
+	   not break out of the JSON string. */
+	fputs(",\n\t\"date_published\": \"", stdout);
+	if (ctx.fields[FeedFieldTime].str.len)
+		json_printfield(ctx.fields[FeedFieldTime].str.data);
+	fputs("\"", stdout);
+
+	fputs(",\n\t\"title\": \"", stdout);
+	json_printfield(ctx.fields[FeedFieldTitle].str.data);
+	if (found->duration[0]) {
+		fputs(" [", stdout);
+		json_printfield(found->duration);
+		fputs("]", stdout);
+	}
+	fputs("\"", stdout);
+
+	if (ctx.fields[FeedFieldLink].str.len) {
+		fputs(",\n\t\"url\": \"", stdout);
+		json_printfield(ctx.fields[FeedFieldLink].str.data);
+		fputs("\"", stdout);
+	}
+
+	if (ctx.fields[FeedFieldAuthor].str.len) {
+		fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
+		json_printfield(ctx.fields[FeedFieldAuthor].str.data);
+		fputs("\"}]", stdout);
+	}
+
+	fputs(",\n\t\"content_text\": \"", stdout);
+	json_printfield(ctx.fields[FeedFieldContent].str.data);
+	fputs("\"\n}", stdout);
+}
+
+/* Print the current item as one TSV / sfeed line with the fields:
+ * timestamp, title, link, content, content-type, id, author, enclosure and
+ * category. The last two are always empty.
+ * The item is skipped when it has no video id or when its video id is not
+ * in the channel video list (search_res). The duration from the video list
+ * is added to the title. */
+static void
+sfeed_item(void)
+{
+	const char *videoid;
+	struct item *match = NULL;
+	size_t n;
+
+	/* must have a video id */
+	if (ctx.fields[FeedFieldYoutubeId].str.len == 0)
+		return;
+	videoid = ctx.fields[FeedFieldYoutubeId].str.data;
+
+	/* the whole list is scanned: the last matching item is used */
+	for (n = 0; n < search_res->nitems; n++) {
+		if (strcmp(videoid, search_res->items[n].id) == 0)
+			match = &(search_res->items[n]);
+	}
+	/* Only print the video if it is also in the channel video list.
+	   This way it filters away shorts too. */
+	if (match == NULL)
+		return;
+
+	/* timestamp */
+	string_print_timestamp(&ctx.fields[FeedFieldTime].str);
+	putchar(FieldSeparator);
+
+	/* title, with the duration appended */
+	string_print(&ctx.fields[FeedFieldTitle].str);
+	if (match->duration[0] != '\0')
+		printf(" [%s]", match->duration);
+	putchar(FieldSeparator);
+
+	/* link */
+	string_print(&ctx.fields[FeedFieldLink].str);
+	putchar(FieldSeparator);
+
+	/* content and its type */
+	string_print_encoded(&ctx.fields[FeedFieldContent].str);
+	putchar(FieldSeparator);
+	fputs("plain", stdout);
+	putchar(FieldSeparator);
+
+	/* id */
+	string_print(&ctx.fields[FeedFieldId].str);
+	putchar(FieldSeparator);
+
+	/* author */
+	string_print(&ctx.fields[FeedFieldAuthor].str);
+	putchar(FieldSeparator);
+
+	/* no/empty enclosure */
+	putchar(FieldSeparator);
+
+	/* empty category */
+	putchar('\n');
+}
+
+/* Returns non-zero when the two tag names have the same length and are
+ * equal, ignoring case. Both names must be NUL-terminated. */
+static int
+istag(const char *name, size_t len, const char *name2, size_t len2)
+{
+	if (len != len2)
+		return 0;
+
+	return strcasecmp(name, name2) == 0;
+}
+
+/* Returns non-zero when the two attribute names have the same length and are
+ * equal, ignoring case. Both names must be NUL-terminated. */
+static int
+isattr(const char *name, size_t len, const char *name2, size_t len2)
+{
+	if (len != len2)
+		return 0;
+
+	return strcasecmp(name, name2) == 0;
+}
+
+/* XML parser callback: (part of) an attribute value.
+ * Only the attributes of a <link> tag are collected: the value of "rel" is
+ * appended to attrrel, the value of "href" to tmpstr. */
+static void
+xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
+	const char *v, size_t vl)
+{
+	/* ignore attributes in content data, outside of a known tag and of
+	   the content tag itself */
+	if (ISINCONTENT(ctx) || !ctx.tag.id || ISCONTENTTAG(ctx))
+		return;
+
+	if (ctx.tag.id != AtomTagLink)
+		return;
+
+	if (isattr(n, nl, STRP("rel")))
+		string_append(&attrrel, v, vl);
+	else if (isattr(n, nl, STRP("href")))
+		string_append(&tmpstr, v, vl);
+}
+
+/* XML parser callback: an entity inside an attribute value.
+ * The entity is translated when possible, otherwise its text is used
+ * unchanged. The result is handled like regular attribute data. */
+static void
+xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
+	const char *data, size_t datalen)
+{
+	char decoded[8];
+	int declen;
+
+	if (ISINCONTENT(ctx) || !ctx.tag.id)
+		return;
+
+	declen = xml_entitytostr(data, decoded, sizeof(decoded));
+	if (declen > 0) {
+		/* translated entity */
+		xmlattr(p, t, tl, n, nl, decoded, (size_t)declen);
+		return;
+	}
+
+	/* could not translate: pass as data to the xmlattr handler */
+	xmlattr(p, t, tl, n, nl, data, datalen);
+}
+
+/* XML parser callback: start of an attribute.
+ * Clears a value collected earlier for the same kind of attribute, so the
+ * last value is used when an attribute occurs multiple times. */
+static void
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
+{
+	if (ISINCONTENT(ctx))
+		return;
+
+	if (attrrel.len && isattr(n, nl, STRP("rel"))) {
+		string_clear(&attrrel);
+		return;
+	}
+
+	if (!tmpstr.len)
+		return;
+	if (isattr(n, nl, STRP("href")) || isattr(n, nl, STRP("url")))
+		string_clear(&tmpstr);
+}
+
+/* XML parser callback: character data (also used for CDATA).
+ * The data is appended to the field being collected, if any. */
+static void
+xmldata(XMLParser *p, const char *s, size_t len)
+{
+	if (ctx.field != NULL)
+		string_append(ctx.field, s, len);
+}
+
+/* XML parser callback: an entity inside character data.
+ * The entity is translated when possible, otherwise its text is used
+ * unchanged. The result is handled like regular character data. */
+static void
+xmldataentity(XMLParser *p, const char *data, size_t datalen)
+{
+	char decoded[8];
+	int declen;
+
+	if (ctx.field == NULL)
+		return;
+
+	declen = xml_entitytostr(data, decoded, sizeof(decoded));
+	if (declen > 0) {
+		/* translated entity */
+		xmldata(p, decoded, (size_t)declen);
+		return;
+	}
+
+	/* could not translate: pass as data to the xmldata handler */
+	xmldata(p, data, datalen);
+}
+
+/* XML parser callback: start of a tag, called before its attributes.
+ * Detects the start of an <entry> and, inside an entry, sets the current
+ * tag (ctx.tag) to the known tag matching the name or to `notag`. */
+static void
+xmltagstart(XMLParser *p, const char *t, size_t tl)
+{
+ const FeedTag *f;
+
+ if (ISINCONTENT(ctx))
+ return;
+
+ /* start of RSS or Atom item / entry */
+ if (ctx.feedtype == FeedTypeNone) {
+ if (istag(t, tl, STRP("entry")))
+ ctx.feedtype = FeedTypeAtom;
+ return;
+ }
+
+ /* field tagid already set or nested tags. */
+ if (ctx.tag.id) {
+ /* nested <author><name> for Atom */
+ if (ctx.tag.id == AtomTagAuthor &&
+ istag(t, tl, STRP("name"))) {
+ memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag)…
+ } else {
+ return; /* other nested tags are not allowed: return */
+ }
+ }
+
+ /* in item */
+ if (ctx.tag.id == TagUnknown) {
+ if (!(f = gettag(ctx.feedtype, t, tl)))
+ f = &notag;
+ memcpy(&(ctx.tag), f, sizeof(ctx.tag));
+ }
+
+ /* the content flag is raised later, after the attributes are parsed */
+ ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
+ /* the rel attribute belongs to one tag only */
+ string_clear(&attrrel);
+}
+
+/* XML parser callback: the start tag and its attributes are parsed.
+ * Selects the field that receives the data of the current tag (ctx.field)
+ * and clears it, unless the tag has no field or the field already has a
+ * value from a tag with the same or a higher priority. */
+static void
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+ enum TagId tagid;
+
+ if (ISINCONTENT(ctx))
+ return;
+
+ /* set tag type based on its attribute value */
+ if (ctx.tag.id == AtomTagLink) {
+ /* empty or "alternate": other types could be
+ "enclosure", "related", "self" or "via" */
+ if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("al…
+ ctx.tag.id = AtomTagLinkAlternate;
+ else
+ ctx.tag.id = AtomTagLink; /* unknown */
+ }
+
+ tagid = ctx.tag.id;
+
+ /* map tag type to field: unknown or lesser priority is ignored,
+ when tags of the same type are repeated only the first is used. */
+ if (fieldmap[tagid] == -1 ||
+ tagid <= ctx.fields[fieldmap[tagid]].tagid) {
+ return;
+ }
+
+ /* from here on the data of the content field is collected */
+ if (ctx.iscontenttag) {
+ ctx.iscontent = 1;
+ ctx.iscontenttag = 0;
+ }
+
+ ctx.field = &(ctx.fields[fieldmap[tagid]].str);
+ ctx.fields[fieldmap[tagid]].tagid = tagid;
+
+ /* clear the field: a previous value from a tag with a lower priority
+ is replaced by the new value. */
+ string_clear(ctx.field);
+}
+
+/* XML parser callback: end of a tag.
+ * Closes the field being collected when the end tag matches the current tag.
+ * At the end of an <entry> the item is printed with printfields() and all
+ * fields are reset for the next item. */
+static void
+xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+ size_t i;
+
+ if (ctx.feedtype == FeedTypeNone)
+ return;
+
+ if (ISINCONTENT(ctx)) {
+ /* not a closed content field */
+ if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
+ return;
+ } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
+ /* matched tag end: close it */
+ } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
+ istag(t, tl, STRP("entry"))))) /* Atom */
+ {
+ /* end of Atom entry */
+ printfields();
+
+ /* clear strings */
+ for (i = 0; i < FeedFieldLast; i++) {
+ string_clear(&ctx.fields[i].str);
+ ctx.fields[i].tagid = TagUnknown;
+ }
+ /* back to "not in an entry": the start of the next <entry> is
+ detected again in xmltagstart() */
+ ctx.feedtype = FeedTypeNone;
+ } else {
+ return; /* not end of field */
+ }
+
+ /* temporary string: for fields that cannot be processed
+ directly and need more context, for example by its tag
+ attributes, like the Atom link rel="alternate|enclosure". */
+ if (tmpstr.len && ctx.field) {
+ string_clear(ctx.field);
+ string_append(ctx.field, tmpstr.data, tmpstr.len);
+ }
+
+ /* close field */
+ string_clear(&tmpstr); /* reuse and clear temporary string */
+
+ if (ctx.tag.id == AtomTagAuthorName)
+ memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* back to the outer <author> tag */
+ else
+ memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
+
+ ctx.iscontent = 0;
+ ctx.field = NULL;
+}
+
+/* Request the videos feed of a channel from www.youtube.com.
+ * Returns NULL when the request path does not fit in the buffer, otherwise
+ * whatever request() returns.
+ * NOTE(review): presumably request() returns the response data or NULL on
+ * failure, main() treats NULL as "no data" -- confirm in https.h. */
+static char *
+request_channel_feed(const char *channelid)
+{
+ char path[2048];
+ int r;
+
+ r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", ch…
+ /* check if request is too long (truncation) */
+ if (r < 0 || (size_t)r >= sizeof(path))
+ return NULL;
+
+ return request("www.youtube.com", path, "");
+}
+
+/* Returns 1 when `s` has the form of a channel id: exactly 24 characters,
+ * each of them an ASCII letter, a digit, '-' or '_'. Returns 0 otherwise. */
+int
+isvalidchannel(const char *s)
+{
+	size_t n = 0;
+	unsigned char c;
+
+	while ((c = (unsigned char)s[n]) != '\0') {
+		if (!ISALPHA(c) && !ISDIGIT(c) && c != '-' && c != '_')
+			return 0;
+		n++;
+	}
+
+	return n == 24;
+}
+
+/* Report invalid input and exit, this function does not return.
+ * In CGI mode a "400 Bad Request" response is printed and the exit status
+ * is 0, else the usage is printed to stderr and the exit status is 1. */
+void
+usage(void)
+{
+ if (cgimode) {
+ fputs("Status: 400 Bad Request\r\n", stdout);
+ fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdou…
+ fputs("400 Bad Request\n", stdout);
+ exit(0);
+ } else {
+ fputs("usage: feed <channelid> [atom|json|tsv]\n", stderr);
+ exit(1);
+ }
+}
+
+/* Fetch the video list and the Atom feed of a channel and print the feed
+ * items that are also in the video list.
+ *
+ * CGI mode is used when REQUEST_URI is set: the last path component is
+ * "<channelid>" or "<channelid>.<format>". Else the arguments are
+ * <channelid> [format]. The format is "atom" or "xml", "json", "tsv" or
+ * "sfeed", the default is "tsv".
+ *
+ * Returns 0 on success and when no videos were found, 1 when the feed could
+ * not be requested. Invalid input ends in usage(). */
+int
+main(int argc, char *argv[])
+{
+	char buf[256];
+	const char *channelid = NULL;
+	char *data, *format = "tsv", *p, *requesturi, *tmp;
+	size_t i;
+	int isatom = 0, isjson = 0;
+
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1)
+		err(1, "pledge");
+
+	if ((tmp = getenv("REQUEST_URI"))) {
+		cgimode = 1;
+
+		/* work on a copy: it is modified below */
+		strlcpy(buf, tmp, sizeof(buf));
+		requesturi = buf;
+
+		if (!(p = strrchr(requesturi, '/')))
+			usage();
+
+		channelid = p + 1;
+		if ((p = strrchr(channelid, '.'))) {
+			*p = '\0'; /* NUL terminate the channel id */
+			format = p + 1;
+		}
+	} else {
+		if (argc <= 1)
+			usage();
+
+		channelid = argv[1];
+		if (argc > 2)
+			format = argv[2];
+	}
+	if (!channelid || !isvalidchannel(channelid))
+		usage();
+
+	/* decide the output format once: header, items and footer must agree */
+	if (!strcmp(format, "atom") || !strcmp(format, "xml")) {
+		isatom = 1;
+		printfields = atom_item;
+	} else if (!strcmp(format, "json")) {
+		isjson = 1;
+		printfields = json_item;
+	} else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed")) {
+		printfields = sfeed_item;
+	} else {
+		usage();
+	}
+
+	/* NOTE: the two early returns below happen before any CGI header is
+	   printed. */
+	search_res = youtube_channel_videos(channelid);
+	if (!search_res || search_res->nitems == 0) {
+		/* error or no videos found */
+		return 0;
+	}
+
+	if (!(data = request_channel_feed(channelid)))
+		return 1; /* error, no data at all */
+
+	/* all data is fetched: only output is needed from here on */
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+
+	setxmldata(data, strlen(data));
+
+	memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
+
+	parser.xmlattr = xmlattr;
+	parser.xmlattrentity = xmlattrentity;
+	parser.xmlattrstart = xmlattrstart;
+	parser.xmlcdata = xmldata;
+	parser.xmldata = xmldata;
+	parser.xmldataentity = xmldataentity;
+	parser.xmltagend = xmltagend;
+	parser.xmltagstart = xmltagstart;
+	parser.xmltagstartparsed = xmltagstartparsed;
+
+	/* init all fields, make sure it has a value: the output functions
+	   use the data pointer of a field also when the field is empty */
+	for (i = 0; i < FeedFieldLast; i++) {
+		string_append(&(ctx.fields[i].str), " ", 1);
+		string_clear(&(ctx.fields[i].str));
+	}
+
+	if (cgimode) {
+		fputs("Status: 200 OK\r\n", stdout);
+		if (isatom)
+			fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout);
+		else if (isjson)
+			fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout);
+		else
+			fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
+	}
+
+	if (isatom)
+		atom_header();
+	else if (isjson)
+		json_header();
+
+	/* NOTE: getnext is defined in xml.h for inline optimization */
+	xml_parse(&parser);
+
+	/* the footer is printed for every format that got a header: this
+	   includes "xml", else its <feed> element is never closed */
+	if (isatom)
+		atom_footer();
+	else if (isjson)
+		json_footer();
+
+	return 0;
+}
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.