commit f5a6863b5397d1cc3ad31de291be11fae6256b5f | |
parent 7b18c287f2fcf98227ff2ec1fdd4eeb8050e8166 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Wed, 10 May 2023 01:10:51 +0200 | |
add initial version of youtube/feed | |
This fetches the Youtube Atom feed and the channel videos and combines the data. | |
It can output: | |
- Atom | |
- sfeed(5) | |
- JSON / JSON Feed | |
It can run in command-line and CGI mode. | |
For now it only adds the video duration to the title and filters out Youtube shorts.
The Atom parser is based on sfeed. | |
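
For reference, a hedged sketch of how this would be invoked (channel id and host are placeholders): in command-line mode the channel id (24 characters of [A-Za-z0-9_-]) is the first argument and an optional second argument picks the format, one of atom/xml, json or tsv/sfeed (the default); in CGI mode the channel id comes from the last path component of REQUEST_URI and the format from its suffix.

	./youtube/feed UCxxxxxxxxxxxxxxxxxxxxxx atom
	https://example.org/cgi-bin/feed/UCxxxxxxxxxxxxxxxxxxxxxx.json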
Diffstat: | |
M Makefile | 4 ++++ | |
M util.h | 7 +++++++ | |
A youtube/feed.c | 1001 +++++++++++++++++++++++++++++… | |
3 files changed, 1012 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -22,6 +22,7 @@ LIBTLS_LDFLAGS_STATIC = -ltls -lssl -lcrypto -static | |
BIN = \ | |
youtube/cgi \ | |
youtube/cli \ | |
+ youtube/feed \ | |
youtube/gopher | |
SRC = ${BIN:=.c} \ | |
@@ -68,6 +69,9 @@ youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o | |
youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o | |
${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
+youtube/feed: ${LIB} youtube/youtube.o youtube/feed.o | |
+ ${CC} -o $@ youtube/feed.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
+ | |
youtube/gopher: ${LIB} youtube/youtube.o youtube/gopher.o | |
${CC} -o $@ youtube/gopher.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
diff --git a/util.h b/util.h | |
@@ -3,6 +3,13 @@ | |
#define unveil(p1,p2) 0 | |
#endif | |
+/* ctype-like macros, but always compatible with ASCII / UTF-8 */ | |
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) | |
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f) | |
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10) | |
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) | |
+#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? ((c) | 32) : (c)) | |
+ | |
#undef strlcat | |
size_t strlcat(char *, const char *, size_t); | |
#undef strlcpy | |
diff --git a/youtube/feed.c b/youtube/feed.c | |
@@ -0,0 +1,1001 @@ | |
+#include <err.h> | |
+#include <errno.h> | |
+#include <stdint.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <strings.h> | |
+#include <time.h> | |
+ | |
+#include "https.h" | |
+#include "util.h" | |
+#include "youtube.h" | |
+#include "xml.h" | |
+ | |
+#define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag)) | |
+#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag) | |
+ | |
+/* string and byte-length */ | |
+#define STRP(s) s,sizeof(s)-1 | |
+ | |
+enum FeedType { | |
+ FeedTypeNone = 0, | |
+ FeedTypeAtom = 2 | |
+}; | |
+ | |
+/* String data / memory pool */ | |
+typedef struct string { | |
+ char *data; /* data */ | |
+ size_t len; /* string length */ | |
+ size_t bufsiz; /* allocated size */ | |
+} String; | |
+ | |
+/* NOTE: the order of these fields (content, date, author) indicates the
+ * priority in which to use them, from least to most important. */
+enum TagId { | |
+ TagUnknown = 0, | |
+ /* Atom */ | |
+ /* creation date has higher priority */ | |
+ AtomTagPublished, | |
+ AtomTagTitle, | |
+ AtomTagMediaDescription, | |
+ AtomTagId, | |
+ AtomTagLink, | |
+ AtomTagLinkAlternate, | |
+ AtomTagAuthor, AtomTagAuthorName, | |
+ TagYoutubeVideoId, | |
+ TagLast | |
+}; | |
+ | |
+typedef struct feedtag { | |
+ char *name; /* name of tag to match */ | |
+ size_t len; /* len of `name` */ | |
+ enum TagId id; /* unique ID */ | |
+} FeedTag; | |
+ | |
+typedef struct field { | |
+ String str; | |
+ enum TagId tagid; /* tagid set previously, used for tag priority */ | |
+} FeedField; | |
+ | |
+enum { | |
+ /* sfeed fields */ | |
+ FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent, | |
+ FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory, | |
+ FeedFieldYoutubeId, /* yt:videoId */ | |
+ FeedFieldLast | |
+}; | |
+ | |
+typedef struct feedcontext { | |
+ String *field; /* current FeedItem field String */ | |
+ FeedField fields[FeedFieldLast]; /* data for current item */ | |
+ FeedTag tag; /* unique current parsed tag */ | |
+ int iscontent; /* in content data */ | |
+ int iscontenttag; /* in content tag */ | |
+ enum FeedType feedtype; | |
+} FeedContext; | |
+ | |
+static long long datetounix(long long, int, int, int, int, int); | |
+static FeedTag * gettag(enum FeedType, const char *, size_t); | |
+static long gettzoffset(const char *); | |
+static int isattr(const char *, size_t, const char *, size_t); | |
+static int istag(const char *, size_t, const char *, size_t); | |
+static int parsetime(const char *, long long *); | |
+ | |
+static void atom_header(void); | |
+static void atom_item(void); | |
+static void atom_footer(void); | |
+static void json_header(void); | |
+static void json_item(void); | |
+static void json_footer(void); | |
+static void sfeed_item(void); /* TSV / sfeed */ | |
+ | |
+static void string_append(String *, const char *, size_t); | |
+static void string_buffer_realloc(String *, size_t); | |
+static void string_clear(String *); | |
+static void string_print_encoded(String *); | |
+static void string_print_timestamp(String *); | |
+static void string_print(String *); | |
+static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t, | |
+ const char *, size_t); | |
+static void xmlattrentity(XMLParser *, const char *, size_t, const char *, | |
+ size_t, const char *, size_t); | |
+static void xmlattrstart(XMLParser *, const char *, size_t, const char *, | |
+ size_t); | |
+static void xmldata(XMLParser *, const char *, size_t); | |
+static void xmldataentity(XMLParser *, const char *, size_t); | |
+static void xmltagend(XMLParser *, const char *, size_t, int); | |
+static void xmltagstart(XMLParser *, const char *, size_t); | |
+static void xmltagstartparsed(XMLParser *, const char *, size_t, int); | |
+ | |
+/* Atom, must be alphabetical order */ | |
+static const FeedTag atomtags[] = { | |
+ { STRP("author"), AtomTagAuthor }, | |
+ { STRP("id"), AtomTagId }, | |
+ /* Atom: <link href="" />, RSS has <link></link> */ | |
+ { STRP("link"), AtomTagLink }, | |
+ { STRP("media:description"), AtomTagMediaDescription }, | |
+ { STRP("published"), AtomTagPublished }, | |
+ { STRP("title"), AtomTagTitle }, | |
+ { STRP("yt:videoId"), TagYoutubeVideoId } | |
+}; | |
+ | |
+/* special case: nested <author><name> */ | |
+static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor }; | |
+static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName }; | |
+ | |
+/* reference to no / unknown tag */ | |
+static const FeedTag notag = { STRP(""), TagUnknown }; | |
+ | |
+/* map TagId type to RSS/Atom field, all tags must be defined */ | |
+static const int fieldmap[TagLast] = { | |
+ [TagUnknown] = -1, | |
+ /* Atom */ | |
+ [AtomTagPublished] = FeedFieldTime, | |
+ [AtomTagTitle] = FeedFieldTitle, | |
+ [AtomTagMediaDescription] = FeedFieldContent, | |
+ [AtomTagId] = FeedFieldId, | |
+ [AtomTagLink] = -1, | |
+ [AtomTagLinkAlternate] = FeedFieldLink, | |
+ [AtomTagAuthor] = -1, | |
+ [AtomTagAuthorName] = FeedFieldAuthor, | |
+ [TagYoutubeVideoId] = FeedFieldYoutubeId | |
+}; | |
+ | |
+static const int FieldSeparator = '\t'; | |
+ | |
+static FeedContext ctx; | |
+static XMLParser parser; /* XML parser state */ | |
+static String attrrel, tmpstr; | |
+ | |
+static struct search_response *search_res = NULL; | |
+static void (*printfields)(void) = sfeed_item; | |
+static int cgimode = 0; | |
+ | |
+static int | |
+tagcmp(const void *v1, const void *v2) | |
+{ | |
+ return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name); | |
+} | |
+ | |
+/* Unique tagid for parsed tag name. */ | |
+static FeedTag * | |
+gettag(enum FeedType feedtype, const char *name, size_t namelen) | |
+{ | |
+ FeedTag f, *r = NULL; | |
+ | |
+ f.name = (char *)name; | |
+ | |
+ switch (feedtype) { | |
+ case FeedTypeAtom: | |
+ r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
+ sizeof(atomtags[0]), tagcmp); | |
+ break; | |
+ default: | |
+ break; | |
+ } | |
+ | |
+ return r; | |
+} | |
+ | |
+/* Clear string only; don't free, prevents unnecessary reallocation. */ | |
+static void | |
+string_clear(String *s) | |
+{ | |
+ if (s->data) | |
+ s->data[0] = '\0'; | |
+ s->len = 0; | |
+} | |
+ | |
+static void | |
+string_buffer_realloc(String *s, size_t newlen) | |
+{ | |
+ size_t alloclen; | |
+ | |
+ if (newlen > SIZE_MAX / 2) { | |
+ alloclen = SIZE_MAX; | |
+ } else { | |
+ for (alloclen = 64; alloclen <= newlen; alloclen *= 2) | |
+ ; | |
+ } | |
+ if (!(s->data = realloc(s->data, alloclen))) | |
+ err(1, "realloc"); | |
+ s->bufsiz = alloclen; | |
+} | |
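
A side note on the growth policy here, since it is easy to misread: the allocation is rounded up to the next power of two starting at 64 bytes, and because the loop condition is <= an exact power-of-two request is doubled as well. A hypothetical check of just that rounding loop (not part of this commit):

	#include <assert.h>
	#include <stddef.h>

	/* same rounding loop as string_buffer_realloc() above */
	static size_t
	roundup(size_t newlen)
	{
		size_t alloclen;

		for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
			;
		return alloclen;
	}

	int
	main(void)
	{
		assert(roundup(0) == 64 && roundup(63) == 64);
		assert(roundup(64) == 128);   /* an exact fit still grows */
		assert(roundup(100) == 128 && roundup(128) == 256);
		return 0;
	}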
+ | |
+/* Append data to String, s->data and data may not overlap. */ | |
+static void | |
+string_append(String *s, const char *data, size_t len) | |
+{ | |
+ if (!len) | |
+ return; | |
+ | |
+ if (s->len >= SIZE_MAX - len) { | |
+ errno = ENOMEM; | |
+ err(1, "realloc"); | |
+ } | |
+ | |
+ /* check if allocation is necessary, never shrink the buffer. */ | |
+ if (s->len + len >= s->bufsiz) | |
+ string_buffer_realloc(s, s->len + len + 1); | |
+ memcpy(s->data + s->len, data, len); | |
+ s->len += len; | |
+ s->data[s->len] = '\0'; | |
+} | |
+ | |
+/* Print text: encode TABs, newlines and '\', skip other control
+ * characters. */
+static void | |
+string_print_encoded(String *s) | |
+{ | |
+ const char *p, *e; | |
+ | |
+ if (!s->data || !s->len) | |
+ return; | |
+ | |
+ p = s->data; | |
+ e = p + strlen(p); | |
+ | |
+ for (; *p && p != e; p++) { | |
+ switch (*p) { | |
+ case '\n': putchar('\\'); putchar('n'); break; | |
+ case '\\': putchar('\\'); putchar('\\'); break; | |
+ case '\t': putchar('\\'); putchar('t'); break; | |
+ default: | |
+ /* ignore control chars */ | |
+ if (!ISCNTRL((unsigned char)*p)) | |
+ putchar(*p); | |
+ break; | |
+ } | |
+ } | |
+} | |
+ | |
+/* Print text: replace TABs, carriage returns and other whitespace with ' ',
+ * skip other control characters. */
+static void | |
+string_print(String *s) | |
+{ | |
+ char *p, *e; | |
+ | |
+ if (!s->data || !s->len) | |
+ return; | |
+ | |
+ p = s->data; | |
+ e = p + s->len; | |
+ for (; *p && p != e; p++) { | |
+ if (ISSPACE((unsigned char)*p)) | |
+ putchar(' '); /* any whitespace to space */ | |
+ else if (!ISCNTRL((unsigned char)*p)) | |
+ /* ignore other control chars */ | |
+ putchar(*p); | |
+ } | |
+} | |
+ | |
+/* Print as UNIX timestamp, print nothing if the time is empty or invalid. */ | |
+static void | |
+string_print_timestamp(String *s) | |
+{ | |
+ long long t; | |
+ | |
+ if (!s->data || !s->len) | |
+ return; | |
+ | |
+ if (parsetime(s->data, &t) != -1) | |
+ printf("%lld", t); | |
+} | |
+ | |
+/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp. | |
+ Parameters should be passed as they are in a struct tm: | |
+ that is: year = year - 1900, month = month - 1. */ | |
+static long long | |
+datetounix(long long year, int mon, int day, int hour, int min, int sec) | |
+{ | |
+ /* seconds in a month in a regular (non-leap) year */ | |
+ static const long secs_through_month[] = { | |
+ 0, 31 * 86400, 59 * 86400, 90 * 86400, | |
+ 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400, | |
+ 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 }; | |
+ int is_leap = 0, cycles, centuries = 0, leaps = 0, rem; | |
+ long long t; | |
+ | |
+ /* optimization: handle common range year 1902 up to and including 2038 */
+ if (year - 2ULL <= 136) { | |
+ /* amount of leap days relative to 1970: every 4 years */ | |
+ leaps = (year - 68) >> 2; | |
+ if (!((year - 68) & 3)) { | |
+ leaps--; | |
+ is_leap = 1; | |
+ } else { | |
+ is_leap = 0; | |
+ } | |
+ t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
+ } else { | |
+ /* general leap year calculation: | |
+ leap years occur mostly every 4 years but every 100 years | |
+ a leap year is skipped unless the year is divisible by 400 */
+ cycles = (year - 100) / 400; | |
+ rem = (year - 100) % 400; | |
+ if (rem < 0) { | |
+ cycles--; | |
+ rem += 400; | |
+ } | |
+ if (!rem) { | |
+ is_leap = 1; | |
+ } else { | |
+ if (rem >= 300) | |
+ centuries = 3, rem -= 300; | |
+ else if (rem >= 200) | |
+ centuries = 2, rem -= 200; | |
+ else if (rem >= 100) | |
+ centuries = 1, rem -= 100; | |
+ if (rem) { | |
+ leaps = rem / 4U; | |
+ rem %= 4U; | |
+ is_leap = !rem; | |
+ } | |
+ } | |
+ leaps += (97 * cycles) + (24 * centuries) - is_leap; | |
+ | |
+ /* adjust 8 leap days from 1970 up to and including 2000: | |
+ ((30 * 365) + 8) * 86400 = 946771200 */ | |
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
+ } | |
+ t += secs_through_month[mon]; | |
+ if (is_leap && mon >= 2) | |
+ t += 86400; | |
+ t += 86400LL * (day - 1); | |
+ t += 3600LL * hour; | |
+ t += 60LL * min; | |
+ t += sec; | |
+ | |
+ return t; | |
+} | |
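
To make the fast path above concrete: for years 1902 through 2038 only the simple 4-year leap cycle relative to 1970 is counted and the century rules never apply. The hypothetical standalone check below duplicates that branch and verifies it against the epoch and against this commit's own date, 2023-05-10 01:10:51 +0200, i.e. 2023-05-09 23:10:51 UTC = 1683673851 (the timezone subtraction is done by parsetime() via gettzoffset(), not here):

	#include <assert.h>

	/* mirrors the 1902..2038 branch of datetounix(); tm-style input */
	static long long
	fastdate(long long year, int mon, int day, int hour, int min, int sec)
	{
		static const long secs_through_month[] = {
			0, 31 * 86400, 59 * 86400, 90 * 86400,
			120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
			243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
		long long leaps = (year - 68) >> 2, t;
		int is_leap = 0;

		if (!((year - 68) & 3)) {
			leaps--;
			is_leap = 1;
		}
		t = 31536000 * (year - 70) + (86400 * leaps);
		t += secs_through_month[mon];
		if (is_leap && mon >= 2)
			t += 86400;
		return t + 86400LL * (day - 1) + 3600LL * hour + 60LL * min + sec;
	}

	int
	main(void)
	{
		assert(fastdate(1970 - 1900, 1 - 1, 1, 0, 0, 0) == 0);
		assert(fastdate(2023 - 1900, 5 - 1, 9, 23, 10, 51) == 1683673851LL);
		return 0;
	}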
+ | |
+/* Get timezone from string, return time offset in seconds from UTC. | |
+ * NOTE: only parses timezones in RFC-822, many other timezone names are | |
+ * ambiguous anyway. | |
+ * ANSI and military zones are defined wrong in RFC822 and are unsupported, | |
+ * see note on RFC2822 4.3 page 32. */ | |
+static long | |
+gettzoffset(const char *s) | |
+{ | |
+ const char *p; | |
+ long tzhour = 0, tzmin = 0; | |
+ size_t i; | |
+ | |
+ switch (*s) { | |
+ case '-': /* offset */ | |
+ case '+': | |
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzhour = (tzhour * 10) + (*p - '0'); | |
+ if (*p == ':') | |
+ p++; | |
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) | |
+ tzmin = (tzmin * 10) + (*p - '0'); | |
+ return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : … | |
+ default: /* timezone name */ | |
+ break; | |
+ } | |
+ return 0; | |
+} | |
+ | |
+/* Parse time string `s` into the UNIX timestamp `tp`. | |
+ Returns 0 on success or -1 on failure. */ | |
+static int | |
+parsetime(const char *s, long long *tp) | |
+{ | |
+ int va[6] = { 0 }, i, v, vi; | |
+ | |
+ /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" … | |
+ if (!ISDIGIT((unsigned char)s[0]) || | |
+ !ISDIGIT((unsigned char)s[1]) || | |
+ !ISDIGIT((unsigned char)s[2]) || | |
+ !ISDIGIT((unsigned char)s[3])) | |
+ return -1; | |
+ | |
+ /* parse time parts (and possibly remaining date parts) */ | |
+ for (vi = 0; *s && vi < 6; vi++) { | |
+ for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) && | |
+ ISDIGIT((unsigned char)*s); s++, i++) { | |
+ v = (v * 10) + (*s - '0'); | |
+ } | |
+ va[vi] = v; | |
+ | |
+ if ((vi < 2 && *s == '-') || | |
+ (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) || | |
+ (vi > 2 && *s == ':')) | |
+ s++; | |
+ } | |
+ | |
+ /* invalid range */ | |
+ if (va[0] < 0 || va[0] > 9999 || | |
+ va[1] < 1 || va[1] > 12 || | |
+ va[2] < 1 || va[2] > 31 || | |
+ va[3] < 0 || va[3] > 23 || | |
+ va[4] < 0 || va[4] > 59 || | |
+ va[5] < 0 || va[5] > 60) /* allow leap second */ | |
+ return -1; | |
+ | |
+ *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) - | |
+ gettzoffset(s); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static void | |
+atom_header(void) | |
+{ | |
+ fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" | |
+ "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n" | |
+ "\t<title>Newsfeed</title>\n", stdout); | |
+} | |
+ | |
+static void | |
+atom_footer(void) | |
+{ | |
+ fputs("</feed>\n", stdout); | |
+} | |
+ | |
+static void | |
+atom_item(void) | |
+{ | |
+ struct item *v, *found = NULL; | |
+ size_t i; | |
+ | |
+ /* must have a video id */ | |
+ if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
+ return; | |
+ | |
+ for (i = 0; i < search_res->nitems; i++) { | |
+ v = &(search_res->items[i]); | |
+ if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) | |
+ found = v; | |
+ } | |
+ /* Only print the video if it was found in the feed as well.
+ This way it also filters out shorts. */
+ if (!found) | |
+ return; | |
+ | |
+ fputs("<entry>\n\t<title>", stdout); | |
+ xmlencode(ctx.fields[FeedFieldTitle].str.data); | |
+ if (found->duration[0]) { | |
+ fputs(" [", stdout); | |
+ xmlencode(found->duration); | |
+ fputs("]", stdout); | |
+ } | |
+ fputs("</title>\n", stdout); | |
+ if (ctx.fields[FeedFieldLink].str.len) { | |
+ fputs("\t<link rel=\"alternate\" href=\"", stdout); | |
+ xmlencode(ctx.fields[FeedFieldLink].str.data); | |
+ fputs("\" />\n", stdout); | |
+ } | |
+ /* prefer link over id for Atom <id>. */ | |
+ fputs("\t<id>", stdout); | |
+ if (ctx.fields[FeedFieldLink].str.len) | |
+ xmlencode(ctx.fields[FeedFieldLink].str.data); | |
+ else if (ctx.fields[FeedFieldId].str.len) | |
+ xmlencode(ctx.fields[FeedFieldId].str.data); | |
+ fputs("</id>\n", stdout); | |
+ | |
+ /* just print the original timestamp, it should conform */ | |
+ fputs("\t<updated>", stdout); | |
+ string_print(&ctx.fields[FeedFieldTime].str); | |
+ fputs("</updated>\n", stdout); | |
+ | |
+ if (ctx.fields[FeedFieldAuthor].str.len) { | |
+ fputs("\t<author><name>", stdout); | |
+ xmlencode(ctx.fields[FeedFieldAuthor].str.data); | |
+ fputs("</name></author>\n", stdout); | |
+ } | |
+ if (ctx.fields[FeedFieldContent].str.len) { | |
+ fputs("\t<content>", stdout); | |
+ xmlencode(ctx.fields[FeedFieldContent].str.data); | |
+ fputs("</content>\n", stdout); | |
+ } | |
+ fputs("</entry>\n", stdout); | |
+} | |
+ | |
+static void | |
+json_header(void) | |
+{ | |
+ fputs("{\n" | |
+ "\"version\": \"https://jsonfeed.org/version/1.1\",\n" | |
+ "\"title\": \"Newsfeed\",\n" | |
+ "\"items\": [\n", stdout); | |
+} | |
+ | |
+static void | |
+json_footer(void) | |
+{ | |
+ fputs("]\n}\n", stdout); | |
+} | |
+ | |
+static void | |
+json_printfield(const char *s) | |
+{ | |
+ for (; *s; s++) { | |
+ if (*s == '\\') | |
+ fputs("\\\\", stdout); | |
+ else if (*s == '"') | |
+ fputs("\\\"", stdout); | |
+ else if (ISCNTRL((unsigned char)*s)) | |
+ printf("\\u00%02x", (unsigned char)*s); | |
+ else | |
+ putchar(*s); | |
+ } | |
+} | |
+ | |
+static void | |
+json_item(void) | |
+{ | |
+ static int json_firstitem = 1; | |
+ struct item *v, *found = NULL; | |
+ size_t i; | |
+ | |
+ /* must have a video id */ | |
+ if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
+ return; | |
+ | |
+ for (i = 0; i < search_res->nitems; i++) { | |
+ v = &(search_res->items[i]); | |
+ if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) | |
+ found = v; | |
+ } | |
+ /* Only print the video if it was found in the feed as well.
+ This way it also filters out shorts. */
+ if (!found) | |
+ return; | |
+ | |
+ if (!json_firstitem) | |
+ fputs(",\n", stdout); | |
+ json_firstitem = 0; | |
+ | |
+ fputs("{\n\t\"id\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldId].str.data); | |
+ fputs("\"", stdout); | |
+ | |
+ /* just print the original timestamp, it should conform */ | |
+ fputs(",\n\t\"date_published\": \"", stdout); | |
+ string_print(&ctx.fields[FeedFieldTime].str); | |
+ fputs("\"", stdout); | |
+ | |
+ fputs(",\n\t\"title\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldTitle].str.data); | |
+ if (found->duration[0]) { | |
+ fputs(" [", stdout); | |
+ json_printfield(found->duration); | |
+ fputs("]", stdout); | |
+ } | |
+ fputs("\"", stdout); | |
+ | |
+ if (ctx.fields[FeedFieldLink].str.len) { | |
+ fputs(",\n\t\"url\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldLink].str.data); | |
+ fputs("\"", stdout); | |
+ } | |
+ | |
+ if (ctx.fields[FeedFieldAuthor].str.len) { | |
+ fputs(",\n\t\"authors\": [{\"name\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldAuthor].str.data); | |
+ fputs("\"}]", stdout); | |
+ } | |
+ | |
+ fputs(",\n\t\"content_text\": \"", stdout); | |
+ json_printfield(ctx.fields[FeedFieldContent].str.data); | |
+ fputs("\"\n}", stdout); | |
+} | |
+ | |
+static void | |
+sfeed_item(void) | |
+{ | |
+ struct item *v, *found = NULL; | |
+ size_t i; | |
+ | |
+ /* must have a video id */ | |
+ if (!ctx.fields[FeedFieldYoutubeId].str.len) | |
+ return; | |
+ | |
+ for (i = 0; i < search_res->nitems; i++) { | |
+ v = &(search_res->items[i]); | |
+ if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) | |
+ found = v; | |
+ } | |
+ /* Only print the video if it was found in the feed as well.
+ This way it also filters out shorts. */
+ if (!found) | |
+ return; | |
+ | |
+ string_print_timestamp(&ctx.fields[FeedFieldTime].str); | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldTitle].str); | |
+ if (found->duration[0]) { | |
+ fputs(" [", stdout); | |
+ fputs(found->duration, stdout); | |
+ fputs("]", stdout); | |
+ } | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldLink].str); | |
+ putchar(FieldSeparator); | |
+ string_print_encoded(&ctx.fields[FeedFieldContent].str); | |
+ putchar(FieldSeparator); | |
+ fputs("plain", stdout); | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldId].str); | |
+ putchar(FieldSeparator); | |
+ string_print(&ctx.fields[FeedFieldAuthor].str); | |
+ putchar(FieldSeparator); | |
+ /* no/empty enclosure */ | |
+ putchar(FieldSeparator); | |
+ /* empty category */ | |
+ putchar('\n'); | |
+} | |
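
Taken together, sfeed_item() writes one TAB-separated record per matched video in sfeed(5) field order: UNIX timestamp, title (with the duration appended), link, content, content-type (always "plain" here), id, author, and finally the enclosure and category fields, which are left empty. A hypothetical record with placeholder values, TABs written as <TAB>:

	1683673851<TAB>Example title [12:34]<TAB>https://www.youtube.com/watch?v=XXXXXXXXXXX<TAB>Example description<TAB>plain<TAB>example-entry-id<TAB>Example channel<TAB><TAB>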
+ | |
+static int | |
+istag(const char *name, size_t len, const char *name2, size_t len2) | |
+{ | |
+ return (len == len2 && !strcasecmp(name, name2)); | |
+} | |
+ | |
+static int | |
+isattr(const char *name, size_t len, const char *name2, size_t len2) | |
+{ | |
+ return (len == len2 && !strcasecmp(name, name2)); | |
+} | |
+ | |
+static void | |
+xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, | |
+ const char *v, size_t vl) | |
+{ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ if (!ctx.tag.id) | |
+ return; | |
+ | |
+ if (ISCONTENTTAG(ctx)) | |
+ return; | |
+ | |
+ if (ctx.tag.id == AtomTagLink) { | |
+ if (isattr(n, nl, STRP("rel"))) { | |
+ string_append(&attrrel, v, vl); | |
+ } else if (isattr(n, nl, STRP("href"))) { | |
+ string_append(&tmpstr, v, vl); | |
+ } | |
+ } | |
+} | |
+ | |
+static void | |
+xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, | |
+ const char *data, size_t datalen) | |
+{ | |
+ char buf[8]; | |
+ int len; | |
+ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ if (!ctx.tag.id) | |
+ return; | |
+ | |
+ /* try to translate entity, else just pass as data to | |
+ * xmlattr handler. */ | |
+ if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) | |
+ xmlattr(p, t, tl, n, nl, buf, (size_t)len); | |
+ else | |
+ xmlattr(p, t, tl, n, nl, data, datalen); | |
+} | |
+ | |
+static void | |
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl) | |
+{ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ if (attrrel.len && isattr(n, nl, STRP("rel"))) | |
+ string_clear(&attrrel); | |
+ else if (tmpstr.len && | |
+ (isattr(n, nl, STRP("href")) || | |
+ isattr(n, nl, STRP("url")))) | |
+ string_clear(&tmpstr); /* use the last value for multiple attributes */
+} | |
+ | |
+static void | |
+xmldata(XMLParser *p, const char *s, size_t len) | |
+{ | |
+ if (!ctx.field) | |
+ return; | |
+ | |
+ string_append(ctx.field, s, len); | |
+} | |
+ | |
+static void | |
+xmldataentity(XMLParser *p, const char *data, size_t datalen) | |
+{ | |
+ char buf[8]; | |
+ int len; | |
+ | |
+ if (!ctx.field) | |
+ return; | |
+ | |
+ /* try to translate entity, else just pass as data to | |
+ * xmldata handler. */ | |
+ if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) | |
+ xmldata(p, buf, (size_t)len); | |
+ else | |
+ xmldata(p, data, datalen); | |
+} | |
+ | |
+static void | |
+xmltagstart(XMLParser *p, const char *t, size_t tl) | |
+{ | |
+ const FeedTag *f; | |
+ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ /* start of RSS or Atom item / entry */ | |
+ if (ctx.feedtype == FeedTypeNone) { | |
+ if (istag(t, tl, STRP("entry"))) | |
+ ctx.feedtype = FeedTypeAtom; | |
+ return; | |
+ } | |
+ | |
+ /* field tagid already set or nested tags. */ | |
+ if (ctx.tag.id) { | |
+ /* nested <author><name> for Atom */ | |
+ if (ctx.tag.id == AtomTagAuthor && | |
+ istag(t, tl, STRP("name"))) { | |
+ memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
+ } else { | |
+ return; /* other nested tags are not allowed: return */ | |
+ } | |
+ } | |
+ | |
+ /* in item */ | |
+ if (ctx.tag.id == TagUnknown) { | |
+ if (!(f = gettag(ctx.feedtype, t, tl))) | |
+ f = ¬ag; | |
+ memcpy(&(ctx.tag), f, sizeof(ctx.tag)); | |
+ } | |
+ | |
+ ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent); | |
+ string_clear(&attrrel); | |
+} | |
+ | |
+static void | |
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) | |
+{ | |
+ enum TagId tagid; | |
+ | |
+ if (ISINCONTENT(ctx)) | |
+ return; | |
+ | |
+ /* set tag type based on its attribute value */ | |
+ if (ctx.tag.id == AtomTagLink) { | |
+ /* empty or "alternate": other types could be | |
+ "enclosure", "related", "self" or "via" */ | |
+ if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate")))
+ ctx.tag.id = AtomTagLinkAlternate; | |
+ else | |
+ ctx.tag.id = AtomTagLink; /* unknown */ | |
+ } | |
+ | |
+ tagid = ctx.tag.id; | |
+ | |
+ /* map tag type to field: unknown or lesser priority is ignored, | |
+ when tags of the same type are repeated only the first is used. */ | |
+ if (fieldmap[tagid] == -1 || | |
+ tagid <= ctx.fields[fieldmap[tagid]].tagid) { | |
+ return; | |
+ } | |
+ | |
+ if (ctx.iscontenttag) { | |
+ ctx.iscontent = 1; | |
+ ctx.iscontenttag = 0; | |
+ } | |
+ | |
+ ctx.field = &(ctx.fields[fieldmap[tagid]].str); | |
+ ctx.fields[fieldmap[tagid]].tagid = tagid; | |
+ | |
+ /* clear field if it is overwritten (with a priority order) for the new | |
+ value, if the field can have multiple values then do not clear it. */
+ string_clear(ctx.field); | |
+} | |
+ | |
+static void | |
+xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) | |
+{ | |
+ size_t i; | |
+ | |
+ if (ctx.feedtype == FeedTypeNone) | |
+ return; | |
+ | |
+ if (ISINCONTENT(ctx)) { | |
+ /* not a closed content field */ | |
+ if (!istag(ctx.tag.name, ctx.tag.len, t, tl)) | |
+ return; | |
+ } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) { | |
+ /* matched tag end: close it */ | |
+ } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom && | |
+ istag(t, tl, STRP("entry"))))) /* Atom */ | |
+ { | |
+ /* end of Atom entry */ | |
+ printfields(); | |
+ | |
+ /* clear strings */ | |
+ for (i = 0; i < FeedFieldLast; i++) { | |
+ string_clear(&ctx.fields[i].str); | |
+ ctx.fields[i].tagid = TagUnknown; | |
+ } | |
+ /* allow parsing of Atom and RSS concatenated in one XML stream. */
+ ctx.feedtype = FeedTypeNone; | |
+ } else { | |
+ return; /* not end of field */ | |
+ } | |
+ | |
+ /* temporary string: for fields that cannot be processed | |
+ directly and need more context, for example by its tag | |
+ attributes, like the Atom link rel="alternate|enclosure". */ | |
+ if (tmpstr.len && ctx.field) { | |
+ string_clear(ctx.field); | |
+ string_append(ctx.field, tmpstr.data, tmpstr.len); | |
+ } | |
+ | |
+ /* close field */ | |
+ string_clear(&tmpstr); /* reuse and clear temporary string */ | |
+ | |
+ if (ctx.tag.id == AtomTagAuthorName) | |
+ memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer author tag */
+ else | |
+ memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); | |
+ | |
+ ctx.iscontent = 0; | |
+ ctx.field = NULL; | |
+} | |
+ | |
+static char * | |
+request_channel_feed(const char *channelid) | |
+{ | |
+ char path[2048]; | |
+ int r; | |
+ | |
+ r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid);
+ /* check if request is too long (truncation) */ | |
+ if (r < 0 || (size_t)r >= sizeof(path)) | |
+ return NULL; | |
+ | |
+ return request("www.youtube.com", path, ""); | |
+} | |
+ | |
+int | |
+isvalidchannel(const char *s) | |
+{ | |
+ size_t len; | |
+ | |
+ for (len = 0; *s; s++, len++) { | |
+ if (ISALPHA((unsigned char)*s) || | |
+ ISDIGIT((unsigned char)*s) || | |
+ *s == '-' || *s == '_') | |
+ continue; | |
+ return 0; | |
+ } | |
+ | |
+ return *s == '\0' && len == 24; | |
+} | |
+ | |
+void | |
+usage(void) | |
+{ | |
+ if (cgimode) { | |
+ fputs("Status: 400 Bad Request\r\n", stdout); | |
+ fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdou… | |
+ fputs("400 Bad Request\n", stdout); | |
+ exit(0); | |
+ } else { | |
+ fputs("usage: feed <channelid> [atom|json|tsv]\n", stderr); | |
+ exit(1); | |
+ } | |
+} | |
+ | |
+int | |
+main(int argc, char *argv[]) | |
+{ | |
+ char buf[256]; | |
+ const char *channelid = NULL; | |
+ char *data, *format = "tsv", *p, *requesturi, *tmp; | |
+ size_t i; | |
+ | |
+ if (pledge("stdio dns inet rpath unveil", NULL) == -1) | |
+ err(1, "pledge"); | |
+ | |
+ if ((tmp = getenv("REQUEST_URI"))) { | |
+ cgimode = 1; | |
+ | |
+ strlcpy(buf, tmp, sizeof(buf)); | |
+ requesturi = buf; | |
+ | |
+ if (!(p = strrchr(requesturi, '/'))) | |
+ usage(); | |
+ | |
+ channelid = p + 1; | |
+ if ((p = strrchr(channelid, '.'))) { | |
+ *p = '\0'; /* NULL terminate */ | |
+ format = p + 1; | |
+ } | |
+ } else { | |
+ if (argc <= 1) | |
+ usage(); | |
+ | |
+ channelid = argv[1]; | |
+ if (argc > 2) | |
+ format = argv[2]; | |
+ } | |
+ if (!channelid || !isvalidchannel(channelid)) | |
+ usage(); | |
+ | |
+ if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
+ printfields = atom_item; | |
+ else if (!strcmp(format, "json")) | |
+ printfields = json_item; | |
+ else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed")) | |
+ printfields = sfeed_item; | |
+ else | |
+ usage(); | |
+ | |
+ search_res = youtube_channel_videos(channelid); | |
+ if (!search_res || search_res->nitems == 0) { | |
+ /* error or no videos found */ | |
+ return 0; | |
+ } | |
+ | |
+ if (!(data = request_channel_feed(channelid))) | |
+ return 1; /* error, no data at all */ | |
+ | |
+ if (pledge("stdio", NULL) == -1) | |
+ err(1, "pledge"); | |
+ | |
+ setxmldata(data, strlen(data)); | |
+ | |
+ memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); | |
+ | |
+ parser.xmlattr = xmlattr; | |
+ parser.xmlattrentity = xmlattrentity; | |
+ parser.xmlattrstart = xmlattrstart; | |
+ parser.xmlcdata = xmldata; | |
+ parser.xmldata = xmldata; | |
+ parser.xmldataentity = xmldataentity; | |
+ parser.xmltagend = xmltagend; | |
+ parser.xmltagstart = xmltagstart; | |
+ parser.xmltagstartparsed = xmltagstartparsed; | |
+ | |
+ /* init all fields, make sure each has a value */
+ for (i = 0; i < FeedFieldLast; i++) { | |
+ string_append(&(ctx.fields[i].str), " ", 1); | |
+ string_clear(&(ctx.fields[i].str)); | |
+ } | |
+ | |
+ if (cgimode) { | |
+ fputs("Status: 200 OK\r\n", stdout); | |
+ if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
+ fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n",… | |
+ else if (!strcmp(format, "json")) | |
+ fputs("Content-Type: application/json; charset=utf-8\r… | |
+ else | |
+ fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n… | |
+ } | |
+ | |
+ if (!strcmp(format, "atom") || !strcmp(format, "xml")) | |
+ atom_header(); | |
+ else if (!strcmp(format, "json")) | |
+ json_header(); | |
+ | |
+ /* NOTE: getnext is defined in xml.h for inline optimization */ | |
+ xml_parse(&parser); | |
+ | |
+ if (!strcmp(format, "atom")) | |
+ atom_footer(); | |
+ else if (!strcmp(format, "json")) | |
+ json_footer(); | |
+ | |
+ return 0; | |
+} |