GopherProxy

	youtube: add channel2tsv output - frontends - front-ends for some sites (experi…
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 2be30b4f834c64d4478e8cff231ee9b29601edc0
	parent 0ddeddd9e7acba6abe47ccaf8563b712cf96a037
	Author: Hiltjo Posthuma <[email protected]>
	Date: Sat, 11 Feb 2023 19:01:42 +0100

	youtube: add channel2tsv output

	* Make the parser a bit less strict so it can also parse the channel page
	with videos.
	* Add a function that can fetch the channel videos by channel ID.
	* Add a tool that outputs channel videos to a TAB-separated format.

	Diffstat:
	M Makefile | 6 +++++-
	A youtube/channel2tsv.c | 108 +++++++++++++++++++++++++++++…
	M youtube/youtube.c | 93 +++++++++++++++++++++++------…
	M youtube/youtube.h | 3 +++

	4 files changed, 187 insertions(+), 23 deletions(-)
	---
	diff --git a/Makefile b/Makefile
	@@ -25,6 +25,7 @@ BIN = \
	reddit/cli \
	reddit/gopher \
	youtube/cgi \
	+ youtube/channel2tsv \
	youtube/cli \
	youtube/gopher

	@@ -97,11 +98,14 @@ twitch/cgi: ${LIB} twitch/twitch.o twitch/cgi.o
	twitch/gopher: ${LIB} twitch/twitch.o twitch/gopher.o
	${CC} -o $@ twitch/gopher.o twitch/twitch.o ${LIB} ${LDFLAGS} ${LIBTLS…

	-youtube: youtube/cgi youtube/cli youtube/gopher
	+youtube: youtube/cgi youtube/channel2tsv youtube/cli youtube/gopher

	youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o
	${CC} -o $@ youtube/cgi.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS…

	+youtube/channel2tsv: ${LIB} youtube/youtube.o youtube/channel2tsv.o
	+ ${CC} -o $@ youtube/channel2tsv.o youtube/youtube.o ${LIB} ${LDFLAGS} …
	+
	youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o
	${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS…

	diff --git a/youtube/channel2tsv.c b/youtube/channel2tsv.c
	@@ -0,0 +1,108 @@
	+#include <sys/socket.h>
	+#include <sys/types.h>
	+
	+#include <ctype.h>
	+#include <errno.h>
	+#include <netdb.h>
	+#include <stdarg.h>
	+#include <stdio.h>
	+#include <stdlib.h>
	+#include <string.h>
	+#include <unistd.h>
	+
	+#include "https.h"
	+#include "util.h"
	+#include "youtube.h"
	+
	+/* OUT: write the string s to stdout unchanged (fputs) */
	+#define OUT(s) fputs((s), stdout)
	+/* OUTESCAPE: write the string s to stdout through printescape() */
	+#define OUTESCAPE(s) printescape((s))
	+
	+/*
	+ * Write the NUL-terminated string s to stdout, dropping every byte for which
	+ * iscntrl() is true. In the default "C" locale that includes TAB and
	+ * newline, the separators used by the output format of this tool.
	+ */
	+void
	+printescape(const char *s)
	+{
	+ for (; *s; ++s)
	+ /* cast: the ctype functions must not be given a negative char value */
	+ if (!iscntrl((unsigned char)*s))
	+ fputc(*s, stdout);
	+}
	+
	+/*
	+ * Print the items of response r to stdout, one line per item, with the
	+ * fields separated by a TAB:
	+ *
	+ *   id, embed URL, title, publishedat, viewcount, duration
	+ *
	+ * Items whose linktype is Channel, Movie or Playlist are skipped. The embed
	+ * URL field is left empty when the item has an empty id. Every field is
	+ * written with OUTESCAPE(), so control characters are removed from it.
	+ *
	+ * Calls pledge("stdio") before writing and exits with status 1 if that
	+ * fails. Always returns 0.
	+ */
	+int
	+render(struct search_response *r)
	+{
	+ struct item *videos = r->items;
	+ size_t i;
	+
	+ /* from here on only the "stdio" promise is requested */
	+ if (pledge("stdio", NULL) == -1) {
	+ fprintf(stderr, "pledge: %s\n", strerror(errno));
	+ exit(1);
	+ }
	+
	+ for (i = 0; i < r->nitems; i++) {
	+ /* "continue" applies to the for loop: these link types are not printed */
	+ switch (videos[i].linktype) {
	+ case Channel:
	+ case Movie:
	+ case Playlist:
	+ continue;
	+ default:
	+ break;
	+ }
	+
	+ OUTESCAPE(videos[i].id);
	+ OUT("\t");
	+ /* only build the URL when there is an id to append */
	+ if (videos[i].id[0]) {
	+ OUT("https://www.youtube.com/embed/");
	+ OUTESCAPE(videos[i].id);
	+ }
	+ OUT("\t");
	+ OUTESCAPE(videos[i].title);
	+ OUT("\t");
	+ OUTESCAPE(videos[i].publishedat);
	+ OUT("\t");
	+ OUTESCAPE(videos[i].viewcount);
	+ OUT("\t");
	+ OUTESCAPE(videos[i].duration);
	+ OUT("\n");
	+ }
	+
	+ return 0;
	+}
	+
	+/*
	+ * Print the usage line to stderr, using argv0 as the program name, and exit
	+ * with status 1. Does not return.
	+ */
	+static void
	+usage(const char *argv0)
	+{
	+ fprintf(stderr, "usage: %s <channelid>\n", argv0);
	+ exit(1);
	+}
	+
	+/*
	+ * Entry point: fetch the videos of the channel whose ID is given as the
	+ * first argument and print them with render().
	+ *
	+ * Exits with status 1 when pledge() or unveil() fails, when the argument is
	+ * missing, empty or rejected by uriencode() (after printing the usage line),
	+ * and when the lookup fails or yields no items. Returns 0 otherwise.
	+ */
	+int
	+main(int argc, char *argv[])
	+{
	+ struct search_response *r;
	+ char channelid[1024];
	+
	+ if (pledge("stdio dns inet rpath unveil", NULL) == -1) {
	+ fprintf(stderr, "pledge: %s\n", strerror(errno));
	+ exit(1);
	+ }
	+ /* the only path made visible is the CA certificate file, read-only */
	+ if (unveil(TLS_CA_CERT_FILE, "r") == -1) {
	+ fprintf(stderr, "unveil: %s\n", strerror(errno));
	+ exit(1);
	+ }
	+ /* both arguments NULL: no further unveil() calls are wanted after this */
	+ if (unveil(NULL, NULL) == -1) {
	+ fprintf(stderr, "unveil: %s\n", strerror(errno));
	+ exit(1);
	+ }
	+
	+ if (argc < 2 || !argv[1][0])
	+ usage(argv[0]);
	+ /* a zero result from uriencode() is treated as a usage error */
	+ if (!uriencode(argv[1], channelid, sizeof(channelid)))
	+ usage(argv[0]);
	+
	+ r = youtube_channel_videos(channelid);
	+ if (!r || r->nitems == 0)
	+ exit(1);
	+
	+ render(r);
	+
	+ return 0;
	+}
	diff --git a/youtube/youtube.c b/youtube/youtube.c
	@@ -22,6 +22,20 @@ youtube_request(const char *path)
	}

	+/*
	+ * Build the request path "/channel/<channelid>/videos" and pass it to
	+ * youtube_request(). channelid is inserted into the path as given.
	+ *
	+ * Returns the result of youtube_request(), or NULL when snprintf() fails or
	+ * the path does not fit in the local buffer.
	+ */
	static char *
	+request_channel_videos(const char *channelid)
	+{
	+ char path[4096];
	+ int r;
	+
	+ r = snprintf(path, sizeof(path), "/channel/%s/videos", channelid);
	+ /* check if request is too long (truncation) */
	+ if (r < 0 || (size_t)r >= sizeof(path))
	+ return NULL;
	+
	+ return youtube_request(path);
	+}
	+
	+static char *
	request_search(const char s, const char page, const char *order)
	{
	char path[4096];
	@@ -90,11 +104,11 @@ processnode(struct json_node *nodes, size_t depth, const c…
	/* new item, structures can be very deep, just check the end for:
	(items|contents)[].videoRenderer objects */
	if (depth >= 3 &&
	- nodes[depth - 3].type == JSON_TYPE_ARRAY &&
	- nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	+// nodes[depth - 3].type == JSON_TYPE_ARRAY &&
	+// nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	nodes[depth - 1].type == JSON_TYPE_OBJECT &&
	- (!strcmp(nodes[depth - 3].name, "items") \|\|
	- !strcmp(nodes[depth - 3].name, "contents")) &&
	+// (!strcmp(nodes[depth - 3].name, "items") \|\|
	+// !strcmp(nodes[depth - 3].name, "content")) &&
	!strcmp(nodes[depth - 1].name, "videoRenderer")) {
	r->nitems++;
	return;
	@@ -105,27 +119,28 @@ processnode(struct json_node *nodes, size_t depth, const …
	item = &(r->items[r->nitems - 1]);

	if (depth >= 4 &&
	- nodes[depth - 4].type == JSON_TYPE_ARRAY &&
	- nodes[depth - 3].type == JSON_TYPE_OBJECT &&
	- nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	+// nodes[depth - 4].type == JSON_TYPE_ARRAY &&
	+// nodes[depth - 3].type == JSON_TYPE_OBJECT &&
	+// nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	nodes[depth - 1].type == JSON_TYPE_STRING &&
	- (!strcmp(nodes[depth - 4].name, "items") \|\|
	- !strcmp(nodes[depth - 4].name, "contents")) &&
	+// (!strcmp(nodes[depth - 4].name, "items") \|\|
	+// !strcmp(nodes[depth - 4].name, "contents")) &&
	!strcmp(nodes[depth - 2].name, "videoRenderer") &&
	!strcmp(nodes[depth - 1].name, "videoId")) {
	strlcpy(item->id, value, sizeof(item->id));
	}

	if (depth >= 7 &&
	- nodes[depth - 7].type == JSON_TYPE_ARRAY &&
	- nodes[depth - 6].type == JSON_TYPE_OBJECT &&
	+// nodes[depth - 7].type == JSON_TYPE_ARRAY &&
	+// nodes[depth - 6].type == JSON_TYPE_OBJECT &&
	nodes[depth - 5].type == JSON_TYPE_OBJECT &&
	nodes[depth - 4].type == JSON_TYPE_OBJECT &&
	nodes[depth - 3].type == JSON_TYPE_ARRAY &&
	nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	nodes[depth - 1].type == JSON_TYPE_STRING &&
	- (!strcmp(nodes[depth - 7].name, "items") \|\|
	- !strcmp(nodes[depth - 7].name, "contents")) &&
	+// (!strcmp(nodes[depth - 7].name, "items") \|\|
	+// !strcmp(nodes[depth - 7].name, "contents")) &&
	+
	!strcmp(nodes[depth - 5].name, "videoRenderer") &&
	!strcmp(nodes[depth - 4].name, "title") &&
	!strcmp(nodes[depth - 3].name, "runs") &&
	@@ -135,13 +150,13 @@ processnode(struct json_node *nodes, size_t depth, const …
	}

	if (depth >= 5 &&
	- nodes[depth - 5].type == JSON_TYPE_ARRAY &&
	+// nodes[depth - 5].type == JSON_TYPE_ARRAY &&
	nodes[depth - 4].type == JSON_TYPE_OBJECT &&
	nodes[depth - 3].type == JSON_TYPE_OBJECT &&
	nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	nodes[depth - 1].type == JSON_TYPE_STRING &&
	- (!strcmp(nodes[depth - 5].name, "items") \|\|
	- !strcmp(nodes[depth - 5].name, "contents")) &&
	+// (!strcmp(nodes[depth - 5].name, "items") \|\|
	+// !strcmp(nodes[depth - 5].name, "contents")) &&
	!strcmp(nodes[depth - 3].name, "videoRenderer") &&
	!strcmp(nodes[depth - 1].name, "simpleText")) {
	if (!strcmp(nodes[depth - 2].name, "viewCountText") &&
	@@ -157,7 +172,7 @@ processnode(struct json_node *nodes, size_t depth, const ch…
	}

	if (depth >= 9 &&
	- nodes[depth - 9].type == JSON_TYPE_ARRAY &&
	+// nodes[depth - 9].type == JSON_TYPE_ARRAY &&
	nodes[depth - 8].type == JSON_TYPE_OBJECT &&
	nodes[depth - 7].type == JSON_TYPE_OBJECT &&
	nodes[depth - 6].type == JSON_TYPE_OBJECT &&
	@@ -166,8 +181,8 @@ processnode(struct json_node *nodes, size_t depth, const ch…
	nodes[depth - 3].type == JSON_TYPE_OBJECT &&
	nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	nodes[depth - 1].type == JSON_TYPE_STRING &&
	- (!strcmp(nodes[depth - 9].name, "items") \|\|
	- !strcmp(nodes[depth - 9].name, "contents")) &&
	+// (!strcmp(nodes[depth - 9].name, "items") \|\|
	+// !strcmp(nodes[depth - 9].name, "contents")) &&
	!strcmp(nodes[depth - 7].name, "videoRenderer") &&
	!strcmp(nodes[depth - 6].name, "longBylineText") &&
	!strcmp(nodes[depth - 5].name, "runs") &&
	@@ -179,15 +194,15 @@ processnode(struct json_node *nodes, size_t depth, const …
	}

	if (depth >= 7 &&
	- nodes[depth - 7].type == JSON_TYPE_ARRAY &&
	+// nodes[depth - 7].type == JSON_TYPE_ARRAY &&
	nodes[depth - 6].type == JSON_TYPE_OBJECT &&
	nodes[depth - 5].type == JSON_TYPE_OBJECT &&
	nodes[depth - 4].type == JSON_TYPE_OBJECT &&
	nodes[depth - 3].type == JSON_TYPE_ARRAY &&
	nodes[depth - 2].type == JSON_TYPE_OBJECT &&
	nodes[depth - 1].type == JSON_TYPE_STRING &&
	- (!strcmp(nodes[depth - 7].name, "items") \|\|
	- !strcmp(nodes[depth - 7].name, "contents")) &&
	+// (!strcmp(nodes[depth - 7].name, "items") \|\|
	+// !strcmp(nodes[depth - 7].name, "contents")) &&
	!strcmp(nodes[depth - 5].name, "videoRenderer") &&
	!strcmp(nodes[depth - 4].name, "longBylineText") &&
	!strcmp(nodes[depth - 3].name, "runs")) {
	@@ -231,3 +246,37 @@ youtube_search(const char rawsearch, const char page, co…

	return r;
	}
	+
	+/*
	+ * Request the videos page of the channel channelid and collect its items.
	+ *
	+ * The data returned by request_channel_videos() must contain an empty line
	+ * ("\r\n\r\n") ending the header. extractjson() is run on the part after it
	+ * and the range it reports is handed to parsejson() together with
	+ * processnode and the newly allocated response.
	+ *
	+ * Returns the calloc()-allocated response, or NULL when the request fails,
	+ * no header end is found, the allocation fails, or extracting or parsing
	+ * the JSON fails (the response is freed in the last two cases).
	+ *
	+ * NOTE(review): data is never freed in this function; confirm who owns the
	+ * buffer returned by youtube_request().
	+ */
	+struct search_response *
	+youtube_channel_videos(const char *channelid)
	+{
	+ struct search_response *r;
	+ char *data, *s, *start, *end;
	+ int ret;
	+
	+ if (!(data = request_channel_videos(channelid)))
	+ return NULL;
	+
	+ if (!(s = strstr(data, "\r\n\r\n")))
	+ return NULL; /* invalid response */
	+ /* skip header */
	+ s += strlen("\r\n\r\n");
	+
	+ if (!(r = calloc(1, sizeof(*r))))
	+ return NULL;
	+
	+ /* start and end receive the bounds of the JSON inside the body */
	+ if (extractjson(s, &start, &end) == -1) {
	+ fprintf(stderr, "error extracting JSON");
	+ free(r);
	+ return NULL;
	+ }
	+
	+ ret = parsejson(start, end - start, processnode, r);
	+ if (ret < 0) {
	+// fprintf(stderr, "error parsing JSON");
	+ free(r);
	+ return NULL;
	+ }
	+
	+ return r;
	+}
	diff --git a/youtube/youtube.h b/youtube/youtube.h
	@@ -19,3 +19,6 @@ struct search_response {

	struct search_response *
	youtube_search(const char *rawsearch, const char *page, const char *order);
	+
	+/*
	+ * Fetch the videos page of the channel identified by channelid and return
	+ * the parsed items. Returns NULL on failure.
	+ */
	+struct search_response *
	+youtube_channel_videos(const char *channelid);