youtube: add channel2tsv output - frontends - front-ends for some sites (experi… | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 2be30b4f834c64d4478e8cff231ee9b29601edc0 | |
parent 0ddeddd9e7acba6abe47ccaf8563b712cf96a037 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sat, 11 Feb 2023 19:01:42 +0100 | |
youtube: add channel2tsv output | |
* Make the parser a bit less strict so it can also parse the channel page | |
with videos. | |
* Add a function that can fetch the channel videos by channel ID. | |
* Add a tool that outputs channel videos to a TAB-separated format. | |
Diffstat: | |
M Makefile | 6 +++++- | |
A youtube/channel2tsv.c | 108 +++++++++++++++++++++++++++++… | |
M youtube/youtube.c | 93 +++++++++++++++++++++++------… | |
M youtube/youtube.h | 3 +++ | |
4 files changed, 187 insertions(+), 23 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -25,6 +25,7 @@ BIN = \ | |
reddit/cli \ | |
reddit/gopher \ | |
youtube/cgi \ | |
+ youtube/channel2tsv \ | |
youtube/cli \ | |
youtube/gopher | |
@@ -97,11 +98,14 @@ twitch/cgi: ${LIB} twitch/twitch.o twitch/cgi.o | |
twitch/gopher: ${LIB} twitch/twitch.o twitch/gopher.o | |
${CC} -o $@ twitch/gopher.o twitch/twitch.o ${LIB} ${LDFLAGS} ${LIBTLS… | |
-youtube: youtube/cgi youtube/cli youtube/gopher | |
+youtube: youtube/cgi youtube/channel2tsv youtube/cli youtube/gopher | |
youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o | |
${CC} -o $@ youtube/cgi.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS… | |
+youtube/channel2tsv: ${LIB} youtube/youtube.o youtube/channel2tsv.o | |
+ ${CC} -o $@ youtube/channel2tsv.o youtube/youtube.o ${LIB} ${LDFLAGS} … | |
+ | |
youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o | |
${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS… | |
diff --git a/youtube/channel2tsv.c b/youtube/channel2tsv.c | |
@@ -0,0 +1,108 @@ | |
+#include <sys/socket.h> | |
+#include <sys/types.h> | |
+ | |
+#include <ctype.h> | |
+#include <errno.h> | |
+#include <netdb.h> | |
+#include <stdarg.h> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+#include <unistd.h> | |
+ | |
+#include "https.h" | |
+#include "util.h" | |
+#include "youtube.h" | |
+ | |
+#define OUT(s) fputs((s), stdout) | |
+#define OUTESCAPE(s) printescape((s)) | |
+ | |
+/* | |
+ * Write the string s to stdout, leaving out every byte for which | |
+ * iscntrl() is true. | |
+ */ | |
+void | |
+printescape(const char *s) | |
+{ | |
+	const unsigned char *p = (const unsigned char *)s; | |
+ | |
+	while (*p) { | |
+		if (!iscntrl(*p)) | |
+			putchar(*p); | |
+		p++; | |
+	} | |
+} | |
+ | |
+/* | |
+ * Print the items of r to stdout, one line per item, as TAB-separated | |
+ * fields: id, embed URL, title, publishedat, viewcount, duration. | |
+ * The embed URL field is left empty when the id is empty. | |
+ * Items whose linktype is Channel, Movie or Playlist are skipped. | |
+ * | |
+ * Restricts the process to pledge("stdio") before printing and exits | |
+ * with status 1 if that call fails. Always returns 0. | |
+ */ | |
+int | |
+render(struct search_response *r) | |
+{ | |
+	struct item *v; | |
+	size_t n; | |
+ | |
+	if (pledge("stdio", NULL) == -1) { | |
+		fprintf(stderr, "pledge: %s\n", strerror(errno)); | |
+		exit(1); | |
+	} | |
+ | |
+	for (n = 0; n < r->nitems; n++) { | |
+		v = &r->items[n]; | |
+ | |
+		/* only plain video entries are printed */ | |
+		if (v->linktype == Channel || | |
+		    v->linktype == Movie || | |
+		    v->linktype == Playlist) | |
+			continue; | |
+ | |
+		OUTESCAPE(v->id); | |
+		OUT("\t"); | |
+		if (v->id[0] != '\0') { | |
+			OUT("https://www.youtube.com/embed/"); | |
+			OUTESCAPE(v->id); | |
+		} | |
+		OUT("\t"); | |
+		OUTESCAPE(v->title); | |
+		OUT("\t"); | |
+		OUTESCAPE(v->publishedat); | |
+		OUT("\t"); | |
+		OUTESCAPE(v->viewcount); | |
+		OUT("\t"); | |
+		OUTESCAPE(v->duration); | |
+		OUT("\n"); | |
+	} | |
+ | |
+	return 0; | |
+} | |
+ | |
+/* | |
+ * Print the command-line synopsis to stderr, using argv0 as the program | |
+ * name, then terminate the process with exit status 1. Does not return. | |
+ */ | |
+static void | |
+usage(const char *argv0) | |
+{ | |
+	(void)fprintf(stderr, "usage: %s <channelid>\n", argv0); | |
+	exit(1); | |
+} | |
+ | |
+/* | |
+ * Entry point: channel2tsv <channelid>. | |
+ * | |
+ * Limits the process with pledge() and unveil() (only TLS_CA_CERT_FILE is | |
+ * made readable), URI-encodes the channel id given as first argument, | |
+ * fetches the channel videos and prints them with render(). | |
+ * | |
+ * Exits with status 1 when pledge/unveil fail, when the argument is missing, | |
+ * empty or rejected by uriencode(), or when no items were returned. | |
+ */ | |
+int | |
+main(int argc, char *argv[]) | |
+{ | |
+	char encoded[1024]; | |
+	struct search_response *resp; | |
+ | |
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1) { | |
+		fprintf(stderr, "pledge: %s\n", strerror(errno)); | |
+		exit(1); | |
+	} | |
+	/* expose the CA file read-only, then lock further unveil() calls */ | |
+	if (unveil(TLS_CA_CERT_FILE, "r") == -1 || | |
+	    unveil(NULL, NULL) == -1) { | |
+		fprintf(stderr, "unveil: %s\n", strerror(errno)); | |
+		exit(1); | |
+	} | |
+ | |
+	/* usage() does not return; checks run left to right */ | |
+	if (argc < 2 || argv[1][0] == '\0' || | |
+	    !uriencode(argv[1], encoded, sizeof(encoded))) | |
+		usage(argv[0]); | |
+ | |
+	resp = youtube_channel_videos(encoded); | |
+	if (resp == NULL || resp->nitems == 0) | |
+		exit(1); | |
+ | |
+	render(resp); | |
+ | |
+	return 0; | |
+} | |
diff --git a/youtube/youtube.c b/youtube/youtube.c | |
@@ -22,6 +22,20 @@ youtube_request(const char *path) | |
} | |
static char * | |
+request_channel_videos(const char *channelid) | |
+{ | |
+ char path[4096]; | |
+ int r; | |
+ | |
+ r = snprintf(path, sizeof(path), "/channel/%s/videos", channelid); | |
+ /* check if request is too long (truncation) */ | |
+ if (r < 0 || (size_t)r >= sizeof(path)) | |
+ return NULL; | |
+ | |
+ return youtube_request(path); | |
+} | |
+ | |
+static char * | |
request_search(const char *s, const char *page, const char *order) | |
{ | |
char path[4096]; | |
@@ -90,11 +104,11 @@ processnode(struct json_node *nodes, size_t depth, const c… | |
/* new item, structures can be very deep, just check the end for: | |
(items|contents)[].videoRenderer objects */ | |
if (depth >= 3 && | |
- nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
- nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
+// nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
+// nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
nodes[depth - 1].type == JSON_TYPE_OBJECT && | |
- (!strcmp(nodes[depth - 3].name, "items") || | |
- !strcmp(nodes[depth - 3].name, "contents")) && | |
+// (!strcmp(nodes[depth - 3].name, "items") || | |
+// !strcmp(nodes[depth - 3].name, "content")) && | |
!strcmp(nodes[depth - 1].name, "videoRenderer")) { | |
r->nitems++; | |
return; | |
@@ -105,27 +119,28 @@ processnode(struct json_node *nodes, size_t depth, const … | |
item = &(r->items[r->nitems - 1]); | |
if (depth >= 4 && | |
- nodes[depth - 4].type == JSON_TYPE_ARRAY && | |
- nodes[depth - 3].type == JSON_TYPE_OBJECT && | |
- nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
+// nodes[depth - 4].type == JSON_TYPE_ARRAY && | |
+// nodes[depth - 3].type == JSON_TYPE_OBJECT && | |
+// nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
nodes[depth - 1].type == JSON_TYPE_STRING && | |
- (!strcmp(nodes[depth - 4].name, "items") || | |
- !strcmp(nodes[depth - 4].name, "contents")) && | |
+// (!strcmp(nodes[depth - 4].name, "items") || | |
+// !strcmp(nodes[depth - 4].name, "contents")) && | |
!strcmp(nodes[depth - 2].name, "videoRenderer") && | |
!strcmp(nodes[depth - 1].name, "videoId")) { | |
strlcpy(item->id, value, sizeof(item->id)); | |
} | |
if (depth >= 7 && | |
- nodes[depth - 7].type == JSON_TYPE_ARRAY && | |
- nodes[depth - 6].type == JSON_TYPE_OBJECT && | |
+// nodes[depth - 7].type == JSON_TYPE_ARRAY && | |
+// nodes[depth - 6].type == JSON_TYPE_OBJECT && | |
nodes[depth - 5].type == JSON_TYPE_OBJECT && | |
nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
nodes[depth - 1].type == JSON_TYPE_STRING && | |
- (!strcmp(nodes[depth - 7].name, "items") || | |
- !strcmp(nodes[depth - 7].name, "contents")) && | |
+// (!strcmp(nodes[depth - 7].name, "items") || | |
+// !strcmp(nodes[depth - 7].name, "contents")) && | |
+ | |
!strcmp(nodes[depth - 5].name, "videoRenderer") && | |
!strcmp(nodes[depth - 4].name, "title") && | |
!strcmp(nodes[depth - 3].name, "runs") && | |
@@ -135,13 +150,13 @@ processnode(struct json_node *nodes, size_t depth, const … | |
} | |
if (depth >= 5 && | |
- nodes[depth - 5].type == JSON_TYPE_ARRAY && | |
+// nodes[depth - 5].type == JSON_TYPE_ARRAY && | |
nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
nodes[depth - 3].type == JSON_TYPE_OBJECT && | |
nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
nodes[depth - 1].type == JSON_TYPE_STRING && | |
- (!strcmp(nodes[depth - 5].name, "items") || | |
- !strcmp(nodes[depth - 5].name, "contents")) && | |
+// (!strcmp(nodes[depth - 5].name, "items") || | |
+// !strcmp(nodes[depth - 5].name, "contents")) && | |
!strcmp(nodes[depth - 3].name, "videoRenderer") && | |
!strcmp(nodes[depth - 1].name, "simpleText")) { | |
if (!strcmp(nodes[depth - 2].name, "viewCountText") && | |
@@ -157,7 +172,7 @@ processnode(struct json_node *nodes, size_t depth, const ch… | |
} | |
if (depth >= 9 && | |
- nodes[depth - 9].type == JSON_TYPE_ARRAY && | |
+// nodes[depth - 9].type == JSON_TYPE_ARRAY && | |
nodes[depth - 8].type == JSON_TYPE_OBJECT && | |
nodes[depth - 7].type == JSON_TYPE_OBJECT && | |
nodes[depth - 6].type == JSON_TYPE_OBJECT && | |
@@ -166,8 +181,8 @@ processnode(struct json_node *nodes, size_t depth, const ch… | |
nodes[depth - 3].type == JSON_TYPE_OBJECT && | |
nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
nodes[depth - 1].type == JSON_TYPE_STRING && | |
- (!strcmp(nodes[depth - 9].name, "items") || | |
- !strcmp(nodes[depth - 9].name, "contents")) && | |
+// (!strcmp(nodes[depth - 9].name, "items") || | |
+// !strcmp(nodes[depth - 9].name, "contents")) && | |
!strcmp(nodes[depth - 7].name, "videoRenderer") && | |
!strcmp(nodes[depth - 6].name, "longBylineText") && | |
!strcmp(nodes[depth - 5].name, "runs") && | |
@@ -179,15 +194,15 @@ processnode(struct json_node *nodes, size_t depth, const … | |
} | |
if (depth >= 7 && | |
- nodes[depth - 7].type == JSON_TYPE_ARRAY && | |
+// nodes[depth - 7].type == JSON_TYPE_ARRAY && | |
nodes[depth - 6].type == JSON_TYPE_OBJECT && | |
nodes[depth - 5].type == JSON_TYPE_OBJECT && | |
nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
nodes[depth - 1].type == JSON_TYPE_STRING && | |
- (!strcmp(nodes[depth - 7].name, "items") || | |
- !strcmp(nodes[depth - 7].name, "contents")) && | |
+// (!strcmp(nodes[depth - 7].name, "items") || | |
+// !strcmp(nodes[depth - 7].name, "contents")) && | |
!strcmp(nodes[depth - 5].name, "videoRenderer") && | |
!strcmp(nodes[depth - 4].name, "longBylineText") && | |
!strcmp(nodes[depth - 3].name, "runs")) { | |
@@ -231,3 +246,37 @@ youtube_search(const char *rawsearch, const char *page, co… | |
return r; | |
} | |
+ | |
+/* | |
+ * Request the videos page for channelid and feed the JSON located by | |
+ * extractjson() through parsejson() with processnode as callback, filling | |
+ * a newly calloc()ed search_response. | |
+ * | |
+ * Returns the response on success; it was allocated here and is handed to | |
+ * the caller. Returns NULL when the request fails, the "\r\n\r\n" header | |
+ * separator is missing, the allocation fails, extractjson() returns -1 or | |
+ * parsejson() returns a negative value. | |
+ * | |
+ * NOTE(review): the buffer returned by request_channel_videos() is never | |
+ * released in this function; whether it needs to be is not visible from | |
+ * here - confirm against youtube_request(). | |
+ */ | |
+struct search_response * | |
+youtube_channel_videos(const char *channelid) | |
+{ | |
+	struct search_response *r; | |
+	char *data, *s, *start, *end; | |
+	int ret; | |
+ | |
+	if (!(data = request_channel_videos(channelid))) | |
+		return NULL; | |
+ | |
+	if (!(s = strstr(data, "\r\n\r\n"))) | |
+		return NULL; /* invalid response */ | |
+	/* skip header */ | |
+	s += strlen("\r\n\r\n"); | |
+ | |
+	if (!(r = calloc(1, sizeof(*r)))) | |
+		return NULL; | |
+ | |
+	if (extractjson(s, &start, &end) == -1) { | |
+		/* terminate the diagnostic so it does not run into later output */ | |
+		fprintf(stderr, "error extracting JSON\n"); | |
+		free(r); | |
+		return NULL; | |
+	} | |
+ | |
+	ret = parsejson(start, end - start, processnode, r); | |
+	if (ret < 0) { | |
+		/* parse errors are reported only through the NULL return */ | |
+		free(r); | |
+		return NULL; | |
+	} | |
+ | |
+	return r; | |
+} | |
diff --git a/youtube/youtube.h b/youtube/youtube.h | |
@@ -19,3 +19,6 @@ struct search_response { | |
struct search_response * | |
youtube_search(const char *rawsearch, const char *page, const char *order); | |
+ | |
+struct search_response * | |
+youtube_channel_videos(const char *channelid); |