Introduction
Introduction Statistics Contact Development Disclaimer Help
youtube: add channel2tsv output - frontends - front-ends for some sites (experi…
Log
Files
Refs
README
LICENSE
---
commit 2be30b4f834c64d4478e8cff231ee9b29601edc0
parent 0ddeddd9e7acba6abe47ccaf8563b712cf96a037
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 11 Feb 2023 19:01:42 +0100
youtube: add channel2tsv output
* Make the parser a bit less strict so it can also parse the channel page
with videos.
* Add a function that can fetch the channel videos by channel ID.
* Add a tool that outputs channel videos to a TAB-separated format.
Diffstat:
M Makefile | 6 +++++-
A youtube/channel2tsv.c | 108 +++++++++++++++++++++++++++++…
M youtube/youtube.c | 93 +++++++++++++++++++++++------…
M youtube/youtube.h | 3 +++
4 files changed, 187 insertions(+), 23 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -25,6 +25,7 @@ BIN = \
reddit/cli \
reddit/gopher \
youtube/cgi \
+ youtube/channel2tsv \
youtube/cli \
youtube/gopher
@@ -97,11 +98,14 @@ twitch/cgi: ${LIB} twitch/twitch.o twitch/cgi.o
twitch/gopher: ${LIB} twitch/twitch.o twitch/gopher.o
${CC} -o $@ twitch/gopher.o twitch/twitch.o ${LIB} ${LDFLAGS} ${LIBTLS…
-youtube: youtube/cgi youtube/cli youtube/gopher
+youtube: youtube/cgi youtube/channel2tsv youtube/cli youtube/gopher
youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o
${CC} -o $@ youtube/cgi.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS…
+youtube/channel2tsv: ${LIB} youtube/youtube.o youtube/channel2tsv.o
+ ${CC} -o $@ youtube/channel2tsv.o youtube/youtube.o ${LIB} ${LDFLAGS} …
+
youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o
${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS…
diff --git a/youtube/channel2tsv.c b/youtube/channel2tsv.c
@@ -0,0 +1,108 @@
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "https.h"
+#include "util.h"
+#include "youtube.h"
+
+#define OUT(s) fputs((s), stdout)
+#define OUTESCAPE(s) printescape((s))
+
+/* Write s to stdout, dropping every byte for which iscntrl() is true. */
+void
+printescape(const char *s)
+{
+	unsigned char c;
+
+	while ((c = (unsigned char)*s++) != '\0') {
+		if (iscntrl(c))
+			continue;
+		putchar(c);
+	}
+}
+
+/*
+ * Print the items of r to stdout, one line per item, as TAB-separated
+ * fields: id, embed URL (left empty when the id is empty), title,
+ * publish date, view count and duration.
+ * Items whose linktype is Channel, Movie or Playlist are skipped.
+ * Calls pledge("stdio") before printing and exits with status 1 if that
+ * fails. Always returns 0.
+ */
+int
+render(struct search_response *r)
+{
+	struct item *v;
+	size_t n, c;
+
+	if (pledge("stdio", NULL) == -1) {
+		fprintf(stderr, "pledge: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	for (n = 0; n < r->nitems; n++) {
+		v = &r->items[n];
+
+		/* channels, movies and playlists are not listed */
+		if (v->linktype == Channel || v->linktype == Movie ||
+		    v->linktype == Playlist)
+			continue;
+
+		/* the columns that follow the id and the URL, in output order */
+		const char *cols[] = {
+			v->title, v->publishedat, v->viewcount, v->duration
+		};
+
+		OUTESCAPE(v->id);
+		OUT("\t");
+		if (v->id[0] != '\0') {
+			OUT("https://www.youtube.com/embed/");
+			OUTESCAPE(v->id);
+		}
+		for (c = 0; c < sizeof(cols) / sizeof(cols[0]); c++) {
+			OUT("\t");
+			OUTESCAPE(cols[c]);
+		}
+		OUT("\n");
+	}
+
+	return 0;
+}
+
+/* Report the expected invocation on stderr, then terminate with status 1. */
+static void
+usage(const char *argv0)
+{
+	const int status = 1;
+
+	(void)fprintf(stderr, "usage: %s <channelid>\n", argv0);
+	exit(status);
+}
+
+/*
+ * Entry point: print the videos of the channel named by argv[1] in
+ * TAB-separated form.
+ * Exits with status 1 when pledge() or unveil() fails, when the channel id
+ * is missing, empty or rejected by uriencode() (after printing the usage
+ * line), and when no videos were retrieved.
+ */
+int
+main(int argc, char *argv[])
+{
+	char channelid[1024];
+	struct search_response *res;
+
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1) {
+		fprintf(stderr, "pledge: %s\n", strerror(errno));
+		exit(1);
+	}
+	/* unveil the CA certificate file read-only, then finish with
+	   unveil(NULL, NULL); the second call only runs if the first succeeded */
+	if (unveil(TLS_CA_CERT_FILE, "r") == -1 || unveil(NULL, NULL) == -1) {
+		fprintf(stderr, "unveil: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	/* short-circuit evaluation keeps uriencode() away from a missing or
+	   empty argument */
+	if (argc < 2 || argv[1][0] == '\0' ||
+	    !uriencode(argv[1], channelid, sizeof(channelid)))
+		usage(argv[0]);
+
+	res = youtube_channel_videos(channelid);
+	if (res == NULL || res->nitems == 0)
+		return 1;
+
+	render(res);
+
+	return 0;
+}
diff --git a/youtube/youtube.c b/youtube/youtube.c
@@ -22,6 +22,20 @@ youtube_request(const char *path)
}
static char *
+request_channel_videos(const char *channelid)
+{
+	/* Build "/channel/<channelid>/videos" and hand it to youtube_request().
+	   Returns NULL when snprintf() fails or the path does not fit. */
+	char path[4096];
+	int len = snprintf(path, sizeof(path), "/channel/%s/videos", channelid);
+
+	if (len >= 0 && (size_t)len < sizeof(path))
+		return youtube_request(path);
+
+	return NULL; /* formatting error or truncated path */
+}
+
+static char *
request_search(const char *s, const char *page, const char *order)
{
char path[4096];
@@ -90,11 +104,11 @@ processnode(struct json_node *nodes, size_t depth, const c…
/* new item, structures can be very deep, just check the end for:
(items|contents)[].videoRenderer objects */
if (depth >= 3 &&
- nodes[depth - 3].type == JSON_TYPE_ARRAY &&
- nodes[depth - 2].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 3].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_OBJECT &&
- (!strcmp(nodes[depth - 3].name, "items") ||
- !strcmp(nodes[depth - 3].name, "contents")) &&
+// (!strcmp(nodes[depth - 3].name, "items") ||
+// !strcmp(nodes[depth - 3].name, "contents")) &&
!strcmp(nodes[depth - 1].name, "videoRenderer")) {
r->nitems++;
return;
@@ -105,27 +119,28 @@ processnode(struct json_node *nodes, size_t depth, const …
item = &(r->items[r->nitems - 1]);
if (depth >= 4 &&
- nodes[depth - 4].type == JSON_TYPE_ARRAY &&
- nodes[depth - 3].type == JSON_TYPE_OBJECT &&
- nodes[depth - 2].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 4].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 3].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 4].name, "items") ||
- !strcmp(nodes[depth - 4].name, "contents")) &&
+// (!strcmp(nodes[depth - 4].name, "items") ||
+// !strcmp(nodes[depth - 4].name, "contents")) &&
!strcmp(nodes[depth - 2].name, "videoRenderer") &&
!strcmp(nodes[depth - 1].name, "videoId")) {
strlcpy(item->id, value, sizeof(item->id));
}
if (depth >= 7 &&
- nodes[depth - 7].type == JSON_TYPE_ARRAY &&
- nodes[depth - 6].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 7].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 6].type == JSON_TYPE_OBJECT &&
nodes[depth - 5].type == JSON_TYPE_OBJECT &&
nodes[depth - 4].type == JSON_TYPE_OBJECT &&
nodes[depth - 3].type == JSON_TYPE_ARRAY &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 7].name, "items") ||
- !strcmp(nodes[depth - 7].name, "contents")) &&
+// (!strcmp(nodes[depth - 7].name, "items") ||
+// !strcmp(nodes[depth - 7].name, "contents")) &&
+
!strcmp(nodes[depth - 5].name, "videoRenderer") &&
!strcmp(nodes[depth - 4].name, "title") &&
!strcmp(nodes[depth - 3].name, "runs") &&
@@ -135,13 +150,13 @@ processnode(struct json_node *nodes, size_t depth, const …
}
if (depth >= 5 &&
- nodes[depth - 5].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 5].type == JSON_TYPE_ARRAY &&
nodes[depth - 4].type == JSON_TYPE_OBJECT &&
nodes[depth - 3].type == JSON_TYPE_OBJECT &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 5].name, "items") ||
- !strcmp(nodes[depth - 5].name, "contents")) &&
+// (!strcmp(nodes[depth - 5].name, "items") ||
+// !strcmp(nodes[depth - 5].name, "contents")) &&
!strcmp(nodes[depth - 3].name, "videoRenderer") &&
!strcmp(nodes[depth - 1].name, "simpleText")) {
if (!strcmp(nodes[depth - 2].name, "viewCountText") &&
@@ -157,7 +172,7 @@ processnode(struct json_node *nodes, size_t depth, const ch…
}
if (depth >= 9 &&
- nodes[depth - 9].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 9].type == JSON_TYPE_ARRAY &&
nodes[depth - 8].type == JSON_TYPE_OBJECT &&
nodes[depth - 7].type == JSON_TYPE_OBJECT &&
nodes[depth - 6].type == JSON_TYPE_OBJECT &&
@@ -166,8 +181,8 @@ processnode(struct json_node *nodes, size_t depth, const ch…
nodes[depth - 3].type == JSON_TYPE_OBJECT &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 9].name, "items") ||
- !strcmp(nodes[depth - 9].name, "contents")) &&
+// (!strcmp(nodes[depth - 9].name, "items") ||
+// !strcmp(nodes[depth - 9].name, "contents")) &&
!strcmp(nodes[depth - 7].name, "videoRenderer") &&
!strcmp(nodes[depth - 6].name, "longBylineText") &&
!strcmp(nodes[depth - 5].name, "runs") &&
@@ -179,15 +194,15 @@ processnode(struct json_node *nodes, size_t depth, const …
}
if (depth >= 7 &&
- nodes[depth - 7].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 7].type == JSON_TYPE_ARRAY &&
nodes[depth - 6].type == JSON_TYPE_OBJECT &&
nodes[depth - 5].type == JSON_TYPE_OBJECT &&
nodes[depth - 4].type == JSON_TYPE_OBJECT &&
nodes[depth - 3].type == JSON_TYPE_ARRAY &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 7].name, "items") ||
- !strcmp(nodes[depth - 7].name, "contents")) &&
+// (!strcmp(nodes[depth - 7].name, "items") ||
+// !strcmp(nodes[depth - 7].name, "contents")) &&
!strcmp(nodes[depth - 5].name, "videoRenderer") &&
!strcmp(nodes[depth - 4].name, "longBylineText") &&
!strcmp(nodes[depth - 3].name, "runs")) {
@@ -231,3 +246,37 @@ youtube_search(const char *rawsearch, const char *page, co…
return r;
}
+
+/*
+ * Request the videos page of the channel `channelid` and feed the JSON
+ * that extractjson() locates in the response body through parsejson()
+ * with processnode as callback.
+ * Returns a calloc()ed search_response, or NULL when the request fails,
+ * the response has no "\r\n\r\n" header separator, the allocation fails,
+ * or the JSON cannot be extracted or parsed.
+ */
+struct search_response *
+youtube_channel_videos(const char *channelid)
+{
+	struct search_response *r;
+	char *data, *s, *start, *end;
+
+	if (!(data = request_channel_videos(channelid)))
+		return NULL;
+
+	if (!(s = strstr(data, "\r\n\r\n")))
+		return NULL; /* invalid response */
+	/* skip header */
+	s += strlen("\r\n\r\n");
+
+	if (!(r = calloc(1, sizeof(*r))))
+		return NULL;
+
+	if (extractjson(s, &start, &end) == -1) {
+		/* terminate the message so later stderr output starts on its own line */
+		fprintf(stderr, "error extracting JSON\n");
+		free(r);
+		return NULL;
+	}
+
+	/* a parse failure is signalled to the caller through NULL only */
+	if (parsejson(start, end - start, processnode, r) < 0) {
+		free(r);
+		return NULL;
+	}
+
+	return r;
+}
diff --git a/youtube/youtube.h b/youtube/youtube.h
@@ -19,3 +19,6 @@ struct search_response {
struct search_response *
youtube_search(const char *rawsearch, const char *page, const char *order);
+
+struct search_response *
+youtube_channel_videos(const char *channelid);
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.