youtube: fix JSON extraction - frontends - front-ends for some sites (experimen… | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 416869b0c0f2efa0f43c93a59c6d9a89c01d9aec | |
parent 5dbcb6f3c9ab9a48446054e954147b652fb26407 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sat, 17 Oct 2020 18:38:35 +0200 | |
youtube: fix JSON extraction | |
YouTube does not always serve the same content. This was a recent YouTube | |
change to intentionally break scrapers again. | |
Diffstat: | |
M youtube/youtube.c | 17 +++++++++++++---- | |
1 file changed, 13 insertions(+), 4 deletions(-) | |
--- | |
diff --git a/youtube/youtube.c b/youtube/youtube.c | |
@@ -55,12 +55,21 @@ request_search(const char *s, const char *page, const char … | |
int | |
extractjson(const char *s, char **start, char **end) | |
{ | |
- if (!(*start = strstr(s, "window[\"ytInitialData\"] = "))) | |
+ *start = strstr(s, "window[\"ytInitialData\"] = "); | |
+ if (*start) { | |
+ (*start) += sizeof("window[\"ytInitialData\"] = ") - 1; | |
+ } else { | |
+ *start = strstr(s, "var ytInitialData = "); | |
+ if (*start) | |
+ (*start) += sizeof("var ytInitialData = ") - 1; | |
+ } | |
+ if (!*start) | |
return -1; | |
- if (!(*end = strstr(*start, "};\n"))) | |
+ *end = strstr(*start, "};\n"); | |
+ if (!*end) | |
+ *end = strstr(*start, "}; \n"); | |
+ if (!*end) | |
return -1; | |
- | |
- (*start) += sizeof("window[\"ytInitialData\"] = ") - 1; | |
(*end)++; | |
return 0; |