Fix title parsing for imdb. - annna - Annna the nice friendly bot. | |
git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws6… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
--- | |
commit 0925485876164dc16cd6dd149ec600ba690b117a | |
parent 81fd3449a3013908f44bfad761da51b99430101a | |
Author: Annna Robert-Houdin <[email protected]> | |
Date: Sun, 5 Dec 2021 19:56:33 +0100 | |
Fix title parsing for imdb. | |
Thanks Bob! | |
Diffstat: | |
M imdb2gopherbay | 23 +++++++++++++++++------ | |
1 file changed, 17 insertions(+), 6 deletions(-) | |
--- | |
diff --git a/imdb2gopherbay b/imdb2gopherbay | |
@@ -6,16 +6,27 @@ then | |
exit 1 | |
fi | |
+extractjson() { | |
+awk ' | |
+/<script id="__NEXT_DATA__"/ { | |
+ match($0, "<script id=\"__NEXT_DATA__\"[^>]*>"); | |
+ s = substr($0, RSTART + RLENGTH); | |
+ match(s, "</script>"); | |
+ s = substr(s, 1, RSTART - 1); | |
+ print s; | |
+}' | |
+} | |
+ | |
imdburi="$1" | |
title="$(curl -s "${imdburi}" \ | |
- | xml2tsv 2>/dev/null \ | |
- | grep __NEXT_ \ | |
- | cut -f 4- \ | |
- | sed 's,\\\\,\\,g' \ | |
- | jshon -e head -e 9 -e 1 -e children -u \ | |
- | sed 's, - IMDb,,')" | |
+ | extractjson \ | |
+ | json2tsv \ | |
+ | grep associatedTitle.originalTitleText.text \ | |
+ | head -n 1 \ | |
+ | cut -f 3)" | |
[ -z "${title}" ] && exit 1 | |
printf "%s\n" "${title}" | |
+ |