Add a cleaner for subtitles. - annna - Annna the nice friendly bot. | |
git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws6… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
--- | |
commit 30eb06f31d2a42e280ea01521f6baa1bca2fec33 | |
parent a622486a185d90ca0225311dbc9e88a9fbab5994 | |
Author: Annna Robert-Houdin <[email protected]> | |
Date: Sun, 10 May 2020 18:57:47 +0200 | |
Add a cleaner for subtitles. | |
Be careful, it is under Mafia Domain. | |
Thanks leot! | |
Diffstat: | |
M subtitle-paste | 3 +++ | |
A ytautosubcleaner.awk | 51 +++++++++++++++++++++++++++++… | |
2 files changed, 54 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/subtitle-paste b/subtitle-paste | |
@@ -22,6 +22,9 @@ if [ $(stat -c%s "${ofile}") -eq 0 ]; | |
then | |
rm "${ofile}" | |
else | |
+ # Make it more human readable. | |
+ awk -f /home/annna/bin/ytautosubcleaner.awk < "${ofile}" > "${ofile}.bak" | |
+ mv "${ofile}.bak" "${ofile}" | |
printf "gopher://bitreich.org/0/p/%s\n" "${ofile}" | |
fi | |
diff --git a/ytautosubcleaner.awk b/ytautosubcleaner.awk | |
@@ -0,0 +1,51 @@ | |
+#!/usr/bin/awk -f | |
+ | |
+# This file is licensed under Mafia Domain. So be careful. | |
+ | |
+# | |
+# Make YouTube automatic subtitles more human readable: keep only | |
+# captions longer than two lines (timestamp included), drop <c> ones. | |
+ | |
+/^[0-9]+:[0-9]+:[0-9]+\.[0-9]+ -->/ { | |
+ sub(/ align:start position:0%$/, "") | |
+ | |
+ if (caption && split(caption, lines, "\n") > 2) { | |
+ print caption | |
+ } | |
+ | |
+ ignore = 0 | |
+ caption = $0 | |
+ next | |
+} | |
+ | |
+ignore { | |
+ next | |
+} | |
+ | |
+# skip lines that contain nothing but spaces | |
+/^ +$/ { | |
+ next | |
+} | |
+ | |
+# a line with "><c>" marks the current caption as <c> garbage that will | |
+# be repeated in the next caption: drop it and skip its remaining lines | |
+caption && /><c>/ { | |
+ ignore = 1 | |
+ caption = "" | |
+ next | |
+} | |
+ | |
+caption { | |
+ caption = caption "\n" $0 | |
+ next | |
+} | |
+ | |
+{ | |
+} | |
+ | |
+END { | |
+ if (caption && split(caption, lines, "\n") > 2) { | |
+ print caption | |
+ } | |
+} | |