ytautosubcleaner.awk - annna - Annna the nice friendly bot. | |
git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws6… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
--- | |
ytautosubcleaner.awk (657B) | |
--- | |
#!/usr/bin/awk -f

# This file is licensed under Mafia Domain. So be careful.

#
# Make YouTube automatic subtitles more human readable
#
# How it works: each cue (a timing line plus the lines that follow it) is
# collected in the `cue` buffer and written out only once the next timing
# line or the end of input is reached. Cues containing inline "<c>" markup
# are dropped entirely. Input seen before the first timing line, while no
# cue is buffered, is passed through unchanged.
#
# State:
#   cue       buffered text of the current cue, "" when nothing is buffered
#   skipping  non-zero while the rest of the current cue is being discarded
#

# Write out the buffered cue, but only if it consists of more than two
# newline-separated lines. Shorter buffers are silently discarded. The
# buffer itself is left untouched; callers overwrite it afterwards.
# `parts` is a local scratch array for split().
function flush_cue(    parts) {
	if (!cue) {
		return
	}
	if (split(cue, parts, "\n") > 2) {
		print cue
	}
}

# Timing line: emit the previous cue (if it qualifies) and start a new one.
/^[0-9]+:[0-9]+:[0-9]+\.[0-9]+ -->/ {
	flush_cue()

	# drop the " align:start ..." settings that trail the timestamps
	sub(/ align:start.*$/, "")

	cue = $0
	skipping = 0
	next
}

# Discard everything while a cue is being skipped, and always discard lines
# made up solely of spaces. Truly empty lines do not match here; they are
# handled by the rules below.
skipping || /^ +$/ {
	next
}

# A cue is being collected.
cue {
	if ($0 ~ /><c>/) {
		# this cue carries <c> garbage and its text will be repeated in
		# the next cue, so throw it away and skip its remaining lines
		skipping = 1
		cue = ""
	} else {
		cue = cue "\n" $0
	}
	next
}

# No cue is buffered: pass the line through as is.
{
	print $0
}

# The final cue has no following timing line to trigger its output.
END {
	flush_cue()
}