Do not bail on unicode updown. - annna - Annna the nice friendly bot. | |
git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws6… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
--- | |
commit 4ffd92dd7f0d31c1861358af130ae196cf2eebc8 | |
parent 1f6ef5cb4d6daf7e972a972de7b2ef208e2612e6 | |
Author: Annna Robert-Houdin <[email protected]> | |
Date: Mon, 14 Oct 2024 21:10:41 +0200 | |
Do not bail on unicode updown. | |
Diffstat: | |
M updown | 115 ++++++++++++++++-------------… | |
1 file changed, 59 insertions(+), 56 deletions(-) | |
--- | |
diff --git a/updown b/updown | |
@@ -11,60 +11,60 @@ import sys | |
import re | |
replacements = { | |
- u'A' : u'\N{FOR ALL}', | |
- u'B' : u'\N{GREEK SMALL LETTER XI}', | |
- u'C' : u'\N{ROMAN NUMERAL REVERSED ONE HUNDRED}', | |
- u'D' : u'\N{LEFT HALF BLACK CIRCLE}', | |
- u'E' : u'\N{LATIN CAPITAL LETTER REVERSED E}', | |
- u'F' : u'\N{TURNED CAPITAL F}', | |
- u'G' : u'\N{TURNED SANS-SERIF CAPITAL G}', | |
- u'J' : u'\N{LATIN SMALL LETTER LONG S}', | |
- u'K' : u'\N{RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT}', | |
- u'L' : u'\ua780', | |
- u'M' : u'W', | |
- u'N' : u'\N{LATIN LETTER SMALL CAPITAL REVERSED N}', | |
- u'P' : u'\N{CYRILLIC CAPITAL LETTER KOMI DE}', | |
- u'Q' : u'\N{GREEK CAPITAL LETTER OMICRON WITH TONOS}', | |
- u'R' : u'\N{LATIN LETTER SMALL CAPITAL TURNED R}', | |
- u'T' : u'\N{UP TACK}', | |
- u'U' : u'\N{INTERSECTION}', | |
- u'V' : u'\u0245', | |
- u'Y' : u'\N{TURNED SANS-SERIF CAPITAL Y}', | |
- 'a' : u"\u0250", | |
- 'b' : u'q', | |
- 'c' : u"\u0254", | |
- 'd' : u'p', | |
- 'e' : u"\u01DD", | |
- 'f' : u"\u025F", | |
- 'g' : u"\u0183", | |
- 'h' : u'\u0265', | |
- 'i' : u'\u0131', | |
- 'j' : u'\u027E', | |
- 'k' : u'\u029E', | |
- 'm' : u'\u026F', | |
- 'n' : u'u', | |
- 'r' : u'\u0279', | |
- 't' : u'\u0287', | |
- 'p' : u'd', | |
- 'u' : u'n', | |
- 'q' : u'b', | |
- 'v' : u'\u028C', | |
- 'w' : u'\u028D', | |
- 'y' : u'\u028E', | |
- '.' : u'\u02D9', | |
- '[' : u']', | |
- '(' : u')', | |
- '{' : u'}', | |
- '?' : u'\u00BF', | |
- '!' : u'\u00A1', | |
- "\'" :u',', | |
- '>' : u'<', | |
- '<' : u'>', | |
- '_' : u'\u203E', | |
- ';' : u'\u061B', | |
- '\u203F' : u'\u2040', | |
- '\u2045' : u'\u2046', | |
- '\u2234' : u'\u2235', | |
+ u'A' : u'\N{FOR ALL}', | |
+ u'B' : u'\N{GREEK SMALL LETTER XI}', | |
+ u'C' : u'\N{ROMAN NUMERAL REVERSED ONE HUNDRED}', | |
+ u'D' : u'\N{LEFT HALF BLACK CIRCLE}', | |
+ u'E' : u'\N{LATIN CAPITAL LETTER REVERSED E}', | |
+ u'F' : u'\N{TURNED CAPITAL F}', | |
+ u'G' : u'\N{TURNED SANS-SERIF CAPITAL G}', | |
+ u'J' : u'\N{LATIN SMALL LETTER LONG S}', | |
+ u'K' : u'\N{RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT}', | |
+ u'L' : u'\ua780', | |
+ u'M' : u'W', | |
+ u'N' : u'\N{LATIN LETTER SMALL CAPITAL REVERSED N}', | |
+ u'P' : u'\N{CYRILLIC CAPITAL LETTER KOMI DE}', | |
+ u'Q' : u'\N{GREEK CAPITAL LETTER OMICRON WITH TONOS}', | |
+ u'R' : u'\N{LATIN LETTER SMALL CAPITAL TURNED R}', | |
+ u'T' : u'\N{UP TACK}', | |
+ u'U' : u'\N{INTERSECTION}', | |
+ u'V' : u'\u0245', | |
+ u'Y' : u'\N{TURNED SANS-SERIF CAPITAL Y}', | |
+ 'a' : u"\u0250", | |
+ 'b' : u'q', | |
+ 'c' : u"\u0254", | |
+ 'd' : u'p', | |
+ 'e' : u"\u01DD", | |
+ 'f' : u"\u025F", | |
+ 'g' : u"\u0183", | |
+ 'h' : u'\u0265', | |
+ 'i' : u'\u0131', | |
+ 'j' : u'\u027E', | |
+ 'k' : u'\u029E', | |
+ 'm' : u'\u026F', | |
+ 'n' : u'u', | |
+ 'r' : u'\u0279', | |
+ 't' : u'\u0287', | |
+ 'p' : u'd', | |
+ 'u' : u'n', | |
+ 'q' : u'b', | |
+ 'v' : u'\u028C', | |
+ 'w' : u'\u028D', | |
+ 'y' : u'\u028E', | |
+ '.' : u'\u02D9', | |
+ '[' : u']', | |
+ '(' : u')', | |
+ '{' : u'}', | |
+ '?' : u'\u00BF', | |
+ '!' : u'\u00A1', | |
+ "\'" :u',', | |
+ '>' : u'<', | |
+ '<' : u'>', | |
+ '_' : u'\u203E', | |
+ ';' : u'\u061B', | |
+ '\u203F' : u'\u2040', | |
+ '\u2045' : u'\u2046', | |
+ '\u2234' : u'\u2235', | |
} | |
def main(args): | |
@@ -79,8 +79,11 @@ def main(args): | |
except: | |
istr += i | |
- istr = istr.decode("utf-8").encode("utf-8") | |
- print istr | |
+ try: | |
+ istr = istr.encode("utf-8") | |
+ except UnicodeDecodeError: | |
+ istr = istr.decode("utf-8").encode("utf-8") | |
+ print(istr) | |
if __name__ == "__main__": |