Normalize unicode - toot - Unnamed repository; edit this file 'description' to … | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 2ecc6a28c6b1cd2efd4bd94d801954e87ab1b320 | |
parent cb1f7b4e61e66ceecf91fe286ac9f44166ef3b25 | |
Author: Ivan Habunek <[email protected]> | |
Date: Sun, 21 Jan 2018 16:39:40 +0100 | |
Normalize unicode | |
Diffstat: | |
toot/utils.py | 5 ++++- | |
1 file changed, 4 insertions(+), 1 deletion(-) | |
--- | |
diff --git a/toot/utils.py b/toot/utils.py | |
@@ -2,6 +2,7 @@ | |
import re | |
import socket | |
+import unicodedata | |
from bs4 import BeautifulSoup | |
@@ -10,7 +11,9 @@ from toot.exceptions import ConsoleError | |
def get_text(html): | |
"""Converts html to text, strips all tags.""" | |
- return BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'") | |
+ text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'") | |
+ | |
+ return unicodedata.normalize('NFKC', text) | |
def parse_html(html): |