Introduction
Introduction Statistics Contact Development Disclaimer Help
Normalize unicode - toot - Unnamed repository; edit this file 'description' to name the repository.
Log
Files
Refs
LICENSE
---
commit 2ecc6a28c6b1cd2efd4bd94d801954e87ab1b320
parent cb1f7b4e61e66ceecf91fe286ac9f44166ef3b25
Author: Ivan Habunek <[email protected]>
Date: Sun, 21 Jan 2018 16:39:40 +0100
Normalize unicode
Diffstat:
toot/utils.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
---
diff --git a/toot/utils.py b/toot/utils.py
@@ -2,6 +2,7 @@
import re
import socket
+import unicodedata
from bs4 import BeautifulSoup
@@ -10,7 +11,9 @@ from toot.exceptions import ConsoleError
def get_text(html):
"""Converts html to text, strips all tags."""
- return BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
+ text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
+
+ return unicodedata.normalize('NFKC', text)
def parse_html(html):
You are viewing proxied material from vernunftzentrum.de. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.