mirror of
https://github.com/searxng/searxng
synced 2024-01-01 18:24:07 +00:00
Replace every run of whitespace with a single space in HTML text
This commit is contained in:
parent
a3d444ab85
commit
52a57ee045
@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser):
|
||||
|
||||
|
||||
def html_to_text(html):
    """Return the text content of an HTML fragment with whitespace collapsed.

    Every run of whitespace in ``html`` (spaces, tabs, newlines, ...) is
    replaced by a single space, and leading/trailing whitespace is dropped,
    before the markup is fed to ``HTMLTextExtractor``.

    :param html: HTML markup as a string.
    :returns: whatever ``HTMLTextExtractor.get_text()`` yields for the
        whitespace-normalized markup.
    """
    # str.split() with no argument splits on any whitespace run, newlines
    # included, so a separate '\n' -> ' ' replacement beforehand is redundant.
    normalized = ' '.join(html.split())
    extractor = HTMLTextExtractor()
    extractor.feed(normalized)
    return extractor.get_text()
|
||||
|
Loading…
Reference in New Issue
Block a user