[fix] Python 3.9: use html.unescape instead of HTMLParser.unescape

This commit is contained in:
Alexandre Flament 2020-12-17 09:57:03 +01:00
parent 6402fb0b9a
commit eb33ae6893

View File

@@ -12,10 +12,10 @@
# @todo embedded (needs some md5 from video page) # @todo embedded (needs some md5 from video page)
from json import loads from json import loads
from html import unescape
from urllib.parse import urlencode from urllib.parse import urlencode
from lxml import html from lxml import html
from dateutil import parser from dateutil import parser
from html.parser import HTMLParser
from searx.utils import extract_text from searx.utils import extract_text
@@ -55,13 +55,12 @@ def response(resp):
if "content" not in response: if "content" not in response:
return [] return []
dom = html.fromstring(response["content"]) dom = html.fromstring(response["content"])
p = HTMLParser()
# parse results # parse results
for result in dom.xpath(results_xpath): for result in dom.xpath(results_xpath):
videoid = result.xpath(url_xpath)[0] videoid = result.xpath(url_xpath)[0]
url = base_url + videoid url = base_url + videoid
title = p.unescape(extract_text(result.xpath(title_xpath))) title = unescape(extract_text(result.xpath(title_xpath)))
try: try:
thumbnail = extract_text(result.xpath(thumbnail_xpath)[0]) thumbnail = extract_text(result.xpath(thumbnail_xpath)[0])
except: except: