Merge pull request #613 from return42/pylint-bing-images

[pylint] Bing (Images) engine
This commit is contained in:
Alexandre Flament 2022-01-02 22:00:55 +01:00 committed by GitHub
commit d83aa2b0d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 27 additions and 28 deletions

View File

@@ -1,11 +1,13 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
""" # lint: pylint
Bing (Images) """Bing (Images)
""" """
from urllib.parse import urlencode
from lxml import html
from json import loads from json import loads
from urllib.parse import urlencode
from lxml import html
from searx.utils import match_language from searx.utils import match_language
from searx.engines.bing import language_aliases from searx.engines.bing import language_aliases
@@ -77,31 +79,28 @@ def response(resp):
# parse results # parse results
for result in dom.xpath('//div[@class="imgpt"]'): for result in dom.xpath('//div[@class="imgpt"]'):
try: img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0] # Microsoft seems to experiment with this code so don't make the path too specific,
# Microsoft seems to experiment with this code so don't make the path too specific, # just catch the text section for the first anchor in img_info assuming this to be
# just catch the text section for the first anchor in img_info assuming this to be # the originating site.
# the originating site. source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
m = loads(result.xpath('./a/@m')[0]) m = loads(result.xpath('./a/@m')[0])
# strip 'Unicode private use area' highlighting, they render to Tux # strip 'Unicode private use area' highlighting, they render to Tux
# the Linux penguin and a standing diamond on my machine... # the Linux penguin and a standing diamond on my machine...
title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
results.append( results.append(
{ {
'template': 'images.html', 'template': 'images.html',
'url': m['purl'], 'url': m['purl'],
'thumbnail_src': m['turl'], 'thumbnail_src': m['turl'],
'img_src': m['murl'], 'img_src': m['murl'],
'content': '', 'content': '',
'title': title, 'title': title,
'source': source, 'source': source,
'img_format': img_format, 'img_format': img_format,
} }
) )
except:
continue
return results return results