[fix] update 1x engine

This commit is contained in:
Adam Tauber 2019-10-16 13:27:05 +02:00
parent c98a2df36d
commit 6ca1622378

View File

@@ -11,8 +11,8 @@
""" """
from lxml import html from lxml import html
import re
from searx.url_utils import urlencode, urljoin from searx.url_utils import urlencode, urljoin
from searx.engines.xpath import extract_text
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']
@@ -34,41 +34,18 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
# get links from result-text dom = html.fromstring(resp.text)
regex = re.compile('(</a>|<a)') for res in dom.xpath('//div[@class="List-item MainListing"]'):
results_parts = re.split(regex, resp.text)
cur_element = ''
# iterate over link parts
for result_part in results_parts:
# processed start and end of link # processed start and end of link
if result_part == '<a': link = res.xpath('//a')[0]
cur_element = result_part
continue
elif result_part != '</a>':
cur_element += result_part
continue
cur_element += result_part
# fix xml-error
cur_element = cur_element.replace('"></a>', '"/></a>')
dom = html.fromstring(cur_element)
link = dom.xpath('//a')[0]
url = urljoin(base_url, link.attrib.get('href')) url = urljoin(base_url, link.attrib.get('href'))
title = link.attrib.get('title', '') title = extract_text(link)
thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src']) thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src'])
# TODO: get image with higher resolution # TODO: get image with higher resolution
img_src = thumbnail_src img_src = thumbnail_src
# check if url is showing to a photo
if '/photo/' not in url:
continue
# append result # append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,