From 96422e5c9f056e233d53d819ec38ed7bfad5dc83 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 9 Mar 2021 08:34:57 +0100 Subject: [PATCH] [fix] APKMirror engine - update xpath selectors and fix img_src BTW: make the code slightly more readable Signed-off-by: Markus Heiser --- Makefile | 1 + searx/engines/apkmirror.py | 42 ++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index a01bbe687..4fedcd143 100644 --- a/Makefile +++ b/Makefile @@ -197,6 +197,7 @@ PYLINT_FILES=\ searx/engines/mediathekviewweb.py \ searx/engines/google_scholar.py \ searx/engines/yahoo_news.py \ + searx/engines/apkmirror.py \ searx_extra/update/update_external_bangs.py test.pylint: pyenvinstall diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index a9ddd711a..05a635883 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -1,13 +1,21 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +"""APKMirror """ - APK Mirror -""" + +# pylint: disable=invalid-name, missing-function-docstring from urllib.parse import urlencode from lxml import html -from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex -# about +from searx import logger +from searx.utils import ( + eval_xpath_list, + eval_xpath_getindex, + extract_text, +) + +logger = logger.getChild('APKMirror engine') + about = { "website": 'https://www.apkmirror.com', "wikidata_id": None, @@ -18,11 +26,8 @@ about = { } # engine dependent config -categories = ['it'] +categories = ['files'] paging = True - -# I am not 100% certain about this, as apkmirror appears to be a wordpress site, -# which might support time_range searching. If you want to implement it, go ahead. time_range_support = False # search-url @@ -30,37 +35,34 @@ base_url = 'https://www.apkmirror.com' search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}' -# do search-request def request(query, params): - - params['url'] = search_url.format(pageno=params['pageno'], - query=urlencode({'s': query})) + params['url'] = search_url.format( + pageno = params['pageno'], + query = urlencode({'s': query}), + ) + logger.debug("query_url --> %s", params['url']) return params -# get response from search-request def response(resp): results = [] dom = html.fromstring(resp.text) # parse results - for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'): + for result in eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']"): link = eval_xpath_getindex(result, './/h5/a', 0) + url = base_url + link.attrib.get('href') + '#downloads' title = extract_text(link) - thumbnail_src = base_url\ - + eval_xpath_getindex(result, './/img', 0).attrib.get('src').replace('&w=32&h=32', '&w=64&h=64') - + img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0) res = { 'url': url, 'title': title, - 'thumbnail_src': thumbnail_src + 'img_src': img_src } - # append result results.append(res) - # return results return results