Merge pull request #2642 from return42/fix-apkmirror

[fix] APKMirror engine - update xpath selectors and fix img_src
2024-01-01 19:24:07 +01:00 · 2021-03-11 09:48:31 +01:00 · 2021-03-11 09:48:31 +01:00 · af3e969c5a
commit af3e969c5a
parent 8b650e6a2d 96422e5c9f
2 changed files with 23 additions and 20 deletions
--- a/1
+++ b/1
@@ -196,6 +196,7 @@ PYLINT_FILES=\
 	searx/engines/mediathekviewweb.py \
 	searx/engines/google_scholar.py \
 	searx/engines/yahoo_news.py \
+	searx/engines/apkmirror.py \
 	searx_extra/update/update_external_bangs.py

 test.pylint: pyenvinstall
--- a/searx/engines/apkmirror.py
+++ b/searx/engines/apkmirror.py
@@ -1,13 +1,21 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+"""APKMirror
 """
- APK Mirror
-"""
+
+# pylint: disable=invalid-name, missing-function-docstring

 from urllib.parse import urlencode
 from lxml import html
-from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex

-# about
+from searx import logger
+from searx.utils import (
+    eval_xpath_list,
+    eval_xpath_getindex,
+    extract_text,
+)
+
+logger = logger.getChild('APKMirror engine')
+
 about = {
    "website": 'https://www.apkmirror.com',
    "wikidata_id": None,
@@ -18,11 +26,8 @@ about = {
 }

 # engine dependent config
-categories = ['it']
+categories = ['files']
 paging = True
-
-# I am not 100% certain about this, as apkmirror appears to be a wordpress site,
-# which might support time_range searching. If you want to implement it, go ahead.
 time_range_support = False

 # search-url
@@ -30,37 +35,34 @@ base_url = 'https://www.apkmirror.com'
 search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}'


-# do search-request
 def request(query, params):
-
-    params['url'] = search_url.format(pageno=params['pageno'],
-                                      query=urlencode({'s': query}))
+    params['url'] = search_url.format(
+        pageno = params['pageno'],
+        query = urlencode({'s': query}),
+    )
+    logger.debug("query_url --> %s", params['url'])
    return params


-# get response from search-request
 def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    # parse results
-    for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'):
+    for result in eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']"):

        link = eval_xpath_getindex(result, './/h5/a', 0)
+
        url = base_url + link.attrib.get('href') + '#downloads'
        title = extract_text(link)
-        thumbnail_src = base_url\
-            + eval_xpath_getindex(result, './/img', 0).attrib.get('src').replace('&w=32&h=32', '&w=64&h=64')
-
+        img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0)
        res = {
            'url': url,
            'title': title,
-            'thumbnail_src': thumbnail_src
+            'img_src': img_src
        }

-        # append result
        results.append(res)

-    # return results
    return results