Merge branch 'searxng:master' into eng-2

liimee 2022-07-09 06:41:39 +00:00 committed by GitHub
commit cc190603b3
168 changed files with 13153 additions and 5987 deletions


@@ -149,7 +149,11 @@ def set_loggers(engine, engine_name):
     engine.logger = logger.getChild(engine_name)
     # the engine may have loaded some other engines
     # make sure the logger is initialized
-    for module_name, module in sys.modules.items():
+    # use sys.modules.copy() to avoid "RuntimeError: dictionary changed size during iteration"
+    # see https://github.com/python/cpython/issues/89516
+    # and https://docs.python.org/3.10/library/sys.html#sys.modules
+    modules = sys.modules.copy()
+    for module_name, module in modules.items():
         if (
             module_name.startswith("searx.engines")
             and module_name != "searx.engines.__init__"
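
A standalone illustration (not part of this commit) of what the copied dict avoids: iterating sys.modules itself breaks as soon as an import adds a new module mid-loop, while iterating a snapshot does not. The json.tool import below is only a stand-in for whatever an engine module might import while set_loggers() runs.

import importlib
import sys

try:
    for _name, _module in sys.modules.items():
        # importing a not-yet-loaded module grows sys.modules, so the next
        # loop step raises "RuntimeError: dictionary changed size during iteration"
        importlib.import_module("json.tool")
except RuntimeError as exc:
    print(exc)

# iterating a snapshot, as set_loggers() now does, tolerates such imports
for _name, _module in sys.modules.copy().items():
    importlib.import_module("json.tool")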


@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Google Play Apps
"""

from urllib.parse import urlencode
from lxml import html

from searx.utils import (
    eval_xpath,
    extract_url,
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
)

about = {
    "website": "https://play.google.com/",
    "wikidata_id": "Q79576",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

categories = ["files", "apps"]
search_url = "https://play.google.com/store/search?{query}&c=apps"


def request(query, params):
    params["url"] = search_url.format(query=urlencode({"q": query}))

    return params


def response(resp):
    results = []
    dom = html.fromstring(resp.text)

    # treat the page as empty when this marker element is present
    if eval_xpath(dom, '//div[@class="v6DsQb"]'):
        return []

    # a single highlighted ("spot") result may precede the list
    spot = eval_xpath_getindex(dom, '//div[@class="ipRz4"]', 0, None)
    if spot is not None:
        url = extract_url(eval_xpath(spot, './a[@class="Qfxief"]/@href'), search_url)
        title = extract_text(eval_xpath(spot, './/div[@class="vWM94c"]'))
        content = extract_text(eval_xpath(spot, './/div[@class="LbQbAe"]'))
        img = extract_text(eval_xpath(spot, './/img[@class="T75of bzqKMd"]/@src'))

        results.append({"url": url, "title": title, "content": content, "img_src": img})

    # the regular result list
    more = eval_xpath_list(dom, '//c-wiz[@jsrenderer="RBsfwb"]//div[@role="listitem"]', min_len=1)
    for result in more:
        url = extract_url(eval_xpath(result, ".//a/@href"), search_url)
        title = extract_text(eval_xpath(result, './/span[@class="DdYX5"]'))
        content = extract_text(eval_xpath(result, './/span[@class="wMUdtb"]'))
        img = extract_text(
            eval_xpath(
                result,
                './/img[@class="T75of stzEZd" or @class="T75of etjhNc Q8CSx "]/@src',
            )
        )

        results.append({"url": url, "title": title, "content": content, "img_src": img})

    # query suggestions
    for suggestion in eval_xpath_list(dom, '//c-wiz[@jsrenderer="qyd4Kb"]//div[@class="ULeU3b neq64b"]'):
        results.append({"suggestion": extract_text(eval_xpath(suggestion, './/div[@class="Epkrse "]'))})

    return results
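
A quick sanity check (not part of the commit) of the URL built by the request() above; the params dict is a bare stand-in for the much richer one SearXNG actually passes in.

# hypothetical minimal invocation of the request() defined above
params = {}
request("firefox", params)
print(params["url"])
# -> https://play.google.com/store/search?q=firefox&c=apps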

searx/engines/lingva.py Normal file

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Lingva (alternative Google Translate frontend)"""

from json import loads

about = {
    "website": 'https://lingva.ml',
    "wikidata_id": None,
    "official_api_documentation": 'https://github.com/thedaviddelta/lingva-translate#public-apis',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

engine_type = 'online_dictionary'
categories = ['general']

url = "https://lingva.ml"
search_url = "{url}/api/v1/{from_lang}/{to_lang}/{query}"


def request(_query, params):
    params['url'] = search_url.format(
        url=url, from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query']
    )

    return params


def response(resp):
    results = []

    result = loads(resp.text)
    info = result["info"]
    from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])

    if "typo" in info:
        results.append({"suggestion": from_to_prefix + info["typo"]})

    if 'definitions' in info:  # pylint: disable=too-many-nested-blocks
        for definition in info['definitions']:
            if 'list' in definition:
                for item in definition['list']:
                    if 'synonyms' in item:
                        for synonym in item['synonyms']:
                            results.append({"suggestion": from_to_prefix + synonym})

    infobox = ""

    for translation in info["extraTranslations"]:
        infobox += f"<b>{translation['type']}</b>"

        for word in translation["list"]:
            infobox += f"<dl><dt>{word['word']}</dt>"

            for meaning in word["meanings"]:
                infobox += f"<dd>{meaning}</dd>"

            infobox += "</dl>"

    results.append(
        {
            'infobox': result["translation"],
            'content': infobox,
        }
    )

    return results
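
For orientation (not part of the commit): with engine_type = 'online_dictionary', params['from_lang'] and params['to_lang'] arrive as tuples of which this engine only reads index [1], the language code, so request() expands search_url as sketched below with hypothetical values.

# hypothetical params; only index [1] of the language tuples is read here
params = {
    'from_lang': (None, 'en', None),
    'to_lang': (None, 'de', None),
    'query': 'hello',
}
request(None, params)
print(params['url'])
# -> https://lingva.ml/api/v1/en/de/hello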


@@ -29,6 +29,7 @@ about = {
 # engine dependent config
 categories = ['map']
 paging = False
+language_support = True

 # search-url
 base_url = 'https://nominatim.openstreetmap.org/'
@@ -141,6 +142,9 @@ def request(query, params):
     params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
     params['route'] = route_re.match(query)
     params['headers']['User-Agent'] = searx_useragent()
+
+    accept_language = 'en' if params['language'] == 'all' else params['language']
+    params['headers']['Accept-Language'] = accept_language

     return params
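
Taken on its own (not part of the commit), the added header logic maps SearXNG's 'all' pseudo-language to English and passes any concrete language code straight through to Nominatim:

# hypothetical params; only 'language' and 'headers' matter for this sketch
params = {'language': 'all', 'headers': {}}
accept_language = 'en' if params['language'] == 'all' else params['language']
params['headers']['Accept-Language'] = accept_language
print(params['headers'])
# -> {'Accept-Language': 'en'}; with 'de' it would stay {'Accept-Language': 'de'}
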
@@ -202,7 +206,7 @@ def get_wikipedia_image(raw_value):
     return get_external_url('wikimedia_image', raw_value)


-def fetch_wikidata(nominatim_json, user_langage):
+def fetch_wikidata(nominatim_json, user_language):
     """Update nominatim_json using the result of a unique request to wikidata

     For result in nominatim_json:
@@ -223,10 +227,10 @@ def fetch_wikidata(nominatim_json, user_langage):
         wd_to_results.setdefault(wd_id, []).append(result)

     if wikidata_ids:
-        user_langage = 'en' if user_langage == 'all' else user_langage
+        user_language = 'en' if user_language == 'all' else user_language.split('-')[0]
         wikidata_ids_str = " ".join(wikidata_ids)
         query = wikidata_image_sparql.replace('%WIKIDATA_IDS%', sparql_string_escape(wikidata_ids_str)).replace(
-            '%LANGUAGE%', sparql_string_escape(user_langage)
+            '%LANGUAGE%', sparql_string_escape(user_language)
         )
         wikidata_json = send_wikidata_query(query)
         for wd_result in wikidata_json.get('results', {}).get('bindings', {}):
@@ -241,7 +245,7 @@ def fetch_wikidata(nominatim_json, user_langage):
                 # overwrite wikipedia link
                 wikipedia_name = wd_result.get('wikipediaName', {}).get('value')
                 if wikipedia_name:
-                    result['extratags']['wikipedia'] = user_langage + ':' + wikipedia_name
+                    result['extratags']['wikipedia'] = user_language + ':' + wikipedia_name
                 # get website if not already defined
                 website = wd_result.get('website', {}).get('value')
                 if (
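
Besides the user_langage -> user_language rename, the hunks above also strip a region suffix before the code reaches the SPARQL query and the wikipedia extratag. A quick check of that normalization with hypothetical inputs:

# hypothetical inputs for the expression added in fetch_wikidata()
for user_language in ('all', 'en-US', 'de'):
    normalized = 'en' if user_language == 'all' else user_language.split('-')[0]
    print(user_language, '->', normalized)
# all -> en, en-US -> en, de -> de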


@@ -39,7 +39,7 @@ def init(engine_settings=None):
     resp = http_get('https://z-lib.org', timeout=5.0)
     if resp.ok:
         dom = html.fromstring(resp.text)
-        base_url = "https:" + extract_text(
+        base_url = extract_text(
             eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href')
         )
         logger.debug("using base_url: %s" % base_url)
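
Dropping the "https:" prefix suggests the domain-check link now carries a full URL rather than a protocol-relative one; that reading is an assumption, but if it holds, keeping the prefix would have produced values like "https:https://...". A small sketch with hypothetical markup, using the same helpers as init():

from lxml import html
from searx.utils import eval_xpath, extract_text

# hypothetical fragment of the z-lib.org landing page
dom = html.fromstring('<div><a class="domain-check-link" data-mode="books" href="https://example.org"></a></div>')
base_url = extract_text(
    eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href')
)
print(base_url)
# -> https://example.org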