Merge branch 'searxng:master' into eng-2

liimee 2022-07-09 06:41:39 +00:00 committed by GitHub
commit cc190603b3
168 changed files with 13153 additions and 5987 deletions


@@ -149,7 +149,11 @@ def set_loggers(engine, engine_name):
     engine.logger = logger.getChild(engine_name)
     # the engine may have loaded some other engines
     # make sure the logger is initialized
-    for module_name, module in sys.modules.items():
+    # use sys.modules.copy() to avoid "RuntimeError: dictionary changed size during iteration"
+    # see https://github.com/python/cpython/issues/89516
+    # and https://docs.python.org/3.10/library/sys.html#sys.modules
+    modules = sys.modules.copy()
+    for module_name, module in modules.items():
         if (
             module_name.startswith("searx.engines")
             and module_name != "searx.engines.__init__"
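
A standalone illustration (not part of this commit) of what the copied dict avoids: iterating sys.modules itself breaks as soon as an import adds a new module mid-loop, while iterating a snapshot does not. The json.tool import below is only a stand-in for whatever an engine module might import while set_loggers() runs.

import importlib
import sys

try:
    for _name, _module in sys.modules.items():
        # importing a not-yet-loaded module grows sys.modules, so the next
        # loop step raises "RuntimeError: dictionary changed size during iteration"
        importlib.import_module("json.tool")
except RuntimeError as exc:
    print(exc)

# iterating a snapshot, as set_loggers() now does, tolerates such imports
for _name, _module in sys.modules.copy().items():
    importlib.import_module("json.tool")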


@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Google Play Apps
"""

from urllib.parse import urlencode
from lxml import html

from searx.utils import (
    eval_xpath,
    extract_url,
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
)

about = {
    "website": "https://play.google.com/",
    "wikidata_id": "Q79576",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

categories = ["files", "apps"]
search_url = "https://play.google.com/store/search?{query}&c=apps"


def request(query, params):
    params["url"] = search_url.format(query=urlencode({"q": query}))

    return params


def response(resp):
    results = []
    dom = html.fromstring(resp.text)

    # treat the page as empty when this marker element is present
    if eval_xpath(dom, '//div[@class="v6DsQb"]'):
        return []

    # a single highlighted ("spot") result may precede the list
    spot = eval_xpath_getindex(dom, '//div[@class="ipRz4"]', 0, None)
    if spot is not None:
        url = extract_url(eval_xpath(spot, './a[@class="Qfxief"]/@href'), search_url)
        title = extract_text(eval_xpath(spot, './/div[@class="vWM94c"]'))
        content = extract_text(eval_xpath(spot, './/div[@class="LbQbAe"]'))
        img = extract_text(eval_xpath(spot, './/img[@class="T75of bzqKMd"]/@src'))

        results.append({"url": url, "title": title, "content": content, "img_src": img})

    # the regular result list
    more = eval_xpath_list(dom, '//c-wiz[@jsrenderer="RBsfwb"]//div[@role="listitem"]', min_len=1)
    for result in more:
        url = extract_url(eval_xpath(result, ".//a/@href"), search_url)
        title = extract_text(eval_xpath(result, './/span[@class="DdYX5"]'))
        content = extract_text(eval_xpath(result, './/span[@class="wMUdtb"]'))
        img = extract_text(
            eval_xpath(
                result,
                './/img[@class="T75of stzEZd" or @class="T75of etjhNc Q8CSx "]/@src',
            )
        )

        results.append({"url": url, "title": title, "content": content, "img_src": img})

    # query suggestions
    for suggestion in eval_xpath_list(dom, '//c-wiz[@jsrenderer="qyd4Kb"]//div[@class="ULeU3b neq64b"]'):
        results.append({"suggestion": extract_text(eval_xpath(suggestion, './/div[@class="Epkrse "]'))})

    return results
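
A quick sanity check (not part of the commit) of the URL built by the request() above; the params dict is a bare stand-in for the much richer one SearXNG actually passes in.

# hypothetical minimal invocation of the request() defined above
params = {}
request("firefox", params)
print(params["url"])
# -> https://play.google.com/store/search?q=firefox&c=apps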

searx/engines/lingva.py Normal file

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Lingva (alternative Google Translate frontend)"""

from json import loads

about = {
    "website": 'https://lingva.ml',
    "wikidata_id": None,
    "official_api_documentation": 'https://github.com/thedaviddelta/lingva-translate#public-apis',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

engine_type = 'online_dictionary'
categories = ['general']

url = "https://lingva.ml"
search_url = "{url}/api/v1/{from_lang}/{to_lang}/{query}"


def request(_query, params):
    params['url'] = search_url.format(
        url=url, from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query']
    )

    return params


def response(resp):
    results = []

    result = loads(resp.text)
    info = result["info"]
    from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])

    if "typo" in info:
        results.append({"suggestion": from_to_prefix + info["typo"]})

    if 'definitions' in info:  # pylint: disable=too-many-nested-blocks
        for definition in info['definitions']:
            if 'list' in definition:
                for item in definition['list']:
                    if 'synonyms' in item:
                        for synonym in item['synonyms']:
                            results.append({"suggestion": from_to_prefix + synonym})

    infobox = ""

    for translation in info["extraTranslations"]:
        infobox += f"<b>{translation['type']}</b>"

        for word in translation["list"]:
            infobox += f"<dl><dt>{word['word']}</dt>"

            for meaning in word["meanings"]:
                infobox += f"<dd>{meaning}</dd>"

            infobox += "</dl>"

    results.append(
        {
            'infobox': result["translation"],
            'content': infobox,
        }
    )

    return results
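
For orientation (not part of the commit): with engine_type = 'online_dictionary', params['from_lang'] and params['to_lang'] arrive as tuples of which this engine only reads index [1], the language code, so request() expands search_url as sketched below with hypothetical values.

# hypothetical params; only index [1] of the language tuples is read here
params = {
    'from_lang': (None, 'en', None),
    'to_lang': (None, 'de', None),
    'query': 'hello',
}
request(None, params)
print(params['url'])
# -> https://lingva.ml/api/v1/en/de/hello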


@@ -29,6 +29,7 @@ about = {
 # engine dependent config
 categories = ['map']
 paging = False
+language_support = True

 # search-url
 base_url = 'https://nominatim.openstreetmap.org/'
@@ -141,6 +142,9 @@ def request(query, params):
     params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
     params['route'] = route_re.match(query)
     params['headers']['User-Agent'] = searx_useragent()
+
+    accept_language = 'en' if params['language'] == 'all' else params['language']
+    params['headers']['Accept-Language'] = accept_language

     return params
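
Taken on its own (not part of the commit), the added header logic maps SearXNG's 'all' pseudo-language to English and passes any concrete language code straight through to Nominatim:

# hypothetical params; only 'language' and 'headers' matter for this sketch
params = {'language': 'all', 'headers': {}}
accept_language = 'en' if params['language'] == 'all' else params['language']
params['headers']['Accept-Language'] = accept_language
print(params['headers'])
# -> {'Accept-Language': 'en'}; with 'de' it would stay {'Accept-Language': 'de'}
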
@@ -202,7 +206,7 @@ def get_wikipedia_image(raw_value):
     return get_external_url('wikimedia_image', raw_value)


-def fetch_wikidata(nominatim_json, user_langage):
+def fetch_wikidata(nominatim_json, user_language):
     """Update nominatim_json using the result of a unique request to wikidata

     For result in nominatim_json:
@@ -223,10 +227,10 @@ def fetch_wikidata(nominatim_json, user_langage):
         wd_to_results.setdefault(wd_id, []).append(result)

     if wikidata_ids:
-        user_langage = 'en' if user_langage == 'all' else user_langage
+        user_language = 'en' if user_language == 'all' else user_language.split('-')[0]
         wikidata_ids_str = " ".join(wikidata_ids)
         query = wikidata_image_sparql.replace('%WIKIDATA_IDS%', sparql_string_escape(wikidata_ids_str)).replace(
-            '%LANGUAGE%', sparql_string_escape(user_langage)
+            '%LANGUAGE%', sparql_string_escape(user_language)
         )
         wikidata_json = send_wikidata_query(query)
         for wd_result in wikidata_json.get('results', {}).get('bindings', {}):
@@ -241,7 +245,7 @@ def fetch_wikidata(nominatim_json, user_langage):
                 # overwrite wikipedia link
                 wikipedia_name = wd_result.get('wikipediaName', {}).get('value')
                 if wikipedia_name:
-                    result['extratags']['wikipedia'] = user_langage + ':' + wikipedia_name
+                    result['extratags']['wikipedia'] = user_language + ':' + wikipedia_name
                 # get website if not already defined
                 website = wd_result.get('website', {}).get('value')
                 if (
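
Besides the user_langage -> user_language rename, the hunks above also strip a region suffix before the code reaches the SPARQL query and the wikipedia extratag. A quick check of that normalization with hypothetical inputs:

# hypothetical inputs for the expression added in fetch_wikidata()
for user_language in ('all', 'en-US', 'de'):
    normalized = 'en' if user_language == 'all' else user_language.split('-')[0]
    print(user_language, '->', normalized)
# all -> en, en-US -> en, de -> de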


@@ -39,7 +39,7 @@ def init(engine_settings=None):
     resp = http_get('https://z-lib.org', timeout=5.0)
     if resp.ok:
         dom = html.fromstring(resp.text)
-        base_url = "https:" + extract_text(
+        base_url = extract_text(
             eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href')
         )
         logger.debug("using base_url: %s" % base_url)
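
Dropping the "https:" prefix suggests the domain-check link now carries a full URL rather than a protocol-relative one; that reading is an assumption, but if it holds, keeping the prefix would have produced values like "https:https://...". A small sketch with hypothetical markup, using the same helpers as init():

from lxml import html
from searx.utils import eval_xpath, extract_text

# hypothetical fragment of the z-lib.org landing page
dom = html.fromstring('<div><a class="domain-check-link" data-mode="books" href="https://example.org"></a></div>')
base_url = extract_text(
    eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href')
)
print(base_url)
# -> https://example.org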