Markus Heiser 2025-01-14 14:24:08 +01:00 committed by GitHub
commit 1d62b7fad3
142 changed files with 3863 additions and 2070 deletions

View file

@@ -139,6 +139,7 @@ from searx.utils import (
get_embeded_stream_url,
)
from searx.enginelib.traits import EngineTraits
from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -274,10 +275,14 @@ def _parse_search(resp):
result_list = []
dom = html.fromstring(resp.text)
# I doubt that Brave is still providing the "answer" class / I haven't seen
# answers in Brave for a long time.
answer_tag = eval_xpath_getindex(dom, '//div[@class="answer"]', 0, default=None)
if answer_tag:
url = eval_xpath_getindex(dom, '//div[@id="featured_snippet"]/a[@class="result-header"]/@href', 0, default=None)
result_list.append({'answer': extract_text(answer_tag), 'url': url})
answer = extract_text(answer_tag)
if answer is not None:
Answer(results=result_list, answer=answer, url=url)
# xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'
xpath_results = '//div[contains(@class, "snippet ")]'
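This Answer pattern recurs throughout the commit: instead of appending a plain dict with an 'answer' key, the engine instantiates Answer from searx.result_types and hands it the result container via the results= keyword. Judging by the hunks (the constructed object is never appended manually, yet the list is still returned), the constructor registers the item in that container itself. A minimal before/after sketch, assuming exactly that self-registration behavior:

# sketch only -- assumes Answer(results=...) appends itself to the given list
from searx.result_types import Answer

def response(resp):
    results = []

    # old style: an untyped dict the result handling had to interpret
    # results.append({'answer': 'forty-two', 'url': 'https://example.org'})

    # new style: a typed result item that registers itself in `results`
    Answer(results=results, answer='forty-two', url='https://example.org')

    return results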

View file

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Deepl translation engine"""
from json import loads
from searx.result_types import Translations
about = {
"website": 'https://deepl.com',
@@ -41,16 +41,13 @@ def request(_query, params):
def response(resp):
results = []
result = loads(resp.text)
translations = result['translations']
infobox = "<dl>"
result = resp.json()
for translation in translations:
infobox += f"<dd>{translation['text']}</dd>"
if not result.get('translations'):
return results
infobox += "</dl>"
results.append({'answer': infobox})
translations = [Translations.Item(text=t['text']) for t in result['translations']]
Translations(results=results, translations=translations)
return results
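Translation engines follow the same convention with Translations and its nested Translations.Item. Across this commit the items carry text plus, depending on the engine, transliteration, definitions, examples and synonyms, and the wrapping Translations(...) again takes the results container and optionally a url. A hedged sketch of the shape (field names are taken from the hunks in this commit, not from separate API documentation; the response layout is made up):

# sketch only -- a generic translation response mapped onto Translations / Translations.Item
from searx.result_types import Translations

def response(resp):
    results = []
    data = resp.json()  # assumed shape: {"text": "...", "alternatives": ["..."]}

    item = Translations.Item(
        text=data['text'],                      # main translation
        examples=data.get('alternatives', []),  # any extra renderings
    )
    Translations(results=results, translations=[item], url=resp.search_params['url'])
    return results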

View file

@@ -3,9 +3,12 @@
Dictzone
"""
from urllib.parse import urljoin
import urllib.parse
from lxml import html
from searx.utils import eval_xpath
from searx.utils import eval_xpath, extract_text
from searx.result_types import Translations
from searx.network import get as http_get # https://github.com/searxng/searxng/issues/762
# about
about = {
@@ -19,42 +22,83 @@ about = {
engine_type = 'online_dictionary'
categories = ['general', 'translate']
url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
base_url = "https://dictzone.com"
weight = 100
results_xpath = './/table[@id="r"]/tr'
https_support = True
def request(query, params): # pylint: disable=unused-argument
params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query'])
from_lang = params["from_lang"][2] # "english"
to_lang = params["to_lang"][2] # "german"
query = params["query"]
params["url"] = f"{base_url}/{from_lang}-{to_lang}-dictionary/{urllib.parse.quote_plus(query)}"
return params
def _clean_up_node(node):
for x in ["./i", "./span", "./button"]:
for n in node.xpath(x):
n.getparent().remove(n)
def response(resp):
results = []
item_list = []
if not resp.ok:
return results
dom = html.fromstring(resp.text)
for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
try:
from_result, to_results_raw = eval_xpath(result, './td')
except: # pylint: disable=bare-except
for result in eval_xpath(dom, ".//table[@id='r']//tr"):
# each row is a Translations.Item
td_list = result.xpath("./td")
if len(td_list) != 2:
# ignore header rows (tr/th)
continue
to_results = []
for to_result in eval_xpath(to_results_raw, './p/a'):
t = to_result.text_content()
if t.strip():
to_results.append(to_result.text_content())
col_from, col_to = td_list
_clean_up_node(col_from)
results.append(
{
'url': urljoin(str(resp.url), '?%d' % k),
'title': from_result.text_content(),
'content': '; '.join(to_results),
}
)
text = f"{extract_text(col_from)}"
synonyms = []
p_list = col_to.xpath(".//p")
for i, p_item in enumerate(p_list):
smpl: str = extract_text(p_list[i].xpath("./i[@class='smpl']")) # type: ignore
_clean_up_node(p_item)
p_text: str = extract_text(p_item) # type: ignore
if smpl:
p_text += " // " + smpl
if i == 0:
text += f" : {p_text}"
continue
synonyms.append(p_text)
item = Translations.Item(text=text, synonyms=synonyms)
item_list.append(item)
# the "autotranslate" of dictzone is loaded by the JS from URL:
# https://dictzone.com/trans/hello%20world/en_de
from_lang = resp.search_params["from_lang"][1] # "en"
to_lang = resp.search_params["to_lang"][1] # "de"
query = resp.search_params["query"]
# works only sometimes?
autotranslate = http_get(f"{base_url}/trans/{query}/{from_lang}_{to_lang}", timeout=1.0)
if autotranslate.ok and autotranslate.text:
item_list.insert(0, Translations.Item(text=autotranslate.text))
Translations(results=results, translations=item_list, url=resp.search_params["url"])
return results
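The _clean_up_node helper above strips decorative child elements before the text is extracted; without it, extract_text would also pick up the text of inline markers and buttons nested in a cell. A small, self-contained lxml sketch of the same idea (the markup is illustrative, not dictzone's actual HTML):

from lxml import html

cell = html.fromstring('<div>Haus <i class="smpl">the house</i> <button>copy</button></div>')

# drop <i>, <span> and <button> children so only the headword text remains
for xpath in ("./i", "./span", "./button"):
    for node in cell.xpath(xpath):
        node.getparent().remove(node)

print(cell.text_content().strip())  # -> "Haus"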

View file

@@ -27,6 +27,7 @@ from searx.network import get # see https://github.com/searxng/searxng/issues/7
from searx import redisdb
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineCaptchaException
from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -398,12 +399,7 @@ def response(resp):
):
current_query = resp.search_params["data"].get("q")
results.append(
{
'answer': zero_click,
'url': "https://duckduckgo.com/?" + urlencode({"q": current_query}),
}
)
Answer(results=results, answer=zero_click, url="https://duckduckgo.com/?" + urlencode({"q": current_query}))
return results

View file

@@ -21,6 +21,7 @@ from lxml import html
from searx.data import WIKIDATA_UNITS
from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -99,9 +100,10 @@ def response(resp):
# add answer if there is one
answer = search_res.get('Answer', '')
if answer:
logger.debug('AnswerType="%s" Answer="%s"', search_res.get('AnswerType'), answer)
if search_res.get('AnswerType') not in ['calc', 'ip']:
results.append({'answer': html_to_text(answer), 'url': search_res.get('AbstractURL', '')})
answer_type = search_res.get('AnswerType')
logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer)
if isinstance(answer, str) and answer_type not in ['calc', 'ip']:
Answer(results=results, answer=html_to_text(answer), url=search_res.get('AbstractURL', ''))
# add infobox
if 'Definition' in search_res:

View file

@@ -25,6 +25,7 @@ from searx.locales import language_tag, region_tag, get_official_locales
from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.exceptions import SearxEngineCaptchaException
from searx.enginelib.traits import EngineTraits
from searx.result_types import Answer
if TYPE_CHECKING:
import logging
@@ -331,12 +332,7 @@ def response(resp):
for item in answer_list:
for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'):
bubble.drop_tree()
results.append(
{
'answer': extract_text(item),
'url': (eval_xpath(item, '../..//a/@href') + [None])[0],
}
)
Answer(results=results, answer=extract_text(item), url=(eval_xpath(item, '../..//a/@href') + [None])[0])
# parse results

View file

@@ -2,7 +2,8 @@
"""LibreTranslate (Free and Open Source Machine Translation API)"""
import random
from json import dumps
import json
from searx.result_types import Translations
about = {
"website": 'https://libretranslate.com',
@@ -16,19 +17,27 @@ about = {
engine_type = 'online_dictionary'
categories = ['general', 'translate']
base_url = "https://translate.terraprint.co"
api_key = ''
base_url = "https://libretranslate.com/translate"
api_key = ""
def request(_query, params):
request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
if request_url.startswith("https://libretranslate.com") and not api_key:
return None
params['url'] = f"{request_url}/translate"
args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query']}
args = {
'q': params['query'],
'source': params['from_lang'][1],
'target': params['to_lang'][1],
'alternatives': 3,
}
if api_key:
args['api_key'] = api_key
params['data'] = dumps(args)
params['data'] = json.dumps(args)
params['method'] = 'POST'
params['headers'] = {'Content-Type': 'application/json'}
params['req_url'] = request_url
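As in the mozhi engine further down, base_url may be configured as a single instance or as a list of instances; in the latter case each request picks one at random. A tiny sketch of that selection (the instance URLs are placeholders):

import random

# base_url may come from settings.yml as one string or a list of mirrors
base_url = ["https://libretranslate.example/a", "https://libretranslate.example/b"]

request_url = random.choice(base_url) if isinstance(base_url, list) else base_url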
@@ -41,13 +50,10 @@ def response(resp):
json_resp = resp.json()
text = json_resp.get('translatedText')
if not text:
return results
from_lang = resp.search_params["from_lang"][1]
to_lang = resp.search_params["to_lang"][1]
query = resp.search_params["query"]
req_url = resp.search_params["req_url"]
if text:
results.append({"answer": text, "url": f"{req_url}/?source={from_lang}&target={to_lang}&q={query}"})
item = Translations.Item(text=text, examples=json_resp.get('alternatives', []))
Translations(results=results, translations=[item])
return results

View file

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Lingva (alternative Google Translate frontend)"""
from json import loads
from searx.result_types import Translations
about = {
"website": 'https://lingva.ml',
@@ -16,20 +16,17 @@ engine_type = 'online_dictionary'
categories = ['general', 'translate']
url = "https://lingva.thedaviddelta.com"
search_url = "{url}/api/v1/{from_lang}/{to_lang}/{query}"
def request(_query, params):
params['url'] = search_url.format(
url=url, from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query']
)
params['url'] = f"{url}/api/v1/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}"
return params
def response(resp):
results = []
result = loads(resp.text)
result = resp.json()
info = result["info"]
from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])
@@ -38,28 +35,39 @@ def response(resp):
if 'definitions' in info: # pylint: disable=too-many-nested-blocks
for definition in info['definitions']:
if 'list' in definition:
for item in definition['list']:
if 'synonyms' in item:
for synonym in item['synonyms']:
results.append({"suggestion": from_to_prefix + synonym})
for item in definition.get('list', []):
for synonym in item.get('synonyms', []):
results.append({"suggestion": from_to_prefix + synonym})
infobox = ""
data = []
for definition in info['definitions']:
for translation in definition['list']:
data.append(
Translations.Item(
text=result['translation'],
definitions=[translation['definition']] if translation['definition'] else [],
examples=[translation['example']] if translation['example'] else [],
synonyms=translation['synonyms'],
)
)
for translation in info["extraTranslations"]:
for word in translation["list"]:
infobox += f"<dl><dt>{word['word']}</dt>"
data.append(
Translations.Item(
text=word['word'],
definitions=word['meanings'],
)
)
for meaning in word["meanings"]:
infobox += f"<dd>{meaning}</dd>"
if not data and result['translation']:
data.append(Translations.Item(text=result['translation']))
infobox += "</dl>"
results.append(
{
'infobox': result["translation"],
'content': infobox,
}
params = resp.search_params
Translations(
results=results,
translations=data,
url=f"{url}/{params['from_lang'][1]}/{params['to_lang'][1]}/{params['query']}",
)
return results

View file

@@ -3,8 +3,9 @@
import random
import re
from urllib.parse import urlencode
from flask_babel import gettext
import urllib.parse
from searx.result_types import Translations
about = {
"website": 'https://codeberg.org/aryak/mozhi',
@@ -28,37 +29,33 @@ def request(_query, params):
request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
args = {'from': params['from_lang'][1], 'to': params['to_lang'][1], 'text': params['query'], 'engine': mozhi_engine}
params['url'] = f"{request_url}/api/translate?{urlencode(args)}"
params['url'] = f"{request_url}/api/translate?{urllib.parse.urlencode(args)}"
return params
def response(resp):
results = []
translation = resp.json()
infobox = ""
item = Translations.Item(text=translation['translated-text'])
if translation['target_transliteration'] and not re.match(
re_transliteration_unsupported, translation['target_transliteration']
):
infobox = f"<b>{translation['target_transliteration']}</b>"
item.transliteration = translation['target_transliteration']
if translation['word_choices']:
for word in translation['word_choices']:
infobox += f"<dl><dt>{word['word']}: {word['definition']}</dt>"
if word.get('definition'):
item.definitions.append(word['definition'])
if word['examples_target']:
for example in word['examples_target']:
infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
for example in word.get('examples_target', []):
item.examples.append(re.sub(r"<|>", "", example).lstrip('- '))
infobox += "</dl>"
item.synonyms = translation.get('source_synonyms', [])
if translation['source_synonyms']:
infobox += f"<dl><dt>{gettext('Synonyms')}: {', '.join(translation['source_synonyms'])}</dt></dl>"
result = {
'infobox': translation['translated-text'],
'content': infobox,
}
return [result]
url = urllib.parse.urlparse(resp.search_params["url"])
# remove the api path
url = url._replace(path="", fragment="").geturl()
Translations(results=results, translations=[item], url=url)
return results
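The user-facing URL is derived from the request URL by clearing its path and fragment with urlparse(...)._replace(...). Note that this keeps the query string; a standalone sketch of the transformation (the instance URL is illustrative):

import urllib.parse

api_url = "https://mozhi.example/api/translate?from=en&to=de&text=hello&engine=google"
parsed = urllib.parse.urlparse(api_url)

# clearing path and fragment leaves scheme, host and -- notably -- the query string
print(parsed._replace(path="", fragment="").geturl())
# -> https://mozhi.example?from=en&to=de&text=hello&engine=google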

View file

@@ -4,16 +4,16 @@
"""
import re
from json import loads
from urllib.parse import urlencode
import urllib.parse
from functools import partial
from flask_babel import gettext
from searx.data import OSM_KEYS_TAGS, CURRENCIES
from searx.utils import searx_useragent
from searx.external_urls import get_external_url
from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail
from searx.result_types import Answer
# about
about = {
@@ -37,8 +37,7 @@ search_string = 'search?{query}&polygon_geojson=1&format=jsonv2&addressdetails=1
result_id_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
result_lat_lon_url = 'https://www.openstreetmap.org/?mlat={lat}&mlon={lon}&zoom={zoom}&layers=M'
route_url = 'https://graphhopper.com/maps/?point={}&point={}&locale=en-US&vehicle=car&weighting=fastest&turn_costs=true&use_miles=false&layer=Omniscale' # pylint: disable=line-too-long
route_re = re.compile('(?:from )?(.+) to (.+)')
route_url = 'https://graphhopper.com/maps'
wikidata_image_sparql = """
select ?item ?itemLabel ?image ?sign ?symbol ?website ?wikipediaName
@@ -138,27 +137,25 @@ KEY_RANKS = {k: i for i, k in enumerate(KEY_ORDER)}
def request(query, params):
"""do search-request"""
params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
params['route'] = route_re.match(query)
params['headers']['User-Agent'] = searx_useragent()
if 'Accept-Language' not in params['headers']:
params['headers']['Accept-Language'] = 'en'
params['url'] = base_url + search_string.format(query=urllib.parse.urlencode({'q': query}))
return params
def response(resp):
"""get response from search-request"""
results = []
nominatim_json = loads(resp.text)
nominatim_json = resp.json()
user_language = resp.search_params['language']
if resp.search_params['route']:
results.append(
{
'answer': gettext('Get directions'),
'url': route_url.format(*resp.search_params['route'].groups()),
}
l = re.findall(r"from\s+(.*)\s+to\s+(.+)", resp.search_params["query"])
if not l:
l = re.findall(r"\s*(.*)\s+to\s+(.+)", resp.search_params["query"])
if l:
point1, point2 = [urllib.parse.quote_plus(p) for p in l[0]]
Answer(
results=results,
answer=gettext('Show route in map ..'),
url=f"{route_url}/?point={point1}&point={point2}",
)
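The routing shortcut no longer relies on the precompiled route_re; the response handler instead looks for a "from X to Y" pattern in the raw query (falling back to a plain "X to Y"), URL-encodes both endpoints and links to GraphHopper. A short sketch of how the two regexes and the URL assembly behave on a sample query (the query itself is made up):

import re
import urllib.parse

query = "from Berlin Hbf to Munich"

match = re.findall(r"from\s+(.*)\s+to\s+(.+)", query)
if not match:
    match = re.findall(r"\s*(.*)\s+to\s+(.+)", query)

if match:
    point1, point2 = [urllib.parse.quote_plus(p) for p in match[0]]
    print(f"https://graphhopper.com/maps/?point={point1}&point={point2}")
    # -> https://graphhopper.com/maps/?point=Berlin+Hbf&point=Munich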
# simplify the code below: make sure extratags is a dictionary

View file

@@ -156,6 +156,7 @@ def parse_tineye_match(match_json):
def response(resp):
"""Parse HTTP response from TinEye."""
results = []
# handle the 422 client side errors, and the possible 400 status code error
if resp.status_code in (400, 422):
@@ -182,14 +183,14 @@ def response(resp):
message = ','.join(description)
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
# results.append({'answer': message})
logger.error(message)
return []
# from searx.result_types import Answer
# Answer(results=results, answer=message)
logger.info(message)
return results
# Raise for all other responses
resp.raise_for_status()
results = []
json_data = resp.json()
for match_json in json_data['matches']:

View file

@@ -3,6 +3,10 @@
"""
import urllib.parse
from searx.result_types import Translations
# about
about = {
"website": 'https://mymemory.translated.net/',
@@ -15,8 +19,8 @@ about = {
engine_type = 'online_dictionary'
categories = ['general', 'translate']
url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
api_url = "https://api.mymemory.translated.net"
web_url = "https://mymemory.translated.net"
weight = 100
https_support = True
@@ -24,29 +28,32 @@ api_key = ''
def request(query, params): # pylint: disable=unused-argument
args = {"q": params["query"], "langpair": f"{params['from_lang'][1]}|{params['to_lang'][1]}"}
if api_key:
key_form = '&key=' + api_key
else:
key_form = ''
params['url'] = url.format(
from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'], key=key_form
)
args["key"] = api_key
params['url'] = f"{api_url}/get?{urllib.parse.urlencode(args)}"
return params
def response(resp):
results = []
results.append(
{
'url': web_url.format(
from_lang=resp.search_params['from_lang'][2],
to_lang=resp.search_params['to_lang'][2],
query=resp.search_params['query'],
),
'title': '[{0}-{1}] {2}'.format(
resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query']
),
'content': resp.json()['responseData']['translatedText'],
}
)
data = resp.json()
args = {
"q": resp.search_params["query"],
"lang": resp.search_params.get("searxng_locale", "en"), # ui language
"sl": resp.search_params['from_lang'][1],
"tl": resp.search_params['to_lang'][1],
}
link = f"{web_url}/search.php?{urllib.parse.urlencode(args)}"
text = data['responseData']['translatedText']
examples = [f"{m['segment']} : {m['translation']}" for m in data['matches'] if m['translation'] != text]
item = Translations.Item(text=text, examples=examples)
Translations(results=results, translations=[item], url=link)
return results

View file

@@ -262,7 +262,7 @@ def request(query, params):
def response(resp): # pylint: disable=too-many-branches
'''Scrap *results* from the response (see :ref:`engine results`).'''
'''Scrap *results* from the response (see :ref:`result types`).'''
if no_result_for_http_status and resp.status_code in no_result_for_http_status:
return []