Mirror of https://github.com/searxng/searxng
Synced 2024-01-01 19:24:07 +01:00

Merge branch 'searxng:master' into elasticsearch-custom-query

Commit 82d1544a6b
341 changed files with 29669 additions and 12534 deletions
@@ -5,6 +5,7 @@
 # pylint: disable=use-dict-literal

 import json
+import html
 from urllib.parse import urlencode, quote_plus

 import lxml
@@ -162,7 +163,7 @@ def stract(query, _lang):
     if not resp.ok:
         return []

-    return [suggestion['raw'] for suggestion in resp.json()]
+    return [html.unescape(suggestion['raw']) for suggestion in resp.json()]


 def startpage(query, sxng_locale):
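A note on the autocomplete change above: Stract delivers suggestion strings
HTML-escaped, which is what the added ``html.unescape`` call undoes before the
suggestions reach the user. A minimal illustration (the suggestion text is
invented):

.. code:: python

   import html
   html.unescape("tom &amp; jerry")  # -> 'tom & jerry'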
@@ -14,17 +14,7 @@ import typing
 import logging
 import pathlib

-try:
-    import tomllib
-
-    pytomlpp = None
-    USE_TOMLLIB = True
-except ImportError:
-    import pytomlpp
-
-    tomllib = None
-    USE_TOMLLIB = False
+from ..compat import tomllib

 __all__ = ['Config', 'UNSET', 'SchemaIssue']
@@ -32,7 +22,7 @@ log = logging.getLogger(__name__)


 class FALSE:
-    """Class of ``False`` singelton"""
+    """Class of ``False`` singleton"""

     # pylint: disable=multiple-statements
     def __init__(self, msg):
@@ -91,7 +81,7 @@ class Config:
         return cfg

     def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
-        """Construtor of class Config.
+        """Constructor of class Config.

         :param cfg_schema: Schema of the configuration
         :param deprecated: dictionary that maps deprecated configuration names to a messages
@@ -169,7 +159,7 @@ class Config:
         return pathlib.Path(str(val))

     def pyobj(self, name, default=UNSET):
-        """Get python object refered by full qualiffied name (FQN) in the config
+        """Get python object referred by full qualiffied name (FQN) in the config
         string."""

         fqn = self.get(name, default)
@@ -183,19 +173,10 @@


 def toml_load(file_name):
-    if USE_TOMLLIB:
-        # Python >= 3.11
-        try:
-            with open(file_name, "rb") as f:
-                return tomllib.load(f)
-        except tomllib.TOMLDecodeError as exc:
-            msg = str(exc).replace('\t', '').replace('\n', ' ')
-            log.error("%s: %s", file_name, msg)
-            raise
-    # fallback to pytomlpp for Python < 3.11
     try:
-        return pytomlpp.load(file_name)
-    except pytomlpp.DecodeError as exc:
+        with open(file_name, "rb") as f:
+            return tomllib.load(f)
+    except tomllib.TOMLDecodeError as exc:
         msg = str(exc).replace('\t', '').replace('\n', ' ')
         log.error("%s: %s", file_name, msg)
         raise
@@ -76,11 +76,11 @@ LONG_MAX = 150
 LONG_MAX_SUSPICIOUS = 10
 """Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""

-API_WONDOW = 3600
+API_WINDOW = 3600
 """Time (sec) before sliding window for API requests (format != html) expires."""

 API_MAX = 4
-"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
+"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""

 SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
 """Time (sec) before sliding window for one suspicious IP expires."""
@@ -103,7 +103,7 @@ def filter_request(
         return None

     if request.args.get('format', 'html') != 'html':
-        c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW)
         if c > API_MAX:
             return too_many_requests(network, "too many request in API_WINDOW")
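For readers unfamiliar with the pattern: ``incr_sliding_window`` counts how many
requests one client made within the last *window* seconds. Below is a minimal
sketch of such a counter built on a Redis sorted set; it is purely illustrative,
SearXNG's real implementation lives in ``searx/redislib.py`` and differs in
detail:

.. code:: python

   import time
   import redis

   def incr_sliding_window(client: redis.Redis, key: str, window: int) -> int:
       """Record one event under ``key`` and return the number of events seen
       within the last ``window`` seconds (sketch, not the SearXNG code)."""
       now = time.time()
       pipe = client.pipeline()
       pipe.zremrangebyscore(key, 0, now - window)  # drop events older than the window
       pipe.zadd(key, {str(now): now})              # score each event by its timestamp
       pipe.zcard(key)                              # count what remains in the window
       pipe.expire(key, window)                     # let idle keys expire on their own
       return pipe.execute()[2]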
@@ -28,7 +28,7 @@ And in the HTML template from flask a stylesheet link is needed (the value of

     <link rel="stylesheet"
           href="{{ url_for('client_token', token=link_token) }}"
-          type="text/css" />
+          type="text/css" >

 .. _X-Forwarded-For:
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
@@ -55,10 +55,10 @@ from ._helpers import (
 )

 TOKEN_LIVE_TIME = 600
-"""Livetime (sec) of limiter's CSS token."""
+"""Lifetime (sec) of limiter's CSS token."""

 PING_LIVE_TIME = 3600
-"""Livetime (sec) of the ping-key from a client (request)"""
+"""Lifetime (sec) of the ping-key from a client (request)"""

 PING_KEY = 'SearXNG_limiter.ping'
 """Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
18  searx/compat.py  Normal file

@@ -0,0 +1,18 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Compatibility with older versions"""

# pylint: disable=unused-import

__all__ = [
    "tomllib",
]

import sys

# TOML (lib) compatibility
# ------------------------

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib
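The shim keeps call sites identical across Python versions, since ``tomli`` is
API-compatible with the 3.11 stdlib module. A short usage sketch (the file name
is a placeholder):

.. code:: python

   from searx.compat import tomllib

   with open("config.toml", "rb") as f:  # tomllib/tomli require binary mode
       cfg = tomllib.load(f)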
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -5,7 +5,7 @@
     ],
     "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
     "versions": [
-        "126.0",
-        "125.0"
+        "132.0",
+        "131.0"
     ]
 }
File diff suppressed because it is too large
229  searx/engines/adobe_stock.py  Normal file

@@ -0,0 +1,229 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
assets. Asset types include photos, vectors, illustrations, templates, 3D
assets, videos, motion graphics templates and audio tracks.

.. _Adobe Stock: https://stock.adobe.com/

Configuration
=============

The engine has the following mandatory settings:

- SearXNG's :ref:`engine categories`
- Adobe-Stock's :py:obj:`adobe_order`
- Adobe-Stock's :py:obj:`adobe_content_types`

.. code:: yaml

  - name: adobe stock
    engine: adobe_stock
    shortcut: asi
    categories: [images]
    adobe_order: relevance
    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]

  - name: adobe stock video
    engine: adobe_stock
    network: adobe stock
    shortcut: asi
    categories: [videos]
    adobe_order: relevance
    adobe_content_types: ["video"]

Implementation
==============

"""
from __future__ import annotations

from typing import TYPE_CHECKING
from datetime import datetime, timedelta
from urllib.parse import urlencode

import isodate

if TYPE_CHECKING:
    import logging

    logger: logging.Logger

about = {
    "website": "https://stock.adobe.com/",
    "wikidata_id": "Q5977430",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

categories = []
paging = True
send_accept_language_header = True
results_per_page = 10

base_url = "https://stock.adobe.com"

adobe_order: str = ""
"""Sort order, can be one of:

- ``relevance`` or
- ``featured`` or
- ``creation`` (most recent) or
- ``nb_downloads`` (number of downloads)
"""

ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
adobe_content_types: list = []
"""A list of content types.  The following content types are offered:

- Images: ``image``
- Videos: ``video``
- Templates: ``template``
- 3D: ``3d``
- Audio: ``audio``

Additional subcategories:

- Photos: ``photo``
- Illustrations: ``illustration``
- Vectors: ``zip_vector``
"""

# Do we need support for "free_collection" and "include_stock_enterprise"?


def init(_):
    if not categories:
        raise ValueError("adobe_stock engine: categories is unset")

    # adobe_order
    if not adobe_order:
        raise ValueError("adobe_stock engine: adobe_order is unset")
    if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
        raise ValueError(f"unsupported adobe_order: {adobe_order}")

    # adobe_content_types
    if not adobe_content_types:
        raise ValueError("adobe_stock engine: adobe_content_types is unset")

    if isinstance(adobe_content_types, list):
        for t in adobe_content_types:
            if t not in ADOBE_VALID_TYPES:
                raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
    else:
        raise ValueError(
            "adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
        )


def request(query, params):

    args = {
        "k": query,
        "limit": results_per_page,
        "order": adobe_order,
        "search_page": params["pageno"],
        "search_type": "pagination",
    }

    for content_type in ADOBE_VALID_TYPES:
        args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0

    params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"

    # headers required to bypass bot-detection
    if params["searxng_locale"] == "all":
        params["headers"]["Accept-Language"] = "en-US,en;q=0.5"

    return params


def parse_image_item(item):
    return {
        "template": "images.html",
        "url": item["content_url"],
        "title": item["title"],
        "content": item["asset_type"],
        "img_src": item["content_thumb_extra_large_url"],
        "thumbnail_src": item["thumbnail_url"],
        "resolution": f"{item['content_original_width']}x{item['content_original_height']}",
        "img_format": item["format"],
        "author": item["author"],
    }


def parse_video_item(item):

    # in video items, the title is more or less a "content description", we try
    # to reduce the length of the title ..

    title = item["title"]
    content = ""
    if "." in title.strip()[:-1]:
        content = title
        title = title.split(".", 1)[0]
    elif "," in title:
        content = title
        title = title.split(",", 1)[0]
    elif len(title) > 50:
        content = title
        title = ""
        for w in content.split(" "):
            title += f" {w}"
            if len(title) > 50:
                title = title.strip() + "\u2026"
                break

    return {
        "template": "videos.html",
        "url": item["content_url"],
        "title": title,
        "content": content,
        # https://en.wikipedia.org/wiki/ISO_8601#Durations
        "length": isodate.parse_duration(item["time_duration"]),
        "publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
        "thumbnail": item["thumbnail_url"],
        "iframe_src": item["video_small_preview_url"],
        "metadata": item["asset_type"],
    }


def parse_audio_item(item):
    audio_data = item["audio_data"]
    content = audio_data.get("description") or ""
    if audio_data.get("album"):
        content = audio_data["album"] + " - " + content

    return {
        "url": item["content_url"],
        "title": item["title"],
        "content": content,
        # "thumbnail": base_url + item["thumbnail_url"],
        "iframe_src": audio_data["preview"]["url"],
        "publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
        "length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
        "author": item.get("artist_name"),
    }


def response(resp):
    results = []

    json_resp = resp.json()

    if isinstance(json_resp["items"], list):
        return None
    for item in json_resp["items"].values():
        if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
            result = parse_image_item(item)
        elif item["asset_type"].lower() == "video":
            result = parse_video_item(item)
        elif item["asset_type"].lower() == "audio":
            result = parse_audio_item(item)
        else:
            logger.error("no handle for %s --> %s", item["asset_type"], item)
            continue
        results.append(result)

    return results
83  searx/engines/alpinelinux.py  Normal file

@@ -0,0 +1,83 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""`Alpine Linux binary packages`_.  `Alpine Linux`_ is a Linux-based operating
system designed to be small, simple and secure.  Contrary to many other Linux
distributions, it uses musl, BusyBox and OpenRC.  Alpine is mostly used on
servers and for Docker images.

.. _Alpine Linux binary packages: https://pkgs.alpinelinux.org
.. _Alpine Linux: https://www.alpinelinux.org

"""

import re

from urllib.parse import urlencode
from lxml import html
from dateutil import parser

from searx.utils import eval_xpath, eval_xpath_list, extract_text

about = {
    'website': 'https://www.alpinelinux.org',
    'wikidata_id': 'Q4033826',
    'use_official_api': False,
    'official_api_documentation': None,
    'require_api_key': False,
    'results': 'HTML',
}
paging = True
categories = ['packages', 'it']

base_url = "https://pkgs.alpinelinux.org"
alpine_arch = 'x86_64'
"""Kernel architecture: ``x86_64``, ``x86``, ``aarch64``, ``armhf``,
``ppc64le``, ``s390x``, ``armv7`` or ``riscv64``"""

ARCH_RE = re.compile("x86_64|x86|aarch64|armhf|ppc64le|s390x|armv7|riscv64")
"""Regular expression to match supported architectures in the query string."""


def request(query, params):
    query_arch = ARCH_RE.search(query)
    if query_arch:
        query_arch = query_arch.group(0)
        query = query.replace(query_arch, '').strip()

    args = {
        # use wildcards to match more than just packages with the exact same
        # name as the query
        'name': f"*{query}*",
        'page': params['pageno'],
        'arch': query_arch or alpine_arch,
    }
    params['url'] = f"{base_url}/packages?{urlencode(args)}"
    return params


def response(resp):
    results = []

    doc = html.fromstring(resp.text)
    for result in eval_xpath_list(doc, "//table/tbody/tr"):

        if len(result.xpath("./td")) < 9:
            # skip invalid entries in the result table,
            # e.g. the "No item found..." message
            continue

        results.append(
            {
                'template': 'packages.html',
                'url': base_url + extract_text(eval_xpath(result, './td[contains(@class, "package")]/a/@href')),
                'title': extract_text(eval_xpath(result, './td[contains(@class, "package")]')),
                'package_name': extract_text(eval_xpath(result, './td[contains(@class, "package")]')),
                'publishedDate': parser.parse(extract_text(eval_xpath(result, './td[contains(@class, "bdate")]'))),
                'version': extract_text(eval_xpath(result, './td[contains(@class, "version")]')),
                'homepage': extract_text(eval_xpath(result, './td[contains(@class, "url")]/a/@href')),
                'maintainer': extract_text(eval_xpath(result, './td[contains(@class, "maintainer")]')),
                'license_name': extract_text(eval_xpath(result, './td[contains(@class, "license")]')),
                'tags': [extract_text(eval_xpath(result, './td[contains(@class, "repo")]'))],
            }
        )

    return results
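How the architecture filter behaves in practice (query and values invented):

.. code:: python

   m = ARCH_RE.search("openssl aarch64")
   assert m and m.group(0) == "aarch64"
   # the engine then strips the token and queries name="*openssl*" with arch=aarch64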
@@ -34,10 +34,10 @@ Implementations
 """

 from typing import List, Dict, Any, Optional
-from urllib.parse import quote
+from urllib.parse import urlencode
 from lxml import html

-from searx.utils import extract_text, eval_xpath, eval_xpath_list
+from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
 from searx.enginelib.traits import EngineTraits
 from searx.data import ENGINE_TRAITS
@@ -53,7 +53,7 @@ about: Dict[str, Any] = {

 # engine dependent config
 categories: List[str] = ["files"]
-paging: bool = False
+paging: bool = True

 # search-url
 base_url: str = "https://annas-archive.org"
@@ -99,9 +99,18 @@ def init(engine_settings=None):  # pylint: disable=unused-argument


 def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
-    q = quote(query)
     lang = traits.get_language(params["language"], traits.all_locale)  # type: ignore
-    params["url"] = base_url + f"/search?lang={lang or ''}&content={aa_content}&ext={aa_ext}&sort={aa_sort}&q={q}"
+    args = {
+        'lang': lang,
+        'content': aa_content,
+        'ext': aa_ext,
+        'sort': aa_sort,
+        'q': query,
+        'page': params['pageno'],
+    }
+    # filter out None and empty values
+    filtered_args = dict((k, v) for k, v in args.items() if v)
+    params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"
     return params
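The switch to ``urlencode`` above is not only cosmetic: reserved characters in
the query are escaped and unset parameters are dropped entirely. For instance
(values invented):

.. code:: python

   from urllib.parse import urlencode

   args = {'lang': None, 'content': '', 'q': 'C++ primer', 'page': 2}
   filtered_args = dict((k, v) for k, v in args.items() if v)
   urlencode(filtered_args)  # -> 'q=C%2B%2B+primer&page=2'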
@@ -128,12 +137,12 @@ def response(resp) -> List[Dict[str, Optional[str]]]:
 def _get_result(item):
     return {
         'template': 'paper.html',
-        'url': base_url + item.xpath('./@href')[0],
+        'url': base_url + extract_text(eval_xpath_getindex(item, './@href', 0)),
         'title': extract_text(eval_xpath(item, './/h3/text()[1]')),
         'publisher': extract_text(eval_xpath(item, './/div[contains(@class, "text-sm")]')),
         'authors': [extract_text(eval_xpath(item, './/div[contains(@class, "italic")]'))],
         'content': extract_text(eval_xpath(item, './/div[contains(@class, "text-xs")]')),
-        'thumbnail': item.xpath('.//img/@src')[0],
+        'thumbnail': extract_text(eval_xpath_getindex(item, './/img/@src', 0, default=None), allow_none=True),
     }
@@ -184,3 +193,8 @@ def fetch_traits(engine_traits: EngineTraits):

     for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
         engine_traits.custom['sort'].append(x.get("value"))
+
+    # for better diff; sort the persistence of these traits
+    engine_traits.custom['content'].sort()
+    engine_traits.custom['ext'].sort()
+    engine_traits.custom['sort'].sort()
@@ -31,7 +31,7 @@ paging = True
 number_of_results = 10

 # shortcuts for advanced search
-shorcut_dict = {
+shortcut_dict = {
     # user-friendly keywords
     'format:': 'dcformat:',
     'author:': 'dccreator:',
@@ -55,7 +55,7 @@ shorcut_dict = {

 def request(query, params):
     # replace shortcuts with API advanced search keywords
-    for key, val in shorcut_dict.items():
+    for key, val in shortcut_dict.items():
         query = re.sub(key, val, query)

     # basic search
@@ -9,6 +9,8 @@ import string
 from urllib.parse import urlencode
 from datetime import datetime, timedelta

+from searx import utils
+
 # Engine metadata
 about = {
     "website": "https://www.bilibili.com",
@@ -56,6 +58,8 @@ def request(query, params):

 # Format the video duration
 def format_duration(duration):
+    if not ":" in duration:
+        return None
     minutes, seconds = map(int, duration.split(":"))
     total_seconds = minutes * 60 + seconds
|
|||
results = []
|
||||
|
||||
for item in search_res.get("data", {}).get("result", []):
|
||||
title = item["title"]
|
||||
title = utils.html_to_text(item["title"])
|
||||
url = item["arcurl"]
|
||||
thumbnail = item["pic"]
|
||||
description = item["description"]
|
||||
|
|
|
|||
|
|
@@ -10,7 +10,7 @@ On the `preference page`_ Bing offers a lot of languages an regions (see section
 LANGUAGE and COUNTRY/REGION).  The Language is the language of the UI, we need
 in SearXNG to get the translations of data such as *"published last week"*.

-There is a description of the offical search-APIs_, unfortunately this is not
+There is a description of the official search-APIs_, unfortunately this is not
 the API we can use or that bing itself would use.  You can look up some things
 in the API to get a better picture of bing, but the value specifications like
 the market codes are usually outdated or at least no longer used by bing itself.
@@ -91,7 +91,7 @@ def request(query, params):
     page = params.get('pageno', 1)
     query_params = {
         'q': query,
-        # if arg 'pq' is missed, somtimes on page 4 we get results from page 1,
+        # if arg 'pq' is missed, sometimes on page 4 we get results from page 1,
         # don't ask why it is only sometimes / its M$ and they have never been
         # deterministic ;)
         'pq': query,
@@ -177,7 +177,7 @@ def response(resp):
         logger.debug('result error :\n%s', e)

     if result_len and _page_offset(resp.search_params.get("pageno", 0)) > result_len:
-        # Avoid reading more results than avalaible.
+        # Avoid reading more results than available.
         # For example, if there is 100 results from some search and we try to get results from 120 to 130,
         # Bing will send back the results from 0 to 10 and no error.
         # If we compare results count with the first parameter of the request we can avoid this "invalid" results.
@@ -99,7 +99,7 @@ def response(resp):
             'url': metadata['purl'],
             'thumbnail_src': metadata['turl'],
             'img_src': metadata['murl'],
-            'content': metadata['desc'],
+            'content': metadata.get('desc'),
             'title': title,
             'source': source,
             'resolution': img_format[0],
@@ -123,7 +123,9 @@ def response(resp):
         thumbnail = None
         imagelink = eval_xpath_getindex(newsitem, './/a[@class="imagelink"]//img', 0, None)
         if imagelink is not None:
-            thumbnail = 'https://www.bing.com/' + imagelink.attrib.get('src')
+            thumbnail = imagelink.attrib.get('src')
+            if not thumbnail.startswith("https://www.bing.com"):
+                thumbnail = 'https://www.bing.com/' + thumbnail

         results.append(
             {
@@ -123,7 +123,6 @@ from typing import Any, TYPE_CHECKING
 from urllib.parse import (
     urlencode,
     urlparse,
-    parse_qs,
 )

 from dateutil import parser
@@ -137,6 +136,7 @@ from searx.utils import (
     eval_xpath_list,
     eval_xpath_getindex,
     js_variable_to_python,
+    get_embeded_stream_url,
 )
 from searx.enginelib.traits import EngineTraits
@@ -311,7 +311,7 @@ def _parse_search(resp):
             # In my tests a video tag in the WEB search was most often not a
             # video, except the ones from youtube ..

-            iframe_src = _get_iframe_src(url)
+            iframe_src = get_embeded_stream_url(url)
             if iframe_src:
                 item['iframe_src'] = iframe_src
                 item['template'] = 'videos.html'
@@ -328,15 +328,6 @@ def _parse_search(resp):
     return result_list


-def _get_iframe_src(url):
-    parsed_url = urlparse(url)
-    if parsed_url.path == '/watch' and parsed_url.query:
-        video_id = parse_qs(parsed_url.query).get('v', [])  # type: ignore
-        if video_id:
-            return 'https://www.youtube-nocookie.com/embed/' + video_id[0]  # type: ignore
-    return None
-
-
 def _parse_news(json_resp):
     result_list = []
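``get_embeded_stream_url`` from ``searx.utils`` is the generic replacement for
the removed YouTube-only helper; for YouTube watch URLs it yields the same kind
of privacy-friendly embed. Illustrative call (output form as expected from the
removed helper, not re-verified here):

.. code:: python

   from searx.utils import get_embeded_stream_url

   get_embeded_stream_url("https://www.youtube.com/watch?v=aqz-KE-bpKQ")
   # e.g. -> 'https://www.youtube-nocookie.com/embed/aqz-KE-bpKQ'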
@@ -392,7 +383,7 @@ def _parse_videos(json_resp):
         if result['thumbnail'] is not None:
             item['thumbnail'] = result['thumbnail']['src']

-        iframe_src = _get_iframe_src(url)
+        iframe_src = get_embeded_stream_url(url)
         if iframe_src:
             item['iframe_src'] = iframe_src
@@ -426,14 +417,15 @@ def fetch_traits(engine_traits: EngineTraits):
         print("ERROR: response from Brave is not OK.")
     dom = html.fromstring(resp.text)  # type: ignore

-    for option in dom.xpath('//div[@id="language-select"]//option'):
+    for option in dom.xpath('//section//option[@value="en-us"]/../option'):
+
         ui_lang = option.get('value')
         try:
-            if '-' in ui_lang:
+            l = babel.Locale.parse(ui_lang, sep='-')
+            if l.territory:
                 sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
             else:
-                sxng_tag = language_tag(babel.Locale.parse(ui_lang))
+                sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep='-'))
+
         except babel.UnknownLocaleError:
             print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
@@ -453,7 +445,7 @@ def fetch_traits(engine_traits: EngineTraits):
     if not resp.ok:  # type: ignore
         print("ERROR: response from Brave is not OK.")

-    country_js = resp.text[resp.text.index("options:{all") + len('options:') :]
+    country_js = resp.text[resp.text.index("options:{all") + len('options:') :]  # type: ignore
     country_js = country_js[: country_js.index("},k={default")]
     country_tags = js_variable_to_python(country_js)
@@ -54,7 +54,6 @@ def response(resp):

         excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
         content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False)
-        # it is better to emit <br/> instead of |, but html tags are verboten
         content = content.strip().replace('\n', ' | ')
         content = ' '.join(content.split())
68  searx/engines/cloudflareai.py  Normal file

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Cloudflare AI engine"""

from json import loads, dumps
from searx.exceptions import SearxEngineAPIException

about = {
    "website": 'https://ai.cloudflare.com',
    "wikidata_id": None,
    "official_api_documentation": 'https://developers.cloudflare.com/workers-ai',
    "use_official_api": True,
    "require_api_key": True,
    "results": 'JSON',
}

cf_account_id = ''
cf_ai_api = ''
cf_ai_gateway = ''

cf_ai_model = ''
cf_ai_model_display_name = 'Cloudflare AI'

# Assistant messages hint to the AI about the desired output format. Not all models support this role.
cf_ai_model_assistant = 'Keep your answers as short and effective as possible.'
# System messages define the AI's personality. You can use them to set rules and how you expect the AI to behave.
cf_ai_model_system = 'You are a self-aware language model who is honest and direct about any question from the user.'


def request(query, params):

    params['query'] = query

    params['url'] = f'https://gateway.ai.cloudflare.com/v1/{cf_account_id}/{cf_ai_gateway}/workers-ai/{cf_ai_model}'

    params['method'] = 'POST'

    params['headers']['Authorization'] = f'Bearer {cf_ai_api}'
    params['headers']['Content-Type'] = 'application/json'

    params['data'] = dumps(
        {
            'messages': [
                {'role': 'assistant', 'content': cf_ai_model_assistant},
                {'role': 'system', 'content': cf_ai_model_system},
                {'role': 'user', 'content': params['query']},
            ]
        }
    ).encode('utf-8')

    return params


def response(resp):
    results = []
    json = loads(resp.text)

    if 'error' in json:
        raise SearxEngineAPIException('Cloudflare AI error: ' + json['error'])

    if 'result' in json:
        results.append(
            {
                'content': json['result']['response'],
                'infobox': cf_ai_model_display_name,
            }
        )

    return results
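The module reads its credentials from the engine settings, which the file itself
does not document. A possible ``settings.yml`` entry, following the convention
of the other engines; every value is a placeholder, and the model id is only an
example of the Workers-AI naming scheme:

.. code:: yaml

  - name: cloudflare ai
    engine: cloudflareai
    shortcut: cfai
    cf_account_id: 'your_account_id'
    cf_ai_api: 'your_api_token'
    cf_ai_gateway: 'your_gateway_name'
    cf_ai_model: '@cf/meta/llama-3-8b-instruct'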
@@ -10,6 +10,8 @@ engine offers some additional settings:
 - :py:obj:`api_order`
 - :py:obj:`search_endpoint`
 - :py:obj:`show_avatar`
+- :py:obj:`api_key`
+- :py:obj:`api_username`

 Example
 =======
@@ -27,6 +29,20 @@ for the ``paddling.com`` forum:
       categories: ['social media', 'sports']
       show_avatar: true

+If the forum is private, you need to add an API key and username for the search:
+
+.. code:: yaml
+
+  - name: paddling
+    engine: discourse
+    shortcut: paddle
+    base_url: 'https://forums.paddling.com/'
+    api_order: views
+    categories: ['social media', 'sports']
+    show_avatar: true
+    api_key: '<KEY>'
+    api_username: 'system'
+

 Implementations
 ===============
@@ -65,6 +81,12 @@ api_order = 'likes'
 show_avatar = False
 """Show avatar of the user who send the post."""

+api_key = ''
+"""API key of the Discourse forum."""
+
+api_username = ''
+"""API username of the Discourse forum."""
+
 paging = True
 time_range_support = True
@@ -98,6 +120,12 @@ def request(query, params):
         'X-Requested-With': 'XMLHttpRequest',
     }

+    if api_key != '':
+        params['headers']['Api-Key'] = api_key
+
+    if api_username != '':
+        params['headers']['Api-Username'] = api_username
+
     return params
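Outside of SearXNG the same authentication can be reproduced directly against
Discourse's public search API; a sketch with ``requests`` (forum URL and
credentials are placeholders):

.. code:: python

   import requests

   resp = requests.get(
       "https://forums.example.org/search.json",
       params={"q": "kayak"},
       headers={
           "Accept": "application/json",
           "X-Requested-With": "XMLHttpRequest",
           "Api-Key": "<KEY>",
           "Api-Username": "system",
       },
       timeout=10,
   )
   print(resp.json().get("topics", []))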
@@ -1,12 +1,14 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
-DuckDuckGo Lite
-~~~~~~~~~~~~~~~
+DuckDuckGo WEB
+~~~~~~~~~~~~~~
 """

 from __future__ import annotations

 from typing import TYPE_CHECKING
 import re
-from urllib.parse import urlencode
+from urllib.parse import urlencode, quote_plus
 import json
 import babel
 import lxml.html
@@ -18,13 +20,13 @@ from searx import (
 )
 from searx.utils import (
     eval_xpath,
     eval_xpath_getindex,
+    extr,
     extract_text,
 )
 from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 from searx import redisdb
 from searx.enginelib.traits import EngineTraits
-from searx.utils import extr
 from searx.exceptions import SearxEngineCaptchaException

 if TYPE_CHECKING:
     import logging
@@ -42,7 +44,7 @@ about = {
 }

 send_accept_language_header = True
-"""DuckDuckGo-Lite tries to guess user's prefered language from the HTTP
+"""DuckDuckGo-Lite tries to guess user's preferred language from the HTTP
 ``Accept-Language``. Optional the user can select a region filter (but not a
 language).
 """
@@ -53,47 +55,37 @@ paging = True
 time_range_support = True
 safesearch = True  # user can't select but the results are filtered

-url = 'https://lite.duckduckgo.com/lite/'
-# url_ping = 'https://duckduckgo.com/t/sl_l'
+url = "https://html.duckduckgo.com/html"

 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
+__CACHE = []


-def cache_vqd(query, value):
+def _cache_key(query: str, region: str):
+    return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{query}//{region}")
+
+
+def cache_vqd(query: str, region: str, value: str):
     """Caches a ``vqd`` value from a query."""
     c = redisdb.client()
     if c:
-        logger.debug("cache vqd value: %s", value)
-        key = 'SearXNG_ddg_web_vqd' + redislib.secret_hash(query)
-        c.set(key, value, ex=600)
+        logger.debug("VALKEY cache vqd value: %s (%s)", value, region)
+        c.set(_cache_key(query, region), value, ex=600)
+
+    else:
+        logger.debug("MEM cache vqd value: %s (%s)", value, region)
+        if len(__CACHE) > 100:  # cache vqd from last 100 queries
+            __CACHE.pop(0)
+        __CACHE.append((_cache_key(query, region), value))


-def get_vqd(query):
-    """Returns the ``vqd`` that fits to the *query*.  If there is no ``vqd`` cached
-    (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
-    response.
+def get_vqd(query: str, region: str, force_request: bool = False):
+    """Returns the ``vqd`` that fits to the *query*.

     .. hint::

        If an empty string is returned there are no results for the ``query`` and
        therefore no ``vqd`` value.

-       DDG's bot detection is sensitive to the ``vqd`` value.  For some search terms
-       (such as extremely long search terms that are often sent by bots), no ``vqd``
-       value can be determined.
-
-       If SearXNG cannot determine a ``vqd`` value, then no request should go out
-       to DDG:
-
-       A request with a wrong ``vqd`` value leads to DDG temporarily putting
-       SearXNG's IP on a block list.
-
-       Requests from IPs in this block list run into timeouts.
-
-       Not sure, but it seems the block list is a sliding window: to get my IP rid
-       from the bot list I had to cool down my IP for 1h (send no requests from
-       that IP to DDG).
+    :param query: The query term
+    :param region: DDG's region code
+    :param force_request: force a request to get a vqd value from DDG

     TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used
     by all request to DDG:
@@ -104,29 +96,47 @@ def get_vqd(query):
     - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...``
     - DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...``

+    DDG's bot detection is sensitive to the ``vqd`` value.  For some search terms
+    (such as extremely long search terms that are often sent by bots), no ``vqd``
+    value can be determined.
+
+    If SearXNG cannot determine a ``vqd`` value, then no request should go out
+    to DDG.
+
+    .. attention::
+
+       A request with a wrong ``vqd`` value leads to DDG temporarily putting
+       SearXNG's IP on a block list.
+
+       Requests from IPs in this block list run into timeouts.  Not sure, but it
+       seems the block list is a sliding window: to get my IP rid from the bot list
+       I had to cool down my IP for 1h (send no requests from that IP to DDG).
     """
-    value = None
+    key = _cache_key(query, region)
+
     c = redisdb.client()
     if c:
-        key = 'SearXNG_ddg_web_vqd' + redislib.secret_hash(query)
         value = c.get(key)
         if value or value == b'':
-            value = value.decode('utf-8')
-            logger.debug("re-use cached vqd value: %s", value)
+            value = value.decode('utf-8')  # type: ignore
+            logger.debug("re-use CACHED vqd value: %s", value)
             return value

-    query_url = 'https://duckduckgo.com/?' + urlencode({'q': query})
-    res = get(query_url)
-    doc = lxml.html.fromstring(res.text)
-    for script in doc.xpath("//script[@type='text/javascript']"):
-        script = script.text
-        if 'vqd="' in script:
-            value = extr(script, 'vqd="', '"')
-            break
-    logger.debug("new vqd value: '%s'", value)
-    if value is not None:
-        cache_vqd(query, value)
-    return value
+    for k, value in __CACHE:
+        if k == key:
+            logger.debug("MEM re-use CACHED vqd value: %s", value)
+            return value
+
+    if force_request:
+        resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
+        if resp.status_code == 200:  # type: ignore
+            value = extr(resp.text, 'vqd="', '"')  # type: ignore
+            if value:
+                logger.debug("vqd value from DDG request: %s", value)
+                cache_vqd(query, region, value)
+                return value
+
+    return None


 def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
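Taken together, the rewrite gives ``vqd`` lookups three tiers. A compressed
usage sketch of the behaviour the docstring describes (query and region
invented):

.. code:: python

   # 1. Redis/Valkey cache, keyed by secret_hash("<query>//<region>")
   # 2. the in-process __CACHE list (last ~100 queries) when Redis is absent
   # 3. a live request to duckduckgo.com, but only when force_request=True
   vqd = get_vqd("searxng", "us-en", force_request=True)
   if vqd is None:
       pass  # no request should go out to DDG without a valid vqd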
@@ -154,9 +164,10 @@ def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):

     .. hint::

-       `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
-       selection to the user, only a region can be selected by the user
-       (``eng_region`` from the example above).  DDG-lite stores the selected
+       `DDG-lite <https://lite.duckduckgo.com/lite>`__ and the *no Javascript*
+       page https://html.duckduckgo.com/html do not offer a language selection
+       to the user, only a region can be selected by the user (``eng_region``
+       from the example above).  DDG-lite and *no Javascript* store the selected
        region in a cookie::

            params['cookies']['kl'] = eng_region  # 'ar-es'
@@ -240,10 +251,27 @@ def request(query, params):

     query = quote_ddg_bangs(query)

-    # request needs a vqd argument
-    vqd = get_vqd(query)
+    if len(query) >= 500:
+        # DDG does not accept queries with more than 499 chars
+        params["url"] = None
+        return
+
+    # Advanced search syntax ends in CAPTCHA
+    # https://duckduckgo.com/duckduckgo-help-pages/results/syntax/
+    query = " ".join(
+        [
+            x.removeprefix("site:").removeprefix("intitle:").removeprefix("inurl:").removeprefix("filetype:")
+            for x in query.split()
+        ]
+    )

-    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+    eng_region: str = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore
+    if eng_region == "wt-wt":
+        # https://html.duckduckgo.com/html sets an empty value for "all".
+        eng_region = ""
+
+    params['data']['kl'] = eng_region
+    params['cookies']['kl'] = eng_region

     # eng_lang = get_ddg_lang(traits, params['searxng_locale'])

     params['url'] = url
@@ -251,45 +279,79 @@ def request(query, params):
     params['data']['q'] = query

     # The API is not documented, so we do some reverse engineering and emulate
-    # what https://lite.duckduckgo.com/lite/ does when you press "next Page"
-    # link again and again ..
+    # what https://html.duckduckgo.com/html does when you press "next Page" link
+    # again and again ..

     params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
-    params['data']['vqd'] = vqd

-    # initial page does not have an offset
+    params['headers']['Sec-Fetch-Dest'] = "document"
+    params['headers']['Sec-Fetch-Mode'] = "navigate"  # at least this one is used by ddg's bot detection
+    params['headers']['Sec-Fetch-Site'] = "same-origin"
+    params['headers']['Sec-Fetch-User'] = "?1"
+
+    # Form of the initial search page does have empty values in the form
+    if params['pageno'] == 1:
+
+        params['data']['b'] = ""
+
+    params['data']['df'] = ''
+    if params['time_range'] in time_range_dict:
+
+        params['data']['df'] = time_range_dict[params['time_range']]
+        params['cookies']['df'] = time_range_dict[params['time_range']]
+
     if params['pageno'] == 2:
+
         # second page does have an offset of 20
         offset = (params['pageno'] - 1) * 20
         params['data']['s'] = offset
         params['data']['dc'] = offset + 1
+
     elif params['pageno'] > 2:
+
         # third and following pages do have an offset of 20 + n*50
         offset = 20 + (params['pageno'] - 2) * 50
         params['data']['s'] = offset
         params['data']['dc'] = offset + 1

-    # initial page does not have additional data in the input form
     if params['pageno'] > 1:
+
         # initial page does not have these additional data in the input form
         params['data']['o'] = form_data.get('o', 'json')
         params['data']['api'] = form_data.get('api', 'd.js')
         params['data']['nextParams'] = form_data.get('nextParams', '')
         params['data']['v'] = form_data.get('v', 'l')
-        params['headers']['Referer'] = 'https://lite.duckduckgo.com/'
+        params['headers']['Referer'] = url

-    params['data']['kl'] = eng_region
-    params['cookies']['kl'] = eng_region
+    vqd = get_vqd(query, eng_region, force_request=False)

-    params['data']['df'] = ''
-    if params['time_range'] in time_range_dict:
-        params['data']['df'] = time_range_dict[params['time_range']]
-        params['cookies']['df'] = time_range_dict[params['time_range']]
+    # Certain conditions must be met in order to call up one of the
+    # following pages ...
+
+    if vqd:
+        params['data']['vqd'] = vqd  # follow up pages / requests needs a vqd argument
+    else:
+        # Don't try to call follow up pages without a vqd value.  DDG
+        # recognizes this as a request from a bot.  This lowers the
+        # reputation of the SearXNG IP and DDG starts to activate CAPTCHAs.
+        params["url"] = None
+        return
+
+    if params['searxng_locale'].startswith("zh"):
+        # Some locales (at least China) do not have a "next page" button and ddg
+        # will return a HTTP/2 403 Forbidden for a request of such a page.
+        params["url"] = None
+        return

     logger.debug("param data: %s", params['data'])
     logger.debug("param cookies: %s", params['cookies'])
     return params
+
+
+def is_ddg_captcha(dom):
+    """In case of CAPTCHA ddg response its own *not a Robot* dialog and is not
+    redirected to a CAPTCHA page."""
+
+    return bool(eval_xpath(dom, "//form[@id='challenge-form']"))


 def response(resp):
@@ -300,38 +362,40 @@ def response(resp):
     results = []
     doc = lxml.html.fromstring(resp.text)

-    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
+    if is_ddg_captcha(doc):
+        # set suspend time to zero is OK --> ddg does not block the IP
+        raise SearxEngineCaptchaException(suspended_time=0, message=f"CAPTCHA ({resp.search_params['data'].get('kl')})")

-    if len(result_table) == 2:
-        # some locales (at least China) does not have a "next page" button and
-        # the layout of the HTML tables is different.
-        result_table = result_table[1]
-    elif not len(result_table) >= 3:
-        # no more results
-        return []
-    else:
-        result_table = result_table[2]
-        # update form data from response
-        form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
-        if len(form):
-            form = form[0]
-            form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
-            form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
-            form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
-            logger.debug('form_data: %s', form_data)
+    form = eval_xpath(doc, '//input[@name="vqd"]/..')
+    if len(form):
+        # some locales (at least China) does not have a "next page" button
+        form = form[0]
+        form_vqd = eval_xpath(form, '//input[@name="vqd"]/@value')[0]

-    tr_rows = eval_xpath(result_table, './/tr')
-    # In the last <tr> is the form of the 'previous/next page' links
-    tr_rows = tr_rows[:-1]
+        cache_vqd(resp.search_params['data']['q'], resp.search_params['data']['kl'], form_vqd)

-    len_tr_rows = len(tr_rows)
-    offset = 0
+    # just select "web-result" and ignore results of class "result--ad result--ad--small"
+    for div_result in eval_xpath(doc, '//div[@id="links"]/div[contains(@class, "web-result")]'):

-    zero_click_info_xpath = '//html/body/form/div/table[2]/tr[2]/td/text()'
-    zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
+        item = {}
+        title = eval_xpath(div_result, './/h2/a')
+        if not title:
+            # this is the "No results." item in the result list
+            continue
+        item["title"] = extract_text(title)
+        item["url"] = eval_xpath(div_result, './/h2/a/@href')[0]
+        item["content"] = extract_text(eval_xpath(div_result, './/a[contains(@class, "result__snippet")]')[0])

-    if zero_click and "Your IP address is" not in zero_click:
+        results.append(item)
+
+    zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
+    zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()  # type: ignore
+
+    if zero_click and (
+        "Your IP address is" not in zero_click
+        and "Your user agent:" not in zero_click
+        and "URL Decoded:" not in zero_click
+    ):
+        current_query = resp.search_params["data"].get("q")

         results.append(
@@ -341,33 +405,6 @@ def response(resp):
             }
         )

-    while len_tr_rows >= offset + 4:
-
-        # assemble table rows we need to scrap
-        tr_title = tr_rows[offset]
-        tr_content = tr_rows[offset + 1]
-        offset += 4
-
-        # ignore sponsored Adds <tr class="result-sponsored">
-        if tr_content.get('class') == 'result-sponsored':
-            continue
-
-        a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
-        if a_tag is None:
-            continue
-
-        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
-        if td_content is None:
-            continue
-
-        results.append(
-            {
-                'title': a_tag.text_content(),
-                'content': extract_text(td_content),
-                'url': a_tag.get('href'),
-            }
-        )
-
     return results
@@ -375,7 +412,7 @@ def fetch_traits(engine_traits: EngineTraits):
     """Fetch languages & regions from DuckDuckGo.

     SearXNG's ``all`` locale maps DuckDuckGo's "Alle regions" (``wt-wt``).
-    DuckDuckGo's language "Browsers prefered language" (``wt_WT``) makes no
+    DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no
     sense in a SearXNG request since SearXNG's ``all`` will not add a
     ``Accept-Language`` HTTP header.  The value in ``engine_traits.all_locale``
     is ``wt-wt`` (the region).
@@ -405,7 +442,7 @@ def fetch_traits(engine_traits: EngineTraits):
     if not resp.ok:  # type: ignore
         print("ERROR: response from DuckDuckGo is not OK.")

-    js_code = extr(resp.text, 'regions:', ',snippetLengths')
+    js_code = extr(resp.text, 'regions:', ',snippetLengths')  # type: ignore

     regions = json.loads(js_code)
     for eng_tag, name in regions.items():
@@ -439,7 +476,7 @@ def fetch_traits(engine_traits: EngineTraits):

     engine_traits.custom['lang_region'] = {}

-    js_code = extr(resp.text, 'languages:', ',regions')
+    js_code = extr(resp.text, 'languages:', ',regions')  # type: ignore

     languages = js_variable_to_python(js_code)
     for eng_lang, name in languages.items():
@@ -4,15 +4,15 @@ DuckDuckGo Extra (images, videos, news)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

 from __future__ import annotations

 from datetime import datetime
 from typing import TYPE_CHECKING
 from urllib.parse import urlencode
+from searx.utils import get_embeded_stream_url

 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
-from searx.engines.duckduckgo import (
-    get_ddg_lang,
-    get_vqd,
-)
+from searx.engines.duckduckgo import get_ddg_lang, get_vqd
 from searx.enginelib.traits import EngineTraits

 if TYPE_CHECKING:
@@ -47,15 +47,16 @@ search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}


 def request(query, params):
+    eng_region: str = traits.get_region(params['searxng_locale'], traits.all_locale)  # type: ignore

     # request needs a vqd argument
-    vqd = get_vqd(query)
+    vqd = get_vqd(query, eng_region, force_request=True)

     if not vqd:
         # some search terms do not have results and therefore no vqd value
         params['url'] = None
         return params

-    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
     eng_lang = get_ddg_lang(traits, params['searxng_locale'])

     args = {
@@ -85,6 +86,12 @@ def request(query, params):

     params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'

+    # sending these two headers prevents rate limiting for the query
+    params['headers'] = {
+        'Referer': 'https://duckduckgo.com/',
+        'X-Requested-With': 'XMLHttpRequest',
+    }
+
     return params
@@ -108,7 +115,7 @@ def _video_result(result):
         'title': result['title'],
         'content': result['description'],
         'thumbnail': result['images'].get('small') or result['images'].get('medium'),
-        'iframe_src': result['embed_url'],
+        'iframe_src': get_embeded_stream_url(result['content']),
         'source': result['provider'],
         'length': result['duration'],
         'metadata': result.get('uploader'),
@@ -35,8 +35,8 @@ def response(resp):
     results = []

     for item in search_res:
-        img = 'https://findthatmeme.us-southeast-1.linodeobjects.com/' + item['image_path']
-        thumb = 'https://findthatmeme.us-southeast-1.linodeobjects.com/thumb/' + item.get('thumbnail', '')
+        img = 'https://s3.thehackerblog.com/findthatmeme/' + item['image_path']
+        thumb = 'https://s3.thehackerblog.com/findthatmeme/thumb/' + item.get('thumbnail', '')
         date = datetime.strptime(item["updated_at"].split("T")[0], "%Y-%m-%d")
         formatted_date = datetime.utcfromtimestamp(date.timestamp())
97  searx/engines/geizhals.py  Normal file

@@ -0,0 +1,97 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Geizhals is a German website to compare the price of a product on the
most common German shopping sites and find the lowest price.

The sorting of the search results can be influenced by the following additions
to the search term:

``asc`` or ``price``
  To sort by price in ascending order.

``desc``
  To sort by price in descending order.

"""

import re

from urllib.parse import urlencode
from lxml import html

from searx.utils import eval_xpath, eval_xpath_list, extract_text

about = {
    'website': 'https://geizhals.de',
    'wikidata_id': 'Q15977657',
    'use_official_api': False,
    'official_api_documentation': None,
    'require_api_key': False,
    'results': 'HTML',
    'language': 'de',
}
paging = True
categories = ['shopping']

base_url = "https://geizhals.de"
sort_order = 'relevance'

SORT_RE = re.compile(r"sort:(\w+)")
sort_order_map = {
    'relevance': None,
    'price': 'p',
    'asc': 'p',
    'desc': '-p',
}


def request(query, params):
    sort = None

    sort_order_path = SORT_RE.search(query)
    if sort_order_path:
        sort = sort_order_map.get(sort_order_path.group(1))
        query = SORT_RE.sub("", query)
        logger.debug(query)

    args = {
        'fs': query,
        'pg': params['pageno'],
        'toggle_all': 1,  # load item specs
        'sort': sort,
    }
    params['url'] = f"{base_url}/?{urlencode(args)}"

    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)
    for result in eval_xpath_list(dom, "//article[contains(@class, 'listview__item')]"):
        content = []
        for spec in eval_xpath_list(result, ".//div[contains(@class, 'specs-grid__item')]"):
            content.append(f"{extract_text(eval_xpath(spec, './dt'))}: {extract_text(eval_xpath(spec, './dd'))}")

        metadata = [
            extract_text(eval_xpath(result, ".//div[contains(@class, 'stars-rating-label')]")),
            extract_text(eval_xpath(result, ".//div[contains(@class, 'listview__offercount')]")),
        ]

        item = {
            'template': 'products.html',
            'url': (
                base_url + "/" + extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__name-link')]/@href"))
            ),
            'title': extract_text(eval_xpath(result, ".//h3[contains(@class, 'listview__name')]")),
            'content': ' | '.join(content),
            'thumbnail': extract_text(eval_xpath(result, ".//img[contains(@class, 'listview__image')]/@src")),
            'metadata': ', '.join(item for item in metadata if item),
        }

        best_price = extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__price-link')]")).split(" ")
        if len(best_price) > 1:
            item["price"] = f"Bestes Angebot: {best_price[1]}€"
        results.append(item)

    return results
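The ``sort:`` mini-syntax from the module docstring maps straight through
``SORT_RE`` and ``sort_order_map`` (query invented):

.. code:: python

   SORT_RE.search("rtx 4070 sort:desc").group(1)  # -> 'desc'
   sort_order_map['desc']  # -> '-p', i.e. price descending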
@@ -1,125 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Gentoo Wiki
"""

from urllib.parse import urlencode, urljoin
from lxml import html
from searx.utils import extract_text

# about
about = {
    "website": 'https://wiki.gentoo.org/',
    "wikidata_id": 'Q1050637',
    "official_api_documentation": 'https://wiki.gentoo.org/api.php',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['it', 'software wikis']
paging = True
base_url = 'https://wiki.gentoo.org'

# xpath queries
xpath_results = '//ul[@class="mw-search-results"]/li'
xpath_link = './/div[@class="mw-search-result-heading"]/a'
xpath_content = './/div[@class="searchresult"]'


# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
def locale_to_lang_code(locale):
    if locale.find('-') >= 0:
        locale = locale.split('-')[0]
    return locale


# wikis for some languages were moved off from the main site, we need to make
# requests to correct URLs to be able to get results in those languages
lang_urls = {
    'en': {'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}'},
    'others': {
        'base': 'https://wiki.gentoo.org',
        'search': '/index.php?title=Special:Search&offset={offset}&{query}\
&profile=translation&languagefilter={language}',
    },
}


# get base & search URLs for selected language
def get_lang_urls(language):
    if language != 'en':
        return lang_urls['others']
    return lang_urls['en']


# Language names to build search requests for
# those languages which are hosted on the main site.
main_langs = {
    'ar': 'العربية',
    'bg': 'Български',
    'cs': 'Česky',
    'da': 'Dansk',
    'el': 'Ελληνικά',
    'es': 'Español',
    'he': 'עברית',
    'hr': 'Hrvatski',
    'hu': 'Magyar',
    'it': 'Italiano',
    'ko': '한국어',
    'lt': 'Lietuviškai',
    'nl': 'Nederlands',
    'pl': 'Polski',
    'pt': 'Português',
    'ru': 'Русский',
    'sl': 'Slovenský',
    'th': 'ไทย',
    'uk': 'Українська',
    'zh': '简体中文',
}


# do search-request
def request(query, params):
    # translate the locale (e.g. 'en-US') to language code ('en')
    language = locale_to_lang_code(params['language'])

    # if our language is hosted on the main site, we need to add its name
    # to the query in order to narrow the results to that language
    if language in main_langs:
        query += ' (' + main_langs[language] + ')'

    # prepare the request parameters
    query = urlencode({'search': query})
    offset = (params['pageno'] - 1) * 20

    # get request URLs for our language of choice
    urls = get_lang_urls(language)
    search_url = urls['base'] + urls['search']

    params['url'] = search_url.format(query=query, offset=offset, language=language)

    return params


# get response from search-request
def response(resp):
    # get the base URL for the language in which request was made
    language = locale_to_lang_code(resp.search_params['language'])
    url = get_lang_urls(language)['base']

    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(xpath_results):
        link = result.xpath(xpath_link)[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath(xpath_content))

        results.append({'url': href, 'title': title, 'content': content})

    return results
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Engine to search in collaborative software platforms based on Gitea_.
"""Engine to search in collaborative software platforms based on Gitea_ or Forgejo_.

.. _Gitea: https://about.gitea.com/
.. _Forgejo: https://forgejo.org/

Configuration
=============

@@ -23,6 +24,11 @@ Optional settings are:
    base_url: https://gitea.com
    shortcut: gitea

  - name: forgejo.com
    engine: gitea
    base_url: https://code.forgejo.org
    shortcut: forgejo

If you would like to use additional instances, just configure new engines in the
:ref:`settings <settings engine>` and set the ``base_url``.

@@ -95,13 +101,14 @@ def response(resp):
                'url': item.get('html_url'),
                'title': item.get('full_name'),
                'content': ' / '.join(content),
                'img_src': item.get('owner', {}).get('avatar_url'),
                # Use Repository Avatar and fall back to Owner Avatar if not set.
                'thumbnail': item.get('avatar_url') or item.get('owner', {}).get('avatar_url'),
                'package_name': item.get('name'),
                'maintainer': item.get('owner', {}).get('login'),
                'maintainer': item.get('owner', {}).get('username'),
                'publishedDate': parser.parse(item.get("updated_at") or item.get("created_at")),
                'tags': item.get('topics', []),
                'popularity': item.get('stargazers_count'),
                'homepage': item.get('homepage'),
                'popularity': item.get('stars_count'),
                'homepage': item.get('website'),
                'source_code_url': item.get('clone_url'),
            }
        )

95 searx/engines/gitlab.py Normal file
@@ -0,0 +1,95 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Engine to search in collaborative software platforms based on GitLab_ with
the `GitLab REST API`_.

.. _GitLab: https://about.gitlab.com/install/
.. _GitLab REST API: https://docs.gitlab.com/ee/api/

Configuration
=============

The engine has the following mandatory setting:

- :py:obj:`base_url`

Optional settings are:

- :py:obj:`api_path`

.. code:: yaml

  - name: gitlab
    engine: gitlab
    base_url: https://gitlab.com
    shortcut: gl
    about:
      website: https://gitlab.com/
      wikidata_id: Q16639197

  - name: gnome
    engine: gitlab
    base_url: https://gitlab.gnome.org
    shortcut: gn
    about:
      website: https://gitlab.gnome.org
      wikidata_id: Q44316

Implementations
===============

"""

from urllib.parse import urlencode
from dateutil import parser

about = {
    "website": None,
    "wikidata_id": None,
    "official_api_documentation": "https://docs.gitlab.com/ee/api/",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}

categories = ['it', 'repos']
paging = True

base_url: str = ""
"""Base URL of the GitLab host."""

api_path: str = 'api/v4/projects'
"""The path to the `project API <https://docs.gitlab.com/ee/api/projects.html>`_.

Usually the default path works fine.
"""


def request(query, params):
    args = {'search': query, 'page': params['pageno']}
    params['url'] = f"{base_url}/{api_path}?{urlencode(args)}"

    return params


def response(resp):
    results = []

    for item in resp.json():
        results.append(
            {
                'template': 'packages.html',
                'url': item.get('web_url'),
                'title': item.get('name'),
                'content': item.get('description'),
                'thumbnail': item.get('avatar_url'),
                'package_name': item.get('name'),
                'maintainer': item.get('namespace', {}).get('name'),
                'publishedDate': parser.parse(item.get('last_activity_at') or item.get("created_at")),
                'tags': item.get('tag_list', []),
                'popularity': item.get('star_count'),
                'homepage': item.get('readme_url'),
                'source_code_url': item.get('http_url_to_repo'),
            }
        )

    return results

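A minimal sketch of the URL that the ``request()`` function above composes, assuming ``base_url`` is configured to ``https://gitlab.com`` in the engine settings (the module default is an empty string):

.. code:: python

    from urllib.parse import urlencode

    base_url = "https://gitlab.com"   # mandatory setting, empty by default
    api_path = "api/v4/projects"      # optional setting, default shown above

    args = {'search': 'searxng', 'page': 1}
    url = f"{base_url}/{api_path}?{urlencode(args)}"
    # -> https://gitlab.com/api/v4/projects?search=searxng&page=1
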
@@ -59,11 +59,6 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
# specific xpath variables
# ------------------------

results_xpath = './/div[contains(@jscontroller, "SC7lYd")]'
title_xpath = './/a/h3[1]'
href_xpath = './/a[h3]/@href'
content_xpath = './/div[@data-sncf="1"]'

# Suggestions are links placed in a *card-section*; we extract only the text
# from the links, not the links themselves.
suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'

@@ -334,31 +329,38 @@ def response(resp):
    # results --> answer
    answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
    for item in answer_list:
        for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'):
            bubble.drop_tree()
        results.append(
            {
                'answer': item.xpath("normalize-space()"),
                'answer': extract_text(item),
                'url': (eval_xpath(item, '../..//a/@href') + [None])[0],
            }
        )

    # parse results

    for result in eval_xpath_list(dom, results_xpath):  # pylint: disable=too-many-nested-blocks
    for result in eval_xpath_list(dom, './/div[contains(@jscontroller, "SC7lYd")]'):
        # pylint: disable=too-many-nested-blocks

        try:
            title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
            title_tag = eval_xpath_getindex(result, './/a/h3[1]', 0, default=None)
            if title_tag is None:
                # this is not one of the common google result *sections*
                logger.debug('ignoring item from the result_xpath list: missing title')
                continue
            title = extract_text(title_tag)

            url = eval_xpath_getindex(result, href_xpath, 0, None)
            url = eval_xpath_getindex(result, './/a[h3]/@href', 0, None)
            if url is None:
                logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title)
                continue

            content_nodes = eval_xpath(result, content_xpath)
            content_nodes = eval_xpath(result, './/div[contains(@data-sncf, "1")]')
            for item in content_nodes:
                for script in item.xpath(".//script"):
                    script.getparent().remove(script)

            content = extract_text(content_nodes)

            if not content:

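For context on the answer extraction above: ``drop_tree()`` detaches the "nnFGuf" bubbles from the DOM before the remaining text is read, so their noise never reaches the answer string. A sketch with plain lxml (the HTML snippet is invented for illustration):

.. code:: python

    from lxml import html

    item = html.fromstring('<div>42 is the answer<div class="nnFGuf">noise</div></div>')
    for bubble in item.xpath('.//div[@class="nnFGuf"]'):
        bubble.drop_tree()  # remove the bubble subtree from the document
    print(item.xpath("normalize-space()"))  # -> '42 is the answer'
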
@@ -439,7 +441,7 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
        try:
            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
        except babel.UnknownLocaleError:
            print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
            print("INFO: google UI language %s (%s) is unknown by babel" % (eng_lang, x.text.split("(")[0].strip()))
            continue
        sxng_lang = language_tag(locale)

@@ -34,6 +34,7 @@ from searx.engines.google import (
    detect_google_sorry,
)
from searx.enginelib.traits import EngineTraits
from searx.utils import get_embeded_stream_url

if TYPE_CHECKING:
    import logging

@@ -125,6 +126,7 @@ def response(resp):
                'content': content,
                'author': pub_info,
                'thumbnail': thumbnail,
                'iframe_src': get_embeded_stream_url(url),
                'template': 'videos.html',
            }
        )

@@ -57,7 +57,11 @@ def request(query, params):

    if params['time_range']:
        search_type = 'search_by_date'
        timestamp = (datetime.now() - relativedelta(**{f"{params['time_range']}s": 1})).timestamp()
        timestamp = (
            # pylint: disable=unexpected-keyword-arg
            datetime.now()
            - relativedelta(**{f"{params['time_range']}s": 1})  # type: ignore
        ).timestamp()
        query_params["numericFilters"] = f"created_at_i>{timestamp}"

    params["url"] = f"{base_url}/{search_type}?{urlencode(query_params)}"

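The rewrite above only restructures the expression for the linters; the computed value is unchanged. A sketch of what it evaluates to (``'day'`` is one possible time range value):

.. code:: python

    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    time_range = 'day'  # day / week / month / year
    # 'day' + 's' -> relativedelta(days=1): the cutoff is "now minus one day"
    timestamp = (datetime.now() - relativedelta(**{f"{time_range}s": 1})).timestamp()
    numeric_filter = f"created_at_i>{timestamp}"
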
@@ -1,71 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Internet Archive Scholar (science)
"""

from datetime import datetime
from urllib.parse import urlencode
from searx.utils import html_to_text

about = {
    "website": "https://scholar.archive.org/",
    "wikidata_id": "Q115667709",
    "official_api_documentation": "https://scholar.archive.org/api/redoc",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
categories = ['science', 'scientific publications']
paging = True

base_url = "https://scholar.archive.org"
results_per_page = 15


def request(query, params):
    args = {
        "q": query,
        "limit": results_per_page,
        "offset": (params["pageno"] - 1) * results_per_page,
    }
    params["url"] = f"{base_url}/search?{urlencode(args)}"
    params["headers"]["Accept"] = "application/json"
    return params


def response(resp):
    results = []

    json = resp.json()

    for result in json["results"]:
        publishedDate, content, doi = None, '', None

        if result['biblio'].get('release_date'):
            publishedDate = datetime.strptime(result['biblio']['release_date'], "%Y-%m-%d")

        if len(result['abstracts']) > 0:
            content = result['abstracts'][0].get('body')
        elif len(result['_highlights']) > 0:
            content = result['_highlights'][0]

        if len(result['releases']) > 0:
            doi = result['releases'][0].get('doi')

        results.append(
            {
                'template': 'paper.html',
                'url': result['fulltext']['access_url'],
                'title': result['biblio'].get('title') or result['biblio'].get('container_name'),
                'content': html_to_text(content),
                'publisher': result['biblio'].get('publisher'),
                'doi': doi,
                'journal': result['biblio'].get('container_name'),
                'authors': result['biblio'].get('contrib_names'),
                'tags': result['tags'],
                'publishedDate': publishedDate,
                'issns': result['biblio'].get('issns'),
                'pdf_url': result['fulltext'].get('access_url'),
            }
        )

    return results

@@ -7,6 +7,8 @@ import random
from urllib.parse import quote_plus, urlparse
from dateutil import parser

from searx.utils import humanize_number

# about
about = {
    "website": 'https://api.invidious.io/',

@@ -91,7 +93,8 @@ def response(resp):
                "url": url,
                "title": result.get("title", ""),
                "content": result.get("description", ""),
                'length': length,
                "length": length,
                "views": humanize_number(result['viewCount']),
                "template": "videos.html",
                "author": result.get("author"),
                "publishedDate": publishedDate,

@@ -16,23 +16,17 @@ from json import loads
from urllib.parse import urlencode
from searx.utils import to_string, html_to_text


# parameters for generating a request
search_url = None
url_query = None
url_prefix = ""
content_query = None
title_query = None
content_html_to_text = False
title_html_to_text = False
paging = False
suggestion_query = ''
results_query = ''
method = 'GET'
request_body = ''

cookies = {}
headers = {}
'''Some engines might offer different results based on cookies or headers.
Possible use-case: to set a safesearch cookie or header to moderate.'''

paging = False
# parameters for engines with paging support
#
# number of results on each page

@@ -41,6 +35,16 @@ page_size = 1
# number of the first page (usually 0 or 1)
first_page_num = 1

# parameters for parsing the response
results_query = ''
url_query = None
url_prefix = ""
title_query = None
content_query = None
suggestion_query = ''
title_html_to_text = False
content_html_to_text = False


def iterate(iterable):
    if isinstance(iterable, dict):

@@ -98,9 +102,8 @@ def query(data, query_string):


def request(query, params):  # pylint: disable=redefined-outer-name
    query = urlencode({'q': query})[2:]
    fp = {'query': urlencode({'q': query})[2:]}  # pylint: disable=invalid-name

    fp = {'query': query}  # pylint: disable=invalid-name
    if paging and search_url.find('{pageno}') >= 0:
        fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num

@@ -108,7 +111,12 @@ def request(query, params):  # pylint: disable=redefined-outer-name
    params['headers'].update(headers)

    params['url'] = search_url.format(**fp)
    params['query'] = query
    params['method'] = method

    if request_body:
        # don't url-encode the query if it's in the request body
        fp['query'] = query
        params['data'] = request_body.format(**fp)

    return params

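A sketch of the asymmetry the comment above points at: the query placed in the URL is url-encoded, while a ``request_body`` template receives the raw query. The engine values here are invented for illustration:

.. code:: python

    from urllib.parse import urlencode

    search_url = 'https://example.org/api?q={query}'        # hypothetical
    request_body = '{{"q": "{query}", "page": {pageno}}}'   # hypothetical

    query = 'hello world'
    fp = {'query': urlencode({'q': query})[2:], 'pageno': 1}
    url = search_url.format(**fp)         # .../api?q=hello+world (encoded)

    if request_body:
        fp['query'] = query               # raw query for the body
        data = request_body.format(**fp)  # {"q": "hello world", "page": 1}
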
@@ -146,7 +154,11 @@ def response(resp):
            }
        )
    else:
        for url, title, content in zip(query(json, url_query), query(json, title_query), query(json, content_query)):
        for result in json:
            url = query(result, url_query)[0]
            title = query(result, title_query)[0]
            content = query(result, content_query)[0]

            results.append(
                {
                    'url': url_prefix + to_string(url),

@@ -31,6 +31,7 @@ def request(_query, params):

    params['method'] = 'POST'
    params['headers'] = {'Content-Type': 'application/json'}
    params['req_url'] = request_url

    return params

@@ -40,7 +41,13 @@ def response(resp):

    json_resp = resp.json()
    text = json_resp.get('translatedText')

    from_lang = resp.search_params["from_lang"][1]
    to_lang = resp.search_params["to_lang"][1]
    query = resp.search_params["query"]
    req_url = resp.search_params["req_url"]

    if text:
        results.append({'answer': text})
        results.append({"answer": text, "url": f"{req_url}/?source={from_lang}&target={to_lang}&q={query}"})

    return results

@@ -27,7 +27,7 @@ categories = ['images']
paging = True

endpoint = 'photos'
base_url = 'https://loc.gov'
base_url = 'https://www.loc.gov'
search_string = "/{endpoint}/?sp={page}&{query}&fo=json"


@@ -63,8 +63,8 @@ def response(resp):
        if not url:
            continue

        img_src = result['item'].get('service_medium')
        if not img_src or img_src == 'https://memory.loc.gov/pp/grp.gif':
        img_list = result.get('image_url')
        if not img_list:
            continue

        title = result['title']

@@ -88,8 +88,8 @@ def response(resp):
                'url': url,
                'title': title,
                'content': ' / '.join([i for i in content_items if i]),
                'img_src': img_src,
                'thumbnail_src': result['item'].get('thumb_gallery'),
                'img_src': img_list[-1],
                'thumbnail_src': img_list[0],
                'author': author,
            }
        )

95 searx/engines/mariadb_server.py Normal file
@@ -0,0 +1,95 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""MariaDB is a community driven fork of MySQL. Before enabling the MariaDB
engine, you must install the pip package ``mariadb`` along with the necessary
prerequisites.

`See the following documentation for more details
<https://mariadb.com/docs/server/connect/programming-languages/c/install/>`_

Example
=======

This is an example configuration for querying a MariaDB server:

.. code:: yaml

  - name: my_database
    engine: mariadb_server
    database: my_database
    username: searxng
    password: password
    limit: 5
    query_str: 'SELECT * from my_table WHERE my_column=%(query)s'

Implementations
===============

"""

from typing import TYPE_CHECKING

try:
    import mariadb
except ImportError:
    # import error is ignored because the admin has to install mariadb
    # manually to use the engine
    pass

if TYPE_CHECKING:
    import logging

    logger = logging.getLogger()


engine_type = 'offline'

host = "127.0.0.1"
"""Hostname of the DB connector"""

port = 3306
"""Port of the DB connector"""

database = ""
"""Name of the database."""

username = ""
"""Username for the DB connection."""

password = ""
"""Password for the DB connection."""

query_str = ""
"""SQL query that returns the result items."""

limit = 10
paging = True
result_template = 'key-value.html'
_connection = None


def init(engine_settings):
    global _connection  # pylint: disable=global-statement

    if 'query_str' not in engine_settings:
        raise ValueError('query_str cannot be empty')

    if not engine_settings['query_str'].lower().startswith('select '):
        raise ValueError('only SELECT query is supported')

    _connection = mariadb.connect(database=database, user=username, password=password, host=host, port=port)


def search(query, params):
    query_params = {'query': query}
    query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit)
    logger.debug("SQL Query: %s", query_to_run)

    with _connection.cursor() as cur:
        cur.execute(query_to_run, query_params)
        results = []
        col_names = [i[0] for i in cur.description]
        for res in cur:
            result = dict(zip(col_names, map(str, res)))
            result['template'] = result_template
            results.append(result)
        return results

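A sketch of the statement the new engine's ``search()`` ends up executing for the example configuration in the docstring (``limit: 5``, second result page). Note that ``%(query)s`` stays a driver-side placeholder, so the search term is bound as a parameter rather than interpolated into the SQL:

.. code:: python

    query_str = 'SELECT * from my_table WHERE my_column=%(query)s'
    limit, pageno = 5, 2
    query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (pageno - 1) * limit)
    # -> SELECT * from my_table WHERE my_column=%(query)s LIMIT 5 OFFSET 5
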
@@ -100,6 +100,12 @@ base_url: str = 'https://{language}.wikipedia.org/'
ISO 639-1 language code (en, de, fr ..) of the search language.
"""

api_path: str = 'w/api.php'
"""The path the PHP API is listening on.

Usually the default path works fine.
"""

timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
"""The longhand version of MediaWiki time strings."""


@@ -113,12 +119,7 @@ def request(query, params):
    else:
        params['language'] = params['language'].split('-')[0]

    if base_url.endswith('/'):
        api_url = base_url + 'w/api.php?'
    else:
        api_url = base_url + '/w/api.php?'
    api_url = api_url.format(language=params['language'])

    api_url = f"{base_url.rstrip('/')}/{api_path}?".format(language=params['language'])
    offset = (params['pageno'] - 1) * number_of_results

    args = {

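The replacement one-liner above mixes an f-string with ``str.format()``, which is easy to misread: the f-string resolves ``base_url`` and ``api_path`` first, and the ``{language}`` placeholder carried inside the value of ``base_url`` survives that step and is only filled afterwards by ``.format()``. A sketch:

.. code:: python

    base_url = 'https://{language}.wikipedia.org/'
    api_path = 'w/api.php'

    api_url = f"{base_url.rstrip('/')}/{api_path}?".format(language='en')
    # f-string step: 'https://{language}.wikipedia.org/w/api.php?'
    # .format step:  'https://en.wikipedia.org/w/api.php?'
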
@@ -1,12 +1,15 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Mojeek (general, images, news)"""

from typing import TYPE_CHECKING

from datetime import datetime
from urllib.parse import urlencode
from lxml import html

from dateutil.relativedelta import relativedelta
from searx.utils import eval_xpath, eval_xpath_list, extract_text
from searx.enginelib.traits import EngineTraits

about = {
    'website': 'https://mojeek.com',

@@ -42,6 +45,18 @@ news_url_xpath = './/h2/a/@href'
news_title_xpath = './/h2/a'
news_content_xpath = './/p[@class="s"]'

language_param = 'lb'
region_param = 'arc'

_delta_kwargs = {'day': 'days', 'week': 'weeks', 'month': 'months', 'year': 'years'}

if TYPE_CHECKING:
    import logging

    logger = logging.getLogger()

traits: EngineTraits


def init(_):
    if search_type not in ('', 'images', 'news'):

@@ -53,13 +68,16 @@ def request(query, params):
        'q': query,
        'safe': min(params['safesearch'], 1),
        'fmt': search_type,
        language_param: traits.get_language(params['searxng_locale'], traits.custom['language_all']),
        region_param: traits.get_region(params['searxng_locale'], traits.custom['region_all']),
    }

    if search_type == '':
        args['s'] = 10 * (params['pageno'] - 1)

    if params['time_range'] and search_type != 'images':
        args["since"] = (datetime.now() - relativedelta(**{f"{params['time_range']}s": 1})).strftime("%Y%m%d")
        kwargs = {_delta_kwargs[params['time_range']]: 1}
        args["since"] = (datetime.now() - relativedelta(**kwargs)).strftime("%Y%m%d")  # type: ignore
        logger.debug(args["since"])

    params['url'] = f"{base_url}/search?{urlencode(args)}"

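The new ``_delta_kwargs`` table spells out the keyword that was previously built with an f-string, which static type checkers could not follow. Both forms build the same ``relativedelta``:

.. code:: python

    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    _delta_kwargs = {'day': 'days', 'week': 'weeks', 'month': 'months', 'year': 'years'}

    time_range = 'month'
    kwargs = {_delta_kwargs[time_range]: 1}  # {'months': 1}
    since = (datetime.now() - relativedelta(**kwargs)).strftime("%Y%m%d")
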
@@ -94,7 +112,7 @@ def _image_results(dom):
                'template': 'images.html',
                'url': extract_text(eval_xpath(result, image_url_xpath)),
                'title': extract_text(eval_xpath(result, image_title_xpath)),
                'img_src': base_url + extract_text(eval_xpath(result, image_img_src_xpath)),
                'img_src': base_url + extract_text(eval_xpath(result, image_img_src_xpath)),  # type: ignore
                'content': '',
            }
        )

@@ -130,3 +148,31 @@ def response(resp):
        return _news_results(dom)

    raise ValueError(f"Invalid search type {search_type}")


def fetch_traits(engine_traits: EngineTraits):
    # pylint: disable=import-outside-toplevel
    from searx import network
    from searx.locales import get_official_locales, region_tag
    from babel import Locale, UnknownLocaleError
    import contextlib

    resp = network.get(base_url + "/preferences", headers={'Accept-Language': 'en-US,en;q=0.5'})
    dom = html.fromstring(resp.text)  # type: ignore

    languages = eval_xpath_list(dom, f'//select[@name="{language_param}"]/option/@value')

    engine_traits.custom['language_all'] = languages[0]

    for code in languages[1:]:
        with contextlib.suppress(UnknownLocaleError):
            locale = Locale(code)
            engine_traits.languages[locale.language] = code

    regions = eval_xpath_list(dom, f'//select[@name="{region_param}"]/option/@value')

    engine_traits.custom['region_all'] = regions[1]

    for code in regions[2:]:
        for locale in get_official_locales(code, engine_traits.languages):
            engine_traits.regions[region_tag(locale)] = code

@@ -20,6 +20,8 @@ Otherwise, follow instructions provided by Mullvad for enabling the VPN on Linux
update of SearXNG!
"""

from __future__ import annotations

from typing import TYPE_CHECKING
from httpx import Response
from lxml import html

@@ -37,6 +39,8 @@ traits: EngineTraits

use_cache: bool = True  # non-cache use only has 100 searches per day!

leta_engine: str = 'google'

search_url = "https://leta.mullvad.net"

# about

@@ -61,6 +65,11 @@ time_range_dict = {
    "year": "y1",
}

available_leta_engines = [
    'google',  # first will be default if provided engine is invalid
    'brave',
]


def is_vpn_connected(dom: html.HtmlElement) -> bool:
    """Returns true if the VPN is connected, False otherwise"""

@@ -80,11 +89,22 @@ def assign_headers(headers: dict) -> dict:
def request(query: str, params: dict):
    country = traits.get_region(params.get('searxng_locale', 'all'), traits.all_locale)  # type: ignore

    result_engine = leta_engine
    if leta_engine not in available_leta_engines:
        result_engine = available_leta_engines[0]
        logger.warning(
            'Configured engine "%s" not one of the available engines %s, defaulting to "%s"',
            leta_engine,
            available_leta_engines,
            result_engine,
        )

    params['url'] = search_url
    params['method'] = 'POST'
    params['data'] = {
        "q": query,
        "gl": country if country is str else '',
        'engine': result_engine,
    }
    # pylint: disable=undefined-variable
    if use_cache:

@@ -107,8 +127,15 @@ def request(query: str, params: dict):
    return params


def extract_result(dom_result: html.HtmlElement):
    [a_elem, h3_elem, p_elem] = eval_xpath_list(dom_result, 'div/div/*')
def extract_result(dom_result: list[html.HtmlElement]):
    # Infoboxes sometimes appear in the beginning and will have a length of 0
    if len(dom_result) == 3:
        [a_elem, h3_elem, p_elem] = dom_result
    elif len(dom_result) == 4:
        [_, a_elem, h3_elem, p_elem] = dom_result
    else:
        return None

    return {
        'url': extract_text(a_elem.text),
        'title': extract_text(h3_elem),

@@ -116,6 +143,14 @@ def extract_result(dom_result: html.HtmlElement):
    }


def extract_results(search_results: html.HtmlElement):
    for search_result in search_results:
        dom_result = eval_xpath_list(search_result, 'div/div/*')
        result = extract_result(dom_result)
        if result is not None:
            yield result


def response(resp: Response):
    """Checks if connected to Mullvad VPN, then extracts the search results from
    the DOM resp: requests response object"""

@@ -124,7 +159,7 @@ def response(resp: Response):
    if not is_vpn_connected(dom):
        raise SearxEngineResponseException('Not connected to Mullvad VPN')
    search_results = eval_xpath(dom.body, '//main/div[2]/div')
    return [extract_result(sr) for sr in search_results]
    return list(extract_results(search_results))


def fetch_traits(engine_traits: EngineTraits):

@@ -34,12 +34,25 @@ except ImportError:

engine_type = 'offline'
auth_plugin = 'caching_sha2_password'

host = "127.0.0.1"
"""Hostname of the DB connector"""

port = 3306
"""Port of the DB connector"""

database = ""
"""Name of the database."""

username = ""
"""Username for the DB connection."""

password = ""
"""Password for the DB connection."""

query_str = ""
"""SQL query that returns the result items."""

limit = 10
paging = True
result_template = 'key-value.html'

71 searx/engines/openlibrary.py Normal file
@@ -0,0 +1,71 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Open library (books)
"""
from urllib.parse import urlencode
import re

from dateutil import parser

about = {
    'website': 'https://openlibrary.org',
    'wikidata_id': 'Q1201876',
    'require_api_key': False,
    'use_official_api': False,
    'official_api_documentation': 'https://openlibrary.org/developers/api',
}

paging = True
categories = []

base_url = "https://openlibrary.org"
results_per_page = 10


def request(query, params):
    args = {
        'q': query,
        'page': params['pageno'],
        'limit': results_per_page,
    }
    params['url'] = f"{base_url}/search.json?{urlencode(args)}"
    return params


def _parse_date(date):
    try:
        return parser.parse(date)
    except parser.ParserError:
        return None


def response(resp):
    results = []

    for item in resp.json().get("docs", []):
        cover = None
        if 'lending_identifier_s' in item:
            cover = f"https://archive.org/services/img/{item['lending_identifier_s']}"

        published = item.get('publish_date')
        if published:
            published_dates = [date for date in map(_parse_date, published) if date]
            if published_dates:
                published = min(published_dates)

        if not published:
            published = parser.parse(str(item.get('first_published_year')))

        result = {
            'template': 'paper.html',
            'url': f"{base_url}{item['key']}",
            'title': item['title'],
            'content': re.sub(r"\{|\}", "", item['first_sentence'][0]) if item.get('first_sentence') else '',
            'isbn': item.get('isbn', [])[:5],
            'authors': item.get('author_name', []),
            'thumbnail': cover,
            'publishedDate': published,
            'tags': item.get('subject', [])[:10] + item.get('place', [])[:10],
        }
        results.append(result)

    return results

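A note on the date handling in the new engine above: Open Library's ``publish_date`` is a list of free-form strings, not all of which parse; the engine keeps the earliest parsable one. A sketch:

.. code:: python

    from dateutil import parser

    def _parse_date(date):
        try:
            return parser.parse(date)
        except parser.ParserError:
            return None

    publish_date = ['May 2001', 'n.d.', '1999']
    dates = [d for d in map(_parse_date, publish_date) if d]
    published = min(dates) if dates else None  # -> the 1999 date
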
@@ -14,7 +14,7 @@ import babel

from searx.network import get  # see https://github.com/searxng/searxng/issues/762
from searx.locales import language_tag
from searx.utils import html_to_text
from searx.utils import html_to_text, humanize_number
from searx.enginelib.traits import EngineTraits

traits: EngineTraits

@@ -124,6 +124,7 @@ def video_response(resp):
                'content': html_to_text(result.get('description') or ''),
                'author': result.get('account', {}).get('displayName'),
                'length': minute_to_hm(result.get('duration')),
                'views': humanize_number(result['views']),
                'template': 'videos.html',
                'publishedDate': parse(result['publishedAt']),
                'iframe_src': result.get('embedUrl'),

@@ -53,6 +53,8 @@ from urllib.parse import urlencode
import datetime
from dateutil import parser

from searx.utils import humanize_number

# about
about = {
    "website": 'https://github.com/TeamPiped/Piped/',

@@ -138,6 +140,7 @@ def response(resp):
            "title": result.get("title", ""),
            "publishedDate": parser.parse(time.ctime(uploaded / 1000)) if uploaded != -1 else None,
            "iframe_src": _frontend_url() + '/embed' + result.get("url", ""),
            "views": humanize_number(result["views"]),
        }
        length = result.get("duration")
        if length:

@@ -29,12 +29,25 @@ except ImportError:
    pass

engine_type = 'offline'

host = "127.0.0.1"
"""Hostname of the DB connector"""

port = "5432"
"""Port of the DB connector"""

database = ""
"""Name of the database."""

username = ""
"""Username for the DB connection."""

password = ""
"""Password for the DB connection."""

query_str = ""
"""SQL query that returns the result items."""

limit = 10
paging = True
result_template = 'key-value.html'

@@ -49,7 +49,11 @@ from flask_babel import gettext
import babel
import lxml

from searx.exceptions import SearxEngineAPIException, SearxEngineTooManyRequestsException
from searx.exceptions import (
    SearxEngineAPIException,
    SearxEngineTooManyRequestsException,
    SearxEngineCaptchaException,
)
from searx.network import raise_for_httperror
from searx.enginelib.traits import EngineTraits


@@ -57,6 +61,7 @@ from searx.utils import (
    eval_xpath,
    eval_xpath_list,
    extract_text,
    get_embeded_stream_url,
)

traits: EngineTraits

@@ -187,6 +192,8 @@ def parse_web_api(resp):
        error_code = data.get('error_code')
        if error_code == 24:
            raise SearxEngineTooManyRequestsException()
        if search_results.get("data", {}).get("error_data", {}).get("captchaUrl") is not None:
            raise SearxEngineCaptchaException()
        msg = ",".join(data.get('message', ['unknown']))
        raise SearxEngineAPIException(f"{msg} ({error_code})")

@@ -297,6 +304,7 @@ def parse_web_api(resp):
                'title': title,
                'url': res_url,
                'content': content,
                'iframe_src': get_embeded_stream_url(res_url),
                'publishedDate': pub_date,
                'thumbnail': thumbnail,
                'template': 'videos.html',

@@ -165,10 +165,12 @@ def fetch_traits(engine_traits: EngineTraits):

    countrycodes = set()
    for region in country_list:
        if region['iso_3166_1'] not in babel_reg_list:
        # country_list contains duplicates that differ only in upper/lower case
        _reg = region['iso_3166_1'].upper()
        if _reg not in babel_reg_list:
            print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel")
            continue
        countrycodes.add(region['iso_3166_1'])
        countrycodes.add(_reg)

    countrycodes = list(countrycodes)
    countrycodes.sort()

@@ -1,98 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Słownik Języka Polskiego

Dictionary of the Polish language from PWN (sjp.pwn)
"""

from lxml.html import fromstring
from searx import logger
from searx.utils import extract_text
from searx.network import raise_for_httperror

logger = logger.getChild('sjp engine')

# about
about = {
    "website": 'https://sjp.pwn.pl',
    "wikidata_id": 'Q55117369',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "language": 'pl',
}

categories = ['dictionaries']
paging = False

URL = 'https://sjp.pwn.pl'
SEARCH_URL = URL + '/szukaj/{query}.html'

word_xpath = '//div[@class="query"]'
dict_xpath = [
    '//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]',
    '//div[@class="wyniki sjp-wyniki sjp-anchor"]',
    '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]',
]


def request(query, params):
    params['url'] = SEARCH_URL.format(query=query)
    logger.debug(f"query_url --> {params['url']}")
    return params


def response(resp):
    results = []

    raise_for_httperror(resp)
    dom = fromstring(resp.text)
    word = extract_text(dom.xpath(word_xpath))

    definitions = []

    for dict_src in dict_xpath:
        for src in dom.xpath(dict_src):
            src_text = extract_text(src.xpath('.//span[@class="entry-head-title"]/text()')).strip()

            src_defs = []
            for def_item in src.xpath('.//div[contains(@class, "ribbon-element")]'):
                if def_item.xpath('./div[@class="znacz"]'):
                    sub_defs = []
                    for def_sub_item in def_item.xpath('./div[@class="znacz"]'):
                        def_sub_text = extract_text(def_sub_item).lstrip('0123456789. ')
                        sub_defs.append(def_sub_text)
                    src_defs.append((word, sub_defs))
                else:
                    def_text = extract_text(def_item).strip()
                    def_link = def_item.xpath('./span/a/@href')
                    if 'doroszewski' in def_link[0]:
                        def_text = f"<a href='{def_link[0]}'>{def_text}</a>"
                    src_defs.append((def_text, ''))

            definitions.append((src_text, src_defs))

    if not definitions:
        return results

    infobox = ''
    for src in definitions:
        infobox += f"<div><small>{src[0]}</small>"
        infobox += "<ul>"
        for def_text, sub_def in src[1]:
            infobox += f"<li>{def_text}</li>"
            if sub_def:
                infobox += "<ol>"
                for sub_def_text in sub_def:
                    infobox += f"<li>{sub_def_text}</li>"
                infobox += "</ol>"
        infobox += "</ul></div>"

    results.append(
        {
            'infobox': word,
            'content': infobox,
        }
    )

    return results

@@ -41,8 +41,13 @@ import sqlite3
import contextlib

engine_type = 'offline'

database = ""
"""Filename of the SQLite DB."""

query_str = ""
"""SQL query that returns the result items."""

limit = 10
paging = True
result_template = 'key-value.html'

@@ -142,7 +142,7 @@ search_url = base_url + '/sp/search'

# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
# not ads: div[@class="result"] are the direct childs of div[@id="results"]
# not ads: div[@class="result"] are the direct children of div[@id="results"]
search_form_xpath = '//form[@id="search"]'
"""XPath of Startpage's origin search form

@@ -7,6 +7,7 @@ ends.

from json import dumps
from searx.utils import searx_useragent
from searx.enginelib.traits import EngineTraits

about = {
    "website": "https://stract.com/",

@@ -18,7 +19,10 @@ about = {
categories = ['general']
paging = True

search_url = "https://stract.com/beta/api/search"
base_url = "https://stract.com/beta/api"
search_url = base_url + "/search"

traits: EngineTraits


def request(query, params):

@@ -29,7 +33,14 @@ def request(query, params):
        'Content-Type': 'application/json',
        'User-Agent': searx_useragent(),
    }
    params['data'] = dumps({'query': query, 'page': params['pageno'] - 1})
    region = traits.get_region(params["searxng_locale"], default=traits.all_locale)
    params['data'] = dumps(
        {
            'query': query,
            'page': params['pageno'] - 1,
            'selectedRegion': region,
        }
    )

    return params

@@ -47,3 +58,24 @@ def response(resp):
        )

    return results


def fetch_traits(engine_traits: EngineTraits):
    # pylint: disable=import-outside-toplevel
    from searx import network
    from babel import Locale, languages
    from searx.locales import region_tag

    territories = Locale("en").territories

    json = network.get(base_url + "/docs/openapi.json").json()
    regions = json['components']['schemas']['Region']['enum']

    engine_traits.all_locale = regions[0]

    for region in regions[1:]:
        for code, name in territories.items():
            if region not in (code, name):
                continue
            for lang in languages.get_official_languages(code, de_facto=True):
                engine_traits.regions[region_tag(Locale(lang, code))] = region

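The new ``fetch_traits`` above has to match Stract's region enum against Babel's territory table by either the territory code or its English name. A sketch of that matching step with an invented enum entry:

.. code:: python

    from babel import Locale, languages

    territories = Locale("en").territories  # e.g. {'DE': 'Germany', ...}

    region = 'Germany'  # Stract enum entries may be names or codes
    for code, name in territories.items():
        if region not in (code, name):
            continue
        # 'DE' matched via its English name; collect its official languages
        official_langs = languages.get_official_languages(code, de_facto=True)
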
@@ -14,10 +14,16 @@ billion images `[tineye.com] <https://tineye.com/how>`_.

"""

from typing import TYPE_CHECKING
from urllib.parse import urlencode
from datetime import datetime
from flask_babel import gettext

if TYPE_CHECKING:
    import logging

    logger = logging.getLogger()

about = {
    "website": 'https://tineye.com',
    "wikidata_id": 'Q2382535',

@@ -34,7 +40,7 @@ categories = ['general']
paging = True
safesearch = False
base_url = 'https://tineye.com'
search_string = '/result_json/?page={page}&{query}'
search_string = '/api/v1/result_json/?page={page}&{query}'

FORMAT_NOT_SUPPORTED = gettext(
    "Could not read that image url. This may be due to an unsupported file"

@@ -120,7 +126,7 @@ def parse_tineye_match(match_json):

    crawl_date = backlink_json.get("crawl_date")
    if crawl_date:
        crawl_date = datetime.fromisoformat(crawl_date[:-3])
        crawl_date = datetime.strptime(crawl_date, '%Y-%m-%d')
    else:
        crawl_date = datetime.min

@@ -150,29 +156,15 @@ def parse_tineye_match(match_json):

def response(resp):
    """Parse HTTP response from TinEye."""
    results = []

    try:
    # handle the 422 client side errors, and the possible 400 status code error
    if resp.status_code in (400, 422):
        json_data = resp.json()
    except Exception as exc:  # pylint: disable=broad-except
        msg = "can't parse JSON response // %s" % exc
        logger.error(msg)
        json_data = {'error': msg}

    # handle error codes from Tineye

    if resp.is_error:
        if resp.status_code in (400, 422):
        message = 'HTTP status: %s' % resp.status_code
        error = json_data.get('error')
        s_key = json_data.get('suggestions', {}).get('key', '')

        if error and s_key:
            message = "%s (%s)" % (error, s_key)
        elif error:
            message = error
            suggestions = json_data.get('suggestions', {})
            message = f'HTTP Status Code: {resp.status_code}'

            if resp.status_code == 422:
                s_key = suggestions.get('key', '')
                if s_key == "Invalid image URL":
                    # test https://docs.searxng.org/_static/searxng-wordmark.svg
                    message = FORMAT_NOT_SUPPORTED

@@ -182,16 +174,23 @@ def response(resp):
                elif s_key == 'Download Error':
                    # test https://notexists
                    message = DOWNLOAD_ERROR
                else:
                    logger.warning("Unknown suggestion key encountered: %s", s_key)
            else:  # 400
                description = suggestions.get('description')
                if isinstance(description, list):
                    message = ','.join(description)

        # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
        # results.append({'answer': message})
        logger.error(message)
        # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
        # results.append({'answer': message})
        logger.error(message)
        return []

    return results
    # Raise for all other responses
    resp.raise_for_status()

    resp.raise_for_status()

    # append results from matches
    results = []
    json_data = resp.json()

    for match_json in json_data['matches']:

@@ -209,7 +208,7 @@ def response(resp):
                'title': backlink['image_name'],
                'img_src': backlink['url'],
                'format': tineye_match['image_format'],
                'widht': tineye_match['width'],
                'width': tineye_match['width'],
                'height': tineye_match['height'],
                'publishedDate': backlink['crawl_date'],
            }

@@ -32,7 +32,7 @@ void_arch = 'x86_64'
"""Default architecture to search for. For valid values see :py:obj:`ARCH_RE`"""

ARCH_RE = re.compile('aarch64-musl|armv6l-musl|armv7l-musl|x86_64-musl|aarch64|armv6l|armv7l|i686|x86_64')
"""Regular expresion that match a architecture in the query string."""
"""Regular expression that matches an architecture in the query string."""


def request(query, params):

@@ -7,6 +7,8 @@ import datetime

from urllib.parse import urlencode

from searx.utils import html_to_text, humanize_bytes

# about
about = {
    "website": 'https://commons.wikimedia.org/',

@@ -74,7 +76,7 @@ def response(resp):
        result = {
            'url': imageinfo["descriptionurl"],
            'title': title,
            'content': item["snippet"],
            'content': html_to_text(item["snippet"]),
        }

        if search_type == "images":

@@ -93,7 +95,7 @@ def response(resp):
        elif search_type == "files":
            result['template'] = 'files.html'
            result['metadata'] = imageinfo['mime']
            result['size'] = imageinfo['size']
            result['size'] = humanize_bytes(imageinfo['size'])
        elif search_type == "audio":
            result['iframe_src'] = imageinfo['url']

@@ -20,13 +20,9 @@ about = {
categories = ['general']
paging = False

URL = 'https://www.wordnik.com'
SEARCH_URL = URL + '/words/{query}'


def request(query, params):
    params['url'] = SEARCH_URL.format(query=query)
    logger.debug(f"query_url --> {params['url']}")
    params['url'] = f"https://www.wordnik.com/words/{query}"
    return params

@@ -12,6 +12,8 @@ Request:
- :py:obj:`search_url`
- :py:obj:`lang_all`
- :py:obj:`soft_max_redirects`
- :py:obj:`method`
- :py:obj:`request_body`
- :py:obj:`cookies`
- :py:obj:`headers`

@@ -151,6 +153,16 @@ headers = {}
'''Some engines might offer different results based on headers. Possible
use-case: to set a header to moderate.'''

method = 'GET'
'''Some engines might require POST requests for search.'''

request_body = ''
'''The body of the request. This can only be used if a different :py:obj:`method`
is set, e.g. ``POST``. For formatting see the documentation of :py:obj:`search_url`::

    search={query}&page={pageno}{time_range}{safe_search}
'''

paging = False
'''Engine supports paging [True or False].'''

@@ -236,8 +248,14 @@ def request(query, params):
    params['headers'].update(headers)

    params['url'] = search_url.format(**fargs)
    params['soft_max_redirects'] = soft_max_redirects
    params['method'] = method

    if request_body:
        # don't url-encode the query if it's in the request body
        fargs['query'] = query
        params['data'] = request_body.format(**fargs)

    params['soft_max_redirects'] = soft_max_redirects
    params['raise_for_httperror'] = False

    return params

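With the new ``method`` and ``request_body`` settings documented above, an xpath engine can send the search term in a POST body instead of the URL. A sketch of that branch in ``request()``, with invented values:

.. code:: python

    search_url = 'https://example.org/search'      # hypothetical, no {query}
    request_body = 'search={query}&page={pageno}'  # hypothetical template
    method = 'POST'

    fargs = {'query': 'searxng', 'pageno': 1}
    params = {'url': search_url.format(**fargs), 'method': method}
    if request_body:
        fargs['query'] = 'searxng'  # raw query, not url-encoded
        params['data'] = request_body.format(**fargs)
    # -> POST https://example.org/search with body 'search=searxng&page=1'
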
@@ -118,6 +118,8 @@ def _base_url() -> str:
    url = engines['yacy'].base_url  # type: ignore
    if isinstance(url, list):
        url = random.choice(url)
    if url.endswith("/"):
        url = url[:-1]
    return url

@@ -16,6 +16,7 @@ from searx.utils import (
    eval_xpath_getindex,
    eval_xpath_list,
    extract_text,
    html_to_text,
)
from searx.enginelib.traits import EngineTraits


@@ -133,12 +134,20 @@ def response(resp):
        url = parse_url(url)

        title = eval_xpath_getindex(result, './/h3//a/@aria-label', 0, default='')
        title = extract_text(title)
        title: str = extract_text(title)
        content = eval_xpath_getindex(result, './/div[contains(@class, "compText")]', 0, default='')
        content = extract_text(content, allow_none=True)
        content: str = extract_text(content, allow_none=True)

        # append result
        results.append({'url': url, 'title': title, 'content': content})
        results.append(
            {
                'url': url,
                # title sometimes contains HTML tags / see
                # https://github.com/searxng/searxng/issues/3790
                'title': " ".join(html_to_text(title).strip().split()),
                'content': " ".join(html_to_text(content).strip().split()),
            }
        )

    for suggestion in eval_xpath_list(dom, '//div[contains(@class, "AlsoTry")]//table//a'):
        # append suggestion

133 searx/engines/yandex.py Normal file
@@ -0,0 +1,133 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Yandex (Web, images)"""

from json import loads
from urllib.parse import urlencode
from html import unescape
from lxml import html
from searx.exceptions import SearxEngineCaptchaException
from searx.utils import humanize_bytes, eval_xpath, eval_xpath_list, extract_text, extr


# Engine metadata
about = {
    "website": 'https://yandex.com/',
    "wikidata_id": 'Q5281',
    "official_api_documentation": "?",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# Engine configuration
categories = []
paging = True
search_type = ""

# Search URL
base_url_web = 'https://yandex.com/search/site/'
base_url_images = 'https://yandex.com/images/search'

results_xpath = '//li[contains(@class, "serp-item")]'
url_xpath = './/a[@class="b-serp-item__title-link"]/@href'
title_xpath = './/h3[@class="b-serp-item__title"]/a[@class="b-serp-item__title-link"]/span'
content_xpath = './/div[@class="b-serp-item__content"]//div[@class="b-serp-item__text"]'


def catch_bad_response(resp):
    if resp.url.path.startswith('/showcaptcha'):
        raise SearxEngineCaptchaException()


def request(query, params):
    query_params_web = {
        "tmpl_version": "releases",
        "text": query,
        "web": "1",
        "frame": "1",
        "searchid": "3131712",
    }

    query_params_images = {
        "text": query,
        "uinfo": "sw-1920-sh-1080-ww-1125-wh-999",
    }

    if params['pageno'] > 1:
        query_params_web.update({"p": params["pageno"] - 1})
        query_params_images.update({"p": params["pageno"] - 1})

    params["cookies"] = {'cookie': "yp=1716337604.sp.family%3A0#1685406411.szm.1:1920x1080:1920x999"}

    if search_type == 'web':
        params['url'] = f"{base_url_web}?{urlencode(query_params_web)}"
    elif search_type == 'images':
        params['url'] = f"{base_url_images}?{urlencode(query_params_images)}"

    return params


def response(resp):
    if search_type == 'web':

        catch_bad_response(resp)

        dom = html.fromstring(resp.text)

        results = []

        for result in eval_xpath_list(dom, results_xpath):
            results.append(
                {
                    'url': extract_text(eval_xpath(result, url_xpath)),
                    'title': extract_text(eval_xpath(result, title_xpath)),
                    'content': extract_text(eval_xpath(result, content_xpath)),
                }
            )

        return results

    if search_type == 'images':

        catch_bad_response(resp)

        html_data = html.fromstring(resp.text)
        html_sample = unescape(html.tostring(html_data, encoding='unicode'))

        content_between_tags = extr(
            html_sample, '{"location":"/images/search/', 'advRsyaSearchColumn":null}}', default="fail"
        )
        json_data = '{"location":"/images/search/' + content_between_tags + 'advRsyaSearchColumn":null}}'

        if content_between_tags == "fail":
            content_between_tags = extr(html_sample, '{"location":"/images/search/', 'false}}}')
            json_data = '{"location":"/images/search/' + content_between_tags + 'false}}}'

        json_resp = loads(json_data)

        results = []
        for _, item_data in json_resp['initialState']['serpList']['items']['entities'].items():
            title = item_data['snippet']['title']
            source = item_data['snippet']['url']
            thumb = item_data['image']
            fullsize_image = item_data['viewerData']['dups'][0]['url']
            height = item_data['viewerData']['dups'][0]['h']
            width = item_data['viewerData']['dups'][0]['w']
            filesize = item_data['viewerData']['dups'][0]['fileSizeInBytes']
            humanized_filesize = humanize_bytes(filesize)

            results.append(
                {
                    'title': title,
                    'url': source,
                    'img_src': fullsize_image,
                    'filesize': humanized_filesize,
                    'thumbnail_src': thumb,
                    'template': 'images.html',
                    'resolution': f'{width} x {height}',
                }
            )

        return results

    return []

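The image branch above recovers a JSON blob that Yandex embeds in its HTML by slicing between two fixed markers and re-attaching them. A sketch of the idea with a toy payload (``extr`` is assumed to return the text between the first occurrence of the two markers, as ``searx.utils.extr`` does):

.. code:: python

    from json import loads

    html_sample = 'junk {"location":"/images/search/","a":{"advRsyaSearchColumn":null}} junk'
    start = '{"location":"/images/search/'
    end = 'advRsyaSearchColumn":null}}'

    middle = html_sample.split(start, 1)[1].split(end, 1)[0]  # what extr() yields
    json_data = start + middle + end
    loads(json_data)  # -> {'location': '/images/search/', 'a': {'advRsyaSearchColumn': None}}
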
@@ -67,6 +67,8 @@ def response(resp):

    for result in resp.json()[1]['results']:
        if search_type == "web":
            if result['type'] != 'Organic':
                continue
            results.append(_web_result(result))
        elif search_type == "images":
            results.append(_images_result(result))

@@ -43,6 +43,7 @@ from flask_babel import gettext
from searx.utils import extract_text, eval_xpath, eval_xpath_list
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
from searx.exceptions import SearxException

if TYPE_CHECKING:
    import httpx

@@ -108,13 +109,21 @@ def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
        zlib_year_to=zlib_year_to,
        zlib_ext=zlib_ext,
    )
    params["verify"] = False
    return params


def domain_is_seized(dom):
    return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())


def response(resp: httpx.Response) -> List[Dict[str, Any]]:
    results: List[Dict[str, Any]] = []
    dom = html.fromstring(resp.text)

    if domain_is_seized(dom):
        raise SearxException(f"zlibrary domain is seized: {base_url}")

    for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'):
        results.append(_parse_result(item))

@@ -168,22 +177,30 @@ def _parse_result(item) -> Dict[str, Any]:

def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch languages and other search arguments from zlibrary's search form."""
    # pylint: disable=import-outside-toplevel
    # pylint: disable=import-outside-toplevel, too-many-branches

    import babel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
    from searx.locales import language_tag

    resp = get(base_url, verify=False)
    if not resp.ok:  # type: ignore
        raise RuntimeError("Response from zlibrary's search page is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore

    if domain_is_seized(dom):
        print(f"ERROR: zlibrary domain is seized: {base_url}")
        # don't change anything, re-use the existing values
        engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
        engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
        engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
        return

    engine_traits.all_locale = ""
    engine_traits.custom["ext"] = []
    engine_traits.custom["year_from"] = []
    engine_traits.custom["year_to"] = []

    resp = get(base_url)
    if not resp.ok:  # type: ignore
        raise RuntimeError("Response from zlibrary's search page is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore

    for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"):
        engine_traits.custom["year_from"].append(year.get("value"))

@ -1,6 +1,7 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Exception types raised by SearXNG modules.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
|
|
@@ -61,7 +62,7 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
    """This setting contains the default suspended time (default 86400 sec / 1
    day)."""

    def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
    def __init__(self, suspended_time: int | None = None, message: str = 'Access denied'):
        """Generic exception to raise when an engine denies access to the results.

        :param suspended_time: How long the engine is going to be suspended in
@@ -70,12 +71,13 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
        :param message: Internal message. Defaults to ``Access denied``
        :type message: str
        """
        suspended_time = suspended_time or self._get_default_suspended_time()
        if suspended_time is None:
            suspended_time = self._get_default_suspended_time()
        super().__init__(message + ', suspended_time=' + str(suspended_time))
        self.suspended_time = suspended_time
        self.message = message

    def _get_default_suspended_time(self):
    def _get_default_suspended_time(self) -> int:
        from searx import get_setting  # pylint: disable=C0415

        return get_setting(self.SUSPEND_TIME_SETTING)

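The change from ``suspended_time or default`` to an explicit ``is None`` test is subtle but deliberate: with ``or``, an explicit ``suspended_time=0`` was falsy and silently replaced by the default. A small sketch (not part of the commit, class name hypothetical) of the corrected pattern:

    class AccessDenied(Exception):
        DEFAULT_SUSPEND = 86400  # mirrors the one-day default above

        def __init__(self, suspended_time: int | None = None, message: str = 'Access denied'):
            if suspended_time is None:  # only None triggers the default, 0 stays 0
                suspended_time = self.DEFAULT_SUSPEND
            super().__init__(f"{message}, suspended_time={suspended_time}")
            self.suspended_time = suspended_time

    assert AccessDenied(0).suspended_time == 0      # explicit zero is kept
    assert AccessDenied().suspended_time == 86400   # default applied
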
@@ -88,7 +90,7 @@ class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
    """This setting contains the default suspended time (default 86400 sec / 1
    day)."""

    def __init__(self, suspended_time=None, message='CAPTCHA'):
    def __init__(self, suspended_time: int | None = None, message='CAPTCHA'):
        super().__init__(message=message, suspended_time=suspended_time)


@@ -102,7 +104,7 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
    """This setting contains the default suspended time (default 3600 sec / 1
    hour)."""

    def __init__(self, suspended_time=None, message='Too many request'):
    def __init__(self, suspended_time: int | None = None, message='Too many request'):
        super().__init__(message=message, suspended_time=suspended_time)

38 searx/favicons/__init__.py Normal file

@@ -0,0 +1,38 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementations for providing the favicons in SearXNG"""

from __future__ import annotations

__all__ = ["init", "favicon_url", "favicon_proxy"]

import pathlib
from searx import logger
from searx import get_setting
from .proxy import favicon_url, favicon_proxy

logger = logger.getChild('favicons')


def is_active():
    return bool(get_setting("search.favicon_resolver", False))


def init():

    # pylint: disable=import-outside-toplevel

    from . import config, cache, proxy
    from .. import settings_loader

    cfg_file = (settings_loader.get_user_cfg_folder() or pathlib.Path("/etc/searxng")) / "favicons.toml"
    if not cfg_file.exists():
        if is_active():
            logger.error(f"missing favicon config: {cfg_file}")
        cfg_file = config.DEFAULT_CFG_TOML_PATH

    logger.debug(f"load favicon config: {cfg_file}")
    cfg = config.FaviconConfig.from_toml_file(cfg_file, use_cache=True)
    cache.init(cfg.cache)
    proxy.init(cfg.proxy)

    del cache, config, proxy, cfg, settings_loader
12 searx/favicons/__main__.py Normal file

@@ -0,0 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Command line implementation"""

import typer

from . import cache
from . import init

init()
app = typer.Typer()
app.add_typer(cache.app, name="cache", help="commands related to the cache")
app()

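Given the wiring above (a typer app plus the ``cache`` sub-typer defined in cache.py below), the module should be invocable as ``python -m searx.favicons cache state`` or ``python -m searx.favicons cache maintenance``; the exact invocation is inferred from the code, it is not documented in this commit.
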
476 searx/favicons/cache.py Normal file

@@ -0,0 +1,476 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementations for caching favicons.

:py:obj:`FaviconCacheConfig`:
  Configuration of the favicon cache

:py:obj:`FaviconCache`:
  Abstract base class for the implementation of a favicon cache.

:py:obj:`FaviconCacheSQLite`:
  Favicon cache that manages the favicon BLOBs in a SQLite DB.

:py:obj:`FaviconCacheNull`:
  Fallback solution if the configured cache cannot be used for system reasons.

----

"""

from __future__ import annotations
from typing import Literal

import os
import abc
import dataclasses
import hashlib
import logging
import sqlite3
import tempfile
import time
import typer

import msgspec

from searx import sqlitedb
from searx import logger
from searx.utils import humanize_bytes, humanize_number

CACHE: "FaviconCache"
FALLBACK_ICON = b"FALLBACK_ICON"

logger = logger.getChild('favicons.cache')
app = typer.Typer()


@app.command()
def state():
    """show state of the cache"""
    print(CACHE.state().report())


@app.command()
def maintenance(force: bool = True, debug: bool = False):
    """perform maintenance of the cache"""
    root_log = logging.getLogger()
    if debug:
        root_log.setLevel(logging.DEBUG)
    else:
        root_log.handlers = []
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(handler)
        logger.setLevel(logging.DEBUG)

    state_t0 = CACHE.state()
    CACHE.maintenance(force=force)
    state_t1 = CACHE.state()
    state_delta = state_t0 - state_t1
    print("The cache has been reduced by:")
    print(state_delta.report("\n- {descr}: {val}").lstrip("\n"))


def init(cfg: "FaviconCacheConfig"):
    """Initialization of a global ``CACHE``"""

    global CACHE  # pylint: disable=global-statement
    if cfg.db_type == "sqlite":
        if sqlite3.sqlite_version_info <= (3, 35):
            logger.critical(
                "Disable favicon caching completely: SQLite library (%s) is too old! (require >= 3.35)",
                sqlite3.sqlite_version,
            )
            CACHE = FaviconCacheNull(cfg)
        else:
            CACHE = FaviconCacheSQLite(cfg)
    elif cfg.db_type == "mem":
        logger.error("Favicons are cached in memory, don't use this in production!")
        CACHE = FaviconCacheMEM(cfg)
    else:
        raise NotImplementedError(f"favicons db_type '{cfg.db_type}' is unknown")

class FaviconCacheConfig(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """Configuration of the favicon cache."""

    db_type: Literal["sqlite", "mem"] = "sqlite"
    """Type of the database:

    ``sqlite``:
      :py:obj:`.cache.FaviconCacheSQLite`

    ``mem``:
      :py:obj:`.cache.FaviconCacheMEM` (not recommended)
    """

    db_url: str = tempfile.gettempdir() + os.sep + "faviconcache.db"
    """URL of the SQLite DB, the path to the database file."""

    HOLD_TIME: int = 60 * 60 * 24 * 30  # 30 days
    """Hold time (default in sec.), after which a BLOB is removed from the cache."""

    LIMIT_TOTAL_BYTES: int = 1024 * 1024 * 50  # 50 MB
    """Maximum of bytes (default) stored in the cache of all blobs. Note: The
    limit is only reached at each maintenance interval after which the oldest
    BLOBs are deleted; the limit is exceeded during the maintenance period. If
    the maintenance period is *too long* or maintenance is switched off
    completely, the cache grows uncontrollably."""

    BLOB_MAX_BYTES: int = 1024 * 20  # 20 KB
    """The maximum BLOB size in bytes that a favicon may have so that it can be
    saved in the cache. If the favicon is larger, it is not saved in the cache
    and must be requested by the client via the proxy."""

    MAINTENANCE_PERIOD: int = 60 * 60
    """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
    ``auto``."""

    MAINTENANCE_MODE: Literal["auto", "off"] = "auto"
    """Type of maintenance mode

    ``auto``:
      Maintenance is carried out automatically as part of the maintenance
      intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.

    ``off``:
      Maintenance is switched off and must be carried out by an external process
      if required.
    """


@dataclasses.dataclass
class FaviconCacheStats:
    """Dataclass which provides information on the status of the cache."""

    favicons: int | None = None
    bytes: int | None = None
    domains: int | None = None
    resolvers: int | None = None

    field_descr = (
        ("favicons", "number of favicons in cache", humanize_number),
        ("bytes", "total size (approx. bytes) of cache", humanize_bytes),
        ("domains", "total number of domains in cache", humanize_number),
        ("resolvers", "number of resolvers", str),
    )

    def __sub__(self, other) -> FaviconCacheStats:
        if not isinstance(other, self.__class__):
            raise TypeError(f"unsupported operand type(s) for -: '{self.__class__}' and '{type(other)}'")
        kwargs = {}
        for field, _, _ in self.field_descr:
            self_val, other_val = getattr(self, field), getattr(other, field)
            if None in (self_val, other_val):
                continue
            if isinstance(self_val, int):
                kwargs[field] = self_val - other_val
            else:
                kwargs[field] = self_val
        return self.__class__(**kwargs)

    def report(self, fmt: str = "{descr}: {val}\n"):
        s = []
        for field, descr, cast in self.field_descr:
            val = getattr(self, field)
            if val is None:
                val = "--"
            else:
                val = cast(val)
            s.append(fmt.format(descr=descr, val=val))
        return "".join(s)

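A short sketch (not part of the commit) of how the ``__sub__``/``report`` pair is used by the ``maintenance`` CLI command above to print what a cleanup reclaimed; all numbers are made up:

    from searx.favicons.cache import FaviconCacheStats

    before = FaviconCacheStats(favicons=120, bytes=900_000, domains=80, resolvers=2)
    after = FaviconCacheStats(favicons=100, bytes=750_000, domains=70, resolvers=2)
    delta = before - after  # field-wise difference, None fields are skipped
    print(delta.report("- {descr}: {val}\n"))
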
class FaviconCache(abc.ABC):
    """Abstract base class for the implementation of a favicon cache."""

    @abc.abstractmethod
    def __init__(self, cfg: FaviconCacheConfig):
        """An instance of the favicon cache is built up from the configuration."""

    @abc.abstractmethod
    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        """Returns ``None`` or the tuple of ``(data, mime)`` that has been
        registered in the cache. The ``None`` indicates that there was no entry
        in the cache."""

    @abc.abstractmethod
    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        """Set data and mime-type in the cache. If data is None, the
        :py:obj:`FALLBACK_ICON` is registered in the cache."""

    @abc.abstractmethod
    def state(self) -> FaviconCacheStats:
        """Returns a :py:obj:`FaviconCacheStats` (key/values) with information
        on the state of the cache."""

    @abc.abstractmethod
    def maintenance(self, force=False):
        """Performs maintenance on the cache"""


class FaviconCacheNull(FaviconCache):
    """A dummy favicon cache that caches nothing / a fallback solution. The
    NullCache is used when more efficient caches such as the
    :py:obj:`FaviconCacheSQLite` cannot be used because, for example, the SQLite
    library is only available in an old version and does not meet the
    requirements."""

    def __init__(self, cfg: FaviconCacheConfig):
        return None

    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        return None

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        return False

    def state(self):
        return FaviconCacheStats(favicons=0)

    def maintenance(self, force=False):
        pass


class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
    """Favicon cache that manages the favicon BLOBs in a SQLite DB. The DB
    model in the SQLite DB is implemented using the abstract class
    :py:obj:`sqlitedb.SQLiteAppl`.

    The following configurations are required / supported:

    - :py:obj:`FaviconCacheConfig.db_url`
    - :py:obj:`FaviconCacheConfig.HOLD_TIME`
    - :py:obj:`FaviconCacheConfig.LIMIT_TOTAL_BYTES`
    - :py:obj:`FaviconCacheConfig.BLOB_MAX_BYTES`
    - :py:obj:`MAINTENANCE_PERIOD`
    - :py:obj:`MAINTENANCE_MODE`
    """

    DB_SCHEMA = 1

    DDL_BLOBS = """\
CREATE TABLE IF NOT EXISTS blobs (
    sha256 TEXT,
    bytes_c INTEGER,
    mime TEXT NOT NULL,
    data BLOB NOT NULL,
    PRIMARY KEY (sha256))"""

    """Table to store BLOB objects by their sha256 hash values."""

    DDL_BLOB_MAP = """\
CREATE TABLE IF NOT EXISTS blob_map (
    m_time INTEGER DEFAULT (strftime('%s', 'now')),  -- last modified (unix epoch) time in sec.
    sha256 TEXT,
    resolver TEXT,
    authority TEXT,
    PRIMARY KEY (resolver, authority))"""

    """Table to map from (resolver, authority) to sha256 hash values."""

    DDL_CREATE_TABLES = {
        "blobs": DDL_BLOBS,
        "blob_map": DDL_BLOB_MAP,
    }

    SQL_DROP_LEFTOVER_BLOBS = (
        "DELETE FROM blobs WHERE sha256 IN ("
        "  SELECT b.sha256"
        "  FROM blobs b"
        "  LEFT JOIN blob_map bm"
        "  ON b.sha256 = bm.sha256"
        "  WHERE bm.sha256 IS NULL)"
    )
    """Delete blobs.sha256 (BLOBs) no longer in blob_map.sha256."""

    SQL_ITER_BLOBS_SHA256_BYTES_C = (
        "SELECT b.sha256, b.bytes_c FROM blobs b"
        "  JOIN blob_map bm"
        "  ON b.sha256 = bm.sha256"
        "  ORDER BY bm.m_time ASC"
    )

    SQL_INSERT_BLOBS = (
        "INSERT INTO blobs (sha256, bytes_c, mime, data) VALUES (?, ?, ?, ?)"
        "  ON CONFLICT (sha256) DO NOTHING"
    )  # fmt: skip

    SQL_INSERT_BLOB_MAP = (
        "INSERT INTO blob_map (sha256, resolver, authority) VALUES (?, ?, ?)"
        "  ON CONFLICT DO UPDATE"
        "    SET sha256=excluded.sha256, m_time=strftime('%s', 'now')"
    )

    def __init__(self, cfg: FaviconCacheConfig):
        """An instance of the favicon cache is built up from the configuration."""

        if cfg.db_url == ":memory:":
            logger.critical("don't use SQLite DB in :memory: in production!!")
        super().__init__(cfg.db_url)
        self.cfg = cfg

    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:

        sql = "SELECT sha256 FROM blob_map WHERE resolver = ? AND authority = ?"
        res = self.DB.execute(sql, (resolver, authority)).fetchone()
        if res is None:
            return None

        data, mime = (None, None)
        sha256 = res[0]
        if sha256 == FALLBACK_ICON:
            return data, mime

        sql = "SELECT data, mime FROM blobs WHERE sha256 = ?"
        res = self.DB.execute(sql, (sha256,)).fetchone()
        if res is not None:
            data, mime = res
        return data, mime

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:

        if self.cfg.MAINTENANCE_MODE == "auto" and int(time.time()) > self.next_maintenance_time:
            # Should automatic maintenance be moved to a new thread?
            self.maintenance()

        if data is not None and mime is None:
            logger.error(
                "favicon resolver %s tries to cache mime-type None for authority %s",
                resolver,
                authority,
            )
            return False

        bytes_c = len(data or b"")
        if bytes_c > self.cfg.BLOB_MAX_BYTES:
            logger.info(
                "favicon of resolver: %s / authority: %s too big to cache (bytes: %s)" % (resolver, authority, bytes_c)
            )
            return False

        if data is None:
            sha256 = FALLBACK_ICON
        else:
            sha256 = hashlib.sha256(data).hexdigest()

        with self.connect() as conn:
            if sha256 != FALLBACK_ICON:
                conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data))
            conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority))

        return True

    @property
    def next_maintenance_time(self) -> int:
        """Returns (unix epoch) time of the next maintenance."""

        return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE")

    def maintenance(self, force=False):

        # Prevent parallel DB maintenance cycles from other DB connections
        # (e.g. in multi thread or process environments).

        if not force and int(time.time()) < self.next_maintenance_time:
            logger.debug("no maintenance required yet, next maintenance interval is in the future")
            return
        self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!

        # do maintenance tasks

        with self.connect() as conn:

            # drop items not in HOLD time
            res = conn.execute(
                f"DELETE FROM blob_map"
                f" WHERE cast(m_time as integer) < cast(strftime('%s', 'now') as integer) - {self.cfg.HOLD_TIME}"
            )
            logger.debug("dropped %s obsolete blob_map items from db", res.rowcount)
            res = conn.execute(self.SQL_DROP_LEFTOVER_BLOBS)
            logger.debug("dropped %s obsolete BLOBS from db", res.rowcount)

            # drop old items to be in LIMIT_TOTAL_BYTES
            total_bytes = conn.execute("SELECT SUM(bytes_c) FROM blobs").fetchone()[0] or 0
            if total_bytes > self.cfg.LIMIT_TOTAL_BYTES:

                x = total_bytes - self.cfg.LIMIT_TOTAL_BYTES
                c = 0
                sha_list = []
                for row in conn.execute(self.SQL_ITER_BLOBS_SHA256_BYTES_C):
                    sha256, bytes_c = row
                    sha_list.append(sha256)
                    c += bytes_c
                    if c > x:
                        break
                if sha_list:
                    conn.execute("DELETE FROM blobs WHERE sha256 IN ('%s')" % "','".join(sha_list))
                    conn.execute("DELETE FROM blob_map WHERE sha256 IN ('%s')" % "','".join(sha_list))
                    logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), c)

    def _query_val(self, sql, default=None):
        val = self.DB.execute(sql).fetchone()
        if val is not None:
            val = val[0]
        if val is None:
            val = default
        return val

    def state(self) -> FaviconCacheStats:
        return FaviconCacheStats(
            favicons=self._query_val("SELECT count(*) FROM blobs", 0),
            bytes=self._query_val("SELECT SUM(bytes_c) FROM blobs", 0),
            domains=self._query_val("SELECT count(*) FROM (SELECT authority FROM blob_map GROUP BY authority)", 0),
            resolvers=self._query_val("SELECT count(*) FROM (SELECT resolver FROM blob_map GROUP BY resolver)", 0),
        )

class FaviconCacheMEM(FaviconCache):
    """Favicon cache in process' memory. It's just a POC that stores the
    favicons in the memory of the process.

    .. attention::

       Don't use it in production, it will blow up your memory!!

    """

    def __init__(self, cfg):

        self.cfg = cfg
        self._data = {}
        self._sha_mime = {}

    def __call__(self, resolver: str, authority: str) -> None | tuple[bytes | None, str | None]:

        sha, mime = self._sha_mime.get(f"{resolver}:{authority}", (None, None))
        if sha is None:
            return None
        data = self._data.get(sha)
        if data == FALLBACK_ICON:
            data = None
        return data, mime

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:

        if data is None:
            data = FALLBACK_ICON
            mime = None

        elif mime is None:
            logger.error(
                "favicon resolver %s tries to cache mime-type None for authority %s",
                resolver,
                authority,
            )
            return False

        digest = hashlib.sha256(data).hexdigest()
        self._data[digest] = data
        self._sha_mime[f"{resolver}:{authority}"] = (digest, mime)
        return True

    def state(self):
        return FaviconCacheStats(favicons=len(self._data.keys()))

    def maintenance(self, force=False):
        pass

65 searx/favicons/config.py Normal file

@@ -0,0 +1,65 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring

from __future__ import annotations

import pathlib
import msgspec

from .cache import FaviconCacheConfig
from .proxy import FaviconProxyConfig

CONFIG_SCHEMA: int = 1
"""Version of the configuration schema."""

TOML_CACHE_CFG: dict[str, "FaviconConfig"] = {}
"""Cache config objects by TOML's filename."""

DEFAULT_CFG_TOML_PATH = pathlib.Path(__file__).parent / "favicons.toml"


class FaviconConfig(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """The class aggregates configurations of the favicon tools"""

    cfg_schema: int
    """Config's schema version. The specification of the version of the schema
    is mandatory, currently only version :py:obj:`CONFIG_SCHEMA` is supported.
    By specifying a version, it is possible to ensure downward compatibility in
    the event of future changes to the configuration schema"""

    cache: FaviconCacheConfig = msgspec.field(default_factory=FaviconCacheConfig)
    """Setup of the :py:obj:`.cache.FaviconCacheConfig`."""

    proxy: FaviconProxyConfig = msgspec.field(default_factory=FaviconProxyConfig)
    """Setup of the :py:obj:`.proxy.FaviconProxyConfig`."""

    @classmethod
    def from_toml_file(cls, cfg_file: pathlib.Path, use_cache: bool) -> "FaviconConfig":
        """Create a config object from a TOML file, the ``use_cache`` argument
        specifies whether a cache should be used.
        """

        cached = TOML_CACHE_CFG.get(str(cfg_file))
        if use_cache and cached:
            return cached

        with cfg_file.open("rb") as f:
            data = f.read()

        cfg = msgspec.toml.decode(data, type=_FaviconConfig)
        schema = cfg.favicons.cfg_schema
        if schema != CONFIG_SCHEMA:
            raise ValueError(
                f"config schema version {CONFIG_SCHEMA} is needed, version {schema} is given in {cfg_file}"
            )

        cfg = cfg.favicons
        if use_cache and cached:
            TOML_CACHE_CFG[str(cfg_file.resolve())] = cfg

        return cfg


class _FaviconConfig(msgspec.Struct):  # pylint: disable=too-few-public-methods
    # wrapper struct for root object "favicons."
    favicons: FaviconConfig

25 searx/favicons/favicons.toml Normal file

@@ -0,0 +1,25 @@
[favicons]

cfg_schema = 1  # config's schema version no.

[favicons.proxy]

# max_age = 5184000  # 60 days / default: 7 days (604800 sec)

# [favicons.proxy.resolver_map]
#
# The available favicon resolvers are registered here.
#
# "duckduckgo" = "searx.favicons.resolvers.duckduckgo"
# "allesedv" = "searx.favicons.resolvers.allesedv"
# "google" = "searx.favicons.resolvers.google"
# "yandex" = "searx.favicons.resolvers.yandex"

[favicons.cache]

# db_url = "/var/cache/searxng/faviconcache.db"  # default: "/tmp/faviconcache.db"
# HOLD_TIME = 5184000         # 60 days / default: 30 days
# LIMIT_TOTAL_BYTES = 2147483648  # 2 GB / default: 50 MB
# BLOB_MAX_BYTES = 40960      # 40 KB / default 20 KB
# MAINTENANCE_MODE = "off"    # default: "auto"
# MAINTENANCE_PERIOD = 600    # 10min / default: 1h

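A minimal sketch (not part of the commit) of how msgspec decodes such a TOML file into typed structs; the structs here are stripped-down stand-ins for the real ``FaviconConfig``:

    import msgspec

    class CacheCfg(msgspec.Struct):
        HOLD_TIME: int = 60 * 60 * 24 * 30  # default: 30 days

    class Favicons(msgspec.Struct):
        cfg_schema: int
        cache: CacheCfg = msgspec.field(default_factory=CacheCfg)

    class Root(msgspec.Struct):
        favicons: Favicons

    raw = b'[favicons]\ncfg_schema = 1\n\n[favicons.cache]\nHOLD_TIME = 5184000\n'
    cfg = msgspec.toml.decode(raw, type=Root)
    assert cfg.favicons.cfg_schema == 1
    assert cfg.favicons.cache.HOLD_TIME == 5184000
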
237 searx/favicons/proxy.py Normal file

@@ -0,0 +1,237 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementations for a favicon proxy"""

from __future__ import annotations

from typing import Callable

import importlib
import base64
import pathlib
import urllib.parse

import flask
from httpx import HTTPError
import msgspec

from searx import get_setting

from searx.webutils import new_hmac, is_hmac_of
from searx.exceptions import SearxEngineResponseException

from .resolvers import DEFAULT_RESOLVER_MAP
from . import cache

DEFAULT_FAVICON_URL = {}
CFG: FaviconProxyConfig = None  # type: ignore


def init(cfg: FaviconProxyConfig):
    global CFG  # pylint: disable=global-statement
    CFG = cfg


def _initial_resolver_map():
    d = {}
    name: str = get_setting("search.favicon_resolver", None)  # type: ignore
    if name:
        func = DEFAULT_RESOLVER_MAP.get(name)
        if func:
            d = {name: f"searx.favicons.resolvers.{func.__name__}"}
    return d


class FaviconProxyConfig(msgspec.Struct):
    """Configuration of the favicon proxy."""

    max_age: int = 60 * 60 * 24 * 7  # seven days
    """HTTP header Cache-Control_ ``max-age``

    .. _Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
    """

    secret_key: str = get_setting("server.secret_key")  # type: ignore
    """By default, the value from :ref:`server.secret_key <settings server>`
    setting is used."""

    resolver_timeout: int = get_setting("outgoing.request_timeout")  # type: ignore
    """Timeout which the resolvers should not exceed, is usually passed to the
    outgoing request of the resolver. By default, the value from
    :ref:`outgoing.request_timeout <settings outgoing>` setting is used."""

    resolver_map: dict[str, str] = msgspec.field(default_factory=_initial_resolver_map)
    """The resolver_map is a key / value dictionary where the key is the name of
    the resolver and the value is the fully qualified name (FQN) of the resolver's
    function (the callable). The resolvers from the python module
    :py:obj:`searx.favicons.resolver` are available by default."""

    def get_resolver(self, name: str) -> Callable | None:
        """Returns the callable object (function) of the resolver with the
        ``name``. If no resolver is registered for the ``name``, ``None`` is
        returned.
        """
        fqn = self.resolver_map.get(name)
        if fqn is None:
            return None
        mod_name, _, func_name = fqn.rpartition('.')
        mod = importlib.import_module(mod_name)
        func = getattr(mod, func_name)
        if func is None:
            raise ValueError(f"resolver {fqn} is not implemented")
        return func

    favicon_path: str = get_setting("ui.static_path") + "/themes/{theme}/img/empty_favicon.svg"  # type: ignore
    favicon_mime_type: str = "image/svg+xml"

    def favicon(self, **replacements):
        """Returns pathname and mimetype of the default favicon."""
        return (
            pathlib.Path(self.favicon_path.format(**replacements)),
            self.favicon_mime_type,
        )

    def favicon_data_url(self, **replacements):
        """Returns data image URL of the default favicon."""

        cache_key = ", ".join(f"{x}:{replacements[x]}" for x in sorted(list(replacements.keys()), key=str))
        data_url = DEFAULT_FAVICON_URL.get(cache_key)
        if data_url is not None:
            return data_url

        fav, mimetype = CFG.favicon(**replacements)
        # hint: encoding utf-8 limits favicons to be a SVG image
        with fav.open("r", encoding="utf-8") as f:
            data_url = f.read()

        data_url = urllib.parse.quote(data_url)
        data_url = f"data:{mimetype};utf8,{data_url}"
        DEFAULT_FAVICON_URL[cache_key] = data_url
        return data_url


def favicon_proxy():
    """REST API of SearXNG's favicon proxy service

    ::

        /favicon_proxy?authority=<...>&h=<...>

    ``authority``:
      Domain name :rfc:`3986` / see :py:obj:`favicon_url`

    ``h``:
      HMAC :rfc:`2104`, built from the :ref:`server.secret_key <settings
      server>` setting.

    """
    authority = flask.request.args.get('authority')

    # malformed request or RFC 3986 authority
    if not authority or "/" in authority:
        return '', 400

    # malformed request / does not have authorisation
    if not is_hmac_of(
        CFG.secret_key,
        authority.encode(),
        flask.request.args.get('h', ''),
    ):
        return '', 400

    resolver = flask.request.preferences.get_value('favicon_resolver')  # type: ignore
    # if resolver is empty or not valid, just return HTTP 400.
    if not resolver or resolver not in CFG.resolver_map.keys():
        return "", 400

    data, mime = search_favicon(resolver, authority)

    if data is not None and mime is not None:
        resp = flask.Response(data, mimetype=mime)  # type: ignore
        resp.headers['Cache-Control'] = f"max-age={CFG.max_age}"
        return resp

    # return default favicon from static path
    theme = flask.request.preferences.get_value("theme")  # type: ignore
    fav, mimetype = CFG.favicon(theme=theme)
    return flask.send_from_directory(fav.parent, fav.name, mimetype=mimetype)

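A sketch (not part of the commit) of the HMAC scheme guarding the route above, using only the stdlib; ``searx.webutils.new_hmac``/``is_hmac_of`` are assumed to behave equivalently:

    import hashlib
    import hmac

    def new_hmac(secret: str, data: bytes) -> str:
        return hmac.new(secret.encode(), data, hashlib.sha256).hexdigest()

    def is_hmac_of(secret: str, data: bytes, given: str) -> bool:
        # constant-time comparison to avoid timing side channels
        return hmac.compare_digest(new_hmac(secret, data), given)

    h = new_hmac("server-secret", b"example.org")
    assert is_hmac_of("server-secret", b"example.org", h)
    assert not is_hmac_of("server-secret", b"evil.example", h)
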
def search_favicon(resolver: str, authority: str) -> tuple[None | bytes, None | str]:
    """Sends the request to the favicon resolver and returns a tuple for the
    favicon. The tuple consists of ``(data, mime)``, if the resolver has not
    determined a favicon, both values are ``None``.

    ``data``:
      Binary data of the favicon.

    ``mime``:
      Mime type of the favicon.

    """

    data, mime = (None, None)

    func = CFG.get_resolver(resolver)
    if func is None:
        return data, mime

    # to avoid superfluous requests to the resolver, first look in the cache
    data_mime = cache.CACHE(resolver, authority)
    if data_mime is not None:
        return data_mime

    try:
        data, mime = func(authority, timeout=CFG.resolver_timeout)
        if data is None or mime is None:
            data, mime = (None, None)

    except (HTTPError, SearxEngineResponseException):
        pass

    cache.CACHE.set(resolver, authority, mime, data)
    return data, mime


def favicon_url(authority: str) -> str:
    """Function to generate the image URL used for favicons in SearXNG's result
    lists. The ``authority`` argument (aka netloc / :rfc:`3986`) is usually a
    (sub-) domain name. This function is used in the HTML (jinja) templates.

    .. code:: html

       <div class="favicon">
          <img src="{{ favicon_url(result.parsed_url.netloc) }}">
       </div>

    The returned URL is a route to :py:obj:`favicon_proxy` REST API.

    If the favicon is already in the cache, the returned URL is a `data URL`_
    (something like ``data:image/png;base64,...``). By generating a data url from
    the :py:obj:`.cache.FaviconCache`, additional HTTP roundtrips via the
    :py:obj:`favicon_proxy` are saved. However, it must also be borne in mind
    that data urls are not cached in the client (web browser).

    .. _data URL: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs

    """

    resolver = flask.request.preferences.get_value('favicon_resolver')  # type: ignore
    # if resolver is empty or not valid, just return nothing.
    if not resolver or resolver not in CFG.resolver_map.keys():
        return ""

    data_mime = cache.CACHE(resolver, authority)

    if data_mime == (None, None):
        # we have already checked, the resolver does not have a favicon
        theme = flask.request.preferences.get_value("theme")  # type: ignore
        return CFG.favicon_data_url(theme=theme)

    if data_mime is not None:
        data, mime = data_mime
        return f"data:{mime};base64,{str(base64.b64encode(data), 'utf-8')}"  # type: ignore

    h = new_hmac(CFG.secret_key, authority.encode())
    proxy_url = flask.url_for('favicon_proxy')
    query = urllib.parse.urlencode({"authority": authority, "h": h})
    return f"{proxy_url}?{query}"

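A tiny sketch (not part of the commit) of the base64 data-URL construction used in ``favicon_url`` above, with made-up favicon bytes:

    import base64

    data = b"GIF89a..."  # hypothetical favicon bytes
    mime = "image/gif"
    url = f"data:{mime};base64,{str(base64.b64encode(data), 'utf-8')}"
    assert url.startswith("data:image/gif;base64,")
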
100 searx/favicons/resolvers.py Normal file

@@ -0,0 +1,100 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementations of the favicon *resolvers* that are available in the favicon
proxy by default. A *resolver* is a function that obtains the favicon from an
external source. The *resolver* function receives two arguments (``domain,
timeout``) and returns a tuple ``(data, mime)``.

"""

from __future__ import annotations

__all__ = ["DEFAULT_RESOLVER_MAP", "allesedv", "duckduckgo", "google", "yandex"]

from typing import Callable
from searx import network
from searx import logger

DEFAULT_RESOLVER_MAP: dict[str, Callable]
logger = logger.getChild('favicons.resolvers')


def _req_args(**kwargs):
    # add the request arguments from the searx.network
    d = {"raise_for_httperror": False}
    d.update(kwargs)
    return d


def allesedv(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from allesedv.com / https://favicon.allesedv.com/"""
    data, mime = (None, None)
    url = f"https://f1.allesedv.com/32/{domain}"
    logger.debug("fetch favicon from: %s", url)

    # will just return a 200 regardless of the favicon existing or not
    # sometimes will be correct size, sometimes not
    response = network.get(url, **_req_args(timeout=timeout))
    if response and response.status_code == 200:
        mime = response.headers['Content-Type']
        if mime != 'image/gif':
            data = response.content
    return data, mime


def duckduckgo(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from duckduckgo.com / https://blog.jim-nielsen.com/2021/displaying-favicons-for-any-domain/"""
    data, mime = (None, None)
    url = f"https://icons.duckduckgo.com/ip2/{domain}.ico"
    logger.debug("fetch favicon from: %s", url)

    # will return a 404 if the favicon does not exist and a 200 if it does,
    response = network.get(url, **_req_args(timeout=timeout))
    if response and response.status_code == 200:
        # api will respond with a 32x32 png image
        mime = response.headers['Content-Type']
        data = response.content
    return data, mime


def google(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from google.com"""
    data, mime = (None, None)

    # URL https://www.google.com/s2/favicons?sz=32&domain={domain}" will be
    # redirected (HTTP 301 Moved Permanently) to t1.gstatic.com/faviconV2:
    url = (
        f"https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL"
        f"&url=https://{domain}&size=32"
    )
    logger.debug("fetch favicon from: %s", url)

    # will return a 404 if the favicon does not exist and a 200 if it does,
    response = network.get(url, **_req_args(timeout=timeout))
    if response and response.status_code == 200:
        # api will respond with a 32x32 png image
        mime = response.headers['Content-Type']
        data = response.content
    return data, mime


def yandex(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from yandex.com"""
    data, mime = (None, None)
    url = f"https://favicon.yandex.net/favicon/{domain}"
    logger.debug("fetch favicon from: %s", url)

    # api will respond with a 16x16 png image, if it doesn't exist, it will be a
    # 1x1 png image (70 bytes)
    response = network.get(url, **_req_args(timeout=timeout))
    if response and response.status_code == 200 and len(response.content) > 70:
        mime = response.headers['Content-Type']
        data = response.content
    return data, mime


DEFAULT_RESOLVER_MAP = {
    "allesedv": allesedv,
    "duckduckgo": duckduckgo,
    "google": google,
    "yandex": yandex,
}

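Since ``resolver_map`` values are plain dotted names resolved via ``importlib``, a third-party resolver only has to honour the ``(domain, timeout) -> (data, mime)`` contract; a sketch (not part of the commit, names hypothetical):

    from __future__ import annotations

    def my_resolver(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
        # return (None, None) when no favicon could be determined;
        # otherwise return the raw bytes and their mime type
        return None, None
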
87 searx/infopage/fr/about.md Normal file

@@ -0,0 +1,87 @@
# A propos de SearXNG

SearXNG est un [Métamoteur] qui agrège les résultats d'autres
{{link('moteurs de recherche', 'preferences')}} tout en ne sauvegardant
aucune information à propos de ses utilisateurs.

Le projet SearXNG est maintenu par une communauté ouverte.
Rejoignez-nous sur Matrix si vous avez des questions ou simplement pour
discuter de SearXNG : [#searxng:matrix.org].

Aidez-nous à rendre SearXNG meilleur.

- Vous pouvez améliorer les traductions de SearXNG avec l'outil
  [Weblate].
- Suivez le développement, contribuez au projet ou remontez des erreurs
  en utilisant le [dépôt de sources].
- Pour obtenir de plus amples informations, consultez la documentation
  en ligne du [projet SearXNG].

## Pourquoi l'utiliser ?

- SearXNG ne vous fournira pas de résultats aussi personnalisés que
  Google, mais il ne générera pas non plus de suivi sur vous.
- SearXNG ne se soucie pas des recherches que vous faites, ne partage
  aucune information avec des tiers et ne peut pas être utilisé contre
  vous.
- SearXNG est un logiciel libre. Son code source est 100% ouvert et tout
  le monde est encouragé à l'améliorer.

Si vous êtes soucieux du respect de la vie privée et des libertés sur
Internet, faites de SearXNG votre moteur de recherche par défaut. Vous
pouvez aussi installer et utiliser SearXNG sur votre propre serveur.

## Comment le configurer comme moteur de recherche par défaut ?

SearXNG prend en charge [OpenSearch]. Pour plus d'informations sur la
manière de modifier votre moteur de recherche par défaut, veuillez
consulter la documentation de votre navigateur :

- [Firefox]
- [Microsoft Edge] - Ce lien propose aussi les instructions pour les
  navigateurs Chrome et Safari.
- Les navigateurs basés sur [Chromium] permettent d'ajouter des sites de
  navigation sans même y accéder.

Lorsqu'un moteur de recherche est ajouté, son nom doit être unique. Si
vous ne pouvez pas ajouter un moteur de recherche, veuillez :

- Supprimer le doublon (le nom par défaut est SearXNG) ou bien
- Contacter le propriétaire de l'instance que vous souhaitez utiliser
  afin qu'il modifie le nom de celle-ci.

## Comment ça marche ?

SearXNG est une reprise logicielle du projet [searx] [Métamoteur],
lui-même inspiré du [projet Seeks]. Il assure la confidentialité en
mélangeant vos recherches vers d'autres plateformes sans stocker aucune
donnée de recherche. SearXNG peut être ajouté à la barre de recherche
de votre navigateur et même être utilisé comme moteur de recherche par
défaut.

Le lien "{{link('statistiques des moteurs', 'stats')}}" présente des
informations anonymisées concernant l'utilisation des divers moteurs de
recherche.

## Comment reprendre la main ?

SearXNG apprécie votre préoccupation concernant les traces de recherche.
N'hésitez pas à utiliser le [dépôt de sources] et à maintenir votre
propre instance de recherche.

Ajoutez votre instance à la [liste d'instances
publiques]({{get_setting('brand.public_instances')}}) afin d'aider
d'autres personnes à protéger leur vie privée et rendre l'Internet plus
libre. Plus Internet sera décentralisé, plus nous aurons de liberté !

[dépôt de sources]: {{GIT_URL}}
[#searxng:matrix.org]: https://matrix.to/#/#searxng:matrix.org
[projet SearXNG]: {{get_setting('brand.docs_url')}}
[searx]: https://github.com/searx/searx
[Métamoteur]: https://fr.wikipedia.org/wiki/M%C3%A9tamoteur
[Weblate]: https://translate.codeberg.org/projects/searxng/
[projet Seeks]: https://beniz.github.io/seeks/
[OpenSearch]: https://github.com/dewitt/opensearch/blob/master/opensearch-1-1-draft-6.md
[Firefox]: https://support.mozilla.org/en-US/kb/add-or-remove-search-engine-firefox
[Microsoft Edge]: https://support.microsoft.com/en-us/help/4028574/microsoft-edge-change-the-default-search-engine
[Chromium]: https://www.chromium.org/tab-to-search

97 searx/infopage/fr/search-syntax.md Normal file

@@ -0,0 +1,97 @@
# Syntaxe de recherche

SearXNG permet de modifier les catégories de recherche, les moteurs
utilisés ou encore la langue de recherche par l'intermédiaire d'une
syntaxe dédiée. La liste des moteurs de recherche, des catégories et des
langues disponibles est accessible depuis la page de
{{link('préférences', 'preferences')}}.

## `!` Spécifier un moteur ou une catégorie

Pour restreindre la recherche à un moteur ou une catégorie, utilisez le
caractère "!". Voici quelques exemples d'utilisation :

- Rechercher **paris** sur Wikipédia.

  - {{search('!wp paris')}}
  - {{search('!wikipedia paris')}}

- Rechercher **paris** dans la catégorie **Carte**.

  - {{search('!map paris')}}

- Rechercher des **Images**.

  - {{search('!images Wau Holland')}}

Les abréviations de moteurs et de langues sont aussi valides. Il est
possible d'accumuler les moteurs et catégories dans une requête
complexe. Par exemple, {{search('!map !ddg !wp paris')}} recherchera
**paris** dans la catégorie **Carte** de DuckDuckGo et Wikipédia.

## `:` Spécifier une langue

Utilisez le préfixe ":" pour limiter la recherche à une langue en
particulier. Par exemple :

- Rechercher dans les pages françaises de Wikipédia.

  - {{search(':fr !wp Wau Holland')}}

## `!!<bang>` Recherches externes (!Bang)

SearXNG supporte les recherches [DuckDuckGo] de type "!Bang". Utilisez
le préfixe "!!" pour être automatiquement redirigé vers un moteur de
recherche externe. Par exemple :

- Rechercher sur Wikipédia en langue française.

  - {{search('!!wfr Wau Holland')}}

Prenez garde au fait que de telles recherches sont exécutées directement
sur le moteur externe. Dans ce cas, SearXNG ne peut pas protéger votre
vie privée.

[DuckDuckGo]: https://duckduckgo.com/bang

## `!!` Redirection automatique

En utilisant "!!" suivi d'un ou plusieurs espaces lors de votre
recherche, vous serez automatiquement redirigé vers le premier résultat
de recherche. Cela correspond au fonctionnement "J'ai de la chance"
du moteur Google. Par exemple :

- Rechercher et être redirigé directement vers le premier lien
  correspondant.

  - {{search('!! Wau Holland')}}

Prenez garde au fait qu'aucune vérification ne peut être faite
concernant le premier lien retourné. Il pourrait même s'agir d'un site
dangereux. Dans ce cas, SearXNG ne peut pas protéger votre vie
privée. Soyez prudent en utilisant cette fonctionnalité.

## Requêtes spéciales

Dans la section _requêtes spéciales_ de la page de {{link('préférences',
'preferences')}} se trouve une liste de mots clés à usage particulier.
Par exemple :

- Générer une valeur aléatoire.

  - {{search('random uuid')}}

- Calculer une moyenne.

  - {{search('avg 123 548 2.04 24.2')}}

- Afficher la valeur de la variable _User-Agent_ utilisée par votre
  navigateur (doit être activé manuellement).

  - {{search('user-agent')}}

- Convertir une chaîne de caractères en valeurs de hachage ("hash digests")
  (doit être activé manuellement).

  - {{search('md5 lorem ipsum')}}
  - {{search('sha512 lorem ipsum')}}

@@ -128,9 +128,6 @@ _INSTALLED = False
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
"""Base configuration (schema) of the botdetection."""

LIMITER_CFG = Path('/etc/searxng/limiter.toml')
"""Local Limiter configuration."""

CFG_DEPRECATED = {
    # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}


@@ -138,8 +135,12 @@ CFG_DEPRECATED = {

def get_cfg() -> config.Config:
    global CFG  # pylint: disable=global-statement

    if CFG is None:
        CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
        from . import settings_loader  # pylint: disable=import-outside-toplevel

        cfg_file = (settings_loader.get_user_cfg_folder() or Path("/etc/searxng")) / "limiter.toml"
        CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, CFG_DEPRECATED)
    return CFG

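The fallback in ``get_cfg`` relies on ``or`` plus pathlib's ``/`` operator; a small sketch (not part of the commit) of the path computation when no user config folder exists:

    from pathlib import Path

    user_folder = None  # settings_loader.get_user_cfg_folder() may return None
    cfg_file = (user_folder or Path("/etc/searxng")) / "limiter.toml"
    assert cfg_file == Path("/etc/searxng/limiter.toml")
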
@@ -12,7 +12,7 @@ ipv6_prefix = 48

[botdetection.ip_limit]

# To get unlimited access in a local network, by default link-lokal addresses
# To get unlimited access in a local network, by default link-local addresses
# (networks) are not monitored by the ip_limit
filter_link_local = false


@@ -120,7 +120,7 @@ _TR_LOCALES: list[str] = []

def get_translation_locales() -> list[str]:
    """Returns the list of transaltion locales (*underscore*). The list is
    """Returns the list of translation locales (*underscore*). The list is
    generated from the translation folders in :origin:`searx/translations`"""

    global _TR_LOCALES  # pylint:disable=global-statement


@@ -152,7 +152,7 @@ def locales_initialize():
def region_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US)."""
    if not locale.territory:
        raise ValueError('%s missed a territory')
        raise ValueError('babel.Locale %s: missed a territory' % locale)
    return locale.language + '-' + locale.territory


@@ -8,6 +8,7 @@ from timeit import default_timer
from operator import itemgetter

from searx.engines import engines
from searx.openmetrics import OpenMetricsFamily
from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage
from .error_recorder import count_error, count_exception, errors_per_engines


@@ -149,7 +150,9 @@ def get_reliabilities(engline_name_list, checker_results):
        checker_result = checker_results.get(engine_name, {})
        checker_success = checker_result.get('success', True)
        errors = engine_errors.get(engine_name) or []
        if counter('engine', engine_name, 'search', 'count', 'sent') == 0:
        sent_count = counter('engine', engine_name, 'search', 'count', 'sent')

        if sent_count == 0:
            # no request
            reliability = None
        elif checker_success and not errors:


@@ -164,8 +167,9 @@ def get_reliabilities(engline_name_list, checker_results):

        reliabilities[engine_name] = {
            'reliability': reliability,
            'sent_count': sent_count,
            'errors': errors,
            'checker': checker_results.get(engine_name, {}).get('errors', {}),
            'checker': checker_result.get('errors', {}),
        }
    return reliabilities


@@ -245,3 +249,57 @@ def get_engines_stats(engine_name_list):
        'max_time': math.ceil(max_time_total or 0),
        'max_result_count': math.ceil(max_result_count or 0),
    }


def openmetrics(engine_stats, engine_reliabilities):
    metrics = [
        OpenMetricsFamily(
            key="searxng_engines_response_time_total_seconds",
            type_hint="gauge",
            help_hint="The average total response time of the engine",
            data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
            data=[engine['total'] or 0 for engine in engine_stats['time']],
        ),
        OpenMetricsFamily(
            key="searxng_engines_response_time_processing_seconds",
            type_hint="gauge",
            help_hint="The average processing response time of the engine",
            data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
            data=[engine['processing'] or 0 for engine in engine_stats['time']],
        ),
        OpenMetricsFamily(
            key="searxng_engines_response_time_http_seconds",
            type_hint="gauge",
            help_hint="The average HTTP response time of the engine",
            data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
            data=[engine['http'] or 0 for engine in engine_stats['time']],
        ),
        OpenMetricsFamily(
            key="searxng_engines_result_count_total",
            type_hint="counter",
            help_hint="The total amount of results returned by the engine",
            data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
            data=[engine['result_count'] or 0 for engine in engine_stats['time']],
        ),
        OpenMetricsFamily(
            key="searxng_engines_request_count_total",
            type_hint="counter",
            help_hint="The total amount of user requests made to this engine",
            data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
            data=[
                engine_reliabilities.get(engine['name'], {}).get('sent_count', 0) or 0
                for engine in engine_stats['time']
            ],
        ),
        OpenMetricsFamily(
            key="searxng_engines_reliability_total",
            type_hint="counter",
            help_hint="The overall reliability of the engine",
            data_info=[{'engine_name': engine['name']} for engine in engine_stats['time']],
            data=[
                engine_reliabilities.get(engine['name'], {}).get('reliability', 0) or 0
                for engine in engine_stats['time']
            ],
        ),
    ]
    return "".join([str(metric) for metric in metrics])


@@ -11,16 +11,12 @@ from typing import Any, Dict
import httpx
from httpx_socks import AsyncProxyTransport
from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError
import uvloop

from searx import logger

# Optional uvloop (support Python 3.6)
try:
    import uvloop
except ImportError:
    pass
else:
    uvloop.install()

uvloop.install()


logger = logger.getChild('searx.network.client')


@@ -233,8 +233,7 @@ class Network:
        del kwargs['raise_for_httperror']
        return do_raise_for_httperror

    @staticmethod
    def patch_response(response, do_raise_for_httperror):
    def patch_response(self, response, do_raise_for_httperror):
        if isinstance(response, httpx.Response):
            # requests compatibility (response is not streamed)
            # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses


@@ -242,8 +241,11 @@ class Network:

        # raise an exception
        if do_raise_for_httperror:
            raise_for_httperror(response)
            try:
                raise_for_httperror(response)
            except:
                self._logger.warning(f"HTTP Request failed: {response.request.method} {response.request.url}")
                raise
        return response

    def is_valid_response(self, response):


@@ -269,7 +271,7 @@ class Network:
            else:
                response = await client.request(method, url, **kwargs)
            if self.is_valid_response(response) or retries <= 0:
                return Network.patch_response(response, do_raise_for_httperror)
                return self.patch_response(response, do_raise_for_httperror)
        except httpx.RemoteProtocolError as e:
            if not was_disconnected:
                # the server has closed the connection:

35 searx/openmetrics.py Normal file

@@ -0,0 +1,35 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Module providing support for displaying data in OpenMetrics format"""


class OpenMetricsFamily:  # pylint: disable=too-few-public-methods
    """A family of metrics.
    The key parameter is the metric name that should be used (snake case).
    The type_hint parameter must be one of 'counter', 'gauge', 'histogram', 'summary'.
    The help_hint parameter is a short string explaining the metric.
    The data_info parameter is a dictionary of descriptive parameters for the data point (e.g. request method/path).
    The data parameter is a flat list of the actual data in shape of a primitive type.

    See https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md for more information.
    """

    def __init__(self, key: str, type_hint: str, help_hint: str, data_info: list, data: list):
        self.key = key
        self.type_hint = type_hint
        self.help_hint = help_hint
        self.data_info = data_info
        self.data = data

    def __str__(self):
        text_representation = f"""# HELP {self.key} {self.help_hint}
# TYPE {self.key} {self.type_hint}
"""

        for i, data_info_dict in enumerate(self.data_info):
            if not data_info_dict or not self.data[i]:
                continue

            info_representation = ','.join([f"{key}=\"{value}\"" for (key, value) in data_info_dict.items()])
            text_representation += f"{self.key}{{{info_representation}}} {self.data[i]}\n"

        return text_representation

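A sketch (not part of the commit) of what one family renders to, assuming ``searx.openmetrics`` is importable:

    from searx.openmetrics import OpenMetricsFamily

    fam = OpenMetricsFamily(
        key="searxng_engines_response_time_total_seconds",
        type_hint="gauge",
        help_hint="The average total response time of the engine",
        data_info=[{'engine_name': 'example engine'}],
        data=[0.42],
    )
    print(str(fam), end="")
    # expected output:
    #   # HELP searxng_engines_response_time_total_seconds The average total response time of the engine
    #   # TYPE searxng_engines_response_time_total_seconds gauge
    #   searxng_engines_response_time_total_seconds{engine_name="example engine"} 0.42
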
@@ -3,19 +3,27 @@
"""

import ast
import re
import operator
from multiprocessing import Process, Queue
from typing import Callable

import flask
import babel
from flask_babel import gettext
from searx import settings

from searx.plugins import logger

name = "Basic Calculator"
description = gettext("Calculate mathematical expressions via the search bar")
default_on = False
default_on = True

preference_section = 'general'
plugin_id = 'calculator'

operators = {
logger = logger.getChild(plugin_id)

operators: dict[type, Callable] = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,


@@ -35,11 +43,15 @@ def _eval_expr(expr):
    >>> _eval_expr('1 + 2*3**(4^5) / (6 + -7)')
    -5.0
    """
    return _eval(ast.parse(expr, mode='eval').body)
    try:
        return _eval(ast.parse(expr, mode='eval').body)
    except ZeroDivisionError:
        # This is undefined
        return ""


def _eval(node):
    if isinstance(node, ast.Constant) and isinstance(node.value, int):
    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
        return node.value

    if isinstance(node, ast.BinOp):


@@ -51,10 +63,31 @@ def _eval(node):
    raise TypeError(node)


def timeout_func(timeout, func, *args, **kwargs):

    def handler(q: Queue, func, args, **kwargs):  # pylint:disable=invalid-name
        try:
            q.put(func(*args, **kwargs))
        except:
            q.put(None)
            raise

    que = Queue()
    p = Process(target=handler, args=(que, func, args), kwargs=kwargs)
    p.start()
    p.join(timeout=timeout)
    ret_val = None
    if not p.is_alive():
        ret_val = que.get()
    else:
        logger.debug("terminate function after timeout is exceeded")
        p.terminate()
        p.join()
        p.close()
    return ret_val

||||
|
||||
def post_search(_request, search):
|
||||
# don't run on public instances due to possible attack surfaces
|
||||
if settings['server']['public_instance']:
|
||||
return True
|
||||
|
||||
# only show the result of the expression on the first page
|
||||
if search.search_query.pageno > 1:
|
||||
|
|
@ -68,21 +101,30 @@ def post_search(_request, search):
|
|||
# replace commonly used math operators with their proper Python operator
|
||||
query = query.replace("x", "*").replace(":", "/")
|
||||
|
||||
# use UI language
|
||||
ui_locale = babel.Locale.parse(flask.request.preferences.get_value('locale'), sep='-')
|
||||
|
||||
# parse the number system in a localized way
|
||||
def _decimal(match: re.Match) -> str:
|
||||
val = match.string[match.start() : match.end()]
|
||||
val = babel.numbers.parse_decimal(val, ui_locale, numbering_system="latn")
|
||||
return str(val)
|
||||
|
||||
decimal = ui_locale.number_symbols["latn"]["decimal"]
|
||||
group = ui_locale.number_symbols["latn"]["group"]
|
||||
query = re.sub(f"[0-9]+[{decimal}|{group}][0-9]+[{decimal}|{group}]?[0-9]?", _decimal, query)
|
||||
|
||||
# only numbers and math operators are accepted
|
||||
if any(str.isalpha(c) for c in query):
|
||||
return True
|
||||
|
||||
# in python, powers are calculated via **
|
||||
query_py_formatted = query.replace("^", "**")
|
||||
try:
|
||||
result = str(_eval_expr(query_py_formatted))
|
||||
if result != query:
|
||||
search.result_container.answers['calculate'] = {'answer': f"{query} = {result}"}
|
||||
except (TypeError, SyntaxError, ArithmeticError):
|
||||
pass
|
||||
|
||||
# Prevent the runtime from being longer than 50 ms
|
||||
result = timeout_func(0.05, _eval_expr, query_py_formatted)
|
||||
if result is None or result == "":
|
||||
return True
|
||||
result = babel.numbers.format_decimal(result, locale=ui_locale)
|
||||
search.result_container.answers['calculate'] = {'answer': f"{search.search_query.query} = {result}"}
|
||||
return True
|
||||
|
||||
|
||||
def is_allowed():
|
||||
return not settings['server']['public_instance']
|
||||
|
|
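The timeout_func helper above runs the evaluation in a child process so a hostile expression cannot stall the worker. A self-contained sketch of the same Process/Queue pattern; slow_square is a made-up example function, and the __main__ guard is needed because multiprocessing re-imports the module on spawn platforms:

import time
from multiprocessing import Process, Queue


def timeout_func(timeout, func, *args, **kwargs):
    """Run func in a child process; return its result, or None on timeout."""

    def handler(q, func, args, kwargs):
        q.put(func(*args, **kwargs))

    que = Queue()
    p = Process(target=handler, args=(que, func, args, kwargs))
    p.start()
    p.join(timeout=timeout)
    if not p.is_alive():
        return que.get()
    p.terminate()  # runtime exceeded: kill the child, report no result
    p.join()
    return None


def slow_square(x, delay=0.0):
    time.sleep(delay)
    return x * x


if __name__ == "__main__":
    print(timeout_func(0.05, slow_square, 12))             # -> 144
    print(timeout_func(0.05, slow_square, 12, delay=1.0))  # -> None (timed out)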
@@ -1,35 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-# pylint: disable=missing-module-docstring
-
-from flask_babel import gettext
-from searx.plugins import logger
-
-name = gettext('Hostname replace')
-description = "Deprecated / contact system admin to configure 'Hostnames plugin'!!"
-default_on = False
-preference_section = 'general'
-
-plugin_id = 'hostname_replace'
-logger = logger.getChild(plugin_id)
-
-REPORTED = False
-
-
-def deprecated_msg():
-    global REPORTED  # pylint: disable=global-statement
-    if REPORTED:
-        return
-    logger.error(
-        "'Hostname replace' plugin is deprecated and will be dropped soon!"
-        " Configure 'Hostnames plugin':"
-        " https://docs.searxng.org/src/searx.plugins.hostnames.html"
-    )
-    REPORTED = True
-
-
-def on_result(_request, _search, result):
-    # pylint: disable=import-outside-toplevel, cyclic-import
-    from searx.plugins.hostnames import on_result as hostnames_on_result
-
-    deprecated_msg()
-    return hostnames_on_result(_request, _search, result)
@@ -1,17 +1,19 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=too-many-branches
-"""In addition to rewriting/replace reslut URLs, the *hoostnames* plugin offers
-other features.
-
+"""
 .. attention::

-   The 'Hostnames plugin' from `PR-3463
-   <https://github.com/searxng/searxng/pull/3463>`_ is a rewrite of the
-   'Hostname replace' plugin. Backwards compatibility is guaranteed for a
-   transitional period, but this will end soon.
+   The **"Hostname replace"** plugin has been replaced by the **"Hostnames
+   plugin"**, see :pull:`3463` & :pull:`3552`.

-   **To maintainers of SearXNG instances, please modify your old plugin config
-   to the new.**
+The **Hostnames plugin** can be enabled by adding it to the
+``enabled_plugins`` **list** in the ``settings.yml`` like so.
+
+.. code:: yaml
+
+   enabled_plugins:
+     - 'Hostnames plugin'
+     ...

 - ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
   replaced by other hostnames.

@@ -96,7 +98,7 @@

 from searx import settings
 from searx.plugins import logger
-from searx.settings_loader import get_yaml_file
+from searx.settings_loader import get_yaml_cfg

 name = gettext('Hostnames plugin')
 description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')

@@ -118,7 +120,7 @@ def _load_regular_expressions(settings_key):

     # load external file with configuration
     if isinstance(setting_value, str):
-        setting_value = get_yaml_file(setting_value)
+        setting_value = get_yaml_cfg(setting_value)

     if isinstance(setting_value, list):
         return {re.compile(r) for r in setting_value}

@@ -129,29 +131,8 @@ def _load_regular_expressions(settings_key):
     return {}


-# compatibility fallback for old hostname replace plugin
-# TODO: remove in the future once most/all instance maintainers finished migrating  # pylint: disable=fixme
-def _load_regular_expressions_with_fallback(settings_key):
-    expressions = _load_regular_expressions(settings_key)
-    if expressions:
-        return expressions
-
-    # fallback to the old `hostname_replace` settings format
-    # pylint: disable=import-outside-toplevel, cyclic-import
-    hostname_replace_config = settings.get('hostname_replace', {})
-    if hostname_replace_config:
-        from searx.plugins.hostname_replace import deprecated_msg
-
-        deprecated_msg()
-
-    if settings_key == 'replace':
-        return {re.compile(p): r for (p, r) in hostname_replace_config.items() if r}
-
-    return {re.compile(p) for (p, r) in hostname_replace_config.items() if not r}
-
-
-replacements = _load_regular_expressions_with_fallback('replace')
-removables = _load_regular_expressions_with_fallback('remove')
+replacements = _load_regular_expressions('replace')
+removables = _load_regular_expressions('remove')
 high_priority = _load_regular_expressions('high_priority')
 low_priority = _load_regular_expressions('low_priority')

@@ -163,10 +144,10 @@ def _matches_parsed_url(result, pattern):
 def on_result(_request, _search, result):
     for pattern, replacement in replacements.items():
         if _matches_parsed_url(result, pattern):
-            logger.debug(result['url'])
+            # logger.debug(result['url'])
             result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
             result['url'] = urlunparse(result[parsed])
-            logger.debug(result['url'])
+            # logger.debug(result['url'])

     for url_field in _url_fields:
         if not result.get(url_field):
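To see what one entry of the ``hostnames.replace`` mapping does at the URL level, here is a minimal standalone sketch; the regex and the target hostname are illustrative, only the netloc-rewrite mechanics mirror on_result() above:

import re
from urllib.parse import urlparse, urlunparse

# illustrative mapping, in the shape _load_regular_expressions('replace') returns
replacements = {re.compile(r'(.*\.)?youtube\.com$'): 'invidious.example.org'}

parsed = urlparse("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
for pattern, replacement in replacements.items():
    if pattern.search(parsed.netloc):
        parsed = parsed._replace(netloc=pattern.sub(replacement, parsed.netloc))

print(urlunparse(parsed))  # https://invidious.example.org/watch?v=dQw4w9WgXcQ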
@@ -28,5 +28,5 @@ def post_search(request, search):
         search.result_container.answers['ip'] = {'answer': gettext('Your IP is: ') + ip}
     elif ua_regex.match(search.search_query.query):
         ua = request.user_agent
-        search.result_container.answers['user-agent'] = {'answer': gettext('Your user-agent is: ') + ua}
+        search.result_container.answers['user-agent'] = {'answer': gettext('Your user-agent is: ') + ua.string}
     return True
@@ -234,7 +234,7 @@ def _parse_text_and_convert(search, from_query, to_query):
         value = target_from_si(float(value))

     if measured.group('E'):
-        # when incomming notation is scientific, outgoing notation is scientific
+        # when incoming notation is scientific, outgoing notation is scientific
         result = babel.numbers.format_scientific(value, locale=_locale)
     else:
         result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')
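The two babel output paths above can be tried in isolation. A quick sketch, with the locale chosen arbitrarily and the decimal pattern copied from the hunk:

import babel.numbers

value = 1234.5
# scientific in, scientific out
print(babel.numbers.format_scientific(value, locale="en"))
# plain decimal, with the same pattern the unit converter uses
print(babel.numbers.format_decimal(value, locale="en", format='#,##0.##########;-#'))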
@@ -13,7 +13,7 @@ from collections import OrderedDict
 import flask
 import babel

-from searx import settings, autocomplete
+from searx import settings, autocomplete, favicons
 from searx.enginelib import Engine
 from searx.plugins import Plugin
 from searx.locales import LOCALE_NAMES

@@ -325,7 +325,7 @@ class ClientPref:
     # hint: searx.webapp.get_client_settings should be moved into this class

     locale: babel.Locale
-    """Locale prefered by the client."""
+    """Locale preferred by the client."""

     def __init__(self, locale: Optional[babel.Locale] = None):
         self.locale = locale

@@ -406,6 +406,11 @@ class Preferences:
                 locked=is_locked('autocomplete'),
                 choices=list(autocomplete.backends.keys()) + ['']
             ),
+            'favicon_resolver': EnumStringSetting(
+                settings['search']['favicon_resolver'],
+                locked=is_locked('favicon_resolver'),
+                choices=list(favicons.proxy.CFG.resolver_map.keys()) + ['']
+            ),
             'image_proxy': BooleanSetting(
                 settings['server']['image_proxy'],
                 locked=is_locked('image_proxy')

@@ -441,7 +446,7 @@ class Preferences:
             'simple_style': EnumStringSetting(
                 settings['ui']['theme_args']['simple_style'],
                 locked=is_locked('simple_style'),
-                choices=['', 'auto', 'light', 'dark']
+                choices=['', 'auto', 'light', 'dark', 'black']
             ),
             'center_alignment': BooleanSetting(
                 settings['ui']['center_alignment'],

@@ -474,7 +479,6 @@ class Preferences:
         self.plugins = PluginsSetting('plugins', plugins=plugins)
-        self.tokens = SetSetting('tokens')
         self.client = client or ClientPref()
         self.unknown_params: Dict[str, str] = {}

     def get_as_url_params(self):
         """Return preferences as URL parameters"""

@@ -518,10 +522,6 @@ class Preferences:
             self.plugins.parse_cookie(input_data.get('disabled_plugins', ''), input_data.get('enabled_plugins', ''))
-        elif user_setting_name == 'tokens':
-            self.tokens.parse(user_setting)
         elif not any(
             user_setting_name.startswith(x) for x in ['enabled_', 'disabled_', 'engine_', 'category_', 'plugin_']
         ):
             self.unknown_params[user_setting_name] = user_setting

     def parse_form(self, input_data: Dict[str, str]):
         """Parse form (``<input>``) data from a ``flask.request.form``"""

@@ -546,8 +546,7 @@ class Preferences:
                 disabled_plugins.append(user_setting_name)
-            elif user_setting_name == 'tokens':
-                self.tokens.parse_form(user_setting)
             else:
                 self.unknown_params[user_setting_name] = user_setting

         self.key_value_settings['categories'].parse_form(enabled_categories)
         self.engines.parse_form(disabled_engines)
         self.plugins.parse_form(disabled_plugins)

@@ -558,8 +557,6 @@ class Preferences:
         ret_val = None
         if user_setting_name in self.key_value_settings:
             ret_val = self.key_value_settings[user_setting_name].get_value()
         if user_setting_name in self.unknown_params:
             ret_val = self.unknown_params[user_setting_name]
         return ret_val

     def save(self, resp: flask.Response):

@@ -572,8 +569,6 @@ class Preferences:
         self.engines.save(resp)
         self.plugins.save(resp)
-        self.tokens.save('tokens', resp)
         for k, v in self.unknown_params.items():
             resp.set_cookie(k, v, max_age=COOKIE_MAX_AGE)
         return resp

     def validate_token(self, engine):
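The new ``favicon_resolver`` entry and the extended ``simple_style`` entry are both EnumStringSettings, i.e. string preferences validated against a fixed list of choices. A rough, hypothetical re-implementation of that contract (the real class lives in searx/preferences.py and differs in detail):

# rough sketch only; not the real searx.preferences.EnumStringSetting
class EnumStringSetting:
    def __init__(self, default: str, locked: bool = False, choices: list | None = None):
        self.choices = choices or ['']
        if default not in self.choices:
            raise ValueError(f"invalid default value: {default!r}")
        self.value = default
        self.locked = locked

    def parse(self, data: str) -> None:
        # ignore values that are locked or not among the allowed choices
        if not self.locked and data in self.choices:
            self.value = data


style = EnumStringSetting('auto', choices=['', 'auto', 'light', 'dark', 'black'])
style.parse('black')  # 'black' is a valid choice since this change
print(style.value)    # black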
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=invalid-name, missing-module-docstring, missing-class-docstring

+from __future__ import annotations
 from abc import abstractmethod, ABC
 import re

@@ -258,7 +259,7 @@ class RawTextQuery:
         FeelingLuckyParser,  # redirect to the first link in the results list
     ]

-    def __init__(self, query, disabled_engines):
+    def __init__(self, query: str, disabled_engines: list):
         assert isinstance(query, str)
         # input parameters
         self.query = query
@@ -9,7 +9,6 @@ from typing import List, NamedTuple, Set
 from urllib.parse import urlparse, unquote

 from searx import logger
-from searx import utils
 from searx.engines import engines
 from searx.metrics import histogram_observe, counter_add, count_error

@@ -366,9 +365,9 @@ class ResultContainer:
             result['score'] = result_score(result, result.get('priority'))
             # removing html content and whitespace duplications
             if result.get('content'):
-                result['content'] = utils.html_to_text(result['content']).strip()
+                result['content'] = result['content'].strip()
             if result.get('title'):
-                result['title'] = ' '.join(utils.html_to_text(result['title']).strip().split())
+                result['title'] = ' '.join(result['title'].strip().split())

             for result_engine in result['engines']:
                 counter_add(result['score'], 'engine', result_engine, 'score')
@@ -20,7 +20,7 @@ if (next_call_ts == false or next_call_ts == nil) then
     -- 2/ the next call is a random time between start_after_from and start_after_to
     local initial_delay = math.random(start_after_from, start_after_to)
     redis.call('SET', redis_key, now + initial_delay)
-    return { false, delay }
+    return { false, initial_delay }
 end

 -- next_call_ts is defined
@@ -137,9 +137,6 @@ class OnlineProcessor(EngineProcessor):
         self.engine.request(query, params)

-        # ignoring empty urls
-        if params['url'] is None:
-            return None
-
         if not params['url']:
             return None
@@ -23,7 +23,7 @@ def name_to_iso4217(name):
     currency = CURRENCIES['names'].get(name, [name])
     if isinstance(currency, str):
         return currency
-    return currency[0]
+    return currency[-1]


 def iso4217_to_name(iso4217, language):
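With several ISO 4217 codes mapped to one currency name, the lookup now prefers the last entry instead of the first. A tiny sketch with a made-up data shape; the real CURRENCIES mapping is generated into searx/data and may order entries differently:

# made-up excerpt; the real mapping comes from searx/data
CURRENCIES = {'names': {'franc': ['CDF', 'CHF']}}

def name_to_iso4217(name):
    currency = CURRENCIES['names'].get(name, [name])
    if isinstance(currency, str):
        return currency
    return currency[-1]

print(name_to_iso4217('franc'))    # CHF (last entry wins)
print(name_to_iso4217('unknown'))  # unknown (falls back to the name itself)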
@@ -55,6 +55,7 @@ STYLE_NAMES = {
     'AUTO': 'auto',
     'LIGHT': 'light',
     'DARK': 'dark',
+    'BLACK': 'black',
 }

 BRAND_CUSTOM_LINKS = {
@@ -12,6 +12,10 @@ general:
   contact_url: false
   # record stats
   enable_metrics: true
+  # expose stats in open metrics format at /metrics
+  # leave empty to disable (no password set)
+  # open_metrics: <password>
+  open_metrics: ''

 brand:
   new_issue_url: https://github.com/searxng/searxng/issues/new

@@ -35,6 +39,9 @@ search:
   autocomplete: ""
   # minimum characters to type before autocompleter starts
   autocomplete_min: 4
+  # backend for the favicon near URL in search results.
+  # Available resolvers: "allesedv", "duckduckgo", "google", "yandex" - leave blank to turn it off by default.
+  favicon_resolver: ""
   # Default search language - leave blank to detect from browser information or
   # use codes from 'languages.py'
   default_lang: "auto"

@@ -219,19 +226,16 @@ outgoing:
 #
 # enabled_plugins:
 #   # these plugins are enabled if nothing is configured ..
+#   - 'Basic Calculator'
 #   - 'Hash plugin'
 #   - 'Self Information'
 #   - 'Tracker URL remover'
 #   - 'Unit converter plugin'
 #   - 'Ahmia blacklist'  # activation depends on outgoing.using_tor_proxy
 #   # these plugins are disabled if nothing is configured ..
 #   - 'Hostnames plugin'  # see 'hostnames' configuration below
-#   - 'Basic Calculator'
 #   - 'Open Access DOI rewrite'
 #   - 'Tor check plugin'
-#   # Read the docs before activate: auto-detection of the language could be
-#   # detrimental to users expectations / users can activate the plugin in the
-#   # preferences if they want.
-#   - 'Autodetect search language'

 # Configuration of the "Hostnames plugin":
 #
@@ -325,6 +329,41 @@ engines:
     shortcut: 9g
     disabled: true

+  - name: adobe stock
+    engine: adobe_stock
+    shortcut: asi
+    categories: ["images"]
+    # https://docs.searxng.org/dev/engines/online/adobe_stock.html
+    adobe_order: relevance
+    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
+    timeout: 6
+    disabled: true
+
+  - name: adobe stock video
+    engine: adobe_stock
+    shortcut: asv
+    network: adobe stock
+    categories: ["videos"]
+    adobe_order: relevance
+    adobe_content_types: ["video"]
+    timeout: 6
+    disabled: true
+
+  - name: adobe stock audio
+    engine: adobe_stock
+    shortcut: asa
+    network: adobe stock
+    categories: ["music"]
+    adobe_order: relevance
+    adobe_content_types: ["audio"]
+    timeout: 6
+    disabled: true
+
+  - name: alpine linux packages
+    engine: alpinelinux
+    disabled: true
+    shortcut: alp
+
   - name: annas archive
     engine: annas_archive
     disabled: true

@@ -404,7 +443,6 @@ engines:
     shortcut: wp
     # add "list" to the array to get results in the results list
     display_type: ["infobox"]
-    base_url: 'https://{language}.wikipedia.org/'
     categories: [general]

   - name: bilibili

@@ -477,6 +515,23 @@ engines:
     # to show premium or plus results too:
     # skip_premium: false

+  - name: cloudflareai
+    engine: cloudflareai
+    shortcut: cfai
+    # get api token and account id from https://developers.cloudflare.com/workers-ai/get-started/rest-api/
+    cf_account_id: 'your_cf_account_id'
+    cf_ai_api: 'your_cf_api'
+    # create your ai gateway by https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/
+    cf_ai_gateway: 'your_cf_ai_gateway_name'
+    # find the model name from https://developers.cloudflare.com/workers-ai/models/#text-generation
+    cf_ai_model: 'ai_model_name'
+    # customize your preferences
+    # cf_ai_model_display_name: 'Cloudflare AI'
+    # cf_ai_model_assistant: 'prompts_for_assistant_role'
+    # cf_ai_model_system: 'prompts_for_system_role'
+    timeout: 30
+    disabled: true
+
   # - name: core.ac.uk
   #   engine: core
   #   categories: science

@@ -506,6 +561,8 @@ engines:
     url_query: link
     title_query: title
     content_query: snippet
+    title_html_to_text: true
+    content_html_to_text: true
     disabled: true
     about:
       website: https://crowdview.ai/

@@ -557,33 +614,6 @@ engines:
     categories: general
     shortcut: cc

-  - name: bahnhof
-    engine: json_engine
-    search_url: https://www.bahnhof.de/api/stations/search/{query}
-    url_prefix: https://www.bahnhof.de/
-    url_query: slug
-    title_query: name
-    content_query: state
-    shortcut: bf
-    disabled: true
-    about:
-      website: https://www.bahn.de
-      wikidata_id: Q22811603
-      use_official_api: false
-      require_api_key: false
-      results: JSON
-    language: de
-    tests:
-      bahnhof:
-        matrix:
-          query: berlin
-          lang: en
-        result_container:
-          - not_empty
-          - ['one_title_contains', 'Berlin Hauptbahnhof']
-        test:
-          - unique_results
-
   - name: deezer
     engine: deezer
     shortcut: dz

@@ -618,6 +648,24 @@ engines:
     shortcut: dh
     categories: [it, packages]

+  - name: encyclosearch
+    engine: json_engine
+    shortcut: es
+    categories: general
+    paging: true
+    search_url: https://encyclosearch.org/encyclosphere/search?q={query}&page={pageno}&resultsPerPage=15
+    results_query: Results
+    url_query: SourceURL
+    title_query: Title
+    content_query: Description
+    disabled: true
+    about:
+      website: https://encyclosearch.org
+      official_api_documentation: https://encyclosearch.org/docs/#/rest-api
+      use_official_api: true
+      require_api_key: false
+      results: JSON
+
   - name: erowid
     engine: xpath
     paging: true
@@ -792,34 +840,40 @@ engines:
     timeout: 8.0
     disabled: true

+  - name: geizhals
+    engine: geizhals
+    shortcut: geiz
+    disabled: true
+
   - name: genius
     engine: genius
     shortcut: gen

   - name: gentoo
-    engine: gentoo
+    engine: mediawiki
     shortcut: ge
-    timeout: 10.0
     categories: ["it", "software wikis"]
+    base_url: "https://wiki.gentoo.org/"
+    api_path: "api.php"
+    search_type: text
+    timeout: 10

   - name: gitlab
-    engine: json_engine
-    paging: true
-    search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno}
-    url_query: web_url
-    title_query: name_with_namespace
-    content_query: description
-    page_size: 20
-    categories: [it, repos]
+    engine: gitlab
+    base_url: https://gitlab.com
     shortcut: gl
+    timeout: 10.0
     disabled: true
     about:
-      website: https://about.gitlab.com/
+      website: https://gitlab.com/
       wikidata_id: Q16639197
-      official_api_documentation: https://docs.gitlab.com/ee/api/
-      use_official_api: false
-      require_api_key: false
-      results: JSON
+
+  # - name: gnome
+  #   engine: gitlab
+  #   base_url: https://gitlab.gnome.org
+  #   shortcut: gn
+  #   about:
+  #     website: https://gitlab.gnome.org
+  #     wikidata_id: Q44316

   - name: github
     engine: github

@@ -898,26 +952,6 @@ engines:
     shortcut: mi
     disabled: true

-  - name: gpodder
-    engine: json_engine
-    shortcut: gpod
-    timeout: 4.0
-    paging: false
-    search_url: https://gpodder.net/search.json?q={query}
-    url_query: url
-    title_query: title
-    content_query: description
-    page_size: 19
-    categories: music
-    disabled: true
-    about:
-      website: https://gpodder.net
-      wikidata_id: Q3093354
-      official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/
-      use_official_api: false
-      requires_api_key: false
-      results: JSON
-
   - name: habrahabr
     engine: xpath
     paging: true

@@ -1230,6 +1264,7 @@ engines:
   # read https://docs.searxng.org/dev/engines/online/mullvad_leta.html
   # - name: mullvadleta
   #   engine: mullvad_leta
+  #   leta_engine: google  # choose one of the following: google, brave
   #   use_cache: true  # Only 100 non-cache searches per day, suggested only for private instances
   #   search_url: https://leta.mullvad.net
   #   categories: [general, web]

@@ -1280,6 +1315,12 @@ engines:
     require_api_key: false
     results: JSON

+  - name: openlibrary
+    engine: openlibrary
+    shortcut: ol
+    timeout: 5
+    disabled: true
+
   - name: openmeteo
     engine: open_meteo
     shortcut: om
@@ -1540,6 +1581,25 @@ engines:
     engine: reddit
     shortcut: re
     page_size: 25
     disabled: true

+  - name: right dao
+    engine: xpath
+    paging: true
+    page_size: 12
+    search_url: https://rightdao.com/search?q={query}&start={pageno}
+    results_xpath: //div[contains(@class, "description")]
+    url_xpath: ../div[contains(@class, "title")]/a/@href
+    title_xpath: ../div[contains(@class, "title")]
+    content_xpath: .
+    categories: general
+    shortcut: rd
+    disabled: true
+    about:
+      website: https://rightdao.com/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
+
   - name: rottentomatoes
     engine: rottentomatoes

@@ -1597,11 +1657,6 @@ engines:
     api_site: 'askubuntu'
     categories: [it, q&a]

-  - name: internetarchivescholar
-    engine: internet_archive_scholar
-    shortcut: ias
-    timeout: 15.0
-
   - name: superuser
     engine: stackexchange
     shortcut: su

@@ -1780,6 +1835,22 @@ engines:
     engine: unsplash
     shortcut: us

+  - name: yandex
+    engine: yandex
+    categories: general
+    search_type: web
+    shortcut: yd
+    disabled: true
+    inactive: true
+
+  - name: yandex images
+    engine: yandex
+    categories: images
+    search_type: images
+    shortcut: ydi
+    disabled: true
+    inactive: true
+
   - name: yandex music
     engine: yandex_music
     shortcut: ydm

@@ -1828,25 +1899,6 @@ engines:
     about:
       website: https://wiby.me/

-  - name: alexandria
-    engine: json_engine
-    shortcut: alx
-    categories: general
-    paging: true
-    search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno}
-    results_query: results
-    title_query: title
-    url_query: url
-    content_query: snippet
-    timeout: 1.5
-    disabled: true
-    about:
-      website: https://alexandria.org/
-      official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md
-      use_official_api: true
-      require_api_key: false
-      results: JSON
-
   - name: wikibooks
     engine: mediawiki
     weight: 0.5

@@ -2015,6 +2067,16 @@ engines:
   #   query_str: 'SELECT * from mytable WHERE fieldname=%(query)s'
   #   shortcut: mysql

+  # Required dependency: mariadb
+  # - name: mariadb
+  #   engine: mariadb_server
+  #   database: mydatabase
+  #   username: user
+  #   password: pass
+  #   limit: 10
+  #   query_str: 'SELECT * from mytable WHERE fieldname=%(query)s'
+  #   shortcut: mdb
+
   - name: 1337x
     engine: 1337x
     shortcut: 1337x

@@ -2124,28 +2186,35 @@ engines:
     disabled: true

   - name: yacy
     # https://docs.searxng.org/dev/engines/online/yacy.html
     engine: yacy
     categories: general
     search_type: text
     base_url:
       - https://yacy.searchlab.eu
+      - https://search.lomig.me
+      - https://yacy.ecosys.eu
+      - https://search.webproject.link
       # see https://github.com/searxng/searxng/pull/3631#issuecomment-2240903027
       # - https://search.kyun.li
       # - https://yacy.securecomcorp.eu
       # - https://yacy.myserv.ca
       # - https://yacy.nsupdate.info
       # - https://yacy.electroncash.de
     shortcut: ya
     disabled: true
-    # required if you aren't using HTTPS for your local yacy instance
-    # https://docs.searxng.org/dev/engines/online/yacy.html
-    # enable_http: true
-    # timeout: 3.0
-    # search_mode: 'global'
+    # if you aren't using HTTPS for your local yacy instance disable https
+    # enable_http: false
+    search_mode: 'global'
+    # timeout can be reduced in 'local' search mode
+    timeout: 5.0

   - name: yacy images
     engine: yacy
     network: yacy
     categories: images
     search_type: image
     shortcut: yai
     disabled: true
+    # timeout can be reduced in 'local' search mode
+    timeout: 5.0

   - name: rumble
     engine: rumble

@@ -2165,7 +2234,6 @@ engines:
   - name: wordnik
     engine: wordnik
     shortcut: def
-    base_url: https://www.wordnik.com/
     categories: [dictionaries]
     timeout: 5.0

@@ -2211,13 +2279,6 @@ engines:
     seekr_category: videos
     disabled: true

-  - name: sjp.pwn
-    engine: sjp
-    shortcut: sjp
-    base_url: https://sjp.pwn.pl/
-    timeout: 5.0
-    disabled: true
-
   - name: stract
     engine: stract
     shortcut: str
@@ -18,7 +18,7 @@ searx_dir = abspath(dirname(__file__))
 logger = logging.getLogger('searx')
 OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
 SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
-SIMPLE_STYLE = ('auto', 'light', 'dark')
+SIMPLE_STYLE = ('auto', 'light', 'dark', 'black')
 CATEGORIES_AS_TABS = {
     'general': {},
     'images': {},

@@ -143,6 +143,7 @@ SCHEMA = {
         'contact_url': SettingsValue((None, False, str), None),
         'donation_url': SettingsValue((bool, str), "https://docs.searxng.org/donate.html"),
         'enable_metrics': SettingsValue(bool, True),
+        'open_metrics': SettingsValue(str, ''),
     },
     'brand': {
         'issue_url': SettingsValue(str, 'https://github.com/searxng/searxng/issues'),

@@ -156,6 +157,7 @@ SCHEMA = {
         'safe_search': SettingsValue((0, 1, 2), 0),
         'autocomplete': SettingsValue(str, ''),
         'autocomplete_min': SettingsValue(int, 4),
+        'favicon_resolver': SettingsValue(str, ''),
         'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
         'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
         'ban_time_on_fail': SettingsValue(numbers.Real, 5),
@@ -1,68 +1,116 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-# pylint: disable=missing-module-docstring, too-many-branches
+"""Implementations for loading configurations from YAML files. This essentially
+includes the configuration of the (:ref:`SearXNG appl <searxng settings.yml>`)
+server. The default configuration for the application server is loaded from the
+:origin:`DEFAULT_SETTINGS_FILE <searx/settings.yml>`. This default
+configuration can be completely replaced or :ref:`customized individually
+<use_default_settings.yml>` and the ``SEARXNG_SETTINGS_PATH`` environment
+variable can be used to set the location from which the local customizations are
+to be loaded. The rules used for this can be found in the
+:py:obj:`get_user_cfg_folder` function.

-from typing import Optional
-from os import environ
-from os.path import dirname, join, abspath, isfile
+- By default, local configurations are expected in folder ``/etc/searxng`` from
+  where applications can load them with the :py:obj:`get_yaml_cfg` function.
+
+- By default, customized :ref:`SearXNG appl <searxng settings.yml>` settings are
+  expected in a file named ``settings.yml``.
+
+"""
+
+from __future__ import annotations
+
+import os.path
+from collections.abc import Mapping
+from itertools import filterfalse
+from pathlib import Path

 import yaml

 from searx.exceptions import SearxSettingsException

-searx_dir = abspath(dirname(__file__))
+searx_dir = os.path.abspath(os.path.dirname(__file__))

+SETTINGS_YAML = Path("settings.yml")
+DEFAULT_SETTINGS_FILE = Path(searx_dir) / SETTINGS_YAML
+"""The :origin:`searx/settings.yml` file with all the default settings."""

-def existing_filename_or_none(file_name: str) -> Optional[str]:
-    if isfile(file_name):
-        return file_name
-    return None
-
-
-def load_yaml(file_name):
+def load_yaml(file_name: str | Path):
+    """Load YAML config from a file."""
     try:
         with open(file_name, 'r', encoding='utf-8') as settings_yaml:
-            return yaml.safe_load(settings_yaml)
+            return yaml.safe_load(settings_yaml) or {}
     except IOError as e:
-        raise SearxSettingsException(e, file_name) from e
+        raise SearxSettingsException(e, str(file_name)) from e
     except yaml.YAMLError as e:
-        raise SearxSettingsException(e, file_name) from e
+        raise SearxSettingsException(e, str(file_name)) from e


-def get_yaml_file(file_name):
-    path = existing_filename_or_none(join(searx_dir, file_name))
-    if path is None:
-        raise FileNotFoundError(f"File {file_name} does not exist!")
-
-    return load_yaml(path)
-
-
-def get_default_settings_path():
-    return existing_filename_or_none(join(searx_dir, 'settings.yml'))
-
-
-def get_user_settings_path() -> Optional[str]:
-    """Get an user settings file.
-    By descending priority:
-    1. ``environ['SEARXNG_SETTINGS_PATH']``
-    2. ``/etc/searxng/settings.yml`` except if ``SEARXNG_DISABLE_ETC_SETTINGS`` is ``true`` or ``1``
-    3. ``None``
-    """
-
-    # check the environment variable SEARXNG_SETTINGS_PATH
-    # if the environment variable is defined, this is the last check
-    if 'SEARXNG_SETTINGS_PATH' in environ:
-        return existing_filename_or_none(environ['SEARXNG_SETTINGS_PATH'])
-
-    # if SEARXNG_DISABLE_ETC_SETTINGS don't look any further
-    if environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true'):
-        return None
-
-    # check /etc/searxng/settings.yml
-    # (continue with other locations if the file is not found)
-    return existing_filename_or_none('/etc/searxng/settings.yml')
+def get_yaml_cfg(file_name: str | Path) -> dict:
+    """Shortcut to load a YAML config from a file, located in the
+    - :py:obj:`get_user_cfg_folder` or
+    - in the ``searx`` folder of the SearXNG installation
+    """
+
+    folder = get_user_cfg_folder() or Path(searx_dir)
+    fname = folder / file_name
+    if not fname.is_file():
+        raise FileNotFoundError(f"File {fname} does not exist!")
+
+    return load_yaml(fname)
+
+
+def get_user_cfg_folder() -> Path | None:
+    """Returns folder where the local configurations are located.
+
+    1. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a
+       folder (e.g. ``/etc/mysxng/``), all local configurations are expected in
+       this folder. The settings of the :ref:`SearXNG appl <searxng
+       settings.yml>` are then expected in ``settings.yml``
+       (e.g. ``/etc/mysxng/settings.yml``).
+
+    2. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a file
+       (e.g. ``/etc/mysxng/myinstance.yml``), this file contains the settings of
+       the :ref:`SearXNG appl <searxng settings.yml>` and the folder
+       (e.g. ``/etc/mysxng/``) is used for all other configurations.
+
+       This type (``SEARXNG_SETTINGS_PATH`` points to a file) is suitable for
+       use cases in which different profiles of the :ref:`SearXNG appl <searxng
+       settings.yml>` are to be managed, such as in test scenarios.
+
+    3. If folder ``/etc/searxng`` exists, it is used.
+
+    In case none of the above paths exist, ``None`` is returned. In case the
+    environment ``SEARXNG_SETTINGS_PATH`` is set, but the (folder or file) does
+    not exist, an :py:obj:`EnvironmentError` is raised.
+
+    """
+
+    folder = None
+    settings_path = os.environ.get("SEARXNG_SETTINGS_PATH")
+
+    # Disabling the default /etc/searxng is intended exclusively for internal
+    # testing purposes and is therefore not documented!
+    disable_etc = os.environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true')
+
+    if settings_path:
+        # rule 1. and 2.
+        settings_path = Path(settings_path)
+        if settings_path.is_dir():
+            folder = settings_path
+        elif settings_path.is_file():
+            folder = settings_path.parent
+        else:
+            raise EnvironmentError(1, f"{settings_path} not exists!", settings_path)
+
+    if not folder and not disable_etc:
+        # default: rule 3.
+        folder = Path("/etc/searxng")
+        if not folder.is_dir():
+            folder = None
+
+    return folder


 def update_dict(default_dict, user_dict):

@@ -74,7 +122,9 @@ def update_dict(default_dict, user_dict):
     return default_dict


-def update_settings(default_settings, user_settings):
+def update_settings(default_settings: dict, user_settings: dict):
+    # pylint: disable=too-many-branches
+
     # merge everything except the engines
     for k, v in user_settings.items():
         if k not in ('use_default_settings', 'engines'):

@@ -124,6 +174,7 @@ def update_settings(default_settings, user_settings):


 def is_use_default_settings(user_settings):
+
     use_default_settings = user_settings.get('use_default_settings')
     if use_default_settings is True:
         return True

@@ -134,25 +185,37 @@ def is_use_default_settings(user_settings):
     raise ValueError('Invalid value for use_default_settings')


-def load_settings(load_user_settings=True):
-    default_settings_path = get_default_settings_path()
-    user_settings_path = get_user_settings_path()
-    if user_settings_path is None or not load_user_settings:
-        # no user settings
-        return (load_yaml(default_settings_path), 'load the default settings from {}'.format(default_settings_path))
+def load_settings(load_user_settings=True) -> tuple[dict, str]:
+    """Function for loading the settings of the SearXNG application
+    (:ref:`settings.yml <searxng settings.yml>`)."""

-    # user settings
-    user_settings = load_yaml(user_settings_path)
-    if is_use_default_settings(user_settings):
+    msg = f"load the default settings from {DEFAULT_SETTINGS_FILE}"
+    cfg = load_yaml(DEFAULT_SETTINGS_FILE)
+    cfg_folder = get_user_cfg_folder()
+
+    if not load_user_settings or not cfg_folder:
+        return cfg, msg
+
+    settings_yml = os.environ.get("SEARXNG_SETTINGS_PATH")
+    if settings_yml and Path(settings_yml).is_file():
+        # see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a file
+        settings_yml = Path(settings_yml).name
+    else:
+        # see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a folder
+        settings_yml = SETTINGS_YAML
+
+    cfg_file = cfg_folder / settings_yml
+    if not cfg_file.exists():
+        return cfg, msg
+
+    msg = f"load the user settings from {cfg_file}"
+    user_cfg = load_yaml(cfg_file)
+
+    if is_use_default_settings(user_cfg):
         # the user settings are merged with the default configuration
-        default_settings = load_yaml(default_settings_path)
-        update_settings(default_settings, user_settings)
-        return (
-            default_settings,
-            'merge the default settings ( {} ) and the user settings ( {} )'.format(
-                default_settings_path, user_settings_path
-            ),
-        )
-
-    # the user settings, fully replace the default configuration
-    return (user_settings, 'load the user settings from {}'.format(user_settings_path))
+        msg = f"merge the default settings ( {DEFAULT_SETTINGS_FILE} ) and the user settings ( {cfg_file} )"
+        update_settings(cfg, user_cfg)
+    else:
+        # the user settings fully replace the default configuration
+        cfg = user_cfg
+
+    return cfg, msg
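Behaviour of the rewritten loader, as a usage sketch; the profile path is illustrative (rule 2 of get_user_cfg_folder: the variable points to a file), and a full SearXNG environment is assumed:

import os

# hypothetical profile file; see rule 2. of get_user_cfg_folder()
os.environ["SEARXNG_SETTINGS_PATH"] = "/etc/mysxng/myinstance.yml"

from searx.settings_loader import load_settings

cfg, msg = load_settings()
print(msg)        # e.g. "load the user settings from /etc/mysxng/myinstance.yml"
print(type(cfg))  # <class 'dict'>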
searx/sqlitedb.py (new file, 323 lines)

@@ -0,0 +1,323 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementations to make access to SQLite databases a little more convenient.

:py:obj:`SQLiteAppl`
  Abstract class with which DB applications can be implemented.

:py:obj:`SQLiteProperties`:
  Class to manage properties stored in a database.

----

"""
from __future__ import annotations

import sys
import re
import sqlite3
import threading
import abc

from searx import logger

logger = logger.getChild('sqlitedb')


class SQLiteAppl(abc.ABC):
    """Abstract base class for implementing convenient DB access in SQLite
    applications. In the constructor, a :py:obj:`SQLiteProperties` instance is
    already aggregated under ``self.properties``."""

    DDL_CREATE_TABLES: dict[str, str] = {}

    DB_SCHEMA: int = 1
    """As soon as changes are made to the DB schema, the version number must be
    increased. Changes to the version number require the DB to be recreated (or
    migrated, if a migration path exists and is implemented)."""

    SQLITE_THREADING_MODE = {
        0: "single-thread",
        1: "multi-thread",
        3: "serialized"}[sqlite3.threadsafety]  # fmt:skip
    """Threading mode of the SQLite library. Depends on the options used at
    compile time and is different for different distributions and architectures.

    Possible values are 0:``single-thread``, 1:``multi-thread``,
    3:``serialized`` (see :py:obj:`sqlite3.threadsafety`). Pre-Python 3.11
    this value was hard coded to 1.

    Depending on this value, optimizations are made, e.g. in "serialized" mode
    it is not necessary to create a separate DB connector for each thread.
    """

    SQLITE_JOURNAL_MODE = "WAL"
    SQLITE_CONNECT_ARGS = {
        # "timeout": 5.0,
        # "detect_types": 0,
        "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
        "cached_statements": 0,  # https://github.com/python/cpython/issues/118172
        # "uri": False,
        "autocommit": False,
    }  # fmt:skip
    """Connection arguments (:py:obj:`sqlite3.connect`)

    ``check_same_thread``:
      Is disabled by default when :py:obj:`SQLITE_THREADING_MODE` is
      ``serialized``. The check is more of a hindrance in this case because it
      would prevent a DB connector from being used in multiple threads.

    ``autocommit``:
      Is disabled by default. Note: the autocommit option has been added in
      Python 3.12.

    ``cached_statements``:
      Is set to ``0`` by default. Note: in Python 3.12+ the fetch results are
      not consistent in multi-threaded applications, causing an API misuse error.

      The multithreading use in SQLiteAppl is intended and supported if
      threadsafety is set to 3 (aka "serialized"). CPython supports "serialized"
      from version 3.12 on, but unfortunately only with errors:

      - https://github.com/python/cpython/issues/118172
      - https://github.com/python/cpython/issues/123873

      The workaround for the SQLite3 multithreading cache inconsistency is to
      set option ``cached_statements`` to ``0`` by default.
    """

    def __init__(self, db_url):

        self.db_url = db_url
        self.properties = SQLiteProperties(db_url)
        self.thread_local = threading.local()
        self._init_done = False
        self._compatibility()

    def _compatibility(self):

        if self.SQLITE_THREADING_MODE == "serialized":
            self._DB = None
        else:
            msg = (
                f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
                " read https://docs.python.org/3/library/sqlite3.html#sqlite3.threadsafety"
            )
            if threading.active_count() > 1:
                logger.error(msg)
            else:
                logger.warning(msg)

        if sqlite3.sqlite_version_info <= (3, 35):
            # See "Generalize UPSERT:" in https://sqlite.org/releaselog/3_35_0.html
            logger.critical(
                "SQLite runtime library version %s is not supported (require >= 3.35)", sqlite3.sqlite_version
            )

    def connect(self) -> sqlite3.Connection:
        """Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`). If not
        already done, the DB schema is set up.
        """
        if sys.version_info < (3, 12):
            # Prior to Python 3.12 there is no "autocommit" option
            self.SQLITE_CONNECT_ARGS.pop("autocommit", None)

        self.init()
        logger.debug("%s: connect to DB: %s // %s", self.__class__.__name__, self.db_url, self.SQLITE_CONNECT_ARGS)
        conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS)  # type: ignore
        conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
        self.register_functions(conn)
        return conn

    def register_functions(self, conn):
        """Create user-defined_ SQL functions.

        ``REGEXP(<pattern>, <field>)`` : 0 | 1
           `re.search`_ returns (int) 1 for a match and 0 for no match of
           ``<pattern>`` in ``<field>``.

           .. code:: sql

              SELECT '12' AS field WHERE REGEXP('^[0-9][0-9]$', field)
              -- 12

              SELECT REGEXP('[0-9][0-9]', 'X12Y')
              -- 1
              SELECT REGEXP('[0-9][0-9]', 'X1Y')
              -- 0

        .. _user-defined: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.create_function
        .. _deterministic: https://sqlite.org/deterministic.html
        .. _re.search: https://docs.python.org/3/library/re.html#re.search
        """

        conn.create_function('regexp', 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)

    @property
    def DB(self) -> sqlite3.Connection:
        """Provides a DB connection. The connection is a *singleton* and
        therefore well suited for read access. If
        :py:obj:`SQLITE_THREADING_MODE` is ``serialized`` only one DB connection
        is created for all threads.

        .. note::

           For dedicated `transaction control`_, it is recommended to create a
           new connection (:py:obj:`SQLiteAppl.connect`).

        .. _transaction control:
            https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
        """

        if getattr(self.thread_local, 'DB', None) is None:
            self.thread_local.DB = self.connect()

        # Theoretically it is possible to reuse the DB cursor across threads as
        # of Python 3.12; in practice the threading of the cursor seems to me to
        # be so faulty that I prefer to establish one connection per thread

        self.thread_local.DB.commit()
        return self.thread_local.DB

        # In "serialized" mode, SQLite can be safely used by multiple threads
        # with no restriction.
        #
        # if self.SQLITE_THREADING_MODE != "serialized":
        #     if getattr(self.thread_local, 'DB', None) is None:
        #         self.thread_local.DB = self.connect()
        #     return self.thread_local.DB
        #
        # if self._DB is None:
        #     self._DB = self.connect()  # pylint: disable=attribute-defined-outside-init
        # return self._DB

    def init(self):
        """Initializes the DB schema and properties; is only executed once even
        if called several times."""

        if self._init_done:
            return
        self._init_done = True

        logger.debug("init DB: %s", self.db_url)
        self.properties.init()
        ver = self.properties("DB_SCHEMA")
        if ver is None:
            with self.properties.DB:
                self.create_schema(self.properties.DB)
        else:
            ver = int(ver)
            if ver != self.DB_SCHEMA:
                raise sqlite3.DatabaseError("Expected DB schema v%s, DB schema is v%s" % (self.DB_SCHEMA, ver))
            logger.debug("DB_SCHEMA = %s", ver)

    def create_schema(self, conn):

        logger.debug("create schema ..")
        with conn:
            for table_name, sql in self.DDL_CREATE_TABLES.items():
                conn.execute(sql)
                self.properties.set(f"Table {table_name} created", table_name)
            self.properties.set("DB_SCHEMA", self.DB_SCHEMA)
            self.properties.set("LAST_MAINTENANCE", "")


class SQLiteProperties(SQLiteAppl):
    """Simple class to manage properties of a DB application in the DB. The
    object has its own DB connection and transaction area.

    .. code:: sql

       CREATE TABLE IF NOT EXISTS properties (
         name       TEXT,
         value      TEXT,
         m_time     INTEGER DEFAULT (strftime('%s', 'now')),
         PRIMARY KEY (name))

    """

    SQLITE_JOURNAL_MODE = "WAL"

    DDL_PROPERTIES = """\
CREATE TABLE IF NOT EXISTS properties (
  name       TEXT,
  value      TEXT,
  m_time     INTEGER DEFAULT (strftime('%s', 'now')),  -- last modified (unix epoch) time in sec.
  PRIMARY KEY (name))"""

    """Table to store properties of the DB application"""

    SQL_GET = "SELECT value FROM properties WHERE name = ?"
    SQL_M_TIME = "SELECT m_time FROM properties WHERE name = ?"
    SQL_SET = (
        "INSERT INTO properties (name, value) VALUES (?, ?)"
        "  ON CONFLICT(name) DO UPDATE"
        "   SET value=excluded.value, m_time=strftime('%s', 'now')"
    )
    SQL_TABLE_EXISTS = (
        "SELECT name FROM sqlite_master"
        " WHERE type='table' AND name='properties'"
    )  # fmt:skip
    SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
    SQLITE_CONNECT_ARGS["autocommit"] = True  # This option has no effect before Python 3.12

    def __init__(self, db_url: str):  # pylint: disable=super-init-not-called

        self.db_url = db_url
        self.thread_local = threading.local()
        self._init_done = False
        self._compatibility()

    def init(self):
        """Initializes the DB schema of the properties in the DB."""

        if self._init_done:
            return
        self._init_done = True
        logger.debug("init properties of DB: %s", self.db_url)
        with self.DB as conn:
            res = conn.execute(self.SQL_TABLE_EXISTS)
            if res.fetchone() is None:  # DB schema needs to be created
                self.create_schema(conn)

    def __call__(self, name, default=None):
        """Returns the value of the property ``name`` or ``default`` if the
        property does not exist in the DB."""

        res = self.DB.execute(self.SQL_GET, (name,)).fetchone()
        if res is None:
            return default
        return res[0]

    def set(self, name, value):
        """Set ``value`` of property ``name`` in DB. If the property already
        exists, update the ``m_time`` (and the value)."""

        self.DB.execute(self.SQL_SET, (name, value))

        if sys.version_info <= (3, 12):
            # Prior to Python 3.12 there is no "autocommit" option / let's
            # commit explicitly.
            self.DB.commit()

    def row(self, name, default=None):
        """Returns the DB row of property ``name`` or ``default`` if the
        property does not exist in the DB."""

        cur = self.DB.cursor()
        cur.execute("SELECT * FROM properties WHERE name = ?", (name,))
        res = cur.fetchone()
        if res is None:
            return default
        col_names = [column[0] for column in cur.description]
        return dict(zip(col_names, res))

    def m_time(self, name, default: int = 0) -> int:
        """Last modification time of this property."""
        res = self.DB.execute(self.SQL_M_TIME, (name,)).fetchone()
        if res is None:
            return default
        return int(res[0])

    def create_schema(self, conn):
        with conn:
            conn.execute(self.DDL_PROPERTIES)
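A usage sketch for SQLiteProperties, exercising only the API shown above; the DB path and the property value are illustrative:

from searx.sqlitedb import SQLiteProperties

props = SQLiteProperties("/tmp/sxng-properties.db")  # hypothetical path
props.init()  # creates the properties table on first use
props.set("LAST_MAINTENANCE", "2024-01-01T00:00:00")
print(props("LAST_MAINTENANCE"))         # -> 2024-01-01T00:00:00
print(props.m_time("LAST_MAINTENANCE"))  # unix epoch (sec) of the last modification
print(props.row("LAST_MAINTENANCE"))     # {'name': ..., 'value': ..., 'm_time': ...}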
searx/static/themes/simple/css/rss.min.css (new vendored file, 1 line; diff suppressed: lines too long)
searx/static/themes/simple/css/rss.min.css.map (new file, 1 line; diff suppressed: lines too long)
(one more file diff suppressed because its lines are too long)

Some files were not shown because too many files have changed in this diff.