mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
324 lines
11 KiB
Python
324 lines
11 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
# lint: pylint
|
|
# pylint: disable=global-statement
|
|
# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring
|
|
|
|
import asyncio
|
|
import ipaddress
|
|
from itertools import cycle
|
|
|
|
import httpx
|
|
|
|
from .client import new_client, get_loop
|
|
|
|
|
|
# Key under which the fallback network is stored in NETWORKS.
DEFAULT_NAME = "__DEFAULT__"

# Registry of configured networks, keyed by network (or engine) name.
NETWORKS = {}

# requests compatibility when reading proxy settings from settings.yml:
# both the bare scheme ("http") and the colon-suffixed form ("http:") are
# translated to the "scheme://" pattern.
PROXY_PATTERN_MAPPING = {
    scheme + suffix: scheme + "://"
    for suffix in ("", ":")
    for scheme in ("http", "https", "socks4", "socks5", "socks5h")
}

# Wildcard bind address for each IP family.
ADDRESS_MAPPING = dict(ipv4="0.0.0.0", ipv6="::")
|
|
|
|
|
|
class Network:
    """A set of outgoing-connection settings plus the HTTP clients built from them.

    Local addresses and proxies are rotated on every ``get_client()`` call;
    clients are cached per (verify, max_redirects, local address, proxies)
    combination.
    """

    __slots__ = (
        "enable_http",
        "verify",
        "enable_http2",
        "max_connections",
        "max_keepalive_connections",
        "keepalive_expiry",
        "local_addresses",
        "proxies",
        "max_redirects",
        "retries",
        "retry_on_http_error",
        "_local_addresses_cycle",
        "_proxies_cycle",
        "_clients",
    )

    def __init__(
        # pylint: disable=too-many-arguments
        self,
        enable_http=True,
        verify=True,
        enable_http2=False,
        max_connections=None,
        max_keepalive_connections=None,
        keepalive_expiry=None,
        proxies=None,
        local_addresses=None,
        retries=0,
        retry_on_http_error=None,
        max_redirects=30,
    ):
        """Store the settings, set up the rotations and validate the input.

        ``proxies`` may be ``None``, a single proxy URL or a dict mapping a
        pattern to one URL or a list of URLs.  ``local_addresses`` may be
        ``None``, one address/network string or an iterable of them.
        ``retry_on_http_error`` may be ``True``, a list of status codes or a
        single status code (see ``is_valid_respones``).

        Raises ``ValueError`` when an address cannot be parsed or ``proxies``
        has an unsupported type.
        """
        self.enable_http = enable_http
        self.verify = verify
        self.enable_http2 = enable_http2
        self.max_connections = max_connections
        self.max_keepalive_connections = max_keepalive_connections
        self.keepalive_expiry = keepalive_expiry
        self.proxies = proxies
        self.local_addresses = local_addresses
        self.retries = retries
        self.retry_on_http_error = retry_on_http_error
        self.max_redirects = max_redirects
        # Both are lazy generators: nothing is evaluated until get_client().
        self._local_addresses_cycle = self.get_ipaddress_cycle()
        self._proxies_cycle = self.get_proxy_cycles()
        self._clients = {}
        self.check_parameters()

    def check_parameters(self):
        """Validate ``local_addresses`` and the type of ``proxies``.

        Raises ``ValueError`` for an unparsable address/network or when
        ``proxies`` is neither ``None``, ``str`` nor ``dict``.
        """
        for address in self.iter_ipaddresses():
            # A "/" marks a network in CIDR notation; strict=False accepts
            # host bits being set.
            if "/" in address:
                ipaddress.ip_network(address, False)
            else:
                ipaddress.ip_address(address)

        if self.proxies is not None and not isinstance(self.proxies, (str, dict)):
            raise ValueError("proxies type has to be str, dict or None")

    def iter_ipaddresses(self):
        """Yield each configured local address (a lone string counts as one)."""
        local_addresses = self.local_addresses
        if not local_addresses:
            return
        if isinstance(local_addresses, str):
            local_addresses = [local_addresses]
        yield from local_addresses

    def get_ipaddress_cycle(self):
        """Endlessly yield local addresses as strings, expanding networks to hosts.

        Yields ``None`` (forever) when no address is available.
        """
        while True:
            count = 0
            for address in self.iter_ipaddresses():
                if "/" in address:
                    for host in ipaddress.ip_network(address, False).hosts():
                        yield str(host)
                        count += 1
                else:
                    yield str(ipaddress.ip_address(address))
                    count += 1
            if count == 0:
                # Nothing configured (or only host-less networks): keep the
                # generator alive instead of spinning without yielding.
                yield None

    def iter_proxies(self):
        """Yield ``(pattern, [proxy_url, ...])`` pairs from ``self.proxies``."""
        if not self.proxies:
            return
        # https://www.python-httpx.org/compatibility/#proxy-keys
        if isinstance(self.proxies, str):
            yield "all://", [self.proxies]
        else:
            for pattern, proxy_url in self.proxies.items():
                pattern = PROXY_PATTERN_MAPPING.get(pattern, pattern)
                if isinstance(proxy_url, str):
                    proxy_url = [proxy_url]
                yield pattern, proxy_url

    def get_proxy_cycles(self):
        """Endlessly yield a tuple of ``(pattern, proxy_url)`` pairs.

        Every pattern rotates through its own list of URLs independently.
        """
        proxy_settings = {}
        for pattern, proxy_urls in self.iter_proxies():
            proxy_settings[pattern] = cycle(proxy_urls)
        while True:
            # pylint: disable=stop-iteration-return
            yield tuple(
                (pattern, next(proxy_url_cycle))
                for pattern, proxy_url_cycle in proxy_settings.items()
            )

    def get_client(self, verify=None, max_redirects=None):
        """Return a client for the next local address / proxy combination.

        ``verify`` and ``max_redirects`` default to the network's own values.
        A new client is created when none is cached for the combination or
        the cached one is closed.
        """
        verify = self.verify if verify is None else verify
        max_redirects = self.max_redirects if max_redirects is None else max_redirects
        local_address = next(self._local_addresses_cycle)
        proxies = next(self._proxies_cycle)  # is a tuple so it can be part of the key
        key = (verify, max_redirects, local_address, proxies)
        if key not in self._clients or self._clients[key].is_closed:
            self._clients[key] = new_client(
                self.enable_http,
                verify,
                self.enable_http2,
                self.max_connections,
                self.max_keepalive_connections,
                self.keepalive_expiry,
                dict(proxies),
                local_address,
                # presumably the client-level retry count (retries are driven
                # by request()/stream() instead) -- confirm against new_client
                0,
                max_redirects,
            )
        return self._clients[key]

    async def aclose(self):
        """Close every cached client; ``httpx.HTTPError`` while closing is ignored."""

        async def close_client(client: httpx.AsyncClient):
            try:
                await client.aclose()
            except httpx.HTTPError:
                # best effort: a failing close must not prevent closing the rest
                pass

        await asyncio.gather(
            *[close_client(client) for client in self._clients.values()],
            return_exceptions=False
        )

    @staticmethod
    def get_kwargs_clients(kwargs):
        """Move ``verify`` / ``max_redirects`` out of ``kwargs`` into a new dict.

        ``kwargs`` is modified in place: the extracted keys are removed so the
        remainder can be forwarded to the client's request call.
        """
        kwargs_clients = {}
        if "verify" in kwargs:
            kwargs_clients["verify"] = kwargs.pop("verify")
        if "max_redirects" in kwargs:
            kwargs_clients["max_redirects"] = kwargs.pop("max_redirects")
        return kwargs_clients

    def is_valid_respones(self, response):
        """Return ``False`` when the status code matches ``retry_on_http_error``.

        ``True`` matches every 4xx/5xx status, a list matches its members and
        an int matches exactly that status.  ``response.status_code`` is only
        read when a policy is configured.
        """
        # pylint: disable=too-many-boolean-expressions
        if (
            (self.retry_on_http_error is True and 400 <= response.status_code <= 599)
            or (
                isinstance(self.retry_on_http_error, list)
                and response.status_code in self.retry_on_http_error
            )
            or (
                isinstance(self.retry_on_http_error, int)
                and response.status_code == self.retry_on_http_error
            )
        ):
            return False
        return True

    async def request(self, method, url, **kwargs):
        """Send a request, retrying up to ``self.retries`` times.

        A retry happens on ``httpx.RequestError`` / ``httpx.HTTPStatusError``
        or when ``is_valid_respones`` rejects the response.  On the last
        attempt the response is returned (or the exception re-raised) as is.
        Every attempt asks ``get_client`` for a client, which advances the
        address / proxy rotation.
        """
        retries = self.retries
        # Extract once: get_kwargs_clients() pops the keys from kwargs, so
        # doing it per attempt would drop verify / max_redirects on retries.
        kwargs_clients = Network.get_kwargs_clients(kwargs)
        while retries >= 0:  # pragma: no cover
            client = self.get_client(**kwargs_clients)
            try:
                response = await client.request(method, url, **kwargs)
                if self.is_valid_respones(response) or retries <= 0:
                    return response
            except (httpx.RequestError, httpx.HTTPStatusError):
                if retries <= 0:
                    raise
            retries -= 1

    def stream(self, method, url, **kwargs):
        """Return ``client.stream(method, url, **kwargs)`` using the retry loop.

        NOTE(review): with an ``httpx.AsyncClient`` the value returned by
        ``client.stream()`` is presumably a context manager, not a response,
        so the status check below would only work when no
        ``retry_on_http_error`` policy is set -- confirm against httpx.
        """
        retries = self.retries
        # Extract once so verify / max_redirects survive across attempts.
        kwargs_clients = Network.get_kwargs_clients(kwargs)
        while retries >= 0:  # pragma: no cover
            client = self.get_client(**kwargs_clients)
            try:
                response = client.stream(method, url, **kwargs)
                if self.is_valid_respones(response) or retries <= 0:
                    return response
            except (httpx.RequestError, httpx.HTTPStatusError):
                if retries <= 0:
                    raise
            retries -= 1

    @classmethod
    async def aclose_all(cls):
        """Close the clients of every network registered in ``NETWORKS``."""
        await asyncio.gather(
            *[network.aclose() for network in NETWORKS.values()],
            return_exceptions=False
        )

    @classmethod
    def close_all(cls):
        """Run ``aclose_all`` on the loop from ``get_loop()`` and wait for it."""
        future = asyncio.run_coroutine_threadsafe(Network.aclose_all(), get_loop())
        future.result()
|
|
|
|
|
|
def get_network(name=None):
    """Look up a network in ``NETWORKS``.

    With a truthy ``name`` the registered network is returned, or ``None``
    when the name is unknown.  Without a name the default network is
    returned; it is created and registered first if it does not exist yet.
    """
    if not name:
        if DEFAULT_NAME not in NETWORKS:
            # NOTE(review): the ``{}`` is passed positionally, so it binds to
            # Network's first parameter (``enable_http``) and is falsy --
            # confirm this is intended.
            NETWORKS[DEFAULT_NAME] = Network({})
        return NETWORKS[DEFAULT_NAME]
    return NETWORKS.get(name)
|
|
|
|
|
|
def initialize(settings_engines=None, settings_outgoing=None):
    """(Re)build the ``NETWORKS`` registry from the settings.

    ``settings_engines`` and ``settings_outgoing`` fall back to
    ``settings["engines"]`` and ``settings["outgoing"]`` when falsy.

    Networks are registered in this order, later entries replacing earlier
    ones with the same name:

    1. the default network and one network per entry of ``ADDRESS_MAPPING``,
    2. the networks listed in ``settings_outgoing["networks"]``,
    3. one network per loaded engine whose ``network`` attribute is missing
       (built from the engine's own attributes) or a dict,
    4. aliases for engines whose ``network`` attribute is a string naming
       another registered network.

    Raises ``KeyError`` when an engine references a network name that is not
    registered.
    """
    # pylint: disable=import-outside-toplevel
    from searx.engines import engines
    from searx import settings

    # pylint: enable=import-outside-toplevel

    settings_engines = settings_engines or settings["engines"]
    settings_outgoing = settings_outgoing or settings["outgoing"]

    # default parameters for AsyncHTTPTransport
    # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121  # pylint: disable=line-too-long
    default_params = {
        "enable_http": False,
        "verify": True,
        "enable_http2": settings_outgoing["enable_http2"],
        "max_connections": settings_outgoing["pool_connections"],
        "max_keepalive_connections": settings_outgoing["pool_maxsize"],
        "keepalive_expiry": settings_outgoing["keepalive_expiry"],
        "local_addresses": settings_outgoing["source_ips"],
        "proxies": settings_outgoing["proxies"],
        "max_redirects": settings_outgoing["max_redirects"],
        "retries": settings_outgoing["retries"],
        "retry_on_http_error": None,
    }

    def new_network(params):
        """Create a Network from ``default_params`` overridden by ``params``."""
        result = dict(default_params)
        result.update(params)
        return Network(**result)

    def iter_networks():
        """Yield ``(name, engine, network setting)`` for every loaded engine."""
        for engine_spec in settings_engines:
            engine_name = engine_spec["name"]
            engine = engines.get(engine_name)
            if engine is None:
                # engine is listed in the settings but not loaded
                continue
            yield engine_name, engine, getattr(engine, "network", None)

    # Close the clients of a previous initialization before dropping them.
    if NETWORKS:
        Network.close_all()
    NETWORKS.clear()

    NETWORKS[DEFAULT_NAME] = new_network({})
    # one network per IP family ("ipv4", "ipv6"), bound to its wildcard address
    for family, address in ADDRESS_MAPPING.items():
        NETWORKS[family] = new_network({"local_addresses": address})

    # define networks from outgoing.networks
    for network_name, network in settings_outgoing["networks"].items():
        NETWORKS[network_name] = new_network(network)

    # define networks from engines.[i].network (except references)
    for engine_name, engine, network in iter_networks():
        if network is None:
            # No explicit network: take each parameter from the engine's
            # attribute of the same name, else from the defaults.
            network = {
                attribute_name: getattr(engine, attribute_name, attribute_value)
                for attribute_name, attribute_value in default_params.items()
            }
        if isinstance(network, dict):
            NETWORKS[engine_name] = new_network(network)

    # define networks from engines.[i].network (references)
    for engine_name, engine, network in iter_networks():
        if isinstance(network, str):
            NETWORKS[engine_name] = NETWORKS[network]
|