Mirror of https://github.com/searxng/searxng, synced 2024-01-01 19:24:07 +01:00

Merge 6e9f7c276e into cb945276b6
This commit is contained in: c331cf64cf

10 changed files with 401 additions and 50 deletions
@@ -173,3 +173,4 @@ features or generally made searx better:
 - Austin Olacsi `<https://github.com/Austin-Olacsi>`
 - @micsthepick
 - Daniel Kukula `<https://github.com/dkuku>`
+- @czaky `<https://github.com/czaky>`
@@ -47,6 +47,7 @@ engine is shown. Most of the options have a default value or even are optional.
   max_keepalive_connections: 10
   keepalive_expiry: 5.0
   using_tor_proxy: false
+  proxy_request_redundancy: 1
   proxies:
     http:
       - http://proxy1:8080
@@ -154,6 +155,9 @@ engine is shown. Most of the options have a default value or even are optional.
 ``proxies`` :
   Overwrites proxy settings from :ref:`settings outgoing`.
 
+``proxy_request_redundancy`` :
+  Overwrites the ``proxy_request_redundancy`` value from :ref:`settings outgoing`.
+
 ``using_tor_proxy`` :
   Using tor proxy (``true``) or not (``false``) for this engine. The default is
   taken from ``using_tor_proxy`` of the :ref:`settings outgoing`.
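Taken together, the per-engine options above map onto the ``Network`` constructor arguments added by this patch. A minimal sketch, assuming the class is importable as ``searx.network.network.Network`` (the module path is an assumption; the keyword arguments are the ones visible in this diff):

    from searx.network.network import Network

    # One network for one engine: two proxies, each request fanned out to both.
    network = Network(
        proxies={'http': ['http://proxy1:8080', 'http://proxy2:8080']},
        proxy_request_redundancy=2,
        using_tor_proxy=False,
        max_keepalive_connections=10,
        keepalive_expiry=5.0,
    )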
@@ -241,4 +245,3 @@ Example configuration in settings.yml for a German and English speaker:
 
 When searching, the default google engine will return German results and
 "google english" will return English results.
-
@@ -22,9 +22,9 @@ Communication with search engines.
   # and https://www.python-httpx.org/compatibility/#ssl-configuration
   # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer
   #
-  # uncomment below section if you want to use a proxyq see: SOCKS proxies
+  # Uncomment the section below if you want to use a proxy. See:
   # https://2.python-requests.org/en/latest/user/advanced/#proxies
-  # are also supported: see
+  # SOCKS proxies are also supported. See:
   # https://2.python-requests.org/en/latest/user/advanced/#socks
   #
   # proxies:
@@ -34,6 +34,11 @@ Communication with search engines.
   #
   # using_tor_proxy: true
   #
+  # Uncomment below if you want to make multiple requests in parallel
+  # through all the proxies at once:
+  #
+  # proxy_request_redundancy: 4
+  #
   # Extra seconds to add in order to account for the time taken by the proxy
   #
   # extra_proxy_timeout: 10.0
@@ -70,6 +75,10 @@ Communication with search engines.
   If there is more than one proxy for one protocol (http, https),
   requests to the engines are distributed in a round-robin fashion.
 
+``proxy_request_redundancy`` :
+  Cycle the proxies one by one (``1``) or use them in parallel (``> 1``) for all
+  engines.  The default is ``1`` and can be overwritten in the :ref:`settings engine`
+
 ``source_ips`` :
   If you use multiple network interfaces, define from which IP the requests must
   be made. Example:
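The scheduling difference between the two modes can be sketched in a few lines of asyncio. This is an illustration of the idea only, not the searxng implementation; ``fetch_via`` is a stand-in for a real proxied request:

    import asyncio
    import itertools

    proxies = ["http://proxy1:8080", "http://proxy2:8080", "http://proxy3:8080"]
    round_robin = itertools.cycle(proxies)  # proxy_request_redundancy == 1

    async def fetch_via(proxy: str) -> str:
        await asyncio.sleep(0.01)  # stand-in for the real HTTP round trip
        return f"response via {proxy}"

    async def fetch_redundant(redundancy: int) -> str:
        # redundancy > 1: send the same request through several proxies at
        # once and keep whichever answer arrives first.
        tasks = [asyncio.create_task(fetch_via(next(round_robin))) for _ in range(redundancy)]
        done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
        for task in pending:
            task.cancel()
        return done.pop().result()

    print(asyncio.run(fetch_redundant(redundancy=2)))

With ``redundancy=1`` this degenerates to plain round-robin, one proxy per request.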
@@ -106,5 +115,3 @@ Communication with search engines.
 ``using_tor_proxy`` :
   Using tor proxy (``true``) or not (``false``) for all engines.  The default is
   ``false`` and can be overwritten in the :ref:`settings engine`
-
-
@@ -110,6 +110,10 @@ class Engine:  # pylint: disable=too-few-public-methods
         https: socks5://proxy:port
     """
 
+    proxy_request_redundancy: int
+    """Cycle proxies one by one (``1``) or
+    use them in parallel at once (``> 1``) for this engine."""
+
     disabled: bool
     """To disable by default the engine, but not deleting it. It will allow the
     user to manually activate it in the settings."""
@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, global-statement
 
+from __future__ import annotations
+
 import asyncio
+import contextlib
 import logging
 import random
 from ssl import SSLContext
 import threading
-from typing import Any, Dict
+from typing import Any, Dict, Iterable
 
 import httpx
+import httpcore
 from httpx_socks import AsyncProxyTransport
 from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError
@@ -112,7 +116,8 @@ class AsyncProxyTransportFixed(AsyncProxyTransport):
         raise httpx.ProxyError("ProxyError: " + e.args[0], request=request) from e
 
 
-def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries):
+def get_socks_transport(verify, http2, local_address, proxy_url, limit, retries):
+    """Return an AsyncProxyTransport."""
     # support socks5h (requests compatibility):
     # https://requests.readthedocs.io/en/master/user/advanced/#socks
     # socks5:// hostname is resolved on client side
@@ -141,7 +146,8 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
     )
 
 
-def get_transport(verify, http2, local_address, proxy_url, limit, retries):
+def get_http_transport(verify, http2, local_address, proxy_url, limit, retries):
+    """Return an AsyncHTTPTransport."""
     verify = get_sslcontexts(None, None, verify, True, http2) if verify is True else verify
     return httpx.AsyncHTTPTransport(
         # pylint: disable=protected-access
@@ -154,6 +160,169 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries):
     )
 
 
+def get_single_transport(
+    limit: httpx.Limits | None = None,
+    proxy_url: str | None = None,
+    local_address: str | None = None,
+    retries: int = 0,
+    *,
+    verify: bool = True,
+    http2: bool = True,
+) -> httpx.AsyncBaseTransport:
+    """Generate a single, non-parallel transport.
+
+    Parameters
+    ----------
+    limit : httpx.Limits
+        Limits applied to the transport.
+    proxy_url : str | None, optional
+        Proxy to use for the transport.
+    local_address : str | None, optional
+        Local address to specify in the connection.
+    retries : int, optional
+        How many times to retry the request, by default 0.
+    verify : bool, optional
+        Verify the certificates, by default True.
+    http2 : bool, optional
+        Enable HTTP2 protocol, by default True.
+
+    Returns
+    -------
+    httpx.AsyncBaseTransport
+        An async transport object.
+    """
+    limit = limit or httpx.Limits()
+    if proxy_url and proxy_url.startswith(('socks4://', 'socks5://', 'socks5h://')):
+        return get_socks_transport(verify, http2, local_address, proxy_url, limit, retries)
+    return get_http_transport(verify, http2, local_address, proxy_url, limit, retries)
+
+
+class AsyncParallelTransport(httpx.AsyncBaseTransport):
+    """Fan out requests to multiple base transports."""
+
+    def __init__(
+        self,
+        transports: Iterable[httpx.AsyncBaseTransport],
+        proxy_request_redundancy: int,
+        network_logger: logging.Logger,
+    ) -> None:
+        """Init the parallel transport using a list of base `transports`."""
+        self._logger = network_logger or logger
+        self._transports = list(transports)
+        if len(self._transports) == 0:
+            msg = "Got an empty list of (proxy) transports."
+            raise ValueError(msg)
+        if proxy_request_redundancy < 1:
+            self._logger.warning("Invalid proxy_request_redundancy specified: %d", proxy_request_redundancy)
+            proxy_request_redundancy = 1
+        self._proxy_request_redundancy = proxy_request_redundancy
+        self._index = random.randrange(len(self._transports))  # noqa: S311
+
+    async def handle_async_request(
+        self,
+        request: httpx.Request,
+    ) -> httpx.Response:
+        # pylint: disable=too-many-branches
+        """Issue parallel requests to all sub-transports.
+
+        Return the response of the first completed.
+
+        Parameters
+        ----------
+        request : httpx.Request
+            Request to pass to the transports.
+
+        Returns
+        -------
+        httpx.Response
+            Response from the first completed request.
+        """
+        response = None  # non-error response, taking precedence
+        error_response = None  # any error response
+        request_error = None  # any request related exception
+        tcount = len(self._transports)
+        redundancy = self._proxy_request_redundancy
+        pending = [
+            asyncio.create_task(self._transports[i % tcount].handle_async_request(request))
+            for i in range(self._index, self._index + redundancy)
+        ]
+        self._index = (self._index + redundancy) % tcount
+        while pending:
+            if len(pending) == 1:
+                return await pending.pop()
+            done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+            for task in done:
+                try:
+                    result = task.result()
+                    if not result.is_error:
+                        response = result
+                    elif result.status_code == 404 and response is None:
+                        error_response = response = result
+                    elif not error_response:
+                        self._logger.warning("Error response: %s for %s", result.status_code, request.url)
+                        error_response = result
+                except (
+                    httpx.HTTPError,
+                    httpcore.ProtocolError,
+                    httpcore.NetworkError,
+                    httpcore.TimeoutException,
+                    # Low level semaphore errors.
+                    ValueError,
+                ) as e:
+                    if not request_error:
+                        self._logger.warning("Request error: %s for %s", e, request.url)
+                        request_error = e
+            if response:
+                break
+        if pending:
+            with contextlib.suppress(asyncio.exceptions.CancelledError):
+                gather = asyncio.gather(*pending)
+                gather.cancel()
+                self._logger.debug("Cancelling %d/%d redundant proxy requests.", len(pending), redundancy)
+                await gather
+        if response:
+            return response
+        if error_response:
+            return error_response
+        msg = "No valid response."
+        if request_error:
+            raise httpx.RequestError(msg) from request_error
+        raise httpx.RequestError(msg)
+
+    async def aclose(self) -> None:
+        """Close all the transports."""
+        for transport in self._transports:
+            await transport.aclose()
+
+
+def get_transport(
+    proxy_urls: list,
+    limit: httpx.Limits | None = None,
+    local_address: str | None = None,
+    proxy_request_redundancy: int = 1,
+    retries: int = 0,
+    network_logger: logging.Logger = logger,
+    *,
+    verify: bool = True,
+    http2: bool = True,
+) -> httpx.AsyncBaseTransport:
+    """Return a single http/proxy transport or the parallel version of those."""
+    limit = limit or httpx.Limits()
+    # pylint: disable=unnecessary-lambda-assignment
+    transport = lambda proxy_url: get_single_transport(
+        verify=verify,
+        http2=http2,
+        local_address=local_address,
+        proxy_url=proxy_url,
+        limit=limit,
+        retries=retries,
+    )
+    if len(proxy_urls or []) <= 1:
+        return transport(proxy_urls[0] if proxy_urls else None)
+    return AsyncParallelTransport(map(transport, proxy_urls), proxy_request_redundancy, network_logger)
+
+
 def new_client(
     # pylint: disable=too-many-arguments
     enable_http,
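In use, the transport returned by ``get_transport`` above is what ``new_client`` mounts per URL pattern. A minimal usage sketch, assuming the module layout from this diff (``searx.network.client``); the proxy hosts and target URL are placeholders:

    import asyncio

    import httpx

    from searx.network import client


    async def main():
        transport = client.get_transport(
            proxy_urls=["socks5h://proxy1:1080", "socks5h://proxy2:1080"],
            proxy_request_redundancy=2,  # fan each request out to both proxies
        )
        # Route all outgoing traffic of this client through the transport.
        async with httpx.AsyncClient(mounts={"all://": transport}) as http_client:
            response = await http_client.get("https://example.org/")
            print(response.status_code)


    asyncio.run(main())

Whichever proxy answers first wins; the remaining redundant requests are cancelled.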
@@ -163,10 +332,12 @@ def new_client(
     max_keepalive_connections,
     keepalive_expiry,
     proxies,
+    proxy_request_redundancy,
     local_address,
     retries,
     max_redirects,
     hook_log_response,
+    network_logger,
 ):
     limit = httpx.Limits(
         max_connections=max_connections,
@@ -175,20 +346,24 @@ def new_client(
     )
     # See https://www.python-httpx.org/advanced/#routing
     mounts = {}
-    for pattern, proxy_url in proxies.items():
+    for pattern, proxy_urls in proxies.items():
         if not enable_http and pattern.startswith('http://'):
             continue
-        if proxy_url.startswith('socks4://') or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://'):
-            mounts[pattern] = get_transport_for_socks_proxy(
-                verify, enable_http2, local_address, proxy_url, limit, retries
-            )
-        else:
-            mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries)
+        mounts[pattern] = get_transport(
+            verify=verify,
+            http2=enable_http2,
+            local_address=local_address,
+            proxy_urls=proxy_urls,
+            proxy_request_redundancy=proxy_request_redundancy,
+            limit=limit,
+            retries=retries,
+            network_logger=network_logger,
+        )
 
     if not enable_http:
         mounts['http://'] = AsyncHTTPTransportNoHttp()
 
-    transport = get_transport(verify, enable_http2, local_address, None, limit, retries)
+    transport = get_http_transport(verify, enable_http2, local_address, None, limit, retries)
 
     event_hooks = None
     if hook_log_response:
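The ``mounts`` dictionary built above relies on httpx's URL-pattern routing (see the linked httpx docs): the most specific matching pattern wins. A tiny standalone sketch, with default transports standing in for the proxy transports:

    import httpx

    # Patterns follow httpx routing semantics; requests-style keys such as
    # 'http' / 'https' are first translated via PROXY_PATTERN_MAPPING.
    mounts = {
        "all://": httpx.AsyncHTTPTransport(),                # fallback for every URL
        "http://": httpx.AsyncHTTPTransport(),               # plain-http traffic only
        "all://*.example.org": httpx.AsyncHTTPTransport(),   # host-specific route
    }
    http_client = httpx.AsyncClient(mounts=mounts)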
@@ -2,10 +2,11 @@
 # pylint: disable=global-statement
 # pylint: disable=missing-module-docstring, missing-class-docstring
 
+from __future__ import annotations
+
 import atexit
 import asyncio
 import ipaddress
-from itertools import cycle
 from typing import Dict
 
 import httpx
@@ -46,12 +47,14 @@ class Network:
         'keepalive_expiry',
         'local_addresses',
         'proxies',
+        'proxy_request_redundancy',
         'using_tor_proxy',
         'max_redirects',
         'retries',
         'retry_on_http_error',
         '_local_addresses_cycle',
         '_proxies_cycle',
+        '_proxies_by_pattern',
         '_clients',
         '_logger',
     )
@@ -68,6 +71,7 @@ class Network:
         max_keepalive_connections=None,
         keepalive_expiry=None,
         proxies=None,
+        proxy_request_redundancy=1,
         using_tor_proxy=False,
         local_addresses=None,
         retries=0,
@@ -83,13 +87,15 @@ class Network:
         self.max_keepalive_connections = max_keepalive_connections
         self.keepalive_expiry = keepalive_expiry
         self.proxies = proxies
+        self.proxy_request_redundancy = proxy_request_redundancy
         self.using_tor_proxy = using_tor_proxy
         self.local_addresses = local_addresses
         self.retries = retries
         self.retry_on_http_error = retry_on_http_error
         self.max_redirects = max_redirects
         self._local_addresses_cycle = self.get_ipaddress_cycle()
-        self._proxies_cycle = self.get_proxy_cycles()
+        # Contains a dictionary with a list of proxies by pattern.
+        self._proxies_by_pattern = dict(self.iter_proxies())
         self._clients = {}
         self._logger = logger.getChild(logger_name) if logger_name else logger
         self.check_parameters()
@@ -132,21 +138,17 @@ class Network:
             return
         # https://www.python-httpx.org/compatibility/#proxy-keys
         if isinstance(self.proxies, str):
-            yield 'all://', [self.proxies]
-        else:
-            for pattern, proxy_url in self.proxies.items():
+            yield 'all://', (self.proxies,)
+        elif isinstance(self.proxies, dict):
+            for pattern, proxy_urls in self.proxies.items():
                 pattern = PROXY_PATTERN_MAPPING.get(pattern, pattern)
-                if isinstance(proxy_url, str):
-                    proxy_url = [proxy_url]
-                yield pattern, proxy_url
-
-    def get_proxy_cycles(self):
-        proxy_settings = {}
-        for pattern, proxy_urls in self.iter_proxies():
-            proxy_settings[pattern] = cycle(proxy_urls)
-        while True:
-            # pylint: disable=stop-iteration-return
-            yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items())
+                if isinstance(proxy_urls, str):
+                    yield pattern, (proxy_urls,)
+                else:
+                    yield pattern, tuple(proxy_urls)
+        else:
+            msg = "`proxies` needs to be either a string or a pattern-to-URL dictionary."
+            raise ValueError(msg)
 
     async def log_response(self, response: httpx.Response):
         request = response.request
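After this change the normalized mapping always holds tuples keyed by httpx patterns. A sketch of the resulting shapes; the module path is an assumption and the expected values mirror the unit tests at the end of this diff:

    from searx.network.network import Network

    network = Network(proxies='http://localhost:1337')
    assert network._proxies_by_pattern == {'all://': ('http://localhost:1337',)}

    network = Network(proxies={'https': ['http://localhost:1337', 'http://localhost:1339']})
    assert network._proxies_by_pattern == {
        'https://': ('http://localhost:1337', 'http://localhost:1339'),
    }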
@@ -181,10 +183,11 @@ class Network:
         verify = self.verify if verify is None else verify
         max_redirects = self.max_redirects if max_redirects is None else max_redirects
         local_address = next(self._local_addresses_cycle)
-        proxies = next(self._proxies_cycle)  # is a tuple so it can be part of the key
-        key = (verify, max_redirects, local_address, proxies)
         hook_log_response = self.log_response if searx_debug else None
-        if key not in self._clients or self._clients[key].is_closed:
+        proxies = self._proxies_by_pattern
+        key = (verify, max_redirects, local_address)
+        client = self._clients.get(key)
+        if not client or client.is_closed:
             client = new_client(
                 self.enable_http,
                 verify,
@@ -192,17 +195,19 @@ class Network:
                 self.max_connections,
                 self.max_keepalive_connections,
                 self.keepalive_expiry,
-                dict(proxies),
+                proxies,
+                self.proxy_request_redundancy,
                 local_address,
                 0,
                 max_redirects,
                 hook_log_response,
+                self._logger,
             )
             if self.using_tor_proxy and not await self.check_tor_proxy(client, proxies):
                 await client.aclose()
                 raise httpx.ProxyError('Network configuration problem: not using Tor')
             self._clients[key] = client
-        return self._clients[key]
+        return client
 
     async def aclose(self):
         async def close_client(client):
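Since the proxy mapping is now constant for the lifetime of a ``Network``, it no longer needs to be part of the client-cache key. A toy sketch of the keying logic above; ``make_client`` is a placeholder for ``new_client``:

    from __future__ import annotations

    _clients: dict = {}

    def make_client(key: tuple) -> object:
        return object()  # placeholder for new_client(...)

    def get_cached_client(verify: bool, max_redirects: int, local_address: str | None):
        key = (verify, max_redirects, local_address)  # proxies dropped from the key
        client = _clients.get(key)
        if client is None or getattr(client, "is_closed", False):
            client = make_client(key)
            _clients[key] = client
        return client

    assert get_cached_client(True, 30, None) is get_cached_client(True, 30, None)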
@@ -340,6 +345,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
         'local_addresses': settings_outgoing['source_ips'],
         'using_tor_proxy': settings_outgoing['using_tor_proxy'],
         'proxies': settings_outgoing['proxies'],
+        'proxy_request_redundancy': settings_outgoing['proxy_request_redundancy'],
         'max_redirects': settings_outgoing['max_redirects'],
         'retries': settings_outgoing['retries'],
         'retry_on_http_error': None,
@@ -178,9 +178,9 @@ outgoing:
   # and https://www.python-httpx.org/compatibility/#ssl-configuration
   # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer
   #
-  # uncomment below section if you want to use a proxyq see: SOCKS proxies
+  # Uncomment the section below if you want to use a proxy. See:
   # https://2.python-requests.org/en/latest/user/advanced/#proxies
-  # are also supported: see
+  # SOCKS proxies are also supported. See:
   # https://2.python-requests.org/en/latest/user/advanced/#socks
   #
   # proxies:
@@ -190,6 +190,11 @@ outgoing:
   #
   # using_tor_proxy: true
   #
+  # Uncomment below if you want to make multiple requests in parallel
+  # through all the proxies at once:
+  #
+  # proxy_request_redundancy: 4
+  #
   # Extra seconds to add in order to account for the time taken by the proxy
   #
   # extra_proxy_timeout: 10
@@ -221,6 +221,7 @@ SCHEMA = {
         'max_redirects': SettingsValue(int, 30),
         'retries': SettingsValue(int, 0),
         'proxies': SettingsValue((None, str, dict), None),
+        'proxy_request_redundancy': SettingsValue(int, 1),
         'source_ips': SettingsValue((None, str, list), None),
         # Tor configuration
         'using_tor_proxy': SettingsValue(bool, False),
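Each SCHEMA entry pins an expected type (or tuple of allowed types) and a default. A self-contained illustration of that contract, not the actual ``SettingsValue`` implementation:

    # Illustration only: a minimal stand-in for what a SettingsValue(type, default)
    # entry expresses. The real searx implementation differs in detail.
    class SettingsValueSketch:
        def __init__(self, type_definition, default):
            # A single type or a tuple of allowed types (None = "may be unset").
            self.types = type_definition if isinstance(type_definition, tuple) else (type_definition,)
            self.default = default

        def apply(self, value):
            if value is None:
                return self.default
            allowed = tuple(t for t in self.types if t is not None)
            if not isinstance(value, allowed):
                raise ValueError(f"invalid type {type(value)!r}")
            return value

    entry = SettingsValueSketch(int, 1)   # mirrors SettingsValue(int, 1) above
    assert entry.apply(None) == 1         # missing setting falls back to the default
    assert entry.apply(4) == 4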
tests/unit/network/test_client.py (new file, 144 lines)
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Test module for the client and proxy handling code."""
+
+from unittest.mock import patch, Mock
+
+import httpx
+
+from searx.network import client
+from tests import SearxTestCase
+
+
+class TestClient(SearxTestCase):
+    """Tests for the client and proxy handling code."""
+
+    def test_get_single_transport(self):
+        t = client.get_single_transport(proxy_url="socks4://local:1080")
+        self.assertTrue(isinstance(t, client.AsyncProxyTransportFixed))
+        t = client.get_single_transport(proxy_url="socks5://local:1080")
+        self.assertTrue(isinstance(t, client.AsyncProxyTransportFixed))
+        t = client.get_single_transport(proxy_url="socks5h://local:1080")
+        self.assertTrue(isinstance(t, client.AsyncProxyTransportFixed))
+        t = client.get_single_transport(proxy_url="https://local:8080")
+        self.assertTrue(isinstance(t, httpx.AsyncHTTPTransport))
+
+    def test_get_parallel_transport(self):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+        )
+        self.assertTrue(isinstance(t, client.AsyncParallelTransport))
+
+    @patch(
+        'searx.network.client.AsyncProxyTransportFixed.handle_async_request',
+        side_effect=[httpx.Response(200, html="<html/>"), httpx.Response(301, html="<html/>")],
+    )
+    async def test_parallel_transport_ok(self, handler_mock: Mock):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+        )
+        request = httpx.Request(url="http://wiki.com", method="GET")
+        response = await t.handle_async_request(request)
+        self.assertEqual(response.status_code, 200)
+        handler_mock.assert_called_once_with(request)
+
+        response = await t.handle_async_request(request)
+        self.assertEqual(handler_mock.call_count, 2)
+        self.assertEqual(response.status_code, 301)
+
+    @patch(
+        'searx.network.client.AsyncProxyTransportFixed.handle_async_request',
+        side_effect=[httpx.Response(403, html="<html/>"), httpx.Response(200, html="<html/>")],
+    )
+    async def test_parallel_transport_403(self, handler_mock: Mock):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+            proxy_request_redundancy=2,
+        )
+        self.assertTrue(isinstance(t, client.AsyncParallelTransport))
+        request = httpx.Request(url="http://wiki.com", method="GET")
+        response = await t.handle_async_request(request)
+        handler_mock.assert_called_with(request)
+        self.assertEqual(handler_mock.call_count, 2)
+        self.assertEqual(response.status_code, 200)
+
+    @patch(
+        'searx.network.client.AsyncProxyTransportFixed.handle_async_request',
+        side_effect=[httpx.Response(404, html="<html/>"), httpx.Response(404, html="<html/>")],
+    )
+    async def test_parallel_transport_404_404(self, handler_mock: Mock):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+            proxy_request_redundancy=2,
+        )
+        self.assertTrue(isinstance(t, client.AsyncParallelTransport))
+        request = httpx.Request(url="http://wiki.com", method="GET")
+        response = await t.handle_async_request(request)
+        handler_mock.assert_called_with(request)
+        self.assertEqual(handler_mock.call_count, 2)
+        self.assertEqual(response.status_code, 404)
+
+    @patch(
+        'searx.network.client.AsyncProxyTransportFixed.handle_async_request',
+        side_effect=[httpx.Response(200, html="<html/>"), httpx.Response(404, html="<html/>")],
+    )
+    async def test_parallel_transport_404_200(self, handler_mock: Mock):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+            proxy_request_redundancy=2,
+        )
+        self.assertTrue(isinstance(t, client.AsyncParallelTransport))
+        request = httpx.Request(url="http://wiki.com", method="GET")
+        response = await t.handle_async_request(request)
+        handler_mock.assert_called_with(request)
+        self.assertEqual(handler_mock.call_count, 2)
+        self.assertEqual(response.status_code, 200)
+
+    @patch(
+        'searx.network.client.AsyncProxyTransportFixed.handle_async_request',
+        side_effect=[httpx.Response(403, html="<html/>"), httpx.Response(403, html="<html/>")],
+    )
+    async def test_parallel_transport_403_403(self, handler_mock: Mock):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+            proxy_request_redundancy=2,
+        )
+        self.assertTrue(isinstance(t, client.AsyncParallelTransport))
+        request = httpx.Request(url="http://wiki.com", method="GET")
+        response = await t.handle_async_request(request)
+        handler_mock.assert_called_with(request)
+        self.assertEqual(handler_mock.call_count, 2)
+        self.assertEqual(response.status_code, 403)
+
+    @patch(
+        'searx.network.client.AsyncProxyTransportFixed.handle_async_request',
+        side_effect=[httpx.RequestError("OMG!"), httpx.Response(200, html="<html/>")],
+    )
+    async def test_parallel_transport_ex_ok(self, handler_mock: Mock):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+            proxy_request_redundancy=2,
+        )
+        self.assertTrue(isinstance(t, client.AsyncParallelTransport))
+        request = httpx.Request(url="http://wiki.com", method="GET")
+        response = await t.handle_async_request(request)
+        handler_mock.assert_called_with(request)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(handler_mock.call_count, 2)
+
+    @patch(
+        'searx.network.client.AsyncProxyTransportFixed.handle_async_request',
+        side_effect=[httpx.RequestError("OMG!"), httpx.RequestError("OMG!")],
+    )
+    async def test_parallel_transport_ex_ex(self, handler_mock: Mock):
+        t = client.get_transport(
+            proxy_urls=["socks5h://local:1080", "socks5h://local:1180"],
+            proxy_request_redundancy=2,
+        )
+        self.assertTrue(isinstance(t, client.AsyncParallelTransport))
+        request = httpx.Request(url="http://wiki.com", method="GET")
+        response = None
+        with self.assertRaises(httpx.RequestError):
+            response = await t.handle_async_request(request)
+        handler_mock.assert_called_with(request)
+        self.assertFalse(response)
+        self.assertEqual(handler_mock.call_count, 2)
@@ -17,7 +17,7 @@ class TestNetwork(SearxTestCase):  # pylint: disable=missing-class-docstring
         network = Network()
 
         self.assertEqual(next(network._local_addresses_cycle), None)
-        self.assertEqual(next(network._proxies_cycle), ())
+        self.assertEqual(network._proxies_by_pattern, {})
 
     def test_ipaddress_cycle(self):
         network = NETWORKS['ipv6']
@@ -47,26 +47,31 @@ class TestNetwork(SearxTestCase):  # pylint: disable=missing-class-docstring
         with self.assertRaises(ValueError):
             Network(local_addresses=['not_an_ip_address'])
 
-    def test_proxy_cycles(self):
+    def test_proxies_by_patterns(self):
         network = Network(proxies='http://localhost:1337')
-        self.assertEqual(next(network._proxies_cycle), (('all://', 'http://localhost:1337'),))
+        self.assertEqual(network._proxies_by_pattern, {'all://': ('http://localhost:1337',)})
 
         network = Network(proxies={'https': 'http://localhost:1337', 'http': 'http://localhost:1338'})
         self.assertEqual(
-            next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))
-        )
-        self.assertEqual(
-            next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))
+            network._proxies_by_pattern,
+            {
+                'https://': ('http://localhost:1337',),
+                'http://': ('http://localhost:1338',),
+            },
         )
 
         network = Network(
             proxies={'https': ['http://localhost:1337', 'http://localhost:1339'], 'http': 'http://localhost:1338'}
        )
         self.assertEqual(
-            next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))
-        )
-        self.assertEqual(
-            next(network._proxies_cycle), (('https://', 'http://localhost:1339'), ('http://', 'http://localhost:1338'))
+            network._proxies_by_pattern,
+            {
+                'https://': (
+                    'http://localhost:1337',
+                    'http://localhost:1339',
+                ),
+                'http://': ('http://localhost:1338',),
+            },
         )
 
         with self.assertRaises(ValueError):