[enh] Add multiple outgoing proxies
credits go to @bauruine, see https://github.com/searx/searx/pull/1958
parent 2fc3b17c85
commit 3786920df9
7 changed files with 172 additions and 36 deletions
@@ -25,7 +25,7 @@ from operator import itemgetter
 from searx import settings
 from searx import logger
 from searx.data import ENGINES_LANGUAGES
-from searx.poolrequests import get
+from searx.poolrequests import get, get_proxy_cycles
 from searx.utils import load_module, match_language, get_engine_from_settings
 
 
@@ -79,16 +79,18 @@ def load_engine(engine_data):
         logger.exception('Cannot load engine "{}"'.format(engine_module))
         return None
 
-    for param_name in engine_data:
+    for param_name, param_value in engine_data.items():
         if param_name == 'engine':
-            continue
-        if param_name == 'categories':
-            if engine_data['categories'] == 'none':
+            pass
+        elif param_name == 'categories':
+            if param_value == 'none':
                 engine.categories = []
             else:
-                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
-            continue
-        setattr(engine, param_name, engine_data[param_name])
+                engine.categories = list(map(str.strip, param_value.split(',')))
+        elif param_name == 'proxies':
+            engine.proxies = get_proxy_cycles(param_value)
+        else:
+            setattr(engine, param_name, param_value)
 
     for arg_name, arg_value in engine_default_args.items():
         if not hasattr(engine, arg_name):
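The loop above (in what appears to be searx/engines/__init__.py) now accepts a per-engine "proxies" entry and stores it as rotation cycles instead of a plain mapping. Below is a minimal, self-contained sketch of that branch; EngineStub, the sample engine_data values and the proxy URLs are made up for illustration:

from itertools import cycle

class EngineStub:
    """Hypothetical stand-in for a loaded engine module."""

def get_proxy_cycles(proxy_settings):
    # simplified version of the helper added to poolrequests.py below
    if not proxy_settings:
        return None
    return {protocol: cycle(proxies if isinstance(proxies, list) else [proxies])
            for protocol, proxies in proxy_settings.items()}

engine = EngineStub()
engine_data = {                      # sample values, not a real settings.yml entry
    'engine': 'example',
    'categories': 'general, web',
    'proxies': {'http': ['http://proxy1:8080', 'http://proxy2:8080']},
    'timeout': 3.0,
}

for param_name, param_value in engine_data.items():
    if param_name == 'engine':
        pass
    elif param_name == 'categories':
        engine.categories = [] if param_value == 'none' else [c.strip() for c in param_value.split(',')]
    elif param_name == 'proxies':
        engine.proxies = get_proxy_cycles(param_value)   # cycles, not plain URLs
    else:
        setattr(engine, param_name, param_value)

print(engine.categories)              # ['general', 'web']
print(next(engine.proxies['http']))   # http://proxy1:8080
print(next(engine.proxies['http']))   # http://proxy2:8080

Each engine therefore carries its own independent cycles, advanced only when that engine issues a request.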
@@ -111,6 +111,32 @@ def get_time_for_thread():
     return threadLocal.total_time
 
 
+def get_proxy_cycles(proxy_settings):
+    if not proxy_settings:
+        return None
+    # Backwards compatibility for single proxy in settings.yml
+    for protocol, proxy in proxy_settings.items():
+        if isinstance(proxy, str):
+            proxy_settings[protocol] = [proxy]
+
+    for protocol in proxy_settings:
+        proxy_settings[protocol] = cycle(proxy_settings[protocol])
+    return proxy_settings
+
+
+GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies'))
+
+
+def get_proxies(proxy_cycles):
+    if proxy_cycles:
+        return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()}
+    return None
+
+
+def get_global_proxies():
+    return get_proxies(GLOBAL_PROXY_CYCLES)
+
+
 def request(method, url, **kwargs):
     """same as requests/requests/api.py request(...)"""
     time_before_request = time()
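The helpers above (presumably searx/poolrequests.py) normalise the configured proxies and wrap every protocol's list in itertools.cycle, so the old single-proxy form keeps working. A small standalone sketch of that behaviour, using placeholder proxy URLs:

from itertools import cycle

def get_proxy_cycles(proxy_settings):
    if not proxy_settings:
        return None
    # Backwards compatibility: a single proxy string becomes a one-element list
    for protocol, proxy in proxy_settings.items():
        if isinstance(proxy, str):
            proxy_settings[protocol] = [proxy]
    # Every protocol now gets an endless round-robin iterator over its proxies
    for protocol in proxy_settings:
        proxy_settings[protocol] = cycle(proxy_settings[protocol])
    return proxy_settings

cycles = get_proxy_cycles({
    'http': ['http://proxy1:8080', 'http://proxy2:8080'],   # new list form
    'https': 'socks5h://127.0.0.1:9050',                    # old single-proxy form
})
print(next(cycles['http']), next(cycles['http']), next(cycles['http']))
# http://proxy1:8080 http://proxy2:8080 http://proxy1:8080
print(next(cycles['https']))                                # always the same single proxy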
@@ -119,8 +145,8 @@ def request(method, url, **kwargs):
     session = SessionSinglePool()
 
     # proxies
-    if kwargs.get('proxies') is None:
-        kwargs['proxies'] = settings['outgoing'].get('proxies')
+    if not kwargs.get('proxies'):
+        kwargs['proxies'] = get_global_proxies()
 
     # timeout
     if 'timeout' in kwargs:
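Note the switch from "is None" to "not": request() now falls back to the global cycles when the caller passes no proxies at all or an empty mapping, while an explicit proxies argument still wins. A hypothetical helper, resolve_proxies, isolating that fallback:

def resolve_proxies(kwargs, get_global_proxies):
    # hypothetical extraction of the fallback added to request() above
    if not kwargs.get('proxies'):
        kwargs['proxies'] = get_global_proxies()
    return kwargs['proxies']

def fake_global_proxies():
    # stand-in for poolrequests.get_global_proxies()
    return {'http': 'http://proxy1:8080'}

print(resolve_proxies({}, fake_global_proxies))                                          # falls back to globals
print(resolve_proxies({'proxies': {'http': 'http://other:3128'}}, fake_global_proxies))  # explicit value kept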
@@ -119,7 +119,7 @@ def send_http_request(engine, request_params):
 
     # setting engine based proxies
     if hasattr(engine, 'proxies'):
-        request_args['proxies'] = engine.proxies
+        request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
 
     # specific type of request (GET or POST)
     if request_params['method'] == 'GET':
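Since engine.proxies now holds cycles rather than plain URLs, send_http_request (apparently searx/search.py, which imports poolrequests as requests_lib) resolves them on every call, so consecutive requests to the same engine rotate through its proxy list. A short illustration with placeholder proxies:

from itertools import cycle

def get_proxies(proxy_cycles):
    # same shape as the poolrequests helper: pick the next proxy of every protocol
    if proxy_cycles:
        return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()}
    return None

engine_proxies = {'http': cycle(['http://proxy1:8080', 'http://proxy2:8080'])}
for _ in range(3):
    print(get_proxies(engine_proxies))
# {'http': 'http://proxy1:8080'}
# {'http': 'http://proxy2:8080'}
# {'http': 'http://proxy1:8080'}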
@@ -63,13 +63,15 @@ outgoing: # communication with search engines
   pool_connections : 100 # Number of different hosts
   pool_maxsize : 10 # Number of simultaneous requests by host
 # uncomment below section if you want to use a proxy
-# see http://docs.python-requests.org/en/latest/user/advanced/#proxies
-# SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks
-#  proxies :
-#    http : socks5h://127.0.0.1:9050
-#    https: socks5h://127.0.0.1:9050
-#  using_tor_proxy : True
-#  extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy
+# see https://2.python-requests.org/en/latest/user/advanced/#proxies
+# SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks
+#  proxies:
+#    http:
+#      - http://proxy1:8080
+#      - http://proxy2:8080
+#    https:
+#      - http://proxy1:8080
+#      - http://proxy2:8080
 # uncomment below section only if you have more than one network interface
 # which can be the source of outgoing search requests
 #  source_ips:
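Once uncommented, the new settings.yml block parses to a mapping from protocol to a list of proxy URLs, while the old single-string form is still accepted because get_proxy_cycles() wraps strings in a list. A quick check of both shapes (requires PyYAML; the URLs are placeholders):

import yaml

new_style = yaml.safe_load("""
proxies:
  http:
    - http://proxy1:8080
    - http://proxy2:8080
  https:
    - http://proxy1:8080
    - http://proxy2:8080
""")
old_style = yaml.safe_load("""
proxies:
  http: socks5h://127.0.0.1:9050
  https: socks5h://127.0.0.1:9050
""")

print(new_style['proxies']['http'])   # ['http://proxy1:8080', 'http://proxy2:8080']
print(old_style['proxies']['http'])   # 'socks5h://127.0.0.1:9050' (normalised to a list later)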
@@ -78,6 +78,7 @@ from searx.plugins import plugins
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
 from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
+from searx.poolrequests import get_global_proxies
 
 
 # serve pages with HTTP/1.1
@@ -149,8 +150,6 @@ _category_names = (gettext('files'),
                    gettext('onions'),
                    gettext('science'))
 
-outgoing_proxies = settings['outgoing'].get('proxies') or None
-
 _flask_babel_get_translations = flask_babel.get_translations
 
 
@@ -905,7 +904,7 @@ def image_proxy():
                         stream=True,
                         timeout=settings['outgoing']['request_timeout'],
                         headers=headers,
-                        proxies=outgoing_proxies)
+                        proxies=get_global_proxies())
 
     if resp.status_code == 304:
         return '', resp.status_code
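In webapp.py the module-level outgoing_proxies value was read from settings once at import time, so image_proxy() always reused the same mapping; calling get_global_proxies() per request lets the image proxy rotate like engine traffic. A tiny sketch, with hypothetical proxy URLs, of why the per-call form matters:

from itertools import cycle

_cycles = {'http': cycle(['http://proxy1:8080', 'http://proxy2:8080'])}

def get_global_proxies():
    # returns a fresh {protocol: proxy} mapping on every call
    return {protocol: next(c) for protocol, c in _cycles.items()}

captured_once = get_global_proxies()                 # old pattern: evaluated a single time
print(captured_once, captured_once)                  # the same proxy twice
print(get_global_proxies(), get_global_proxies())    # new pattern: rotates per call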