From 74d56f6cfb0dc8e30a6d4f95cce09937984cda2d Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 9 Feb 2021 14:33:36 +0100 Subject: [PATCH 1/2] [mod] poolrequests: for one (user request, engine) always use the same HTTPAdapter The duckduckgo engine requires an additional request after the results have been sent. This commit makes sure that the second request uses the same HTTPAdapter, i.e. the same IP address and the same proxy. --- searx/poolrequests.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/searx/poolrequests.py b/searx/poolrequests.py index 25a6baed9..8b8681437 100644 --- a/searx/poolrequests.py +++ b/searx/poolrequests.py @@ -1,7 +1,7 @@ import sys from time import time from itertools import cycle -from threading import RLock, local +from threading import local import requests @@ -88,10 +88,12 @@ class SessionSinglePool(requests.Session): super().__init__() # reuse the same adapters - with RLock(): - self.adapters.clear() - self.mount('https://', next(https_adapters)) - self.mount('http://', next(http_adapters)) + self.adapters.clear() + + https_adapter = threadLocal.__dict__.setdefault('https_adapter', next(https_adapters)) + http_adapter = threadLocal.__dict__.setdefault('http_adapter', next(http_adapters)) + self.mount('https://', https_adapter) + self.mount('http://', http_adapter) def close(self): """Call super, but clear adapters since there are managed globaly""" From d2dac11392c89084e8d6143f09c27c5fefabdef9 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 9 Feb 2021 14:36:43 +0100 Subject: [PATCH 2/2] [mod] duckduckgo engine: better support of the language preference After the main request, send a second request to https://duckduckgo.com/t/sl_h See https://github.com/searx/searx/issues/2259 --- searx/engines/duckduckgo.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 7f1378264..638f1211b 100644 --- 
a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -5,7 +5,8 @@ from lxml.html import fromstring from json import loads -from searx.utils import extract_text, match_language, eval_xpath +from searx.utils import extract_text, match_language, eval_xpath, dict_subset +from searx.poolrequests import get # about about = { @@ -35,6 +36,7 @@ language_aliases = { # search-url url = 'https://html.duckduckgo.com/html' +url_ping = 'https://duckduckgo.com/t/sl_h' time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'} @@ -65,27 +67,27 @@ def request(query, params): params['url'] = url params['method'] = 'POST' - params['data']['b'] = '' params['data']['q'] = query - params['data']['df'] = '' + params['data']['b'] = '' region_code = get_region_code(params['language'], supported_languages) if region_code: params['data']['kl'] = region_code params['cookies']['kl'] = region_code - if params['time_range'] in time_range_dict: - params['data']['df'] = time_range_dict[params['time_range']] + params['data']['df'] = time_range_dict.get(params['time_range'], '') return params # get response from search-request def response(resp): + # ping + headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie']) + get(url_ping, headers=headers_ping) + + # parse the response results = [] - doc = fromstring(resp.text) - - # parse results for i, r in enumerate(eval_xpath(doc, result_xpath)): if i >= 30: break