Merge pull request #798 from dalf/searchpy4

[mod] add a search_one_request_safe function wrapper
This commit is contained in:
Adam Tauber 2016-12-30 13:01:04 +01:00 committed by GitHub
commit eaa0fb8102

View File

@ -20,6 +20,7 @@ import threading
from thread import start_new_thread from thread import start_new_thread
from time import time from time import time
from uuid import uuid4 from uuid import uuid4
import requests.exceptions
import searx.poolrequests as requests_lib import searx.poolrequests as requests_lib
from searx.engines import ( from searx.engines import (
categories, engines categories, engines
@ -37,8 +38,9 @@ number_of_searches = 0
def send_http_request(engine, request_params, timeout_limit): def send_http_request(engine, request_params, timeout_limit):
response = None # for page_load_time stats
try: time_before_request = time()
# create dictionary which contain all # create dictionary which contain all
# informations about the request # informations about the request
request_args = dict( request_args = dict(
@ -47,6 +49,7 @@ def send_http_request(engine, request_params, timeout_limit):
timeout=timeout_limit, timeout=timeout_limit,
verify=request_params['verify'] verify=request_params['verify']
) )
# specific type of request (GET or POST) # specific type of request (GET or POST)
if request_params['method'] == 'GET': if request_params['method'] == 'GET':
req = requests_lib.get req = requests_lib.get
@ -54,12 +57,15 @@ def send_http_request(engine, request_params, timeout_limit):
req = requests_lib.post req = requests_lib.post
request_args['data'] = request_params['data'] request_args['data'] = request_params['data']
# for page_load_time stats
time_before_request = time()
# send the request # send the request
response = req(request_params['url'], **request_args) response = req(request_params['url'], **request_args)
# is there a timeout (no parsing in this case)
timeout_overhead = 0.2 # seconds
search_duration = time() - request_params['started']
if search_duration > timeout_limit + timeout_overhead:
raise Timeout(response=response)
with threading.RLock(): with threading.RLock():
# no error : reset the suspend variables # no error : reset the suspend variables
engine.continuous_errors = 0 engine.continuous_errors = 0
@ -69,77 +75,80 @@ def send_http_request(engine, request_params, timeout_limit):
engine.stats['page_load_time'] += time() - time_before_request engine.stats['page_load_time'] += time() - time_before_request
engine.stats['page_load_count'] += 1 engine.stats['page_load_count'] += 1
# is there a timeout (no parsing in this case)
timeout_overhead = 0.2 # seconds
search_duration = time() - request_params['started']
if search_duration > timeout_limit + timeout_overhead:
logger.exception('engine timeout on HTTP request:'
'{0} (search duration : {1} ms, time-out: {2} )'
.format(engine.name, search_duration, timeout_limit))
with threading.RLock():
engine.stats['errors'] += 1
return False
# everything is ok : return the response # everything is ok : return the response
return response return response
except:
# increase errors stats
with threading.RLock():
engine.stats['errors'] += 1
engine.continuous_errors += 1
engine.suspend_end_time = time() + min(60, engine.continuous_errors)
# print engine name and specific error message
logger.exception('engine crash: {0}'.format(engine.name))
return False
def search_one_request(engine, query, request_params, timeout_limit):
    """Run one query against one engine and return its parsed results.

    The engine first fills in ``request_params`` (URL, method, ...) through
    ``engine.request``; the HTTP request is then sent and the response is
    handed to ``engine.response`` for parsing.

    No exception is caught here: anything raised by the engine or by
    ``send_http_request`` propagates to the caller.

    :param engine: engine module/object exposing ``request`` and ``response``
    :param query: the search query passed through to ``engine.request``
    :param request_params: dict of request parameters, updated in place by
        the engine; must contain the ``'url'`` key after ``engine.request``
    :param timeout_limit: timeout in seconds forwarded to the HTTP request
    :returns: whatever ``engine.response`` returns, or an empty list when
        the engine did not provide a URL
    """
    # update request parameters dependent on
    # search-engine (contained in engines folder)
    engine.request(query, request_params)

    # ignoring empty urls: a single truthiness test covers both None and ''
    if not request_params['url']:
        return []

    # send request
    response = send_http_request(engine, request_params, timeout_limit)

    # parse the response; the engine can read the parameters back from it
    response.search_params = request_params
    return engine.response(response)
def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit):
    """Exception-safe wrapper around ``search_one_request``.

    Looks the engine up by name, runs the request, tags every result with
    the engine name and adds the results to ``result_container``. Any
    exception is caught, counted in the engine's error stats and logged; for
    exceptions coming from ``requests`` the engine's ``continuous_errors``
    counter and ``suspend_end_time`` are updated as well.

    :param engine_name: key into the module-level ``engines`` mapping
    :param query: the search query
    :param request_params: dict of request parameters for this engine
    :param result_container: object exposing ``extend(engine_name, results)``
    :param timeout_limit: timeout in seconds, forwarded to the request and
        reported in the log messages
    :returns: ``True`` when the request and parsing succeeded, ``False``
        when any exception was raised
    """
    start_time = time()
    engine = engines[engine_name]

    try:
        # send requests and parse the results
        search_results = search_one_request(engine, query, request_params, timeout_limit)

        # add results
        for result in search_results:
            result['engine'] = engine_name
        result_container.extend(engine_name, search_results)

        # update engine time when there is no exception
        # NOTE(review): threading.RLock() builds a fresh lock on every call,
        # so this `with` does not serialize access between threads; a shared
        # lock would have to live outside this function -- confirm intent.
        with threading.RLock():
            engine.stats['engine_time'] += time() - start_time
            engine.stats['engine_time_count'] += 1

        return True

    except Exception as e:
        engine.stats['errors'] += 1

        search_duration = time() - start_time
        requests_exception = False

        # Timeout is a subclass of RequestException, so the chain must be
        # if / elif / else: with two independent `if`s a timeout would be
        # logged twice (once here, once with a traceback below).
        if isinstance(e, requests.exceptions.Timeout):
            # requests timeout (connect or read)
            logger.error("engine {0} : HTTP requests timeout"
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
            requests_exception = True
        elif isinstance(e, requests.exceptions.RequestException):
            # other requests exception
            logger.exception("engine {0} : requests exception"
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(engine_name, search_duration, timeout_limit, e))
            requests_exception = True
        else:
            # others errors
            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))

        # update continuous_errors / suspend_end_time
        if requests_exception:
            with threading.RLock():
                engine.continuous_errors += 1
                # suspension grows with consecutive errors, capped at 60 s
                engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        return False
def search_multiple_requests(requests, result_container, timeout_limit): def search_multiple_requests(requests, result_container, timeout_limit):
@ -148,7 +157,7 @@ def search_multiple_requests(requests, result_container, timeout_limit):
for engine_name, query, request_params in requests: for engine_name, query, request_params in requests:
th = threading.Thread( th = threading.Thread(
target=search_one_request, target=search_one_request_safe,
args=(engine_name, query, request_params, result_container, timeout_limit), args=(engine_name, query, request_params, result_container, timeout_limit),
name=search_id, name=search_id,
) )