forked from zaclys/searxng

[mod] add a search_one_request_safe function wrapper to call search_one_request. All exceptions are caught in this wrapper.
This commit is contained in:

parent bff41987a4
commit 0a2fde19d0

1 changed file with 88 additions and 79 deletions
searx/search.py  (167 changes: 88 additions, 79 deletions)
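In short, the diff below moves all error handling out of send_http_request and search_one_request and into the new search_one_request_safe wrapper: send_http_request now raises on timeout instead of returning False, search_one_request simply returns the parsed results (or an empty list when an engine provides no URL), and the wrapper catches every exception, logs it, updates the engine's error statistics, and suspends the engine after repeated requests failures.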
@@ -20,6 +20,7 @@ import threading
 from thread import start_new_thread
 from time import time
 from uuid import uuid4
+import requests.exceptions
 import searx.poolrequests as requests_lib
 from searx.engines import (
     categories, engines
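The newly imported requests.exceptions module is what the wrapper below uses to tell network failures apart from engine parsing bugs. One detail worth keeping in mind when reading that code: in the requests library, Timeout is itself a subclass of RequestException, so a timeout satisfies both issubclass checks. A minimal standalone sketch of the distinction (plain Python, independent of searx; the classify helper is hypothetical, not part of the commit):

    import requests.exceptions

    # Timeout inherits from RequestException, so any classifier that tests
    # Timeout first and RequestException second will see a timeout match both.
    assert issubclass(requests.exceptions.Timeout,
                      requests.exceptions.RequestException)

    def classify(exc):
        # Hypothetical helper: label an exception raised while sending a request.
        if isinstance(exc, requests.exceptions.Timeout):
            return 'timeout'
        elif isinstance(exc, requests.exceptions.RequestException):
            return 'requests-error'
        return 'engine-error'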
@@ -37,109 +38,117 @@ number_of_searches = 0
 
 
 def send_http_request(engine, request_params, timeout_limit):
-    response = None
-    try:
-        # create dictionary which contain all
-        # informations about the request
-        request_args = dict(
-            headers=request_params['headers'],
-            cookies=request_params['cookies'],
-            timeout=timeout_limit,
-            verify=request_params['verify']
-        )
-        # specific type of request (GET or POST)
-        if request_params['method'] == 'GET':
-            req = requests_lib.get
-        else:
-            req = requests_lib.post
-            request_args['data'] = request_params['data']
+    # for page_load_time stats
+    time_before_request = time()
 
-        # for page_load_time stats
-        time_before_request = time()
+    # create dictionary which contain all
+    # informations about the request
+    request_args = dict(
+        headers=request_params['headers'],
+        cookies=request_params['cookies'],
+        timeout=timeout_limit,
+        verify=request_params['verify']
+    )
 
-        # send the request
-        response = req(request_params['url'], **request_args)
+    # specific type of request (GET or POST)
+    if request_params['method'] == 'GET':
+        req = requests_lib.get
+    else:
+        req = requests_lib.post
+        request_args['data'] = request_params['data']
 
-        with threading.RLock():
-            # no error : reset the suspend variables
-            engine.continuous_errors = 0
-            engine.suspend_end_time = 0
-            # update stats with current page-load-time
-            # only the HTTP request
-            engine.stats['page_load_time'] += time() - time_before_request
-            engine.stats['page_load_count'] += 1
+    # send the request
+    response = req(request_params['url'], **request_args)
 
-        # is there a timeout (no parsing in this case)
-        timeout_overhead = 0.2  # seconds
-        search_duration = time() - request_params['started']
-        if search_duration > timeout_limit + timeout_overhead:
-            logger.exception('engine timeout on HTTP request:'
-                             '{0} (search duration : {1} ms, time-out: {2} )'
-                             .format(engine.name, search_duration, timeout_limit))
-            with threading.RLock():
-                engine.stats['errors'] += 1
-            return False
+    # is there a timeout (no parsing in this case)
+    timeout_overhead = 0.2  # seconds
+    search_duration = time() - request_params['started']
+    if search_duration > timeout_limit + timeout_overhead:
+        raise Timeout(response=response)
 
-        # everything is ok : return the response
-        return response
+    with threading.RLock():
+        # no error : reset the suspend variables
+        engine.continuous_errors = 0
+        engine.suspend_end_time = 0
+        # update stats with current page-load-time
+        # only the HTTP request
+        engine.stats['page_load_time'] += time() - time_before_request
+        engine.stats['page_load_count'] += 1
 
-    except:
-        # increase errors stats
-        with threading.RLock():
-            engine.stats['errors'] += 1
-            engine.continuous_errors += 1
-            engine.suspend_end_time = time() + min(60, engine.continuous_errors)
-
-        # print engine name and specific error message
-        logger.exception('engine crash: {0}'.format(engine.name))
-        return False
+    # everything is ok : return the response
+    return response
 
 
-def search_one_request(engine_name, query, request_params, result_container, timeout_limit):
-    engine = engines[engine_name]
-
+def search_one_request(engine, query, request_params, timeout_limit):
     # update request parameters dependent on
     # search-engine (contained in engines folder)
     engine.request(query, request_params)
 
-    # TODO add support of offline engines
-    if request_params['url'] is None:
-        return False
-
     # ignoring empty urls
+    if request_params['url'] is None:
+        return []
+
     if not request_params['url']:
-        return False
+        return []
 
     # send request
     response = send_http_request(engine, request_params, timeout_limit)
 
-    # parse response
-    success = None
-    if response:
-        # parse the response
-        response.search_params = request_params
-        try:
-            search_results = engine.response(response)
-        except:
-            logger.exception('engine crash: {0}'.format(engine.name))
-            search_results = []
+    # parse the response
+    response.search_params = request_params
+    return engine.response(response)
+
+
+def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit):
+    start_time = time()
+    engine = engines[engine_name]
+
+    try:
+        # send requests and parse the results
+        search_results = search_one_request(engine, query, request_params, timeout_limit)
 
         # add results
         for result in search_results:
-            result['engine'] = engine.name
+            result['engine'] = engine_name
+        result_container.extend(engine_name, search_results)
 
-        result_container.extend(engine.name, search_results)
+        # update engine time when there is no exception
+        with threading.RLock():
+            engine.stats['engine_time'] += time() - start_time
+            engine.stats['engine_time_count'] += 1
 
-        success = True
-    else:
-        success = False
+        return True
 
-    with threading.RLock():
-        # update stats : total time
-        engine.stats['engine_time'] += time() - request_params['started']
-        engine.stats['engine_time_count'] += 1
+    except Exception as e:
+        engine.stats['errors'] += 1
 
-    return success
+        search_duration = time() - start_time
+        requests_exception = False
+
+        if (issubclass(e.__class__, requests.exceptions.Timeout)):
+            # requests timeout (connect or read)
+            logger.error("engine {0} : HTTP requests timeout"
+                         "(search duration : {1} s, timeout: {2} s) : {3}"
+                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
+            requests_exception = True
+        if (issubclass(e.__class__, requests.exceptions.RequestException)):
+            # other requests exception
+            logger.exception("engine {0} : requests exception"
+                             "(search duration : {1} s, timeout: {2} s) : {3}"
+                             .format(engine_name, search_duration, timeout_limit, e))
+            requests_exception = True
+        else:
+            # others errors
+            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
+
+        # update continuous_errors / suspend_end_time
+        if requests_exception:
+            with threading.RLock():
+                engine.continuous_errors += 1
+                engine.suspend_end_time = time() + min(60, engine.continuous_errors)
+
+        return False
 
 
 def search_multiple_requests(requests, result_container, timeout_limit):
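The wrapper follows a general pattern: do the risky work, record success statistics under a lock, and translate any exception into logging plus a False return, so the calling thread never dies. A stripped-down sketch of the same pattern, with hypothetical run_safe/do_work/stats names rather than the commit's own code:

    import logging
    import threading
    from time import time

    logger = logging.getLogger(__name__)
    stats_lock = threading.RLock()

    def run_safe(name, do_work, stats):
        # Hypothetical reduction of search_one_request_safe: call do_work(),
        # keep timing/error stats, and never let an exception escape.
        start_time = time()
        try:
            do_work()
            with stats_lock:
                stats['time'] = stats.get('time', 0) + time() - start_time
            return True
        except Exception as e:
            with stats_lock:
                stats['errors'] = stats.get('errors', 0) + 1
            logger.exception('%s: exception: %s', name, e)
            return False

Note also the backoff in the except branch of the hunk above: after n consecutive requests failures, suspend_end_time is pushed min(60, n) seconds into the future, so an engine's suspension grows linearly with its error streak and caps at one minute.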
@@ -148,7 +157,7 @@ def search_multiple_requests(requests, result_container, timeout_limit):
 
     for engine_name, query, request_params in requests:
         th = threading.Thread(
-            target=search_one_request,
+            target=search_one_request_safe,
             args=(engine_name, query, request_params, result_container, timeout_limit),
             name=search_id,
         )
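With search_one_request_safe as the thread target, an engine that crashes or times out can no longer kill its worker thread with an unhandled exception; the wrapper just returns False. Roughly, the call site now behaves like this sketch (based on the hunk above; search_id and the join step live in parts of searx/search.py not shown in this diff, so the join below is an assumption):

    threads = []
    for engine_name, query, request_params in requests:
        th = threading.Thread(
            target=search_one_request_safe,
            args=(engine_name, query, request_params, result_container, timeout_limit),
            name=search_id,
        )
        th.start()
        threads.append(th)

    # assumed join step: wait for each engine thread within the time budget
    for th in threads:
        th.join(timeout_limit)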