mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	
						commit
						38e0b9360b
					
				
					 4 changed files with 66 additions and 30 deletions
				
			
		| 
						 | 
				
			
			@ -5,6 +5,7 @@
 | 
			
		|||
import asyncio
 | 
			
		||||
import threading
 | 
			
		||||
import concurrent.futures
 | 
			
		||||
from types import MethodType
 | 
			
		||||
from timeit import default_timer
 | 
			
		||||
 | 
			
		||||
import httpx
 | 
			
		||||
| 
						 | 
				
			
			@ -161,19 +162,32 @@ def patch(url, data=None, **kwargs):
 | 
			
		|||
def delete(url, **kwargs):
 | 
			
		||||
    return request('delete', url, **kwargs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
 | 
			
		||||
    try:
 | 
			
		||||
        async with network.stream(method, url, **kwargs) as response:
 | 
			
		||||
            queue.put(response)
 | 
			
		||||
            async for chunk in response.aiter_bytes(65536):
 | 
			
		||||
            # aiter_raw: access the raw bytes on the response without applying any HTTP content decoding
 | 
			
		||||
            # https://www.python-httpx.org/quickstart/#streaming-responses
 | 
			
		||||
            async for chunk in response.aiter_raw(65536):
 | 
			
		||||
                if len(chunk) > 0:
 | 
			
		||||
                    queue.put(chunk)
 | 
			
		||||
    except httpx.ResponseClosed as e:
 | 
			
		||||
        # the response was closed
 | 
			
		||||
        pass
 | 
			
		||||
    except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
 | 
			
		||||
        queue.put(e)
 | 
			
		||||
    finally:
 | 
			
		||||
        queue.put(None)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _close_response_method(self):
 | 
			
		||||
    asyncio.run_coroutine_threadsafe(
 | 
			
		||||
        self.aclose(),
 | 
			
		||||
        get_loop()
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def stream(method, url, **kwargs):
 | 
			
		||||
    """Replace httpx.stream.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -191,10 +205,19 @@ def stream(method, url, **kwargs):
 | 
			
		|||
        stream_chunk_to_queue(get_network(), queue, method, url, **kwargs),
 | 
			
		||||
        get_loop()
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # yield response
 | 
			
		||||
    response = queue.get()
 | 
			
		||||
    if isinstance(response, Exception):
 | 
			
		||||
        raise response
 | 
			
		||||
    response.close = MethodType(_close_response_method, response)
 | 
			
		||||
    yield response
 | 
			
		||||
 | 
			
		||||
    # yield chunks
 | 
			
		||||
    chunk_or_exception = queue.get()
 | 
			
		||||
    while chunk_or_exception is not None:
 | 
			
		||||
        if isinstance(chunk_or_exception, Exception):
 | 
			
		||||
            raise chunk_or_exception
 | 
			
		||||
        yield chunk_or_exception
 | 
			
		||||
        chunk_or_exception = queue.get()
 | 
			
		||||
    return future.result()
 | 
			
		||||
    future.result()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -289,6 +289,14 @@ def initialize(settings_engines=None, settings_outgoing=None):
 | 
			
		|||
        if isinstance(network, str):
 | 
			
		||||
            NETWORKS[engine_name] = NETWORKS[network]
 | 
			
		||||
 | 
			
		||||
    # the /image_proxy endpoint has a dedicated network.
 | 
			
		||||
    # same parameters as the default network, but HTTP/2 is disabled.
 | 
			
		||||
    # It decreases the CPU load average, and the total time is more or less the same
 | 
			
		||||
    if 'image_proxy' not in NETWORKS:
 | 
			
		||||
        image_proxy_params = default_params.copy()
 | 
			
		||||
        image_proxy_params['enable_http2'] = False
 | 
			
		||||
        NETWORKS['image_proxy'] = new_network(image_proxy_params)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@atexit.register
 | 
			
		||||
def done():
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -262,11 +262,7 @@ def dict_subset(d, properties):
 | 
			
		|||
        >>> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'D'])
 | 
			
		||||
        {'A': 'a'}
 | 
			
		||||
    """
 | 
			
		||||
    result = {}
 | 
			
		||||
    for k in properties:
 | 
			
		||||
        if k in d:
 | 
			
		||||
            result[k] = d[k]
 | 
			
		||||
    return result
 | 
			
		||||
    return {k: d[k] for k in properties if k in d}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_torrent_size(filesize, filesize_multiplier):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -108,7 +108,7 @@ from searx.autocomplete import search_autocomplete, backends as autocomplete_bac
 | 
			
		|||
from searx.languages import language_codes as languages
 | 
			
		||||
from searx.locales import LOCALE_NAMES, UI_LOCALE_CODES, RTL_LOCALES
 | 
			
		||||
from searx.search import SearchWithPlugins, initialize as search_initialize
 | 
			
		||||
from searx.network import stream as http_stream
 | 
			
		||||
from searx.network import stream as http_stream, set_context_network_name
 | 
			
		||||
from searx.search.checker import get_result as checker_get_result
 | 
			
		||||
from searx.settings_loader import get_default_settings_path
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1065,7 +1065,7 @@ def _is_selected_language_supported(engine, preferences):  # pylint: disable=red
 | 
			
		|||
 | 
			
		||||
@app.route('/image_proxy', methods=['GET'])
 | 
			
		||||
def image_proxy():
 | 
			
		||||
    # pylint: disable=too-many-return-statements
 | 
			
		||||
    # pylint: disable=too-many-return-statements, too-many-branches
 | 
			
		||||
 | 
			
		||||
    url = request.args.get('url')
 | 
			
		||||
    if not url:
 | 
			
		||||
| 
						 | 
				
			
			@ -1076,17 +1076,21 @@ def image_proxy():
 | 
			
		|||
        return '', 400
 | 
			
		||||
 | 
			
		||||
    maximum_size = 5 * 1024 * 1024
 | 
			
		||||
 | 
			
		||||
    forward_resp = False
 | 
			
		||||
    resp = None
 | 
			
		||||
    try:
 | 
			
		||||
        headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
 | 
			
		||||
        headers['User-Agent'] = gen_useragent()
 | 
			
		||||
        request_headers = {
 | 
			
		||||
            'User-Agent': gen_useragent(),
 | 
			
		||||
            'Accept': 'image/webp,*/*',
 | 
			
		||||
            'Accept-Encoding': 'gzip, deflate',
 | 
			
		||||
            'Sec-GPC': '1',
 | 
			
		||||
            'DNT': '1',
 | 
			
		||||
        }
 | 
			
		||||
        set_context_network_name('image_proxy')
 | 
			
		||||
        stream = http_stream(
 | 
			
		||||
            method = 'GET',
 | 
			
		||||
            url = url,
 | 
			
		||||
            headers = headers,
 | 
			
		||||
            timeout = settings['outgoing']['request_timeout'],
 | 
			
		||||
            allow_redirects = True,
 | 
			
		||||
            max_redirects = 20
 | 
			
		||||
            headers = request_headers
 | 
			
		||||
        )
 | 
			
		||||
        resp = next(stream)
 | 
			
		||||
        content_length = resp.headers.get('Content-Length')
 | 
			
		||||
| 
						 | 
				
			
			@ -1095,32 +1099,37 @@ def image_proxy():
 | 
			
		|||
            and int(content_length) > maximum_size ):
 | 
			
		||||
            return 'Max size', 400
 | 
			
		||||
 | 
			
		||||
        if resp.status_code == 304:
 | 
			
		||||
            return '', resp.status_code
 | 
			
		||||
 | 
			
		||||
        if resp.status_code != 200:
 | 
			
		||||
            logger.debug(
 | 
			
		||||
                'image-proxy: wrong response code: {0}'.format(
 | 
			
		||||
                    resp.status_code))
 | 
			
		||||
            logger.debug('image-proxy: wrong response code: %i', resp.status_code)
 | 
			
		||||
            if resp.status_code >= 400:
 | 
			
		||||
                return '', resp.status_code
 | 
			
		||||
            return '', 400
 | 
			
		||||
 | 
			
		||||
        if not resp.headers.get('content-type', '').startswith('image/'):
 | 
			
		||||
            logger.debug(
 | 
			
		||||
                'image-proxy: wrong content-type: {0}'.format(
 | 
			
		||||
                    resp.headers.get('content-type')))
 | 
			
		||||
        if not resp.headers.get('Content-Type', '').startswith('image/'):
 | 
			
		||||
            logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', ''))
 | 
			
		||||
            return '', 400
 | 
			
		||||
 | 
			
		||||
        forward_resp = True
 | 
			
		||||
    except httpx.HTTPError:
 | 
			
		||||
        logger.exception('HTTP error')
 | 
			
		||||
        return '', 400
 | 
			
		||||
    finally:
 | 
			
		||||
        if resp and not forward_resp:
 | 
			
		||||
            # the code is about to return an HTTP 400 error to the browser
 | 
			
		||||
            # we make sure to close the response between searxng and the HTTP server
 | 
			
		||||
            try:
 | 
			
		||||
                resp.close()
 | 
			
		||||
            except httpx.HTTPError:
 | 
			
		||||
                logger.exception('HTTP error on closing')
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        headers = dict_subset(
 | 
			
		||||
            resp.headers,
 | 
			
		||||
            {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}
 | 
			
		||||
            {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        total_length = 0
 | 
			
		||||
 | 
			
		||||
        def forward_chunk():
 | 
			
		||||
            nonlocal total_length
 | 
			
		||||
            total_length = 0
 | 
			
		||||
            for chunk in stream:
 | 
			
		||||
                total_length += len(chunk)
 | 
			
		||||
                if total_length > maximum_size:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue