forked from zaclys/searxng
		
	Merge pull request #2132 from dalf/update_pr_1967
search.suspended_time settings: bug fixes
This commit is contained in:
		
						commit
						9d102fb08f
					
				
					 6 changed files with 63 additions and 30 deletions
				
			
		| 
						 | 
				
			
			@ -110,6 +110,13 @@ Global Settings
 | 
			
		|||
     default_lang: ""
 | 
			
		||||
     ban_time_on_fail: 5
 | 
			
		||||
     max_ban_time_on_fail: 120
 | 
			
		||||
     suspended_times:
 | 
			
		||||
       SearxEngineAccessDenied: 86400
 | 
			
		||||
       SearxEngineCaptcha: 86400
 | 
			
		||||
       SearxEngineTooManyRequests: 3600
 | 
			
		||||
       cf_SearxEngineCaptcha: 1296000
 | 
			
		||||
       cf_SearxEngineAccessDenied: 86400
 | 
			
		||||
       recaptcha_SearxEngineCaptcha: 604800
 | 
			
		||||
     formats:
 | 
			
		||||
       - html
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -159,6 +166,25 @@ Global Settings
 | 
			
		|||
``max_ban_time_on_fail``:
 | 
			
		||||
  Max ban time in seconds after engine errors.
 | 
			
		||||
 | 
			
		||||
``suspended_times``:
 | 
			
		||||
  Engine suspension time after error (in seconds; set to 0 to disable)
 | 
			
		||||
 | 
			
		||||
  ``SearxEngineAccessDenied``: 86400
 | 
			
		||||
    For error "Access denied" and "HTTP error [402, 403]"
 | 
			
		||||
 | 
			
		||||
  ``SearxEngineCaptcha``: 86400
 | 
			
		||||
    For error "CAPTCHA"
 | 
			
		||||
 | 
			
		||||
  ``SearxEngineTooManyRequests``: 3600
 | 
			
		||||
    For error "Too many request" and "HTTP error 429"
 | 
			
		||||
 | 
			
		||||
  Cloudflare CAPTCHA:
 | 
			
		||||
     - ``cf_SearxEngineCaptcha``: 1296000
 | 
			
		||||
     - ``cf_SearxEngineAccessDenied``: 86400
 | 
			
		||||
 | 
			
		||||
  Google CAPTCHA:
 | 
			
		||||
    - ``recaptcha_SearxEngineCaptcha``: 604800
 | 
			
		||||
 | 
			
		||||
``formats``:
 | 
			
		||||
  Result formats available from web, remove format to deny access (use lower
 | 
			
		||||
  case).
 | 
			
		||||
| 
						 | 
				
			
			@ -168,6 +194,7 @@ Global Settings
 | 
			
		|||
  - ``json``
 | 
			
		||||
  - ``rss``
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.. _settings server:
 | 
			
		||||
 | 
			
		||||
``server:``
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										8
									
								
								docs/src/searx.exceptions.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								docs/src/searx.exceptions.rst
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,8 @@
 | 
			
		|||
.. _searx.exceptions:
 | 
			
		||||
 | 
			
		||||
==================
 | 
			
		||||
SearXNG Exceptions
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.exceptions
 | 
			
		||||
  :members:
 | 
			
		||||
| 
						 | 
				
			
			@ -62,8 +62,7 @@ sc_code = ''
 | 
			
		|||
def raise_captcha(resp):
 | 
			
		||||
 | 
			
		||||
    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
 | 
			
		||||
        # suspend CAPTCHA for 7 days
 | 
			
		||||
        raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
 | 
			
		||||
        raise SearxEngineCaptchaException()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_sc_code(headers):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,29 +1,19 @@
 | 
			
		|||
'''
 | 
			
		||||
searx is free software: you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU Affero General Public License as published by
 | 
			
		||||
the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
searx is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU Affero General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU Affero General Public License
 | 
			
		||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
 | 
			
		||||
 | 
			
		||||
(C) 2017- by Alexandre Flament, <alex@al-f.net>
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
# lint: pylint
 | 
			
		||||
"""Exception types raised by SearXNG modules.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from typing import Optional, Union
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SearxException(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
    """Base SearXNG exception."""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SearxParameterException(SearxException):
 | 
			
		||||
    """Raised when a query is missing a required parameter"""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, name, value):
 | 
			
		||||
        if value == '' or value is None:
 | 
			
		||||
            message = 'Empty ' + name + ' parameter'
 | 
			
		||||
| 
						 | 
				
			
			@ -70,26 +60,35 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
 | 
			
		|||
    """The website is blocking the access"""
 | 
			
		||||
 | 
			
		||||
    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
 | 
			
		||||
    """This setting contains the default suspended time (default 86400 sec / 1
 | 
			
		||||
    day)."""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, suspended_time=None, message='Access denied'):
 | 
			
		||||
    def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
 | 
			
		||||
        """Generic exception to raise when an engine denies access to the results.
 | 
			
		||||
 | 
			
		||||
        :param suspended_time: How long the engine is going to be suspended in
 | 
			
		||||
            seconds. Defaults to None.
 | 
			
		||||
        :type suspended_time: int, None
 | 
			
		||||
        :param message: Internal message.  Defaults to ``Access denied``
 | 
			
		||||
        :type message: str
 | 
			
		||||
        """
 | 
			
		||||
        suspended_time = suspended_time or self._get_default_suspended_time()
 | 
			
		||||
        super().__init__(message + ', suspended_time=' + str(suspended_time))
 | 
			
		||||
        self.suspended_time = suspended_time
 | 
			
		||||
        self.message = message
 | 
			
		||||
 | 
			
		||||
    def _get_default_suspended_time(self):
 | 
			
		||||
        from searx import get_setting
 | 
			
		||||
        from searx import get_setting  # pylint: disable=C0415
 | 
			
		||||
 | 
			
		||||
        return get_setting(self.SUSPEND_TIME_SETTING)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
 | 
			
		||||
    """The website has returned a CAPTCHA
 | 
			
		||||
 | 
			
		||||
    By default, searx stops sending requests to this engine for 1 day.
 | 
			
		||||
    """
 | 
			
		||||
    """The website has returned a CAPTCHA."""
 | 
			
		||||
 | 
			
		||||
    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
 | 
			
		||||
    """This setting contains the default suspended time (default 86400 sec / 1
 | 
			
		||||
    day)."""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, suspended_time=None, message='CAPTCHA'):
 | 
			
		||||
        super().__init__(message=message, suspended_time=suspended_time)
 | 
			
		||||
| 
						 | 
				
			
			@ -102,6 +101,8 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
 | 
			
		|||
    """
 | 
			
		||||
 | 
			
		||||
    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
 | 
			
		||||
    """This setting contains the default suspended time (default 3600 sec / 1
 | 
			
		||||
    hour)."""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, suspended_time=None, message='Too many request'):
 | 
			
		||||
        super().__init__(message=message, suspended_time=suspended_time)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -72,9 +72,7 @@ def raise_for_httperror(resp):
 | 
			
		|||
    if resp.status_code and resp.status_code >= 400:
 | 
			
		||||
        raise_for_captcha(resp)
 | 
			
		||||
        if resp.status_code in (402, 403):
 | 
			
		||||
            raise SearxEngineAccessDeniedException(
 | 
			
		||||
                message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24
 | 
			
		||||
            )
 | 
			
		||||
            raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code))
 | 
			
		||||
        if resp.status_code == 429:
 | 
			
		||||
            raise SearxEngineTooManyRequestsException()
 | 
			
		||||
        resp.raise_for_status()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,7 +45,7 @@ search:
 | 
			
		|||
  ban_time_on_fail: 5
 | 
			
		||||
  # max ban time in seconds after engine errors
 | 
			
		||||
  max_ban_time_on_fail: 120
 | 
			
		||||
  suspend_times:
 | 
			
		||||
  suspended_times:
 | 
			
		||||
    # Engine suspension time after error (in seconds; set to 0 to disable)
 | 
			
		||||
    # For error "Access denied" and "HTTP error [402, 403]"
 | 
			
		||||
    SearxEngineAccessDenied: 86400
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue