From 9f395dc57d9177c2856800bd4c9d9dc9e843c026 Mon Sep 17 00:00:00 2001 From: capric98 <42015599+capric98@users.noreply.github.com> Date: Thu, 7 Apr 2022 00:36:01 +0800 Subject: [PATCH] add log info --- searx/plugins/limiter.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py index 188a03eaf..59ff7e79a 100644 --- a/searx/plugins/limiter.py +++ b/searx/plugins/limiter.py @@ -25,6 +25,10 @@ default_on = False preference_section = 'service' +c_burst_limit = 15 +c_10min_limit = 150 + + re_bot = re.compile( r'(' + r'[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp' @@ -36,6 +40,11 @@ re_bot = re.compile( ) +if "logger" not in locals(): + import logging + logger = logging.getLogger("searx.plugins.limiter") + + def cdn_reported_ip(headers: dict) -> str: # some commonly used CDNs: cdn_keys = [ @@ -62,6 +71,7 @@ def is_accepted_request(inc_get_counter) -> bool: if request.path == '/image_proxy': if re_bot.match(user_agent): + logger.info(f"reject [{client_ip}]: client may be a bot described by UA=\"{user_agent}\"") return False return True @@ -69,13 +79,16 @@ def is_accepted_request(inc_get_counter) -> bool: c_burst = inc_get_counter(interval=20, keys=[b'IP limit, burst', client_ip]) c_10min = inc_get_counter(interval=600, keys=[b'IP limit, 10 minutes', client_ip]) - if c_burst > 15 or c_10min > 150: + if c_burst > c_burst_limit or c_10min > c_10min_limit: + logger.info(f"reject [{client_ip}]: c_burst({c_burst})>{c_burst_limit} or c_10min({c_10min})>{c_10min_limit}") return False if re_bot.match(user_agent): + logger.info(f"reject [{client_ip}]: client may be a bot described by UA=\"{user_agent}\"") return False if request.headers.get('Accept-Language', '').strip(): + logger.info(f"reject [{client_ip}]: empty Accept-Language") return False # If SearXNG is behind Cloudflare, all requests will get 429 because @@ -89,6 +102,7 @@ def is_accepted_request(inc_get_counter) -> bool: # return False if 'text/html' not in request.accept_mimetypes: + logger.info(f"reject [{client_ip}]: non-html request to /search") return False # IDK but maybe api limit should based on path not format?