forked from zaclys/searxng
		
	[enh] limiter plugin
Can replace filtron: * rate-limits the number of requests per IP and per (IP, User-Agent) * blocks some bots. Uses Redis; the data stored in Redis never contains the IP addresses, only HMACs computed with the secret_key. Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									c82b9c68d2
								
							
						
					
					
						commit
						f79b0fce06
					
				
					 4 changed files with 140 additions and 0 deletions
				
			
		
							
								
								
									
										9
									
								
								docs/src/searx.plugins.limiter.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								docs/src/searx.plugins.limiter.rst
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,9 @@ | |||
| .. _limiter plugin: | ||||
| 
 | ||||
| ============== | ||||
| Limiter Plugin | ||||
| ============== | ||||
| 
 | ||||
| .. automodule:: searx.plugins.limiter | ||||
|   :members: | ||||
| 
 | ||||
							
								
								
									
										129
									
								
								searx/plugins/limiter.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								searx/plugins/limiter.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,129 @@ | |||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| # pyright: basic | ||||
| """Some bot protection / rate limitation | ||||
| 
 | ||||
| Enable the plugin in ``settings.yml``: | ||||
| 
 | ||||
| - ``server.limiter: true`` | ||||
| - ``redis.url: ...`` check the value, see :ref:`settings redis` | ||||
| """ | ||||
| 
 | ||||
| import hmac | ||||
| import re | ||||
| from flask import request | ||||
| 
 | ||||
| from searx.shared import redisdb | ||||
| 
 | ||||
# Plugin metadata (module-level attributes describing this plugin).
name = "Request limiter"
description = "Limit the number of request"
default_on = False
preference_section = 'service'

# User-Agent prefixes of command line tools, HTTP libraries and crawlers.
#
# The pattern is applied with ``re_bot.match()``, i.e. it is anchored at the
# beginning of the User-Agent string.  Every alternative is meant as literal
# text, therefore the dot of ``archive.org_bot`` is escaped.
re_bot = re.compile(
    r'('
    r'[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
    r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
    r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive\.org_bot|msnbot'
    r'|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
    r'|ZmEu|BLEXBot|bitlybot'
    r')'
)
| 
 | ||||
| 
 | ||||
def is_accepted_request(inc_get_counter) -> bool:
    """Decide whether the current flask request is served or rejected.

    :param inc_get_counter: callable ``inc_get_counter(interval=..., keys=...)``
        which records one hit for ``keys`` and returns the number of hits
        counted for these keys within the last ``interval`` seconds.
    :return: ``True`` when the request is accepted, ``False`` when it has to
        be rejected.

    The client is identified by the value of the ``X-Forwarded-For`` header
    (an empty string when the header is missing).
    """
    # pylint: disable=too-many-return-statements
    headers = request.headers
    user_agent = headers.get('User-Agent', '')
    x_forwarded_for = headers.get('X-Forwarded-For', '')

    # The image proxy is not rate limited, only known bots are rejected.
    if request.path == '/image_proxy':
        return not re_bot.match(user_agent)

    # More than 30 requests within 25 seconds from one client.
    ip_hits = inc_get_counter(interval=25, keys=[b'IP limit, all paths', x_forwarded_for])
    if ip_hits > 30:
        return False

    # More than 30 requests within 60 seconds from one (client, User-Agent).
    ua_hits = inc_get_counter(interval=60, keys=[b'useragent limit, all paths', x_forwarded_for, user_agent])
    if ua_hits > 30:
        return False

    # The remaining checks apply to search queries only.
    has_query = 'q' in request.args or 'q' in request.form
    if request.path not in ('/', '/search') or not has_query:
        return True

    if re_bot.match(user_agent):
        return False

    # Headers expected from a regular web browser.
    if 'Accept-Language' not in headers:
        return False

    if headers.get('Connection') == 'close':
        return False

    accepted_encodings = {item.strip() for item in headers.get('Accept-Encoding', '').split(',')}
    if not {'gzip', 'deflate'}.issubset(accepted_encodings):
        return False

    if 'text/html' not in request.accept_mimetypes:
        return False

    # Output formats other than HTML: at most 4 requests per hour and client.
    if request.args.get('format', 'html') != 'html':
        api_hits = inc_get_counter(interval=3600, keys=[b'API limit', x_forwarded_for])
        if api_hits > 4:
            return False

    return True
| 
 | ||||
| 
 | ||||
def create_inc_get_counter(redis_client, secret_key_bytes):
    """Build the sliding window counter function backed by Redis.

    A Lua script is loaded into Redis once.  On each call the script removes
    the entries older than the sliding window from a sorted set, adds one
    entry for the current request (scored with the Redis server time) and
    returns the number of entries left in the set.  The sorted set expires
    after one window without any request.

    :param redis_client: Redis client providing ``script_load`` and ``evalsha``.
    :param secret_key_bytes: key (bytes) of the HMAC used to derive the Redis
        key, so the clear text values of ``keys`` are never stored in Redis.
    :return: function ``inc_get_counter(interval, keys)`` which returns the
        number of calls made with the same ``keys`` in the last ``interval``
        seconds, the current call included.
    """
    # KEYS[1] is the window length in seconds and KEYS[2] the name of the
    # sorted set: evalsha() below is called with numkeys=2.
    #
    # The global 'counter' provides a unique member for each ZADD.  When it
    # gets too big it is reset to 0: the reset has to be done on 'counter',
    # the key which is incremented (not on an unrelated 'count' key).
    lua_script = """
    local slidingWindow = KEYS[1]
    local key = KEYS[2]
    local now = tonumber(redis.call('TIME')[1])
    local id = redis.call('INCR', 'counter')
    if (id > 2^46)
    then
        redis.call('SET', 'counter', 0)
    end
    redis.call('ZREMRANGEBYSCORE', key, 0, now - slidingWindow)
    redis.call('ZADD', key, now, id)
    local result = redis.call('ZCOUNT', key, 0, now+1)
    redis.call('EXPIRE', key, slidingWindow)
    return result
    """
    script_sha = redis_client.script_load(lua_script)

    def inc_get_counter(interval, keys):
        """Count one hit for ``keys`` and return the hits of the last
        ``interval`` seconds."""
        m = hmac.new(secret_key_bytes, digestmod='sha256')
        for k in keys:
            # str() is applied to every item (bytes included) before the
            # encoding; the NUL byte separates the items from each other.
            m.update(str(k).encode('utf-8'))
            m.update(b"\0")
        key = m.digest()
        return redis_client.evalsha(script_sha, 2, interval, key)

    return inc_get_counter
| 
 | ||||
| 
 | ||||
def create_pre_request(get_aggregation_count):
    """Build the hook which rejects the requests over the limits.

    :param get_aggregation_count: counter function handed over to
        ``is_accepted_request``.
    :return: function without arguments which returns ``None`` when the
        current request is accepted, otherwise an empty body with the HTTP
        status 429.
    """

    def pre_request():
        if is_accepted_request(get_aggregation_count):
            return None
        return '', 429

    return pre_request
| 
 | ||||
| 
 | ||||
def init(app, settings):
    """Initialize the plugin: register the limiter as ``before_request`` hook.

    :param app: application object, ``app.before_request`` is called to
        register the hook.
    :param settings: settings, ``settings['server']['limiter']`` and
        ``settings['server']['secret_key']`` are read.
    :return: ``True`` when the hook has been registered, ``False`` when the
        limiter is disabled in the settings or when the Redis DB can't be
        initialized.
    """
    server_settings = settings['server']
    if not server_settings['limiter']:
        return False

    # ``logger`` is not defined in this module -- presumably it is injected by
    # the plugin loader (to be confirmed).
    logger.debug("init limiter DB")  # pylint: disable=undefined-variable
    if not redisdb.init():
        logger.error("init limiter DB failed!!!")  # pylint: disable=undefined-variable
        return False

    redis_client = redisdb.client()
    secret_key_bytes = bytes(server_settings['secret_key'], encoding='utf-8')
    counter = create_inc_get_counter(redis_client, secret_key_bytes)
    hook = create_pre_request(counter)
    app.before_request(hook)
    return True
|  | @ -43,6 +43,7 @@ server: | |||
|   port: 8888 | ||||
|   bind_address: "127.0.0.1" | ||||
|   base_url: false  # Possible values: false or "https://example.org/location". | ||||
|   limiter: false  # rate limit the number of requests on the instance, block some bots | ||||
| 
 | ||||
|   # If your instance owns a /etc/searxng/settings.yml file, then set the following | ||||
|   # values there. | ||||
|  |  | |||
|  | @ -163,6 +163,7 @@ SCHEMA = { | |||
|     'server': { | ||||
|         'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'), | ||||
|         'bind_address': SettingsValue(str, '127.0.0.1', 'SEARXNG_BIND_ADDRESS'), | ||||
|         'limiter': SettingsValue(bool, False), | ||||
|         'secret_key': SettingsValue(str, environ_name='SEARXNG_SECRET'), | ||||
|         'base_url': SettingsValue((False, str), False), | ||||
|         'image_proxy': SettingsValue(bool, False), | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Alexandre Flament
						Alexandre Flament