mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[enh] limiter plugin
can replace filtron: * rate limit the number of requests per IP and per (IP, User-Agent) * block some bots. Uses Redis; the data stored in Redis never contains the IP addresses, only an HMAC computed with the secret_key. Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									c82b9c68d2
								
							
						
					
					
						commit
						f79b0fce06
					
				
					 4 changed files with 140 additions and 0 deletions
				
			
		
							
								
								
									
										9
									
								
								docs/src/searx.plugins.limiter.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								docs/src/searx.plugins.limiter.rst
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,9 @@ | ||||||
|  | .. _limiter plugin: | ||||||
|  | 
 | ||||||
|  | ============== | ||||||
|  | Limiter Plugin | ||||||
|  | ============== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.plugins.limiter | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
							
								
								
									
										129
									
								
								searx/plugins/limiter.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								searx/plugins/limiter.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,129 @@ | ||||||
|  | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|  | # lint: pylint | ||||||
|  | # pyright: basic | ||||||
|  | """Some bot protection / rate limitation | ||||||
|  | 
 | ||||||
|  | Enable the plugin in ``settings.yml``: | ||||||
|  | 
 | ||||||
|  | - ``server.limiter: true`` | ||||||
|  | - ``redis.url: ...`` check the value, see :ref:`settings redis` | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | import hmac | ||||||
|  | import re | ||||||
|  | from flask import request | ||||||
|  | 
 | ||||||
|  | from searx.shared import redisdb | ||||||
|  | 
 | ||||||
# Plugin metadata.
name = "Request limiter"
description = "Limit the number of request"
default_on = False
preference_section = 'service'

# User-Agent fragments of command line tools, HTTP client libraries and
# crawlers.  The dot in ``archive.org_bot`` is escaped so that it only matches
# a literal dot, and every alternative is listed exactly once.
re_bot = re.compile(
    r'('
    + r'[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
    + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
    + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive\.org_bot|msnbot'
    + r'|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
    + r'|ZmEu|BLEXBot|bitlybot'
    + r')'
)
|  | 
 | ||||||
|  | 
 | ||||||
def is_accepted_request(inc_get_counter) -> bool:
    """Decide whether the current Flask request should be served.

    :param inc_get_counter: callable ``inc_get_counter(interval=..., keys=[...])``
        that records one hit for the given keys and returns the number of hits
        seen within the last ``interval`` seconds.
    :return: ``True`` if the request is accepted, ``False`` if it has to be
        rejected.

    Rules, in evaluation order:

    - ``/image_proxy``: only the bot User-Agent check is applied.
    - all other paths: at most 30 requests per 25 seconds for one
      ``X-Forwarded-For`` value, and at most 30 requests per 60 seconds for one
      (``X-Forwarded-For``, ``User-Agent``) pair.
    - search requests (``/`` or ``/search`` with a ``q`` parameter): reject bot
      User-Agents and requests whose headers do not look like a browser; for
      non-HTML formats allow at most 4 requests per hour and
      ``X-Forwarded-For`` value.
    """
    # pylint: disable=too-many-return-statements
    headers = request.headers
    user_agent = headers.get('User-Agent', '')
    # The header value is used as-is to identify the client.
    x_forwarded_for = headers.get('X-Forwarded-For', '')

    # ``search`` instead of ``match``: crawler tokens usually appear in the
    # middle of the User-Agent ("Mozilla/5.0 (compatible; Googlebot/2.1; ...)"),
    # an anchored match would never see them.
    is_bot = re_bot.search(user_agent) is not None

    if request.path == '/image_proxy':
        return not is_bot

    if inc_get_counter(interval=25, keys=[b'IP limit, all paths', x_forwarded_for]) > 30:
        return False

    if inc_get_counter(interval=60, keys=[b'useragent limit, all paths', x_forwarded_for, user_agent]) > 30:
        return False

    is_search = request.path in ('/', '/search') and ('q' in request.args or 'q' in request.form)
    if not is_search:
        return True

    if is_bot:
        return False

    if 'Accept-Language' not in headers:
        return False

    if headers.get('Connection') == 'close':
        return False

    accepted_encodings = [encoding.strip() for encoding in headers.get('Accept-Encoding', '').split(',')]
    if 'gzip' not in accepted_encodings or 'deflate' not in accepted_encodings:
        return False

    if 'text/html' not in request.accept_mimetypes:
        return False

    if request.args.get('format', 'html') != 'html':
        # API like usage (json, csv, rss, ...) gets a much smaller budget
        if inc_get_counter(interval=3600, keys=[b'API limit', x_forwarded_for]) > 4:
            return False

    return True
|  | 
 | ||||||
|  | 
 | ||||||
def create_inc_get_counter(redis_client, secret_key_bytes):
    """Build the sliding window counter backed by Redis.

    :param redis_client: Redis client; ``script_load`` and ``evalsha`` are used.
    :param secret_key_bytes: key (bytes) of the HMAC that hides the counted
        values (IP, User-Agent, ...) from the Redis database.
    :return: function ``inc_get_counter(interval, keys)`` which registers one
        hit for ``keys`` and returns the number of hits registered for the same
        ``keys`` during the last ``interval`` seconds.
    """
    # Each hit is stored as a member of a sorted set, scored by the Redis
    # server time.  'counter' is a global sequence that yields unique member
    # ids; it is reset once it exceeds 2^46.
    lua_script = """
    local slidingWindow = KEYS[1]
    local key = KEYS[2]
    local now = tonumber(redis.call('TIME')[1])
    local id = redis.call('INCR', 'counter')
    if (id > 2^46)
    then
        redis.call('SET', 'counter', 0)
    end
    redis.call('ZREMRANGEBYSCORE', key, 0, now - slidingWindow)
    redis.call('ZADD', key, now, id)
    local result = redis.call('ZCOUNT', key, 0, now+1)
    redis.call('EXPIRE', key, slidingWindow)
    return result
    """
    script_sha = redis_client.script_load(lua_script)

    def inc_get_counter(interval, keys):
        # Only the HMAC of the keys is sent to Redis.  Every part is
        # terminated by a NUL byte so that ['ab', 'c'] and ['a', 'bc'] end up
        # in different counters.
        m = hmac.new(secret_key_bytes, digestmod='sha256')
        for k in keys:
            m.update(str(k).encode('utf-8'))
            m.update(b"\0")
        key = m.digest()
        return redis_client.evalsha(script_sha, 2, interval, key)

    return inc_get_counter
|  | 
 | ||||||
|  | 
 | ||||||
def create_pre_request(get_aggregation_count):
    """Return the ``before_request`` handler of the limiter.

    :param get_aggregation_count: counter function handed over to
        :py:func:`is_accepted_request`.
    :return: function without arguments that returns ``None`` for an accepted
        request and an empty body with HTTP status 429 for a rejected one.
    """

    def pre_request():
        if is_accepted_request(get_aggregation_count):
            return None
        # 429 Too Many Requests
        return '', 429

    return pre_request
|  | 
 | ||||||
|  | 
 | ||||||
def init(app, settings):
    """Set up the limiter plugin.

    :param app: application object; the limiter is registered with
        ``app.before_request``.
    :param settings: settings mapping; ``server.limiter`` and
        ``server.secret_key`` are read.
    :return: ``True`` when the limiter has been installed, ``False`` when it is
        disabled in the settings or when the Redis DB can not be initialized.
    """
    server_settings = settings['server']
    if not server_settings['limiter']:
        return False

    # ``logger`` is not defined in this module (hence the pylint pragmas);
    # presumably it is injected by the plugin loader -- TODO confirm.
    logger.debug("init limiter DB")  # pylint: disable=undefined-variable
    if not redisdb.init():
        logger.error("init limiter DB failed!!!")  # pylint: disable=undefined-variable
        return False

    hmac_key = bytes(server_settings['secret_key'], encoding='utf-8')
    counter = create_inc_get_counter(redisdb.client(), hmac_key)
    app.before_request(create_pre_request(counter))
    return True
|  | @ -43,6 +43,7 @@ server: | ||||||
|   port: 8888 |   port: 8888 | ||||||
|   bind_address: "127.0.0.1" |   bind_address: "127.0.0.1" | ||||||
|   base_url: false  # Possible values: false or "https://example.org/location". |   base_url: false  # Possible values: false or "https://example.org/location". | ||||||
|  |   limiter: false  # rate limit the number of requests on the instance, block some bots | ||||||
| 
 | 
 | ||||||
|   # If your instance owns a /etc/searxng/settings.yml file, then set the following |   # If your instance owns a /etc/searxng/settings.yml file, then set the following | ||||||
|   # values there. |   # values there. | ||||||
|  |  | ||||||
|  | @ -163,6 +163,7 @@ SCHEMA = { | ||||||
|     'server': { |     'server': { | ||||||
|         'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'), |         'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'), | ||||||
|         'bind_address': SettingsValue(str, '127.0.0.1', 'SEARXNG_BIND_ADDRESS'), |         'bind_address': SettingsValue(str, '127.0.0.1', 'SEARXNG_BIND_ADDRESS'), | ||||||
|  |         'limiter': SettingsValue(bool, False), | ||||||
|         'secret_key': SettingsValue(str, environ_name='SEARXNG_SECRET'), |         'secret_key': SettingsValue(str, environ_name='SEARXNG_SECRET'), | ||||||
|         'base_url': SettingsValue((False, str), False), |         'base_url': SettingsValue((False, str), False), | ||||||
|         'image_proxy': SettingsValue(bool, False), |         'image_proxy': SettingsValue(bool, False), | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Alexandre Flament
						Alexandre Flament