mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[mod] botdetection: HTTP Fetch Metadata Request Headers
HTTP Fetch Metadata Request Headers [1][2] are used to detect bot requests. Bots with invalid *Fetch Metadata* will be redirected to the intro (`index`) page. [1] https://www.w3.org/TR/fetch-metadata/ [2] https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									543ab92fde
								
							
						
					
					
						commit
						1c9b28968d
					
				
					 4 changed files with 73 additions and 3 deletions
				
			
		|  | @ -53,6 +53,9 @@ Probe HTTP headers | |||
| .. automodule:: searx.botdetection.http_user_agent | ||||
|   :members: | ||||
| 
 | ||||
| .. automodule:: searx.botdetection.http_sec_fetch | ||||
|   :members: | ||||
| 
 | ||||
| .. _botdetection config: | ||||
| 
 | ||||
| Config | ||||
|  |  | |||
|  | @ -31,6 +31,9 @@ def dump_request(request: flask.Request): | |||
|         + " || Content-Length: %s" % request.headers.get('Content-Length') | ||||
|         + " || Connection: %s" % request.headers.get('Connection') | ||||
|         + " || User-Agent: %s" % request.headers.get('User-Agent') | ||||
|         + " || Sec-Fetch-Site: %s" % request.headers.get('Sec-Fetch-Site') | ||||
|         + " || Sec-Fetch-Mode: %s" % request.headers.get('Sec-Fetch-Mode') | ||||
|         + " || Sec-Fetch-Dest: %s" % request.headers.get('Sec-Fetch-Dest') | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										59
									
								
								searx/botdetection/http_sec_fetch.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								searx/botdetection/http_sec_fetch.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,59 @@ | |||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Method ``http_sec_fetch`` | ||||
| ------------------------- | ||||
| 
 | ||||
| The ``http_sec_fetch`` method protects resources from web attacks with `Fetch | ||||
| Metadata`_.  A request is filtered out in case of: | ||||
| 
 | ||||
| - http header Sec-Fetch-Mode_ is invalid | ||||
| - http header Sec-Fetch-Dest_ is invalid | ||||
| 
 | ||||
| .. _Fetch Metadata: | ||||
|    https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header | ||||
| 
 | ||||
| .. _Sec-Fetch-Dest: | ||||
|    https://developer.mozilla.org/en-US/docs/Web/API/Request/destination | ||||
| 
 | ||||
| .. _Sec-Fetch-Mode: | ||||
|    https://developer.mozilla.org/en-US/docs/Web/API/Request/mode | ||||
| 
 | ||||
| 
 | ||||
| """ | ||||
| # pylint: disable=unused-argument | ||||
| 
 | ||||
| from __future__ import annotations | ||||
| from ipaddress import ( | ||||
|     IPv4Network, | ||||
|     IPv6Network, | ||||
| ) | ||||
| 
 | ||||
| import flask | ||||
| import werkzeug | ||||
| 
 | ||||
| from . import config | ||||
| from ._helpers import logger | ||||
| 
 | ||||
| 
 | ||||
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Check the *Fetch Metadata* request headers of ``request``.

    The request passes only if all three headers hold the expected values:

    - ``Sec-Fetch-Mode`` is ``navigate``
    - ``Sec-Fetch-Site`` is one of ``same-origin``, ``same-site`` or ``none``
    - ``Sec-Fetch-Dest`` is ``document``

    A missing header is read as the empty string and therefore fails its
    check.

    :param network: not used by this method (kept for the common
        ``filter_request`` signature).
    :param request: the incoming request whose headers are inspected.
    :param cfg: not used by this method.
    :return: a 302 redirect to the ``index`` endpoint when one of the checks
        fails, otherwise ``None``.
    """

    val = request.headers.get("Sec-Fetch-Mode", "")
    if val != "navigate":
        logger.debug("invalid Sec-Fetch-Mode '%s'", val)
        return flask.redirect(flask.url_for('index'), code=302)

    val = request.headers.get("Sec-Fetch-Site", "")
    if val not in ('same-origin', 'same-site', 'none'):
        logger.debug("invalid Sec-Fetch-Site '%s'", val)
        # The redirect must be returned; building it without returning it
        # would let the request pass this check.
        return flask.redirect(flask.url_for('index'), code=302)

    val = request.headers.get("Sec-Fetch-Dest", "")
    if val != "document":
        logger.debug("invalid Sec-Fetch-Dest '%s'", val)
        return flask.redirect(flask.url_for('index'), code=302)

    return None
|  | @ -111,6 +111,7 @@ from searx.botdetection import ( | |||
|     http_accept_encoding, | ||||
|     http_accept_language, | ||||
|     http_user_agent, | ||||
|     http_sec_fetch, | ||||
|     ip_limit, | ||||
|     ip_lists, | ||||
|     get_network, | ||||
|  | @ -177,16 +178,17 @@ def filter_request(request: flask.Request) -> werkzeug.Response | None: | |||
|         logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg) | ||||
|         return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429)) | ||||
| 
 | ||||
|     # methods applied on / | ||||
|     # methods applied on all requests | ||||
| 
 | ||||
|     for func in [ | ||||
|         http_user_agent, | ||||
|     ]: | ||||
|         val = func.filter_request(network, request, cfg) | ||||
|         if val is not None: | ||||
|             logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(flask.request)) | ||||
|             return val | ||||
| 
 | ||||
|     # methods applied on /search | ||||
|     # methods applied on /search requests | ||||
| 
 | ||||
|     if request.path == '/search': | ||||
| 
 | ||||
|  | @ -195,12 +197,15 @@ def filter_request(request: flask.Request) -> werkzeug.Response | None: | |||
|             http_accept_encoding, | ||||
|             http_accept_language, | ||||
|             http_user_agent, | ||||
|             http_sec_fetch, | ||||
|             ip_limit, | ||||
|         ]: | ||||
|             val = func.filter_request(network, request, cfg) | ||||
|             if val is not None: | ||||
|                 logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(flask.request)) | ||||
|                 return val | ||||
|     logger.debug(f"OK {network}: %s", dump_request(flask.request)) | ||||
|         logger.debug(f"OK: {network}: %s", dump_request(flask.request)) | ||||
| 
 | ||||
|     return None | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser