mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.
This commit is contained in:
		
							parent
							
								
									7fdfeca3a4
								
							
						
					
					
						commit
						15eef0ebdb
					
				
					 4 changed files with 133 additions and 38 deletions
				
			
		
							
								
								
									
										32
									
								
								searx/exceptions.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								searx/exceptions.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| ''' | ||||
| searx is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU Affero General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| searx is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU Affero General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU Affero General Public License | ||||
| along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| 
 | ||||
| (C) 2017- by Alexandre Flament, <alex@al-f.net> | ||||
| ''' | ||||
| 
 | ||||
| 
 | ||||
class SearxException(Exception):
    """Base class for the exceptions defined by searx."""


class SearxParameterException(SearxException):
    """Signal that a request parameter is missing or has an invalid value.

    The human-readable description is passed to the base exception and is
    also stored on the instance.

    Attributes:
        message: the description, either ``Empty <name> parameter`` or
            ``Invalid value "<value>" for parameter <name>``.
        parameter_name: name of the offending parameter.
        parameter_value: the rejected value, kept as it was given.
    """

    def __init__(self, name, value):
        """Build the error message from the parameter name and value.

        Args:
            name: name of the request parameter.
            value: the rejected value; ``''`` or ``None`` means the
                parameter was empty. It does not have to be a string.
        """
        if value == '' or value is None:
            message = 'Empty %s parameter' % (name,)
        else:
            # %-formatting instead of '+' concatenation: the value can be a
            # non-string (for instance an out-of-range integer), which would
            # make string concatenation raise a TypeError.
            message = 'Invalid value "%s" for parameter %s' % (value, name)
        super(SearxParameterException, self).__init__(message)
        # Stored explicitly so that handlers reading ``.message`` do not
        # depend on the implicit attribute, which Python 3 exceptions lack.
        self.message = message
        self.parameter_name = name
        self.parameter_value = value
|  | @ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery | |||
| from searx.results import ResultContainer | ||||
| from searx import logger | ||||
| from searx.plugins import plugins | ||||
| from searx.languages import language_codes | ||||
| from searx.exceptions import SearxParameterException | ||||
| 
 | ||||
| logger = logger.getChild('search') | ||||
| 
 | ||||
| number_of_searches = 0 | ||||
| 
 | ||||
# Lower-cased first field of every language_codes entry, plus the
# catch-all value 'all'; used to validate the requested search language.
language_code_set = {entry[0].lower() for entry in language_codes}
language_code_set.add('all')
| 
 | ||||
| 
 | ||||
| def send_http_request(engine, request_params, start_time, timeout_limit): | ||||
|     # for page_load_time stats | ||||
|  | @ -182,33 +187,13 @@ def default_request_params(): | |||
| 
 | ||||
| 
 | ||||
| def get_search_query_from_webapp(preferences, form): | ||||
|     query = None | ||||
|     query_engines = [] | ||||
|     query_categories = [] | ||||
|     query_pageno = 1 | ||||
|     query_lang = 'all' | ||||
|     query_time_range = None | ||||
|     # no text for the query ? | ||||
|     if not form.get('q'): | ||||
|         raise SearxParameterException('q', '') | ||||
| 
 | ||||
|     # set blocked engines | ||||
|     disabled_engines = preferences.engines.get_disabled() | ||||
| 
 | ||||
|     # set specific language if set | ||||
|     query_lang = preferences.get_value('language') | ||||
| 
 | ||||
|     # safesearch | ||||
|     query_safesearch = preferences.get_value('safesearch') | ||||
| 
 | ||||
|     # TODO better exceptions | ||||
|     if not form.get('q'): | ||||
|         raise Exception('noquery') | ||||
| 
 | ||||
|     # set pagenumber | ||||
|     pageno_param = form.get('pageno', '1') | ||||
|     if not pageno_param.isdigit() or int(pageno_param) < 1: | ||||
|         pageno_param = 1 | ||||
| 
 | ||||
|     query_pageno = int(pageno_param) | ||||
| 
 | ||||
|     # parse query, if tags are set, which change | ||||
|     # the search engine or search-language | ||||
|     raw_text_query = RawTextQuery(form['q'], disabled_engines) | ||||
|  | @ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form): | |||
|     # set query | ||||
|     query = raw_text_query.getSearchQuery() | ||||
| 
 | ||||
|     # get and check page number | ||||
|     pageno_param = form.get('pageno', '1') | ||||
|     if not pageno_param.isdigit() or int(pageno_param) < 1: | ||||
|         raise SearxParameterException('pageno', pageno_param) | ||||
|     query_pageno = int(pageno_param) | ||||
| 
 | ||||
|     # get language | ||||
|     # set specific language if set on request, query or preferences | ||||
|     # TODO support search with multiple languages | ||||
|     if len(raw_text_query.languages): | ||||
|  | @ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form): | |||
|     else: | ||||
|         query_lang = preferences.get_value('language') | ||||
| 
 | ||||
|     # check language | ||||
|     if query_lang not in language_code_set: | ||||
|         raise SearxParameterException('language', query_lang) | ||||
| 
 | ||||
|     # get safesearch | ||||
|     if 'safesearch' in form: | ||||
|         query_safesearch = form.get('safesearch') | ||||
|         # first check safesearch | ||||
|         if not query_safesearch.isdigit(): | ||||
|             raise SearxParameterException('safesearch', query_safesearch) | ||||
|         query_safesearch = int(query_safesearch) | ||||
|     else: | ||||
|         query_safesearch = preferences.get_value('safesearch') | ||||
| 
 | ||||
|     # safesearch : second check | ||||
|     if query_safesearch < 0 or query_safesearch > 2: | ||||
|         raise SearxParameterException('safesearch', query_safesearch) | ||||
| 
 | ||||
|     # get time_range | ||||
|     query_time_range = form.get('time_range') | ||||
| 
 | ||||
|     # check time_range | ||||
|     if not(query_time_range is None)\ | ||||
|        and not (query_time_range in ['', 'day', 'week', 'month', 'year']): | ||||
|         raise SearxParameterException('time_range', query_time_range) | ||||
| 
 | ||||
|     # query_engines | ||||
|     query_engines = raw_text_query.engines | ||||
| 
 | ||||
|     # query_categories | ||||
|     query_categories = [] | ||||
| 
 | ||||
|     # if engines are calculated from query, | ||||
|     # set categories by using that information | ||||
|     if query_engines and raw_text_query.specific: | ||||
|  |  | |||
|  | @ -11,6 +11,12 @@ | |||
|     <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage> | ||||
|     <atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/> | ||||
|     <opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" /> | ||||
|     {% if error_message %} | ||||
|     <item> | ||||
|       <title>Error</title> | ||||
|       <description>{{ error_message|e }}</description> | ||||
|     </item> | ||||
|     {% endif %} | ||||
|     {% for r in results %} | ||||
|     <item> | ||||
|       <title>{{ r.title }}</title> | ||||
|  |  | |||
|  | @ -52,6 +52,7 @@ from flask import ( | |||
| from flask_babel import Babel, gettext, format_date, format_decimal | ||||
| from flask.json import jsonify | ||||
| from searx import settings, searx_dir, searx_debug | ||||
| from searx.exceptions import SearxException, SearxParameterException | ||||
| from searx.engines import ( | ||||
|     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines | ||||
| ) | ||||
|  | @ -400,6 +401,33 @@ def pre_request(): | |||
|             request.user_plugins.append(plugin) | ||||
| 
 | ||||
| 
 | ||||
def index_error(output_format, error_message):
    """Build the error response matching the requested output format.

    - ``json``: a JSON object ``{"error": error_message}``.
    - ``csv``: an empty ``searx.csv`` attachment (the message is not sent).
    - ``rss``: the opensearch RSS template with no results and the
      ``error_message`` passed to it.
    - anything else (html): the index page, after appending the generic
      translated 'search error' text to ``request.errors``.

    Args:
        output_format: one of 'json', 'csv', 'rss'; any other value is
            handled as html.
        error_message: text describing the error.

    Returns:
        A ``Response`` for json, csv and rss; the result of ``render`` for
        html.
    """
    if output_format == 'json':
        payload = json.dumps({'error': error_message})
        return Response(payload, mimetype='application/json')

    if output_format == 'csv':
        csv_response = Response('', mimetype='application/csv')
        csv_response.headers.add('Content-Disposition',
                                 'attachment;Filename=searx.csv')
        return csv_response

    if output_format == 'rss':
        # echo the submitted query back when there is one
        query_text = request.form.get('q', '')
        rss_body = render(
            'opensearch_response_rss.xml',
            results=[],
            q=query_text,
            number_of_results=0,
            base_url=get_base_url(),
            error_message=error_message
        )
        return Response(rss_body, mimetype='text/xml')

    # html: show the generic error on the index page
    request.errors.append(gettext('search error'))
    return render('index.html')
| 
 | ||||
| 
 | ||||
| @app.route('/search', methods=['GET', 'POST']) | ||||
| @app.route('/', methods=['GET', 'POST']) | ||||
| def index(): | ||||
|  | @ -408,10 +436,19 @@ def index(): | |||
|     Supported outputs: html, json, csv, rss. | ||||
|     """ | ||||
| 
 | ||||
|     # output_format | ||||
|     output_format = request.form.get('format', 'html') | ||||
|     if output_format not in ['html', 'csv', 'json', 'rss']: | ||||
|         output_format = 'html' | ||||
| 
 | ||||
|     # check if there is query | ||||
|     if request.form.get('q') is None: | ||||
|         return render( | ||||
|             'index.html', | ||||
|         ) | ||||
|         if output_format == 'html': | ||||
|             return render( | ||||
|                 'index.html', | ||||
|             ) | ||||
|         else: | ||||
|             return index_error(output_format, 'No query'), 400 | ||||
| 
 | ||||
|     # search | ||||
|     search_query = None | ||||
|  | @ -421,20 +458,24 @@ def index(): | |||
|         # search = Search(search_query) #  without plugins | ||||
|         search = SearchWithPlugins(search_query, request) | ||||
|         result_container = search.search() | ||||
|     except: | ||||
|         request.errors.append(gettext('search error')) | ||||
|     except Exception as e: | ||||
|         # log exception | ||||
|         logger.exception('search error') | ||||
|         return render( | ||||
|             'index.html', | ||||
|         ) | ||||
| 
 | ||||
|         # is it an invalid input parameter or something else ? | ||||
|         if (issubclass(e.__class__, SearxParameterException)): | ||||
|             return index_error(output_format, e.message), 400 | ||||
|         else: | ||||
|             return index_error(output_format, gettext('search error')), 500 | ||||
| 
 | ||||
|     # results | ||||
|     results = result_container.get_ordered_results() | ||||
|     number_of_results = result_container.results_number() | ||||
|     if number_of_results < result_container.results_length(): | ||||
|         number_of_results = 0 | ||||
| 
 | ||||
|     # UI | ||||
|     advanced_search = request.form.get('advanced_search', None) | ||||
|     output_format = request.form.get('format', 'html') | ||||
|     if output_format not in ['html', 'csv', 'json', 'rss']: | ||||
|         output_format = 'html' | ||||
| 
 | ||||
|     # output | ||||
|     for result in results: | ||||
|  | @ -470,10 +511,6 @@ def index(): | |||
|                 else: | ||||
|                     result['publishedDate'] = format_date(result['publishedDate']) | ||||
| 
 | ||||
|     number_of_results = result_container.results_number() | ||||
|     if number_of_results < result_container.results_length(): | ||||
|         number_of_results = 0 | ||||
| 
 | ||||
|     if output_format == 'json': | ||||
|         return Response(json.dumps({'query': search_query.query, | ||||
|                                     'number_of_results': number_of_results, | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Alexandre Flament
						Alexandre Flament