mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	New engine: Elasticsearch
This commit is contained in:
		
							parent
							
								
									aa6eaf603f
								
							
						
					
					
						commit
						43e697681e
					
				
					 3 changed files with 158 additions and 1 deletions
				
			
		
							
								
								
									
										142
									
								
								searx/engines/elasticsearch.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								searx/engines/elasticsearch.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,142 @@ | ||||||
|  | from json import loads, dumps | ||||||
|  | from lxml import html | ||||||
|  | from urllib.parse import quote, urljoin | ||||||
|  | from requests.auth import HTTPBasicAuth | ||||||
|  | from searx.utils import extract_text, get_torrent_size | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# Where the Elasticsearch node is reached and, optionally, the credentials
# used for HTTP basic authentication (only sent when both are non-empty).
base_url = 'http://localhost:9200'
username = ''
password = ''

# Name of the index to search; init() refuses an empty value.
index = ''

# NOTE: evaluated once, when this module is executed, from the two values
# defined above.
search_url = '/'.join((base_url, index, '_search'))

# Key into _available_query_types selecting how the request body is built.
query_type = 'match'

# Query template used when query_type is 'custom'.
custom_query_json = dict()

# When true, response() attaches the hit's index, id and score to each result.
show_metadata = False

categories = ['general']
|  | 
 | ||||||
|  | 
 | ||||||
def init(engine_settings):
    """
    Validate the engine configuration and refresh the search endpoint.

    ``engine_settings`` is the mapping of settings supplied for this engine;
    only its optional ``query_type`` entry is inspected here.

    Raises ValueError if ``query_type`` is present but is not a key of
    ``_available_query_types``, or if the module-level ``index`` is empty.
    """
    global search_url

    if 'query_type' in engine_settings and engine_settings['query_type'] not in _available_query_types:
        raise ValueError('unsupported query type', engine_settings['query_type'])

    if index == '':
        raise ValueError('index cannot be empty')

    # search_url is first computed when the module is executed, i.e. from the
    # default base_url and the empty default index.  Rebuild it from the
    # values that just passed validation so request() targets the configured
    # index instead of '<base_url>//_search'.
    # NOTE(review): assumes base_url/index are assigned on this module before
    # init() runs -- confirm against the engine loader.
    search_url = base_url + '/' + index + '/_search'
|  | 
 | ||||||
|  | 
 | ||||||
def request(query, params):
    """
    Fill ``params`` with the HTTP request that runs ``query``.

    When the configured ``query_type`` has no entry in
    ``_available_query_types`` the parameters are handed back untouched.
    Otherwise the URL, method, JSON body and content type are set, and basic
    authentication is added when both ``username`` and ``password`` are set.
    The same ``params`` object is returned in either case.
    """
    if query_type in _available_query_types:
        if username and password:
            params['auth'] = HTTPBasicAuth(username, password)

        params.update(url=search_url, method='GET')

        # The body is sent as JSON even though the method is GET.
        build_body = _available_query_types[query_type]
        params['data'] = dumps(build_body(query))
        params['headers']['Content-Type'] = 'application/json'

    return params
|  | 
 | ||||||
|  | 
 | ||||||
def _match_query(query):
    """
    The standard for full text queries.
    searx format: "key:value" e.g. city:berlin
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html

    Only the first ':' separates the field name from the value, so the value
    itself may contain colons (e.g. url:http://example.org).

    Raises ValueError if the query contains no ':' at all.
    """

    try:
        # maxsplit=1: everything after the first colon belongs to the value.
        key, value = query.split(':', 1)
    except ValueError as err:
        # Only the unpacking failure is translated; unrelated errors propagate.
        raise ValueError('query format must be "key:value"') from err

    return {"query": {"match": {key: {'query': value}}}}
|  | 
 | ||||||
|  | 
 | ||||||
def _simple_query_string_query(query):
    """
    Wrap the raw query text in a ``simple_query_string`` request body.

    This query type accepts query strings but is less strict than query_string.
    The field used can be specified in index.query.default_field in Elasticsearch.
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
    """

    # The user's text is passed through unchanged.
    clause = {'query': query}
    return {'query': {'simple_query_string': clause}}
|  | 
 | ||||||
|  | 
 | ||||||
def _term_query(query):
    """
    Accepts one term and the name of the field.
    searx format: "key:value" e.g. city:berlin
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html

    Only the first ':' separates the field name from the term, so the term
    itself may contain colons.

    Raises ValueError if the query contains no ':' at all.
    """

    try:
        # maxsplit=1: everything after the first colon belongs to the term.
        key, value = query.split(':', 1)
    except ValueError as err:
        # Only the unpacking failure is translated; unrelated errors propagate.
        raise ValueError('query format must be key:value') from err

    return {'query': {'term': {key: value}}}
|  | 
 | ||||||
|  | 
 | ||||||
def _terms_query(query):
    """
    Accepts multiple terms and the name of the field.
    searx format: "key:value1,value2" e.g. city:berlin,paris
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html

    Only the first ':' separates the field name from the comma-separated
    terms, so individual terms may contain colons.

    Raises ValueError if the query contains no ':' at all.
    """

    try:
        # maxsplit=1: everything after the first colon is the list of terms.
        key, values = query.split(':', 1)
    except ValueError as err:
        # Only the unpacking failure is translated; unrelated errors propagate.
        raise ValueError('query format must be key:value1,value2') from err

    return {'query': {'terms': {key: values.split(',')}}}
|  | 
 | ||||||
|  | 
 | ||||||
def _custom_query(query):
    """
    Build the request body from the administrator-defined template.
    searx format: "key:value" e.g. city:berlin

    A copy of ``custom_query_json`` is returned in which every dictionary key
    equal to ``{{KEY}}`` is replaced by the key part of the query and every
    value equal to ``{{VALUE}}`` is replaced by the value part.  Placeholders
    are substituted at any nesting depth, inside dictionaries and lists.

    The template itself is never modified, so the placeholders are still
    there for the next query.

    Raises ValueError if the query contains no ':' at all.
    """
    try:
        # maxsplit=1: everything after the first colon belongs to the value.
        key, value = query.split(':', 1)
    except ValueError as err:
        raise ValueError('query format must be key:value') from err

    def _fill(node):
        # Rebuild containers instead of editing them in place: renaming a key
        # while iterating a dict raises RuntimeError, and editing the shared
        # template would erase its placeholders after the first request.
        if isinstance(node, dict):
            return {
                (key if node_key == '{{KEY}}' else node_key): _fill(node_value)
                for node_key, node_value in node.items()
            }
        if isinstance(node, list):
            return [_fill(item) for item in node]
        if isinstance(node, str) and node == '{{VALUE}}':
            return value
        return node

    return _fill(custom_query_json)
|  | 
 | ||||||
|  | 
 | ||||||
def response(resp):
    """
    Turn the JSON body of an Elasticsearch reply into a list of results.

    Each hit's ``_source`` becomes one result dictionary rendered with the
    ``key-value.html`` template.  Values are converted to strings unless
    their field name starts with an underscore.  When ``show_metadata`` is
    enabled, the hit's index, id and score are attached under ``metadata``.

    Raises Exception carrying the ``error`` entry if the reply contains one.
    """
    payload = loads(resp.text)
    if 'error' in payload:
        raise Exception(payload['error'])

    results = []
    for hit in payload['hits']['hits']:
        entry = {}
        for field, content in hit['_source'].items():
            # Underscore-prefixed fields keep their original type.
            entry[field] = content if field.startswith('_') else str(content)
        entry['template'] = 'key-value.html'

        if show_metadata:
            entry['metadata'] = {
                'index': hit['_index'],
                'id': hit['_id'],
                'score': hit['_score'],
            }

        results.append(entry)

    return results
|  | 
 | ||||||
|  | 
 | ||||||
# Dispatch table: maps each supported ``query_type`` setting to the function
# that builds the corresponding request body from the user's query.
_available_query_types = dict(
    # Full text queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
    match=_match_query,
    simple_query_string=_simple_query_string_query,

    # Term-level queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
    term=_term_query,
    terms=_terms_query,

    # Query JSON defined by the instance administrator.
    custom=_custom_query,
)
|  | @ -110,6 +110,7 @@ def send_http_request(engine, request_params): | ||||||
|         req = requests_lib.get |         req = requests_lib.get | ||||||
|     else: |     else: | ||||||
|         req = requests_lib.post |         req = requests_lib.post | ||||||
|  | 
 | ||||||
|     request_args['data'] = request_params['data'] |     request_args['data'] = request_params['data'] | ||||||
| 
 | 
 | ||||||
|     # send the request |     # send the request | ||||||
|  |  | ||||||
|  | @ -231,6 +231,20 @@ engines: | ||||||
|     shortcut : ew |     shortcut : ew | ||||||
|     disabled : True |     disabled : True | ||||||
| 
 | 
 | ||||||
|  | #  - name : elasticsearch | ||||||
|  | #    shortcut : es | ||||||
|  | #    engine : elasticsearch | ||||||
|  | #    base_url : http://localhost:9200 | ||||||
|  | #    username : elastic | ||||||
|  | #    password : changeme | ||||||
|  | #    index : my-index | ||||||
|  | #    # available options: match, simple_query_string, term, terms, custom | ||||||
|  | #    query_type : match | ||||||
|  | #    # if query_type is set to custom, provide your query here | ||||||
|  | #    #custom_query_json: {"query":{"match_all": {}}} | ||||||
|  | #    #show_metadata: False | ||||||
|  | #    disabled : True | ||||||
|  | 
 | ||||||
|   - name : wikidata |   - name : wikidata | ||||||
|     engine : wikidata |     engine : wikidata | ||||||
|     shortcut : wd |     shortcut : wd | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Noémi Ványi
						Noémi Ványi