From ff527e268170852563830bf5b29a65515a98d2bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sat, 13 Mar 2021 20:27:47 +0100 Subject: [PATCH] Add Solr engine --- Makefile | 1 + searx/engines/solr.py | 74 +++++++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 11 +++++++ 3 files changed, 86 insertions(+) create mode 100644 searx/engines/solr.py diff --git a/Makefile b/Makefile index a4e9110ed..745ff5b91 100644 --- a/Makefile +++ b/Makefile @@ -192,6 +192,7 @@ PYLINT_FILES=\ searx/engines/google_images.py \ searx/engines/mediathekviewweb.py \ searx/engines/solidtorrents.py \ + searx/engines/solr.py \ searx/engines/google_scholar.py \ searx/engines/yahoo_news.py \ searx/engines/apkmirror.py \ diff --git a/searx/engines/solr.py b/searx/engines/solr.py new file mode 100644 index 000000000..0bfcbab36 --- /dev/null +++ b/searx/engines/solr.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Solr +""" + +# pylint: disable=global-statement, missing-function-docstring + +from json import loads +from urllib.parse import urlencode +from searx.exceptions import SearxEngineAPIException + + +base_url = 'http://localhost:8983' +collection = '' +rows = 10 +sort = '' # sorting: '<field> asc' or '<field> desc' +field_list = 'name' # list of field names to display on the UI +default_fields = '' # default field to query +query_fields = '' # fields to query, sent as the qf parameter +_search_url = '' +paging = True + + +def init(_): + if collection == '': + raise ValueError('collection cannot be empty') + + global _search_url + _search_url = base_url + '/solr/' + collection + '/select?{params}' + + +def request(query, params): + query_params = {'q': query, 'rows': rows} + if field_list != '': + query_params['fl'] = field_list + if query_fields != '': + query_params['qf'] = query_fields + if default_fields != '': + query_params['df'] = default_fields + if sort != '': + query_params['sort'] = sort + + if 'pageno' in params: + query_params['start'] = rows * (params['pageno'] - 1) + 
+ params['url'] = _search_url.format(params=urlencode(query_params)) + + return params + + +def response(resp): + resp_json = __get_response(resp) + + results = [] + for result in resp_json['response']['docs']: + r = {key: str(value) for key, value in result.items()} + if len(r) == 0: + continue + r['template'] = 'key-value.html' + results.append(r) + + return results + + +def __get_response(resp): + try: + resp_json = loads(resp.text) + except Exception as e: + raise SearxEngineAPIException("failed to parse response") from e + + if 'error' in resp_json: + raise SearxEngineAPIException(resp_json['error']['msg']) + + return resp_json diff --git a/searx/settings.yml b/searx/settings.yml index 85ba4b2fe..da84e82b5 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -943,6 +943,17 @@ engines: # api_client_id : ******* # api_client_secret : ******* +# - name : solr +# engine : solr +# shortcut : slr +# base_url : http://localhost:8983 +# collection : collection_name +# sort : '' # sorting: '<field> asc' or '<field> desc' +# field_list : '' # comma separated list of field names to display on the UI +# default_fields : '' # default field to query +# query_fields : '' # fields to query, sent as the qf parameter +# enable_http : True + - name : startpage engine : startpage shortcut : sp