From 7528e38c8a412d9f56eba05b34d2f24f3471f21d Mon Sep 17 00:00:00 2001 From: spongebob33 Date: Fri, 26 Mar 2021 12:22:49 +0100 Subject: [PATCH 1/3] add core.ac.uk engine --- searx/engines/core.py | 63 +++++++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 9 +++++++ 2 files changed, 72 insertions(+) create mode 100644 searx/engines/core.py diff --git a/searx/engines/core.py b/searx/engines/core.py new file mode 100644 index 000000000..99b4b524b --- /dev/null +++ b/searx/engines/core.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + +Core Engine (science) + +""" + +from json import loads +from datetime import datetime +from urllib.parse import urlencode + +about = { + "website": 'https://core.ac.uk', + "wikidata_id": 'Q22661180', + "official_api_documentation": 'https://core.ac.uk/documentation/api/', + "use_official_api": True, + "require_api_key": True, + "results": 'JSON', +} + +categories = ['science'] + +paging = True +nb_per_page = 20 + + +# apikey = '' +apikey = 'MVBozuTX8QF9I1D0GviL5bCn2Ueat6NS' + + +base_url = 'https://core.ac.uk:443/api-v2/search/' +search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}' + + +def request(query, params): + + search_path = search_string.format( + query=urlencode({'q': query}), + nb_per_page=nb_per_page, + page=params['pageno'], + apikey=apikey) + + params['url'] = base_url + search_path + return params + + +def response(resp): + results = [] + + json_data = loads(resp.text) + for result in json_data['data']: + time = result['_source']['publishedDate'] + if time is None: + date = datetime.now() + else: + date = datetime.fromtimestamp(time / 1000) + results.append({ + 'url': result['_source']['urls'][0], + 'title': result['_source']['title'], + 'content': result['_source']['description'], + 'publishedDate': date}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 5e0b4cd7d..4217e5bfa 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ 
-272,6 +272,15 @@ engines: categories : images shortcut : cce + - name : core.ac.uk + engine : core + categories : science + shortcut : cor + # get your API key from: https://core.ac.uk/api-keys/register/ + # api_key : "xxxxxxxx" + # set api_key and comment out disabled .. + disabled: True + - name : crossref engine : json_engine paging : True From 8efabd3ab75085402dc40911425b6db6e1bbdab3 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 4 Apr 2021 12:48:24 +0200 Subject: [PATCH 2/3] [mod] core.ac.uk engine - add to list of pylint scripts - add debug log messages - move API key into `settings.yml` - improved readability - add some metadata to results Signed-off-by: Markus Heiser --- manage | 1 + searx/engines/core.py | 67 +++++++++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/manage b/manage index 006241b45..9c322adb4 100755 --- a/manage +++ b/manage @@ -38,6 +38,7 @@ PYLINT_FILES=( searx/engines/yahoo_news.py searx/engines/apkmirror.py searx/engines/artic.py + searx/engines/core.py searx_extra/update/update_external_bangs.py searx/metrics/__init__.py ) diff --git a/searx/engines/core.py b/searx/engines/core.py index 99b4b524b..3a1147f35 100644 --- a/searx/engines/core.py +++ b/searx/engines/core.py @@ -1,14 +1,18 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - -Core Engine (science) +"""CORE (science) """ +# pylint: disable=missing-function-docstring from json import loads from datetime import datetime from urllib.parse import urlencode +from searx import logger +from searx.exceptions import SearxEngineAPIException + +logger = logger.getChild('CORE engine') + about = { "website": 'https://core.ac.uk', "wikidata_id": 'Q22661180', @@ -19,45 +23,60 @@ about = { } categories = ['science'] - paging = True -nb_per_page = 20 +nb_per_page = 10 +api_key = 'unset' -# apikey = '' -apikey = 'MVBozuTX8QF9I1D0GviL5bCn2Ueat6NS' - +logger = logger.getChild('CORE engine') base_url = 
'https://core.ac.uk:443/api-v2/search/' search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}' - def request(query, params): + if api_key == 'unset': + raise SearxEngineAPIException('missing CORE API key') + search_path = search_string.format( - query=urlencode({'q': query}), - nb_per_page=nb_per_page, - page=params['pageno'], - apikey=apikey) - + query = urlencode({'q': query}), + nb_per_page = nb_per_page, + page = params['pageno'], + apikey = api_key, + ) params['url'] = base_url + search_path - return params + logger.debug("query_url --> %s", params['url']) + return params def response(resp): results = [] - json_data = loads(resp.text) + for result in json_data['data']: - time = result['_source']['publishedDate'] - if time is None: - date = datetime.now() - else: + + source = result['_source'] + time = source['publishedDate'] or source['depositedDate'] + if time : date = datetime.fromtimestamp(time / 1000) + else: + date = None + + metadata = [] + if source['publisher'] and len(source['publisher']) > 3: + metadata.append(source['publisher']) + if source['topics']: + metadata.append(source['topics'][0]) + if source['doi']: + metadata.append(source['doi']) + metadata = ' / '.join(metadata) + results.append({ - 'url': result['_source']['urls'][0], - 'title': result['_source']['title'], - 'content': result['_source']['description'], - 'publishedDate': date}) + 'url': source['urls'][0].replace('http://', 'https://', 1), + 'title': source['title'], + 'content': source['description'], + 'publishedDate': date, + 'metadata' : metadata, + }) return results From 6d41255eb19d88fd7a0c94106a6d7fc4e948d018 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 24 Apr 2021 08:52:26 +0200 Subject: [PATCH 3/3] [fix] Springer-Nature & core.ac need API key to work It is the whole engine definition that should be commented out: without the api_key nothing works. 
Signed-off-by: Markus Heiser --- searx/settings.yml | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/searx/settings.yml b/searx/settings.yml index 4217e5bfa..6e106a462 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -272,14 +272,12 @@ engines: categories : images shortcut : cce - - name : core.ac.uk - engine : core - categories : science - shortcut : cor - # get your API key from: https://core.ac.uk/api-keys/register/ - # api_key : "xxxxxxxx" - # set api_key and comment out disabled .. - disabled: True + # - name : core.ac.uk + # engine : core + # categories : science + # shortcut : cor + # # get your API key from: https://core.ac.uk/api-keys/register/ + # api_key : 'unset' - name : crossref engine : json_engine @@ -974,15 +972,13 @@ engines: # query_fields : '' # query fields # enable_http : True - - name : springer nature - engine : springer - # get your API key from: https://dev.springernature.com/signup - # api_key : "a69685087d07eca9f13db62f65b8f601" # working API key, for test & debug - # set api_key and comment out disabled .. - disabled: True - shortcut : springer - categories : science - timeout : 6.0 + # - name : springer nature + # engine : springer + # # get your API key from: https://dev.springernature.com/signup + # api_key : 'unset' # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" + # shortcut : springer + # categories : science + # timeout : 6.0 - name : startpage engine : startpage