From efd3a2d6d1376468226378c4435c6a81110ca261 Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Fri, 8 Sep 2023 18:11:06 +0200
Subject: [PATCH] [feat] engine: implementation of internet archive scholar

---
Review notes (everything between the "---" marker above and the first
"diff --git" line is commentary and is not part of the commit):

 * The line structure of this patch was restored; it had been collapsed
   onto two physical lines, which made it impossible to apply and left
   the embedded Python unparseable.
 * response(): result['abstracts'][0].get('body') yields None when an
   abstract has no 'body' key, so the value handed to html_to_text() is
   now "content or ''" to keep it a string.  The line count of the hunk
   is unchanged; the blob id on the "index" line predates this tweak.
 * NOTE(review): the utils/brand.env hunk points GIT_URL at a personal
   fork (with a doubled slash) and GIT_BRANCH at 'scholar'.  This looks
   like an unintended local change -- confirm before merging.

 searx/engines/internet_archive_scholar.py | 72 +++++++++++++++++++++++
 searx/settings.yml                        |  5 ++
 utils/brand.env                           |  4 +-
 3 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 searx/engines/internet_archive_scholar.py

diff --git a/searx/engines/internet_archive_scholar.py b/searx/engines/internet_archive_scholar.py
new file mode 100644
index 000000000..fdbc10026
--- /dev/null
+++ b/searx/engines/internet_archive_scholar.py
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Internet Archive scholar(science)
+"""
+
+from datetime import datetime
+from urllib.parse import urlencode
+from searx.utils import html_to_text
+
+about = {
+    "website": "https://scholar.archive.org/",
+    "wikidata_id": "Q115667709",
+    "official_api_documentation": "https://scholar.archive.org/api/redoc",
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": "JSON",
+}
+categories = ['science', 'scientific publications']
+paging = True
+
+base_url = "https://scholar.archive.org"
+results_per_page = 15
+
+
+def request(query, params):
+    args = {
+        "q": query,
+        "limit": results_per_page,
+        "offset": (params["pageno"] - 1) * results_per_page,
+    }
+    params["url"] = f"{base_url}/search?{urlencode(args)}"
+    params["headers"]["Accept"] = "application/json"
+    return params
+
+
+def response(resp):
+    results = []
+
+    json = resp.json()
+
+    for result in json["results"]:
+        publishedDate, content, doi = None, '', None
+
+        if result['biblio'].get('release_date'):
+            publishedDate = datetime.strptime(result['biblio']['release_date'], "%Y-%m-%d")
+
+        if len(result['abstracts']) > 0:
+            content = result['abstracts'][0].get('body')
+        elif len(result['_highlights']) > 0:
+            content = result['_highlights'][0]
+
+        if len(result['releases']) > 0:
+            doi = result['releases'][0].get('doi')
+
+        results.append(
+            {
+                'template': 'paper.html',
+                'url': result['fulltext']['access_url'],
+                'title': result['biblio']['title'],
+                'content': html_to_text(content or ''),
+                'publisher': result['biblio'].get('publisher'),
+                'doi': doi,
+                'journal': result['biblio'].get('container_name'),
+                'authors': result['biblio'].get('contrib_names'),
+                'tags': result['tags'],
+                'publishedDate': publishedDate,
+                'issns': result['biblio'].get('issns'),
+                'pdf_url': result['fulltext'].get('access_url'),
+            }
+        )
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 9ee256b28..c601cbfa4 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1348,6 +1348,11 @@ engines:
     api_site: 'askubuntu'
     categories: [it, q&a]
 
+  - name: internetarchivescholar
+    engine: internet_archive_scholar
+    shortcut: ias
+    timeout: 5.0
+
   - name: superuser
     engine: stackexchange
     shortcut: su
diff --git a/utils/brand.env b/utils/brand.env
index 31afce53c..c65e7e42d 100644
--- a/utils/brand.env
+++ b/utils/brand.env
@@ -1,5 +1,5 @@
 export SEARXNG_URL=''
 export SEARXNG_PORT='8888'
 export SEARXNG_BIND_ADDRESS='127.0.0.1'
-export GIT_URL='https://github.com/searxng/searxng'
-export GIT_BRANCH='master'
+export GIT_URL='https://github.com//Bnyro/searxng'
+export GIT_BRANCH='scholar'