From 3ab826de225a19133ca08137f5a52aafdd384c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9on=20Tiek=C3=B6tter?= Date: Fri, 7 Jan 2022 01:30:52 +0100 Subject: [PATCH] Drop microsoft academic engine Microsoft academic was discontinued on 2021-12-31. Source: https://www.microsoft.com/en-us/research/project/academic/articles/microsoft-academic-to-expand-horizons-with-community-driven-approach/ --- searx/engines/microsoft_academic.py | 77 ----------------------------- searx/settings.yml | 6 --- 2 files changed, 83 deletions(-) delete mode 100644 searx/engines/microsoft_academic.py diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py deleted file mode 100644 index a869daf2f..000000000 --- a/searx/engines/microsoft_academic.py +++ /dev/null @@ -1,77 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - Microsoft Academic (Science) -""" - -from json import dumps, loads -from searx.utils import html_to_text - -# about -about = { - "website": 'https://academic.microsoft.com', - "wikidata_id": 'Q28136779', - "official_api_documentation": 'http://ma-graph.org/', - "use_official_api": False, - "require_api_key": False, - "results": 'JSON', -} - -categories = ['images'] -paging = True -search_url = 'https://academic.microsoft.com/api/search' -_paper_url = 'https://academic.microsoft.com/paper/{id}/reference' - - -def request(query, params): - params['url'] = search_url - params['method'] = 'POST' - params['headers']['content-type'] = 'application/json; charset=utf-8' - params['data'] = dumps( - { - 'query': query, - 'queryExpression': '', - 'filters': [], - 'orderBy': 0, - 'skip': (params['pageno'] - 1) * 10, - 'sortAscending': True, - 'take': 10, - 'includeCitationContexts': False, - 'profileId': '', - } - ) - - return params - - -def response(resp): - results = [] - response_data = loads(resp.text) - if not response_data: - return results - - for result in response_data.get('pr', {}): - if 'dn' not in result['paper']: - continue - - title = result['paper']['dn'] - content = _get_content(result['paper']) - url = _paper_url.format(id=result['paper']['id']) - results.append( - { - 'url': url, - 'title': html_to_text(title), - 'content': html_to_text(content), - } - ) - - return results - - -def _get_content(result): - if 'd' in result: - content = result['d'] - if len(content) > 300: - return content[:300] + '...' - return content - - return '' diff --git a/searx/settings.yml b/searx/settings.yml index f1d6fa1b1..e41daf6f1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -872,12 +872,6 @@ engines: # base_url: http://localhost:7700 # index: my-index - - name: microsoft academic - engine: microsoft_academic - categories: science - shortcut: ma - timeout: 6.0 - - name: mixcloud engine: mixcloud shortcut: mc