From ad725ce7d7df5afc2fc2f63405195b6bcfc8a8a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89milien=20=28perso=29?= <4016501+unixfox@users.noreply.github.com> Date: Tue, 19 Sep 2023 10:31:02 +0200 Subject: [PATCH] wikipedia wikidata infobox + disable wikisource (#2806) Co-authored-by: Markus Heiser --- searx/engines/wikidata.py | 19 ++++++++++++++++--- searx/engines/wikipedia.py | 32 +++++++++++++++++++++----------- searx/settings.yml | 7 +++++++ 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 5779daa0b..8fa3a97d3 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -41,6 +41,12 @@ about = { "results": 'JSON', } +display_type = ["infobox"] +"""A list of display types composed from ``infobox`` and ``list``. The latter +one will add a hit to the result list. The first one will show a hit in the +info box. Both values can be set, or one of the two can be set.""" + + # SPARQL SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql' SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain' @@ -268,8 +274,9 @@ def get_results(attribute_result, attributes, language): for url in value.split(', '): infobox_urls.append({'title': attribute.get_label(language), 'url': url, **attribute.kwargs}) # "normal" results (not infobox) include official website and Wikipedia links. 
- if attribute.kwargs.get('official') or attribute_type == WDArticle: + if "list" in display_type and (attribute.kwargs.get('official') or attribute_type == WDArticle): results.append({'title': infobox_title, 'url': url, "content": infobox_content}) + # update the infobox_id with the wikipedia URL # first the local wikipedia URL, and as fallback the english wikipedia URL if attribute_type == WDArticle and ( @@ -305,9 +312,15 @@ def get_results(attribute_result, attributes, language): # add the wikidata URL at the end infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']}) - if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0: + if ( + "list" in display_type + and img_src is None + and len(infobox_attributes) == 0 + and len(infobox_urls) == 1 + and len(infobox_content) == 0 + ): results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content}) - else: + elif "infobox" in display_type: results.append( { 'infobox': infobox_title, diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index b4b70208d..d825cbdea 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -77,6 +77,11 @@ about = { "results": 'JSON', } +display_type = ["infobox"] +"""A list of display types composed from ``infobox`` and ``list``. The latter +one will add a hit to the result list. The first one will show a hit in the +info box. 
Both values can be set, or one of the two can be set.""" + send_accept_language_header = True """The HTTP ``Accept-Language`` header is needed for wikis where LanguageConverter_ is enabled.""" @@ -185,18 +190,23 @@ def response(resp): api_result = resp.json() title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title')) wikipedia_link = api_result['content_urls']['desktop']['page'] - results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')}) - if api_result.get('type') == 'standard': - results.append( - { - 'infobox': title, - 'id': wikipedia_link, - 'content': api_result.get('extract', ''), - 'img_src': api_result.get('thumbnail', {}).get('source'), - 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], - } - ) + if "list" in display_type or api_result.get('type') != 'standard': + # show item in the result list if 'list' is in the display options or it + # is an item that can't be displayed in an infobox. 
+ results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')}) + + if "infobox" in display_type: + if api_result.get('type') == 'standard': + results.append( + { + 'infobox': title, + 'id': wikipedia_link, + 'content': api_result.get('extract', ''), + 'img_src': api_result.get('thumbnail', {}).get('source'), + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], + } + ) return results diff --git a/searx/settings.yml b/searx/settings.yml index 3990ebaf3..4d8899caa 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -371,7 +371,10 @@ engines: - name: wikipedia engine: wikipedia shortcut: wp + # add "list" to the array to get results in the results list + display_type: ["infobox"] base_url: 'https://{language}.wikipedia.org/' + categories: [general] - name: bilibili engine: bilibili @@ -584,7 +587,10 @@ engines: shortcut: wd timeout: 3.0 weight: 2 + # add "list" to the array to get results in the results list + display_type: ["infobox"] tests: *tests_infobox + categories: [general] - name: duckduckgo engine: duckduckgo @@ -1622,6 +1628,7 @@ engines: categories: [general, wikimedia] base_url: "https://{language}.wikisource.org/" search_type: text + disabled: true about: website: https://www.wikisource.org/ wikidata_id: Q263