From 48cb58bd2ec4eb9cb4ba416f7ece75c3c6c41e55 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Sat, 7 Oct 2023 10:26:04 +0200 Subject: [PATCH] [feat] duckduckgo: support for videos and news --- docs/dev/engines/online/duckduckgo.rst | 2 +- searx/data/engine_traits.json | 328 ++++++++++++++++++ searx/engines/duckduckgo.py | 6 +- ...ckduckgo_images.py => duckduckgo_extra.py} | 71 +++- searx/settings.yml | 19 +- 5 files changed, 405 insertions(+), 21 deletions(-) rename searx/engines/{duckduckgo_images.py => duckduckgo_extra.py} (51%) diff --git a/docs/dev/engines/online/duckduckgo.rst b/docs/dev/engines/online/duckduckgo.rst index a73b38983..0f1258ff9 100644 --- a/docs/dev/engines/online/duckduckgo.rst +++ b/docs/dev/engines/online/duckduckgo.rst @@ -12,7 +12,7 @@ DuckDuckGo Engines .. automodule:: searx.engines.duckduckgo :members: -.. automodule:: searx.engines.duckduckgo_images +.. automodule:: searx.engines.duckduckgo_extra :members: .. automodule:: searx.engines.duckduckgo_definitions diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index aee199b30..9cb47ee38 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -2390,6 +2390,334 @@ "zh-TW": "tw-tzh" } }, + "duckduckgo videos": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", 
+ "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": "nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": "pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": "zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": "fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", 
+ "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, + "duckduckgo news": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", + "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": "nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": "pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": 
"zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": "fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", + "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, "duckduckgo weather": { "all_locale": "wt-wt", "custom": { diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index ebb4745b9..d0e818faf 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -66,8 +66,10 @@ def cache_vqd(query, value): The vqd value depends on the query string and is needed for the follow up pages or the images loaded by a XMLHttpRequest: - - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...` - - DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...` + - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...`` + - DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...`` + - DuckDuckGo Videos: 
``https://duckduckgo.com/v.js?q=...&vqd=...`` + - DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...`` """ c = redisdb.client() diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_extra.py similarity index 51% rename from searx/engines/duckduckgo_images.py rename to searx/engines/duckduckgo_extra.py index 7e7f133b1..7e3a3282d 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_extra.py @@ -1,9 +1,10 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ -DuckDuckGo Images -~~~~~~~~~~~~~~~~~ +DuckDuckGo Extra (images, videos, news) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ +from datetime import datetime from typing import TYPE_CHECKING from urllib.parse import urlencode @@ -32,6 +33,9 @@ about = { # engine dependent config categories = ['images', 'web'] +ddg_category = 'images' +"""The category must be one of ``images``, ``videos`` or ``news`` +""" paging = True safesearch = True send_accept_language_header = True @@ -39,6 +43,8 @@ send_accept_language_header = True safesearch_cookies = {0: '-2', 1: None, 2: '1'} safesearch_args = {0: '1', 1: None, 2: '1'} +search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'} + def request(query, params): @@ -69,28 +75,61 @@ def request(query, params): args['p'] = safe_search # "-1", "1" logger.debug("cookies: %s", params['cookies']) - args = urlencode(args) - params['url'] = 'https://duckduckgo.com/i.js?{args}'.format(args=args) + + params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}' return params +def _image_result(result): + return { + 'template': 'images.html', + 'url': result['url'], + 'title': result['title'], + 'content': '', + 'thumbnail_src': result['thumbnail'], + 'img_src': result['image'], + 'img_format': '%s x %s' % (result['width'], result['height']), + 'source': result['source'], + } + + +def _video_result(result): + return { + 'template': 'videos.html', + 'url': result['content'], + 'title': 
result['title'], + 'content': result['description'], + 'thumbnail': result['images'].get('small') or result['images'].get('medium'), + 'iframe_src': result['embed_url'], + 'source': result['provider'], + 'length': result['duration'], + 'metadata': result.get('uploader'), + } + + +def _news_result(result): + return { + 'url': result['url'], + 'title': result['title'], + 'content': result['excerpt'], + 'source': result['source'], + 'publishedDate': datetime.utcfromtimestamp(result['date']), + } + + def response(resp): results = [] res_json = resp.json() for result in res_json['results']: - results.append( - { - 'template': 'images.html', - 'title': result['title'], - 'content': '', - 'thumbnail_src': result['thumbnail'], - 'img_src': result['image'], - 'url': result['url'], - 'img_format': '%s x %s' % (result['width'], result['height']), - 'source': result['source'], - } - ) + if ddg_category == 'images': + results.append(_image_result(result)) + elif ddg_category == 'videos': + results.append(_video_result(result)) + elif ddg_category == 'news': + results.append(_news_result(result)) + else: + raise ValueError(f"Invalid duckduckgo category: {ddg_category}") return results diff --git a/searx/settings.yml b/searx/settings.yml index 5012f07ad..8b6d32301 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -603,9 +603,24 @@ engines: shortcut: ddg - name: duckduckgo images - engine: duckduckgo_images + engine: duckduckgo_extra + categories: [images, web] + ddg_category: images shortcut: ddi - timeout: 3.0 + disabled: true + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos, web] + ddg_category: videos + shortcut: ddv + disabled: true + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news, web] + ddg_category: news + shortcut: ddn disabled: true - name: duckduckgo weather