[feat] duckduckgo: support for videos and news

This commit is contained in:
Bnyro 2023-10-07 10:26:04 +02:00 committed by Markus Heiser
parent c3ab49cd90
commit 48cb58bd2e
5 changed files with 405 additions and 21 deletions

View File

@ -12,7 +12,7 @@ DuckDuckGo Engines
.. automodule:: searx.engines.duckduckgo .. automodule:: searx.engines.duckduckgo
:members: :members:
.. automodule:: searx.engines.duckduckgo_images .. automodule:: searx.engines.duckduckgo_extra
:members: :members:
.. automodule:: searx.engines.duckduckgo_definitions .. automodule:: searx.engines.duckduckgo_definitions

View File

@ -2390,6 +2390,334 @@
"zh-TW": "tw-tzh" "zh-TW": "tw-tzh"
} }
}, },
"duckduckgo videos": {
"all_locale": "wt-wt",
"custom": {
"lang_region": {
"ar-DZ": "ar_DZ",
"ar-JO": "ar_JO",
"ar-SA": "ar_SA",
"bn-IN": "bn_IN",
"de-CH": "de_CH",
"en-AU": "en_AU",
"en-CA": "en_CA",
"en-GB": "en_GB",
"es-AR": "es_AR",
"es-CL": "es_CL",
"es-CO": "es_CO",
"es-CR": "es_CR",
"es-EC": "es_EC",
"es-MX": "es_MX",
"es-PE": "es_PE",
"es-UY": "es_UY",
"es-VE": "es_VE",
"fr-BE": "fr_BE",
"fr-CA": "fr_CA",
"fr-CH": "fr_CH",
"nl-BE": "nl_BE",
"pt-BR": "pt_BR"
}
},
"data_type": "traits_v1",
"languages": {
"af": "af_ZA",
"ar": "ar_EG",
"ast": "ast_ES",
"az_Latn": "az_AZ",
"be": "be_BY",
"bg": "bg_BG",
"bn": "bn_BD",
"br": "br_FR",
"bs_Latn": "bs_BA",
"ca": "ca_ES",
"cs": "cs_CZ",
"cy": "cy_GB",
"da": "da_DK",
"de": "de_DE",
"el": "el_GR",
"en": "en_US",
"eo": "eo_XX",
"es": "es_ES",
"et": "et_EE",
"eu": "eu_ES",
"fa": "fa_IR",
"fi": "fi_FI",
"fil": "tl_PH",
"fr": "fr_FR",
"ga": "ga_IE",
"gd": "gd_GB",
"gl": "gl_ES",
"he": "he_IL",
"hi": "hi_IN",
"hr": "hr_HR",
"hu": "hu_HU",
"hy": "hy_AM",
"id": "id_ID",
"is": "is_IS",
"it": "it_IT",
"ja": "ja_JP",
"kab": "kab_DZ",
"kn": "kn_IN",
"ko": "ko_KR",
"ku": "ku",
"kw": "kw_GB",
"lt": "lt_LT",
"lv": "lv_LV",
"ml": "ml_IN",
"mr": "mr_IN",
"ms": "ms_MY",
"nb": "nb_NO",
"nl": "nl_NL",
"nn": "nn_NO",
"pl": "pl_PL",
"pt": "pt_PT",
"ro": "ro_RO",
"ru": "ru_RU",
"sc": "sc_IT",
"si": "si_LK",
"sk": "sk_SK",
"sl": "sl_SI",
"sq": "sq_AL",
"sr_Cyrl": "sr_RS",
"sv": "sv_SE",
"ta": "ta_IN",
"te": "te_IN",
"th": "th_TH",
"tr": "tr_TR",
"uk": "uk_UA",
"ur": "ur_PK",
"vi": "vi_VN",
"zh_Hans": "zh_CN",
"zh_Hant": "zh_TW"
},
"regions": {
"ar-SA": "xa-ar",
"bg-BG": "bg-bg",
"ca-ES": "es-ca",
"cs-CZ": "cz-cs",
"da-DK": "dk-da",
"de-AT": "at-de",
"de-CH": "ch-de",
"de-DE": "de-de",
"el-GR": "gr-el",
"en-AU": "au-en",
"en-CA": "ca-en",
"en-GB": "uk-en",
"en-IE": "ie-en",
"en-IL": "il-en",
"en-IN": "in-en",
"en-MY": "my-en",
"en-NZ": "nz-en",
"en-PH": "ph-en",
"en-PK": "pk-en",
"en-SG": "sg-en",
"en-US": "us-en",
"en-ZA": "za-en",
"es-AR": "ar-es",
"es-CL": "cl-es",
"es-CO": "co-es",
"es-ES": "es-es",
"es-MX": "mx-es",
"es-PE": "pe-es",
"es-US": "us-es",
"et-EE": "ee-et",
"fi-FI": "fi-fi",
"fr-BE": "be-fr",
"fr-CA": "ca-fr",
"fr-CH": "ch-fr",
"fr-FR": "fr-fr",
"hr-HR": "hr-hr",
"hu-HU": "hu-hu",
"id-ID": "id-en",
"it-IT": "it-it",
"ja-JP": "jp-jp",
"ko-KR": "kr-kr",
"lt-LT": "lt-lt",
"lv-LV": "lv-lv",
"nb-NO": "no-no",
"nl-BE": "be-nl",
"nl-NL": "nl-nl",
"pl-PL": "pl-pl",
"pt-BR": "br-pt",
"pt-PT": "pt-pt",
"ro-RO": "ro-ro",
"ru-RU": "ru-ru",
"sk-SK": "sk-sk",
"sl-SI": "sl-sl",
"sv-SE": "se-sv",
"th-TH": "th-en",
"tr-TR": "tr-tr",
"uk-UA": "ua-uk",
"vi-VN": "vn-en",
"zh-CN": "cn-zh",
"zh-HK": "hk-tzh",
"zh-TW": "tw-tzh"
}
},
"duckduckgo news": {
"all_locale": "wt-wt",
"custom": {
"lang_region": {
"ar-DZ": "ar_DZ",
"ar-JO": "ar_JO",
"ar-SA": "ar_SA",
"bn-IN": "bn_IN",
"de-CH": "de_CH",
"en-AU": "en_AU",
"en-CA": "en_CA",
"en-GB": "en_GB",
"es-AR": "es_AR",
"es-CL": "es_CL",
"es-CO": "es_CO",
"es-CR": "es_CR",
"es-EC": "es_EC",
"es-MX": "es_MX",
"es-PE": "es_PE",
"es-UY": "es_UY",
"es-VE": "es_VE",
"fr-BE": "fr_BE",
"fr-CA": "fr_CA",
"fr-CH": "fr_CH",
"nl-BE": "nl_BE",
"pt-BR": "pt_BR"
}
},
"data_type": "traits_v1",
"languages": {
"af": "af_ZA",
"ar": "ar_EG",
"ast": "ast_ES",
"az_Latn": "az_AZ",
"be": "be_BY",
"bg": "bg_BG",
"bn": "bn_BD",
"br": "br_FR",
"bs_Latn": "bs_BA",
"ca": "ca_ES",
"cs": "cs_CZ",
"cy": "cy_GB",
"da": "da_DK",
"de": "de_DE",
"el": "el_GR",
"en": "en_US",
"eo": "eo_XX",
"es": "es_ES",
"et": "et_EE",
"eu": "eu_ES",
"fa": "fa_IR",
"fi": "fi_FI",
"fil": "tl_PH",
"fr": "fr_FR",
"ga": "ga_IE",
"gd": "gd_GB",
"gl": "gl_ES",
"he": "he_IL",
"hi": "hi_IN",
"hr": "hr_HR",
"hu": "hu_HU",
"hy": "hy_AM",
"id": "id_ID",
"is": "is_IS",
"it": "it_IT",
"ja": "ja_JP",
"kab": "kab_DZ",
"kn": "kn_IN",
"ko": "ko_KR",
"ku": "ku",
"kw": "kw_GB",
"lt": "lt_LT",
"lv": "lv_LV",
"ml": "ml_IN",
"mr": "mr_IN",
"ms": "ms_MY",
"nb": "nb_NO",
"nl": "nl_NL",
"nn": "nn_NO",
"pl": "pl_PL",
"pt": "pt_PT",
"ro": "ro_RO",
"ru": "ru_RU",
"sc": "sc_IT",
"si": "si_LK",
"sk": "sk_SK",
"sl": "sl_SI",
"sq": "sq_AL",
"sr_Cyrl": "sr_RS",
"sv": "sv_SE",
"ta": "ta_IN",
"te": "te_IN",
"th": "th_TH",
"tr": "tr_TR",
"uk": "uk_UA",
"ur": "ur_PK",
"vi": "vi_VN",
"zh_Hans": "zh_CN",
"zh_Hant": "zh_TW"
},
"regions": {
"ar-SA": "xa-ar",
"bg-BG": "bg-bg",
"ca-ES": "es-ca",
"cs-CZ": "cz-cs",
"da-DK": "dk-da",
"de-AT": "at-de",
"de-CH": "ch-de",
"de-DE": "de-de",
"el-GR": "gr-el",
"en-AU": "au-en",
"en-CA": "ca-en",
"en-GB": "uk-en",
"en-IE": "ie-en",
"en-IL": "il-en",
"en-IN": "in-en",
"en-MY": "my-en",
"en-NZ": "nz-en",
"en-PH": "ph-en",
"en-PK": "pk-en",
"en-SG": "sg-en",
"en-US": "us-en",
"en-ZA": "za-en",
"es-AR": "ar-es",
"es-CL": "cl-es",
"es-CO": "co-es",
"es-ES": "es-es",
"es-MX": "mx-es",
"es-PE": "pe-es",
"es-US": "us-es",
"et-EE": "ee-et",
"fi-FI": "fi-fi",
"fr-BE": "be-fr",
"fr-CA": "ca-fr",
"fr-CH": "ch-fr",
"fr-FR": "fr-fr",
"hr-HR": "hr-hr",
"hu-HU": "hu-hu",
"id-ID": "id-en",
"it-IT": "it-it",
"ja-JP": "jp-jp",
"ko-KR": "kr-kr",
"lt-LT": "lt-lt",
"lv-LV": "lv-lv",
"nb-NO": "no-no",
"nl-BE": "be-nl",
"nl-NL": "nl-nl",
"pl-PL": "pl-pl",
"pt-BR": "br-pt",
"pt-PT": "pt-pt",
"ro-RO": "ro-ro",
"ru-RU": "ru-ru",
"sk-SK": "sk-sk",
"sl-SI": "sl-sl",
"sv-SE": "se-sv",
"th-TH": "th-en",
"tr-TR": "tr-tr",
"uk-UA": "ua-uk",
"vi-VN": "vn-en",
"zh-CN": "cn-zh",
"zh-HK": "hk-tzh",
"zh-TW": "tw-tzh"
}
},
"duckduckgo weather": { "duckduckgo weather": {
"all_locale": "wt-wt", "all_locale": "wt-wt",
"custom": { "custom": {

View File

@ -66,8 +66,10 @@ def cache_vqd(query, value):
The vqd value depends on the query string and is needed for the follow up The vqd value depends on the query string and is needed for the follow up
pages or the images loaded by a XMLHttpRequest: pages or the images loaded by a XMLHttpRequest:
- DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...` - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
- DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...` - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...``
- DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
- DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
""" """
c = redisdb.client() c = redisdb.client()

View File

@ -1,9 +1,10 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
""" """
DuckDuckGo Images DuckDuckGo Extra (images, videos, news)
~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
""" """
from datetime import datetime
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from urllib.parse import urlencode from urllib.parse import urlencode
@ -32,6 +33,9 @@ about = {
# engine dependent config # engine dependent config
categories = ['images', 'web'] categories = ['images', 'web']
ddg_category = 'images'
"""The category must be one of ``images``, ``videos`` or ``news``.
"""
paging = True paging = True
safesearch = True safesearch = True
send_accept_language_header = True send_accept_language_header = True
@ -39,6 +43,8 @@ send_accept_language_header = True
safesearch_cookies = {0: '-2', 1: None, 2: '1'} safesearch_cookies = {0: '-2', 1: None, 2: '1'}
safesearch_args = {0: '1', 1: None, 2: '1'} safesearch_args = {0: '1', 1: None, 2: '1'}
search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
def request(query, params): def request(query, params):
@ -69,28 +75,61 @@ def request(query, params):
args['p'] = safe_search # "-1", "1" args['p'] = safe_search # "-1", "1"
logger.debug("cookies: %s", params['cookies']) logger.debug("cookies: %s", params['cookies'])
args = urlencode(args)
params['url'] = 'https://duckduckgo.com/i.js?{args}'.format(args=args) params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'
return params return params
def _image_result(result):
return {
'template': 'images.html',
'url': result['url'],
'title': result['title'],
'content': '',
'thumbnail_src': result['thumbnail'],
'img_src': result['image'],
'img_format': '%s x %s' % (result['width'], result['height']),
'source': result['source'],
}
def _video_result(result):
return {
'template': 'videos.html',
'url': result['content'],
'title': result['title'],
'content': result['description'],
'thumbnail': result['images'].get('small') or result['images'].get('medium'),
'iframe_src': result['embed_url'],
'source': result['provider'],
'length': result['duration'],
'metadata': result.get('uploader'),
}
def _news_result(result):
return {
'url': result['url'],
'title': result['title'],
'content': result['excerpt'],
'source': result['source'],
'publishedDate': datetime.utcfromtimestamp(result['date']),
}
def response(resp): def response(resp):
results = [] results = []
res_json = resp.json() res_json = resp.json()
for result in res_json['results']: for result in res_json['results']:
results.append( if ddg_category == 'images':
{ results.append(_image_result(result))
'template': 'images.html', elif ddg_category == 'videos':
'title': result['title'], results.append(_video_result(result))
'content': '', elif ddg_category == 'news':
'thumbnail_src': result['thumbnail'], results.append(_news_result(result))
'img_src': result['image'], else:
'url': result['url'], raise ValueError(f"Invalid duckduckgo category: {ddg_category}")
'img_format': '%s x %s' % (result['width'], result['height']),
'source': result['source'],
}
)
return results return results

View File

@ -603,9 +603,24 @@ engines:
shortcut: ddg shortcut: ddg
- name: duckduckgo images - name: duckduckgo images
engine: duckduckgo_images engine: duckduckgo_extra
categories: [images, web]
ddg_category: images
shortcut: ddi shortcut: ddi
timeout: 3.0 disabled: true
- name: duckduckgo videos
engine: duckduckgo_extra
categories: [videos, web]
ddg_category: videos
shortcut: ddv
disabled: true
- name: duckduckgo news
engine: duckduckgo_extra
categories: [news, web]
ddg_category: news
shortcut: ddn
disabled: true disabled: true
- name: duckduckgo weather - name: duckduckgo weather