From 8a8c584fec2b8404b294ca5012d0ced5bc26d986 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 25 Dec 2022 15:33:46 +0100 Subject: [PATCH] [mod] Dailymotion: improved request API & upgrade to data_type: traits_v1 - fetch_traits(): fetch locales (and languages) from dailymotion API - removed obsolete data-type "supported_languages" - add documentation - improved argument list of the HTTP request: - add argument: family_filter_map - add conditional argument: localization Don't add localization and country arguments if the user selects only a language (:de, :en, ..) - improve code quality (mainly improve readability) Signed-off-by: Markus Heiser --- docs/src/searx.engine.dailymotion.rst | 8 ++ searx/data/engine_traits.json | 78 ++++--------- searx/engines/dailymotion.py | 154 ++++++++++++++++---------- 3 files changed, 129 insertions(+), 111 deletions(-) create mode 100644 docs/src/searx.engine.dailymotion.rst diff --git a/docs/src/searx.engine.dailymotion.rst b/docs/src/searx.engine.dailymotion.rst new file mode 100644 index 000000000..84348e2d0 --- /dev/null +++ b/docs/src/searx.engine.dailymotion.rst @@ -0,0 +1,8 @@ +.. _dailymotion engine: + +=========== +Dailymotion +=========== + +.. 
automodule:: searx.engines.dailymotion + :members: diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index ec82c9690..e1c1a5a46 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -366,8 +366,29 @@ "dailymotion": { "all_locale": null, "custom": {}, - "data_type": "supported_languages", - "languages": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "fr": "fr", + "id": "id", + "it": "it", + "ja": "ja", + "ko": "ko", + "ms": "ms", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ro": "ro", + "ru": "ru", + "th": "th", + "tr": "tr", + "vi": "vi", + "zh": "zh" + }, "regions": { "ar-AE": "ar_AE", "ar-EG": "ar_EG", @@ -418,58 +439,7 @@ "zh-CN": "zh_CN", "zh-TW": "zh_TW" }, - "supported_languages": [ - "ar_AA", - "ar_AE", - "ar_EG", - "ar_SA", - "de_AT", - "de_CH", - "de_DE", - "el_GR", - "en_AU", - "en_CA", - "en_EN", - "en_GB", - "en_HK", - "en_IE", - "en_IN", - "en_NG", - "en_PH", - "en_PK", - "en_SG", - "en_US", - "en_ZA", - "es_AR", - "es_ES", - "es_MX", - "fr_BE", - "fr_CA", - "fr_CH", - "fr_CI", - "fr_FR", - "fr_MA", - "fr_SN", - "fr_TN", - "id_ID", - "it_CH", - "it_IT", - "ja_JP", - "ko_KR", - "ms_MY", - "nl_BE", - "nl_NL", - "pl_PL", - "pt_BR", - "pt_PT", - "ro_RO", - "ru_RU", - "th_TH", - "tr_TR", - "vi_VN", - "zh_CN", - "zh_TW" - ] + "supported_languages": {} }, "duckduckgo": { "all_locale": "wt-wt", diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 1da3f4e0e..d734ec3c8 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -1,9 +1,18 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Dailymotion (Videos) +# lint: pylint +""" +Dailymotion (Videos) +~~~~~~~~~~~~~~~~~~~~ + +.. _REST GET: https://developers.dailymotion.com/tools/ +.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters +.. 
_Video filters API: https://developers.dailymotion.com/api/#video-filters +.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection """ -from typing import Set +from typing import TYPE_CHECKING + from datetime import datetime, timedelta from urllib.parse import urlencode import time @@ -12,8 +21,16 @@ import babel from searx.exceptions import SearxEngineAPIException from searx import network from searx.utils import html_to_text +from searx.locales import region_tag, language_tag from searx.enginelib.traits import EngineTraits +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + # about about = { "website": 'https://www.dailymotion.com', @@ -38,11 +55,24 @@ time_delta_dict = { } safesearch = True -safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''} +safesearch_params = { + 2: {'is_created_for_kids': 'true'}, + 1: {'is_created_for_kids': 'true'}, + 0: {}, +} +"""True if this video is "Created for Kids" / intends to target an audience +under the age of 16 (``is_created_for_kids`` in `Video filters API`_ ) +""" -# search-url -# - https://developers.dailymotion.com/tools/ -# - https://www.dailymotion.com/doc/api/obj-video.html +family_filter_map = { + 2: 'true', + 1: 'true', + 0: 'false', +} +"""By default, the family filter is turned on. Setting this parameter to +``false`` will stop filtering-out explicit content from searches and global +contexts (``family_filter`` in `Global API Parameters`_ ). +""" result_fields = [ 'allow_embed', @@ -54,27 +84,21 @@ result_fields = [ 'thumbnail_360_url', 'id', ] -search_url = ( - 'https://api.dailymotion.com/videos?' - 'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}' -).format( - fields=','.join(result_fields), - password_protected='false', - private='false', - sort='relevance', - limit=number_of_results, -) +"""`Fields selection`_, by default, a few fields are returned. 
To request more +specific fields, the ``fields`` parameter is used with the list of fields +SearXNG needs in the response to build a video result list. +""" + +search_url = 'https://api.dailymotion.com/videos?' +"""URL to retrieve a list of videos. + +- `REST GET`_ +- `Global API Parameters`_ +- `Video filters API`_ +""" + iframe_src = "https://www.dailymotion.com/embed/video/{video_id}" - -# The request query filters by 'languages' & 'country', therefore instead of -# fetching only languages we need to fetch locales. -supported_languages_url = 'https://api.dailymotion.com/locales' -supported_languages_iso639: Set[str] = set() - - -def init(_engine_settings): - global supported_languages_iso639 - supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages]) +"""URL template to embed video in SearXNG's result list.""" def request(query, params): @@ -82,34 +106,42 @@ def request(query, params): if not query: return False - language = params['language'] - if language == 'all': - language = 'en-US' - locale = babel.Locale.parse(language, sep='-') + eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], 'en') - language_iso639 = locale.language - if locale.language not in supported_languages_iso639: - language_iso639 = 'en' - - query_args = { + args = { 'search': query, - 'languages': language_iso639, + 'family_filter': family_filter_map.get(params['safesearch'], 'false'), + 'thumbnail_ratio': 'original', # original|widescreen|square + # https://developers.dailymotion.com/api/#video-filters + 'languages': eng_lang, 'page': params['pageno'], + 'password_protected': 'false', + 'private': 'false', + 'sort': 'relevance', + 'limit': number_of_results, + 'fields': ','.join(result_fields), } - if locale.territory: - localization = locale.language + '_' + locale.territory - if localization in supported_languages: - query_args['country'] = locale.territory + 
args.update(safesearch_params.get(params['safesearch'], {})) + + # Don't add localization and country arguments if the user selects only a + # language (:de, :en, ..) + + if len(params['searxng_locale'].split('-')) > 1: + # https://developers.dailymotion.com/api/#global-parameters + args['localization'] = eng_region + args['country'] = eng_region.split('_')[1] + # Insufficient rights for the `ams_country' parameter of route `GET /videos' + # 'ams_country': eng_region.split('_')[1], time_delta = time_delta_dict.get(params["time_range"]) if time_delta: created_after = datetime.now() - time_delta - query_args['created_after'] = datetime.timestamp(created_after) + args['created_after'] = datetime.timestamp(created_after) - query_str = urlencode(query_args) - params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '') - params['raise_for_httperror'] = False + query_str = urlencode(args) + params['url'] = search_url + query_str return params @@ -168,31 +200,27 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - response_json = resp.json() - return [item['locale'] for item in response_json['list']] - - def fetch_traits(engine_traits: EngineTraits): - """Fetch regions from dailymotion. + """Fetch locales & languages from dailymotion. + Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_. There are duplications in the locale codes returned from Dailymotion which can be ignored:: en_EN --> en_GB, en_US ar_AA --> ar_EG, ar_AE, ar_SA + The language list `api/languages <https://api.dailymotion.com/languages>`_ + contains over 7000 *languages* codes (see PR1071_). We use only those + language codes that are used in the locales. + + .. 
_PR1071: https://github.com/searxng/searxng/pull/1071 + """ - # pylint: disable=import-outside-toplevel - - engine_traits.data_type = 'supported_languages' # deprecated - - from searx.locales import region_tag resp = network.get('https://api.dailymotion.com/locales') if not resp.ok: - print("ERROR: response from peertube is not OK.") + print("ERROR: response from dailymotion/locales is not OK.") for item in resp.json()['list']: eng_tag = item['locale'] @@ -210,3 +238,15 @@ def fetch_traits(engine_traits: EngineTraits): print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) continue engine_traits.regions[sxng_tag] = eng_tag + + locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()] + + resp = network.get('https://api.dailymotion.com/languages') + if not resp.ok: + print("ERROR: response from dailymotion/languages is not OK.") + + for item in resp.json()['list']: + eng_tag = item['code'] + if eng_tag in locale_lang_list: + sxng_tag = language_tag(babel.Locale.parse(eng_tag)) + engine_traits.languages[sxng_tag] = eng_tag