[mod] Dailymotion: improved request API & upgrade to data_type: traits_v1

- fetch_traits(): fetch locales (and languages) from dailymotion API
- removed obsolete data-type "supported_languages"
- add documentation
- improved argument list of the HTTP request:
  - add argument: family_filter_map
  - add conditional argument: localization
    Don't add localization and country arguments if the user selects only a
    language (:de, :en, ..) without a region
- improve code quality (mainly improve readability)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-12-25 15:33:46 +01:00
parent 2499899554
commit 8a8c584fec
3 changed files with 129 additions and 111 deletions

View File

@ -0,0 +1,8 @@
.. _dailymotion engine:
===========
Dailymotion
===========
.. automodule:: searx.engines.dailymotion
:members:

View File

@ -366,8 +366,29 @@
"dailymotion": { "dailymotion": {
"all_locale": null, "all_locale": null,
"custom": {}, "custom": {},
"data_type": "supported_languages", "data_type": "traits_v1",
"languages": {}, "languages": {
"ar": "ar",
"de": "de",
"el": "el",
"en": "en",
"es": "es",
"fr": "fr",
"id": "id",
"it": "it",
"ja": "ja",
"ko": "ko",
"ms": "ms",
"nl": "nl",
"pl": "pl",
"pt": "pt",
"ro": "ro",
"ru": "ru",
"th": "th",
"tr": "tr",
"vi": "vi",
"zh": "zh"
},
"regions": { "regions": {
"ar-AE": "ar_AE", "ar-AE": "ar_AE",
"ar-EG": "ar_EG", "ar-EG": "ar_EG",
@ -418,58 +439,7 @@
"zh-CN": "zh_CN", "zh-CN": "zh_CN",
"zh-TW": "zh_TW" "zh-TW": "zh_TW"
}, },
"supported_languages": [ "supported_languages": {}
"ar_AA",
"ar_AE",
"ar_EG",
"ar_SA",
"de_AT",
"de_CH",
"de_DE",
"el_GR",
"en_AU",
"en_CA",
"en_EN",
"en_GB",
"en_HK",
"en_IE",
"en_IN",
"en_NG",
"en_PH",
"en_PK",
"en_SG",
"en_US",
"en_ZA",
"es_AR",
"es_ES",
"es_MX",
"fr_BE",
"fr_CA",
"fr_CH",
"fr_CI",
"fr_FR",
"fr_MA",
"fr_SN",
"fr_TN",
"id_ID",
"it_CH",
"it_IT",
"ja_JP",
"ko_KR",
"ms_MY",
"nl_BE",
"nl_NL",
"pl_PL",
"pt_BR",
"pt_PT",
"ro_RO",
"ru_RU",
"th_TH",
"tr_TR",
"vi_VN",
"zh_CN",
"zh_TW"
]
}, },
"duckduckgo": { "duckduckgo": {
"all_locale": "wt-wt", "all_locale": "wt-wt",

View File

@ -1,9 +1,18 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Dailymotion (Videos) # lint: pylint
"""
Dailymotion (Videos)
~~~~~~~~~~~~~~~~~~~~
.. _REST GET: https://developers.dailymotion.com/tools/
.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
""" """
from typing import Set from typing import TYPE_CHECKING
from datetime import datetime, timedelta from datetime import datetime, timedelta
from urllib.parse import urlencode from urllib.parse import urlencode
import time import time
@ -12,8 +21,16 @@ import babel
from searx.exceptions import SearxEngineAPIException from searx.exceptions import SearxEngineAPIException
from searx import network from searx import network
from searx.utils import html_to_text from searx.utils import html_to_text
from searx.locales import region_tag, language_tag
from searx.enginelib.traits import EngineTraits from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about # about
about = { about = {
"website": 'https://www.dailymotion.com', "website": 'https://www.dailymotion.com',
@ -38,11 +55,24 @@ time_delta_dict = {
} }
safesearch = True safesearch = True
safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''} safesearch_params = {
2: {'is_created_for_kids': 'true'},
1: {'is_created_for_kids': 'true'},
0: {},
}
"""True if this video is "Created for Kids" / intends to target an audience
under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
"""
# search-url family_filter_map = {
# - https://developers.dailymotion.com/tools/ 2: 'true',
# - https://www.dailymotion.com/doc/api/obj-video.html 1: 'true',
0: 'false',
}
"""By default, the family filter is turned on. Setting this parameter to
``false`` will stop filtering-out explicit content from searches and global
contexts (``family_filter`` in `Global API Parameters`_ ).
"""
result_fields = [ result_fields = [
'allow_embed', 'allow_embed',
@ -54,27 +84,21 @@ result_fields = [
'thumbnail_360_url', 'thumbnail_360_url',
'id', 'id',
] ]
search_url = ( """`Fields selection`_, by default, a few fields are returned. To request more
'https://api.dailymotion.com/videos?' specific fields, the ``fields`` parameter is used with the list of fields
'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}' SearXNG needs in the response to build a video result list.
).format( """
fields=','.join(result_fields),
password_protected='false', search_url = 'https://api.dailymotion.com/videos?'
private='false', """URL to retrieve a list of videos.
sort='relevance',
limit=number_of_results, - `REST GET`_
) - `Global API Parameters`_
- `Video filters API`_
"""
iframe_src = "https://www.dailymotion.com/embed/video/{video_id}" iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
"""URL template to embed video in SearXNG's result list."""
# The request query filters by 'languages' & 'country', therefore instead of
# fetching only languages we need to fetch locales.
supported_languages_url = 'https://api.dailymotion.com/locales'
supported_languages_iso639: Set[str] = set()
def init(_engine_settings):
global supported_languages_iso639
supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages])
def request(query, params): def request(query, params):
@ -82,34 +106,42 @@ def request(query, params):
if not query: if not query:
return False return False
language = params['language'] eng_region = traits.get_region(params['searxng_locale'], 'en_US')
if language == 'all': eng_lang = traits.get_language(params['searxng_locale'], 'en')
language = 'en-US'
locale = babel.Locale.parse(language, sep='-')
language_iso639 = locale.language args = {
if locale.language not in supported_languages_iso639:
language_iso639 = 'en'
query_args = {
'search': query, 'search': query,
'languages': language_iso639, 'family_filter': family_filter_map.get(params['safesearch'], 'false'),
'thumbnail_ratio': 'original', # original|widescreen|square
# https://developers.dailymotion.com/api/#video-filters
'languages': eng_lang,
'page': params['pageno'], 'page': params['pageno'],
'password_protected': 'false',
'private': 'false',
'sort': 'relevance',
'limit': number_of_results,
'fields': ','.join(result_fields),
} }
if locale.territory: args.update(safesearch_params.get(params['safesearch'], {}))
localization = locale.language + '_' + locale.territory
if localization in supported_languages: # Don't add localization and country arguments if the user does select a
query_args['country'] = locale.territory # language (:de, :en, ..)
if len(params['searxng_locale'].split('-')) > 1:
# https://developers.dailymotion.com/api/#global-parameters
args['localization'] = eng_region
args['country'] = eng_region.split('_')[1]
# Insufficient rights for the `ams_country' parameter of route `GET /videos'
# 'ams_country': eng_region.split('_')[1],
time_delta = time_delta_dict.get(params["time_range"]) time_delta = time_delta_dict.get(params["time_range"])
if time_delta: if time_delta:
created_after = datetime.now() - time_delta created_after = datetime.now() - time_delta
query_args['created_after'] = datetime.timestamp(created_after) args['created_after'] = datetime.timestamp(created_after)
query_str = urlencode(query_args) query_str = urlencode(args)
params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '') params['url'] = search_url + query_str
params['raise_for_httperror'] = False
return params return params
@ -168,31 +200,27 @@ def response(resp):
return results return results
# get supported languages from their site
def _fetch_supported_languages(resp):
response_json = resp.json()
return [item['locale'] for item in response_json['list']]
def fetch_traits(engine_traits: EngineTraits): def fetch_traits(engine_traits: EngineTraits):
"""Fetch regions from dailymotion. """Fetch locales & languages from dailymotion.
Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
There are duplications in the locale codes returned from Dailymotion which There are duplications in the locale codes returned from Dailymotion which
can be ignored:: can be ignored::
en_EN --> en_GB, en_US en_EN --> en_GB, en_US
ar_AA --> ar_EG, ar_AE, ar_SA ar_AA --> ar_EG, ar_AE, ar_SA
The language list `api/languages <https://api.dailymotion.com/languages>`_
contains over 7000 *languages* codes (see PR1071_). We use only those
language codes that are used in the locales.
.. _PR1071: https://github.com/searxng/searxng/pull/1071
""" """
# pylint: disable=import-outside-toplevel
engine_traits.data_type = 'supported_languages' # deprecated
from searx.locales import region_tag
resp = network.get('https://api.dailymotion.com/locales') resp = network.get('https://api.dailymotion.com/locales')
if not resp.ok: if not resp.ok:
print("ERROR: response from peertube is not OK.") print("ERROR: response from dailymotion/locales is not OK.")
for item in resp.json()['list']: for item in resp.json()['list']:
eng_tag = item['locale'] eng_tag = item['locale']
@ -210,3 +238,15 @@ def fetch_traits(engine_traits: EngineTraits):
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
continue continue
engine_traits.regions[sxng_tag] = eng_tag engine_traits.regions[sxng_tag] = eng_tag
locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
resp = network.get('https://api.dailymotion.com/languages')
if not resp.ok:
print("ERROR: response from dailymotion/languages is not OK.")
for item in resp.json()['list']:
eng_tag = item['code']
if eng_tag in locale_lang_list:
sxng_tag = language_tag(babel.Locale.parse(eng_tag))
engine_traits.languages[sxng_tag] = eng_tag