[mod] Peertube: re-engineered & upgrade to data_type: traits_v1

- fetch_traits(): Fetch languages from peertube's search-index source code.

  [mod] Include migration of the request method from 'supported_languages'
        to the 'traits' (EngineTraits) object.
  [fix] old supported_languages_url is no longer valid since the sources
        have been moved to a different path.

- fixed code to pass pylint
- request(): complete re-implementation based on the API docs [1]
- response(): complete re-implementation, adds several fields missed before
- add source code documentation

[1] https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-10-02 23:52:11 +02:00
parent 6e5f22e558
commit a7fe22770a
4 changed files with 197 additions and 77 deletions

View File

@ -0,0 +1,19 @@
.. _peertube engines:

================
Peertube Engines
================

.. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry

.. _peertube video engine:

Peertube Video
==============

.. automodule:: searx.engines.peertube
   :members:

View File

@ -1468,31 +1468,32 @@
"peertube": { "peertube": {
"all_locale": null, "all_locale": null,
"custom": {}, "custom": {},
"data_type": "supported_languages", "data_type": "traits_v1",
"languages": {}, "languages": {
"ca": "ca",
"cs": "cs",
"de": "de",
"el": "el",
"en": "en",
"eo": "eo",
"es": "es",
"eu": "eu",
"fi": "fi",
"fr": "fr",
"gd": "gd",
"it": "it",
"ja": "ja",
"nl": "nl",
"pl": "pl",
"pt": "pt",
"ru": "ru",
"sv": "sv",
"zh": "zh",
"zh_Hans": "zh",
"zh_Hant": "zh"
},
"regions": {}, "regions": {},
"supported_languages": [ "supported_languages": {}
"ca",
"cs",
"de",
"el",
"en",
"eo",
"es",
"eu",
"fi",
"fr",
"gd",
"it",
"ja",
"nl",
"oc",
"pl",
"pt",
"ru",
"sv",
"zh"
]
}, },
"qwant": { "qwant": {
"all_locale": null, "all_locale": null,

View File

@ -1,18 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
""" # lint: pylint
peertube (Videos) """Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
(more or less) the same REST API and the schema of the JSON result is identical.
""" """
from json import loads import re
from datetime import datetime
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.utils import html_to_text from datetime import datetime
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
import babel
from searx import network
from searx.locales import language_tag
from searx.utils import html_to_text
from searx.enginelib.traits import EngineTraits
traits: EngineTraits
# about
about = { about = {
# pylint: disable=line-too-long
"website": 'https://joinpeertube.org', "website": 'https://joinpeertube.org',
"wikidata_id": 'Q50938515', "wikidata_id": 'Q50938515',
"official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html', "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
"use_official_api": True, "use_official_api": True,
"require_api_key": False, "require_api_key": False,
"results": 'JSON', "results": 'JSON',
@ -22,66 +34,155 @@ about = {
categories = ["videos"] categories = ["videos"]
paging = True paging = True
base_url = "https://peer.tube" base_url = "https://peer.tube"
supported_languages_url = 'https://peer.tube/api/v1/videos/languages' """Base URL of the Peertube instance. A list of instances is available at:
- https://instances.joinpeertube.org/instances
"""
time_range_support = True
time_range_table = {
'day': relativedelta(),
'week': relativedelta(weeks=-1),
'month': relativedelta(months=-1),
'year': relativedelta(years=-1),
}
safesearch = True
safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
def minute_to_hm(minute):
    """Format a non-negative integer as ``<value // 60>:<value % 60>``.

    The remainder is zero-padded to two digits, e.g. ``125`` becomes
    ``"2:05"``.

    :param minute: the count to format.
    :returns: the formatted string, or ``None`` when *minute* is not a
        non-negative ``int`` (``bool`` values are rejected as well).

    NOTE(review): the caller passes the API's ``duration`` field here;
    confirm its unit -- if it is seconds the result reads as ``m:ss``
    rather than ``h:mm``.
    """
    # bool is a subclass of int; without this guard True would render "0:01".
    if isinstance(minute, bool) or not isinstance(minute, int):
        return None
    # divmod() floors towards -inf, so e.g. -90 would render as "-2:30";
    # a negative length is meaningless, report "unknown" instead.
    if minute < 0:
        return None
    return "%d:%02d" % divmod(minute, 60)
# do search-request
def request(query, params): def request(query, params):
sanitized_url = base_url.rstrip("/") """Assemble request for the Peertube API"""
pageno = (params["pageno"] - 1) * 15
search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}" if not query:
query_dict = {"search": query} return False
language = params["language"].split("-")[0]
if "all" != language and language in supported_languages: # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
query_dict["languageOneOf"] = language eng_lang = traits.get_language(params['searxng_locale'], None)
params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
params['url'] = (
base_url.rstrip("/")
+ "/api/v1/search/videos?"
+ urlencode(
{
'search': query,
'searchTarget': 'search-index', # Vidiversum
'resultType': 'videos',
'start': (params['pageno'] - 1) * 10,
'count': 10,
# -createdAt: sort by date ascending / createdAt: date descending
'sort': '-match', # sort by *match descending*
'nsfw': safesearch_table[params['safesearch']],
}
)
)
if eng_lang is not None:
params['url'] += '&languageOneOf[]=' + eng_lang
params['url'] += '&boostLanguages[]=' + eng_lang
if params['time_range'] in time_range_table:
time = datetime.now().date() + time_range_table[params['time_range']]
params['url'] += '&startDate=' + time.isoformat()
return params return params
def _get_offset_from_pageno(pageno):
return (pageno - 1) * 15 + 1
# get response from search-request
def response(resp): def response(resp):
sanitized_url = base_url.rstrip("/") return video_response(resp)
def video_response(resp):
"""Parse video response from SepiaSearch and Peertube instances."""
results = [] results = []
search_res = loads(resp.text) json_data = resp.json()
# return empty array if there are no results if 'data' not in json_data:
if "data" not in search_res:
return [] return []
# parse results for result in json_data['data']:
for res in search_res["data"]: metadata = [
title = res["name"] x
url = sanitized_url + "/videos/watch/" + res["uuid"] for x in [
description = res["description"] result.get('channel', {}).get('displayName'),
if description: result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
content = html_to_text(res["description"]) ', '.join(result.get('tags', [])),
else: ]
content = "" if x
thumbnail = sanitized_url + res["thumbnailPath"] ]
publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
results.append( results.append(
{ {
"template": "videos.html", 'url': result['url'],
"url": url, 'title': result['name'],
"title": title, 'content': html_to_text(result.get('description') or ''),
"content": content, 'author': result.get('account', {}).get('displayName'),
"publishedDate": publishedDate, 'length': minute_to_hm(result.get('duration')),
"iframe_src": sanitized_url + res["embedPath"], 'template': 'videos.html',
"thumbnail": thumbnail, 'publishedDate': parse(result['publishedAt']),
'iframe_src': result.get('embedUrl'),
'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
'metadata': ' | '.join(metadata),
} }
) )
# return results
return results return results
def _fetch_supported_languages(resp): def fetch_traits(engine_traits: EngineTraits):
videolanguages = resp.json() """Fetch languages from peertube's search-index source code.
peertube_languages = list(videolanguages.keys())
return peertube_languages See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
.. _8ed5c729 - Refactor and redesign client:
https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
.. _videoLanguages:
https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
"""
resp = network.get(
'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
# the response from search-index repository is very slow
timeout=60,
)
if not resp.ok:
print("ERROR: response from peertube is not OK.")
return
js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
if not js_lang:
print("ERROR: can't determine languages from peertube")
return
for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
try:
eng_tag = lang.group(1)
if eng_tag == 'oc':
# Occitan is not known by babel, its closest relative is Catalan
# but 'ca' is already in the list of engine_traits.languages -->
# 'oc' will be ignored.
continue
sxng_tag = language_tag(babel.Locale.parse(eng_tag))
except babel.UnknownLocaleError:
print("ERROR: %s is unknown by babel" % eng_tag)
continue
conflict = engine_traits.languages.get(sxng_tag)
if conflict:
if conflict != eng_tag:
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
continue
engine_traits.languages[sxng_tag] = eng_tag
engine_traits.languages['zh_Hans'] = 'zh'
engine_traits.languages['zh_Hant'] = 'zh'

View File

@ -1758,9 +1758,8 @@ engines:
engine: peertube engine: peertube
shortcut: ptb shortcut: ptb
paging: true paging: true
# https://instances.joinpeertube.org/instances # alternatives see: https://instances.joinpeertube.org/instances
base_url: https://peertube.biz/ # base_url: https://tube.4aem.com
# base_url: https://tube.tardis.world/
categories: videos categories: videos
disabled: true disabled: true
timeout: 6.0 timeout: 6.0