forked from zaclys/searxng
		
	[mod] Peertube: re-engineered & upgrade to data_type: traits_v1
- fetch_traits(): Fetch languages from peertube's search-index source code.
  [mod] Include migration of the request method from 'supported_languages'
        to 'traits' (EngineTraits) object.
  [fix] old supported_languages_url is no longer valid since the sources
        have been moved to a different path.
- fixed code to pass pylint
- request(): complete re-implementation based on the API docs [1]
- response(): complete re-implementation, adds several fields missed before
- add source code documentation
[1] https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									6e5f22e558
								
							
						
					
					
						commit
						a7fe22770a
					
				
					 4 changed files with 197 additions and 77 deletions
				
			
		
							
								
								
									
										19
									
								
								docs/src/searx.engines.peertube.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								docs/src/searx.engines.peertube.rst
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,19 @@
 | 
			
		|||
.. _peertube engines:
 | 
			
		||||
 | 
			
		||||
================
 | 
			
		||||
Peertube Engines
 | 
			
		||||
================
 | 
			
		||||
 | 
			
		||||
.. contents:: Contents
 | 
			
		||||
   :depth: 2
 | 
			
		||||
   :local:
 | 
			
		||||
   :backlinks: entry
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.. _peertube video engine:
 | 
			
		||||
 | 
			
		||||
Peertube Video
 | 
			
		||||
==============
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.engines.peertube
 | 
			
		||||
  :members:
 | 
			
		||||
| 
						 | 
				
			
			@ -1468,31 +1468,32 @@
 | 
			
		|||
  "peertube": {
 | 
			
		||||
    "all_locale": null,
 | 
			
		||||
    "custom": {},
 | 
			
		||||
    "data_type": "supported_languages",
 | 
			
		||||
    "languages": {},
 | 
			
		||||
    "data_type": "traits_v1",
 | 
			
		||||
    "languages": {
 | 
			
		||||
      "ca": "ca",
 | 
			
		||||
      "cs": "cs",
 | 
			
		||||
      "de": "de",
 | 
			
		||||
      "el": "el",
 | 
			
		||||
      "en": "en",
 | 
			
		||||
      "eo": "eo",
 | 
			
		||||
      "es": "es",
 | 
			
		||||
      "eu": "eu",
 | 
			
		||||
      "fi": "fi",
 | 
			
		||||
      "fr": "fr",
 | 
			
		||||
      "gd": "gd",
 | 
			
		||||
      "it": "it",
 | 
			
		||||
      "ja": "ja",
 | 
			
		||||
      "nl": "nl",
 | 
			
		||||
      "pl": "pl",
 | 
			
		||||
      "pt": "pt",
 | 
			
		||||
      "ru": "ru",
 | 
			
		||||
      "sv": "sv",
 | 
			
		||||
      "zh": "zh",
 | 
			
		||||
      "zh_Hans": "zh",
 | 
			
		||||
      "zh_Hant": "zh"
 | 
			
		||||
    },
 | 
			
		||||
    "regions": {},
 | 
			
		||||
    "supported_languages": [
 | 
			
		||||
      "ca",
 | 
			
		||||
      "cs",
 | 
			
		||||
      "de",
 | 
			
		||||
      "el",
 | 
			
		||||
      "en",
 | 
			
		||||
      "eo",
 | 
			
		||||
      "es",
 | 
			
		||||
      "eu",
 | 
			
		||||
      "fi",
 | 
			
		||||
      "fr",
 | 
			
		||||
      "gd",
 | 
			
		||||
      "it",
 | 
			
		||||
      "ja",
 | 
			
		||||
      "nl",
 | 
			
		||||
      "oc",
 | 
			
		||||
      "pl",
 | 
			
		||||
      "pt",
 | 
			
		||||
      "ru",
 | 
			
		||||
      "sv",
 | 
			
		||||
      "zh"
 | 
			
		||||
    ]
 | 
			
		||||
    "supported_languages": {}
 | 
			
		||||
  },
 | 
			
		||||
  "qwant": {
 | 
			
		||||
    "all_locale": null,
 | 
			
		||||
| 
						 | 
				
			
			@ -4531,4 +4532,4 @@
 | 
			
		|||
      "zh_cht"
 | 
			
		||||
    ]
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,18 +1,30 @@
 | 
			
		|||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
"""
 | 
			
		||||
 peertube (Videos)
 | 
			
		||||
# lint: pylint
 | 
			
		||||
"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
 | 
			
		||||
(more or less) the same REST API and the schema of the JSON result is identical.
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from json import loads
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
import re
 | 
			
		||||
from urllib.parse import urlencode
 | 
			
		||||
from searx.utils import html_to_text
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from dateutil.parser import parse
 | 
			
		||||
from dateutil.relativedelta import relativedelta
 | 
			
		||||
 | 
			
		||||
import babel
 | 
			
		||||
 | 
			
		||||
from searx import network
 | 
			
		||||
from searx.locales import language_tag
 | 
			
		||||
from searx.utils import html_to_text
 | 
			
		||||
from searx.enginelib.traits import EngineTraits
 | 
			
		||||
 | 
			
		||||
traits: EngineTraits
 | 
			
		||||
 | 
			
		||||
# about
 | 
			
		||||
about = {
 | 
			
		||||
    # pylint: disable=line-too-long
 | 
			
		||||
    "website": 'https://joinpeertube.org',
 | 
			
		||||
    "wikidata_id": 'Q50938515',
 | 
			
		||||
    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
 | 
			
		||||
    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
 | 
			
		||||
    "use_official_api": True,
 | 
			
		||||
    "require_api_key": False,
 | 
			
		||||
    "results": 'JSON',
 | 
			
		||||
| 
						 | 
				
			
			@ -22,66 +34,155 @@ about = {
 | 
			
		|||
categories = ["videos"]
 | 
			
		||||
paging = True
 | 
			
		||||
base_url = "https://peer.tube"
 | 
			
		||||
supported_languages_url = 'https://peer.tube/api/v1/videos/languages'
 | 
			
		||||
"""Base URL of the Peertube instance.  A list of instances is available at:
 | 
			
		||||
 | 
			
		||||
- https://instances.joinpeertube.org/instances
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
time_range_support = True
 | 
			
		||||
time_range_table = {
 | 
			
		||||
    'day': relativedelta(),
 | 
			
		||||
    'week': relativedelta(weeks=-1),
 | 
			
		||||
    'month': relativedelta(months=-1),
 | 
			
		||||
    'year': relativedelta(years=-1),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
safesearch = True
 | 
			
		||||
safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def minute_to_hm(minute):
 | 
			
		||||
    if isinstance(minute, int):
 | 
			
		||||
        return "%d:%02d" % (divmod(minute, 60))
 | 
			
		||||
    return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# do search-request
 | 
			
		||||
def request(query, params):
 | 
			
		||||
    sanitized_url = base_url.rstrip("/")
 | 
			
		||||
    pageno = (params["pageno"] - 1) * 15
 | 
			
		||||
    search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}"
 | 
			
		||||
    query_dict = {"search": query}
 | 
			
		||||
    language = params["language"].split("-")[0]
 | 
			
		||||
    if "all" != language and language in supported_languages:
 | 
			
		||||
        query_dict["languageOneOf"] = language
 | 
			
		||||
    params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
 | 
			
		||||
    """Assemble request for the Peertube API"""
 | 
			
		||||
 | 
			
		||||
    if not query:
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
 | 
			
		||||
    eng_lang = traits.get_language(params['searxng_locale'], None)
 | 
			
		||||
 | 
			
		||||
    params['url'] = (
 | 
			
		||||
        base_url.rstrip("/")
 | 
			
		||||
        + "/api/v1/search/videos?"
 | 
			
		||||
        + urlencode(
 | 
			
		||||
            {
 | 
			
		||||
                'search': query,
 | 
			
		||||
                'searchTarget': 'search-index',  # Vidiversum
 | 
			
		||||
                'resultType': 'videos',
 | 
			
		||||
                'start': (params['pageno'] - 1) * 10,
 | 
			
		||||
                'count': 10,
 | 
			
		||||
                # -createdAt: sort by date ascending / createdAt: date descending
 | 
			
		||||
                'sort': '-match',  # sort by *match descending*
 | 
			
		||||
                'nsfw': safesearch_table[params['safesearch']],
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    if eng_lang is not None:
 | 
			
		||||
        params['url'] += '&languageOneOf[]=' + eng_lang
 | 
			
		||||
        params['url'] += '&boostLanguages[]=' + eng_lang
 | 
			
		||||
 | 
			
		||||
    if params['time_range'] in time_range_table:
 | 
			
		||||
        time = datetime.now().date() + time_range_table[params['time_range']]
 | 
			
		||||
        params['url'] += '&startDate=' + time.isoformat()
 | 
			
		||||
 | 
			
		||||
    return params
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _get_offset_from_pageno(pageno):
 | 
			
		||||
    return (pageno - 1) * 15 + 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# get response from search-request
 | 
			
		||||
def response(resp):
 | 
			
		||||
    sanitized_url = base_url.rstrip("/")
 | 
			
		||||
    return video_response(resp)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def video_response(resp):
 | 
			
		||||
    """Parse video response from SepiaSearch and Peertube instances."""
 | 
			
		||||
    results = []
 | 
			
		||||
 | 
			
		||||
    search_res = loads(resp.text)
 | 
			
		||||
    json_data = resp.json()
 | 
			
		||||
 | 
			
		||||
    # return empty array if there are no results
 | 
			
		||||
    if "data" not in search_res:
 | 
			
		||||
    if 'data' not in json_data:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    # parse results
 | 
			
		||||
    for res in search_res["data"]:
 | 
			
		||||
        title = res["name"]
 | 
			
		||||
        url = sanitized_url + "/videos/watch/" + res["uuid"]
 | 
			
		||||
        description = res["description"]
 | 
			
		||||
        if description:
 | 
			
		||||
            content = html_to_text(res["description"])
 | 
			
		||||
        else:
 | 
			
		||||
            content = ""
 | 
			
		||||
        thumbnail = sanitized_url + res["thumbnailPath"]
 | 
			
		||||
        publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
 | 
			
		||||
    for result in json_data['data']:
 | 
			
		||||
        metadata = [
 | 
			
		||||
            x
 | 
			
		||||
            for x in [
 | 
			
		||||
                result.get('channel', {}).get('displayName'),
 | 
			
		||||
                result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
 | 
			
		||||
                ', '.join(result.get('tags', [])),
 | 
			
		||||
            ]
 | 
			
		||||
            if x
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
        results.append(
 | 
			
		||||
            {
 | 
			
		||||
                "template": "videos.html",
 | 
			
		||||
                "url": url,
 | 
			
		||||
                "title": title,
 | 
			
		||||
                "content": content,
 | 
			
		||||
                "publishedDate": publishedDate,
 | 
			
		||||
                "iframe_src": sanitized_url + res["embedPath"],
 | 
			
		||||
                "thumbnail": thumbnail,
 | 
			
		||||
                'url': result['url'],
 | 
			
		||||
                'title': result['name'],
 | 
			
		||||
                'content': html_to_text(result.get('description') or ''),
 | 
			
		||||
                'author': result.get('account', {}).get('displayName'),
 | 
			
		||||
                'length': minute_to_hm(result.get('duration')),
 | 
			
		||||
                'template': 'videos.html',
 | 
			
		||||
                'publishedDate': parse(result['publishedAt']),
 | 
			
		||||
                'iframe_src': result.get('embedUrl'),
 | 
			
		||||
                'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
 | 
			
		||||
                'metadata': ' | '.join(metadata),
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    # return results
 | 
			
		||||
    return results
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _fetch_supported_languages(resp):
 | 
			
		||||
    videolanguages = resp.json()
 | 
			
		||||
    peertube_languages = list(videolanguages.keys())
 | 
			
		||||
    return peertube_languages
 | 
			
		||||
def fetch_traits(engine_traits: EngineTraits):
 | 
			
		||||
    """Fetch languages from peertube's search-index source code.
 | 
			
		||||
 | 
			
		||||
    See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
 | 
			
		||||
 | 
			
		||||
    .. _8ed5c729 - Refactor and redesign client:
 | 
			
		||||
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
 | 
			
		||||
    .. _videoLanguages:
 | 
			
		||||
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    resp = network.get(
 | 
			
		||||
        'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
 | 
			
		||||
        # the response from search-index repository is very slow
 | 
			
		||||
        timeout=60,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    if not resp.ok:
 | 
			
		||||
        print("ERROR: response from peertube is not OK.")
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
 | 
			
		||||
    if not js_lang:
 | 
			
		||||
        print("ERROR: can't determine languages from peertube")
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
 | 
			
		||||
        try:
 | 
			
		||||
            eng_tag = lang.group(1)
 | 
			
		||||
            if eng_tag == 'oc':
 | 
			
		||||
                # Occitan is not known by babel, its closest relative is Catalan
 | 
			
		||||
                # but 'ca' is already in the list of engine_traits.languages -->
 | 
			
		||||
                # 'oc' will be ignored.
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
 | 
			
		||||
 | 
			
		||||
        except babel.UnknownLocaleError:
 | 
			
		||||
            print("ERROR: %s is unknown by babel" % eng_tag)
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        conflict = engine_traits.languages.get(sxng_tag)
 | 
			
		||||
        if conflict:
 | 
			
		||||
            if conflict != eng_tag:
 | 
			
		||||
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
 | 
			
		||||
            continue
 | 
			
		||||
        engine_traits.languages[sxng_tag] = eng_tag
 | 
			
		||||
 | 
			
		||||
    engine_traits.languages['zh_Hans'] = 'zh'
 | 
			
		||||
    engine_traits.languages['zh_Hant'] = 'zh'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1758,9 +1758,8 @@ engines:
 | 
			
		|||
    engine: peertube
 | 
			
		||||
    shortcut: ptb
 | 
			
		||||
    paging: true
 | 
			
		||||
    # https://instances.joinpeertube.org/instances
 | 
			
		||||
    base_url: https://peertube.biz/
 | 
			
		||||
    # base_url: https://tube.tardis.world/
 | 
			
		||||
    # alternatives see: https://instances.joinpeertube.org/instances
 | 
			
		||||
    # base_url: https://tube.4aem.com
 | 
			
		||||
    categories: videos
 | 
			
		||||
    disabled: true
 | 
			
		||||
    timeout: 6.0
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue