forked from zaclys/searxng
		
	[mod] engine torznab - refactor & option to hide links
- torznab engine using types and clearer code - torznab option to hide torrent and magnet links. - document the torznab engine - add myself to authors Closes: https://github.com/searxng/searxng/issues/1124 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									da7c30291d
								
							
						
					
					
						commit
						401561cb58
					
				
					 4 changed files with 201 additions and 83 deletions
				
			
		| 
						 | 
				
			
			@ -168,3 +168,4 @@ features or generally made searx better:
 | 
			
		|||
- Milad Laly @Milad-Laly
 | 
			
		||||
- @llmII
 | 
			
		||||
- @blob42 `<https://blob42.xyz>`_
 | 
			
		||||
- Paolo Basso `<https://github.com/paolobasso99>`_
 | 
			
		||||
							
								
								
									
										2
									
								
								docs/src/searx.engines.torznab.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								docs/src/searx.engines.torznab.rst
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,2 @@
 | 
			
		|||
.. automodule:: searx.engines.torznab
 | 
			
		||||
   :members:
 | 
			
		||||
| 
						 | 
				
			
			@ -1,21 +1,83 @@
 | 
			
		|||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
# lint: pylint
 | 
			
		||||
"""Torznab WebAPI
 | 
			
		||||
""".. _torznab engine:
 | 
			
		||||
 | 
			
		||||
A engine that implements the `torznab WebAPI`_.
 | 
			
		||||
==============
 | 
			
		||||
Torznab WebAPI
 | 
			
		||||
==============
 | 
			
		||||
 | 
			
		||||
.. _torznab WebAPI: https://torznab.github.io/spec-1.3-draft/torznab
 | 
			
		||||
.. contents:: Contents
 | 
			
		||||
   :depth: 2
 | 
			
		||||
   :local:
 | 
			
		||||
   :backlinks: entry
 | 
			
		||||
 | 
			
		||||
Torznab_ is an API specification that provides a standardized way to query
 | 
			
		||||
torrent sites for content. It is used by a number of torrent applications,
 | 
			
		||||
including Prowlarr_ and Jackett_.
 | 
			
		||||
 | 
			
		||||
Using this engine together with Prowlarr_ or Jackett_ allows you to search
 | 
			
		||||
a huge number of torrent sites which are not directly supported.
 | 
			
		||||
 | 
			
		||||
Configuration
 | 
			
		||||
=============
 | 
			
		||||
 | 
			
		||||
The engine has the following settings:
 | 
			
		||||
 | 
			
		||||
``base_url``:
 | 
			
		||||
  Torznab endpoint URL.
 | 
			
		||||
 | 
			
		||||
``api_key``:
 | 
			
		||||
  The API key to use for authentication.
 | 
			
		||||
 | 
			
		||||
``torznab_categories``:
 | 
			
		||||
  The categories to use for searching. This is a list of category IDs.  See
 | 
			
		||||
  Prowlarr-categories_ or Jackett-categories_ for more information.
 | 
			
		||||
 | 
			
		||||
``show_torrent_files``:
 | 
			
		||||
  Whether to show the torrent file in the search results.  Be careful as using
 | 
			
		||||
  this with Prowlarr_ or Jackett_ leaks the API key.  This should be used only
 | 
			
		||||
  if you are querying a Torznab endpoint without authentication or if the
 | 
			
		||||
  instance is private.  Be aware that private trackers may ban you if you share
 | 
			
		||||
  the torrent file.  Defaults to ``false``.
 | 
			
		||||
 | 
			
		||||
``show_magnet_links``:
 | 
			
		||||
  Whether to show the magnet link in the search results.  Be aware that private
 | 
			
		||||
  trackers may ban you if you share the magnet link.  Defaults to ``true``.
 | 
			
		||||
 | 
			
		||||
.. _Torznab:
 | 
			
		||||
   https://torznab.github.io/spec-1.3-draft/index.html
 | 
			
		||||
.. _Prowlarr:
 | 
			
		||||
   https://github.com/Prowlarr/Prowlarr
 | 
			
		||||
.. _Jackett:
 | 
			
		||||
   https://github.com/Jackett/Jackett
 | 
			
		||||
.. _Prowlarr-categories:
 | 
			
		||||
   https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories
 | 
			
		||||
.. _Jackett-categories:
 | 
			
		||||
   https://github.com/Jackett/Jackett/wiki/Jackett-Categories
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Implementations
 | 
			
		||||
===============
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
from __future__ import annotations
 | 
			
		||||
from typing import TYPE_CHECKING
 | 
			
		||||
 | 
			
		||||
from typing import List, Dict, Any
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from urllib.parse import quote
 | 
			
		||||
from lxml import etree
 | 
			
		||||
from lxml import etree  # type: ignore
 | 
			
		||||
 | 
			
		||||
from searx.exceptions import SearxEngineAPIException
 | 
			
		||||
 | 
			
		||||
# about
 | 
			
		||||
about = {
 | 
			
		||||
if TYPE_CHECKING:
 | 
			
		||||
    import httpx
 | 
			
		||||
    import logging
 | 
			
		||||
 | 
			
		||||
    logger: logging.Logger
 | 
			
		||||
 | 
			
		||||
# engine settings
 | 
			
		||||
about: Dict[str, Any] = {
 | 
			
		||||
    "website": None,
 | 
			
		||||
    "wikidata_id": None,
 | 
			
		||||
    "official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
 | 
			
		||||
| 
						 | 
				
			
			@ -23,27 +85,30 @@ about = {
 | 
			
		|||
    "require_api_key": False,
 | 
			
		||||
    "results": 'XML',
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
categories = ['files']
 | 
			
		||||
paging = False
 | 
			
		||||
time_range_support = False
 | 
			
		||||
categories: List[str] = ['files']
 | 
			
		||||
paging: bool = False
 | 
			
		||||
time_range_support: bool = False
 | 
			
		||||
 | 
			
		||||
# defined in settings.yml
 | 
			
		||||
# example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
 | 
			
		||||
base_url = ''
 | 
			
		||||
api_key = ''
 | 
			
		||||
base_url: str = ''
 | 
			
		||||
api_key: str = ''
 | 
			
		||||
# https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
 | 
			
		||||
torznab_categories = []
 | 
			
		||||
torznab_categories: List[str] = []
 | 
			
		||||
show_torrent_files: bool = False
 | 
			
		||||
show_magnet_links: bool = True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Initialize the engine.

    Checks that a Torznab endpoint URL has been configured.

    :param engine_settings: accepted for interface compatibility; not used.
    :raises ValueError: if the module-level ``base_url`` is empty or unset.
    """
    # A truthiness test also rejects ``None`` with the intended error, where
    # ``len(base_url)`` would have raised an unrelated ``TypeError``.
    if not base_url:
        raise ValueError('missing torznab base_url')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def request(query, params):
 | 
			
		||||
def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
 | 
			
		||||
    """Build the request params."""
 | 
			
		||||
    search_url: str = base_url + '?t=search&q={search_query}'
 | 
			
		||||
 | 
			
		||||
    search_url = base_url + '?t=search&q={search_query}'
 | 
			
		||||
    if len(api_key) > 0:
 | 
			
		||||
        search_url += '&apikey={api_key}'
 | 
			
		||||
    if len(torznab_categories) > 0:
 | 
			
		||||
| 
						 | 
				
			
			@ -56,88 +121,135 @@ def request(query, params):
 | 
			
		|||
    return params
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def response(resp):
 | 
			
		||||
def response(resp: httpx.Response) -> List[Dict[str, Any]]:
 | 
			
		||||
    """Parse the XML response and return a list of results."""
 | 
			
		||||
    results = []
 | 
			
		||||
 | 
			
		||||
    search_results = etree.XML(resp.content)
 | 
			
		||||
 | 
			
		||||
    # handle errors
 | 
			
		||||
    # https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
 | 
			
		||||
    # handle errors:  https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
 | 
			
		||||
    if search_results.tag == "error":
 | 
			
		||||
        raise SearxEngineAPIException(search_results.get("description"))
 | 
			
		||||
 | 
			
		||||
    for item in search_results[0].iterfind('item'):
 | 
			
		||||
        result = {'template': 'torrent.html'}
 | 
			
		||||
 | 
			
		||||
        enclosure = item.find('enclosure')
 | 
			
		||||
 | 
			
		||||
        result["filesize"] = int(enclosure.get('length'))
 | 
			
		||||
 | 
			
		||||
        link = get_property(item, 'link')
 | 
			
		||||
        guid = get_property(item, 'guid')
 | 
			
		||||
        comments = get_property(item, 'comments')
 | 
			
		||||
 | 
			
		||||
        # define url
 | 
			
		||||
        result["url"] = enclosure.get('url')
 | 
			
		||||
        if comments is not None and comments.startswith('http'):
 | 
			
		||||
            result["url"] = comments
 | 
			
		||||
        elif guid is not None and guid.startswith('http'):
 | 
			
		||||
            result["url"] = guid
 | 
			
		||||
 | 
			
		||||
        # define torrent file url
 | 
			
		||||
        result["torrentfile"] = None
 | 
			
		||||
        if enclosure.get('url').startswith("http"):
 | 
			
		||||
            result["torrentfile"] = enclosure.get('url')
 | 
			
		||||
        elif link is not None and link.startswith('http'):
 | 
			
		||||
            result["torrentfile"] = link
 | 
			
		||||
 | 
			
		||||
        # define magnet link
 | 
			
		||||
        result["magnetlink"] = get_torznab_attr(item, 'magneturl')
 | 
			
		||||
        if result["magnetlink"] is None:
 | 
			
		||||
            if enclosure.get('url').startswith("magnet"):
 | 
			
		||||
                result["magnetlink"] = enclosure.get('url')
 | 
			
		||||
            elif link is not None and link.startswith('magnet'):
 | 
			
		||||
                result["magnetlink"] = link
 | 
			
		||||
 | 
			
		||||
        result["title"] = get_property(item, 'title')
 | 
			
		||||
        result["files"] = get_property(item, 'files')
 | 
			
		||||
 | 
			
		||||
        result["publishedDate"] = None
 | 
			
		||||
        try:
 | 
			
		||||
            result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z')
 | 
			
		||||
        except (ValueError, TypeError) as e:
 | 
			
		||||
            logger.debug("ignore exception (publishedDate): %s", e)
 | 
			
		||||
 | 
			
		||||
        result["seed"] = get_torznab_attr(item, 'seeders')
 | 
			
		||||
 | 
			
		||||
        # define leech
 | 
			
		||||
        result["leech"] = get_torznab_attr(item, 'leechers')
 | 
			
		||||
        if result["leech"] is None and result["seed"] is not None:
 | 
			
		||||
            peers = get_torznab_attr(item, 'peers')
 | 
			
		||||
            if peers is not None:
 | 
			
		||||
                result["leech"] = int(peers) - int(result["seed"])
 | 
			
		||||
    channel: etree.Element = search_results[0]
 | 
			
		||||
 | 
			
		||||
    item: etree.Element
 | 
			
		||||
    for item in channel.iterfind('item'):
 | 
			
		||||
        result: Dict[str, Any] = build_result(item)
 | 
			
		||||
        results.append(result)
 | 
			
		||||
 | 
			
		||||
    return results
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_property(item, property_name):
 | 
			
		||||
    property_element = item.find(property_name)
 | 
			
		||||
def build_result(item: etree.Element) -> Dict[str, Any]:
    """Build a result from an XML item.

    The module-level switches ``show_torrent_files`` and ``show_magnet_links``
    decide whether the ``torrentfile`` / ``magnetlink`` fields are filled; when
    a switch is off the corresponding field stays ``None``.

    :param item: an ``<item>`` element of the Torznab response.
    :returns: a result dict for the ``torrent.html`` template.
    """

    # extract attributes from XML
    # see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes
    enclosure: etree.Element | None = item.find('enclosure')
    enclosure_url: str | None = None
    if enclosure is not None:
        enclosure_url = enclosure.get('url')

    size = get_attribute(item, 'size')
    # Compare with ``None`` explicitly: an lxml element without children (the
    # usual shape of ``<enclosure .../>``) is falsy, so a plain truth test
    # would never reach the ``length`` fallback.
    if not size and enclosure is not None:
        size = enclosure.get('length')
    if size:
        try:
            size = int(size)
        except ValueError:
            # a non-numeric size must not abort the whole response
            size = None

    guid = get_attribute(item, 'guid')
    comments = get_attribute(item, 'comments')
    pubDate = get_attribute(item, 'pubDate')
    seeders = get_torznab_attribute(item, 'seeders')
    leechers = get_torznab_attribute(item, 'leechers')
    peers = get_torznab_attribute(item, 'peers')

    # map attributes to searx result
    result: Dict[str, Any] = {
        'template': 'torrent.html',
        'title': get_attribute(item, 'title'),
        'filesize': size,
        'files': get_attribute(item, 'files'),
        'seed': seeders,
        'leech': _map_leechers(leechers, seeders, peers),
        'url': _map_result_url(guid, comments),
        'publishedDate': _map_published_date(pubDate),
        'torrentfile': None,
        'magnetlink': None,
    }

    link = get_attribute(item, 'link')
    if show_torrent_files:
        result['torrentfile'] = _map_torrent_file(link, enclosure_url)
    if show_magnet_links:
        magneturl = get_torznab_attribute(item, 'magneturl')
        result['magnetlink'] = _map_magnet_link(magneturl, guid, enclosure_url, link)
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _map_result_url(guid: str | None, comments: str | None) -> str | None:
    """Return the first of *guid*, *comments* that starts with ``http``, else ``None``."""
    for candidate in (guid, comments):
        if candidate and candidate.startswith('http'):
            return candidate
    return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _map_leechers(leechers: str | None, seeders: str | None, peers: str | None) -> str | None:
    """Return the leecher count, deriving it from peers and seeders if needed.

    :param leechers: leecher count as reported, returned unchanged when set.
    :param seeders: seeder count, used only when *leechers* is missing.
    :param peers: peer count, used only when *leechers* is missing.
    :returns: *leechers*, or ``peers - seeders`` as a string, or ``None`` when
        the value is unavailable or the counts are not integers.
    """
    if leechers:
        return leechers
    if seeders and peers:
        try:
            return str(int(peers) - int(seeders))
        except ValueError:
            # non-numeric counts: leave the value unknown instead of failing
            return None
    return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _map_published_date(pubDate: str | None) -> datetime | None:
    """Parse *pubDate* (``'%a, %d %b %Y %H:%M:%S %z'``) into a ``datetime``.

    Returns ``None`` when *pubDate* is ``None`` or cannot be parsed; a parse
    failure is only logged at debug level.
    """
    if pubDate is None:
        return None
    try:
        parsed = datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %z')
    except (ValueError, TypeError) as exc:
        logger.debug("ignore exception (publishedDate): %s", exc)
        return None
    return parsed
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _map_torrent_file(link: str | None, enclosure_url: str | None) -> str | None:
    """Return the first of *link*, *enclosure_url* that starts with ``http``, else ``None``."""
    for candidate in (link, enclosure_url):
        if candidate and candidate.startswith('http'):
            return candidate
    return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _map_magnet_link(
    magneturl: str | None,
    guid: str | None,
    enclosure_url: str | None,
    link: str | None,
) -> str | None:
    """Return the first argument, in declaration order, that starts with ``magnet``.

    Yields ``None`` when none of the four values qualifies.
    """
    candidates = (magneturl, guid, enclosure_url, link)
    return next((value for value in candidates if value and value.startswith('magnet')), None)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_attribute(item: etree.Element, property_name: str) -> str | None:
    """Return the text of the first *property_name* sub-element of *item*.

    ``None`` is returned when no such sub-element exists.
    """
    child: etree.Element | None = item.find(property_name)
    return None if child is None else child.text
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_torznab_attr(item, attr_name):
 | 
			
		||||
    element = item.find(
 | 
			
		||||
        './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name),
 | 
			
		||||
def get_torznab_attribute(item: etree.Element, attribute_name: str) -> str | None:
    """Return the ``value`` of the ``torznab:attr`` element named *attribute_name*.

    The element is searched anywhere below *item*; ``None`` is returned when
    it is not found.
    """
    namespaces = {'torznab': 'http://torznab.com/schemas/2015/feed'}
    node: etree.Element | None = item.find(f'.//torznab:attr[@name="{attribute_name}"]', namespaces)
    return node.get("value") if node is not None else None
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1392,15 +1392,18 @@ engines:
 | 
			
		|||
    shortcut: tch
 | 
			
		||||
 | 
			
		||||
  # torznab engine lets you query any torznab compatible indexer.  Using this
 | 
			
		||||
  # engine in combination with Jackett (https://github.com/Jackett/Jackett)
 | 
			
		||||
  # opens the possibility to query a lot of public and private indexers directly
 | 
			
		||||
  # from SearXNG.
 | 
			
		||||
  # - name: torznab
 | 
			
		||||
  # engine in combination with Jackett opens the possibility to query a lot of
 | 
			
		||||
  # public and private indexers directly from SearXNG. More details at:
 | 
			
		||||
  # https://docs.searxng.org/src/searx.engines.torznab.html
 | 
			
		||||
  #
 | 
			
		||||
  # - name: Torznab EZTV
 | 
			
		||||
  #   engine: torznab
 | 
			
		||||
  #   shortcut: trz
 | 
			
		||||
  #   base_url: http://localhost:9117/api/v2.0/indexers/all/results/torznab
 | 
			
		||||
  #   shortcut: eztv
 | 
			
		||||
  #   base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab
 | 
			
		||||
  #   enable_http: true  # if using localhost
 | 
			
		||||
  #   api_key: xxxxxxxxxxxxxxx
 | 
			
		||||
  #   show_magnet_links: true
 | 
			
		||||
  #   show_torrent_files: false
 | 
			
		||||
  #   # https://github.com/Jackett/Jackett/wiki/Jackett-Categories
 | 
			
		||||
  #   torznab_categories:  # optional
 | 
			
		||||
  #     - 2000
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue