mirror of https://github.com/searxng/searxng.git
[mod] engine torznab - refactor & option to hide links
- torznab engine using types and clearer code - torznab option to hide torrent and magnet links. - document the torznab engine - add myself to authors Closes: https://github.com/searxng/searxng/issues/1124 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
da7c30291d
commit
401561cb58
|
@ -168,3 +168,4 @@ features or generally made searx better:
|
||||||
- Milad Laly @Milad-Laly
|
- Milad Laly @Milad-Laly
|
||||||
- @llmII
|
- @llmII
|
||||||
- @blob42 `<https://blob42.xyz>`_
|
- @blob42 `<https://blob42.xyz>`_
|
||||||
|
- Paolo Basso `<https://github.com/paolobasso99>`_
|
|
@ -0,0 +1,2 @@
|
||||||
|
.. automodule:: searx.engines.torznab
|
||||||
|
:members:
|
|
@ -1,21 +1,83 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
"""Torznab WebAPI
|
""".. _torznab engine:
|
||||||
|
|
||||||
A engine that implements the `torznab WebAPI`_.
|
==============
|
||||||
|
Torznab WebAPI
|
||||||
|
==============
|
||||||
|
|
||||||
.. _torznab WebAPI: https://torznab.github.io/spec-1.3-draft/torznab
|
.. contents:: Contents
|
||||||
|
:depth: 2
|
||||||
|
:local:
|
||||||
|
:backlinks: entry
|
||||||
|
|
||||||
|
Torznab_ is an API specification that provides a standardized way to query
|
||||||
|
torrent sites for content. It is used by a number of torrent applications,
|
||||||
|
including Prowlarr_ and Jackett_.
|
||||||
|
|
||||||
|
Using this engine together with Prowlarr_ or Jackett_ allows you to search
|
||||||
|
a huge number of torrent sites which are not directly supported.
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The engine has the following settings:
|
||||||
|
|
||||||
|
``base_url``:
|
||||||
|
Torznab endpoint URL.
|
||||||
|
|
||||||
|
``api_key``:
|
||||||
|
The API key to use for authentication.
|
||||||
|
|
||||||
|
``torznab_categories``:
|
||||||
|
The categories to use for searching. This is a list of category IDs. See
|
||||||
|
Prowlarr-categories_ or Jackett-categories_ for more information.
|
||||||
|
|
||||||
|
``show_torrent_files``:
|
||||||
|
Whether to show the torrent file in the search results. Be careful as using
|
||||||
|
this with Prowlarr_ or Jackett_ leaks the API key. This should be used only
|
||||||
|
if you are querying a Torznab endpoint without authentication or if the
|
||||||
|
instance is private. Be aware that private trackers may ban you if you share
|
||||||
|
the torrent file. Defaults to ``false``.
|
||||||
|
|
||||||
|
``show_magnet_links``:
|
||||||
|
Whether to show the magnet link in the search results. Be aware that private
|
||||||
|
trackers may ban you if you share the magnet link. Defaults to ``true``.
|
||||||
|
|
||||||
|
.. _Torznab:
|
||||||
|
https://torznab.github.io/spec-1.3-draft/index.html
|
||||||
|
.. _Prowlarr:
|
||||||
|
https://github.com/Prowlarr/Prowlarr
|
||||||
|
.. _Jackett:
|
||||||
|
https://github.com/Jackett/Jackett
|
||||||
|
.. _Prowlarr-categories:
|
||||||
|
https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories
|
||||||
|
.. _Jackett-categories:
|
||||||
|
https://github.com/Jackett/Jackett/wiki/Jackett-Categories
|
||||||
|
|
||||||
|
|
||||||
|
Implementations
|
||||||
|
===============
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from typing import List, Dict, Any
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
from lxml import etree
|
from lxml import etree # type: ignore
|
||||||
|
|
||||||
from searx.exceptions import SearxEngineAPIException
|
from searx.exceptions import SearxEngineAPIException
|
||||||
|
|
||||||
# about
|
if TYPE_CHECKING:
|
||||||
about = {
|
import httpx
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
# engine settings
|
||||||
|
about: Dict[str, Any] = {
|
||||||
"website": None,
|
"website": None,
|
||||||
"wikidata_id": None,
|
"wikidata_id": None,
|
||||||
"official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
|
"official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
|
||||||
|
@ -23,27 +85,30 @@ about = {
|
||||||
"require_api_key": False,
|
"require_api_key": False,
|
||||||
"results": 'XML',
|
"results": 'XML',
|
||||||
}
|
}
|
||||||
|
categories: List[str] = ['files']
|
||||||
categories = ['files']
|
paging: bool = False
|
||||||
paging = False
|
time_range_support: bool = False
|
||||||
time_range_support = False
|
|
||||||
|
|
||||||
# defined in settings.yml
|
# defined in settings.yml
|
||||||
# example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
|
# example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
|
||||||
base_url = ''
|
base_url: str = ''
|
||||||
api_key = ''
|
api_key: str = ''
|
||||||
# https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
|
# https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
|
||||||
torznab_categories = []
|
torznab_categories: List[str] = []
|
||||||
|
show_torrent_files: bool = False
|
||||||
|
show_magnet_links: bool = True
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings=None): # pylint: disable=unused-argument
|
def init(engine_settings=None): # pylint: disable=unused-argument
|
||||||
|
"""Initialize the engine."""
|
||||||
if len(base_url) < 1:
|
if len(base_url) < 1:
|
||||||
raise ValueError('missing torznab base_url')
|
raise ValueError('missing torznab base_url')
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Build the request params."""
|
||||||
|
search_url: str = base_url + '?t=search&q={search_query}'
|
||||||
|
|
||||||
search_url = base_url + '?t=search&q={search_query}'
|
|
||||||
if len(api_key) > 0:
|
if len(api_key) > 0:
|
||||||
search_url += '&apikey={api_key}'
|
search_url += '&apikey={api_key}'
|
||||||
if len(torznab_categories) > 0:
|
if len(torznab_categories) > 0:
|
||||||
|
@ -56,88 +121,135 @@ def request(query, params):
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp: httpx.Response) -> List[Dict[str, Any]]:
|
||||||
|
"""Parse the XML response and return a list of results."""
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
search_results = etree.XML(resp.content)
|
search_results = etree.XML(resp.content)
|
||||||
|
|
||||||
# handle errors
|
# handle errors: https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
|
||||||
# https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
|
|
||||||
if search_results.tag == "error":
|
if search_results.tag == "error":
|
||||||
raise SearxEngineAPIException(search_results.get("description"))
|
raise SearxEngineAPIException(search_results.get("description"))
|
||||||
|
|
||||||
for item in search_results[0].iterfind('item'):
|
channel: etree.Element = search_results[0]
|
||||||
result = {'template': 'torrent.html'}
|
|
||||||
|
|
||||||
enclosure = item.find('enclosure')
|
|
||||||
|
|
||||||
result["filesize"] = int(enclosure.get('length'))
|
|
||||||
|
|
||||||
link = get_property(item, 'link')
|
|
||||||
guid = get_property(item, 'guid')
|
|
||||||
comments = get_property(item, 'comments')
|
|
||||||
|
|
||||||
# define url
|
|
||||||
result["url"] = enclosure.get('url')
|
|
||||||
if comments is not None and comments.startswith('http'):
|
|
||||||
result["url"] = comments
|
|
||||||
elif guid is not None and guid.startswith('http'):
|
|
||||||
result["url"] = guid
|
|
||||||
|
|
||||||
# define torrent file url
|
|
||||||
result["torrentfile"] = None
|
|
||||||
if enclosure.get('url').startswith("http"):
|
|
||||||
result["torrentfile"] = enclosure.get('url')
|
|
||||||
elif link is not None and link.startswith('http'):
|
|
||||||
result["torrentfile"] = link
|
|
||||||
|
|
||||||
# define magnet link
|
|
||||||
result["magnetlink"] = get_torznab_attr(item, 'magneturl')
|
|
||||||
if result["magnetlink"] is None:
|
|
||||||
if enclosure.get('url').startswith("magnet"):
|
|
||||||
result["magnetlink"] = enclosure.get('url')
|
|
||||||
elif link is not None and link.startswith('magnet'):
|
|
||||||
result["magnetlink"] = link
|
|
||||||
|
|
||||||
result["title"] = get_property(item, 'title')
|
|
||||||
result["files"] = get_property(item, 'files')
|
|
||||||
|
|
||||||
result["publishedDate"] = None
|
|
||||||
try:
|
|
||||||
result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z')
|
|
||||||
except (ValueError, TypeError) as e:
|
|
||||||
logger.debug("ignore exception (publishedDate): %s", e)
|
|
||||||
|
|
||||||
result["seed"] = get_torznab_attr(item, 'seeders')
|
|
||||||
|
|
||||||
# define leech
|
|
||||||
result["leech"] = get_torznab_attr(item, 'leechers')
|
|
||||||
if result["leech"] is None and result["seed"] is not None:
|
|
||||||
peers = get_torznab_attr(item, 'peers')
|
|
||||||
if peers is not None:
|
|
||||||
result["leech"] = int(peers) - int(result["seed"])
|
|
||||||
|
|
||||||
|
item: etree.Element
|
||||||
|
for item in channel.iterfind('item'):
|
||||||
|
result: Dict[str, Any] = build_result(item)
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def get_property(item, property_name):
|
def build_result(item: etree.Element) -> Dict[str, Any]:
    """Build a result from an XML ``<item>`` element.

    The returned dict uses the ``torrent.html`` template.  The keys
    ``torrentfile`` and ``magnetlink`` are only filled when the module-level
    settings ``show_torrent_files`` / ``show_magnet_links`` are enabled;
    otherwise they stay ``None``.

    Raises :py:obj:`ValueError` if the size found in the item is not an
    integer literal.
    """

    # extract attributes from XML
    # see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes
    enclosure: etree.Element | None = item.find('enclosure')
    enclosure_url: str | None = None
    if enclosure is not None:
        enclosure_url = enclosure.get('url')

    size = get_attribute(item, 'size')
    # Test against None explicitly: an lxml element without child elements is
    # falsy, so a plain truth test would skip every ``<enclosure .../>`` and
    # the fallback to its ``length`` attribute would never be used.
    if not size and enclosure is not None:
        size = enclosure.get('length')
    if size:
        size = int(size)

    guid = get_attribute(item, 'guid')
    comments = get_attribute(item, 'comments')
    pubDate = get_attribute(item, 'pubDate')
    seeders = get_torznab_attribute(item, 'seeders')
    leechers = get_torznab_attribute(item, 'leechers')
    peers = get_torznab_attribute(item, 'peers')

    # map attributes to searx result
    result: Dict[str, Any] = {
        'template': 'torrent.html',
        'title': get_attribute(item, 'title'),
        'filesize': size,
        'files': get_attribute(item, 'files'),
        'seed': seeders,
        'leech': _map_leechers(leechers, seeders, peers),
        'url': _map_result_url(guid, comments),
        'publishedDate': _map_published_date(pubDate),
        'torrentfile': None,
        'magnetlink': None,
    }

    # links are opt-in/opt-out per engine instance (see module settings)
    link = get_attribute(item, 'link')
    if show_torrent_files:
        result['torrentfile'] = _map_torrent_file(link, enclosure_url)
    if show_magnet_links:
        magneturl = get_torznab_attribute(item, 'magneturl')
        result['magnetlink'] = _map_magnet_link(magneturl, guid, enclosure_url, link)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _map_result_url(guid: str | None, comments: str | None) -> str | None:
|
||||||
|
if guid and guid.startswith('http'):
|
||||||
|
return guid
|
||||||
|
if comments and comments.startswith('http'):
|
||||||
|
return comments
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _map_leechers(leechers: str | None, seeders: str | None, peers: str | None) -> str | None:
|
||||||
|
if leechers:
|
||||||
|
return leechers
|
||||||
|
if seeders and peers:
|
||||||
|
return str(int(peers) - int(seeders))
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _map_published_date(pubDate: str | None) -> datetime | None:
|
||||||
|
if pubDate is not None:
|
||||||
|
try:
|
||||||
|
return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %z')
|
||||||
|
except (ValueError, TypeError) as e:
|
||||||
|
logger.debug("ignore exception (publishedDate): %s", e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _map_torrent_file(link: str | None, enclosure_url: str | None) -> str | None:
|
||||||
|
if link and link.startswith('http'):
|
||||||
|
return link
|
||||||
|
if enclosure_url and enclosure_url.startswith('http'):
|
||||||
|
return enclosure_url
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _map_magnet_link(
|
||||||
|
magneturl: str | None,
|
||||||
|
guid: str | None,
|
||||||
|
enclosure_url: str | None,
|
||||||
|
link: str | None,
|
||||||
|
) -> str | None:
|
||||||
|
if magneturl and magneturl.startswith('magnet'):
|
||||||
|
return magneturl
|
||||||
|
if guid and guid.startswith('magnet'):
|
||||||
|
return guid
|
||||||
|
if enclosure_url and enclosure_url.startswith('magnet'):
|
||||||
|
return enclosure_url
|
||||||
|
if link and link.startswith('magnet'):
|
||||||
|
return link
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_attribute(item: etree.Element, property_name: str) -> str | None:
|
||||||
|
"""Get attribute from item."""
|
||||||
|
property_element: etree.Element | None = item.find(property_name)
|
||||||
if property_element is not None:
|
if property_element is not None:
|
||||||
return property_element.text
|
return property_element.text
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_torznab_attr(item, attr_name):
|
def get_torznab_attribute(item: etree.Element, attribute_name: str) -> str | None:
|
||||||
element = item.find(
|
"""Get torznab special attribute from item."""
|
||||||
'.//torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name),
|
element: etree.Element | None = item.find(
|
||||||
|
'.//torznab:attr[@name="{attribute_name}"]'.format(attribute_name=attribute_name),
|
||||||
{'torznab': 'http://torznab.com/schemas/2015/feed'},
|
{'torznab': 'http://torznab.com/schemas/2015/feed'},
|
||||||
)
|
)
|
||||||
|
|
||||||
if element is not None:
|
if element is not None:
|
||||||
return element.get("value")
|
return element.get("value")
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -1392,15 +1392,18 @@ engines:
|
||||||
shortcut: tch
|
shortcut: tch
|
||||||
|
|
||||||
# torznab engine lets you query any torznab compatible indexer. Using this
|
# torznab engine lets you query any torznab compatible indexer. Using this
|
||||||
# engine in combination with Jackett (https://github.com/Jackett/Jackett)
|
# engine in combination with Jackett opens the possibility to query a lot of
|
||||||
# opens the possibility to query a lot of public and private indexers directly
|
# public and private indexers directly from SearXNG. More details at:
|
||||||
# from SearXNG.
|
# https://docs.searxng.org/src/searx.engines.torznab.html
|
||||||
# - name: torznab
|
#
|
||||||
|
# - name: Torznab EZTV
|
||||||
# engine: torznab
|
# engine: torznab
|
||||||
# shortcut: trz
|
# shortcut: eztv
|
||||||
# base_url: http://localhost:9117/api/v2.0/indexers/all/results/torznab
|
# base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab
|
||||||
# enable_http: true # if using localhost
|
# enable_http: true # if using localhost
|
||||||
# api_key: xxxxxxxxxxxxxxx
|
# api_key: xxxxxxxxxxxxxxx
|
||||||
|
# show_magnet_links: true
|
||||||
|
# show_torrent_files: false
|
||||||
# # https://github.com/Jackett/Jackett/wiki/Jackett-Categories
|
# # https://github.com/Jackett/Jackett/wiki/Jackett-Categories
|
||||||
# torznab_categories: # optional
|
# torznab_categories: # optional
|
||||||
# - 2000
|
# - 2000
|
||||||
|
|
Loading…
Reference in New Issue