[feat] new engine: bt4g added & enabled; btdigg disabled by default

Disable btdigg by default because most SearXNG instances are blocked by btdigg (Cloudflare answers with "too many requests"). This implementation does not parse the HTML page because the site offers an XML (RSS) API. The RSS feed provides less data: it lacks, for example, the number of seeders/leechers and the list of files in the torrent. This is a tradeoff in favour of a "stable" engine, as the XML of the RSS feed will change far less often than the HTML page. Closes: https://github.com/searxng/searxng/issues/2553
2023-08-04 18:15:13 +02:00 · 2023-08-04 18:15:13 +02:00 · 0fc8f99ecc
commit 0fc8f99ecc
parent 5fcc751856
2 changed files with 85 additions and 0 deletions
--- a/searx/engines/bt4g.py
+++ b/searx/engines/bt4g.py
@ -0,0 +1,80 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """
 BT4G (Videos, Music, Files)
 """
 import re
 from datetime import datetime
 from urllib.parse import quote
 from lxml import etree
 from searx.utils import get_torrent_size
 # about: metadata describing the engine (website, whether an official API or
 # an API key is involved, and the format of the results that are parsed)
 about = {
    "website": 'https://bt4gprx.com',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'XML',
 }
 # engine dependent config
 categories = ['files']
 # request() forwards params['pageno'] as the `p` query parameter
 paging = True
 # request() switches the ordering to 'time' when a time range is set
 time_range_support = True
 # search-url: base URL and the query template that request() fills in; the
 # response to this URL is parsed as XML with `channel/item` entries
 url = 'https://bt4gprx.com'
 search_url = url + '/search?q={search_term}&orderby={order_by}&category={category}&p={pageno}&page=rss'
 # default ordering used by request(); one of: relevance, size, seeders, time
 bt4g_order_by = 'relevance'  # relevance, size, seeders, time
 # category sent with every query; one of: all, audio, movie, doc, app, other
 bt4g_category = 'all'  # all, audio, movie, doc, app, other
 def request(query, params):
    """Store the search URL for *query* in ``params['url']`` and return *params*.

    The URL is built from ``search_url`` with the URL-quoted query, the
    configured category (``bt4g_category``) and the page number taken from
    ``params['pageno']``.  Results are ordered by ``bt4g_order_by`` unless
    ``params['time_range']`` is set, in which case they are ordered by time.
    """
    # any requested time range overrides the configured ordering
    sort_key = 'time' if params['time_range'] else bt4g_order_by

    url_args = {
        'search_term': quote(query),
        'order_by': sort_key,
        'category': bt4g_category,
        'pageno': params['pageno'],
    }
    params['url'] = search_url.format(**url_args)
    return params
 def _parse_filesize(description):
    """Return the size found in an item description, or ``None``.

    The size is looked up in the second ``<br>``-separated segment of
    *description* and must be a number followed by an upper-case unit; number
    and unit are handed to ``get_torrent_size``.  ``None`` is returned when
    *description* is empty or does not have that shape.
    """
    segments = (description or '').split('<br>')
    if len(segments) < 2:
        return None
    # the capture group keeps the unit: '<number><UNIT>' -> [number, UNIT, rest]
    size_parts = re.split(r"([A-Z]+)", segments[1])
    if len(size_parts) < 2:
        return None
    return get_torrent_size(size_parts[0], size_parts[1])


 def _parse_published_date(pub_date):
    """Return *pub_date* parsed as a ``datetime``, or ``None`` if it cannot be parsed."""
    try:
        return datetime.strptime(pub_date, '%a,%d %b %Y %H:%M:%S %z')
    except (TypeError, ValueError):
        # TypeError: the item has no pubDate text; ValueError: unexpected format
        return None


 def response(resp):
    """Convert the XML feed in ``resp.content`` into a list of torrent results.

    Every ``channel/item`` element yields one result dict rendered with the
    ``torrent.html`` template.  ``seed`` and ``leech`` are always ``'N/A'``.

    The optional fields ``filesize`` and ``publishedDate`` are set to ``None``
    when the item's description or pubDate is missing or malformed, so that a
    single odd item does not discard the whole result list.  The required
    elements (``title``, ``guid``, ``link``) are still accessed directly; an
    item lacking one of them raises ``AttributeError`` as before.
    """
    feed = etree.XML(resp.content)

    # return empty array if nothing is found
    if len(feed) == 0:
        return []

    results = []
    for item in feed.xpath('./channel/item'):
        results.append(
            {
                'url': item.find("guid").text,
                'title': item.find("title").text,
                'magnetlink': item.find("link").text,
                'seed': 'N/A',
                'leech': 'N/A',
                # findtext() yields None for a missing element instead of raising
                'filesize': _parse_filesize(item.findtext("description")),
                'publishedDate': _parse_published_date(item.findtext("pubDate")),
                'template': 'torrent.html',
            }
        )
    return results
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -415,6 +415,7 @@ engines:
  - name: btdigg
    engine: btdigg
    shortcut: bt
    disabled: true
  - name: ccc-tv
    engine: xpath
@ -1926,6 +1927,10 @@ engines:
      results: HTML
      language: ja
  - name: bt4g
    engine: bt4g
    shortcut: bt4g
 # Doku engine lets you access any Doku wiki instance:
 # A public one or a private/corporate one.
 #  - name: ubuntuwiki