From d92b3d96fdfad4dd009cefa3762d70fa76a987c7 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 4 Feb 2022 01:11:44 +0100 Subject: [PATCH] [fix] solidtorrents engine: JSON API no longer exists The API endpoint we were using does not exist anymore. This patch is a rewrite that parses the HTML page. Related: https://github.com/paulgoio/searxng/issues/17 Closes: https://github.com/searxng/searxng/issues/858 Signed-off-by: Markus Heiser --- searx/engines/solidtorrents.py | 82 ++++++++++++++++++++++++---------- searx/settings.yml | 7 ++- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 614b38277..6a98a1c29 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -1,51 +1,85 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Solid Torrents - +"""SolidTorrents """ -from json import loads +from datetime import datetime from urllib.parse import urlencode +import random + +from lxml import html + +from searx.utils import extract_text, eval_xpath, eval_xpath_getindex about = { "website": 'https://www.solidtorrents.net/', "wikidata_id": None, "official_api_documentation": None, - "use_official_api": True, + "use_official_api": False, "require_api_key": False, - "results": 'JSON', + "results": 'HTML', } categories = ['files'] paging = True -base_url = 'https://www.solidtorrents.net/' -search_url = base_url + 'api/v1/search?{query}' +base_url = '' +base_url_rand = '' + +units = {"B": 1, "KB": 2 ** 10, "MB": 2 ** 20, "GB": 2 ** 30, "TB": 2 ** 40} + + +def size2int(size_str): + n, u = size_str.split() + return int(float(n.strip()) * units[u.strip()]) def request(query, params): - skip = (params['pageno'] - 1) * 20 - query = urlencode({'q': query, 'skip': skip}) + global base_url_rand # pylint: disable=global-statement + if isinstance(base_url, list): + base_url_rand = random.choice(base_url) + else: + base_url_rand = base_url + search_url = 
base_url_rand + '/search?{query}' + page = (params['pageno'] - 1) * 20 + query = urlencode({'q': query, 'page': page}) params['url'] = search_url.format(query=query) - logger.debug("query_url --> %s", params['url']) return params def response(resp): results = [] - search_results = loads(resp.text) + dom = html.fromstring(resp.text) + + for result in eval_xpath(dom, '//div[contains(@class, "search-result")]'): + a = eval_xpath_getindex(result, './div/h5/a', 0, None) + if a is None: + continue + title = extract_text(a) + url = eval_xpath_getindex(a, '@href', 0, None) + stats = eval_xpath(result, './div//div[contains(@class, "stats")]/div') + filesize = size2int(extract_text(stats[1])) + leech = extract_text(stats[2]) + seed = extract_text(stats[3]) + magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None) + + params = { + 'seed': seed, + 'leech': leech, + 'title': title, + 'url': base_url_rand + url, + 'filesize': filesize, + 'magnetlink': magnet, + 'template': "torrent.html", + } + + date_str = extract_text(stats[4]) + + try: + params['publishedDate'] = datetime.strptime(date_str, '%b %d, %Y') + except ValueError: + pass + + results.append(params) - for result in search_results["results"]: - results.append( - { - 'infohash': result["infohash"], - 'seed': result["swarm"]["seeders"], - 'leech': result["swarm"]["leechers"], - 'title': result["title"], - 'url': "https://solidtorrents.net/view/" + result["_id"], - 'filesize': result["size"], - 'magnetlink': result["magnet"], - 'template': "torrent.html", - } - ) return results diff --git a/searx/settings.yml b/searx/settings.yml index 9d91e5329..3d4c0e18a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1257,8 +1257,13 @@ engines: - name: solidtorrents engine: solidtorrents shortcut: solid - timeout: 3.0 + timeout: 4.0 disabled: false + base_url: + - https://solidtorrents.net + - https://solidtorrents.eu + - https://solidtorrents.to + - https://bitsearch.to # For this 
demo of the sqlite engine download: # https://liste.mediathekview.de/filmliste-v2.db.bz2