From d92b3d96fdfad4dd009cefa3762d70fa76a987c7 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 4 Feb 2022 01:11:44 +0100 Subject: [PATCH 1/4] [fix] solidtorrents engine: JSON API no longer exists The API endpoint we were using does not exist anymore. This patch is a rewrite that parses the HTML page. Related: https://github.com/paulgoio/searxng/issues/17 Closes: https://github.com/searxng/searxng/issues/858 Signed-off-by: Markus Heiser --- searx/engines/solidtorrents.py | 82 ++++++++++++++++++++++++---------- searx/settings.yml | 7 ++- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 614b38277..6a98a1c29 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -1,51 +1,85 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Solid Torrents - +"""SolidTorrents """ -from json import loads +from datetime import datetime from urllib.parse import urlencode +import random + +from lxml import html + +from searx.utils import extract_text, eval_xpath, eval_xpath_getindex about = { "website": 'https://www.solidtorrents.net/', "wikidata_id": None, "official_api_documentation": None, - "use_official_api": True, + "use_official_api": False, "require_api_key": False, - "results": 'JSON', + "results": 'HTML', } categories = ['files'] paging = True -base_url = 'https://www.solidtorrents.net/' -search_url = base_url + 'api/v1/search?{query}' +base_url = '' +base_url_rand = '' + +units = {"B": 1, "KB": 2 ** 10, "MB": 2 ** 20, "GB": 2 ** 30, "TB": 2 ** 40} + + +def size2int(size_str): + n, u = size_str.split() + return int(float(n.strip()) * units[u.strip()]) def request(query, params): - skip = (params['pageno'] - 1) * 20 - query = urlencode({'q': query, 'skip': skip}) + global base_url_rand # pylint: disable=global-statement + if isinstance(base_url, list): + base_url_rand = random.choice(base_url) + else: + base_url_rand = base_url + search_url = 
base_url_rand + '/search?{query}' + page = (params['pageno'] - 1) * 20 + query = urlencode({'q': query, 'page': page}) params['url'] = search_url.format(query=query) - logger.debug("query_url --> %s", params['url']) return params def response(resp): results = [] - search_results = loads(resp.text) + dom = html.fromstring(resp.text) + + for result in eval_xpath(dom, '//div[contains(@class, "search-result")]'): + a = eval_xpath_getindex(result, './div/h5/a', 0, None) + if a is None: + continue + title = extract_text(a) + url = eval_xpath_getindex(a, '@href', 0, None) + stats = eval_xpath(result, './div//div[contains(@class, "stats")]/div') + filesize = size2int(extract_text(stats[1])) + leech = extract_text(stats[2]) + seed = extract_text(stats[3]) + magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None) + + params = { + 'seed': seed, + 'leech': leech, + 'title': title, + 'url': base_url_rand + url, + 'filesize': filesize, + 'magnetlink': magnet, + 'template': "torrent.html", + } + + date_str = extract_text(stats[4]) + + try: + params['publishedDate'] = datetime.strptime(date_str, '%b %d, %Y') + except ValueError: + pass + + results.append(params) - for result in search_results["results"]: - results.append( - { - 'infohash': result["infohash"], - 'seed': result["swarm"]["seeders"], - 'leech': result["swarm"]["leechers"], - 'title': result["title"], - 'url': "https://solidtorrents.net/view/" + result["_id"], - 'filesize': result["size"], - 'magnetlink': result["magnet"], - 'template': "torrent.html", - } - ) return results diff --git a/searx/settings.yml b/searx/settings.yml index 9d91e5329..3d4c0e18a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1257,8 +1257,13 @@ engines: - name: solidtorrents engine: solidtorrents shortcut: solid - timeout: 3.0 + timeout: 4.0 disabled: false + base_url: + - https://solidtorrents.net + - https://solidtorrents.eu + - https://solidtorrents.to + - https://bitsearch.to # For this 
demo of the sqlite engine download: # https://liste.mediathekview.de/filmliste-v2.db.bz2 From f9c486814203f25b35903880dd038b35ed0580a2 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 4 Feb 2022 11:00:56 +0100 Subject: [PATCH 2/4] [fix] solidtorrents engine: use get_torrent_size from searx.utils Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#pullrequestreview-872858489 Signed-off-by: Markus Heiser --- searx/engines/solidtorrents.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 6a98a1c29..7f005c4a3 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -9,7 +9,12 @@ import random from lxml import html -from searx.utils import extract_text, eval_xpath, eval_xpath_getindex +from searx.utils import ( + extract_text, + eval_xpath, + eval_xpath_getindex, + get_torrent_size, +) about = { "website": 'https://www.solidtorrents.net/', @@ -26,13 +31,6 @@ paging = True base_url = '' base_url_rand = '' -units = {"B": 1, "KB": 2 ** 10, "MB": 2 ** 20, "GB": 2 ** 30, "TB": 2 ** 40} - - -def size2int(size_str): - n, u = size_str.split() - return int(float(n.strip()) * units[u.strip()]) - def request(query, params): global base_url_rand # pylint: disable=global-statement @@ -58,7 +56,8 @@ def response(resp): title = extract_text(a) url = eval_xpath_getindex(a, '@href', 0, None) stats = eval_xpath(result, './div//div[contains(@class, "stats")]/div') - filesize = size2int(extract_text(stats[1])) + n, u = extract_text(stats[1]).split() + filesize = get_torrent_size(n, u) leech = extract_text(stats[2]) seed = extract_text(stats[3]) magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None) From d6061b7c8a9607d13bd1569fd409a4f0167afd42 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 4 Feb 2022 14:30:00 +0100 Subject: [PATCH 3/4] [mod] solidtorrents engine: add metadata & torrentfile 
BTW: define min_len in eval_xpath_list of 'stats' list Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#pullrequestreview-872910744 Signed-off-by: Markus Heiser --- searx/engines/solidtorrents.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 7f005c4a3..1e58996e5 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -13,6 +13,7 @@ from searx.utils import ( extract_text, eval_xpath, eval_xpath_getindex, + eval_xpath_list, get_torrent_size, ) @@ -55,11 +56,14 @@ def response(resp): continue title = extract_text(a) url = eval_xpath_getindex(a, '@href', 0, None) - stats = eval_xpath(result, './div//div[contains(@class, "stats")]/div') + categ = eval_xpath(result, './div//a[contains(@class, "category")]') + metadata = extract_text(categ) + stats = eval_xpath_list(result, './div//div[contains(@class, "stats")]/div', min_len=5) n, u = extract_text(stats[1]).split() filesize = get_torrent_size(n, u) leech = extract_text(stats[2]) seed = extract_text(stats[3]) + torrentfile = eval_xpath_getindex(result, './div//a[contains(@class, "dl-torrent")]/@href', 0, None) magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None) params = { @@ -69,6 +73,8 @@ def response(resp): 'url': base_url_rand + url, 'filesize': filesize, 'magnetlink': magnet, + 'torrentfile': torrentfile, + 'metadata': metadata, 'template': "torrent.html", } From ddc2102a075d9690b9e98e16cef2ae3c1e5268f3 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 4 Feb 2022 14:41:53 +0100 Subject: [PATCH 4/4] [fix] solidtorrents engine: store random base_url in param Two different threads (= two different user queries) can call the request function in a row and then the response function. The namespace will be the same since this is the same engine.
To keep exactly the same value, ``base_url`` must be stored in params and then retrieved using ``resp.search_params["base_url"]``. Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#discussion_r799324861 Signed-off-by: Markus Heiser --- searx/engines/solidtorrents.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 1e58996e5..9b5d543d8 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -29,17 +29,16 @@ about = { categories = ['files'] paging = True -base_url = '' -base_url_rand = '' +# base_url can be overwritten by a list of URLs in the settings.yml +base_url = 'https://solidtorrents.net' def request(query, params): - global base_url_rand # pylint: disable=global-statement if isinstance(base_url, list): - base_url_rand = random.choice(base_url) + params['base_url'] = random.choice(base_url) else: - base_url_rand = base_url - search_url = base_url_rand + '/search?{query}' + params['base_url'] = base_url + search_url = params['base_url'] + '/search?{query}' page = (params['pageno'] - 1) * 20 query = urlencode({'q': query, 'page': page}) params['url'] = search_url.format(query=query) @@ -70,7 +69,7 @@ def response(resp): 'seed': seed, 'leech': leech, 'title': title, - 'url': base_url_rand + url, + 'url': resp.search_params['base_url'] + url, 'filesize': filesize, 'magnetlink': magnet, 'torrentfile': torrentfile,