From 27d74826f1e00302336c8720e014cd4cda871981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89milien=20Devos?= Date: Tue, 18 Aug 2020 19:02:41 +0300 Subject: [PATCH] [enh] add yggtorrent engine (#2135) --- searx/engines/yggtorrent.py | 124 ++++++++++++++++++++++++++++++++++++ searx/settings.yml | 7 ++ 2 files changed, 131 insertions(+) create mode 100644 searx/engines/yggtorrent.py diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py new file mode 100644 index 000000000..739574e8d --- /dev/null +++ b/searx/engines/yggtorrent.py @@ -0,0 +1,124 @@ +# Yggtorrent (Videos, Music, Files) +# +# @website https://www2.yggtorrent.si +# @provide-api no (nothing found) +# +# @using-api no +# @results HTML (using search portal) +# @stable no (HTML can change) +# @parse url, title, seed, leech, publishedDate, filesize + +from lxml import html +from operator import itemgetter +from datetime import datetime +from searx.engines.xpath import extract_text +from searx.url_utils import quote +from searx.utils import get_torrent_size +from searx.poolrequests import get as http_get + +# engine dependent config +categories = ['videos', 'music', 'files'] +paging = True + +# search-url +url = 'https://www2.yggtorrent.si/' +search_url = url + 'engine/search?name={search_term}&do=search&page={pageno}&category={search_type}' + +# yggtorrent specific type-definitions +search_types = {'files': 'all', + 'music': '2139', + 'videos': '2145'} + +cookies = dict() + + +def init(engine_settings=None): + global cookies + # initial cookies + resp = http_get(url, allow_redirects=False) + if resp.ok: + for r in resp.history: + cookies.update(r.cookies) + cookies.update(resp.cookies) + + +# do search-request +def request(query, params): + search_type = search_types.get(params['category'], 'all') + pageno = (params['pageno'] - 1) * 50 + + params['url'] = search_url.format(search_term=quote(query), + search_type=search_type, + pageno=pageno) + + params['cookies'] = cookies + + return params + + +# get response from search-request +def response(resp): + results = [] + dom = html.fromstring(resp.text) + + search_res = dom.xpath('//section[@id="#torrents"]/div/table/tbody/tr') + + # return empty array if nothing is found + if not search_res: + return [] + + # parse results + for result in search_res: + link = result.xpath('.//a[@id="torrent_name"]')[0] + href = link.attrib.get('href') + title = extract_text(link) + seed = result.xpath('.//td[8]/text()')[0] + leech = result.xpath('.//td[9]/text()')[0] + + # convert seed to int if possible + if seed.isdigit(): + seed = int(seed) + else: + seed = 0 + + # convert leech to int if possible + if leech.isdigit(): + leech = int(leech) + else: + leech = 0 + + params = {'url': href, + 'title': title, + 'seed': seed, + 'leech': leech, + 'template': 'torrent.html'} + + # let's try to calculate the torrent size + try: + filesize_info = result.xpath('.//td[6]/text()')[0] + filesize = filesize_info[:-2] + filesize_multiplier = filesize_info[-2:].lower() + multiplier_french_to_english = { + 'to': 'TiB', + 'go': 'GiB', + 'mo': 'MiB', + 'ko': 'KiB' + } + filesize = get_torrent_size(filesize, multiplier_french_to_english[filesize_multiplier]) + params['filesize'] = filesize + except: + pass + + # extract and convert creation date + try: + date_ts = result.xpath('.//td[5]/div/text()')[0] + date = datetime.fromtimestamp(float(date_ts)) + params['publishedDate'] = date + except: + pass + + # append result + results.append(params) + + # return results sorted by seeder + return sorted(results, key=itemgetter('seed'), reverse=True) diff --git a/searx/settings.yml b/searx/settings.yml index f9f0a0f8f..6888547f2 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -702,6 +702,13 @@ engines: # Or you can use the html non-stable engine, activated by default engine : youtube_noapi + - name : yggtorrent + engine : yggtorrent + shortcut : ygg + url: https://www2.yggtorrent.si/ + disabled : True + timeout : 4.0 + - name : dailymotion engine : dailymotion shortcut : dm