From eb182df1324f84d6864f324551fe6be7c535e451 Mon Sep 17 00:00:00 2001 From: volth Date: Thu, 25 Jul 2019 06:40:48 +0000 Subject: [PATCH] [mod] restore btdigg engine as btdig.com (#1515) --- searx/engines/btdigg.py | 39 +- searx/settings.yml | 4 + .../courgette/result_templates/torrent.html | 2 +- .../legacy/result_templates/torrent.html | 2 +- .../oscar/result_templates/torrent.html | 2 +- tests/unit/engines/test_btdigg.py | 414 +++--------------- 6 files changed, 94 insertions(+), 369 deletions(-) diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 40438673f..82eedc24b 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -1,7 +1,7 @@ """ BTDigg (Videos, Music, Files) - @website https://btdigg.org + @website https://btdig.com @provide-api yes (on demand) @using-api no @@ -21,7 +21,7 @@ categories = ['videos', 'music', 'files'] paging = True # search-url -url = 'https://btdigg.org' +url = 'https://btdig.com' search_url = url + '/search?q={search_term}&p={pageno}' @@ -39,7 +39,7 @@ def response(resp): dom = html.fromstring(resp.text) - search_res = dom.xpath('//div[@id="search_res"]/table/tr') + search_res = dom.xpath('//div[@class="one_result"]') # return empty array if nothing is found if not search_res: @@ -47,46 +47,39 @@ def response(resp): # parse results for result in search_res: - link = result.xpath('.//td[@class="torrent_name"]//a')[0] + link = result.xpath('.//div[@class="torrent_name"]//a')[0] href = urljoin(url, link.attrib.get('href')) title = extract_text(link) - content = extract_text(result.xpath('.//pre[@class="snippet"]')[0]) - content = "
".join(content.split("\n")) - filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0] - filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1] - files = result.xpath('.//span[@class="attr_val"]/text()')[1] - seed = result.xpath('.//span[@class="attr_val"]/text()')[2] + excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0] + content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False) + # it is better to emit
instead of |, but html tags are verboten + content = content.strip().replace('\n', ' | ') + content = ' '.join(content.split()) - # convert seed to int if possible - if seed.isdigit(): - seed = int(seed) - else: - seed = 0 - - leech = 0 + filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[0] + filesize_multiplier = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[1] + files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0] # convert filesize to byte if possible filesize = get_torrent_size(filesize, filesize_multiplier) # convert files to int if possible - if files.isdigit(): + try: files = int(files) - else: + except: files = None - magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href'] + magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href'] # append result results.append({'url': href, 'title': title, 'content': content, - 'seed': seed, - 'leech': leech, 'filesize': filesize, 'files': files, 'magnetlink': magnetlink, 'template': 'torrent.html'}) # return results sorted by seeder - return sorted(results, key=itemgetter('seed'), reverse=True) + return results diff --git a/searx/settings.yml b/searx/settings.yml index 53dfaae2c..c2a77630a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -115,6 +115,10 @@ engines: disabled : True shortcut : bb + - name : btdigg + engine : btdigg + shortcut : bt + - name : ccc-tv engine : xpath paging : False diff --git a/searx/templates/courgette/result_templates/torrent.html b/searx/templates/courgette/result_templates/torrent.html index 2fd8395ad..d659064d9 100644 --- a/searx/templates/courgette/result_templates/torrent.html +++ b/searx/templates/courgette/result_templates/torrent.html @@ -4,7 +4,7 @@ {% endif %}

{{ result.title|safe }}

{% if result.content %}{{ result.content|safe }}
{% endif %} - {{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}
+ {% if result.seed %}{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}
{% endif %} {% if result.magnetlink %}{{ _('magnet link') }}{% endif %} {% if result.torrentfile %}{{ _('torrent file') }}{% endif %} diff --git a/searx/templates/legacy/result_templates/torrent.html b/searx/templates/legacy/result_templates/torrent.html index 67e058ae5..7a8ac33de 100644 --- a/searx/templates/legacy/result_templates/torrent.html +++ b/searx/templates/legacy/result_templates/torrent.html @@ -8,6 +8,6 @@

{% if result.magnetlink %}{{ _('magnet link') }}{% endif %} {% if result.torrentfile %}{{ _('torrent file') }}{% endif %} - - {{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }} + {% if result.seed %}{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}{% endif %}

diff --git a/searx/templates/oscar/result_templates/torrent.html b/searx/templates/oscar/result_templates/torrent.html index bc2b30fbe..f5ea415e2 100644 --- a/searx/templates/oscar/result_templates/torrent.html +++ b/searx/templates/oscar/result_templates/torrent.html @@ -3,7 +3,7 @@ {{ result_header(result, favicons) }} {{ result_sub_header(result) }} -

{{ icon('transfer') }} {{ _('Seeder') }} {{ result.seed }} • {{ _('Leecher') }} {{ result.leech }} +{% if result.seed %}

{{ icon('transfer') }} {{ _('Seeder') }} {{ result.seed }} • {{ _('Leecher') }} {{ result.leech }}{% endif %} {% if result.filesize %}
{{ icon('floppy-disk') }} {{ _('Filesize') }} {% if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }} diff --git a/tests/unit/engines/test_btdigg.py b/tests/unit/engines/test_btdigg.py index 6a88e3f75..45ddaa6e3 100644 --- a/tests/unit/engines/test_btdigg.py +++ b/tests/unit/engines/test_btdigg.py @@ -14,7 +14,7 @@ class TestBtdiggEngine(SearxTestCase): params = btdigg.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) - self.assertIn('btdigg.org', params['url']) + self.assertIn('btdig.com', params['url']) def test_response(self): self.assertRaises(AttributeError, btdigg.response, None) @@ -26,359 +26,87 @@ class TestBtdiggEngine(SearxTestCase): self.assertEqual(btdigg.response(response), []) html = u""" -

- - - - - -
1 - - - - -
- Should be the title -
- - - - - - - - - - - -
- [magnet] - - [cloud] - - Taille: - 8 B - - Fichiers: - 710 - - Téléchargements: - 5 - - Temps: - 417.8 jours - - Dernière mise à jour: - 5.3 jours - - Faux: - Aucun -
-
-                            Content
-                        
-
+
+
+ +
+
+
+ 4217 files 1 GBfound 3 years ago +
+
+
+
+
+ +
+ found 3 years ago +
+
+
+
+
3.9GBdeLibrosByHuasoFromHell(3de4)

+
Libros H-Z

+
H

H.H. Hollis - El truco de la espada-pdf.zip
17 KB
+
Hagakure - El Libro del Samurai-pdf.zip
95 KB
+
Hamsun, Knut (1859-1952)

+
Hamsun, Knut - Hambre-pdf.zip
786 KB
+ +
+
""" response = mock.Mock(text=html.encode('utf-8')) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Should be the title') - self.assertEqual(results[0]['url'], 'https://btdigg.org/url') - self.assertEqual(results[0]['content'], 'Content') - self.assertEqual(results[0]['seed'], 5) - self.assertEqual(results[0]['leech'], 0) - self.assertEqual(results[0]['filesize'], 8) - self.assertEqual(results[0]['files'], 710) - self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:magnet&dn=Test') + self.assertEqual(results[0]['title'], '3.9GBdeLibrosByHuasoFromHell(3de4)') + self.assertEqual(results[0]['url'], + 'http://btdig.com/a72f35b7ee3a10928f02bb799e40ae5db701ed1c/pdf?q=pdf&p=1&order=0') + self.assertEqual(results[0]['content'], + '3.9GBdeLibrosByHuasoFromHell(3de4) | ' + + 'Libros H-Z | ' + + 'H H.H. Hollis - El truco de la espada-pdf.zip17 KB | ' + + 'Hagakure - El Libro del Samurai-pdf.zip95 KB | ' + + 'Hamsun, Knut (1859-1952) | Hamsun, Knut - Hambre-pdf.zip786 KB | ' + + '4214 hidden files1 GB') + self.assertEqual(results[0]['filesize'], 1 * 1024 * 1024 * 1024) + self.assertEqual(results[0]['files'], 4217) + self.assertEqual(results[0]['magnetlink'], + 'magnet:?xt=urn:btih:a72f35b7ee3a10928f02bb799e40ae5db701ed1c&dn=3.9GBdeLibrosBy...') html = """ -
- -
+
+
""" response = mock.Mock(text=html.encode('utf-8')) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) - - html = u""" -
- - - - - - - - - - - - - - - - - - - - - -
1 - - - - -
- Should be the title -
- - - - - - - - - - - -
- [magnet] - - [cloud] - - Taille: - 1 KB - - Fichiers: - 710 - - Téléchargements: - 5 - - Temps: - 417.8 jours - - Dernière mise à jour: - 5.3 jours - - Faux: - Aucun -
-
-                            Content
-                        
-
1 - - - - -
- Should be the title -
- - - - - - - - - - - -
- [magnet] - - [cloud] - - Taille: - 1 MB - - Fichiers: - a - - Téléchargements: - 4 - - Temps: - 417.8 jours - - Dernière mise à jour: - 5.3 jours - - Faux: - Aucun -
-
-                            Content
-                        
-
1 - - - - -
- Should be the title -
- - - - - - - - - - - -
- [magnet] - - [cloud] - - Taille: - 1 GB - - Fichiers: - 710 - - Téléchargements: - 3 - - Temps: - 417.8 jours - - Dernière mise à jour: - 5.3 jours - - Faux: - Aucun -
-
-                            Content
-                        
-
1 - - - - -
- Should be the title -
- - - - - - - - - - - -
- [magnet] - - [cloud] - - Taille: - 1 TB - - Fichiers: - 710 - - Téléchargements: - 2 - - Temps: - 417.8 jours - - Dernière mise à jour: - 5.3 jours - - Faux: - Aucun -
-
-                            Content
-                        
-
1 - - - - -
- Should be the title -
- - - - - - - - - - - -
- [magnet] - - [cloud] - - Taille: - a TB - - Fichiers: - 710 - - Téléchargements: - z - - Temps: - 417.8 jours - - Dernière mise à jour: - 5.3 jours - - Faux: - Aucun -
-
-                            Content
-                        
-
-
- """ - response = mock.Mock(text=html.encode('utf-8')) - results = btdigg.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 5) - self.assertEqual(results[0]['title'], 'Should be the title') - self.assertEqual(results[0]['url'], 'https://btdigg.org/url') - self.assertEqual(results[0]['content'], 'Content') - self.assertEqual(results[0]['seed'], 5) - self.assertEqual(results[0]['leech'], 0) - self.assertEqual(results[0]['files'], 710) - self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:magnet&dn=Test') - self.assertEqual(results[0]['filesize'], 1024) - self.assertEqual(results[1]['filesize'], 1048576) - self.assertEqual(results[2]['filesize'], 1073741824) - self.assertEqual(results[3]['filesize'], 1099511627776)