Merge pull request #639 from kvch/digbt-engine

add digbt engine - fixes #638
2016-08-16 10:37:17 +02:00 · 2016-08-16 10:37:17 +02:00 · 13bed1f872
commit 13bed1f872
parent acfe843ecd d320dd0efc
5 changed files with 144 additions and 15 deletions
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@ -16,6 +16,7 @@ from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
 # engine dependent config
 categories = ['videos', 'music', 'files']
@ -68,20 +69,7 @@ def response(resp):
        leech = 0
        # convert filesize to byte if possible
-        try:
+        filesize = get_torrent_size(filesize, filesize_multiplier)
            filesize = float(filesize)
            # convert filesize to byte
            if filesize_multiplier == 'TB':
                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
            elif filesize_multiplier == 'GB':
                filesize = int(filesize * 1024 * 1024 * 1024)
            elif filesize_multiplier == 'MB':
                filesize = int(filesize * 1024 * 1024)
            elif filesize_multiplier == 'KB':
                filesize = int(filesize * 1024)
        except:
            filesize = None
        # convert files to int if possible
        if files.isdigit():
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@ -0,0 +1,58 @@
 """
 DigBT (Videos, Music, Files)
 @website     https://digbt.org
 @provide-api no
 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content, magnetlink
 """
 from urlparse import urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
 # NOTE(review): presumably the searx result categories this engine is
 # listed under -- confirm against the engine loader.
 categories = ['videos', 'music', 'files']
 # NOTE(review): presumably signals that the engine supports paging;
 # request() reads params['pageno'] when building the URL.
 paging = True
 # Site root; relative result links are joined onto it in response().
 URL = 'https://digbt.org'
 # Search URL template; request() fills in {query} and {pageno}.
 SEARCH_URL = URL + '/search/{query}-time-{pageno}'
 # Positions of the size value and of its unit within the whitespace-split
 # text of a result's "tail" div (used as list indexes in response()).
 FILESIZE = 3
 FILESIZE_MULTIPLIER = 4
 def request(query, params):
    """Set the DigBT search URL for *query* on the requested page.

    Reads ``params['pageno']``, stores the formatted ``SEARCH_URL`` under
    ``params['url']`` and returns the same ``params`` object.
    """
    page_number = params['pageno']
    params['url'] = SEARCH_URL.format(pageno=page_number, query=query)
    return params
 def response(resp):
    """Turn a DigBT result page into a list of torrent result dicts.

    ``resp.content`` is parsed as HTML and every ``td`` with class
    ``x-item`` yields one entry using the ``torrent.html`` template.  An
    empty list is returned when the page has no such cell.
    """
    tree = html.fromstring(resp.content)
    torrents = []

    for item in tree.xpath('.//td[@class="x-item"]'):
        link = item.xpath('.//a[@title]/@href')[0]
        url = urljoin(URL, link)
        title = item.xpath('.//a[@title]/text()')[0]
        content = extract_text(item.xpath('.//div[@class="files"]'))

        # The text of the "tail" div is split on whitespace; the size value
        # and its unit are then picked out by position.
        tail_words = extract_text(item.xpath('.//div[@class="tail"]')).split()
        filesize = get_torrent_size(tail_words[FILESIZE],
                                    tail_words[FILESIZE_MULTIPLIER])

        magnetlink = item.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        # Seed and leech entries are fixed 'N/A' placeholders.
        torrents.append({
            'url': url,
            'title': title,
            'content': content,
            'filesize': filesize,
            'magnetlink': magnetlink,
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html',
        })

    return torrents
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -87,7 +87,7 @@ engines:
  - name : btdigg
    engine : btdigg
    shortcut : bt
-    
+
  - name : crossref
    engine : json_engine
    paging : True
@ -118,6 +118,12 @@ engines:
    weight : 2
    disabled : True
  - name : digbt
    engine : digbt
    shortcut : dbt
    timeout : 6.0
    disabled : True
  - name : digg
    engine : digg
    shortcut : dg
--- a/searx/utils.py
+++ b/searx/utils.py
@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
        return a_list[index]
    else:
        return default
 def get_torrent_size(filesize, filesize_multiplier):
    """Convert a human-readable torrent size into a number of bytes.

    :param filesize: the numeric part of the size, as a number or as a
        string accepted by ``float()`` (e.g. ``'2.9'``).
    :param filesize_multiplier: the unit of ``filesize``; ``'TB'``, ``'GB'``,
        ``'MB'`` and ``'KB'`` are scaled by the matching power of 1024.
    :returns: the size in bytes as an ``int`` for a recognised unit, the
        unscaled ``float`` for any other unit, or ``None`` when the value
        cannot be converted.
    """
    try:
        filesize = float(filesize)

        if filesize_multiplier == 'TB':
            filesize = int(filesize * 1024 ** 4)
        elif filesize_multiplier == 'GB':
            filesize = int(filesize * 1024 ** 3)
        elif filesize_multiplier == 'MB':
            filesize = int(filesize * 1024 ** 2)
        elif filesize_multiplier == 'KB':
            filesize = int(filesize * 1024)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "size unknown": float() rejects bad
        # input with TypeError/ValueError, int() rejects NaN/infinity with
        # ValueError/OverflowError.  A bare ``except:`` would also swallow
        # KeyboardInterrupt and SystemExit.
        filesize = None

    return filesize
--- a/tests/unit/engines/test_digbt.py
+++ b/tests/unit/engines/test_digbt.py
@ -0,0 +1,59 @@
 from collections import defaultdict
 import mock
 from searx.engines import digbt
 from searx.testing import SearxTestCase
 class TestDigBTEngine(SearxTestCase):
    """Unit tests for the ``digbt`` engine's request and response hooks."""

    def test_request(self):
        """request() must build a digbt.org URL that contains the query."""
        search_term = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 0

        built = digbt.request(search_term, request_params)

        self.assertIn('url', built)
        for fragment in (search_term, 'digbt.org'):
            self.assertIn(fragment, built['url'])

    def test_response(self):
        """response() rejects non-response input and parses one result row."""
        # Objects without a ``content`` attribute must fail loudly.
        for not_a_response in (None, [], '', '[]'):
            self.assertRaises(AttributeError, digbt.response, not_a_response)

        # A page without result cells gives an empty list.
        blank_page = mock.Mock(content='<html></html>')
        self.assertEqual(digbt.response(blank_page), [])

        markup = """
        <table class="table">
            <tr><td class="x-item">
            <div>
                <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a>
                <span class="ctime"><span style="color:red;">4 hours ago</span></span>
            </div>
            <div class="files">
                <ul>
                    <li>The Big Bang Theory  2.9 GB</li>
                    <li>....</li>
                </ul>
            </div>
            <div class="tail">
                Files: 1 Size: 2.9 GB  Downloads: 1 Updated: <span style="color:red;">4 hours ago</span>
                &nbsp; &nbsp;
                <a class="title" href="magnet:?xt=urn:btih:a&amp;dn=The+Big+Bang+Theory">
                    <span class="glyphicon glyphicon-magnet"></span> magnet-link
                </a>
                &nbsp; &nbsp;
            </div>
            </td></tr>
        </table>
        """
        results = digbt.response(mock.Mock(content=markup))
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)

        hit = results[0]
        expected_fields = (
            ('title', 'The Big Bang Theory'),
            ('url', 'https://digbt.org/The-Big-Bang-Theory-d2.html'),
            ('content', 'The Big Bang Theory 2.9 GB ....'),
            ('filesize', 3113851289),
            ('magnetlink', 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory'),
        )
        for field, value in expected_fields:
            self.assertEqual(hit[field], value)