Mirror of https://github.com/searxng/searxng (synced 2024-01-01 19:24:07 +01:00)

Commit d800e3fcfa: 49 changed files with 754 additions and 1473 deletions

@@ -18,7 +18,6 @@
from lxml import html
from json import loads
import re
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode

# engine dependent config
@@ -26,6 +25,8 @@ categories = ['images']
paging = True
safesearch = True
time_range_support = True
language_support = True
supported_languages_url = 'https://www.bing.com/account/general'

# search-url
base_url = 'https://www.bing.com/'
@@ -45,23 +46,41 @@ safesearch_types = {2: 'STRICT',
_quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U)


# get supported region code
def get_region_code(lang, lang_list=None):
    region = None
    if lang in (lang_list or supported_languages):
        region = lang
    elif lang.startswith('no'):
        region = 'nb-NO'
    else:
        # try to get a supported country code with language
        lang = lang.split('-')[0]
        for lc in (lang_list or supported_languages):
            if lang == lc.split('-')[0]:
                region = lc
                break
    if region:
        return region.lower()
    else:
        return 'en-us'


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * 10 + 1

    # required for cookie
    if params['language'] == 'all':
        language = 'en-US'
    else:
        language = params['language']

    search_path = search_string.format(
        query=urlencode({'q': query}),
        offset=offset)

    language = get_region_code(params['language'])

    params['cookies']['SRCHHPGUSR'] = \
        'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\
        '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

    params['cookies']['_EDGE_S'] = 'mkt=' + language +\
        '&ui=' + language + '&F=1'

    params['url'] = base_url + search_path
    if params['time_range'] in time_range_dict:
@@ -106,3 +125,22 @@ def response(resp):

    # return results
    return results


# get supported languages from their site
def _fetch_supported_languages(resp):
    supported_languages = []
    dom = html.fromstring(resp.text)

    regions_xpath = '//div[@id="region-section-content"]' \
                    + '//ul[@class="b_vList"]/li/a/@href'

    regions = dom.xpath(regions_xpath)
    for region in regions:
        code = re.search('setmkt=[^\&]+', region).group()[7:]
        if code == 'nb-NO':
            code = 'no-NO'

        supported_languages.append(code)

    return supported_languages

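As a quick illustration of the new get_region_code helper added above (sketch only; the language list below is invented for the example, in the engine it comes from supported_languages):

# Illustration only: expected behaviour of get_region_code with a made-up lang_list.
langs = ['en-US', 'de-DE', 'nb-NO']
print(get_region_code('de-DE', lang_list=langs))   # exact match         -> 'de-de'
print(get_region_code('no', lang_list=langs))      # Norwegian special   -> 'nb-no'
print(get_region_code('en-GB', lang_list=langs))   # same language code  -> 'en-us'
print(get_region_code('xx-XX', lang_list=langs))   # no match, fallback  -> 'en-us'
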
@@ -12,6 +12,7 @@

from json import loads
from lxml import html
from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url, get_region_code
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode

@@ -21,6 +22,7 @@ paging = True
safesearch = True
time_range_support = True
number_of_results = 10
language_support = True

search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\
             'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5'
@@ -45,7 +47,8 @@ def request(query, params):
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

    # language cookie
    params['cookies']['_EDGE_S'] = 'mkt=' + params['language'].lower() + '&F=1'
    region = get_region_code(params['language'], lang_list=supported_languages)
    params['cookies']['_EDGE_S'] = 'mkt=' + region + '&F=1'

    # query and paging
    params['url'] = search_url.format(query=urlencode({'q': query}),

@@ -1,70 +0,0 @@
"""
 Blekko (Images)

 @website https://blekko.com
 @provide-api yes (inofficial)

 @using-api yes
 @results JSON
 @stable yes
 @parse url, title, img_src
"""

from json import loads
from searx.url_utils import urlencode

# engine dependent config
categories = ['images']
paging = True
safesearch = True

# search-url
base_url = 'https://blekko.com'
search_url = '/api/images?{query}&c={c}'

# safesearch definitions
safesearch_types = {2: '1',
                    1: '',
                    0: '0'}


# do search-request
def request(query, params):
    c = (params['pageno'] - 1) * 48

    params['url'] = base_url +\
        search_url.format(query=urlencode({'q': query}),
                          c=c)

    if params['pageno'] != 1:
        params['url'] += '&page={pageno}'.format(pageno=(params['pageno'] - 1))

    # let Blekko know we wan't have profiling
    params['cookies']['tag_lesslogging'] = '1'

    # parse safesearch argument
    params['cookies']['safesearch'] = safesearch_types.get(params['safesearch'], '')

    return params


# get response from search-request
def response(resp):
    results = []

    search_results = loads(resp.text)

    # return empty array if there are no results
    if not search_results:
        return []

    for result in search_results:
        # append result
        results.append({'url': result['page_url'],
                        'title': result['title'],
                        'content': '',
                        'img_src': result['url'],
                        'template': 'images.html'})

    # return results
    return results

@@ -10,6 +10,8 @@
 @parse url, title, content, publishedDate, thumbnail
"""

import random
import string
from dateutil import parser
from json import loads
from lxml import html
@@ -30,12 +32,17 @@ title_xpath = './/h2//a//text()'
content_xpath = './/p//text()'
pubdate_xpath = './/time'

digg_cookie_chars = string.ascii_uppercase + string.ascii_lowercase +\
    string.digits + "+_"


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * 10
    params['url'] = search_url.format(position=offset,
                                      query=quote_plus(query))
    params['cookies']['frontend.auid'] = ''.join(random.choice(
        digg_cookie_chars) for _ in range(22))
    return params

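A hedged sketch of what the new frontend.auid cookie generation above produces (illustration only; the value is random on every request):

import random
import string

# Same alphabet as digg_cookie_chars in the hunk above.
digg_cookie_chars = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+_"

# 22 characters drawn from that alphabet, a different value each time.
auid = ''.join(random.choice(digg_cookie_chars) for _ in range(22))
print(len(auid))  # 22
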
@@ -134,4 +134,4 @@ def _fetch_supported_languages(resp):
    regions_json = loads(response_page)
    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())

    return supported_languages
    return list(supported_languages)

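The list() wrapper above matters because map() stopped returning a list in Python 3; a minimal illustration (the region keys are invented for the example, not the engine's real data):

# Illustration only: why the return value is now wrapped in list().
# In Python 3, map() yields a lazy iterator that is exhausted after one pass,
# while list() gives a reusable sequence.
regions = {'us-en': 1, 'de-de': 1}          # made-up keys
langs = map(lambda x: x[3:] + '-' + x[:2].upper(), regions.keys())
print(langs)          # <map object ...>, not a list
print(list(langs))    # ['en-US', 'de-DE']
print(list(langs))    # [] -- the iterator is already consumed
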
@@ -4,7 +4,7 @@
 @website http://www.faroo.com
 @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key

 @using-api yes
 @using-api no
 @results JSON
 @stable yes
 @parse url, title, content, publishedDate, img_src
@@ -20,18 +20,16 @@ categories = ['general', 'news']
paging = True
language_support = True
number_of_results = 10
api_key = None

# search-url
url = 'http://www.faroo.com/'
search_url = url + 'api?{query}'\
                   '&start={offset}'\
                   '&length={number_of_results}'\
                   '&l={language}'\
                   '&src={categorie}'\
                   '&i=false'\
                   '&f=json'\
                   '&key={api_key}'  # noqa
search_url = url + 'instant.json?{query}'\
                   '&start={offset}'\
                   '&length={number_of_results}'\
                   '&l={language}'\
                   '&src={categorie}'\
                   '&i=false'\
                   '&c=false'

search_category = {'general': 'web',
                   'news': 'news'}
@@ -57,21 +55,15 @@ def request(query, params):
                                      number_of_results=number_of_results,
                                      query=urlencode({'q': query}),
                                      language=language,
                                      categorie=categorie,
                                      api_key=api_key)
                                      categorie=categorie)

    # using searx User-Agent
    params['headers']['User-Agent'] = searx_useragent()
    params['headers']['Referer'] = url

    return params


# get response from search-request
def response(resp):
    # HTTP-Code 401: api-key is not valide
    if resp.status_code == 401:
        raise Exception("API key is not valide")

    # HTTP-Code 429: rate limit exceeded
    if resp.status_code == 429:
        raise Exception("rate limit has been exceeded!")
@@ -86,31 +78,19 @@ def response(resp):

    # parse results
    for result in search_res['results']:
        publishedDate = None
        result_json = {'url': result['url'], 'title': result['title'],
                       'content': result['kwic']}
        if result['news']:
            # timestamp (milliseconds since 1970)
            publishedDate = datetime.datetime.fromtimestamp(result['date'] / 1000.0)  # noqa

            # append news result
            results.append({'url': result['url'],
                            'title': result['title'],
                            'publishedDate': publishedDate,
                            'content': result['kwic']})

        else:
            # append general result
            # TODO, publishedDate correct?
            results.append({'url': result['url'],
                            'title': result['title'],
                            'content': result['kwic']})
            result_json['publishedDate'] = \
                datetime.datetime.fromtimestamp(result['date'] / 1000.0)

        # append image result if image url is set
        # TODO, show results with an image like in faroo
        if result['iurl']:
            results.append({'template': 'images.html',
                            'url': result['url'],
                            'title': result['title'],
                            'content': result['kwic'],
                            'img_src': result['iurl']})
            result_json['template'] = 'videos.html'
            result_json['thumbnail'] = result['iurl']

        results.append(result_json)

    # return results
    return results

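The date handling kept in this hunk divides by 1000 because the API reports milliseconds since the epoch; a small sanity check (the timestamp value is made up for the example):

import datetime

# Illustration only: the 'date' field is milliseconds since 1970,
# so it is divided by 1000 before being handed to fromtimestamp().
ms = 1483228800000                                       # made-up value
print(datetime.datetime.fromtimestamp(ms / 1000.0))      # 2017-01-01 00:00:00 (local time)
print(datetime.datetime.utcfromtimestamp(ms / 1000.0))   # 2017-01-01 00:00:00 UTC
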
@@ -1,62 +0,0 @@
"""
 General Files (Files)

 @website http://www.general-files.org
 @provide-api no (nothing found)

 @using-api no (because nothing found)
 @results HTML (using search portal)
 @stable no (HTML can change)
 @parse url, title, content

 @todo detect torrents?
"""

from lxml import html

# engine dependent config
categories = ['files']
paging = True

# search-url
base_url = 'http://www.general-file.com'
search_url = base_url + '/files-{letter}/{query}/{pageno}'

# specific xpath variables
result_xpath = '//table[@class="block-file"]'
title_xpath = './/h2/a//text()'
url_xpath = './/h2/a/@href'
content_xpath = './/p//text()'


# do search-request
def request(query, params):

    params['url'] = search_url.format(query=query,
                                      letter=query[0],
                                      pageno=params['pageno'])

    return params


# get response from search-request
def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(result_xpath):
        url = result.xpath(url_xpath)[0]

        # skip fast download links
        if not url.startswith('/'):
            continue

        # append result
        results.append({'url': base_url + url,
                        'title': ''.join(result.xpath(title_xpath)),
                        'content': ''.join(result.xpath(content_xpath))})

    # return results
    return results

@@ -10,6 +10,7 @@
 @parse url, title, content
"""

import random
from json import loads
from time import time
from lxml.html import fromstring
@@ -32,7 +33,8 @@ search_string = 'search?{query}'\
    '&qh=0'\
    '&qlang={lang}'\
    '&ff={safesearch}'\
    '&rxikd={rxikd}'  # random number - 9 digits
    '&rxieu={rxieu}'\
    '&rand={rxikd}'  # current unix timestamp

# specific xpath variables
results_xpath = '//response//result'
@@ -59,10 +61,12 @@ def request(query, params):
    else:
        safesearch = 0

    # rxieu is some kind of hash from the search query, but accepts random atm
    search_path = search_string.format(query=urlencode({'q': query}),
                                       offset=offset,
                                       number_of_results=number_of_results,
                                       rxikd=str(time())[:9],
                                       rxikd=int(time() * 1000),
                                       rxieu=random.randint(1000000000, 9999999999),
                                       lang=language,
                                       safesearch=safesearch)

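For context on the two request parameters changed above, a rough sketch of the values the updated format call produces (illustration only; the hunk itself only says rxieu "accepts random", its server-side meaning is not documented here):

import random
from time import time

# Illustration only.
rxikd = int(time() * 1000)                         # current unix time in milliseconds (13 digits)
rxieu = random.randint(1000000000, 9999999999)     # random 10-digit number standing in for the query hash
print(rxikd, rxieu)
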
@@ -67,8 +67,8 @@ def response(resp):
    for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
        try:
            r = {
                'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
                'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
                'url': result.xpath('.//a[@class="l _PMs"]')[0].attrib.get("href"),
                'title': ''.join(result.xpath('.//a[@class="l _PMs"]//text()')),
                'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
            }
        except:

@@ -1,7 +1,7 @@
"""
 Nyaa.se (Anime Bittorrent tracker)
 Nyaa.si (Anime Bittorrent tracker)

 @website http://www.nyaa.se/
 @website http://www.nyaa.si/
 @provide-api no
 @using-api no
 @results HTML
@@ -12,50 +12,25 @@
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero

# engine dependent config
categories = ['files', 'images', 'videos', 'music']
paging = True

# search-url
base_url = 'http://www.nyaa.se/'
base_url = 'http://www.nyaa.si/'
search_url = base_url + '?page=search&{query}&offset={offset}'

# xpath queries
xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]'
xpath_category = './/td[@class="tlisticon"]/a'
xpath_title = './/td[@class="tlistname"]/a'
xpath_torrent_file = './/td[@class="tlistdownload"]/a'
xpath_filesize = './/td[@class="tlistsize"]/text()'
xpath_seeds = './/td[@class="tlistsn"]/text()'
xpath_leeches = './/td[@class="tlistln"]/text()'
xpath_downloads = './/td[@class="tlistdn"]/text()'


# convert a variable to integer or return 0 if it's not a number
def int_or_zero(num):
    if isinstance(num, list):
        if len(num) < 1:
            return 0
        num = num[0]
    if num.isdigit():
        return int(num)
    return 0


# get multiplier to convert torrent size to bytes
def get_filesize_mul(suffix):
    return {
        'KB': 1024,
        'MB': 1024 ** 2,
        'GB': 1024 ** 3,
        'TB': 1024 ** 4,

        'KIB': 1024,
        'MIB': 1024 ** 2,
        'GIB': 1024 ** 3,
        'TIB': 1024 ** 4
    }[str(suffix).upper()]
xpath_results = '//table[contains(@class, "torrent-list")]//tr[not(th)]'
xpath_category = './/td[1]/a[1]'
xpath_title = './/td[2]/a[last()]'
xpath_torrent_links = './/td[3]/a'
xpath_filesize = './/td[4]/text()'
xpath_seeds = './/td[6]/text()'
xpath_leeches = './/td[7]/text()'
xpath_downloads = './/td[8]/text()'


# do search-request
@@ -72,25 +47,32 @@ def response(resp):
    dom = html.fromstring(resp.text)

    for result in dom.xpath(xpath_results):
        # defaults
        filesize = 0
        magnet_link = ""
        torrent_link = ""

        # category in which our torrent belongs
        category = result.xpath(xpath_category)[0].attrib.get('title')
        try:
            category = result.xpath(xpath_category)[0].attrib.get('title')
        except:
            pass

        # torrent title
        page_a = result.xpath(xpath_title)[0]
        title = extract_text(page_a)

        # link to the page
        href = page_a.attrib.get('href')
        href = base_url + page_a.attrib.get('href')

        # link to the torrent file
        torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href')

        # torrent size
        try:
            file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
            file_size = int(float(file_size) * get_filesize_mul(suffix))
        except:
            file_size = None
        for link in result.xpath(xpath_torrent_links):
            url = link.attrib.get('href')
            if 'magnet' in url:
                # link to the magnet
                magnet_link = url
            else:
                # link to the torrent file
                torrent_link = url

        # seed count
        seed = int_or_zero(result.xpath(xpath_seeds))
@@ -101,6 +83,14 @@ def response(resp):
        # torrent downloads count
        downloads = int_or_zero(result.xpath(xpath_downloads))

        # let's try to calculate the torrent size
        try:
            filesize_info = result.xpath(xpath_filesize)[0]
            filesize, filesize_multiplier = filesize_info.split()
            filesize = get_torrent_size(filesize, filesize_multiplier)
        except:
            pass

        # content string contains all information not included into template
        content = 'Category: "{category}". Downloaded {downloads} times.'
        content = content.format(category=category, downloads=downloads)
@@ -110,8 +100,9 @@ def response(resp):
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': file_size,
            'filesize': filesize,
            'torrentfile': torrent_link,
            'magnetlink': magnet_link,
            'template': 'torrent.html'})

    return results

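The hunks above drop nyaa's local get_filesize_mul and int_or_zero helpers in favour of searx.utils. The real searx.utils implementation is not shown in this diff; a minimal sketch of a get_torrent_size with the behaviour the removed helper had (an assumption for illustration, not the actual utility):

# Sketch only: an approximation of what get_torrent_size(filesize, filesize_multiplier)
# is expected to return, modelled on the removed get_filesize_mul helper above.
# The real function lives in searx.utils and may differ in detail.
def get_torrent_size(filesize, filesize_multiplier):
    multipliers = {
        'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4,
        'KIB': 1024, 'MIB': 1024 ** 2, 'GIB': 1024 ** 3, 'TIB': 1024 ** 4,
    }
    return int(float(filesize) * multipliers[str(filesize_multiplier).upper()])

print(get_torrent_size('1.228', 'GB'))  # number of bytes, roughly 1.32e9
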
@@ -118,7 +118,7 @@ def _fetch_supported_languages(resp):
    dom = fromstring(resp.text)
    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
    for option in options:
        code = option.xpath('./@data-val')[0]
        code = option.xpath('./@data-search-language')[0]
        if code.startswith('nb-'):
            code = code.replace('nb', 'no', 1)
        supported_languages.append(code)

@@ -14,8 +14,8 @@ import re
from lxml import html
from searx.engines.xpath import extract_text
from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul
from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero

# engine dependent config
categories = ['files', 'videos', 'music']
@@ -76,8 +76,7 @@ def response(resp):
            try:
                # ('1.228', 'GB')
                groups = size_re.match(item).groups()
                multiplier = get_filesize_mul(groups[1])
                params['filesize'] = int(multiplier * float(groups[0]))
                params['filesize'] = get_torrent_size(groups[0], groups[1])
            except:
                pass
        elif item.startswith('Date:'):

@@ -1,7 +1,7 @@
"""
 Torrentz.eu (BitTorrent meta-search engine)
 Torrentz2.eu (BitTorrent meta-search engine)

 @website https://torrentz.eu/
 @website https://torrentz2.eu/
 @provide-api no

 @using-api no
@@ -14,24 +14,24 @@
import re
from lxml import html
from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import get_torrent_size

# engine dependent config
categories = ['files', 'videos', 'music']
paging = True

# search-url
# https://torrentz.eu/search?f=EXAMPLE&p=6
base_url = 'https://torrentz.eu/'
# https://torrentz2.eu/search?f=EXAMPLE&p=6
base_url = 'https://torrentz2.eu/'
search_url = base_url + 'search?{query}'


# do search-request
def request(query, params):
    page = params['pageno'] - 1
    query = urlencode({'q': query, 'p': page})
    query = urlencode({'f': query, 'p': page})
    params['url'] = search_url.format(query=query)
    return params

@@ -54,22 +54,29 @@ def response(resp):
        # extract url and remove a slash in the beginning
        link = links[0].attrib.get('href').lstrip('/')

        seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '')
        leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '')
        seed = 0
        leech = 0
        try:
            seed = int(result.xpath('./dd/span[4]/text()')[0].replace(',', ''))
            leech = int(result.xpath('./dd/span[5]/text()')[0].replace(',', ''))
        except:
            pass

        params = {
            'url': base_url + link,
            'title': title,
            'seed': int_or_zero(seed),
            'leech': int_or_zero(leech),
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size
        try:
            size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
            size, suffix = size_str.split()
            params['filesize'] = int(size) * get_filesize_mul(suffix)
            filesize_info = result.xpath('./dd/span[3]/text()')[0]
            filesize, filesize_multiplier = filesize_info.split()
            filesize = get_torrent_size(filesize, filesize_multiplier)

            params['filesize'] = filesize
        except:
            pass
@@ -80,9 +87,8 @@ def response(resp):

        # extract and convert creation date
        try:
            date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title')
            # Fri, 25 Mar 2016 16:29:01
            date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
            date_ts = result.xpath('./dd/span[2]')[0].attrib.get('title')
            date = datetime.fromtimestamp(float(date_ts))
            params['publishedDate'] = date
        except:
            pass

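The last hunk switches the published date from parsing a formatted string to reading a unix timestamp out of the span's title attribute; a small comparison of the two code paths (sample values are invented for the illustration):

from datetime import datetime

# Illustration only, with made-up sample values.
# Old markup: the title attribute held a formatted date string.
old_title = 'Fri, 25 Mar 2016 16:29:01'
print(datetime.strptime(old_title, '%a, %d %b %Y %H:%M:%S'))   # 2016-03-25 16:29:01

# New torrentz2 markup: the title attribute holds a unix timestamp.
new_title = '1458923341'
print(datetime.fromtimestamp(float(new_title)))                # 2016-03-25 16:29:01 UTC, printed in local time
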