mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
[enh] py3 compatibility
This commit is contained in:
parent
46a2c63f8e
commit
52e615dede
115 changed files with 517 additions and 513 deletions
|
|
@ -1,8 +1,7 @@
|
|||
from urllib import quote
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import get_torrent_size
|
||||
from urlparse import urljoin
|
||||
from searx.url_utils import quote, urljoin
|
||||
|
||||
url = 'https://1337x.to/'
|
||||
search_url = url + 'search/{search_term}/{pageno}/'
|
||||
|
|
|
|||
|
|
@ -72,12 +72,11 @@ def load_engine(engine_data):
|
|||
if engine_data['categories'] == 'none':
|
||||
engine.categories = []
|
||||
else:
|
||||
engine.categories = map(
|
||||
str.strip, engine_data['categories'].split(','))
|
||||
engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
|
||||
continue
|
||||
setattr(engine, param_name, engine_data[param_name])
|
||||
|
||||
for arg_name, arg_value in engine_default_args.iteritems():
|
||||
for arg_name, arg_value in engine_default_args.items():
|
||||
if not hasattr(engine, arg_name):
|
||||
setattr(engine, arg_name, arg_value)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
@parse url, title
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode, urljoin
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
|
|
|
|||
|
|
@ -14,10 +14,10 @@
|
|||
"""
|
||||
|
||||
from lxml import etree
|
||||
from urllib import urlencode
|
||||
from searx.utils import searx_useragent
|
||||
from datetime import datetime
|
||||
import re
|
||||
from searx.url_utils import urlencode
|
||||
from searx.utils import searx_useragent
|
||||
|
||||
|
||||
categories = ['science']
|
||||
|
|
@ -73,7 +73,7 @@ def request(query, params):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
search_results = etree.XML(resp.content)
|
||||
search_results = etree.XML(resp.text)
|
||||
|
||||
for entry in search_results.xpath('./result/doc'):
|
||||
content = "No description available"
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@
|
|||
@todo publishedDate
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general']
|
||||
|
|
|
|||
|
|
@ -15,11 +15,11 @@
|
|||
limited response to 10 images
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from json import loads
|
||||
import re
|
||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['images']
|
||||
|
|
|
|||
|
|
@ -11,13 +11,12 @@
|
|||
@parse url, title, content, publishedDate, thumbnail
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from urlparse import urlparse, parse_qsl
|
||||
from datetime import datetime
|
||||
from dateutil import parser
|
||||
from lxml import etree
|
||||
from searx.utils import list_get
|
||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
||||
from searx.url_utils import urlencode, urlparse, parse_qsl
|
||||
|
||||
# engine dependent config
|
||||
categories = ['news']
|
||||
|
|
@ -86,7 +85,7 @@ def request(query, params):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
rss = etree.fromstring(resp.content)
|
||||
rss = etree.fromstring(resp.text)
|
||||
|
||||
ns = rss.nsmap
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['images']
|
||||
|
|
|
|||
|
|
@ -10,11 +10,10 @@
|
|||
@parse url, title, content, seed, leech, magnetlink
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from urllib import quote
|
||||
from lxml import html
|
||||
from operator import itemgetter
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import quote, urljoin
|
||||
from searx.utils import get_torrent_size
|
||||
|
||||
# engine dependent config
|
||||
|
|
@ -38,7 +37,7 @@ def request(query, params):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
dom = html.fromstring(resp.content)
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
search_res = dom.xpath('//div[@id="search_res"]/table/tr')
|
||||
|
||||
|
|
|
|||
|
|
@ -1,21 +1,25 @@
|
|||
from datetime import datetime
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
unicode = str
|
||||
|
||||
categories = []
|
||||
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
|
||||
weight = 100
|
||||
|
||||
parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
|
||||
parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
|
||||
|
||||
db = 1
|
||||
|
||||
|
||||
def normalize_name(name):
|
||||
name = name.lower().replace('-', ' ').rstrip('s')
|
||||
name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
|
||||
name = re.sub(' +', ' ', name)
|
||||
return unicodedata.normalize('NFKD', name).lower()
|
||||
|
||||
|
|
@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
|
|||
|
||||
|
||||
def request(query, params):
|
||||
m = parser_re.match(unicode(query, 'utf8'))
|
||||
m = parser_re.match(query)
|
||||
if not m:
|
||||
# wrong query
|
||||
return params
|
||||
|
|
|
|||
|
|
@ -12,10 +12,9 @@
|
|||
@todo set content-parameter with correct data
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from datetime import datetime
|
||||
from requests import get
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos']
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['music']
|
||||
|
|
@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
|
|||
def request(query, params):
|
||||
offset = (params['pageno'] - 1) * 25
|
||||
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||
offset=offset)
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -12,10 +12,10 @@
|
|||
@todo rewrite to api
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
import re
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['images']
|
||||
|
|
|
|||
|
|
@ -10,20 +10,20 @@
|
|||
"""
|
||||
|
||||
import re
|
||||
from urlparse import urljoin
|
||||
from lxml import html
|
||||
from searx.utils import is_valid_lang
|
||||
from searx.url_utils import urljoin
|
||||
|
||||
categories = ['general']
|
||||
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
|
||||
weight = 100
|
||||
|
||||
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
|
||||
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
|
||||
results_xpath = './/table[@id="r"]/tr'
|
||||
|
||||
|
||||
def request(query, params):
|
||||
m = parser_re.match(unicode(query, 'utf8'))
|
||||
m = parser_re.match(query)
|
||||
if not m:
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -10,10 +10,14 @@
|
|||
@parse url, title, content, magnetlink
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from sys import version_info
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import get_torrent_size
|
||||
from searx.url_utils import urljoin
|
||||
|
||||
if version_info[0] == 3:
|
||||
unicode = str
|
||||
|
||||
categories = ['videos', 'music', 'files']
|
||||
paging = True
|
||||
|
|
@ -31,7 +35,7 @@ def request(query, params):
|
|||
|
||||
|
||||
def response(resp):
|
||||
dom = html.fromstring(resp.content)
|
||||
dom = html.fromstring(resp.text)
|
||||
search_res = dom.xpath('.//td[@class="x-item"]')
|
||||
|
||||
if not search_res:
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@
|
|||
@parse url, title, content, publishedDate, thumbnail
|
||||
"""
|
||||
|
||||
from urllib import quote_plus
|
||||
from dateutil import parser
|
||||
from json import loads
|
||||
from lxml import html
|
||||
from dateutil import parser
|
||||
from searx.url_utils import quote_plus
|
||||
|
||||
# engine dependent config
|
||||
categories = ['news', 'social media']
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@
|
|||
# @stable yes
|
||||
# @parse (general) url, title, content
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml.html import fromstring
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@
|
|||
@todo rewrite to api
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml.html import fromstring
|
||||
from requests import get
|
||||
from json import loads
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general']
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import json
|
||||
from urllib import urlencode
|
||||
from re import compile, sub
|
||||
from lxml import html
|
||||
from searx.utils import html_to_text
|
||||
from re import compile
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
|
||||
from searx.url_utils import urlencode
|
||||
from searx.utils import html_to_text
|
||||
|
||||
url = 'https://api.duckduckgo.com/'\
|
||||
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@
|
|||
@parse url, title, content, publishedDate, img_src
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
import datetime
|
||||
from searx.utils import searx_useragent
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general', 'news']
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from searx.engines.xpath import extract_text
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['files']
|
||||
|
|
@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
query = urlencode({'fdfilter': query,
|
||||
'fdpage': params['pageno']})
|
||||
query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
|
||||
params['url'] = search_url.format(query=query)
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
from urllib import urlencode
|
||||
from HTMLParser import HTMLParser
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
try:
|
||||
from HTMLParser import HTMLParser
|
||||
except:
|
||||
from html.parser import HTMLParser
|
||||
|
||||
url = 'http://www.filecrop.com/'
|
||||
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
|
||||
|
|
@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
|
|||
|
||||
def request(query, params):
|
||||
index = 1 + (params['pageno'] - 1) * 30
|
||||
params['url'] = search_url.format(query=urlencode({'w': query}),
|
||||
index=index)
|
||||
params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
|
||||
return params
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@
|
|||
More info on api-key : https://www.flickr.com/services/apps/create/
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
categories = ['images']
|
||||
|
||||
|
|
|
|||
|
|
@ -12,11 +12,11 @@
|
|||
@parse url, title, thumbnail, img_src
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from time import time
|
||||
import re
|
||||
from searx.engines import logger
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
|
||||
logger = logger.getChild('flickr-noapi')
|
||||
|
|
|
|||
|
|
@ -10,12 +10,10 @@
|
|||
@parse url, title, content, thumbnail, img_src
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from cgi import escape
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from dateutil import parser
|
||||
from searx.url_utils import urljoin, urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ Frinkiac (Images)
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
categories = ['images']
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from random import randint
|
||||
from time import time
|
||||
from urllib import urlencode
|
||||
from lxml.html import fromstring
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general']
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
|
|
|
|||
|
|
@ -9,11 +9,10 @@
|
|||
# @parse url, title, content, suggestion
|
||||
|
||||
import re
|
||||
from urllib import urlencode
|
||||
from urlparse import urlparse, parse_qsl
|
||||
from lxml import html, etree
|
||||
from searx.engines.xpath import extract_text, extract_url
|
||||
from searx.search import logger
|
||||
from searx import logger
|
||||
from searx.url_utils import urlencode, urlparse, parse_qsl
|
||||
|
||||
logger = logger.getChild('google engine')
|
||||
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@
|
|||
"""
|
||||
|
||||
from datetime import date, timedelta
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from lxml import html
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
|
||||
# engine dependent config
|
||||
|
|
|
|||
|
|
@ -11,9 +11,8 @@
|
|||
"""
|
||||
|
||||
from lxml import html
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from searx.engines.google import _fetch_supported_languages, supported_languages_url
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# search-url
|
||||
categories = ['news']
|
||||
|
|
|
|||
|
|
@ -12,11 +12,15 @@
|
|||
# @todo embedded (needs some md5 from video page)
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from HTMLParser import HTMLParser
|
||||
from searx.engines.xpath import extract_text
|
||||
from dateutil import parser
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
try:
|
||||
from HTMLParser import HTMLParser
|
||||
except:
|
||||
from html.parser import HTMLParser
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos']
|
||||
|
|
|
|||
|
|
@ -1,11 +1,16 @@
|
|||
from urllib import urlencode
|
||||
from json import loads
|
||||
from collections import Iterable
|
||||
from json import loads
|
||||
from sys import version_info
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
if version_info[0] == 3:
|
||||
unicode = str
|
||||
|
||||
search_url = None
|
||||
url_query = None
|
||||
content_query = None
|
||||
title_query = None
|
||||
paging = False
|
||||
suggestion_query = ''
|
||||
results_query = ''
|
||||
|
||||
|
|
@ -20,7 +25,7 @@ first_page_num = 1
|
|||
|
||||
def iterate(iterable):
|
||||
if type(iterable) == dict:
|
||||
it = iterable.iteritems()
|
||||
it = iterable.items()
|
||||
|
||||
else:
|
||||
it = enumerate(iterable)
|
||||
|
|
|
|||
|
|
@ -10,12 +10,11 @@
|
|||
@parse url, title, content, seed, leech, magnetlink
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from urllib import quote
|
||||
from lxml import html
|
||||
from operator import itemgetter
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import get_torrent_size, convert_str_to_int
|
||||
from searx.url_utils import quote, urljoin
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos', 'music', 'files']
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
from json import loads
|
||||
from string import Formatter
|
||||
from urllib import urlencode, quote
|
||||
from searx.url_utils import urlencode, quote
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general']
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from dateutil import parser
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['music']
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@
|
|||
@parse url, title, content, seed, leech, torrentfile
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['files', 'images', 'videos', 'music']
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from searx.utils import searx_useragent
|
||||
|
||||
# engine dependent config
|
||||
categories = ['map']
|
||||
|
|
@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
|
|||
def request(query, params):
|
||||
params['url'] = base_url + search_string.format(query=query)
|
||||
|
||||
# using searx User-Agent
|
||||
params['headers']['User-Agent'] = searx_useragent()
|
||||
|
||||
return params
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@
|
|||
@parse url, title
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from searx.utils import searx_useragent
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['map']
|
||||
|
|
|
|||
|
|
@ -8,11 +8,10 @@
|
|||
# @stable yes (HTML can change)
|
||||
# @parse url, title, content, seed, leech, magnetlink
|
||||
|
||||
from urlparse import urljoin
|
||||
from urllib import quote
|
||||
from lxml import html
|
||||
from operator import itemgetter
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import quote, urljoin
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos', 'music', 'files']
|
||||
|
|
|
|||
|
|
@ -12,9 +12,8 @@
|
|||
|
||||
from datetime import datetime
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
|
||||
from searx.utils import html_to_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = None
|
||||
|
|
|
|||
|
|
@ -11,9 +11,8 @@
|
|||
"""
|
||||
|
||||
import json
|
||||
from urllib import urlencode
|
||||
from urlparse import urlparse, urljoin
|
||||
from datetime import datetime
|
||||
from searx.url_utils import urlencode, urljoin, urlparse
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general', 'images', 'news', 'social media']
|
||||
|
|
@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
query = urlencode({'q': query,
|
||||
'limit': page_size})
|
||||
query = urlencode({'q': query, 'limit': page_size})
|
||||
params['url'] = search_url.format(query=query)
|
||||
|
||||
return params
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@
|
|||
@parse url, title, content, img_src
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads, dumps
|
||||
from dateutil import parser
|
||||
from searx.utils import html_to_text
|
||||
|
||||
# engine dependent config
|
||||
|
|
@ -48,7 +46,7 @@ def response(resp):
|
|||
search_res = loads(resp.text)
|
||||
|
||||
# return empty array if there are no results
|
||||
if search_res.get('total') < 1:
|
||||
if search_res.get('total', 0) < 1:
|
||||
return []
|
||||
|
||||
# parse results
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
|
||||
# engine dependent config
|
||||
|
|
@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||
pageno=params['pageno'] - 1)
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
|
|
@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||
pageno=params['pageno'] - 1)
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -8,11 +8,9 @@
|
|||
# @stable yes (HTML can change)
|
||||
# @parse url, title, content, seed, leech, magnetlink
|
||||
|
||||
from urlparse import urljoin
|
||||
from urllib import quote
|
||||
from lxml import html
|
||||
from operator import itemgetter
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import quote, urljoin
|
||||
|
||||
|
||||
url = 'http://www.seedpeer.eu/'
|
||||
|
|
|
|||
|
|
@ -11,13 +11,17 @@
|
|||
"""
|
||||
|
||||
import re
|
||||
from StringIO import StringIO
|
||||
from json import loads
|
||||
from lxml import etree
|
||||
from urllib import urlencode, quote_plus
|
||||
from lxml import html
|
||||
from dateutil import parser
|
||||
from searx import logger
|
||||
from searx.poolrequests import get as http_get
|
||||
from searx.url_utils import quote_plus, urlencode
|
||||
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except:
|
||||
from io import StringIO
|
||||
|
||||
# engine dependent config
|
||||
categories = ['music']
|
||||
|
|
@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
|
|||
'scrolling="no" frameborder="no" ' +\
|
||||
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
|
||||
|
||||
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
|
||||
|
||||
|
||||
def get_client_id():
|
||||
response = http_get("https://soundcloud.com")
|
||||
rx_namespace = {"re": "http://exslt.org/regular-expressions"}
|
||||
|
||||
if response.ok:
|
||||
tree = etree.parse(StringIO(response.content), etree.HTMLParser())
|
||||
script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
|
||||
tree = html.fromstring(response.content)
|
||||
script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
|
||||
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
|
||||
|
||||
# extracts valid app_js urls from soundcloud.com content
|
||||
|
|
@ -51,7 +56,7 @@ def get_client_id():
|
|||
# gets app_js and searches for the clientid
|
||||
response = http_get(app_js_url)
|
||||
if response.ok:
|
||||
cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
|
||||
cids = cid_re.search(response.text)
|
||||
if cids is not None and len(cids.groups()):
|
||||
return cids.groups()[0]
|
||||
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['music']
|
||||
|
|
@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
|
|||
def request(query, params):
|
||||
offset = (params['pageno'] - 1) * 20
|
||||
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||
offset=offset)
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -10,10 +10,9 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode, urljoin
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
|
|
@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||
pageno=params['pageno'])
|
||||
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ def request(query, params):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
dom = html.fromstring(resp.content)
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
# parse results
|
||||
for result in dom.xpath(results_xpath):
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib import quote_plus
|
||||
from lxml import html
|
||||
from searx.languages import language_codes
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import quote_plus
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos']
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode, unquote
|
||||
import re
|
||||
from lxml.html import fromstring
|
||||
from searx.url_utils import unquote, urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general', 'images']
|
||||
|
|
@ -27,10 +27,10 @@ search_string = '?{query}&page={page}'
|
|||
supported_languages_url = base_url
|
||||
|
||||
# regex
|
||||
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
|
||||
regex_json_remove_start = re.compile(r'^initialData:\s*')
|
||||
regex_json_remove_end = re.compile(r',\s*environment$')
|
||||
regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
|
||||
regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
|
||||
regex_json_remove_start = re.compile(b'^initialData:\s*')
|
||||
regex_json_remove_end = re.compile(b',\s*environment$')
|
||||
regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
|
||||
|
||||
|
||||
# do search-request
|
||||
|
|
@ -45,10 +45,9 @@ def request(query, params):
|
|||
ui_language = params['language'].split('-')[0]
|
||||
|
||||
search_path = search_string.format(
|
||||
query=urlencode({'query': query,
|
||||
'uiLanguage': ui_language,
|
||||
'region': region}),
|
||||
page=params['pageno'])
|
||||
query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
|
||||
page=params['pageno']
|
||||
)
|
||||
|
||||
# image search query is something like 'image?{query}&page={page}'
|
||||
if params['category'] == 'images':
|
||||
|
|
@ -63,14 +62,14 @@ def request(query, params):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
json_regex = regex_json.search(resp.content)
|
||||
json_regex = regex_json.search(resp.text)
|
||||
|
||||
# check if results are returned
|
||||
if not json_regex:
|
||||
return []
|
||||
|
||||
json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
|
||||
json = loads(json_raw)
|
||||
json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
|
||||
json = loads(json_raw.decode('utf-8'))
|
||||
|
||||
# parse results
|
||||
for result in json['Results'].get('items', []):
|
||||
|
|
@ -78,7 +77,7 @@ def response(resp):
|
|||
|
||||
# parse image results
|
||||
if result.get('ContentType', '').startswith('image'):
|
||||
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
|
||||
img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
|
||||
|
||||
# append result
|
||||
results.append({'url': result['SourceUrl'],
|
||||
|
|
@ -100,7 +99,7 @@ def response(resp):
|
|||
# parse images
|
||||
for result in json.get('Images', []):
|
||||
# decode image url
|
||||
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
|
||||
img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
|
||||
|
||||
# append result
|
||||
results.append({'url': result['SourceUrl'],
|
||||
|
|
|
|||
|
|
@ -11,11 +11,11 @@
|
|||
"""
|
||||
|
||||
import re
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from datetime import datetime
|
||||
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['files', 'videos', 'music']
|
||||
|
|
@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}'
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
query = urlencode({'page': params['pageno'],
|
||||
'terms': query})
|
||||
query = urlencode({'page': params['pageno'], 'terms': query})
|
||||
params['url'] = search_url.format(query=query)
|
||||
return params
|
||||
|
||||
|
|
@ -50,7 +49,7 @@ def response(resp):
|
|||
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
|
||||
|
||||
# processing the results, two rows at a time
|
||||
for i in xrange(0, len(rows), 2):
|
||||
for i in range(0, len(rows), 2):
|
||||
# parse the first row
|
||||
name_row = rows[i]
|
||||
|
||||
|
|
@ -79,14 +78,14 @@ def response(resp):
|
|||
groups = size_re.match(item).groups()
|
||||
multiplier = get_filesize_mul(groups[1])
|
||||
params['filesize'] = int(multiplier * float(groups[0]))
|
||||
except Exception as e:
|
||||
except:
|
||||
pass
|
||||
elif item.startswith('Date:'):
|
||||
try:
|
||||
# Date: 2016-02-21 21:44 UTC
|
||||
date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
|
||||
params['publishedDate'] = date
|
||||
except Exception as e:
|
||||
except:
|
||||
pass
|
||||
elif item.startswith('Comment:'):
|
||||
params['content'] = item
|
||||
|
|
|
|||
|
|
@ -12,11 +12,11 @@
|
|||
"""
|
||||
|
||||
import re
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from datetime import datetime
|
||||
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['files', 'videos', 'music']
|
||||
|
|
@ -70,7 +70,7 @@ def response(resp):
|
|||
size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
|
||||
size, suffix = size_str.split()
|
||||
params['filesize'] = int(size) * get_filesize_mul(suffix)
|
||||
except Exception as e:
|
||||
except:
|
||||
pass
|
||||
|
||||
# does our link contain a valid SHA1 sum?
|
||||
|
|
@ -84,7 +84,7 @@ def response(resp):
|
|||
# Fri, 25 Mar 2016 16:29:01
|
||||
date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
|
||||
params['publishedDate'] = date
|
||||
except Exception as e:
|
||||
except:
|
||||
pass
|
||||
|
||||
results.append(params)
|
||||
|
|
|
|||
|
|
@ -9,8 +9,12 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
import re
|
||||
from sys import version_info
|
||||
from searx.utils import is_valid_lang
|
||||
|
||||
if version_info[0] == 3:
|
||||
unicode = str
|
||||
|
||||
categories = ['general']
|
||||
url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
|
||||
web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
|
||||
|
|
|
|||
|
|
@ -12,11 +12,10 @@
|
|||
@todo publishedDate
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from datetime import datetime
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode, urljoin
|
||||
|
||||
# engine dependent config
|
||||
categories = ['social media']
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@
|
|||
# @todo set content-parameter with correct data
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from dateutil import parser
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos']
|
||||
|
|
|
|||
|
|
@ -14,12 +14,11 @@
|
|||
from searx import logger
|
||||
from searx.poolrequests import get
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import format_date_by_locale
|
||||
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
from json import loads
|
||||
from lxml.html import fromstring
|
||||
from urllib import urlencode
|
||||
|
||||
logger = logger.getChild('wikidata')
|
||||
result_count = 1
|
||||
|
|
@ -62,14 +61,13 @@ def request(query, params):
|
|||
language = 'en'
|
||||
|
||||
params['url'] = url_search.format(
|
||||
query=urlencode({'label': query,
|
||||
'language': language}))
|
||||
query=urlencode({'label': query, 'language': language}))
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
html = fromstring(resp.content)
|
||||
html = fromstring(resp.text)
|
||||
wikidata_ids = html.xpath(wikidata_ids_xpath)
|
||||
|
||||
language = resp.search_params['language'].split('-')[0]
|
||||
|
|
@ -78,10 +76,9 @@ def response(resp):
|
|||
|
||||
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
||||
for wikidata_id in wikidata_ids[:result_count]:
|
||||
url = url_detail.format(query=urlencode({'page': wikidata_id,
|
||||
'uselang': language}))
|
||||
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
|
||||
htmlresponse = get(url)
|
||||
jsonresponse = loads(htmlresponse.content)
|
||||
jsonresponse = loads(htmlresponse.text)
|
||||
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
|
||||
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -11,13 +11,12 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode, quote
|
||||
from lxml.html import fromstring
|
||||
|
||||
from searx.url_utils import quote, urlencode
|
||||
|
||||
# search-url
|
||||
base_url = 'https://{language}.wikipedia.org/'
|
||||
search_postfix = 'w/api.php?'\
|
||||
base_url = u'https://{language}.wikipedia.org/'
|
||||
search_url = base_url + u'w/api.php?'\
|
||||
'action=query'\
|
||||
'&format=json'\
|
||||
'&{query}'\
|
||||
|
|
@ -37,16 +36,16 @@ def url_lang(lang):
|
|||
else:
|
||||
language = lang
|
||||
|
||||
return base_url.format(language=language)
|
||||
return language
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
if query.islower():
|
||||
query += '|' + query.title()
|
||||
query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
|
||||
|
||||
params['url'] = url_lang(params['language']) \
|
||||
+ search_postfix.format(query=urlencode({'titles': query}))
|
||||
params['url'] = search_url.format(query=urlencode({'titles': query}),
|
||||
language=url_lang(params['language']))
|
||||
|
||||
return params
|
||||
|
||||
|
|
@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
search_result = loads(resp.content)
|
||||
search_result = loads(resp.text)
|
||||
|
||||
# wikipedia article's unique id
|
||||
# first valid id is assumed to be the requested article
|
||||
|
|
@ -99,11 +98,9 @@ def response(resp):
|
|||
extract = page.get('extract')
|
||||
|
||||
summary = extract_first_paragraph(extract, title, image)
|
||||
if not summary:
|
||||
return []
|
||||
|
||||
# link to wikipedia article
|
||||
wikipedia_link = url_lang(resp.search_params['language']) \
|
||||
wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
|
||||
+ 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
|
||||
|
||||
results.append({'url': wikipedia_link, 'title': title})
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@
|
|||
# @stable yes
|
||||
# @parse url, infobox
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml import etree
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# search-url
|
||||
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
|
||||
|
|
@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation',
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
params['url'] = search_url.format(query=urlencode({'input': query}),
|
||||
api_key=api_key)
|
||||
params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
|
||||
params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
|
||||
|
||||
return params
|
||||
|
|
@ -56,7 +55,7 @@ def replace_pua_chars(text):
|
|||
u'\uf74e': 'i', # imaginary number
|
||||
u'\uf7d9': '='} # equals sign
|
||||
|
||||
for k, v in pua_chars.iteritems():
|
||||
for k, v in pua_chars.items():
|
||||
text = text.replace(k, v)
|
||||
|
||||
return text
|
||||
|
|
@ -66,7 +65,7 @@ def replace_pua_chars(text):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
search_results = etree.XML(resp.content)
|
||||
search_results = etree.XML(resp.text)
|
||||
|
||||
# return empty array if there are no results
|
||||
if search_results.xpath(failure_xpath):
|
||||
|
|
@ -120,10 +119,10 @@ def response(resp):
|
|||
# append infobox
|
||||
results.append({'infobox': infobox_title,
|
||||
'attributes': result_chunks,
|
||||
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
|
||||
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
|
||||
|
||||
# append link to site
|
||||
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
||||
results.append({'url': resp.request.headers['Referer'],
|
||||
'title': title,
|
||||
'content': result_content})
|
||||
|
||||
|
|
|
|||
|
|
@ -10,10 +10,9 @@
|
|||
|
||||
from json import loads
|
||||
from time import time
|
||||
from urllib import urlencode
|
||||
from lxml.etree import XML
|
||||
|
||||
from searx.poolrequests import get as http_get
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# search-url
|
||||
url = 'https://www.wolframalpha.com/'
|
||||
|
|
@ -62,7 +61,7 @@ obtain_token()
|
|||
# do search-request
|
||||
def request(query, params):
|
||||
# obtain token if last update was more than an hour
|
||||
if time() - token['last_updated'] > 3600:
|
||||
if time() - (token['last_updated'] or 0) > 3600:
|
||||
obtain_token()
|
||||
params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
|
||||
params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
|
||||
|
|
@ -112,9 +111,9 @@ def response(resp):
|
|||
|
||||
results.append({'infobox': infobox_title,
|
||||
'attributes': result_chunks,
|
||||
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
|
||||
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
|
||||
|
||||
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
||||
results.append({'url': resp.request.headers['Referer'],
|
||||
'title': 'Wolfram|Alpha (' + infobox_title + ')',
|
||||
'content': result_content})
|
||||
|
||||
|
|
|
|||
|
|
@ -10,11 +10,9 @@
|
|||
@parse url, title, thumbnail, img_src, content
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from urlparse import urljoin
|
||||
from lxml import html
|
||||
import string
|
||||
import re
|
||||
from searx.url_utils import urlencode, urljoin
|
||||
|
||||
# engine dependent config
|
||||
categories = ['images']
|
||||
|
|
@ -55,7 +53,7 @@ def response(resp):
|
|||
cur_element += result_part
|
||||
|
||||
# fix xml-error
|
||||
cur_element = string.replace(cur_element, '"></a>', '"/></a>')
|
||||
cur_element = cur_element.replace('"></a>', '"/></a>')
|
||||
|
||||
dom = html.fromstring(cur_element)
|
||||
link = dom.xpath('//a')[0]
|
||||
|
|
|
|||
|
|
@ -13,8 +13,7 @@
|
|||
"""
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from urlparse import urljoin
|
||||
from searx.url_utils import urlencode, urljoin
|
||||
|
||||
# engine dependent config
|
||||
categories = ['images']
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
from lxml import html
|
||||
from urllib import urlencode, unquote
|
||||
from urlparse import urlparse, urljoin
|
||||
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
|
||||
from searx.utils import html_to_text
|
||||
from searx.url_utils import unquote, urlencode, urljoin, urlparse
|
||||
|
||||
search_url = None
|
||||
url_xpath = None
|
||||
content_xpath = None
|
||||
title_xpath = None
|
||||
paging = False
|
||||
suggestion_xpath = ''
|
||||
results_xpath = ''
|
||||
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@
|
|||
# @todo parse video, audio and file results
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from dateutil import parser
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
from searx.utils import html_to_text
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
@parse url, title, content, suggestion
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from urlparse import unquote
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text, extract_url
|
||||
from searx.url_utils import unquote, urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general']
|
||||
|
|
|
|||
|
|
@ -9,13 +9,13 @@
|
|||
# @stable no (HTML can change)
|
||||
# @parse url, title, content, publishedDate
|
||||
|
||||
from urllib import urlencode
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text, extract_url
|
||||
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
from dateutil import parser
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['news']
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.search import logger
|
||||
from searx import logger
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
logger = logger.getChild('yandex engine')
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@
|
|||
# @parse url, title, content, publishedDate, thumbnail, embedded
|
||||
|
||||
from json import loads
|
||||
from urllib import urlencode
|
||||
from dateutil import parser
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos', 'music']
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@
|
|||
# @stable no
|
||||
# @parse url, title, content, publishedDate, thumbnail, embedded
|
||||
|
||||
from urllib import quote_plus
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import list_get
|
||||
from searx.url_utils import quote_plus
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos', 'music']
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue