mirror of https://github.com/searxng/searxng.git
[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
parent fcdc2c2cd2
commit 3d96a9839a
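The `make format.python` target quoted in the commit message wraps a plain black run; the exact options live in the repository's Makefile and pyproject.toml, which are not part of this page. Judging from the reformatted code in the diff below (single-line calls kept up to roughly 120 characters, single-quoted strings left untouched), a roughly equivalent manual invocation would be the following sketch (the path and option values are assumptions, not taken from this commit)::

    # assumed equivalent of `make format.python`; check the repo's Makefile/pyproject.toml for the real options
    python -m black --line-length 120 --skip-string-normalization searx/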
@@ -29,6 +29,7 @@ if settings is not None:
 _unset = object()
 def get_setting(name, default=_unset):
 """Returns the value to which ``name`` point. If there is no such name in the
 settings and the ``default`` is unset, a :py:obj:`KeyError` is raised.

@@ -80,14 +81,9 @@ def logging_config_debug():
-'username': {'color': 'yellow'}
+'username': {'color': 'yellow'},
-coloredlogs.install(
-level=log_level,
-level_styles=level_styles,
-field_styles=field_styles,
-fmt=LOG_FORMAT_DEBUG
-)
+coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG)
@@ -8,13 +8,12 @@ from flask_babel import gettext
-random_int_max = 2**31
+random_int_max = 2 ** 31
-return [random.choice(random_string_letters)
-for _ in range(random.randint(8, 32))]
+return [random.choice(random_string_letters) for _ in range(random.randint(8, 32))]

@@ -39,11 +38,13 @@ def random_uuid():
-random_types = {'string': random_string,
-'int': random_int,
-'float': random_float,
-'sha256': random_sha256,
-'uuid': random_uuid}
+random_types = {
+'string': random_string,
+'int': random_int,
+'float': random_float,
+'sha256': random_sha256,
+'uuid': random_uuid,
+}

@@ -62,6 +63,8 @@ def answer(query):
-return {'name': gettext('Random value generator'),
-'description': gettext('Generate different random values'),
-'examples': ['random {}'.format(x) for x in random_types]}
+return {
+'name': gettext('Random value generator'),
+'description': gettext('Generate different random values'),
+'examples': ['random {}'.format(x) for x in random_types],
+}

@@ -4,11 +4,7 @@ from operator import mul
-keywords = ('min',
-'max',
-'avg',
-'sum',
-'prod')
+keywords = ('min', 'max', 'avg', 'sum', 'prod')

@@ -47,6 +43,8 @@ def answer(query):
-return {'name': gettext('Statistics functions'),
-'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)),
-'examples': ['avg 123 548 2.04 24.2']}
+return {
+'name': gettext('Statistics functions'),
+'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)),
+'examples': ['avg 123 548 2.04 24.2'],
+}
@@ -120,14 +120,15 @@ def wikipedia(query, lang):
-backends = {'dbpedia': dbpedia,
-'duckduckgo': duckduckgo,
-'google': google,
-'startpage': startpage,
-'swisscows': swisscows,
-'qwant': qwant,
-'wikipedia': wikipedia
-}
+backends = {
+'dbpedia': dbpedia,
+'duckduckgo': duckduckgo,
+'google': google,
+'startpage': startpage,
+'swisscows': swisscows,
+'qwant': qwant,
+'wikipedia': wikipedia,
+}
@@ -23,10 +23,12 @@ from pathlib import Path
 data_dir = Path(__file__).parent
 def _load(filename):
 with open(data_dir / filename, encoding='utf-8') as f:
 return json.load(f)
 def ahmia_blacklist_loader():
 """Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
 names. The MD5 values are fetched by::

@@ -39,6 +41,7 @@ def ahmia_blacklist_loader():
 with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as f:
 return f.read().split()
 ENGINES_LANGUAGES = _load('engines_languages.json')
 CURRENCIES = _load('currencies.json')
 USER_AGENTS = _load('useragents.json')
@@ -43,11 +43,15 @@ def response(resp):
-results.append({'url': href,
-'title': title,
-'seed': seed,
-'leech': leech,
-'filesize': filesize,
-'template': 'torrent.html'})
+results.append(
+{
+'url': href,
+'title': title,
+'seed': seed,
+'leech': leech,
+'filesize': filesize,
+'template': 'torrent.html',
+}
+)
@@ -57,6 +57,7 @@ engine_shortcuts = {}
 """
 def load_engine(engine_data):
 """Load engine from ``engine_data``.

@@ -166,20 +167,19 @@ def set_language_attributes(engine):
-"settings.yml - engine: '%s' / language: '%s' not supported" % (
-engine.name, engine.language ))
+"settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language)
+)
-engine.supported_languages = {
-engine.language : engine.supported_languages[engine.language]
-}
+engine.supported_languages = {engine.language: engine.supported_languages[engine.language]}
-if (iso_lang
+if (
+iso_lang
 and iso_lang != engine_lang
 and not engine_lang.startswith(iso_lang)
 and iso_lang not in engine.supported_languages

@@ -197,14 +197,12 @@ def set_language_attributes(engine):
-lambda: engine._fetch_supported_languages(
-get(engine.supported_languages_url, headers=headers))
+lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
-if (settings['outgoing'].get('using_tor_proxy')
-and hasattr(engine, 'onion_url') ):
+if settings['outgoing'].get('using_tor_proxy') and hasattr(engine, 'onion_url'):

@@ -217,9 +215,7 @@ def is_missing_required_attributes(engine):
-logger.error(
-'Missing engine config attribute: "{0}.{1}"'
-.format(engine.name, engine_attr))
+logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, engine_attr))

@@ -230,8 +226,7 @@ def is_engine_active(engine):
-if ('onions' in engine.categories
-and not settings['outgoing'].get('using_tor_proxy') ):
+if 'onions' in engine.categories and not settings['outgoing'].get('using_tor_proxy'):

@@ -253,8 +248,7 @@ def register_engine(engine):
-"""usage: ``engine_list = settings['engines']``
-"""
+"""usage: ``engine_list = settings['engines']``"""
@@ -25,9 +25,7 @@ page_size = 10
-time_range_dict = {'day': 1,
-'week': 7,
-'month': 30}
+time_range_dict = {'day': 1, 'week': 7, 'month': 30}

@@ -54,7 +52,7 @@ def response(resp):
-trimmed_results = all_results[first_result_index:first_result_index + page_size]
+trimmed_results = all_results[first_result_index : first_result_index + page_size]

@@ -65,10 +63,7 @@ def response(resp):
-results.append({'url': cleaned_url,
-'title': title,
-'content': content,
-'is_onion': True})
+results.append({'url': cleaned_url, 'title': title, 'content': content, 'is_onion': True})
@@ -35,8 +35,8 @@ search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{q
-pageno = params['pageno'],
-query = urlencode({'s': query}),
+pageno=params['pageno'],
+query=urlencode({'s': query}),

@@ -55,11 +55,7 @@ def response(resp):
-res = {
-'url': url,
-'title': title,
-'img_src': img_src
-}
+res = {'url': url, 'title': title, 'img_src': img_src}
@@ -97,7 +97,7 @@ main_langs = {
-'zh': '简体中文'
+'zh': '简体中文',

@@ -141,7 +141,6 @@ def response(resp):
-results.append({'url': href,
-'title': title})
+results.append({'url': href, 'title': title})
@@ -27,19 +27,23 @@ nb_per_page = 20
-args = urlencode({
-'q' : query,
-'page' : params['pageno'],
-'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
-'limit' : nb_per_page,
-})
+args = urlencode(
+{
+'q': query,
+'page': params['pageno'],
+'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
+'limit': nb_per_page,
+}
+)

@@ -50,14 +54,16 @@ def response(resp):
-results.append({
-'url': 'https://artic.edu/artworks/%(id)s' % result,
-'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
-'content': result['medium_display'],
-'author': ', '.join(result['artist_titles']),
-'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
-'img_format': result['dimensions'],
-'template': 'images.html'
-})
+results.append(
+{
+'url': 'https://artic.edu/artworks/%(id)s' % result,
+'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
+'content': result['medium_display'],
+'author': ', '.join(result['artist_titles']),
+'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
+'img_format': result['dimensions'],
+'template': 'images.html',
+}
+)
@@ -20,8 +20,9 @@ about = {
-base_url = 'https://export.arxiv.org/api/query?search_query=all:'\
-+ '{query}&start={offset}&max_results={number_of_results}'
+base_url = (
+'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}'
+)

@@ -31,9 +32,7 @@ def request(query, params):
-string_args = dict(query=query,
-offset=offset,
-number_of_results=number_of_results)
+string_args = dict(query=query, offset=offset, number_of_results=number_of_results)

@@ -65,10 +64,7 @@ def response(resp):
-res_dict = {'url': url,
-'title': title,
-'publishedDate': publishedDate,
-'content': content}
+res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
@@ -44,9 +44,7 @@ def request(query, params):
-search_path = search_string.format(
-query=urlencode({'q': query}),
-page=params['pageno'])
+search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno'])
@@ -21,8 +21,10 @@ about = {
-base_url = 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'\
-+ '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}'
+base_url = (
+'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'
++ '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}'
+)

@@ -47,7 +49,7 @@ shorcut_dict = {
-'type:': 'dcdctype:'
+'type:': 'dcdctype:',

@@ -59,9 +61,7 @@ def request(query, params):
-string_args = dict(query=urlencode({'query': query}),
-offset=offset,
-hits=number_of_results)
+string_args = dict(query=urlencode({'query': query}), offset=offset, hits=number_of_results)

@@ -93,7 +93,7 @@ def response(resp):
 if len(item.text) > 300:
 content += "..."
 # dates returned by the BASE API are not several formats
 publishedDate = None
 for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']:
 try:

@@ -103,14 +103,9 @@ def response(resp):
-res_dict = {'url': url,
-'title': title,
-'publishedDate': publishedDate,
-'content': content}
+res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
 else:
-res_dict = {'url': url,
-'title': title,
-'content': content}
+res_dict = {'url': url, 'title': title, 'content': content}
@@ -36,9 +36,11 @@ inital_query = 'search?{query}&search=&form=QBLH'
 # following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE
 page_query = 'search?{query}&search=&first={offset}&FORM=PERE'
 def _get_offset_from_pageno(pageno):
 return (pageno - 1) * 10 + 1
 def request(query, params):
 offset = _get_offset_from_pageno(params.get('pageno', 1))

@@ -53,30 +55,23 @@ def request(query, params):
-lang = match_language(
-params['language'], supported_languages, language_aliases
-)
+lang = match_language(params['language'], supported_languages, language_aliases)
-query = 'language:{} {}'.format(
-lang.split('-')[0].upper(), query
-)
+query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
-search_path = search_string.format(
-query = urlencode({'q': query}),
-offset = offset)
+search_path = search_string.format(query=urlencode({'q': query}), offset=offset)
 if offset > 1:
-referer = base_url + inital_query.format(query = urlencode({'q': query}))
+referer = base_url + inital_query.format(query=urlencode({'q': query}))
-logger.debug("headers.Referer --> %s", referer )
+logger.debug("headers.Referer --> %s", referer)
-params['headers']['Accept'] = (
-'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
-)
+params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

@@ -87,7 +82,7 @@ def response(resp):
 for result in eval_xpath(dom, '//div[@class="sa_cc"]'):
 # IMO //div[@class="sa_cc"] does no longer match
 logger.debug('found //div[@class="sa_cc"] --> %s', result)
 link = eval_xpath(result, './/h3/a')[0]
 url = link.attrib.get('href')

@@ -95,11 +90,7 @@ def response(resp):
-results.append({
-'url': url,
-'title': title,
-'content': content
-})
+results.append({'url': url, 'title': title, 'content': content})

@@ -110,18 +101,14 @@ def response(resp):
-results.append({
-'url': url,
-'title': title,
-'content': content
-})
+results.append({'url': url, 'title': title, 'content': content})
-result_len_container = result_len_container[result_len_container.find("-") * 2 + 2:]
+result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]
|
@ -9,7 +9,10 @@ from json import loads
|
||||||
from searx.utils import match_language
|
from searx.utils import match_language
|
||||||
|
|
||||||
from searx.engines.bing import language_aliases
|
from searx.engines.bing import language_aliases
|
||||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
|
from searx.engines.bing import (
|
||||||
|
_fetch_supported_languages,
|
||||||
|
supported_languages_url,
|
||||||
|
) # NOQA # pylint: disable=unused-import
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
|
@ -31,39 +34,25 @@ number_of_results = 28
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://www.bing.com/'
|
base_url = 'https://www.bing.com/'
|
||||||
search_string = 'images/search'\
|
search_string = 'images/search' '?{query}' '&count={count}' '&first={first}' '&tsc=ImageHoverTitle'
|
||||||
'?{query}'\
|
|
||||||
'&count={count}'\
|
|
||||||
'&first={first}'\
|
|
||||||
'&tsc=ImageHoverTitle'
|
|
||||||
time_range_string = '&qft=+filterui:age-lt{interval}'
|
time_range_string = '&qft=+filterui:age-lt{interval}'
|
||||||
time_range_dict = {'day': '1440',
|
time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
|
||||||
'week': '10080',
|
|
||||||
'month': '43200',
|
|
||||||
'year': '525600'}
|
|
||||||
|
|
||||||
# safesearch definitions
|
# safesearch definitions
|
||||||
safesearch_types = {2: 'STRICT',
|
safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
|
||||||
1: 'DEMOTE',
|
|
||||||
0: 'OFF'}
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
offset = ((params['pageno'] - 1) * number_of_results) + 1
|
offset = ((params['pageno'] - 1) * number_of_results) + 1
|
||||||
|
|
||||||
search_path = search_string.format(
|
search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
|
||||||
query=urlencode({'q': query}),
|
|
||||||
count=number_of_results,
|
|
||||||
first=offset)
|
|
||||||
|
|
||||||
language = match_language(params['language'], supported_languages, language_aliases).lower()
|
language = match_language(params['language'], supported_languages, language_aliases).lower()
|
||||||
|
|
||||||
params['cookies']['SRCHHPGUSR'] = \
|
params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
|
||||||
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
|
|
||||||
|
|
||||||
params['cookies']['_EDGE_S'] = 'mkt=' + language +\
|
params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1'
|
||||||
'&ui=' + language + '&F=1'
|
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
params['url'] = base_url + search_path
|
||||||
if params['time_range'] in time_range_dict:
|
if params['time_range'] in time_range_dict:
|
||||||
|
@ -92,14 +81,18 @@ def response(resp):
|
||||||
# strip 'Unicode private use area' highlighting, they render to Tux
|
# strip 'Unicode private use area' highlighting, they render to Tux
|
||||||
# the Linux penguin and a standing diamond on my machine...
|
# the Linux penguin and a standing diamond on my machine...
|
||||||
title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
|
title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
|
||||||
results.append({'template': 'images.html',
|
results.append(
|
||||||
'url': m['purl'],
|
{
|
||||||
'thumbnail_src': m['turl'],
|
'template': 'images.html',
|
||||||
'img_src': m['murl'],
|
'url': m['purl'],
|
||||||
'content': '',
|
'thumbnail_src': m['turl'],
|
||||||
'title': title,
|
'img_src': m['murl'],
|
||||||
'source': source,
|
'content': '',
|
||||||
'img_format': img_format})
|
'title': title,
|
||||||
|
'source': source,
|
||||||
|
'img_format': img_format,
|
||||||
|
}
|
||||||
|
)
|
||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
@@ -13,10 +13,7 @@ from datetime import datetime
-from searx.utils import (
-match_language,
-eval_xpath_getindex
-)
+from searx.utils import match_language, eval_xpath_getindex

@@ -42,11 +39,8 @@ time_range_support = True
-time_range_dict = {
-'day': '7',
-'week': '8',
-'month': '9'
-}
+time_range_dict = {'day': '7', 'week': '8', 'month': '9'}

@@ -57,6 +51,7 @@ def url_cleanup(url_string):
 url_string = query.get('url', None)
 return url_string
 def image_url_cleanup(url_string):
 """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=..."""

@@ -66,6 +61,7 @@ def image_url_cleanup(url_string):
 url_string = "https://www.bing.com/th?id=" + quote(query.get('id'))
 return url_string
 def _get_url(query, language, offset, time_range):
 if time_range in time_range_dict:
 search_path = search_string_with_time.format(

@@ -91,6 +87,7 @@ def _get_url(query, language, offset, time_range):
 )
 return base_url + search_path
 def request(query, params):
 if params['time_range'] and params['time_range'] not in time_range_dict:

@@ -105,6 +102,7 @@ def request(query, params):
 return params
 def response(resp):
 results = []

@@ -127,26 +125,16 @@ def response(resp):
-thumbnail = eval_xpath_getindex(
-item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None)
+thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None)
 if thumbnail is not None:
-results.append({
-'url': url,
-'title': title,
-'publishedDate': publishedDate,
-'content': content,
-'img_src': thumbnail
-})
+results.append(
+{'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail}
+)
 else:
-results.append({
-'url': url,
-'title': title,
-'publishedDate': publishedDate,
-'content': content
-})
+results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content})
@@ -9,7 +9,10 @@ from urllib.parse import urlencode
-from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
+from searx.engines.bing import (
+_fetch_supported_languages,
+supported_languages_url,
+) # NOQA # pylint: disable=unused-import

@@ -28,36 +31,22 @@ time_range_support = True
-search_string = 'videos/search'\
-'?{query}'\
-'&count={count}'\
-'&first={first}'\
-'&scope=video'\
-'&FORM=QBLH'
+search_string = 'videos/search' '?{query}' '&count={count}' '&first={first}' '&scope=video' '&FORM=QBLH'
-time_range_dict = {'day': '1440',
-'week': '10080',
-'month': '43200',
-'year': '525600'}
+time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
-safesearch_types = {2: 'STRICT',
-1: 'DEMOTE',
-0: 'OFF'}
+safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
-search_path = search_string.format(
-query=urlencode({'q': query}),
-count=number_of_results,
-first=offset)
+search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
-params['cookies']['SRCHHPGUSR'] = \
-'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

@@ -89,11 +78,15 @@ def response(resp):
-results.append({'url': metadata['murl'],
-'thumbnail': thumbnail,
-'title': metadata.get('vt', ''),
-'content': content,
-'template': 'videos.html'})
+results.append(
+{
+'url': metadata['murl'],
+'thumbnail': thumbnail,
+'title': metadata.get('vt', ''),
+'content': content,
+'template': 'videos.html',
+}
+)
@@ -11,10 +11,7 @@ from searx.utils import extract_text, get_torrent_size
-"official_api_documentation": {
-'url': 'https://btdig.com/contacts',
-'comment': 'on demand'
-},
+"official_api_documentation": {'url': 'https://btdig.com/contacts', 'comment': 'on demand'},

@@ -31,8 +28,7 @@ search_url = url + '/search?q={search_term}&p={pageno}'
-params['url'] = search_url.format(search_term=quote(query),
-pageno=params['pageno'] - 1)
+params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'] - 1)

@@ -77,13 +73,17 @@ def response(resp):
-results.append({'url': href,
-'title': title,
-'content': content,
-'filesize': filesize,
-'files': files,
-'magnetlink': magnetlink,
-'template': 'torrent.html'})
+results.append(
+{
+'url': href,
+'title': title,
+'content': content,
+'filesize': filesize,
+'files': files,
+'magnetlink': magnetlink,
+'template': 'torrent.html',
+}
+)
@@ -29,10 +29,7 @@ search_string = '&page={page}&page_size={nb_per_page}&format=json&{query}'
-search_path = search_string.format(
-query=urlencode({'q': query}),
-nb_per_page=nb_per_page,
-page=params['pageno'])
+search_path = search_string.format(query=urlencode({'q': query}), nb_per_page=nb_per_page, page=params['pageno'])

@@ -45,9 +42,13 @@ def response(resp):
-results.append({'url': result['foreign_landing_url'],
-'title': result['title'],
-'img_src': result['url'],
-'template': 'images.html'})
+results.append(
+{
+'url': result['foreign_landing_url'],
+'title': result['title'],
+'img_src': result['url'],
+'template': 'images.html',
+}
+)
@@ -138,7 +138,7 @@ def __check_query_params(params):
-""" Checks if delimiter based parsing or regex parsing is configured correctly """
+"""Checks if delimiter based parsing or regex parsing is configured correctly"""

@@ -151,7 +151,7 @@ def check_parsing_options(engine_settings):
-""" Parses command line output based on configuration """
+"""Parses command line output based on configuration"""

@@ -167,6 +167,6 @@ def __parse_single_result(raw_result):
-result[result_key] = raw_result[found.start():found.end()]
+result[result_key] = raw_result[found.start() : found.end()]
@@ -28,22 +28,24 @@ api_key = 'unset'
-query = urlencode({'q': query}),
-nb_per_page = nb_per_page,
-page = params['pageno'],
-apikey = api_key,
+query=urlencode({'q': query}),
+nb_per_page=nb_per_page,
+page=params['pageno'],
+apikey=api_key,

@@ -52,7 +54,7 @@ def response(resp):
-if time :
+if time:

@@ -66,12 +68,14 @@ def response(resp):
-results.append({
-'url': source['urls'][0].replace('http://', 'https://', 1),
-'title': source['title'],
-'content': source['description'],
-'publishedDate': date,
-'metadata' : metadata,
-})
+results.append(
+{
+'url': source['urls'][0].replace('http://', 'https://', 1),
+'title': source['title'],
+'content': source['description'],
+'publishedDate': date,
+'metadata': metadata,
+}
+)
@@ -30,7 +30,7 @@ def request(query, params):
-json_resp = resp.text[resp.text.find('\n') + 1:resp.text.rfind('\n') - 2]
+json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]

@@ -47,7 +47,8 @@ def response(resp):
-resp.search_params['from'].upper(), resp.search_params['to'])
+resp.search_params['from'].upper(), resp.search_params['to']
+)
@@ -25,8 +25,10 @@ paging = True
-embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
-'data-src="https://www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
+embedded_url = (
+'<iframe frameborder="0" width="540" height="304" '
++ 'data-src="https://www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
+)

@@ -39,8 +41,8 @@ def request(query, params):
-query=urlencode({'search': query, 'localization': locale}),
-pageno=params['pageno'])
+query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno']
+)

@@ -67,13 +69,17 @@ def response(resp):
-results.append({'template': 'videos.html',
-'url': url,
-'title': title,
-'content': content,
-'publishedDate': publishedDate,
-'embedded': embedded,
-'thumbnail': thumbnail})
+results.append(
+{
+'template': 'videos.html',
+'url': url,
+'title': title,
+'content': content,
+'publishedDate': publishedDate,
+'embedded': embedded,
+'thumbnail': thumbnail,
+}
+)
@@ -24,9 +24,11 @@ paging = True
-embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
-'data-src="https://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' +\
-'width="540" height="80"></iframe>'
+embedded_url = (
+'<iframe scrolling="no" frameborder="0" allowTransparency="true" '
++ 'data-src="https://www.deezer.com/plugins/player?type=tracks&id={audioid}" '
++ 'width="540" height="80"></iframe>'
+)

@@ -53,18 +55,12 @@ def response(resp):
-content = '{} - {} - {}'.format(
-result['artist']['name'],
-result['album']['title'],
-result['title'])
+content = '{} - {} - {}'.format(result['artist']['name'], result['album']['title'], result['title'])
-results.append({'url': url,
-'title': title,
-'embedded': embedded,
-'content': content})
+results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content})
@@ -31,6 +31,7 @@ about = {
 # if there is a need for globals, use a leading underline
 _my_offline_engine = None
 def init(engine_settings=None):
 """Initialization of the (offline) engine. The origin of this demo engine is a
 simple json string which is loaded in this example while the engine is

@@ -44,11 +45,10 @@ def init(engine_settings=None):
-']'
-% engine_settings.get('name')
+']' % engine_settings.get('name')

@@ -62,11 +62,11 @@ def search(query, request_params):
-'query' : query,
-'language' : request_params['language'],
-'value' : row.get("value"),
+'query': query,
+'language': request_params['language'],
+'value': row.get("value"),
 # choose a result template or comment out to use the *default*
-'template' : 'key-value.html',
+'template': 'key-value.html',
@@ -43,6 +43,7 @@ about = {
 # if there is a need for globals, use a leading underline
 _my_online_engine = None
 def init(engine_settings):
 """Initialization of the (online) engine. If no initialization is needed, drop
 this init function.

@@ -51,20 +52,24 @@ def init(engine_settings):
-args = urlencode({
-'q' : query,
-'page' : params['pageno'],
-'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
-'limit' : page_size,
-})
+args = urlencode(
+{
+'q': query,
+'page': params['pageno'],
+'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
+'limit': page_size,
+}
+)

@@ -79,14 +84,16 @@ def response(resp):
-results.append({
-'url': 'https://artic.edu/artworks/%(id)s' % result,
-'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
-'content': result['medium_display'],
-'author': ', '.join(result['artist_titles']),
-'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
-'img_format': result['dimensions'],
-'template': 'images.html'
-})
+results.append(
+{
+'url': 'https://artic.edu/artworks/%(id)s' % result,
+'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
+'content': result['medium_display'],
+'author': ', '.join(result['artist_titles']),
+'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
+'img_format': result['dimensions'],
+'template': 'images.html',
+}
+)
@@ -32,13 +32,14 @@ time_range_dict = {
# search-url
base_url = 'https://www.deviantart.com'


def request(query, params):

    # https://www.deviantart.com/search/deviations?page=5&q=foo

    query = {
-        'page' : params['pageno'],
-        'q' : query,
+        'page': params['pageno'],
+        'q': query,
    }
    if params['time_range'] in time_range_dict:
        query['order'] = time_range_dict[params['time_range']]

@@ -47,6 +48,7 @@ def request(query, params):
    return params

+

def response(resp):

    results = []

@@ -67,11 +69,13 @@ def response(resp):
            continue
        img_tag = img_tag[0]

-        results.append({
-            'template': 'images.html',
-            'url': a_tag.attrib.get('href'),
-            'img_src': img_tag.attrib.get('src'),
-            'title': img_tag.attrib.get('alt'),
-        })
+        results.append(
+            {
+                'template': 'images.html',
+                'url': a_tag.attrib.get('href'),
+                'img_src': img_tag.attrib.get('src'),
+                'title': img_tag.attrib.get('alt'),
+            }
+        )

    return results
@@ -27,9 +27,7 @@ https_support = True


def request(query, params):
-    params['url'] = url.format(from_lang=params['from_lang'][2],
-                               to_lang=params['to_lang'][2],
-                               query=params['query'])
+    params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query'])

    return params

@@ -51,10 +49,12 @@ def response(resp):
            if t.strip():
                to_results.append(to_result.text_content())

-        results.append({
-            'url': urljoin(str(resp.url), '?%d' % k),
-            'title': from_result.text_content(),
-            'content': '; '.join(to_results)
-        })
+        results.append(
+            {
+                'url': urljoin(str(resp.url), '?%d' % k),
+                'title': from_result.text_content(),
+                'content': '; '.join(to_results),
+            }
+        )

    return results
@@ -48,13 +48,17 @@ def response(resp):
        filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
        magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

-        results.append({'url': url,
-                        'title': title,
-                        'content': content,
-                        'filesize': filesize,
-                        'magnetlink': magnetlink,
-                        'seed': 'N/A',
-                        'leech': 'N/A',
-                        'template': 'torrent.html'})
+        results.append(
+            {
+                'url': url,
+                'title': title,
+                'content': content,
+                'filesize': filesize,
+                'magnetlink': magnetlink,
+                'seed': 'N/A',
+                'leech': 'N/A',
+                'template': 'torrent.html',
+            }
+        )

    return results
@@ -9,13 +9,13 @@ from urllib.parse import urlencode
from dateutil import parser

about = {
    "website": 'https://hub.docker.com',
    "wikidata_id": 'Q100769064',
    "official_api_documentation": 'https://docs.docker.com/registry/spec/api/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['it']  # optional
paging = True

@@ -23,6 +23,7 @@ paging = True
base_url = "https://hub.docker.com/"
search_url = base_url + "api/content/v1/products/search?{query}&type=image&page_size=25"

+

def request(query, params):

    params['url'] = search_url.format(query=urlencode(dict(q=query, page=params["pageno"])))

@@ -30,6 +31,7 @@ def request(query, params):
    return params

+

def response(resp):
    '''post-response callback
    resp: requests response object

@@ -53,12 +55,8 @@ def response(resp):
            result["url"] = base_url + "r/" + item.get('slug', "")
            result["title"] = item.get("name")
            result["content"] = item.get("short_description")
-            result["publishedDate"] = parser.parse(
-                item.get("updated_at") or item.get("created_at")
-            )
-            result["thumbnail"] = (
-                item["logo_url"].get("large") or item["logo_url"].get("small")
-            )
+            result["publishedDate"] = parser.parse(item.get("updated_at") or item.get("created_at"))
+            result["thumbnail"] = item["logo_url"].get("large") or item["logo_url"].get("small")
            results.append(result)

    return results
@@ -25,8 +25,7 @@ number_of_results = 5
# search-url
# Doku is OpenSearch compatible
base_url = 'http://localhost:8090'
-search_url = '/?do=search'\
-    '&{query}'
+search_url = '/?do=search' '&{query}'
# TODO   '&startRecord={offset}'\
# TODO   '&maximumRecords={limit}'\

@@ -34,8 +33,7 @@ search_url = '/?do=search'\
# do search-request
def request(query, params):

-    params['url'] = base_url +\
-        search_url.format(query=urlencode({'id': query}))
+    params['url'] = base_url + search_url.format(query=urlencode({'id': query}))

    return params

@@ -60,9 +58,7 @@ def response(resp):
        title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))

        # append result
-        results.append({'title': title,
-                        'content': "",
-                        'url': base_url + res_url})
+        results.append({'title': title, 'content': "", 'url': base_url + res_url})

    # Search results
    for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):

@@ -74,9 +70,7 @@ def response(resp):
            content = extract_text(eval_xpath(r, '.'))

            # append result
-            results.append({'title': title,
-                            'content': content,
-                            'url': base_url + res_url})
+            results.append({'title': title, 'content': content, 'url': base_url + res_url})
        except:
            continue
@@ -39,15 +39,10 @@ language_aliases = {
    'ko': 'kr-KR',
    'sl-SI': 'sl-SL',
    'zh-TW': 'tzh-TW',
-    'zh-HK': 'tzh-HK'
+    'zh-HK': 'tzh-HK',
}

-time_range_dict = {
-    'day': 'd',
-    'week': 'w',
-    'month': 'm',
-    'year': 'y'
-}
+time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}

# search-url
url = 'https://lite.duckduckgo.com/lite'

@@ -118,6 +113,7 @@ def request(query, params):
    logger.debug("param cookies: %s", params['cookies'])
    return params

+
# get response from search-request
def response(resp):

@@ -163,21 +159,24 @@ def response(resp):
        if td_content is None:
            continue

-        results.append({
-            'title': a_tag.text_content(),
-            'content': extract_text(td_content),
-            'url': a_tag.get('href'),
-        })
+        results.append(
+            {
+                'title': a_tag.text_content(),
+                'content': extract_text(td_content),
+                'url': a_tag.get('href'),
+            }
+        )

    return results


# get supported languages from their site
def _fetch_supported_languages(resp):

    # response is a js file with regions as an embedded object
    response_page = resp.text
-    response_page = response_page[response_page.find('regions:{') + 8:]
-    response_page = response_page[:response_page.find('}') + 1]
+    response_page = response_page[response_page.find('regions:{') + 8 :]
+    response_page = response_page[: response_page.find('}') + 1]

    regions_json = loads(response_page)
    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
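A second recurring change in these hunks is slice spacing: when a slice bound is a compound expression, black adds the PEP 8 spaces around the ':' (for example `page[page.find('regions:{') + 8 :]`). A small runnable sketch, not part of the commit, with a made-up sample string, showing that the two spellings are the same slice:

# Illustrative only: black adds spacing around ':' when a slice bound is a
# complex expression; the slice itself is unchanged.
response_page = 'prefix regions:{"us-en":"United States"} suffix'

before = response_page[response_page.find('regions:{') + 8:]    # pre-commit style
after = response_page[response_page.find('regions:{') + 8 :]    # black's style

assert before == after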
@@ -10,7 +10,10 @@ from lxml import html
from searx.data import WIKIDATA_UNITS
from searx.engines.duckduckgo import language_aliases
-from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+from searx.engines.duckduckgo import (
+    _fetch_supported_languages,
+    supported_languages_url,
+)  # NOQA # pylint: disable=unused-import
from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom

@@ -24,19 +27,15 @@ about = {
    "results": 'JSON',
}

-URL = 'https://api.duckduckgo.com/'\
-    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
+URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

-WIKIDATA_PREFIX = [
-    'http://www.wikidata.org/entity/',
-    'https://www.wikidata.org/entity/'
-]
+WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']

replace_http_by_https = get_string_replaces_function({'http:': 'https:'})


def is_broken_text(text):
-    """ duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>"
+    """duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>"

    The href URL is broken, the "Related website" may contains some HTML.

@@ -61,11 +60,7 @@ def result_to_text(text, htmlResult):

def request(query, params):
    params['url'] = URL.format(query=urlencode({'q': query}))
-    language = match_language(
-        params['language'],
-        supported_languages,
-        language_aliases
-    )
+    language = match_language(params['language'], supported_languages, language_aliases)
    language = language.split('-')[0]
    params['headers']['Accept-Language'] = language
    return params

@@ -127,23 +122,14 @@ def response(resp):
            firstURL = ddg_result.get('FirstURL')
            text = ddg_result.get('Text')
            if not is_broken_text(text):
-                suggestion = result_to_text(
-                    text,
-                    ddg_result.get('Result')
-                )
+                suggestion = result_to_text(text, ddg_result.get('Result'))
                if suggestion != heading and suggestion is not None:
                    results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            suggestions = []
-            relatedTopics.append({
-                'name': ddg_result.get('Name', ''),
-                'suggestions': suggestions
-            })
+            relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics', []):
-                suggestion = result_to_text(
-                    topic_result.get('Text'),
-                    topic_result.get('Result')
-                )
+                suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result'))
                if suggestion != heading and suggestion is not None:
                    suggestions.append(suggestion)

@@ -152,25 +138,15 @@ def response(resp):
    if abstractURL != '':
        # add as result ? problem always in english
        infobox_id = abstractURL
-        urls.append({
-            'title': search_res.get('AbstractSource'),
-            'url': abstractURL,
-            'official': True
-        })
-        results.append({
-            'url': abstractURL,
-            'title': heading
-        })
+        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True})
+        results.append({'url': abstractURL, 'title': heading})

    # definition
    definitionURL = search_res.get('DefinitionURL', '')
    if definitionURL != '':
        # add as result ? as answer ? problem always in english
        infobox_id = definitionURL
-        urls.append({
-            'title': search_res.get('DefinitionSource'),
-            'url': definitionURL
-        })
+        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})

    # to merge with wikidata's infobox
    if infobox_id:

@@ -198,10 +174,7 @@ def response(resp):
            # * netflix_id
            external_url = get_external_url(data_type, data_value)
            if external_url is not None:
-                urls.append({
-                    'title': data_label,
-                    'url': external_url
-                })
+                urls.append({'title': data_label, 'url': external_url})
        elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']:
            # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
            # ignore wiki_maps_trigger: reference to a javascript

@@ -211,11 +184,7 @@ def response(resp):
            # There is already an URL for the website
            pass
        elif data_type == 'area':
-            attributes.append({
-                'label': data_label,
-                'value': area_to_str(data_value),
-                'entity': 'P2046'
-            })
+            attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'})
            osm_zoom = area_to_osm_zoom(data_value.get('amount'))
        elif data_type == 'coordinates':
            if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':

@@ -224,16 +193,9 @@ def response(resp):
                coordinates = info
            else:
                # coordinate NOT on Earth
-                attributes.append({
-                    'label': data_label,
-                    'value': data_value,
-                    'entity': 'P625'
-                })
+                attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'})
        elif data_type == 'string':
-            attributes.append({
-                'label': data_label,
-                'value': data_value
-            })
+            attributes.append({'label': data_label, 'value': data_value})

    if coordinates:
        data_label = coordinates.get('label')

@@ -241,31 +203,24 @@ def response(resp):
        latitude = data_value.get('latitude')
        longitude = data_value.get('longitude')
        url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
-        urls.append({
-            'title': 'OpenStreetMap',
-            'url': url,
-            'entity': 'P625'
-        })
+        urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'})

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title'    # pylint: disable=fixme
-        if image is None and len(attributes) == 0 and len(urls) == 1 and\
-           len(relatedTopics) == 0 and len(content) == 0:
-            results.append({
-                'url': urls[0]['url'],
-                'title': heading,
-                'content': content
-            })
+        if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0:
+            results.append({'url': urls[0]['url'], 'title': heading, 'content': content})
        else:
-            results.append({
-                'infobox': heading,
-                'id': infobox_id,
-                'content': content,
-                'img_src': image,
-                'attributes': attributes,
-                'urls': urls,
-                'relatedTopics': relatedTopics
-            })
+            results.append(
+                {
+                    'infobox': heading,
+                    'id': infobox_id,
+                    'content': content,
+                    'img_src': image,
+                    'attributes': attributes,
+                    'urls': urls,
+                    'relatedTopics': relatedTopics,
+                }
+            )

    return results

@@ -273,7 +228,7 @@ def response(resp):
def unit_to_str(unit):
    for prefix in WIKIDATA_PREFIX:
        if unit.startswith(prefix):
-            wikidata_entity = unit[len(prefix):]
+            wikidata_entity = unit[len(prefix) :]
            return WIKIDATA_UNITS.get(wikidata_entity, unit)
    return unit
@@ -7,7 +7,10 @@ from json import loads
from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException
from searx.engines.duckduckgo import get_region_code
-from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+from searx.engines.duckduckgo import (
+    _fetch_supported_languages,
+    supported_languages_url,
+)  # NOQA # pylint: disable=unused-import
from searx.network import get

# about

@@ -41,8 +44,8 @@ def get_vqd(query, headers):
    content = res.text
    if content.find('vqd=\'') == -1:
        raise SearxEngineAPIException('Request failed')
-    vqd = content[content.find('vqd=\'') + 5:]
-    vqd = vqd[:vqd.find('\'')]
+    vqd = content[content.find('vqd=\'') + 5 :]
+    vqd = vqd[: vqd.find('\'')]
    return vqd


@@ -61,10 +64,10 @@ def request(query, params):
    region_code = get_region_code(params['language'], lang_list=supported_languages)
    if region_code:
        params['url'] = images_url.format(
-            query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
+            query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd
+        )
    else:
-        params['url'] = images_url.format(
-            query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
+        params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)

    return params

@@ -84,11 +87,15 @@ def response(resp):
        image = result['image']

        # append result
-        results.append({'template': 'images.html',
-                        'title': title,
-                        'content': '',
-                        'thumbnail_src': thumbnail,
-                        'img_src': image,
-                        'url': url})
+        results.append(
+            {
+                'template': 'images.html',
+                'title': title,
+                'content': '',
+                'thumbnail_src': thumbnail,
+                'img_src': image,
+                'url': url,
+            }
+        )

    return results
@@ -38,7 +38,7 @@ def request(query, params):
        pageno : 1 # number of the requested page
    '''

-    offset = (params['pageno'] - 1)
+    offset = params['pageno'] - 1
    if offset == 0:
        search_url_fmt = base_url + 'suchen/dudenonline/{query}'
        params['url'] = search_url_fmt.format(query=quote(query))

@@ -58,9 +58,9 @@ def response(resp):

    dom = html.fromstring(resp.text)

-    number_of_results_element =\
-        eval_xpath_getindex(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()',
-                            0, default=None)
+    number_of_results_element = eval_xpath_getindex(
+        dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None
+    )
    if number_of_results_element is not None:
        number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)
        results.append({'number_of_results': int(number_of_results_string)})

@@ -71,8 +71,6 @@ def response(resp):
        title = eval_xpath(result, 'string(.//h2/a)').strip()
        content = extract_text(eval_xpath(result, './/p'))
        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': content})
+        results.append({'url': url, 'title': title, 'content': content})

    return results
@@ -15,6 +15,8 @@ about = {


def search(query, request_params):
-    return [{
-        'result': 'this is what you get',
-    }]
+    return [
+        {
+            'result': 'this is what you get',
+        }
+    ]
@@ -58,16 +58,17 @@ def response(resp):
        if title == "":
            continue

-        results.append({
-            'url': url,
-            'title': title,
-            'content': content,
-            'price': price,
-            'shipping': shipping,
-            'source_country': source_country,
-            'thumbnail': thumbnail,
-            'template': 'products.html',
-
-        })
+        results.append(
+            {
+                'url': url,
+                'title': title,
+                'content': content,
+                'price': price,
+                'shipping': shipping,
+                'source_country': source_country,
+                'thumbnail': thumbnail,
+                'template': 'products.html',
+            }
+        )

    return results
@@ -119,9 +119,7 @@ def response(resp):
        r['template'] = 'key-value.html'

        if show_metadata:
-            r['metadata'] = {'index': result['_index'],
-                             'id': result['_id'],
-                             'score': result['_score']}
+            r['metadata'] = {'index': result['_index'], 'id': result['_id'], 'score': result['_score']}

        results.append(r)

@@ -133,12 +131,10 @@ _available_query_types = {
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
    'match': _match_query,
    'simple_query_string': _simple_query_string_query,
-
    # Term-level queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
    'term': _term_query,
    'terms': _terms_query,
-
    # Query JSON defined by the instance administrator.
    'custom': _custom_query,
}
@@ -22,10 +22,7 @@ paging = False
safesearch = True

base_url = 'https://www.etools.ch'
-search_path = '/searchAdvancedSubmit.do'\
-    '?query={search_term}'\
-    '&pageResults=20'\
-    '&safeSearch={safesearch}'
+search_path = '/searchAdvancedSubmit.do' '?query={search_term}' '&pageResults=20' '&safeSearch={safesearch}'


def request(query, params):

@@ -49,8 +46,6 @@ def response(resp):
        title = extract_text(eval_xpath(result, './a//text()'))
        content = extract_text(eval_xpath(result, './/div[@class="text"]//text()'))

-        results.append({'url': url,
-                        'title': title,
-                        'content': content})
+        results.append({'url': url, 'title': title, 'content': content})

    return results
@@ -42,13 +42,13 @@ def response(resp):
    for app in dom.xpath('//a[@class="package-header"]'):
        app_url = app.xpath('./@href')[0]
        app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()'))
-        app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \
-            + ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip()
+        app_content = (
+            extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip()
+            + ' - '
+            + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip()
+        )
        app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0]

-        results.append({'url': app_url,
-                        'title': app_title,
-                        'content': app_content,
-                        'img_src': app_img_src})
+        results.append({'url': app_url, 'title': app_title, 'content': app_content, 'img_src': app_img_src})

    return results
@@ -25,10 +25,12 @@ paging = True
api_key = None


-url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
-    '&api_key={api_key}&{text}&sort=relevance' +\
-    '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\
-    '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+url = (
+    'https://api.flickr.com/services/rest/?method=flickr.photos.search'
+    + '&api_key={api_key}&{text}&sort=relevance'
+    + '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z'
+    + '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+)
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'

paging = True

@@ -39,10 +41,9 @@ def build_flickr_url(user_id, photo_id):


def request(query, params):
-    params['url'] = url.format(text=urlencode({'text': query}),
-                               api_key=api_key,
-                               nb_per_page=nb_per_page,
-                               page=params['pageno'])
+    params['url'] = url.format(
+        text=urlencode({'text': query}), api_key=api_key, nb_per_page=nb_per_page, page=params['pageno']
+    )
    return params

@@ -69,7 +70,7 @@ def response(resp):
        else:
            continue

        # For a bigger thumbnail, keep only the url_z, not the url_n
        if 'url_n' in photo:
            thumbnail_src = photo['url_n']
        elif 'url_z' in photo:

@@ -80,13 +81,17 @@ def response(resp):
        url = build_flickr_url(photo['owner'], photo['id'])

        # append result
-        results.append({'url': url,
-                        'title': photo['title'],
-                        'img_src': img_src,
-                        'thumbnail_src': thumbnail_src,
-                        'content': photo['description']['_content'],
-                        'author': photo['ownername'],
-                        'template': 'images.html'})
+        results.append(
+            {
+                'url': url,
+                'title': photo['title'],
+                'img_src': img_src,
+                'thumbnail_src': thumbnail_src,
+                'content': photo['description']['_content'],
+                'author': photo['ownername'],
+                'template': 'images.html',
+            }
+        )

    # return results
    return results
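The long URL constants in these hunks change from backslash continuation to a parenthesised implicit concatenation, which is purely a layout change. A minimal runnable sketch, not part of the commit, using only the first two segments of the Flickr URL for brevity:

# Illustrative only: both spellings build the identical string.
old = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
    '&api_key={api_key}&{text}&sort=relevance'

new = (
    'https://api.flickr.com/services/rest/?method=flickr.photos.search'
    + '&api_key={api_key}&{text}&sort=relevance'
)

assert old == new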
@@ -30,10 +30,12 @@ image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')

paging = True
time_range_support = True
-time_range_dict = {'day': 60 * 60 * 24,
-                   'week': 60 * 60 * 24 * 7,
-                   'month': 60 * 60 * 24 * 7 * 4,
-                   'year': 60 * 60 * 24 * 7 * 52}
+time_range_dict = {
+    'day': 60 * 60 * 24,
+    'week': 60 * 60 * 24 * 7,
+    'month': 60 * 60 * 24 * 7 * 4,
+    'year': 60 * 60 * 24 * 7 * 52,
+}


def build_flickr_url(user_id, photo_id):

@@ -47,8 +49,9 @@ def _get_time_range_url(time_range):


def request(query, params):
-    params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno'])
-                     + _get_time_range_url(params['time_range']))
+    params['url'] = search_url.format(query=urlencode({'text': query}), page=params['pageno']) + _get_time_range_url(
+        params['time_range']
+    )
    return params

@@ -83,10 +86,9 @@ def response(resp):
        for image_size in image_sizes:
            if image_size in photo['sizes']:
                img_src = photo['sizes'][image_size]['url']
-                img_format = 'jpg ' \
-                    + str(photo['sizes'][image_size]['width']) \
-                    + 'x' \
-                    + str(photo['sizes'][image_size]['height'])
+                img_format = (
+                    'jpg ' + str(photo['sizes'][image_size]['width']) + 'x' + str(photo['sizes'][image_size]['height'])
+                )
                break

        if not img_src:

@@ -113,7 +115,7 @@ def response(resp):
            'thumbnail_src': thumbnail_src,
            'source': source,
            'img_format': img_format,
-            'template': 'images.html'
+            'template': 'images.html',
        }
        result['author'] = author.encode(errors='ignore').decode()
        result['source'] = source.encode(errors='ignore').decode()
@@ -35,9 +35,8 @@ content_xpath = './/div[@class="content"]//p'

# do search-request
def request(query, params):
-    offset = (params['pageno'] - 1)
-    params['url'] = search_url.format(query=urlencode({'keys': query}),
-                                      offset=offset)
+    offset = params['pageno'] - 1
+    params['url'] = search_url.format(query=urlencode({'keys': query}), offset=offset)

    return params

@@ -63,10 +62,7 @@ def response(resp):
        content = escape(extract_text(result.xpath(content_xpath)))

        # append result
-        results.append({'url': href,
-                        'title': title,
-                        'img_src': thumbnail,
-                        'content': content})
+        results.append({'url': href, 'title': title, 'img_src': thumbnail, 'content': content})

    # return results
    return results
@@ -26,8 +26,7 @@ paging = True
# search url
url = "https://freesound.org/apiv2/"
search_url = (
-    url
-    + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}"
+    url + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}"
)

embedded_url = '<audio controls><source src="{uri}" type="audio/{ftype}"></audio>'
@@ -10,10 +10,7 @@ from urllib.parse import urlencode
about = {
    "website": 'https://frinkiac.com',
    "wikidata_id": 'Q24882614',
-    "official_api_documentation": {
-        'url': None,
-        'comment': 'see https://github.com/MitchellAW/CompuGlobal'
-    },
+    "official_api_documentation": {'url': None, 'comment': 'see https://github.com/MitchellAW/CompuGlobal'},
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',

@@ -40,12 +37,15 @@ def response(resp):
        episode = result['Episode']
        timestamp = result['Timestamp']

-        results.append({'template': 'images.html',
-                        'url': RESULT_URL.format(base=BASE,
-                                                 query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})),
-                        'title': episode,
-                        'content': '',
-                        'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp),
-                        'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp)})
+        results.append(
+            {
+                'template': 'images.html',
+                'url': RESULT_URL.format(base=BASE, query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})),
+                'title': episode,
+                'content': '',
+                'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp),
+                'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp),
+            }
+        )

    return results
@@ -37,15 +37,12 @@ def locale_to_lang_code(locale):
# wikis for some languages were moved off from the main site, we need to make
# requests to correct URLs to be able to get results in those languages
lang_urls = {
-    'en': {
-        'base': 'https://wiki.gentoo.org',
-        'search': '/index.php?title=Special:Search&offset={offset}&{query}'
-    },
+    'en': {'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}'},
    'others': {
        'base': 'https://wiki.gentoo.org',
-        'search': '/index.php?title=Special:Search&offset={offset}&{query}\
-&profile=translation&languagefilter={language}'
-    }
+        'search': '/index.php?title=Special:Search&offset={offset}&{query}\
+&profile=translation&languagefilter={language}',
+    },
}

@@ -78,7 +75,7 @@ main_langs = {
    'sl': 'Slovenský',
    'th': 'ไทย',
    'uk': 'Українська',
-    'zh': '简体中文'
+    'zh': '简体中文',
}
supported_languages = dict(lang_urls, **main_langs)

@@ -101,8 +98,7 @@ def request(query, params):
    urls = get_lang_urls(language)
    search_url = urls['base'] + urls['search']

-    params['url'] = search_url.format(query=query, offset=offset,
-                                      language=language)
+    params['url'] = search_url.format(query=query, offset=offset, language=language)

    return params

@@ -123,7 +119,6 @@ def response(resp):
        href = urljoin(base_url, link.attrib.get('href'))
        title = extract_text(link)

-        results.append({'url': href,
-                        'title': title})
+        results.append({'url': href, 'title': title})

    return results
@@ -55,12 +55,12 @@ def fetch_extra_param(query_args, headers):
    extra_param_path = search_path + urlencode(query_args)
    text = get(base_url + extra_param_path, headers=headers).text

-    re_var= None
+    re_var = None
    for line in text.splitlines():
        if re_var is None and extra_param_path in line:
            var = line.split("=")[0].split()[1]  # e.g. var --> 'uxrl'
            re_var = re.compile(var + "\\s*=\\s*" + var + "\\s*\\+\\s*'" + "(.*)" + "'(.*)")
-            extra_param = line.split("'")[1][len(extra_param_path):]
+            extra_param = line.split("'")[1][len(extra_param_path) :]
            continue
        if re_var is not None and re_var.search(line):
            extra_param += re_var.search(line).group(1)

@@ -69,12 +69,7 @@ def fetch_extra_param(query_args, headers):

# do search-request
def request(query, params):  # pylint: disable=unused-argument
-    query_args = dict(
-        c = 'main'
-        , q = query
-        , dr = 1
-        , showgoodimages = 0
-    )
+    query_args = dict(c='main', q=query, dr=1, showgoodimages=0)

    if params['language'] and params['language'] != 'all':
        query_args['qlangcountry'] = params['language']

@@ -93,6 +88,7 @@ def request(query, params):  # pylint: disable=unused-argument

    return params

+
# get response from search-request
def response(resp):
    results = []

@@ -125,10 +121,6 @@ def response(resp):
        if len(subtitle) > 3 and subtitle != title:
            title += " - " + subtitle

-        results.append(dict(
-            url = url
-            , title = title
-            , content = content
-        ))
+        results.append(dict(url=url, title=title, content=content))

    return results
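The comma-first keyword layout removed in the last hunk builds exactly the same dictionary as the single-line call black emits; only whitespace and line breaks change. A small runnable sketch, not part of the commit, with a made-up query value:

# Illustrative only: both layouts are valid Python and produce equal dicts.
query = 'searx'

old_style = dict(
    c = 'main'
    , q = query
    , dr = 1
    , showgoodimages = 0
)
new_style = dict(c='main', q=query, dr=1, showgoodimages=0)

assert old_style == new_style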
@@ -55,9 +55,7 @@ def response(resp):
            content = ''

        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': content})
+        results.append({'url': url, 'title': title, 'content': content})

    # return results
    return results
@@ -50,72 +50,63 @@ supported_languages_url = 'https://www.google.com/preferences?#languages'

# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
google_domains = {
    'BG': 'google.bg',  # Bulgaria
    'CZ': 'google.cz',  # Czech Republic
    'DE': 'google.de',  # Germany
    'DK': 'google.dk',  # Denmark
    'AT': 'google.at',  # Austria
    'CH': 'google.ch',  # Switzerland
    'GR': 'google.gr',  # Greece
    'AU': 'google.com.au',  # Australia
    'CA': 'google.ca',  # Canada
    'GB': 'google.co.uk',  # United Kingdom
    'ID': 'google.co.id',  # Indonesia
    'IE': 'google.ie',  # Ireland
    'IN': 'google.co.in',  # India
    'MY': 'google.com.my',  # Malaysia
    'NZ': 'google.co.nz',  # New Zealand
    'PH': 'google.com.ph',  # Philippines
    'SG': 'google.com.sg',  # Singapore
    'US': 'google.com',  # United States (google.us) redirects to .com
    'ZA': 'google.co.za',  # South Africa
    'AR': 'google.com.ar',  # Argentina
    'CL': 'google.cl',  # Chile
    'ES': 'google.es',  # Spain
    'MX': 'google.com.mx',  # Mexico
    'EE': 'google.ee',  # Estonia
    'FI': 'google.fi',  # Finland
    'BE': 'google.be',  # Belgium
    'FR': 'google.fr',  # France
    'IL': 'google.co.il',  # Israel
    'HR': 'google.hr',  # Croatia
    'HU': 'google.hu',  # Hungary
    'IT': 'google.it',  # Italy
    'JP': 'google.co.jp',  # Japan
    'KR': 'google.co.kr',  # South Korea
    'LT': 'google.lt',  # Lithuania
    'LV': 'google.lv',  # Latvia
    'NO': 'google.no',  # Norway
    'NL': 'google.nl',  # Netherlands
    'PL': 'google.pl',  # Poland
    'BR': 'google.com.br',  # Brazil
    'PT': 'google.pt',  # Portugal
    'RO': 'google.ro',  # Romania
    'RU': 'google.ru',  # Russia
    'SK': 'google.sk',  # Slovakia
    'SI': 'google.si',  # Slovenia
    'SE': 'google.se',  # Sweden
    'TH': 'google.co.th',  # Thailand
    'TR': 'google.com.tr',  # Turkey
    'UA': 'google.com.ua',  # Ukraine
    'CN': 'google.com.hk',  # There is no google.cn, we use .com.hk for zh-CN
    'HK': 'google.com.hk',  # Hong Kong
-    'TW': 'google.com.tw'  # Taiwan
+    'TW': 'google.com.tw',  # Taiwan
}

-time_range_dict = {
-    'day': 'd',
-    'week': 'w',
-    'month': 'm',
-    'year': 'y'
-}
+time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}

# Filter results. 0: None, 1: Moderate, 2: Strict
-filter_mapping = {
-    0: 'off',
-    1: 'medium',
-    2: 'high'
-}
+filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}

# specific xpath variables
# ------------------------

@@ -140,6 +131,7 @@ content_xpath = './/div[@class="IsZvec"]'
# from the links not the links itself.
suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'

+
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
    """Composing various language properties for the google engines.

@@ -184,11 +176,11 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
        request's headers)
    """
    ret_val = {
-        'language' : None,
-        'country' : None,
-        'subdomain' : None,
-        'params' : {},
-        'headers' : {},
+        'language': None,
+        'country': None,
+        'subdomain': None,
+        'params': {},
+        'headers': {},
    }

    # language ...

@@ -213,7 +205,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):

    # subdomain ...

    ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com')

    # params & headers

@@ -250,15 +242,18 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
        ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)

    # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5
-    ret_val['headers']['Accept-Language'] = ','.join([
-        lang_country,
-        language + ';q=0.8,',
-        'en;q=0.6',
-        '*;q=0.5',
-    ])
+    ret_val['headers']['Accept-Language'] = ','.join(
+        [
+            lang_country,
+            language + ';q=0.8,',
+            'en;q=0.6',
+            '*;q=0.5',
+        ]
+    )

    return ret_val


def detect_google_sorry(resp):
    if resp.url.host == 'sorry.google.com' or resp.url.path.startswith('/sorry'):
        raise SearxEngineCaptchaException()

@@ -269,9 +264,7 @@ def request(query, params):

    offset = (params['pageno'] - 1) * 10

-    lang_info = get_lang_info(
-        params, supported_languages, language_aliases, True
-    )
+    lang_info = get_lang_info(params, supported_languages, language_aliases, True)

    additional_parameters = {}
    if use_mobile_ui:

@@ -281,15 +274,23 @@ def request(query, params):
    }

    # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
-    query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
-        'q': query,
-        **lang_info['params'],
-        'ie': "utf8",
-        'oe': "utf8",
-        'start': offset,
-        'filter': '0',
-        **additional_parameters,
-    })
+    query_url = (
+        'https://'
+        + lang_info['subdomain']
+        + '/search'
+        + "?"
+        + urlencode(
+            {
+                'q': query,
+                **lang_info['params'],
+                'ie': "utf8",
+                'oe': "utf8",
+                'start': offset,
+                'filter': '0',
+                **additional_parameters,
+            }
+        )
+    )

    if params['time_range'] in time_range_dict:
        query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})

@@ -301,9 +302,7 @@ def request(query, params):
    if use_mobile_ui:
        params['headers']['Accept'] = '*/*'
    else:
-        params['headers']['Accept'] = (
-            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
-        )
+        params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

    return params

@@ -325,7 +324,7 @@ def response(resp):
    else:
        logger.debug("did not find 'answer'")

    # results --> number_of_results
    if not use_mobile_ui:
        try:
            _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0)

@@ -355,11 +354,7 @@ def response(resp):
            if url is None:
                continue
            content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
-            results.append({
-                'url': url,
-                'title': title,
-                'content': content
-            })
+            results.append({'url': url, 'title': title, 'content': content})
        except Exception as e:  # pylint: disable=broad-except
            logger.error(e, exc_info=True)
            # from lxml import etree
@ -30,10 +30,8 @@ from searx.engines.google import (
|
||||||
)
|
)
|
||||||
|
|
||||||
# pylint: disable=unused-import
|
# pylint: disable=unused-import
|
||||||
from searx.engines.google import (
|
from searx.engines.google import supported_languages_url, _fetch_supported_languages
|
||||||
supported_languages_url
|
|
||||||
, _fetch_supported_languages
|
|
||||||
)
|
|
||||||
# pylint: enable=unused-import
|
# pylint: enable=unused-import
|
||||||
|
|
||||||
# about
|
# about
|
||||||
|
@ -53,21 +51,16 @@ use_locale_domain = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
|
|
||||||
filter_mapping = {
|
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
|
||||||
0: 'images',
|
|
||||||
1: 'active',
|
|
||||||
2: 'active'
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def scrap_out_thumbs(dom):
|
def scrap_out_thumbs(dom):
|
||||||
"""Scrap out thumbnail data from <script> tags.
|
"""Scrap out thumbnail data from <script> tags."""
|
||||||
"""
|
|
||||||
ret_val = {}
|
ret_val = {}
|
||||||
for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'):
|
for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'):
|
||||||
_script = script.text
|
_script = script.text
|
||||||
# _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....');
|
# _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....');
|
||||||
_thumb_no, _img_data = _script[len("_setImgSrc("):-2].split(",", 1)
|
_thumb_no, _img_data = _script[len("_setImgSrc(") : -2].split(",", 1)
|
||||||
_thumb_no = _thumb_no.replace("'", "")
|
_thumb_no = _thumb_no.replace("'", "")
|
||||||
_img_data = _img_data.replace("'", "")
|
_img_data = _img_data.replace("'", "")
|
||||||
_img_data = _img_data.replace(r"\/", r"/")
|
_img_data = _img_data.replace(r"\/", r"/")
|
||||||
|
@ -76,8 +69,7 @@ def scrap_out_thumbs(dom):
|
||||||
|
|
||||||
|
|
||||||
def scrap_img_by_id(script, data_id):
|
def scrap_img_by_id(script, data_id):
|
||||||
"""Get full image URL by data-id in parent element
|
"""Get full image URL by data-id in parent element"""
|
||||||
"""
|
|
||||||
img_url = ''
|
img_url = ''
|
||||||
_script = script.split('\n')
|
_script = script.split('\n')
|
||||||
for i, line in enumerate(_script):
|
for i, line in enumerate(_script):
|
||||||
|
@ -91,20 +83,25 @@ def scrap_img_by_id(script, data_id):
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Google-Video search request"""
|
"""Google-Video search request"""
|
||||||
|
|
||||||
lang_info = get_lang_info(
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
params, supported_languages, language_aliases, False
|
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||||
)
|
|
||||||
logger.debug(
|
|
||||||
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
|
query_url = (
|
||||||
'q': query,
|
'https://'
|
||||||
'tbm': "isch",
|
+ lang_info['subdomain']
|
||||||
**lang_info['params'],
|
+ '/search'
|
||||||
'ie': "utf8",
|
+ "?"
|
||||||
'oe': "utf8",
|
+ urlencode(
|
||||||
'num': 30,
|
{
|
||||||
})
|
'q': query,
|
||||||
|
'tbm': "isch",
|
||||||
|
**lang_info['params'],
|
||||||
|
'ie': "utf8",
|
||||||
|
'oe': "utf8",
|
||||||
|
'num': 30,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if params['time_range'] in time_range_dict:
|
if params['time_range'] in time_range_dict:
|
||||||
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
|
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
|
||||||
|
@ -113,9 +110,7 @@ def request(query, params):
|
||||||
params['url'] = query_url
|
params['url'] = query_url
|
||||||
|
|
||||||
params['headers'].update(lang_info['headers'])
|
params['headers'].update(lang_info['headers'])
|
||||||
params['headers']['Accept'] = (
|
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
|
||||||
)
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -128,8 +123,7 @@ def response(resp):
|
||||||
# convert the text to dom
|
# convert the text to dom
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
img_bas64_map = scrap_out_thumbs(dom)
|
img_bas64_map = scrap_out_thumbs(dom)
|
||||||
img_src_script = eval_xpath_getindex(
|
img_src_script = eval_xpath_getindex(dom, '//script[contains(., "AF_initDataCallback({key: ")]', 1).text
|
||||||
dom, '//script[contains(., "AF_initDataCallback({key: ")]', 1).text
|
|
||||||
|
|
||||||
# parse results
|
# parse results
|
||||||
#
|
#
|
||||||
|
@ -189,15 +183,17 @@ def response(resp):
|
||||||
if not src_url:
|
if not src_url:
|
||||||
src_url = thumbnail_src
|
src_url = thumbnail_src
|
||||||
|
|
||||||
results.append({
|
results.append(
|
||||||
'url': url,
|
{
|
||||||
'title': img_alt,
|
'url': url,
|
||||||
'content': pub_descr,
|
'title': img_alt,
|
||||||
'source': pub_source,
|
'content': pub_descr,
|
||||||
'img_src': src_url,
|
'source': pub_source,
|
||||||
# 'img_format': img_format,
|
'img_src': src_url,
|
||||||
'thumbnail_src': thumbnail_src,
|
# 'img_format': img_format,
|
||||||
'template': 'images.html'
|
'thumbnail_src': thumbnail_src,
|
||||||
})
|
'template': 'images.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -32,6 +32,7 @@ from searx.engines.google import (
|
||||||
supported_languages_url,
|
supported_languages_url,
|
||||||
_fetch_supported_languages,
|
_fetch_supported_languages,
|
||||||
)
|
)
|
||||||
|
|
||||||
# pylint: enable=unused-import
|
# pylint: enable=unused-import
|
||||||
|
|
||||||
from searx.engines.google import (
|
from searx.engines.google import (
|
||||||
|
@ -71,14 +72,12 @@ time_range_support = True
|
||||||
# safesearch : results are identitical for safesearch=0 and safesearch=2
|
# safesearch : results are identitical for safesearch=0 and safesearch=2
|
||||||
safesearch = False
|
safesearch = False
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Google-News search request"""
|
"""Google-News search request"""
|
||||||
|
|
||||||
lang_info = get_lang_info(
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
params, supported_languages, language_aliases, False
|
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||||
)
|
|
||||||
logger.debug(
|
|
||||||
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
# google news has only one domain
|
# google news has only one domain
|
||||||
lang_info['subdomain'] = 'news.google.com'
|
lang_info['subdomain'] = 'news.google.com'
|
||||||
|
@ -94,19 +93,26 @@ def request(query, params):
|
||||||
if params['time_range']:
|
if params['time_range']:
|
||||||
query += ' ' + time_range_dict[params['time_range']]
|
query += ' ' + time_range_dict[params['time_range']]
|
||||||
|
|
||||||
query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
|
query_url = (
|
||||||
'q': query,
|
'https://'
|
||||||
**lang_info['params'],
|
+ lang_info['subdomain']
|
||||||
'ie': "utf8",
|
+ '/search'
|
||||||
'oe': "utf8",
|
+ "?"
|
||||||
'gl': lang_info['country'],
|
+ urlencode(
|
||||||
}) + ('&ceid=%s' % ceid) # ceid includes a ':' character which must not be urlencoded
|
{
|
||||||
|
'q': query,
|
||||||
|
**lang_info['params'],
|
||||||
|
'ie': "utf8",
|
||||||
|
'oe': "utf8",
|
||||||
|
'gl': lang_info['country'],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
+ ('&ceid=%s' % ceid)
|
||||||
|
) # ceid includes a ':' character which must not be urlencoded
|
||||||
params['url'] = query_url
|
params['url'] = query_url
|
||||||
|
|
||||||
params['headers'].update(lang_info['headers'])
|
params['headers'].update(lang_info['headers'])
|
||||||
params['headers']['Accept'] = (
|
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
|
||||||
)
|
|
||||||
params['headers']['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % datetime.now().strftime("%Y%m%d")
|
params['headers']['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % datetime.now().strftime("%Y%m%d")
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
@ -141,7 +147,7 @@ def response(resp):
|
||||||
# jslog="95014; 5:W251bGwsbnVsbCxudW...giXQ==; track:click"
|
# jslog="95014; 5:W251bGwsbnVsbCxudW...giXQ==; track:click"
|
||||||
jslog = jslog.split(";")[1].split(':')[1].strip()
|
jslog = jslog.split(";")[1].split(':')[1].strip()
|
||||||
try:
|
try:
|
||||||
padding = (4 -(len(jslog) % 4)) * "="
|
padding = (4 - (len(jslog) % 4)) * "="
|
||||||
jslog = b64decode(jslog + padding)
|
jslog = b64decode(jslog + padding)
|
||||||
except binascii.Error:
|
except binascii.Error:
|
||||||
# URL cant be read, skip this result
|
# URL cant be read, skip this result
|
||||||
|
@ -178,12 +184,14 @@ def response(resp):
|
||||||
|
|
||||||
img_src = extract_text(result.xpath('preceding-sibling::a/figure/img/@src'))
|
img_src = extract_text(result.xpath('preceding-sibling::a/figure/img/@src'))
|
||||||
|
|
||||||
results.append({
|
results.append(
|
||||||
'url': url,
|
{
|
||||||
'title': title,
|
'url': url,
|
||||||
'content': content,
|
'title': title,
|
||||||
'img_src': img_src,
|
'content': content,
|
||||||
})
|
'img_src': img_src,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# return results
|
# return results
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -32,6 +32,7 @@ from searx.engines.google import (
|
||||||
supported_languages_url,
|
supported_languages_url,
|
||||||
_fetch_supported_languages,
|
_fetch_supported_languages,
|
||||||
)
|
)
|
||||||
|
|
||||||
# pylint: enable=unused-import
|
# pylint: enable=unused-import
|
||||||
|
|
||||||
# about
|
# about
|
||||||
|
@ -52,6 +53,7 @@ use_locale_domain = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
safesearch = False
|
safesearch = False
|
||||||
|
|
||||||
|
|
||||||
def time_range_url(params):
|
def time_range_url(params):
|
||||||
"""Returns a URL query component for a google-Scholar time range based on
|
"""Returns a URL query component for a google-Scholar time range based on
|
||||||
``params['time_range']``. Google-Scholar does only support ranges in years.
|
``params['time_range']``. Google-Scholar does only support ranges in years.
|
||||||
|
@ -64,7 +66,7 @@ def time_range_url(params):
|
||||||
# as_ylo=2016&as_yhi=2019
|
# as_ylo=2016&as_yhi=2019
|
||||||
ret_val = ''
|
ret_val = ''
|
||||||
if params['time_range'] in time_range_dict:
|
if params['time_range'] in time_range_dict:
|
||||||
ret_val= urlencode({'as_ylo': datetime.now().year -1 })
|
ret_val = urlencode({'as_ylo': datetime.now().year - 1})
|
||||||
return '&' + ret_val
|
return '&' + ret_val
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,34 +74,38 @@ def request(query, params):
|
||||||
"""Google-Scholar search request"""
|
"""Google-Scholar search request"""
|
||||||
|
|
||||||
offset = (params['pageno'] - 1) * 10
|
offset = (params['pageno'] - 1) * 10
|
||||||
lang_info = get_lang_info(
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
params, supported_languages, language_aliases, False
|
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||||
)
|
|
||||||
logger.debug(
|
|
||||||
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
# subdomain is: scholar.google.xy
|
# subdomain is: scholar.google.xy
|
||||||
lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
|
lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
|
||||||
|
|
||||||
query_url = 'https://'+ lang_info['subdomain'] + '/scholar' + "?" + urlencode({
|
query_url = (
|
||||||
'q': query,
|
'https://'
|
||||||
**lang_info['params'],
|
+ lang_info['subdomain']
|
||||||
'ie': "utf8",
|
+ '/scholar'
|
||||||
'oe': "utf8",
|
+ "?"
|
||||||
'start' : offset,
|
+ urlencode(
|
||||||
})
|
{
|
||||||
|
'q': query,
|
||||||
|
**lang_info['params'],
|
||||||
|
'ie': "utf8",
|
||||||
|
'oe': "utf8",
|
||||||
|
'start': offset,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
query_url += time_range_url(params)
|
query_url += time_range_url(params)
|
||||||
params['url'] = query_url
|
params['url'] = query_url
|
||||||
|
|
||||||
params['headers'].update(lang_info['headers'])
|
params['headers'].update(lang_info['headers'])
|
||||||
params['headers']['Accept'] = (
|
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
|
||||||
)
|
|
||||||
|
|
||||||
#params['google_subdomain'] = subdomain
|
# params['google_subdomain'] = subdomain
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
"""Get response from google's search request"""
|
"""Get response from google's search request"""
|
||||||
results = []
|
results = []
|
||||||
|
@ -132,11 +138,13 @@ def response(resp):
|
||||||
if pub_type:
|
if pub_type:
|
||||||
title = title + " " + pub_type
|
title = title + " " + pub_type
|
||||||
|
|
||||||
results.append({
|
results.append(
|
||||||
'url': url,
|
{
|
||||||
'title': title,
|
'url': url,
|
||||||
'content': content,
|
'title': title,
|
||||||
})
|
'content': content,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# parse suggestion
|
# parse suggestion
|
||||||
for suggestion in eval_xpath(dom, '//div[contains(@class, "gs_qsuggest_wrap")]//li//a'):
|
for suggestion in eval_xpath(dom, '//div[contains(@class, "gs_qsuggest_wrap")]//li//a'):
|
||||||
|
|
|
@ -38,10 +38,8 @@ from searx.engines.google import (
|
||||||
)
|
)
|
||||||
|
|
||||||
# pylint: disable=unused-import
|
# pylint: disable=unused-import
|
||||||
from searx.engines.google import (
|
from searx.engines.google import supported_languages_url, _fetch_supported_languages
|
||||||
supported_languages_url
|
|
||||||
, _fetch_supported_languages
|
|
||||||
)
|
|
||||||
# pylint: enable=unused-import
|
# pylint: enable=unused-import
|
||||||
|
|
||||||
# about
|
# about
|
||||||
|
@ -65,6 +63,7 @@ safesearch = True
|
||||||
|
|
||||||
RE_CACHE = {}
|
RE_CACHE = {}
|
||||||
|
|
||||||
|
|
||||||
def _re(regexpr):
|
def _re(regexpr):
|
||||||
"""returns compiled regular expression"""
|
"""returns compiled regular expression"""
|
||||||
RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr))
|
RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr))
|
||||||
|
@ -77,18 +76,17 @@ def scrap_out_thumbs_src(dom):
|
||||||
for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
|
for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
|
||||||
_script = script.text
|
_script = script.text
|
||||||
# "dimg_35":"https://i.ytimg.c....",
|
# "dimg_35":"https://i.ytimg.c....",
|
||||||
_dimurl = _re("s='([^']*)").findall( _script)
|
_dimurl = _re("s='([^']*)").findall(_script)
|
||||||
for k,v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)' ).findall(_script):
|
for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script):
|
||||||
v = v.replace(r'\u003d','=')
|
v = v.replace(r'\u003d', '=')
|
||||||
v = v.replace(r'\u0026','&')
|
v = v.replace(r'\u0026', '&')
|
||||||
ret_val[k] = v
|
ret_val[k] = v
|
||||||
logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
|
logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
|
||||||
return ret_val
|
return ret_val
|
||||||
|
|
||||||
|
|
||||||
def scrap_out_thumbs(dom):
|
def scrap_out_thumbs(dom):
|
||||||
"""Scrap out thumbnail data from <script> tags.
|
"""Scrap out thumbnail data from <script> tags."""
|
||||||
"""
|
|
||||||
ret_val = {}
|
ret_val = {}
|
||||||
thumb_name = 'dimg_'
|
thumb_name = 'dimg_'
|
||||||
|
|
||||||
|
@ -96,7 +94,7 @@ def scrap_out_thumbs(dom):
|
||||||
_script = script.text
|
_script = script.text
|
||||||
|
|
||||||
# var s='data:image/jpeg;base64, ...'
|
# var s='data:image/jpeg;base64, ...'
|
||||||
_imgdata = _re("s='([^']*)").findall( _script)
|
_imgdata = _re("s='([^']*)").findall(_script)
|
||||||
if not _imgdata:
|
if not _imgdata:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -112,19 +110,24 @@ def scrap_out_thumbs(dom):
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Google-Video search request"""
|
"""Google-Video search request"""
|
||||||
|
|
||||||
lang_info = get_lang_info(
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
params, supported_languages, language_aliases, False
|
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||||
)
|
|
||||||
logger.debug(
|
|
||||||
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
|
query_url = (
|
||||||
'q': query,
|
'https://'
|
||||||
'tbm': "vid",
|
+ lang_info['subdomain']
|
||||||
**lang_info['params'],
|
+ '/search'
|
||||||
'ie': "utf8",
|
+ "?"
|
||||||
'oe': "utf8",
|
+ urlencode(
|
||||||
})
|
{
|
||||||
|
'q': query,
|
||||||
|
'tbm': "vid",
|
||||||
|
**lang_info['params'],
|
||||||
|
'ie': "utf8",
|
||||||
|
'oe': "utf8",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if params['time_range'] in time_range_dict:
|
if params['time_range'] in time_range_dict:
|
||||||
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
|
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
|
||||||
|
@ -133,9 +136,7 @@ def request(query, params):
|
||||||
params['url'] = query_url
|
params['url'] = query_url
|
||||||
|
|
||||||
params['headers'].update(lang_info['headers'])
|
params['headers'].update(lang_info['headers'])
|
||||||
params['headers']['Accept'] = (
|
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
|
||||||
)
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -171,21 +172,22 @@ def response(resp):
|
||||||
|
|
||||||
title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
|
title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
|
||||||
url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
|
url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
|
||||||
length = extract_text(eval_xpath(
|
length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span'))
|
||||||
result, './/div[contains(@class, "P7xzyf")]/span/span'))
|
|
||||||
c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
|
c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
|
||||||
content = extract_text(c_node)
|
content = extract_text(c_node)
|
||||||
pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]'))
|
pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]'))
|
||||||
|
|
||||||
results.append({
|
results.append(
|
||||||
'url': url,
|
{
|
||||||
'title': title,
|
'url': url,
|
||||||
'content': content,
|
'title': title,
|
||||||
'length': length,
|
'content': content,
|
||||||
'author': pub_info,
|
'length': length,
|
||||||
'thumbnail': img_src,
|
'author': pub_info,
|
||||||
'template': 'videos.html',
|
'thumbnail': img_src,
|
||||||
})
|
'template': 'videos.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# parse suggestion
|
# parse suggestion
|
||||||
for suggestion in eval_xpath_list(dom, suggestion_xpath):
|
for suggestion in eval_xpath_list(dom, suggestion_xpath):
|
||||||
|
|
|
@ -27,7 +27,9 @@ about = {
|
||||||
"results": 'HTML',
|
"results": 'HTML',
|
||||||
}
|
}
|
||||||
|
|
||||||
categories = ['general', ]
|
categories = [
|
||||||
|
'general',
|
||||||
|
]
|
||||||
paging = False
|
paging = False
|
||||||
|
|
||||||
# suggestion_url = "https://sg.media-imdb.com/suggestion/{letter}/{query}.json"
|
# suggestion_url = "https://sg.media-imdb.com/suggestion/{letter}/{query}.json"
|
||||||
|
@ -35,13 +37,7 @@ suggestion_url = "https://v2.sg.media-imdb.com/suggestion/{letter}/{query}.json"
|
||||||
|
|
||||||
href_base = 'https://imdb.com/{category}/{entry_id}'
|
href_base = 'https://imdb.com/{category}/{entry_id}'
|
||||||
|
|
||||||
search_categories = {
|
search_categories = {"nm": "name", "tt": "title", "kw": "keyword", "co": "company", "ep": "episode"}
|
||||||
"nm": "name",
|
|
||||||
"tt": "title",
|
|
||||||
"kw": "keyword",
|
|
||||||
"co": "company",
|
|
||||||
"ep": "episode"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
@ -63,9 +59,7 @@ def response(resp):
|
||||||
entry_id = entry['id']
|
entry_id = entry['id']
|
||||||
categ = search_categories.get(entry_id[:2])
|
categ = search_categories.get(entry_id[:2])
|
||||||
if categ is None:
|
if categ is None:
|
||||||
logger.error(
|
logger.error('skip unknown category tag %s in %s', entry_id[:2], entry_id)
|
||||||
'skip unknown category tag %s in %s', entry_id[:2], entry_id
|
|
||||||
)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
title = entry['l']
|
title = entry['l']
|
||||||
|
@ -95,11 +89,13 @@ def response(resp):
|
||||||
if not image_url_name.endswith('_V1_'):
|
if not image_url_name.endswith('_V1_'):
|
||||||
magic = '_V1_' + magic
|
magic = '_V1_' + magic
|
||||||
image_url = image_url_name + magic + '.' + image_url_prefix
|
image_url = image_url_name + magic + '.' + image_url_prefix
|
||||||
results.append({
|
results.append(
|
||||||
"title": title,
|
{
|
||||||
"url": href_base.format(category=categ, entry_id=entry_id),
|
"title": title,
|
||||||
"content": content,
|
"url": href_base.format(category=categ, entry_id=entry_id),
|
||||||
"img_src" : image_url,
|
"content": content,
|
||||||
})
|
"img_src": image_url,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -41,9 +41,7 @@ content_xpath = './/p[@class="media-body__summary"]'
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(ps=page_size,
|
params['url'] = search_url.format(ps=page_size, start=params['pageno'] * page_size, query=urlencode({'q': query}))
|
||||||
start=params['pageno'] * page_size,
|
|
||||||
query=urlencode({'q': query}))
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -75,12 +73,16 @@ def response(resp):
|
||||||
content = extract_text(result.xpath(content_xpath))
|
content = extract_text(result.xpath(content_xpath))
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': url,
|
results.append(
|
||||||
'title': title,
|
{
|
||||||
'content': content,
|
'url': url,
|
||||||
'template': 'videos.html',
|
'title': title,
|
||||||
'publishedDate': publishedDate,
|
'content': content,
|
||||||
'thumbnail': thumbnail})
|
'template': 'videos.html',
|
||||||
|
'publishedDate': publishedDate,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# return results
|
# return results
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -46,14 +46,10 @@ def request(query, params):
|
||||||
base_url_rand = base_url
|
base_url_rand = base_url
|
||||||
|
|
||||||
search_url = base_url_rand + "api/v1/search?q={query}"
|
search_url = base_url_rand + "api/v1/search?q={query}"
|
||||||
params["url"] = search_url.format(
|
params["url"] = search_url.format(query=quote_plus(query)) + "&page={pageno}".format(pageno=params["pageno"])
|
||||||
query=quote_plus(query)
|
|
||||||
) + "&page={pageno}".format(pageno=params["pageno"])
|
|
||||||
|
|
||||||
if params["time_range"] in time_range_dict:
|
if params["time_range"] in time_range_dict:
|
||||||
params["url"] += "&date={timerange}".format(
|
params["url"] += "&date={timerange}".format(timerange=time_range_dict[params["time_range"]])
|
||||||
timerange=time_range_dict[params["time_range"]]
|
|
||||||
)
|
|
||||||
|
|
||||||
if params["language"] != "all":
|
if params["language"] != "all":
|
||||||
lang = params["language"].split("-")
|
lang = params["language"].split("-")
|
||||||
|
@ -88,17 +84,13 @@ def response(resp):
|
||||||
url = base_invidious_url + videoid
|
url = base_invidious_url + videoid
|
||||||
embedded = embedded_url.format(videoid=videoid)
|
embedded = embedded_url.format(videoid=videoid)
|
||||||
thumbs = result.get("videoThumbnails", [])
|
thumbs = result.get("videoThumbnails", [])
|
||||||
thumb = next(
|
thumb = next((th for th in thumbs if th["quality"] == "sddefault"), None)
|
||||||
(th for th in thumbs if th["quality"] == "sddefault"), None
|
|
||||||
)
|
|
||||||
if thumb:
|
if thumb:
|
||||||
thumbnail = thumb.get("url", "")
|
thumbnail = thumb.get("url", "")
|
||||||
else:
|
else:
|
||||||
thumbnail = ""
|
thumbnail = ""
|
||||||
|
|
||||||
publishedDate = parser.parse(
|
publishedDate = parser.parse(time.ctime(result.get("published", 0)))
|
||||||
time.ctime(result.get("published", 0))
|
|
||||||
)
|
|
||||||
length = time.gmtime(result.get("lengthSeconds"))
|
length = time.gmtime(result.get("lengthSeconds"))
|
||||||
if length.tm_hour:
|
if length.tm_hour:
|
||||||
length = time.strftime("%H:%M:%S", length)
|
length = time.strftime("%H:%M:%S", length)
|
||||||
|
|
|
@ -119,22 +119,22 @@ def response(resp):
|
||||||
content = query(result, content_query)[0]
|
content = query(result, content_query)[0]
|
||||||
except:
|
except:
|
||||||
content = ""
|
content = ""
|
||||||
results.append({
|
results.append(
|
||||||
'url': to_string(url),
|
{
|
||||||
'title': title_filter(to_string(title)),
|
'url': to_string(url),
|
||||||
'content': content_filter(to_string(content)),
|
'title': title_filter(to_string(title)),
|
||||||
})
|
'content': content_filter(to_string(content)),
|
||||||
|
}
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
for url, title, content in zip(
|
for url, title, content in zip(query(json, url_query), query(json, title_query), query(json, content_query)):
|
||||||
query(json, url_query),
|
results.append(
|
||||||
query(json, title_query),
|
{
|
||||||
query(json, content_query)
|
'url': to_string(url),
|
||||||
):
|
'title': title_filter(to_string(title)),
|
||||||
results.append({
|
'content': content_filter(to_string(content)),
|
||||||
'url': to_string(url),
|
}
|
||||||
'title': title_filter(to_string(title)),
|
)
|
||||||
'content': content_filter(to_string(content)),
|
|
||||||
})
|
|
||||||
|
|
||||||
if not suggestion_query:
|
if not suggestion_query:
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -34,8 +34,7 @@ content_xpath = './/span[@class="font11px lightgrey block"]'
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(search_term=quote(query),
|
params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'])
|
||||||
pageno=params['pageno'])
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -79,16 +78,20 @@ def response(resp):
|
||||||
torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")
|
torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': href,
|
results.append(
|
||||||
'title': title,
|
{
|
||||||
'content': content,
|
'url': href,
|
||||||
'seed': seed,
|
'title': title,
|
||||||
'leech': leech,
|
'content': content,
|
||||||
'filesize': filesize,
|
'seed': seed,
|
||||||
'files': files,
|
'leech': leech,
|
||||||
'magnetlink': magnetlink,
|
'filesize': filesize,
|
||||||
'torrentfile': torrentfileurl,
|
'files': files,
|
||||||
'template': 'torrent.html'})
|
'magnetlink': magnetlink,
|
||||||
|
'torrentfile': torrentfileurl,
|
||||||
|
'template': 'torrent.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# return results sorted by seeder
|
# return results sorted by seeder
|
||||||
return sorted(results, key=itemgetter('seed'), reverse=True)
|
return sorted(results, key=itemgetter('seed'), reverse=True)
|
||||||
|
|
|
@ -34,9 +34,7 @@ IMG_SRC_FIXES = {
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
|
||||||
search_path = search_string.format(
|
search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno'])
|
||||||
query=urlencode({'q': query}),
|
|
||||||
page=params['pageno'])
|
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
params['url'] = base_url + search_path
|
||||||
|
|
||||||
|
@ -56,13 +54,15 @@ def response(resp):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
img_src = result['image']['thumb']
|
img_src = result['image']['thumb']
|
||||||
results.append({
|
results.append(
|
||||||
'url': result['links']['item'],
|
{
|
||||||
'title': result['title'],
|
'url': result['links']['item'],
|
||||||
'img_src': img_src,
|
'title': result['title'],
|
||||||
'thumbnail_src': result['image']['thumb'],
|
'img_src': img_src,
|
||||||
'author': result['creator'],
|
'thumbnail_src': result['image']['thumb'],
|
||||||
'template': 'images.html'
|
'author': result['creator'],
|
||||||
})
|
'template': 'images.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -22,29 +22,33 @@ paging = True
|
||||||
time_range_support = False
|
time_range_support = False
|
||||||
safesearch = False
|
safesearch = False
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
|
||||||
params['url'] = 'https://mediathekviewweb.de/api/query'
|
params['url'] = 'https://mediathekviewweb.de/api/query'
|
||||||
params['method'] = 'POST'
|
params['method'] = 'POST'
|
||||||
params['headers']['Content-type'] = 'text/plain'
|
params['headers']['Content-type'] = 'text/plain'
|
||||||
params['data'] = dumps({
|
params['data'] = dumps(
|
||||||
'queries' : [
|
{
|
||||||
{
|
'queries': [
|
||||||
'fields' : [
|
{
|
||||||
'title',
|
'fields': [
|
||||||
'topic',
|
'title',
|
||||||
],
|
'topic',
|
||||||
'query' : query
|
],
|
||||||
},
|
'query': query,
|
||||||
],
|
},
|
||||||
'sortBy' : 'timestamp',
|
],
|
||||||
'sortOrder' : 'desc',
|
'sortBy': 'timestamp',
|
||||||
'future' : True,
|
'sortOrder': 'desc',
|
||||||
'offset' : (params['pageno'] - 1 )* 10,
|
'future': True,
|
||||||
'size' : 10
|
'offset': (params['pageno'] - 1) * 10,
|
||||||
})
|
'size': 10,
|
||||||
|
}
|
||||||
|
)
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
|
|
||||||
resp = loads(resp.text)
|
resp = loads(resp.text)
|
||||||
|
@ -58,11 +62,13 @@ def response(resp):
|
||||||
|
|
||||||
item['hms'] = str(datetime.timedelta(seconds=item['duration']))
|
item['hms'] = str(datetime.timedelta(seconds=item['duration']))
|
||||||
|
|
||||||
results.append({
|
results.append(
|
||||||
'url' : item['url_video_hd'],
|
{
|
||||||
'title' : "%(channel)s: %(title)s (%(hms)s)" % item,
|
'url': item['url_video_hd'],
|
||||||
'length' : item['hms'],
|
'title': "%(channel)s: %(title)s (%(hms)s)" % item,
|
||||||
'content' : "%(description)s" % item,
|
'length': item['hms'],
|
||||||
})
|
'content': "%(description)s" % item,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -25,23 +25,24 @@ search_type = 'nearmatch' # possible values: title, text, nearmatch
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://{language}.wikipedia.org/'
|
base_url = 'https://{language}.wikipedia.org/'
|
||||||
search_postfix = 'w/api.php?action=query'\
|
search_postfix = (
|
||||||
'&list=search'\
|
'w/api.php?action=query'
|
||||||
'&{query}'\
|
'&list=search'
|
||||||
'&format=json'\
|
'&{query}'
|
||||||
'&sroffset={offset}'\
|
'&format=json'
|
||||||
'&srlimit={limit}'\
|
'&sroffset={offset}'
|
||||||
|
'&srlimit={limit}'
|
||||||
'&srwhat={searchtype}'
|
'&srwhat={searchtype}'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
offset = (params['pageno'] - 1) * number_of_results
|
offset = (params['pageno'] - 1) * number_of_results
|
||||||
|
|
||||||
string_args = dict(query=urlencode({'srsearch': query}),
|
string_args = dict(
|
||||||
offset=offset,
|
query=urlencode({'srsearch': query}), offset=offset, limit=number_of_results, searchtype=search_type
|
||||||
limit=number_of_results,
|
)
|
||||||
searchtype=search_type)
|
|
||||||
|
|
||||||
format_strings = list(Formatter().parse(base_url))
|
format_strings = list(Formatter().parse(base_url))
|
||||||
|
|
||||||
|
@ -78,13 +79,14 @@ def response(resp):
|
||||||
for result in search_results['query']['search']:
|
for result in search_results['query']['search']:
|
||||||
if result.get('snippet', '').startswith('#REDIRECT'):
|
if result.get('snippet', '').startswith('#REDIRECT'):
|
||||||
continue
|
continue
|
||||||
url = base_url.format(language=resp.search_params['language']) +\
|
url = (
|
||||||
'wiki/' + quote(result['title'].replace(' ', '_').encode())
|
base_url.format(language=resp.search_params['language'])
|
||||||
|
+ 'wiki/'
|
||||||
|
+ quote(result['title'].replace(' ', '_').encode())
|
||||||
|
)
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': url,
|
results.append({'url': url, 'title': result['title'], 'content': ''})
|
||||||
'title': result['title'],
|
|
||||||
'content': ''})
|
|
||||||
|
|
||||||
# return results
|
# return results
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -26,17 +26,19 @@ def request(query, params):
|
||||||
params['url'] = search_url
|
params['url'] = search_url
|
||||||
params['method'] = 'POST'
|
params['method'] = 'POST'
|
||||||
params['headers']['content-type'] = 'application/json; charset=utf-8'
|
params['headers']['content-type'] = 'application/json; charset=utf-8'
|
||||||
params['data'] = dumps({
|
params['data'] = dumps(
|
||||||
'query': query,
|
{
|
||||||
'queryExpression': '',
|
'query': query,
|
||||||
'filters': [],
|
'queryExpression': '',
|
||||||
'orderBy': 0,
|
'filters': [],
|
||||||
'skip': (params['pageno'] - 1) * 10,
|
'orderBy': 0,
|
||||||
'sortAscending': True,
|
'skip': (params['pageno'] - 1) * 10,
|
||||||
'take': 10,
|
'sortAscending': True,
|
||||||
'includeCitationContexts': False,
|
'take': 10,
|
||||||
'profileId': '',
|
'includeCitationContexts': False,
|
||||||
})
|
'profileId': '',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -54,11 +56,13 @@ def response(resp):
|
||||||
title = result['paper']['dn']
|
title = result['paper']['dn']
|
||||||
content = _get_content(result['paper'])
|
content = _get_content(result['paper'])
|
||||||
url = _paper_url.format(id=result['paper']['id'])
|
url = _paper_url.format(id=result['paper']['id'])
|
||||||
results.append({
|
results.append(
|
||||||
'url': url,
|
{
|
||||||
'title': html_to_text(title),
|
'url': url,
|
||||||
'content': html_to_text(content),
|
'title': html_to_text(title),
|
||||||
})
|
'content': html_to_text(content),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
|
@ -25,16 +25,17 @@ paging = True
|
||||||
url = 'https://api.mixcloud.com/'
|
url = 'https://api.mixcloud.com/'
|
||||||
search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}'
|
search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}'
|
||||||
|
|
||||||
embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
|
embedded_url = (
|
||||||
'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>'
|
'<iframe scrolling="no" frameborder="0" allowTransparency="true" '
|
||||||
|
+ 'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
offset = (params['pageno'] - 1) * 10
|
offset = (params['pageno'] - 1) * 10
|
||||||
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -54,11 +55,9 @@ def response(resp):
|
||||||
publishedDate = parser.parse(result['created_time'])
|
publishedDate = parser.parse(result['created_time'])
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': url,
|
results.append(
|
||||||
'title': title,
|
{'url': url, 'title': title, 'embedded': embedded, 'publishedDate': publishedDate, 'content': content}
|
||||||
'embedded': embedded,
|
)
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'content': content})
|
|
||||||
|
|
||||||
# return results
|
# return results
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -26,38 +26,35 @@ result_template = 'key-value.html'
|
||||||
|
|
||||||
_client = None
|
_client = None
|
||||||
|
|
||||||
|
|
||||||
def init(_):
|
def init(_):
|
||||||
connect()
|
connect()
|
||||||
|
|
||||||
|
|
||||||
def connect():
|
def connect():
|
||||||
global _client # pylint: disable=global-statement
|
global _client # pylint: disable=global-statement
|
||||||
kwargs = { 'port': port }
|
kwargs = {'port': port}
|
||||||
if username:
|
if username:
|
||||||
kwargs['username'] = username
|
kwargs['username'] = username
|
||||||
if password:
|
if password:
|
||||||
kwargs['password'] = password
|
kwargs['password'] = password
|
||||||
_client = MongoClient(host, **kwargs)[database][collection]
|
_client = MongoClient(host, **kwargs)[database][collection]
|
||||||
|
|
||||||
|
|
||||||
def search(query, params):
|
def search(query, params):
|
||||||
results = []
|
results = []
|
||||||
if exact_match_only:
|
if exact_match_only:
|
||||||
q = { '$eq': query }
|
q = {'$eq': query}
|
||||||
else:
|
else:
|
||||||
_re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M )
|
_re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M)
|
||||||
q = { '$regex': _re }
|
q = {'$regex': _re}
|
||||||
|
|
||||||
query = _client.find(
|
query = _client.find({key: q}).skip((params['pageno'] - 1) * results_per_page).limit(results_per_page)
|
||||||
{key: q}
|
|
||||||
).skip(
|
|
||||||
( params['pageno'] -1 ) * results_per_page
|
|
||||||
).limit(
|
|
||||||
results_per_page
|
|
||||||
)
|
|
||||||
|
|
||||||
results.append({ 'number_of_results': query.count() })
|
results.append({'number_of_results': query.count()})
|
||||||
for r in query:
|
for r in query:
|
||||||
del r['_id']
|
del r['_id']
|
||||||
r = { str(k):str(v) for k,v in r.items() }
|
r = {str(k): str(v) for k, v in r.items()}
|
||||||
r['template'] = result_template
|
r['template'] = result_template
|
||||||
results.append(r)
|
results.append(r)
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ paging = True
|
||||||
result_template = 'key-value.html'
|
result_template = 'key-value.html'
|
||||||
_connection = None
|
_connection = None
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings):
|
def init(engine_settings):
|
||||||
global _connection # pylint: disable=global-statement
|
global _connection # pylint: disable=global-statement
|
||||||
|
|
||||||
|
@ -30,13 +31,14 @@ def init(engine_settings):
|
||||||
raise ValueError('only SELECT query is supported')
|
raise ValueError('only SELECT query is supported')
|
||||||
|
|
||||||
_connection = mysql.connector.connect(
|
_connection = mysql.connector.connect(
|
||||||
database = database,
|
database=database,
|
||||||
user = username,
|
user=username,
|
||||||
password = password,
|
password=password,
|
||||||
host = host,
|
host=host,
|
||||||
auth_plugin=auth_plugin,
|
auth_plugin=auth_plugin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def search(query, params):
|
def search(query, params):
|
||||||
query_params = {'query': query}
|
query_params = {'query': query}
|
||||||
query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit)
|
query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit)
|
||||||
|
@ -46,6 +48,7 @@ def search(query, params):
|
||||||
|
|
||||||
return _fetch_results(cur)
|
return _fetch_results(cur)
|
||||||
|
|
||||||
|
|
||||||
def _fetch_results(cur):
|
def _fetch_results(cur):
|
||||||
results = []
|
results = []
|
||||||
for res in cur:
|
for res in cur:
|
||||||
|
|
|
@ -98,14 +98,18 @@ def response(resp):
|
||||||
content = 'Category: "{category}". Downloaded {downloads} times.'
|
content = 'Category: "{category}". Downloaded {downloads} times.'
|
||||||
content = content.format(category=category, downloads=downloads)
|
content = content.format(category=category, downloads=downloads)
|
||||||
|
|
||||||
results.append({'url': href,
|
results.append(
|
||||||
'title': title,
|
{
|
||||||
'content': content,
|
'url': href,
|
||||||
'seed': seed,
|
'title': title,
|
||||||
'leech': leech,
|
'content': content,
|
||||||
'filesize': filesize,
|
'seed': seed,
|
||||||
'torrentfile': torrent_link,
|
'leech': leech,
|
||||||
'magnetlink': magnet_link,
|
'filesize': filesize,
|
||||||
'template': 'torrent.html'})
|
'torrentfile': torrent_link,
|
||||||
|
'magnetlink': magnet_link,
|
||||||
|
'template': 'torrent.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -151,10 +151,12 @@ def response(resp):
|
||||||
user_language = resp.search_params['language']
|
user_language = resp.search_params['language']
|
||||||
|
|
||||||
if resp.search_params['route']:
|
if resp.search_params['route']:
|
||||||
results.append({
|
results.append(
|
||||||
'answer': gettext('Get directions'),
|
{
|
||||||
'url': route_url.format(*resp.search_params['route'].groups()),
|
'answer': gettext('Get directions'),
|
||||||
})
|
'url': route_url.format(*resp.search_params['route'].groups()),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
fetch_wikidata(nominatim_json, user_language)
|
fetch_wikidata(nominatim_json, user_language)
|
||||||
|
|
||||||
|
@ -170,26 +172,26 @@ def response(resp):
|
||||||
links, link_keys = get_links(result, user_language)
|
links, link_keys = get_links(result, user_language)
|
||||||
data = get_data(result, user_language, link_keys)
|
data = get_data(result, user_language, link_keys)
|
||||||
|
|
||||||
results.append({
|
results.append(
|
||||||
'template': 'map.html',
|
{
|
||||||
'title': title,
|
'template': 'map.html',
|
||||||
'address': address,
|
'title': title,
|
||||||
'address_label': get_key_label('addr', user_language),
|
'address': address,
|
||||||
'url': url,
|
'address_label': get_key_label('addr', user_language),
|
||||||
'osm': osm,
|
'url': url,
|
||||||
'geojson': geojson,
|
'osm': osm,
|
||||||
'img_src': img_src,
|
'geojson': geojson,
|
||||||
'links': links,
|
'img_src': img_src,
|
||||||
'data': data,
|
'links': links,
|
||||||
'type': get_tag_label(
|
'data': data,
|
||||||
result.get('category'), result.get('type', ''), user_language
|
'type': get_tag_label(result.get('category'), result.get('type', ''), user_language),
|
||||||
),
|
'type_icon': result.get('icon'),
|
||||||
'type_icon': result.get('icon'),
|
'content': '',
|
||||||
'content': '',
|
'longitude': result['lon'],
|
||||||
'longitude': result['lon'],
|
'latitude': result['lat'],
|
||||||
'latitude': result['lat'],
|
'boundingbox': result['boundingbox'],
|
||||||
'boundingbox': result['boundingbox'],
|
}
|
||||||
})
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@ -270,9 +272,9 @@ def get_title_address(result):
|
||||||
# https://github.com/osm-search/Nominatim/issues/1662
|
# https://github.com/osm-search/Nominatim/issues/1662
|
||||||
address_name = address_raw.get('address29')
|
address_name = address_raw.get('address29')
|
||||||
else:
|
else:
|
||||||
address_name = address_raw.get(result['category'])
|
address_name = address_raw.get(result['category'])
|
||||||
elif result['type'] in address_raw:
|
elif result['type'] in address_raw:
|
||||||
address_name = address_raw.get(result['type'])
|
address_name = address_raw.get(result['type'])
|
||||||
|
|
||||||
# add rest of adressdata, if something is already found
|
# add rest of adressdata, if something is already found
|
||||||
if address_name:
|
if address_name:
|
||||||
|
@ -297,8 +299,7 @@ def get_title_address(result):
|
||||||
|
|
||||||
|
|
||||||
def get_url_osm_geojson(result):
|
def get_url_osm_geojson(result):
|
||||||
"""Get url, osm and geojson
|
"""Get url, osm and geojson"""
|
||||||
"""
|
|
||||||
osm_type = result.get('osm_type', result.get('type'))
|
osm_type = result.get('osm_type', result.get('type'))
|
||||||
if 'osm_id' not in result:
|
if 'osm_id' not in result:
|
||||||
# see https://github.com/osm-search/Nominatim/issues/1521
|
# see https://github.com/osm-search/Nominatim/issues/1521
|
||||||
|
@ -349,11 +350,13 @@ def get_links(result, user_language):
|
||||||
url, url_label = mapping_function(raw_value)
|
url, url_label = mapping_function(raw_value)
|
||||||
if url.startswith('https://wikidata.org'):
|
if url.startswith('https://wikidata.org'):
|
||||||
url_label = result.get('wikidata', {}).get('itemLabel') or url_label
|
url_label = result.get('wikidata', {}).get('itemLabel') or url_label
|
||||||
links.append({
|
links.append(
|
||||||
'label': get_key_label(k, user_language),
|
{
|
||||||
'url': url,
|
'label': get_key_label(k, user_language),
|
||||||
'url_label': url_label,
|
'url': url,
|
||||||
})
|
'url_label': url_label,
|
||||||
|
}
|
||||||
|
)
|
||||||
link_keys.add(k)
|
link_keys.add(k)
|
||||||
return links, link_keys
|
return links, link_keys
|
||||||
|
|
||||||
|
@ -373,11 +376,13 @@ def get_data(result, user_language, ignore_keys):
|
||||||
continue
|
continue
|
||||||
k_label = get_key_label(k, user_language)
|
k_label = get_key_label(k, user_language)
|
||||||
if k_label:
|
if k_label:
|
||||||
data.append({
|
data.append(
|
||||||
'label': k_label,
|
{
|
||||||
'key': k,
|
'label': k_label,
|
||||||
'value': v,
|
'key': k,
|
||||||
})
|
'value': v,
|
||||||
|
}
|
||||||
|
)
|
||||||
data.sort(key=lambda entry: (get_key_rank(entry['key']), entry['label']))
|
data.sort(key=lambda entry: (get_key_rank(entry['key']), entry['label']))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
@ -34,10 +34,7 @@ def request(query, params):
|
||||||
|
|
||||||
params['url'] = pdbe_solr_url
|
params['url'] = pdbe_solr_url
|
||||||
params['method'] = 'POST'
|
params['method'] = 'POST'
|
||||||
params['data'] = {
|
params['data'] = {'q': query, 'wt': "json"} # request response in parsable format
|
||||||
'q': query,
|
|
||||||
'wt': "json" # request response in parsable format
|
|
||||||
}
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,12 +50,21 @@ def construct_body(result):
|
||||||
if result['journal']:
|
if result['journal']:
|
||||||
content = content.format(
|
content = content.format(
|
||||||
title=result['citation_title'],
|
title=result['citation_title'],
|
||||||
authors=result['entry_author_list'][0], journal=result['journal'], volume=result['journal_volume'],
|
authors=result['entry_author_list'][0],
|
||||||
page=result['journal_page'], year=result['citation_year'])
|
journal=result['journal'],
|
||||||
|
volume=result['journal_volume'],
|
||||||
|
page=result['journal_page'],
|
||||||
|
year=result['citation_year'],
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
content = content.format(
|
content = content.format(
|
||||||
title=result['citation_title'],
|
title=result['citation_title'],
|
||||||
authors=result['entry_author_list'][0], journal='', volume='', page='', year=result['release_year'])
|
authors=result['entry_author_list'][0],
|
||||||
|
journal='',
|
||||||
|
volume='',
|
||||||
|
page='',
|
||||||
|
year=result['release_year'],
|
||||||
|
)
|
||||||
img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
|
img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
|
||||||
except (KeyError):
|
except (KeyError):
|
||||||
content = None
|
content = None
|
||||||
|
@ -96,20 +102,21 @@ def response(resp):
|
||||||
# since we can't construct a proper body from the response, we'll make up our own
|
# since we can't construct a proper body from the response, we'll make up our own
|
||||||
msg_superseded = gettext("This entry has been superseded by")
|
msg_superseded = gettext("This entry has been superseded by")
|
||||||
content = '{msg_superseded}: {url} ({pdb_id})'.format(
|
content = '{msg_superseded}: {url} ({pdb_id})'.format(
|
||||||
msg_superseded=msg_superseded,
|
msg_superseded=msg_superseded, url=superseded_url, pdb_id=result['superseded_by']
|
||||||
url=superseded_url,
|
)
|
||||||
pdb_id=result['superseded_by'])
|
|
||||||
|
|
||||||
# obsoleted entries don't have preview images
|
# obsoleted entries don't have preview images
|
||||||
img_src = None
|
img_src = None
|
||||||
else:
|
else:
|
||||||
title, content, img_src = construct_body(result)
|
title, content, img_src = construct_body(result)
|
||||||
|
|
||||||
results.append({
|
results.append(
|
||||||
'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
|
{
|
||||||
'title': title,
|
'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
|
||||||
'content': content,
|
'title': title,
|
||||||
'img_src': img_src
|
'content': content,
|
||||||
})
|
'img_src': img_src,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -36,9 +36,7 @@ def request(query, params):
|
||||||
language = params["language"].split("-")[0]
|
language = params["language"].split("-")[0]
|
||||||
if "all" != language and language in supported_languages:
|
if "all" != language and language in supported_languages:
|
||||||
query_dict["languageOneOf"] = language
|
query_dict["languageOneOf"] = language
|
||||||
params["url"] = search_url.format(
|
params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
|
||||||
query=urlencode(query_dict), pageno=pageno
|
|
||||||
)
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -33,9 +33,7 @@ supported_languages = ['de', 'en', 'fr', 'it']
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = base_url +\
|
params['url'] = base_url + search_string.format(query=urlencode({'q': query}), limit=number_of_results)
|
||||||
search_string.format(query=urlencode({'q': query}),
|
|
||||||
limit=number_of_results)
|
|
||||||
|
|
||||||
if params['language'] != 'all':
|
if params['language'] != 'all':
|
||||||
language = params['language'].split('_')[0]
|
language = params['language'].split('_')[0]
|
||||||
|
@ -75,59 +73,71 @@ def response(resp):
|
||||||
# continue if invalide osm-type
|
# continue if invalide osm-type
|
||||||
continue
|
continue
|
||||||
|
|
||||||
url = result_base_url.format(osm_type=osm_type,
|
url = result_base_url.format(osm_type=osm_type, osm_id=properties.get('osm_id'))
|
||||||
osm_id=properties.get('osm_id'))
|
|
||||||
|
|
||||||
osm = {'type': osm_type,
|
osm = {'type': osm_type, 'id': properties.get('osm_id')}
|
||||||
'id': properties.get('osm_id')}
|
|
||||||
|
|
||||||
geojson = r.get('geometry')
|
geojson = r.get('geometry')
|
||||||
|
|
||||||
if properties.get('extent'):
|
if properties.get('extent'):
|
||||||
boundingbox = [properties.get('extent')[3],
|
boundingbox = [
|
||||||
properties.get('extent')[1],
|
properties.get('extent')[3],
|
||||||
properties.get('extent')[0],
|
properties.get('extent')[1],
|
||||||
properties.get('extent')[2]]
|
properties.get('extent')[0],
|
||||||
|
properties.get('extent')[2],
|
||||||
|
]
|
||||||
else:
|
else:
|
||||||
# TODO: better boundingbox calculation
|
# TODO: better boundingbox calculation
|
||||||
boundingbox = [geojson['coordinates'][1],
|
boundingbox = [
|
||||||
geojson['coordinates'][1],
|
geojson['coordinates'][1],
|
||||||
geojson['coordinates'][0],
|
geojson['coordinates'][1],
|
||||||
geojson['coordinates'][0]]
|
geojson['coordinates'][0],
|
||||||
|
geojson['coordinates'][0],
|
||||||
|
]
|
||||||
|
|
||||||
# address calculation
|
# address calculation
|
||||||
address = {}
|
address = {}
|
||||||
|
|
||||||
# get name
|
# get name
|
||||||
if properties.get('osm_key') == 'amenity' or\
|
if (
|
||||||
properties.get('osm_key') == 'shop' or\
|
properties.get('osm_key') == 'amenity'
|
||||||
properties.get('osm_key') == 'tourism' or\
|
or properties.get('osm_key') == 'shop'
|
||||||
properties.get('osm_key') == 'leisure':
|
or properties.get('osm_key') == 'tourism'
|
||||||
|
or properties.get('osm_key') == 'leisure'
|
||||||
|
):
|
||||||
address = {'name': properties.get('name')}
|
address = {'name': properties.get('name')}
|
||||||
|
|
||||||
# add rest of adressdata, if something is already found
|
# add rest of adressdata, if something is already found
|
||||||
if address.get('name'):
|
if address.get('name'):
|
||||||
address.update({'house_number': properties.get('housenumber'),
|
address.update(
|
||||||
'road': properties.get('street'),
|
{
|
||||||
'locality': properties.get('city',
|
'house_number': properties.get('housenumber'),
|
||||||
properties.get('town', # noqa
|
'road': properties.get('street'),
|
||||||
properties.get('village'))), # noqa
|
'locality': properties.get(
|
||||||
'postcode': properties.get('postcode'),
|
'city', properties.get('town', properties.get('village')) # noqa
|
||||||
'country': properties.get('country')})
|
), # noqa
|
||||||
|
'postcode': properties.get('postcode'),
|
||||||
|
'country': properties.get('country'),
|
||||||
|
}
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
address = None
|
address = None
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'template': 'map.html',
|
results.append(
|
||||||
'title': title,
|
{
|
||||||
'content': '',
|
'template': 'map.html',
|
||||||
'longitude': geojson['coordinates'][0],
|
'title': title,
|
||||||
'latitude': geojson['coordinates'][1],
|
'content': '',
|
||||||
'boundingbox': boundingbox,
|
'longitude': geojson['coordinates'][0],
|
||||||
'geojson': geojson,
|
'latitude': geojson['coordinates'][1],
|
||||||
'address': address,
|
'boundingbox': boundingbox,
|
||||||
'osm': osm,
|
'geojson': geojson,
|
||||||
'url': url})
|
'address': address,
|
||||||
|
'osm': osm,
|
||||||
|
'url': url,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# return results
|
# return results
|
||||||
return results
|
return results
|
||||||
|
@@ -40,17 +40,14 @@ trackers = [
 ]

 # piratebay specific type-definitions
-search_types = {"files": "0",
-                "music": "100",
-                "videos": "200"}
+search_types = {"files": "0", "music": "100", "videos": "200"}


 # do search-request
 def request(query, params):
     search_type = search_types.get(params["category"], "0")

-    params["url"] = search_url.format(search_term=quote(query),
-                                      search_type=search_type)
+    params["url"] = search_url.format(search_term=quote(query), search_type=search_type)

     return params

@@ -68,8 +65,9 @@ def response(resp):
     # parse results
     for result in search_res:
         link = url + "description.php?id=" + result["id"]
-        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
-            + "&tr=" + "&tr=".join(trackers)
+        magnetlink = (
+            "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers)
+        )

         params = {
             "url": link,
@@ -77,7 +75,7 @@ def response(resp):
             "seed": result["seeders"],
             "leech": result["leechers"],
             "magnetlink": magnetlink,
-            "template": "torrent.html"
+            "template": "torrent.html",
         }

         # extract and convert creation date
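For orientation (an illustration, not part of the patch), the concatenation above builds a magnet URI in which the tracker list is repeated under the tr parameter; the hash, name and tracker URLs below are made-up placeholder values.

    trackers = ['udp://tracker.example.org:1337/announce', 'udp://tracker.example.net:6969/announce']
    info_hash = '0123456789abcdef0123456789abcdef01234567'  # hypothetical value
    name = 'example.iso'  # hypothetical value
    magnetlink = "magnet:?xt=urn:btih:" + info_hash + "&dn=" + name + "&tr=" + "&tr=".join(trackers)
    # -> magnet:?xt=urn:btih:0123...&dn=example.iso&tr=udp://tracker.example.org...&tr=udp://tracker.example.net...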
@@ -20,6 +20,7 @@ paging = True
 result_template = 'key-value.html'
 _connection = None

+
 def init(engine_settings):
     global _connection  # pylint: disable=global-statement

@@ -30,25 +31,24 @@ def init(engine_settings):
         raise ValueError('only SELECT query is supported')

     _connection = psycopg2.connect(
-        database = database,
-        user = username,
-        password = password,
-        host = host,
-        port = port,
+        database=database,
+        user=username,
+        password=password,
+        host=host,
+        port=port,
     )


 def search(query, params):
     query_params = {'query': query}
-    query_to_run = (
-        query_str
-        + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit)
-    )
+    query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit)

     with _connection:
         with _connection.cursor() as cur:
             cur.execute(query_to_run, query_params)
             return _fetch_results(cur)


 def _fetch_results(cur):
     results = []
     titles = []
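A short aside (not part of the commit): the LIMIT/OFFSET string built above implements the usual page-number arithmetic. A sketch of the mapping, assuming 10 rows per page; 'limit' is an engine setting, not defined here.

    limit = 10  # assumed engine setting
    for pageno in (1, 2, 3):
        offset = (pageno - 1) * limit
        print(' LIMIT {0} OFFSET {1}'.format(limit, offset))
    # page 1 -> LIMIT 10 OFFSET 0, page 2 -> LIMIT 10 OFFSET 10, page 3 -> LIMIT 10 OFFSET 20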
@@ -15,7 +15,7 @@ about = {
     "wikidata_id": 'Q1540899',
     "official_api_documentation": {
         'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/',
-        'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/'
+        'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/',
     },
     "use_official_api": True,
     "require_api_key": False,
@@ -24,8 +24,9 @@ about = {

 categories = ['science']

-base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'\
-           + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'
+base_url = (
+    'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'
+)

 # engine dependent config
 number_of_results = 10
@@ -36,9 +37,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results

-    string_args = dict(query=urlencode({'term': query}),
-                       offset=offset,
-                       hits=number_of_results)
+    string_args = dict(query=urlencode({'term': query}), offset=offset, hits=number_of_results)

     params['url'] = base_url.format(**string_args)

@@ -49,8 +48,9 @@ def response(resp):
     results = []

     # First retrieve notice of each result
-    pubmed_retrieve_api_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\
-                              + 'db=pubmed&retmode=xml&id={pmids_string}'
+    pubmed_retrieve_api_url = (
+        'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' + 'db=pubmed&retmode=xml&id={pmids_string}'
+    )

     pmids_results = etree.XML(resp.content)
     pmids = pmids_results.xpath('//eSearchResult/IdList/Id')
@@ -88,14 +88,17 @@ def response(resp):
             content = content[0:300] + "..."
             # TODO: center snippet on query term

-        res_dict = {'url': url,
-                    'title': title,
-                    'content': content}
+        res_dict = {'url': url, 'title': title, 'content': content}

         try:
-            publishedDate = datetime.strptime(entry.xpath('.//DateCreated/Year')[0].text
-                                              + '-' + entry.xpath('.//DateCreated/Month')[0].text
-                                              + '-' + entry.xpath('.//DateCreated/Day')[0].text, '%Y-%m-%d')
+            publishedDate = datetime.strptime(
+                entry.xpath('.//DateCreated/Year')[0].text
+                + '-'
+                + entry.xpath('.//DateCreated/Month')[0].text
+                + '-'
+                + entry.xpath('.//DateCreated/Day')[0].text,
+                '%Y-%m-%d',
+            )
             res_dict['publishedDate'] = publishedDate
         except:
             pass
@@ -61,6 +61,7 @@ category_to_keyword = {
 # search-url
 url = 'https://api.qwant.com/v3/search/{keyword}?{query}&count={count}&offset={offset}'

+
 def request(query, params):
     """Qwant search request"""
     keyword = category_to_keyword[categories[0]]
@@ -77,10 +78,10 @@ def request(query, params):
     offset = min(offset, 40)

     params['url'] = url.format(
-        keyword = keyword,
-        query = urlencode({'q': query}),
-        offset = offset,
-        count = count,
+        keyword=keyword,
+        query=urlencode({'q': query}),
+        offset=offset,
+        count=count,
     )

     # add language tag
@@ -111,7 +112,14 @@ def response(resp):

     # check for an API error
     if search_results.get('status') != 'success':
-        msg = ",".join(data.get('message', ['unknown', ]))
+        msg = ",".join(
+            data.get(
+                'message',
+                [
+                    'unknown',
+                ],
+            )
+        )
         raise SearxEngineAPIException('API error::' + msg)

     # raise for other errors
@@ -128,7 +136,7 @@ def response(resp):
     # result['items'].
     mainline = data.get('result', {}).get('items', [])
     mainline = [
-        {'type' : keyword, 'items' : mainline },
+        {'type': keyword, 'items': mainline},
     ]

     # return empty array if there are no results
@@ -153,11 +161,13 @@ def response(resp):

         if mainline_type == 'web':
             content = item['desc']
-            results.append({
-                'title': title,
-                'url': res_url,
-                'content': content,
-            })
+            results.append(
+                {
+                    'title': title,
+                    'url': res_url,
+                    'content': content,
+                }
+            )

         elif mainline_type == 'news':

@@ -168,23 +178,27 @@ def response(resp):
             img_src = None
             if news_media:
                 img_src = news_media[0].get('pict', {}).get('url', None)
-            results.append({
-                'title': title,
-                'url': res_url,
-                'publishedDate': pub_date,
-                'img_src': img_src,
-            })
+            results.append(
+                {
+                    'title': title,
+                    'url': res_url,
+                    'publishedDate': pub_date,
+                    'img_src': img_src,
+                }
+            )

         elif mainline_type == 'images':
             thumbnail = item['thumbnail']
             img_src = item['media']
-            results.append({
-                'title': title,
-                'url': res_url,
-                'template': 'images.html',
-                'thumbnail_src': thumbnail,
-                'img_src': img_src,
-            })
+            results.append(
+                {
+                    'title': title,
+                    'url': res_url,
+                    'template': 'images.html',
+                    'thumbnail_src': thumbnail,
+                    'img_src': img_src,
+                }
+            )

         elif mainline_type == 'videos':
             # some videos do not have a description: while qwant-video
@@ -208,19 +222,18 @@ def response(resp):
             thumbnail = item['thumbnail']
             # from some locations (DE and others?) the s2 link do
             # response a 'Please wait ..' but does not deliver the thumbnail
-            thumbnail = thumbnail.replace(
-                'https://s2.qwant.com',
-                'https://s1.qwant.com', 1
-            )
-            results.append({
-                'title': title,
-                'url': res_url,
-                'content': content,
-                'publishedDate': pub_date,
-                'thumbnail': thumbnail,
-                'template': 'videos.html',
-                'length': length,
-            })
+            thumbnail = thumbnail.replace('https://s2.qwant.com', 'https://s1.qwant.com', 1)
+            results.append(
+                {
+                    'title': title,
+                    'url': res_url,
+                    'content': content,
+                    'publishedDate': pub_date,
+                    'thumbnail': thumbnail,
+                    'template': 'videos.html',
+                    'length': length,
+                }
+            )

     return results

@@ -229,8 +242,8 @@ def response(resp):
 def _fetch_supported_languages(resp):
     # list of regions is embedded in page as a js object
     response_text = resp.text
-    response_text = response_text[response_text.find('INITIAL_PROPS'):]
-    response_text = response_text[response_text.find('{'):response_text.find('</script>')]
+    response_text = response_text[response_text.find('INITIAL_PROPS') :]
+    response_text = response_text[response_text.find('{') : response_text.find('</script>')]

     regions_json = loads(response_text)

@@ -28,18 +28,12 @@ mount_prefix = None
 dl_prefix = None

 # embedded
-embedded_url = '<{ttype} controls height="166px" ' +\
-    'src="{url}" type="{mtype}"></{ttype}>'
+embedded_url = '<{ttype} controls height="166px" ' + 'src="{url}" type="{mtype}"></{ttype}>'


 # helper functions
 def get_time_range(time_range):
-    sw = {
-        'day': 1,
-        'week': 7,
-        'month': 30,
-        'year': 365
-    }
+    sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365}

     offset = sw.get(time_range, 0)
     if not offset:
@@ -52,11 +46,9 @@ def get_time_range(time_range):
 def request(query, params):
     search_after = get_time_range(params['time_range'])
     search_url = base_url + 'json?{query}&highlight=0'
-    params['url'] = search_url.format(query=urlencode({
-        'query': query,
-        'page': params['pageno'],
-        'after': search_after,
-        'dir': search_dir}))
+    params['url'] = search_url.format(
+        query=urlencode({'query': query, 'page': params['pageno'], 'after': search_after, 'dir': search_dir})
+    )

     return params

@@ -76,10 +68,7 @@ def response(resp):
         content = '{}'.format(result['snippet'])

         # append result
-        item = {'url': url,
-                'title': title,
-                'content': content,
-                'template': 'files.html'}
+        item = {'url': url, 'title': title, 'content': content, 'template': 'files.html'}

         if result['size']:
             item['size'] = int(result['size'])
@@ -96,9 +85,8 @@ def response(resp):

         if mtype in ['audio', 'video']:
             item['embedded'] = embedded_url.format(
-                ttype=mtype,
-                url=quote(url.encode('utf8'), '/:'),
-                mtype=result['mtype'])
+                ttype=mtype, url=quote(url.encode('utf8'), '/:'), mtype=result['mtype']
+            )

         if mtype in ['image'] and subtype in ['bmp', 'gif', 'jpeg', 'png']:
             item['img_src'] = url
@@ -52,10 +52,7 @@ def response(resp):
         data = post['data']

         # extract post information
-        params = {
-            'url': urljoin(base_url, data['permalink']),
-            'title': data['title']
-        }
+        params = {'url': urljoin(base_url, data['permalink']), 'title': data['title']}

         # if thumbnail field contains a valid URL, we need to change template
         thumbnail = data['thumbnail']
@@ -20,16 +20,19 @@ result_template = 'key-value.html'
 exact_match_only = True

 _redis_client = None


 def init(_engine_settings):
     global _redis_client  # pylint: disable=global-statement
     _redis_client = redis.StrictRedis(
-        host = host,
-        port = port,
-        db = db,
-        password = password or None,
-        decode_responses = True,
+        host=host,
+        port=port,
+        db=db,
+        password=password or None,
+        decode_responses=True,
     )


 def search(query, _params):
     if not exact_match_only:
         return search_keys(query)
@@ -42,21 +45,20 @@ def search(query, _params):
     if ' ' in query:
         qset, rest = query.split(' ', 1)
         ret = []
-        for res in _redis_client.hscan_iter(
-            qset, match='*{}*'.format(rest)
-        ):
-            ret.append({
-                res[0]: res[1],
-                'template': result_template,
-            })
+        for res in _redis_client.hscan_iter(qset, match='*{}*'.format(rest)):
+            ret.append(
+                {
+                    res[0]: res[1],
+                    'template': result_template,
+                }
+            )
         return ret
     return []


 def search_keys(query):
     ret = []
-    for key in _redis_client.scan_iter(
-        match='*{}*'.format(query)
-    ):
+    for key in _redis_client.scan_iter(match='*{}*'.format(query)):
         key_type = _redis_client.type(key)
         res = None
@@ -68,14 +68,16 @@ def response(resp):
         else:
             content = f"{views} views - {rumbles} rumbles"

-        results.append({
-            'url': url,
-            'title': title,
-            'content': content,
-            'author': author,
-            'length': length,
-            'template': 'videos.html',
-            'publishedDate': fixed_date,
-            'thumbnail': thumbnail,
-        })
+        results.append(
+            {
+                'url': url,
+                'title': title,
+                'content': content,
+                'author': author,
+                'length': length,
+                'template': 'videos.html',
+                'publishedDate': fixed_date,
+                'thumbnail': thumbnail,
+            }
+        )
     return results
@@ -32,12 +32,16 @@ def request(query, params):
     params['url'] = search_url
     params['method'] = 'POST'
     params['headers']['Content-type'] = "application/json"
-    params['data'] = dumps({"query": query,
-                            "searchField": "ALL",
-                            "sortDirection": "ASC",
-                            "sortOrder": "RELEVANCY",
-                            "page": params['pageno'],
-                            "pageSize": page_size})
+    params['data'] = dumps(
+        {
+            "query": query,
+            "searchField": "ALL",
+            "sortDirection": "ASC",
+            "sortOrder": "RELEVANCY",
+            "page": params['pageno'],
+            "pageSize": page_size,
+        }
+    )

     return params

@@ -69,11 +73,15 @@ def response(resp):
         content = result['highlights'][0]['value']

         # append result
-        results.append({'url': url + 'structure/' + result['id'],
-                        'title': result['label'],
-                        # 'thumbnail': thumbnail,
-                        'img_src': thumbnail,
-                        'content': html_to_text(content)})
+        results.append(
+            {
+                'url': url + 'structure/' + result['id'],
+                'title': result['label'],
+                # 'thumbnail': thumbnail,
+                'img_src': thumbnail,
+                'content': html_to_text(content),
+            }
+        )

     # return results
     return results
@@ -25,10 +25,7 @@ url = 'https://searchcode.com/'
 search_url = url + 'api/codesearch_I/?{query}&p={pageno}'

 # special code-endings which are not recognised by the file ending
-code_endings = {'cs': 'c#',
-                'h': 'c',
-                'hpp': 'cpp',
-                'cxx': 'cpp'}
+code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'}


 # do search-request
@@ -55,17 +52,21 @@ def response(resp):
             lines[int(line)] = code

         code_language = code_endings.get(
-            result['filename'].split('.')[-1].lower(),
-            result['filename'].split('.')[-1].lower())
+            result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower()
+        )

         # append result
-        results.append({'url': href,
-                        'title': title,
-                        'content': '',
-                        'repository': repo,
-                        'codelines': sorted(lines.items()),
-                        'code_language': code_language,
-                        'template': 'code.html'})
+        results.append(
+            {
+                'url': href,
+                'title': title,
+                'content': '',
+                'repository': repo,
+                'codelines': sorted(lines.items()),
+                'code_language': code_language,
+                'template': 'code.html',
+            }
+        )

     # return results
     return results
@@ -37,7 +37,7 @@ def request(query, params):
         'language': params['language'],
         'time_range': params['time_range'],
         'category': params['category'],
-        'format': 'json'
+        'format': 'json',
     }

     return params
@@ -13,19 +13,21 @@ def request(query, params):
     params['url'] = search_url
     params['method'] = 'POST'
     params['headers']['content-type'] = 'application/json'
-    params['data'] = dumps({
-        "queryString": query,
-        "page": params['pageno'],
-        "pageSize": 10,
-        "sort": "relevance",
-        "useFallbackRankerService": False,
-        "useFallbackSearchCluster": False,
-        "getQuerySuggestions": False,
-        "authors": [],
-        "coAuthors": [],
-        "venues": [],
-        "performTitleMatch": True,
-    })
+    params['data'] = dumps(
+        {
+            "queryString": query,
+            "page": params['pageno'],
+            "pageSize": 10,
+            "sort": "relevance",
+            "useFallbackRankerService": False,
+            "useFallbackSearchCluster": False,
+            "getQuerySuggestions": False,
+            "authors": [],
+            "coAuthors": [],
+            "venues": [],
+            "performTitleMatch": True,
+        }
+    )
     return params

@@ -33,10 +35,12 @@ def response(resp):
     res = loads(resp.text)
     results = []
     for result in res['results']:
-        results.append({
-            'url': result['primaryPaperLink']['url'],
-            'title': result['title']['text'],
-            'content': result['paperAbstractTruncated']
-        })
+        results.append(
+            {
+                'url': result['primaryPaperLink']['url'],
+                'title': result['title']['text'],
+                'content': result['paperAbstractTruncated'],
+            }
+        )

     return results
@@ -31,17 +31,13 @@ supported_languages = [
 ]
 base_url = 'https://sepiasearch.org/api/v1/search/videos'

-safesearch_table = {
-    0: 'both',
-    1: 'false',
-    2: 'false'
-}
+safesearch_table = {0: 'both', 1: 'false', 2: 'false'}

 time_range_table = {
     'day': relativedelta.relativedelta(),
     'week': relativedelta.relativedelta(weeks=-1),
     'month': relativedelta.relativedelta(months=-1),
-    'year': relativedelta.relativedelta(years=-1)
+    'year': relativedelta.relativedelta(years=-1),
 }


@@ -55,13 +51,19 @@ def minute_to_hm(minute):


 def request(query, params):
-    params['url'] = base_url + '?' + urlencode({
-        'search': query,
-        'start': (params['pageno'] - 1) * 10,
-        'count': 10,
-        'sort': '-match',
-        'nsfw': safesearch_table[params['safesearch']]
-    })
+    params['url'] = (
+        base_url
+        + '?'
+        + urlencode(
+            {
+                'search': query,
+                'start': (params['pageno'] - 1) * 10,
+                'count': 10,
+                'sort': '-match',
+                'nsfw': safesearch_table[params['safesearch']],
+            }
+        )
+    )

     language = params['language'].split('-')[0]
     if language in supported_languages:
@@ -91,14 +93,18 @@ def response(resp):
         length = minute_to_hm(result.get('duration'))
         url = result['url']

-        results.append({'url': url,
-                        'title': title,
-                        'content': content,
-                        'author': author,
-                        'length': length,
-                        'template': 'videos.html',
-                        'publishedDate': publishedDate,
-                        'embedded': embedded,
-                        'thumbnail': thumbnail})
+        results.append(
+            {
+                'url': url,
+                'title': title,
+                'content': content,
+                'author': author,
+                'length': length,
+                'template': 'videos.html',
+                'publishedDate': publishedDate,
+                'embedded': embedded,
+                'thumbnail': thumbnail,
+            }
+        )

     return results
@@ -58,10 +58,12 @@ def response(resp):
         if result_data is None:
             continue
         title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
-        results.append({
-            'url': title_element.get('href'),
-            'title': extract_text(title_element),
-            'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')),
-        })
+        results.append(
+            {
+                'url': title_element.get('href'),
+                'title': extract_text(title_element),
+                'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')),
+            }
+        )

     return results
@@ -28,9 +28,11 @@ URL = 'https://sjp.pwn.pl'
 SEARCH_URL = URL + '/szukaj/{query}.html'

 word_xpath = '//div[@class="query"]'
-dict_xpath = ['//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]',
-              '//div[@class="wyniki sjp-wyniki sjp-anchor"]',
-              '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]']
+dict_xpath = [
+    '//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]',
+    '//div[@class="wyniki sjp-wyniki sjp-anchor"]',
+    '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]',
+]


 def request(query, params):
@@ -85,9 +87,11 @@ def response(resp):
         infobox += "</ol>"
     infobox += "</ul></div>"

-    results.append({
-        'infobox': word,
-        'content': infobox,
-    })
+    results.append(
+        {
+            'infobox': word,
+            'content': infobox,
+        }
+    )

     return results
@@ -36,14 +36,16 @@ def response(resp):
     search_results = loads(resp.text)

     for result in search_results["results"]:
-        results.append({
-            'infohash': result["infohash"],
-            'seed': result["swarm"]["seeders"],
-            'leech': result["swarm"]["leechers"],
-            'title': result["title"],
-            'url': "https://solidtorrents.net/view/" + result["_id"],
-            'filesize': result["size"],
-            'magnetlink': result["magnet"],
-            'template': "torrent.html",
-        })
+        results.append(
+            {
+                'infohash': result["infohash"],
+                'seed': result["swarm"]["seeders"],
+                'leech': result["swarm"]["leechers"],
+                'title': result["title"],
+                'url': "https://solidtorrents.net/view/" + result["_id"],
+                'filesize': result["size"],
+                'magnetlink': result["magnet"],
+                'template': "torrent.html",
+            }
+        )
     return results
@@ -14,10 +14,10 @@ from searx.exceptions import SearxEngineAPIException
 base_url = 'http://localhost:8983'
 collection = ''
 rows = 10
-sort = ''   # sorting: asc or desc
-field_list = 'name'   # list of field names to display on the UI
-default_fields = ''   # default field to query
-query_fields = ''   # query fields
+sort = ''  # sorting: asc or desc
+field_list = 'name'  # list of field names to display on the UI
+default_fields = ''  # default field to query
+query_fields = ''  # query fields
 _search_url = ''
 paging = True

@@ -27,17 +27,21 @@ paging = True
 # search-url
 # missing attribute: user_id, app_version, app_locale
 url = 'https://api-v2.soundcloud.com/'
-search_url = url + 'search?{query}'\
-    '&variant_ids='\
-    '&facet=model'\
-    '&limit=20'\
-    '&offset={offset}'\
-    '&linked_partitioning=1'\
-    '&client_id={client_id}'   # noqa
+search_url = (
+    url + 'search?{query}'
+    '&variant_ids='
+    '&facet=model'
+    '&limit=20'
+    '&offset={offset}'
+    '&linked_partitioning=1'
+    '&client_id={client_id}'
+)  # noqa

-embedded_url = '<iframe width="100%" height="166" ' +\
-    'scrolling="no" frameborder="no" ' +\
-    'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
+embedded_url = (
+    '<iframe width="100%" height="166" '
+    + 'scrolling="no" frameborder="no" '
+    + 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
+)

 cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
 guest_client_id = ''
@@ -75,9 +79,7 @@ def init(engine_settings=None):
 def request(query, params):
     offset = (params['pageno'] - 1) * 20

-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset,
-                                      client_id=guest_client_id)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset, client_id=guest_client_id)

     return params

@@ -98,11 +100,15 @@ def response(resp):
         embedded = embedded_url.format(uri=uri)

         # append result
-        results.append({'url': result['permalink_url'],
-                        'title': title,
-                        'publishedDate': publishedDate,
-                        'embedded': embedded,
-                        'content': content})
+        results.append(
+            {
+                'url': result['permalink_url'],
+                'title': title,
+                'publishedDate': publishedDate,
+                'embedded': embedded,
+                'content': content,
+            }
+        )

     # return results
     return results
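A side note on the search_url rewrite above (illustration only, not part of the patch): once the URL fragments are wrapped in parentheses, the trailing backslashes are no longer needed, because adjacent string literals are concatenated by the Python parser.

    # minimal sketch of implicit string-literal concatenation inside parentheses
    search_url = (
        'search?{query}'
        '&facet=model'
        '&limit=20'
    )
    assert search_url == 'search?{query}&facet=model&limit=20'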
@@ -42,9 +42,10 @@ def request(query, params):
     r = http_post(
         'https://accounts.spotify.com/api/token',
         data={'grant_type': 'client_credentials'},
-        headers={'Authorization': 'Basic ' + base64.b64encode(
-            "{}:{}".format(api_client_id, api_client_secret).encode()
-        ).decode()}
+        headers={
+            'Authorization': 'Basic '
+            + base64.b64encode("{}:{}".format(api_client_id, api_client_secret).encode()).decode()
+        },
     )
     j = loads(r.text)
     params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
@@ -63,18 +64,12 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = '{} - {} - {}'.format(
-                result['artists'][0]['name'],
-                result['album']['name'],
-                result['name'])
+            content = '{} - {} - {}'.format(result['artists'][0]['name'], result['album']['name'], result['name'])

             embedded = embedded_url.format(audioid=result['id'])

             # append result
-            results.append({'url': url,
-                            'title': title,
-                            'embedded': embedded,
-                            'content': content})
+            results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content})

     # return results
     return results
@@ -26,15 +26,11 @@ api_key = 'unset'

 base_url = 'https://api.springernature.com/metadata/json?'

+
 def request(query, params):
     if api_key == 'unset':
         raise SearxEngineAPIException('missing Springer-Nature API key')
-    args = urlencode({
-        'q' : query,
-        's' : nb_per_page * (params['pageno'] - 1),
-        'p' : nb_per_page,
-        'api_key' : api_key
-    })
+    args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key})
     params['url'] = base_url + args
     logger.debug("query_url --> %s", params['url'])
     return params
@@ -50,21 +46,27 @@ def response(resp):
             content += "..."
         published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')

-        metadata = [record[x] for x in [
-            'publicationName',
-            'identifier',
-            'contentType',
-        ] if record.get(x) is not None]
+        metadata = [
+            record[x]
+            for x in [
+                'publicationName',
+                'identifier',
+                'contentType',
+            ]
+            if record.get(x) is not None
+        ]

         metadata = ' / '.join(metadata)
         if record.get('startingPage') and record.get('endingPage') is not None:
             metadata += " (%(startingPage)s-%(endingPage)s)" % record

-        results.append({
-            'title': record['title'],
-            'url': record['url'][0]['value'].replace('http://', 'https://', 1),
-            'content' : content,
-            'publishedDate' : published,
-            'metadata' : metadata
-        })
+        results.append(
+            {
+                'title': record['title'],
+                'url': record['url'][0]['value'].replace('http://', 'https://', 1),
+                'content': content,
+                'publishedDate': published,
+                'metadata': metadata,
+            }
+        )
     return results
@@ -47,9 +47,9 @@ def search(query, params):

     query_params = {
         'query': query,
         'wildcard': r'%' + query.replace(' ', r'%') + r'%',
         'limit': limit,
-        'offset': (params['pageno'] - 1) * limit
+        'offset': (params['pageno'] - 1) * limit,
     }
     query_to_run = query_str + ' LIMIT :limit OFFSET :offset'

@@ -59,7 +59,7 @@ def search(query, params):
         col_names = [cn[0] for cn in cur.description]

         for row in cur.fetchall():
-            item = dict( zip(col_names, map(str, row)) )
+            item = dict(zip(col_names, map(str, row)))
             item['template'] = result_template
             logger.debug("append result --> %s", item)
             results.append(item)
@@ -23,26 +23,30 @@ paging = True
 pagesize = 10

 api_site = 'stackoverflow'
-api_sort= 'activity'
+api_sort = 'activity'
 api_order = 'desc'

 # https://api.stackexchange.com/docs/advanced-search
 search_api = 'https://api.stackexchange.com/2.3/search/advanced?'


 def request(query, params):

-    args = urlencode({
-        'q' : query,
-        'page' : params['pageno'],
-        'pagesize' : pagesize,
-        'site' : api_site,
-        'sort' : api_sort,
-        'order': 'desc',
-    })
+    args = urlencode(
+        {
+            'q': query,
+            'page': params['pageno'],
+            'pagesize': pagesize,
+            'site': api_site,
+            'sort': api_sort,
+            'order': 'desc',
+        }
+    )
     params['url'] = search_api + args

     return params


 def response(resp):

     results = []
@@ -56,10 +60,12 @@ def response(resp):
             content += ' // is answered'
         content += " // score: %s" % result['score']

-        results.append({
-            'url': "https://%s.com/q/%s" % (api_site, result['question_id']),
-            'title': html.unescape(result['title']),
-            'content': html.unescape(content),
-        })
+        results.append(
+            {
+                'url': "https://%s.com/q/%s" % (api_site, result['question_id']),
+                'title': html.unescape(result['title']),
+                'content': html.unescape(content),
+            }
+        )

     return results
@@ -101,7 +101,7 @@ def response(resp):
         # check if search result starts with something like: "2 Sep 2014 ... "
         if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
             date_pos = content.find('...') + 4
-            date_string = content[0:date_pos - 5]
+            date_string = content[0 : date_pos - 5]
             # fix content string
             content = content[date_pos:]

@@ -113,7 +113,7 @@ def response(resp):
         # check if search result starts with something like: "5 days ago ... "
         elif re.match(r"^[0-9]+ days? ago \.\.\. ", content):
             date_pos = content.find('...') + 4
-            date_string = content[0:date_pos - 5]
+            date_string = content[0 : date_pos - 5]

             # calculate datetime
             published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
@@ -123,15 +123,10 @@ def response(resp):

         if published_date:
             # append result
-            results.append({'url': url,
-                            'title': title,
-                            'content': content,
-                            'publishedDate': published_date})
+            results.append({'url': url, 'title': title, 'content': content, 'publishedDate': published_date})
         else:
             # append result
-            results.append({'url': url,
-                            'title': title,
-                            'content': content})
+            results.append({'url': url, 'title': title, 'content': content})

     # return results
     return results
@@ -152,7 +147,7 @@ def _fetch_supported_languages(resp):
         'malayam': 'ml',
         'norsk': 'nb',
         'sinhalese': 'si',
-        'sudanese': 'su'
+        'sudanese': 'su',
     }

     # get the English name of every language known by babel
@@ -56,11 +56,7 @@ def response(resp):
         name_row = rows[i]

         links = name_row.xpath('./td[@class="desc-top"]/a')
-        params = {
-            'template': 'torrent.html',
-            'url': links[-1].attrib.get('href'),
-            'title': extract_text(links[-1])
-        }
+        params = {'template': 'torrent.html', 'url': links[-1].attrib.get('href'), 'title': extract_text(links[-1])}
         # I have not yet seen any torrents without magnet links, but
         # it's better to be prepared to stumble upon one some day
         if len(links) == 2:
@@ -35,10 +35,12 @@ api_key = ''
 # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
 torznab_categories = []

-def init(engine_settings=None): # pylint: disable=unused-argument
+
+def init(engine_settings=None):  # pylint: disable=unused-argument
     if len(base_url) < 1:
         raise ValueError('missing torznab base_url')

+
 def request(query, params):

     search_url = base_url + '?t=search&q={search_query}'
@@ -48,13 +50,12 @@ def request(query, params):
         search_url += '&cat={torznab_categories}'

     params['url'] = search_url.format(
-        search_query = quote(query),
-        api_key = api_key,
-        torznab_categories = ",".join([str(x) for x in torznab_categories])
+        search_query=quote(query), api_key=api_key, torznab_categories=",".join([str(x) for x in torznab_categories])
     )

     return params


 def response(resp):
     results = []

@@ -103,8 +104,7 @@ def response(resp):

         result["publishedDate"] = None
         try:
-            result["publishedDate"] = datetime.strptime(
-                get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z')
+            result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z')
         except (ValueError, TypeError) as e:
             logger.debug("ignore exception (publishedDate): %s", e)

@@ -134,9 +134,7 @@ def get_property(item, property_name):
 def get_torznab_attr(item, attr_name):
     element = item.find(
         './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name),
-        {
-            'torznab': 'http://torznab.com/schemas/2015/feed'
-        }
+        {'torznab': 'http://torznab.com/schemas/2015/feed'},
     )

     if element is not None:
@@ -28,24 +28,25 @@ def request(query, params):
         key_form = '&key=' + api_key
     else:
         key_form = ''
-    params['url'] = url.format(from_lang=params['from_lang'][1],
-                               to_lang=params['to_lang'][1],
-                               query=params['query'],
-                               key=key_form)
+    params['url'] = url.format(
+        from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'], key=key_form
+    )
     return params


 def response(resp):
     results = []
-    results.append({
-        'url': web_url.format(
-            from_lang=resp.search_params['from_lang'][2],
-            to_lang=resp.search_params['to_lang'][2],
-            query=resp.search_params['query']),
-        'title': '[{0}-{1}] {2}'.format(
-            resp.search_params['from_lang'][1],
-            resp.search_params['to_lang'][1],
-            resp.search_params['query']),
-        'content': resp.json()['responseData']['translatedText']
-    })
+    results.append(
+        {
+            'url': web_url.format(
+                from_lang=resp.search_params['from_lang'][2],
+                to_lang=resp.search_params['to_lang'][2],
+                query=resp.search_params['query'],
+            ),
+            'title': '[{0}-{1}] {2}'.format(
+                resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query']
+            ),
+            'content': resp.json()['responseData']['translatedText'],
+        }
+    )
     return results
@@ -26,23 +26,13 @@ paging = True

 def clean_url(url):
     parsed = urlparse(url)
-    query = [(k, v) for (k, v)
-             in parse_qsl(parsed.query) if k not in ['ixid', 's']]
+    query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']]

-    return urlunparse((
-        parsed.scheme,
-        parsed.netloc,
-        parsed.path,
-        parsed.params,
-        urlencode(query),
-        parsed.fragment
-    ))
+    return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment))


 def request(query, params):
-    params['url'] = search_url + urlencode({
-        'query': query, 'page': params['pageno'], 'per_page': page_size
-    })
+    params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size})
     logger.debug("query_url --> %s", params['url'])
     return params

@@ -53,13 +43,15 @@ def response(resp):

     if 'results' in json_data:
         for result in json_data['results']:
-            results.append({
-                'template': 'images.html',
-                'url': clean_url(result['links']['html']),
-                'thumbnail_src': clean_url(result['urls']['thumb']),
-                'img_src': clean_url(result['urls']['raw']),
-                'title': result.get('alt_description') or 'unknown',
-                'content': result.get('description') or ''
-            })
+            results.append(
+                {
+                    'template': 'images.html',
+                    'url': clean_url(result['links']['html']),
+                    'thumbnail_src': clean_url(result['urls']['thumb']),
+                    'img_src': clean_url(result['urls']['raw']),
+                    'title': result.get('alt_description') or 'unknown',
+                    'content': result.get('description') or '',
+                }
+            )

     return results
@@ -25,15 +25,16 @@ paging = True
 base_url = 'https://vimeo.com/'
 search_url = base_url + '/search/page:{pageno}?{query}'

-embedded_url = '<iframe data-src="https://player.vimeo.com/video/{videoid}" ' +\
-    'width="540" height="304" frameborder="0" ' +\
-    'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
+embedded_url = (
+    '<iframe data-src="https://player.vimeo.com/video/{videoid}" '
+    + 'width="540" height="304" frameborder="0" '
+    + 'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
+)


 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(pageno=params['pageno'],
-                                      query=urlencode({'q': query}))
+    params['url'] = search_url.format(pageno=params['pageno'], query=urlencode({'q': query}))

     return params

@@ -56,13 +57,17 @@ def response(resp):
         embedded = embedded_url.format(videoid=videoid)

         # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': '',
-                        'template': 'videos.html',
-                        'publishedDate': publishedDate,
-                        'embedded': embedded,
-                        'thumbnail': thumbnail})
+        results.append(
+            {
+                'url': url,
+                'title': title,
+                'content': '',
+                'template': 'videos.html',
+                'publishedDate': publishedDate,
+                'embedded': embedded,
+                'thumbnail': thumbnail,
+            }
+        )

     # return results
     return results
@@ -14,7 +14,10 @@ from searx.data import WIKIDATA_UNITS
 from searx.network import post, get
 from searx.utils import match_language, searx_useragent, get_string_replaces_function
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
-from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+from searx.engines.wikipedia import (
+    _fetch_supported_languages,
+    supported_languages_url,
+)  # NOQA # pylint: disable=unused-import

 # about
 about = {
@@ -112,10 +115,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'})

 def get_headers():
     # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits
-    return {
-        'Accept': 'application/sparql-results+json',
-        'User-Agent': searx_useragent()
-    }
+    return {'Accept': 'application/sparql-results+json', 'User-Agent': searx_useragent()}


 def get_label_for_entity(entity_id, language):
@@ -211,9 +211,9 @@ def get_results(attribute_result, attributes, language):
             results.append({'title': infobox_title, 'url': url})
             # update the infobox_id with the wikipedia URL
             # first the local wikipedia URL, and as fallback the english wikipedia URL
-            if attribute_type == WDArticle\
-               and ((attribute.language == 'en' and infobox_id_lang is None)
-                    or attribute.language != 'en'):
+            if attribute_type == WDArticle and (
+                (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en'
+            ):
                 infobox_id_lang = attribute.language
                 infobox_id = url
         elif attribute_type == WDImageAttribute:
@@ -232,13 +232,11 @@ def get_results(attribute_result, attributes, language):
             osm_zoom = area_to_osm_zoom(area) if area else 19
             url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom)
             if url:
-                infobox_urls.append({'title': attribute.get_label(language),
-                                     'url': url,
-                                     'entity': attribute.name})
+                infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name})
         else:
-            infobox_attributes.append({'label': attribute.get_label(language),
-                                       'value': value,
-                                       'entity': attribute.name})
+            infobox_attributes.append(
+                {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name}
+            )

     if infobox_id:
         infobox_id = replace_http_by_https(infobox_id)
@@ -246,22 +244,19 @@ def get_results(attribute_result, attributes, language):
     # add the wikidata URL at the end
     infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})

-    if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\
-       len(infobox_content) == 0:
-        results.append({
-            'url': infobox_urls[0]['url'],
-            'title': infobox_title,
-            'content': infobox_content
-        })
+    if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0:
+        results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content})
     else:
-        results.append({
-            'infobox': infobox_title,
-            'id': infobox_id,
-            'content': infobox_content,
-            'img_src': img_src,
-            'urls': infobox_urls,
-            'attributes': infobox_attributes
-        })
+        results.append(
+            {
+                'infobox': infobox_title,
+                'id': infobox_id,
+                'content': infobox_content,
+                'img_src': img_src,
+                'urls': infobox_urls,
+                'attributes': infobox_attributes,
+            }
+        )
     return results


@@ -271,13 +266,14 @@ def get_query(query, language):
     where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes]))
     wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes]))
     group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes]))
-    query = QUERY_TEMPLATE\
-        .replace('%QUERY%', sparql_string_escape(query))\
-        .replace('%SELECT%', ' '.join(select))\
-        .replace('%WHERE%', '\n '.join(where))\
-        .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label))\
-        .replace('%GROUP_BY%', ' '.join(group_by))\
-        .replace('%LANGUAGE%', language)
+    query = (
+        QUERY_TEMPLATE.replace('%QUERY%', sparql_string_escape(query))
+        .replace('%SELECT%', ' '.join(select))
+        .replace('%WHERE%', '\n '.join(where))
+        .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label))
+        .replace('%GROUP_BY%', ' '.join(group_by))
+        .replace('%LANGUAGE%', language)
+    )
     return query, attributes


@@ -303,90 +299,98 @@ def get_attributes(language):
         attributes.append(WDDateAttribute(name))

     # Dates
-    for p in ['P571',    # inception date
-              'P576',    # dissolution date
-              'P580',    # start date
-              'P582',    # end date
-              'P569',    # date of birth
-              'P570',    # date of death
-              'P619',    # date of spacecraft launch
-              'P620']:   # date of spacecraft landing
+    for p in [
+        'P571',  # inception date
+        'P576',  # dissolution date
+        'P580',  # start date
+        'P582',  # end date
+        'P569',  # date of birth
+        'P570',  # date of death
+        'P619',  # date of spacecraft launch
+        'P620',
+    ]:  # date of spacecraft landing
         add_date(p)

-    for p in ['P27',     # country of citizenship
-              'P495',    # country of origin
-              'P17',     # country
-              'P159']:   # headquarters location
+    for p in [
+        'P27',  # country of citizenship
+        'P495',  # country of origin
+        'P17',  # country
+        'P159',
+    ]:  # headquarters location
         add_label(p)

     # Places
-    for p in ['P36',     # capital
-              'P35',     # head of state
-              'P6',      # head of government
-              'P122',    # basic form of government
-              'P37']:    # official language
+    for p in [
+        'P36',  # capital
+        'P35',  # head of state
+        'P6',  # head of government
+        'P122',  # basic form of government
+        'P37',
+    ]:  # official language
         add_label(p)

     add_value('P1082')  # population
     add_amount('P2046')  # area
     add_amount('P281')  # postal code
     add_label('P38')  # currency
     add_amount('P2048')  # heigth (building)

     # Media
-    for p in ['P400',    # platform (videogames, computing)
-              'P50',     # author
-              'P170',    # creator
-              'P57',     # director
-              'P175',    # performer
-              'P178',    # developer
-              'P162',    # producer
-              'P176',    # manufacturer
-              'P58',     # screenwriter
-              'P272',    # production company
-              'P264',    # record label
-              'P123',    # publisher
-              'P449',    # original network
-              'P750',    # distributed by
+    for p in [
+        'P400',  # platform (videogames, computing)
+        'P50',  # author
+        'P170',  # creator
+        'P57',  # director
+        'P175',  # performer
+        'P178',  # developer
+        'P162',  # producer
+        'P176',  # manufacturer
+        'P58',  # screenwriter
+        'P272',  # production company
+        'P264',  # record label
+        'P123',  # publisher
+        'P449',  # original network
'P86']: # composer
|
'P750', # distributed by
|
||||||
|
'P86',
|
||||||
|
]: # composer
|
||||||
add_label(p)
|
add_label(p)
|
||||||
|
|
||||||
add_date('P577') # publication date
|
add_date('P577') # publication date
|
||||||
add_label('P136') # genre (music, film, artistic...)
|
add_label('P136') # genre (music, film, artistic...)
|
||||||
add_label('P364') # original language
|
add_label('P364') # original language
|
||||||
add_value('P212') # ISBN-13
|
add_value('P212') # ISBN-13
|
||||||
add_value('P957') # ISBN-10
|
add_value('P957') # ISBN-10
|
||||||
add_label('P275') # copyright license
|
add_label('P275') # copyright license
|
||||||
add_label('P277') # programming language
|
add_label('P277') # programming language
|
||||||
add_value('P348') # version
|
add_value('P348') # version
|
||||||
add_label('P840') # narrative location
|
add_label('P840') # narrative location
|
||||||
|
|
||||||
# Languages
|
# Languages
|
||||||
add_value('P1098') # number of speakers
|
add_value('P1098') # number of speakers
|
||||||
add_label('P282') # writing system
|
add_label('P282') # writing system
|
||||||
add_label('P1018') # language regulatory body
|
add_label('P1018') # language regulatory body
|
||||||
add_value('P218') # language code (ISO 639-1)
|
add_value('P218') # language code (ISO 639-1)
|
||||||
|
|
||||||
# Other
|
# Other
|
||||||
add_label('P169') # ceo
|
add_label('P169') # ceo
|
||||||
add_label('P112') # founded by
|
add_label('P112') # founded by
|
||||||
add_label('P1454') # legal form (company, organization)
|
add_label('P1454') # legal form (company, organization)
|
||||||
add_label('P137') # operator (service, facility, ...)
|
add_label('P137') # operator (service, facility, ...)
|
||||||
add_label('P1029') # crew members (tripulation)
|
add_label('P1029') # crew members (tripulation)
|
||||||
add_label('P225') # taxon name
|
add_label('P225') # taxon name
|
||||||
add_value('P274') # chemical formula
|
add_value('P274') # chemical formula
|
||||||
add_label('P1346') # winner (sports, contests, ...)
|
add_label('P1346') # winner (sports, contests, ...)
|
||||||
add_value('P1120') # number of deaths
|
add_value('P1120') # number of deaths
|
||||||
add_value('P498') # currency code (ISO 4217)
|
add_value('P498') # currency code (ISO 4217)
|
||||||
|
|
||||||
# URL
|
# URL
|
||||||
add_url('P856', official=True) # official website
|
add_url('P856', official=True) # official website
|
||||||
attributes.append(WDArticle(language)) # wikipedia (user language)
|
attributes.append(WDArticle(language)) # wikipedia (user language)
|
||||||
if not language.startswith('en'):
|
if not language.startswith('en'):
|
||||||
attributes.append(WDArticle('en')) # wikipedia (english)
|
attributes.append(WDArticle('en')) # wikipedia (english)
|
||||||
|
|
||||||
add_url('P1324') # source code repository
|
add_url('P1324') # source code repository
|
||||||
add_url('P1581') # blog
|
add_url('P1581') # blog
|
||||||
add_url('P434', url_id='musicbrainz_artist')
|
add_url('P434', url_id='musicbrainz_artist')
|
||||||
add_url('P435', url_id='musicbrainz_work')
|
add_url('P435', url_id='musicbrainz_work')
|
||||||
add_url('P436', url_id='musicbrainz_release_group')
|
add_url('P436', url_id='musicbrainz_release_group')
|
||||||
|
@ -402,11 +406,11 @@ def get_attributes(language):
|
||||||
attributes.append(WDGeoAttribute('P625'))
|
attributes.append(WDGeoAttribute('P625'))
|
||||||
|
|
||||||
# Image
|
# Image
|
||||||
add_image('P15', priority=1, url_id='wikimedia_image') # route map
|
add_image('P15', priority=1, url_id='wikimedia_image') # route map
|
||||||
add_image('P242', priority=2, url_id='wikimedia_image') # locator map
|
add_image('P242', priority=2, url_id='wikimedia_image') # locator map
|
||||||
add_image('P154', priority=3, url_id='wikimedia_image') # logo
|
add_image('P154', priority=3, url_id='wikimedia_image') # logo
|
||||||
add_image('P18', priority=4, url_id='wikimedia_image') # image
|
add_image('P18', priority=4, url_id='wikimedia_image') # image
|
||||||
add_image('P41', priority=5, url_id='wikimedia_image') # flag
|
add_image('P41', priority=5, url_id='wikimedia_image') # flag
|
||||||
add_image('P2716', priority=6, url_id='wikimedia_image') # collage
|
add_image('P2716', priority=6, url_id='wikimedia_image') # collage
|
||||||
add_image('P2910', priority=7, url_id='wikimedia_image') # icon
|
add_image('P2910', priority=7, url_id='wikimedia_image') # icon
|
||||||
|
|
||||||
|
@ -415,7 +419,7 @@ def get_attributes(language):
|
||||||
|
|
||||||
class WDAttribute:
|
class WDAttribute:
|
||||||
|
|
||||||
__slots__ = 'name',
|
__slots__ = ('name',)
|
||||||
|
|
||||||
def __init__(self, name):
|
def __init__(self, name):
|
||||||
self.name = name
|
self.name = name
|
||||||
|
@ -443,14 +447,15 @@ class WDAttribute:
|
||||||
|
|
||||||
|
|
||||||
class WDAmountAttribute(WDAttribute):
|
class WDAmountAttribute(WDAttribute):
|
||||||
|
|
||||||
def get_select(self):
|
def get_select(self):
|
||||||
return '?{name} ?{name}Unit'.replace('{name}', self.name)
|
return '?{name} ?{name}Unit'.replace('{name}', self.name)
|
||||||
|
|
||||||
def get_where(self):
|
def get_where(self):
|
||||||
return """ OPTIONAL { ?item p:{name} ?{name}Node .
|
return """ OPTIONAL { ?item p:{name} ?{name}Node .
|
||||||
?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .
|
?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .
|
||||||
OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name)
|
OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace(
|
||||||
|
'{name}', self.name
|
||||||
|
)
|
||||||
|
|
||||||
def get_group_by(self):
|
def get_group_by(self):
|
||||||
return self.get_select()
|
return self.get_select()
|
||||||
|
@ -484,7 +489,9 @@ class WDArticle(WDAttribute):
|
||||||
return """OPTIONAL { ?article{language} schema:about ?item ;
|
return """OPTIONAL { ?article{language} schema:about ?item ;
|
||||||
schema:inLanguage "{language}" ;
|
schema:inLanguage "{language}" ;
|
||||||
schema:isPartOf <https://{language}.wikipedia.org/> ;
|
schema:isPartOf <https://{language}.wikipedia.org/> ;
|
||||||
schema:name ?articleName{language} . }""".replace('{language}', self.language)
|
schema:name ?articleName{language} . }""".replace(
|
||||||
|
'{language}', self.language
|
||||||
|
)
|
||||||
|
|
||||||
def get_group_by(self):
|
def get_group_by(self):
|
||||||
return self.get_select()
|
return self.get_select()
|
||||||
|
@ -495,7 +502,6 @@ class WDArticle(WDAttribute):
|
||||||
|
|
||||||
|
|
||||||
class WDLabelAttribute(WDAttribute):
|
class WDLabelAttribute(WDAttribute):
|
||||||
|
|
||||||
def get_select(self):
|
def get_select(self):
|
||||||
return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name)
|
return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name)
|
||||||
|
|
||||||
|
@ -526,14 +532,13 @@ class WDURLAttribute(WDAttribute):
|
||||||
value = value.split(',')[0]
|
value = value.split(',')[0]
|
||||||
url_id = self.url_id
|
url_id = self.url_id
|
||||||
if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
|
if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
|
||||||
value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):]
|
value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :]
|
||||||
url_id = 'wikimedia_image'
|
url_id = 'wikimedia_image'
|
||||||
return get_external_url(url_id, value)
|
return get_external_url(url_id, value)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
class WDGeoAttribute(WDAttribute):
|
class WDGeoAttribute(WDAttribute):
|
||||||
|
|
||||||
def get_label(self, language):
|
def get_label(self, language):
|
||||||
return "OpenStreetMap"
|
return "OpenStreetMap"
|
||||||
|
|
||||||
|
@ -543,7 +548,9 @@ class WDGeoAttribute(WDAttribute):
|
||||||
def get_where(self):
|
def get_where(self):
|
||||||
return """OPTIONAL { ?item p:{name}/psv:{name} [
|
return """OPTIONAL { ?item p:{name}/psv:{name} [
|
||||||
wikibase:geoLatitude ?{name}Lat ;
|
wikibase:geoLatitude ?{name}Lat ;
|
||||||
wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name)
|
wikibase:geoLongitude ?{name}Long ] }""".replace(
|
||||||
|
'{name}', self.name
|
||||||
|
)
|
||||||
|
|
||||||
def get_group_by(self):
|
def get_group_by(self):
|
||||||
return self.get_select()
|
return self.get_select()
|
||||||
|
@ -565,7 +572,7 @@ class WDGeoAttribute(WDAttribute):
|
||||||
|
|
||||||
class WDImageAttribute(WDURLAttribute):
|
class WDImageAttribute(WDURLAttribute):
|
||||||
|
|
||||||
__slots__ = 'priority',
|
__slots__ = ('priority',)
|
||||||
|
|
||||||
def __init__(self, name, url_id=None, priority=100):
|
def __init__(self, name, url_id=None, priority=100):
|
||||||
super().__init__(name, url_id)
|
super().__init__(name, url_id)
|
||||||
|
@ -573,7 +580,6 @@ class WDImageAttribute(WDURLAttribute):
|
||||||
|
|
||||||
|
|
||||||
class WDDateAttribute(WDAttribute):
|
class WDDateAttribute(WDAttribute):
|
||||||
|
|
||||||
def get_select(self):
|
def get_select(self):
|
||||||
return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
|
return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
|
||||||
|
|
||||||
|
@ -587,7 +593,9 @@ class WDDateAttribute(WDAttribute):
|
||||||
wikibase:timePrecision ?{name}timePrecision ;
|
wikibase:timePrecision ?{name}timePrecision ;
|
||||||
wikibase:timeTimezone ?{name}timeZone ;
|
wikibase:timeTimezone ?{name}timeZone ;
|
||||||
wikibase:timeCalendarModel ?{name}timeCalendar ] . }
|
wikibase:timeCalendarModel ?{name}timeCalendar ] . }
|
||||||
hint:Prior hint:rangeSafe true;""".replace('{name}', self.name)
|
hint:Prior hint:rangeSafe true;""".replace(
|
||||||
|
'{name}', self.name
|
||||||
|
)
|
||||||
|
|
||||||
def get_group_by(self):
|
def get_group_by(self):
|
||||||
return self.get_select()
|
return self.get_select()
|
||||||
|
@ -619,11 +627,12 @@ class WDDateAttribute(WDAttribute):
|
||||||
def format_13(self, value, locale):
|
def format_13(self, value, locale):
|
||||||
timestamp = isoparse(value)
|
timestamp = isoparse(value)
|
||||||
# precision: minute
|
# precision: minute
|
||||||
return get_datetime_format(format, locale=locale) \
|
return (
|
||||||
.replace("'", "") \
|
get_datetime_format(format, locale=locale)
|
||||||
.replace('{0}', format_time(timestamp, 'full', tzinfo=None,
|
.replace("'", "")
|
||||||
locale=locale)) \
|
.replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale))
|
||||||
.replace('{1}', format_date(timestamp, 'short', locale=locale))
|
.replace('{1}', format_date(timestamp, 'short', locale=locale))
|
||||||
|
)
|
||||||
|
|
||||||
def format_14(self, value, locale):
|
def format_14(self, value, locale):
|
||||||
# precision: second.
|
# precision: second.
|
||||||
|
@ -644,7 +653,7 @@ class WDDateAttribute(WDAttribute):
|
||||||
'11': ('format_11', 0), # day
|
'11': ('format_11', 0), # day
|
||||||
'12': ('format_13', 0), # hour (not supported by babel, display minute)
|
'12': ('format_13', 0), # hour (not supported by babel, display minute)
|
||||||
'13': ('format_13', 0), # minute
|
'13': ('format_13', 0), # minute
|
||||||
'14': ('format_14', 0) # second
|
'14': ('format_14', 0), # second
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_str(self, result, language):
|
def get_str(self, result, language):
|
||||||
|
|