[enh] introduce private engines

This PR adds a new engine setting named `tokens`.
It takes a list of access tokens that searx uses to decide
whether a request is allowed to use the engine.
This commit is contained in:
Noémi Ványi 2020-02-01 11:01:17 +01:00
parent f9c7a678d2
commit 99435381a8
10 changed files with 161 additions and 28 deletions

View File

@ -38,6 +38,7 @@ def check_settings_yml(file_name):
else:
return None
# find location of settings.yml
if 'SEARX_SETTINGS_PATH' in environ:
# if possible set path to settings using the

View File

@ -54,7 +54,8 @@ engine_default_args = {'paging': False,
'suspend_end_time': 0,
'continuous_errors': 0,
'time_range_support': False,
'offline': False}
'offline': False,
'tokens': []}
def load_engine(engine_data):
@ -160,7 +161,7 @@ def to_percentage(stats, maxvalue):
return stats
def get_engines_stats():
def get_engines_stats(preferences):
# TODO refactor
pageloads = []
engine_times = []
@ -171,8 +172,12 @@ def get_engines_stats():
max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
for engine in engines.values():
if not preferences.validate_token(engine):
continue
if engine.stats['search_count'] == 0:
continue
results_num = \
engine.stats['result_count'] / float(engine.stats['search_count'])

View File

@ -0,0 +1,12 @@
"""
Dummy Offline
@results one result
@stable yes
"""
def search(query, request_params):
    """Return one fixed result; ``query`` and ``request_params`` are ignored."""
    only_result = {'result': 'this is what you get'}
    return [only_result]

View File

@ -72,6 +72,7 @@ def parse_album(hit):
result.update({'content': 'Released: {}'.format(year)})
return result
parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album}

View File

@ -104,6 +104,31 @@ class MultipleChoiceSetting(EnumStringSetting):
resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
class SetSetting(Setting):
    """Setting that holds a set of strings in ``self.values``.

    The set is read from and written to a single comma separated string.
    """

    def _post_init(self):
        # Only create the set when nothing has populated ``values`` yet
        # (presumably the base-class constructor may set it -- confirm).
        if not hasattr(self, 'values'):
            self.values = set()

    @staticmethod
    def _split(data):
        """Split a comma separated string into a set of stripped, non-empty items.

        Empty items (from ``''``, ``'a,'`` or ``'a,,b'``) and surrounding
        whitespace (``'a, b'``) are dropped so they never end up as tokens.
        """
        stripped = (item.strip() for item in data.split(','))
        return set(item for item in stripped if item)

    def get_value(self):
        """Return the stored values as one comma separated string."""
        # sorted() keeps the output stable; set iteration order is arbitrary
        return ','.join(sorted(self.values))

    def parse(self, data):
        """Add the items of the comma separated ``data`` to the stored values.

        An empty string clears the stored values instead.
        """
        if data == '':
            self.values = set()
            return
        self.values.update(self._split(data))

    def parse_form(self, data):
        """Replace the stored values with the items of the comma separated ``data``."""
        self.values = self._split(data)

    def save(self, name, resp):
        """Write the stored values to the cookie ``name`` on the response ``resp``."""
        resp.set_cookie(name, ','.join(sorted(self.values)), max_age=COOKIE_MAX_AGE)
class SearchLanguageSetting(EnumStringSetting):
"""Available choices may change, so user's value may not be in choices anymore"""
@ -272,6 +297,7 @@ class Preferences(object):
self.engines = EnginesSetting('engines', choices=engines)
self.plugins = PluginsSetting('plugins', choices=plugins)
self.tokens = SetSetting('tokens')
self.unknown_params = {}
def get_as_url_params(self):
@ -288,6 +314,8 @@ class Preferences(object):
settings_kv['disabled_plugins'] = ','.join(self.plugins.disabled)
settings_kv['enabled_plugins'] = ','.join(self.plugins.enabled)
settings_kv['tokens'] = ','.join(self.tokens.values)
return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8')
def parse_encoded_data(self, input_data):
@ -307,6 +335,8 @@ class Preferences(object):
elif user_setting_name == 'disabled_plugins':
self.plugins.parse_cookie((input_data.get('disabled_plugins', ''),
input_data.get('enabled_plugins', '')))
elif user_setting_name == 'tokens':
self.tokens.parse(user_setting)
elif not any(user_setting_name.startswith(x) for x in [
'enabled_',
'disabled_',
@ -328,6 +358,8 @@ class Preferences(object):
enabled_categories.append(user_setting_name[len('category_'):])
elif user_setting_name.startswith('plugin_'):
disabled_plugins.append(user_setting_name)
elif user_setting_name == 'tokens':
self.tokens.parse_form(user_setting)
else:
self.unknown_params[user_setting_name] = user_setting
self.key_value_settings['categories'].parse_form(enabled_categories)
@ -346,6 +378,18 @@ class Preferences(object):
user_setting.save(user_setting_name, resp)
self.engines.save(resp)
self.plugins.save(resp)
self.tokens.save('tokens', resp)
for k, v in self.unknown_params.items():
resp.set_cookie(k, v, max_age=COOKIE_MAX_AGE)
return resp
def validate_token(self, engine):
    """Tell whether these preferences may use ``engine``.

    An engine without a ``tokens`` attribute, or with an empty one, is open
    to everybody. Otherwise at least one of the tokens stored in
    ``self.tokens.values`` has to be listed in ``engine.tokens``.
    """
    is_private = hasattr(engine, 'tokens') and engine.tokens
    if not is_private:
        return True
    return any(candidate in engine.tokens for candidate in self.tokens.values)

View File

@ -177,7 +177,8 @@ class RawTextQuery(object):
class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None):
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
timeout_limit=None, preferences=None):
self.query = query.encode('utf-8')
self.engines = engines
self.categories = categories
@ -186,6 +187,7 @@ class SearchQuery(object):
self.pageno = pageno
self.time_range = None if time_range in ('', 'None', None) else time_range
self.timeout_limit = timeout_limit
self.preferences = preferences
def __str__(self):
return str(self.query) + ";" + str(self.engines)

View File

@ -407,7 +407,7 @@ def get_search_query_from_webapp(preferences, form):
return (SearchQuery(query, query_engines, query_categories,
query_lang, query_safesearch, query_pageno,
query_time_range, query_timeout),
query_time_range, query_timeout, preferences),
raw_text_query)
@ -459,6 +459,9 @@ class Search(object):
engine = engines[selected_engine['name']]
if not search_query.preferences.validate_token(engine):
continue
# skip suspended engines
if engine.suspend_end_time >= time():
logger.debug('Engine currently suspended: %s', selected_engine['name'])

View File

@ -131,6 +131,12 @@
{% endfor %}
</select>
{{ preferences_item_footer(info, label, rtl) }}
{% set label = _('Engine tokens') %}
{% set info = _('Access tokens for private engines') %}
{{ preferences_item_header(info, label, rtl) }}
<input class="form-control" id='tokens' name='tokens' value='{{ preferences.tokens.get_value() }}'/>
{{ preferences_item_footer(info, label, rtl) }}
</div>
</fieldset>
</div>

View File

@ -731,8 +731,13 @@ def preferences():
# stats for preferences page
stats = {}
engines_by_category = {}
for c in categories:
engines_by_category[c] = []
for e in categories[c]:
if not request.preferences.validate_token(e):
continue
stats[e.name] = {'time': None,
'warn_timeout': False,
'warn_time': False}
@ -740,9 +745,11 @@ def preferences():
stats[e.name]['warn_timeout'] = True
stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)
engines_by_category[c].append(e)
# get first element [0], the engine time,
# and then the second element [1] : the time (the first one is the label)
for engine_stat in get_engines_stats()[0][1]:
for engine_stat in get_engines_stats(request.preferences)[0][1]:
stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3)
if engine_stat.get('avg') > settings['outgoing']['request_timeout']:
stats[engine_stat.get('name')]['warn_time'] = True
@ -752,7 +759,7 @@ def preferences():
locales=settings['locales'],
current_locale=get_locale(),
image_proxy=image_proxy,
engines_by_category=categories,
engines_by_category=engines_by_category,
stats=stats,
answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
disabled_engines=disabled_engines,
@ -828,7 +835,7 @@ def image_proxy():
@app.route('/stats', methods=['GET'])
def stats():
"""Render engine statistics page."""
stats = get_engines_stats()
stats = get_engines_stats(request.preferences)
return render(
'stats.html',
stats=stats,
@ -891,7 +898,7 @@ def clear_cookies():
@app.route('/config')
def config():
return jsonify({'categories': list(categories.keys()),
'engines': [{'name': engine_name,
'engines': [{'name': name,
'categories': engine.categories,
'shortcut': engine.shortcut,
'enabled': not engine.disabled,
@ -904,7 +911,7 @@ def config():
'safesearch': engine.safesearch,
'time_range_support': engine.time_range_support,
'timeout': engine.timeout}
for engine_name, engine in engines.items()],
for name, engine in engines.items() if request.preferences.validate_token(engine)],
'plugins': [{'name': plugin.name,
'enabled': plugin.default_on}
for plugin in plugins],

View File

@ -1,60 +1,112 @@
# -*- coding: utf-8 -*-
from searx.testing import SearxTestCase
from searx.preferences import Preferences
from searx.engines import engines
import searx.preferences
import searx.search
import searx.engines
SAFESEARCH = 0
PAGENO = 1
PUBLIC_ENGINE_NAME = 'general dummy'
PRIVATE_ENGINE_NAME = 'general private offline'
TEST_ENGINES = [
{
'name': PUBLIC_ENGINE_NAME,
'engine': 'dummy',
'categories': 'general',
'shortcut': 'gd',
'timeout': 3.0,
'tokens': [],
},
{
'name': PRIVATE_ENGINE_NAME,
'engine': 'dummy-offline',
'categories': 'general',
'shortcut': 'do',
'timeout': 3.0,
'offline': True,
'tokens': ['my-token'],
},
]
class SearchTestCase(SearxTestCase):
@classmethod
def setUpClass(cls):
searx.engines.initialize_engines([{
'name': 'general dummy',
'engine': 'dummy',
'categories': 'general',
'shortcut': 'gd',
'timeout': 3.0
}])
searx.engines.initialize_engines(TEST_ENGINES)
def test_timeout_simple(self):
searx.search.max_request_timeout = None
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
['general'], 'en-US', 0, 1, None, None)
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', SAFESEARCH, PAGENO, None, None,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
self.assertEquals(search.actual_timeout, 3.0)
def test_timeout_query_above_default_nomax(self):
searx.search.max_request_timeout = None
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
['general'], 'en-US', 0, 1, None, 5.0)
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
self.assertEquals(search.actual_timeout, 3.0)
def test_timeout_query_below_default_nomax(self):
searx.search.max_request_timeout = None
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
['general'], 'en-US', 0, 1, None, 1.0)
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', SAFESEARCH, PAGENO, None, 1.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
self.assertEquals(search.actual_timeout, 1.0)
def test_timeout_query_below_max(self):
searx.search.max_request_timeout = 10.0
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
['general'], 'en-US', 0, 1, None, 5.0)
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
self.assertEquals(search.actual_timeout, 5.0)
def test_timeout_query_above_max(self):
searx.search.max_request_timeout = 10.0
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
['general'], 'en-US', 0, 1, None, 15.0)
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', SAFESEARCH, PAGENO, None, 15.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
self.assertEquals(search.actual_timeout, 10.0)
def test_query_private_engine_without_token(self):
    """Querying the private engine with no tokens set yields no results."""
    preferences_without_tokens = Preferences(['oscar'], ['general'], engines, [])
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}],
                                           ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0,
                                           preferences=preferences_without_tokens)
    search = searx.search.Search(search_query)
    results = search.search()
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(results.results_length(), 0)
def test_query_private_engine_with_incorrect_token(self):
    """Querying the private engine with a token it does not list yields no results."""
    preferences_with_tokens = Preferences(['oscar'], ['general'], engines, [])
    preferences_with_tokens.parse_dict({'tokens': 'bad-token'})
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}],
                                           ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0,
                                           preferences=preferences_with_tokens)
    search = searx.search.Search(search_query)
    results = search.search()
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(results.results_length(), 0)
def test_query_private_engine_with_correct_token(self):
    """Querying the private engine with one of its tokens yields its single result."""
    preferences_with_tokens = Preferences(['oscar'], ['general'], engines, [])
    preferences_with_tokens.parse_dict({'tokens': 'my-token'})
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}],
                                           ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0,
                                           preferences=preferences_with_tokens)
    search = searx.search.Search(search_query)
    results = search.search()
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(results.results_length(), 1)