From fa5b2a7948044522c0fa6d763f436e55c7600b95 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Sun, 8 Oct 2023 19:27:48 +0200 Subject: [PATCH] [mod] yacy: use official instance by default and fix crashes --- searx/engines/yacy.py | 62 ++++++++++++++++++++++++++++++------------- searx/settings.yml | 29 +++++++++++++------- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 0603a4564..50c7886c0 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -18,17 +18,27 @@ Configuration The engine has the following (additional) settings: +- :py:obj:`http_digest_auth_user` +- :py:obj:`http_digest_auth_pass` +- :py:obj:`search_mode` +- :py:obj:`search_type` + .. code:: yaml - - name: yacy - engine: yacy - shortcut: ya - base_url: http://localhost:8090 - # Yacy search mode. 'global' or 'local'. - search_mode: 'global' - number_of_results: 5 - http_digest_auth_user: "" - http_digest_auth_pass: "" + - name: yacy + engine: yacy + categories: general + search_type: text + base_url: https://yacy.searchlab.eu + shortcut: ya + + - name: yacy images + engine: yacy + categories: images + search_type: image + base_url: https://yacy.searchlab.eu + shortcut: yai + disabled: true Implementations @@ -55,11 +65,14 @@ about = { } # engine dependent config -categories = ['general', 'images'] # TODO , 'music', 'videos', 'files' +categories = ['general'] paging = True -number_of_results = 5 +number_of_results = 10 http_digest_auth_user = "" +"""HTTP digest user for the local YACY instance""" http_digest_auth_pass = "" +"""HTTP digest password for the local YACY instance""" + search_mode = 'global' """Yacy search mode ``global`` or ``local``. By default, Yacy operates in ``global`` mode. @@ -70,8 +83,13 @@ mode. ``local`` Privacy or Stealth mode, restricts the search to local yacy instance. 
""" +search_type = 'text' +"""One of ``text`` or ``image``. The search types ``app``, ``audio`` and +``video`` are not yet implemented (pull requests are welcome). +""" + # search-url -base_url = 'http://localhost:8090' +base_url = 'https://yacy.searchlab.eu' search_url = ( '/yacysearch.json?{query}' '&startRecord={offset}' @@ -80,13 +98,19 @@ search_url = ( '&resource={resource}' ) -# yacy specific type-definitions -search_types = {'general': 'text', 'images': 'image', 'files': 'app', 'music': 'audio', 'videos': 'video'} + +def init(_): + valid_types = [ + 'text', + 'image', + # 'app', 'audio', 'video', + ] + if search_type not in valid_types: + raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types)) def request(query, params): offset = (params['pageno'] - 1) * number_of_results - search_type = search_types.get(params.get('category'), '0') params['url'] = base_url + search_url.format( query=urlencode({'query': query}), @@ -122,7 +146,7 @@ def response(resp): for result in search_results[0].get('items', []): # parse image results - if resp.search_params.get('category') == 'images': + if search_type == 'image': result_url = '' if 'url' in result: result_url = result['url'] @@ -144,12 +168,14 @@ def response(resp): # parse general results else: - publishedDate = parser.parse(result['pubDate']) + publishedDate = None + if 'pubDate' in result: + publishedDate = parser.parse(result['pubDate']) # append result results.append( { - 'url': result['link'], + 'url': result['link'] or '', 'title': result['title'], 'content': html_to_text(result['description']), 'publishedDate': publishedDate, diff --git a/searx/settings.yml b/searx/settings.yml index 695b7f812..1b511c2c9 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1898,15 +1898,26 @@ engines: shortcut: mvw disabled: true - # - name: yacy - # engine: yacy - # shortcut: ya - # base_url: http://localhost:8090 - # # required if you aren't using HTTPS for your local yacy instance' - # 
enable_http: true - # timeout: 3.0 - # # Yacy search mode. 'global' or 'local'. - # search_mode: 'global' + - name: yacy + engine: yacy + categories: general + search_type: text + base_url: https://yacy.searchlab.eu + shortcut: ya + disabled: true + # required if you aren't using HTTPS for your local yacy instance + # https://docs.searxng.org/dev/engines/online/yacy.html + # enable_http: true + # timeout: 3.0 + # search_mode: 'global' + + - name: yacy images + engine: yacy + categories: images + search_type: image + base_url: https://yacy.searchlab.eu + shortcut: yai + disabled: true - name: rumble engine: rumble