From 17394ad5583df970e6b51e79ef708afd4fa192c5 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 14 Aug 2023 18:30:11 +0200 Subject: [PATCH] [mod] improve seekr engines and add documentation This patch adds some more fields to the result items and changes paging to the ``nextResultSet`` given in seekr's JSON response. Signed-off-by: Markus Heiser --- docs/dev/engines/online/seekr.rst | 13 +++ searx/engines/seekr.py | 162 +++++++++++++++++++++++++----- searx/settings.yml | 12 +-- 3 files changed, 156 insertions(+), 31 deletions(-) create mode 100644 docs/dev/engines/online/seekr.rst diff --git a/docs/dev/engines/online/seekr.rst b/docs/dev/engines/online/seekr.rst new file mode 100644 index 000000000..fcbc7bf82 --- /dev/null +++ b/docs/dev/engines/online/seekr.rst @@ -0,0 +1,13 @@ +.. _seekr engine: + +============= +Seekr Engines +============= + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + +.. automodule:: searx.engines.seekr + :members: diff --git a/searx/engines/seekr.py b/searx/engines/seekr.py index c87d21d80..9250ac991 100644 --- a/searx/engines/seekr.py +++ b/searx/engines/seekr.py @@ -1,50 +1,120 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Seekr (images, videos, news) +"""seekr.com Seeker Score + +Seekr is a privately held search and content evaluation engine that prioritizes +credibility over popularity. + +Configuration +============= + +The engine has the following additional settings: + +- :py:obj:`seekr_category` +- :py:obj:`api_key` + +This implementation is used by seekr engines in the :ref:`settings.yml +`: + +.. code:: yaml + + - name: seekr news + seekr_category: news + ... + - name: seekr images + seekr_category: images + ... + - name: seekr videos + seekr_category: videos + ... + +Known Quirks +============ + +The implementation to support :py:obj:`paging ` +is based on the *nextpage* method of Seekr's REST API. 
This feature is *next +page driven* and plays well with the :ref:`infinite_scroll ` +setting in SearXNG but it does not really fit into SearXNG's UI to select a page +by number. + +Implementations +=============== + """ from datetime import datetime from json import loads from urllib.parse import urlencode +from flask_babel import gettext about = { "website": 'https://seekr.com/', "official_api_documentation": None, - "use_official_api": True, + "use_official_api": False, "require_api_key": True, "results": 'JSON', + "language": 'en', } -paging = True # news search doesn't support paging base_url = "https://api.seekr.com" -# v2/newssearch, v1/imagetab, v1/videotab -seekr_path = "newssearch" -seekr_api_version = "v2" +paging = True + api_key = "srh1-22fb-sekr" -results_per_page = 10 +"""API key / reversed engineered / is still the same one since 2022.""" + +seekr_category: str = 'unset' +"""Search category, any of ``news``, ``videos`` or ``images``.""" + + +def init(engine_settings): + + # global paging + if engine_settings['seekr_category'] not in ['news', 'videos', 'images']: + raise ValueError(f"Unsupported seekr category: {engine_settings['seekr_category']}") def request(query, params): + + if not query: + return None + args = { 'query': query, 'apiKey': api_key, - 'limit': results_per_page, - 'offset': (params['pageno'] - 1) * results_per_page, } - path = f"{seekr_api_version}/{seekr_path}" - if seekr_api_version == "v1": - path = seekr_path + api_url = base_url + '/engine' + if seekr_category == 'news': + api_url += '/v2/newssearch' - params['url'] = f"{base_url}/engine/{path}?{urlencode(args)}" + elif seekr_category == 'images': + api_url += '/imagetab' + + elif seekr_category == 'videos': + api_url += '/videotab' + + params['url'] = f"{api_url}?{urlencode(args)}" + if params['pageno'] > 1: + nextpage = params['engine_data'].get('nextpage') + if nextpage: + params['url'] = nextpage return params def _images_response(json): - results = [] - for result in 
json['expertResponses'][0]['advice']['results']: + search_results = json.get('expertResponses') + if search_results: + search_results = search_results[0].get('advice') + else: # response from a 'nextResultSet' + search_results = json.get('advice') + + results = [] + if not search_results: + return results + + for result in search_results['results']: summary = loads(result['summary']) results.append( { @@ -53,52 +123,96 @@ def _images_response(json): 'title': result['title'], 'img_src': result['url'], 'img_format': f"{summary['width']}x{summary['height']}", + 'thumbnail_src': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'], } ) + if search_results.get('nextResultSet'): + results.append( + { + "engine_data": search_results.get('nextResultSet'), + "key": "nextpage", + } + ) return results def _videos_response(json): - results = [] - for result in json['expertResponses'][0]['advice']['results']: + search_results = json.get('expertResponses') + if search_results: + search_results = search_results[0].get('advice') + else: # response from a 'nextResultSet' + search_results = json.get('advice') + + results = [] + if not search_results: + return results + + for result in search_results['results']: + summary = loads(result['summary']) results.append( { 'template': 'videos.html', 'url': result['url'], 'title': result['title'], + 'thumbnail': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'], } ) + if search_results.get('nextResultSet'): + results.append( + { + "engine_data": search_results.get('nextResultSet'), + "key": "nextpage", + } + ) return results def _news_response(json): - results = [] - for result in json['expertResponses'][0]['advice']['categorySearchResult']['searchResult']['results']: + search_results = json.get('expertResponses') + if search_results: + search_results = search_results[0]['advice']['categorySearchResult']['searchResult'] + else: # response from a 'nextResultSet' + 
search_results = json.get('advice') + + results = [] + if not search_results: + return results + + for result in search_results['results']: + results.append( { 'url': result['url'], 'title': result['title'], - 'content': result['summary'], + 'content': result['summary'] or result["topCategory"] or result["displayUrl"] or '', 'thumbnail': result.get('thumbnail', ''), 'publishedDate': datetime.strptime(result['pubDate'][:19], '%Y-%m-%d %H:%M:%S'), + 'metadata': gettext("Language") + ': ' + result.get('language', ''), } ) + if search_results.get('nextResultSet'): + results.append( + { + "engine_data": search_results.get('nextResultSet'), + "key": "nextpage", + } + ) return results def response(resp): json = resp.json() - if seekr_path == "videotab": + if seekr_category == "videos": return _videos_response(json) - if seekr_path == "imagetab": + if seekr_category == "images": return _images_response(json) - if seekr_path == "newssearch": + if seekr_category == "news": return _news_response(json) - raise ValueError(f"Unsupported seekr path: {seekr_path}") + raise ValueError(f"Unsupported seekr category: {seekr_category}") diff --git a/searx/settings.yml b/searx/settings.yml index f26bb061e..40d6e48f4 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1809,27 +1809,25 @@ engines: - name: seekr news engine: seekr - paging: false shortcut: senews categories: news - seekr_path: newssearch - seekr_api_version: v2 + seekr_category: news disabled: true - name: seekr images engine: seekr + network: seekr news shortcut: seimg categories: images - seekr_path: imagetab - seekr_api_version: v1 + seekr_category: images disabled: true - name: seekr videos engine: seekr + network: seekr news shortcut: sevid categories: videos - seekr_path: videotab - seekr_api_version: v1 + seekr_category: videos disabled: true - name: sjp.pwn