[mod] improve seekr engines and add documentation

This patch adds some more fields to the result items and changes paging to use
the ``nextResultSet`` given in seekr's JSON response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2023-08-14 18:30:11 +02:00 committed by MatthieuBarbu
parent 8bf0791117
commit 17394ad558
3 changed files with 156 additions and 31 deletions

View File

@ -0,0 +1,13 @@
.. _seekr engine:
=============
Seekr Engines
=============
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
.. automodule:: searx.engines.seekr
:members:

View File

@ -1,50 +1,120 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint # lint: pylint
"""Seekr (images, videos, news) """seekr.com Seeker Score
Seekr is a privately held search and content evaluation engine that prioritizes
credibility over popularity.
Configuration
=============
The engine has the following additional settings:
- :py:obj:`seekr_category`
- :py:obj:`api_key`
This implementation is used by seekr engines in the :ref:`settings.yml
<settings engine>`:
.. code:: yaml
- name: seekr news
seekr_category: news
...
- name: seekr images
seekr_category: images
...
- name: seekr videos
seekr_category: videos
...
Known Quirks
============
The implementation to support :py:obj:`paging <searx.enginelib.Engine.paging>`
is based on the *nextpage* method of Seekr's REST API. This feature is *next
page driven* and plays well with the :ref:`infinite_scroll <settings ui>`
setting in SearXNG but it does not really fit into SearXNG's UI to select a page
by number.
Implementations
===============
""" """
from datetime import datetime from datetime import datetime
from json import loads from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from flask_babel import gettext
about = { about = {
"website": 'https://seekr.com/', "website": 'https://seekr.com/',
"official_api_documentation": None, "official_api_documentation": None,
"use_official_api": True, "use_official_api": False,
"require_api_key": True, "require_api_key": True,
"results": 'JSON', "results": 'JSON',
"language": 'en',
} }
paging = True # news search doesn't support paging
base_url = "https://api.seekr.com" base_url = "https://api.seekr.com"
# v2/newssearch, v1/imagetab, v1/videotab paging = True
seekr_path = "newssearch"
seekr_api_version = "v2"
api_key = "srh1-22fb-sekr" api_key = "srh1-22fb-sekr"
results_per_page = 10 """API key / reverse engineered / is still the same one since 2022."""
seekr_category: str = 'unset'
"""Search category, any of ``news``, ``videos`` or ``images``."""
def init(engine_settings):
# global paging
if engine_settings['seekr_category'] not in ['news', 'videos', 'images']:
raise ValueError(f"Unsupported seekr category: {engine_settings['seekr_category']}")
def request(query, params): def request(query, params):
if not query:
return None
args = { args = {
'query': query, 'query': query,
'apiKey': api_key, 'apiKey': api_key,
'limit': results_per_page,
'offset': (params['pageno'] - 1) * results_per_page,
} }
path = f"{seekr_api_version}/{seekr_path}" api_url = base_url + '/engine'
if seekr_api_version == "v1": if seekr_category == 'news':
path = seekr_path api_url += '/v2/newssearch'
params['url'] = f"{base_url}/engine/{path}?{urlencode(args)}" elif seekr_category == 'images':
api_url += '/imagetab'
elif seekr_category == 'videos':
api_url += '/videotab'
params['url'] = f"{api_url}?{urlencode(args)}"
if params['pageno'] > 1:
nextpage = params['engine_data'].get('nextpage')
if nextpage:
params['url'] = nextpage
return params return params
def _images_response(json): def _images_response(json):
results = []
for result in json['expertResponses'][0]['advice']['results']: search_results = json.get('expertResponses')
if search_results:
search_results = search_results[0].get('advice')
else: # response from a 'nextResultSet'
search_results = json.get('advice')
results = []
if not search_results:
return results
for result in search_results['results']:
summary = loads(result['summary']) summary = loads(result['summary'])
results.append( results.append(
{ {
@ -53,52 +123,96 @@ def _images_response(json):
'title': result['title'], 'title': result['title'],
'img_src': result['url'], 'img_src': result['url'],
'img_format': f"{summary['width']}x{summary['height']}", 'img_format': f"{summary['width']}x{summary['height']}",
'thumbnail_src': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'],
} }
) )
if search_results.get('nextResultSet'):
results.append(
{
"engine_data": search_results.get('nextResultSet'),
"key": "nextpage",
}
)
return results return results
def _videos_response(json): def _videos_response(json):
results = []
for result in json['expertResponses'][0]['advice']['results']: search_results = json.get('expertResponses')
if search_results:
search_results = search_results[0].get('advice')
else: # response from a 'nextResultSet'
search_results = json.get('advice')
results = []
if not search_results:
return results
for result in search_results['results']:
summary = loads(result['summary'])
results.append( results.append(
{ {
'template': 'videos.html', 'template': 'videos.html',
'url': result['url'], 'url': result['url'],
'title': result['title'], 'title': result['title'],
'thumbnail': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'],
} }
) )
if search_results.get('nextResultSet'):
results.append(
{
"engine_data": search_results.get('nextResultSet'),
"key": "nextpage",
}
)
return results return results
def _news_response(json): def _news_response(json):
results = []
for result in json['expertResponses'][0]['advice']['categorySearchResult']['searchResult']['results']: search_results = json.get('expertResponses')
if search_results:
search_results = search_results[0]['advice']['categorySearchResult']['searchResult']
else: # response from a 'nextResultSet'
search_results = json.get('advice')
results = []
if not search_results:
return results
for result in search_results['results']:
results.append( results.append(
{ {
'url': result['url'], 'url': result['url'],
'title': result['title'], 'title': result['title'],
'content': result['summary'], 'content': result['summary'] or result["topCategory"] or result["displayUrl"] or '',
'thumbnail': result.get('thumbnail', ''), 'thumbnail': result.get('thumbnail', ''),
'publishedDate': datetime.strptime(result['pubDate'][:19], '%Y-%m-%d %H:%M:%S'), 'publishedDate': datetime.strptime(result['pubDate'][:19], '%Y-%m-%d %H:%M:%S'),
'metadata': gettext("Language") + ': ' + result.get('language', ''),
} }
) )
if search_results.get('nextResultSet'):
results.append(
{
"engine_data": search_results.get('nextResultSet'),
"key": "nextpage",
}
)
return results return results
def response(resp): def response(resp):
json = resp.json() json = resp.json()
if seekr_path == "videotab": if seekr_category == "videos":
return _videos_response(json) return _videos_response(json)
if seekr_path == "imagetab": if seekr_category == "images":
return _images_response(json) return _images_response(json)
if seekr_path == "newssearch": if seekr_category == "news":
return _news_response(json) return _news_response(json)
raise ValueError(f"Unsupported seekr path: {seekr_path}") raise ValueError(f"Unsupported seekr category: {seekr_category}")

View File

@ -1809,27 +1809,25 @@ engines:
- name: seekr news - name: seekr news
engine: seekr engine: seekr
paging: false
shortcut: senews shortcut: senews
categories: news categories: news
seekr_path: newssearch seekr_category: news
seekr_api_version: v2
disabled: true disabled: true
- name: seekr images - name: seekr images
engine: seekr engine: seekr
network: seekr news
shortcut: seimg shortcut: seimg
categories: images categories: images
seekr_path: imagetab seekr_category: images
seekr_api_version: v1
disabled: true disabled: true
- name: seekr videos - name: seekr videos
engine: seekr engine: seekr
network: seekr news
shortcut: sevid shortcut: sevid
categories: videos categories: videos
seekr_path: videotab seekr_category: videos
seekr_api_version: v1
disabled: true disabled: true
- name: sjp.pwn - name: sjp.pwn