mirror of https://github.com/searxng/searxng.git
[feat] implementation of presearch engine
This commit is contained in:
parent
99fb565b39
commit
23582aac5c
|
@ -0,0 +1,102 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Presearch (general, images, videos, news)
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from searx.network import get
|
||||
from searx.utils import gen_useragent, html_to_text
|
||||
|
||||
# Engine metadata.
about = {
    "website": "https://presearch.io",
    # Spelled ``wikidata_id``: the earlier ``wikidiata_id`` typo meant that any
    # lookup of the ``wikidata_id`` key on this dict came back empty.
    "wikidata_id": "Q7240905",
    "official_api_documentation": "https://docs.presearch.io/nodes/api",
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

# Engine capabilities and default categories.
paging = True
time_range_support = True
categories = ["general", "web"]  # general, images, videos, news

# Selects both the path of the preliminary HTML request and the key that is
# read from the JSON answer.
search_type = "search"  # must be any of "search", "images", "videos", "news"

base_url = "https://presearch.com"

# Value of the ``use_safe_search`` cookie per safesearch level; levels 1 and 2
# both switch the filter on.
safesearch_map = {0: 'false', 1: 'true', 2: 'true'}
|
||||
|
||||
|
||||
def _get_request_id(query, page, time_range, safesearch):
    """Fetch the HTML search page and return the search id embedded in it.

    The page is requested from ``{base_url}/{search_type}`` and scanned for a
    line assigning ``window.searchId``.

    :param query: search terms, sent as the ``q`` argument.
    :param page: page number, sent as the ``page`` argument.
    :param time_range: sent as the ``time_range`` argument, only when truthy.
    :param safesearch: key into ``safesearch_map``; selects the value of the
        ``use_safe_search`` cookie.
    :return: the search id as a string, or ``None`` when the response
        contains no ``window.searchId = `` assignment.
    """
    args = {
        "q": query,
        "page": page,
    }
    if time_range:
        args["time_range"] = time_range

    url = f"{base_url}/{search_type}?{urlencode(args)}"
    headers = {
        'User-Agent': gen_useragent(),
        'Cookie': f"b=1;presearch_session=;use_safe_search={safesearch_map[safesearch]}",
    }
    resp_text = get(url, headers=headers).text

    marker = "window.searchId = "
    # splitlines() copes with "\r\n" endings, which a split on "\n" would
    # leave attached to the value.
    for line in resp_text.splitlines():
        _, found, value = line.partition(marker)
        if not found:
            continue
        # ``value`` looks like ``"<id>";``: keep what precedes the statement
        # terminator, then drop surrounding whitespace and the quotes.
        return value.partition(";")[0].strip().strip("\"'")

    return None
|
||||
|
||||
|
||||
def _is_valid_img_src(url):
|
||||
# in some cases, the image url is a base64 encoded string, which has to be skipped
|
||||
return "https://" in url
|
||||
|
||||
|
||||
def request(query, params):
    """Prepare the JSON result request for *query*.

    A preliminary request (``_get_request_id``) obtains the search id; the
    result URL ``{base_url}/results?id=<id>`` is then stored in *params*.

    :param query: search terms.
    :param params: request parameters; ``pageno``, ``time_range`` and
        ``safesearch`` are read, ``headers["Accept"]`` and ``url`` are written.
    :return: the same *params* dict.
    """
    request_id = _get_request_id(query, params["pageno"], params["time_range"], params["safesearch"])

    if not request_id:
        # Without a search id the only URL that could be built is
        # ".../results?id=None".  Leave the URL unset instead.
        # NOTE(review): SearXNG's online processor is expected to skip
        # requests whose url is None - confirm against the target version.
        params["url"] = None
        return params

    params["headers"]["Accept"] = "application/json"
    params["url"] = f"{base_url}/results?id={request_id}"

    return params
|
||||
|
||||
|
||||
def response(resp):
    """Convert the JSON answer into a list of result dicts.

    For ``search_type == "search"`` the items are read from
    ``results.standardResults``; for every other type they are read from the
    top-level key named after ``search_type``.

    :param resp: response object providing ``json()``.
    :return: list of dicts with ``url``, ``title`` and ``content``; image and
        video results additionally carry ``template`` and media fields.
    """
    results = []

    # named ``data`` rather than ``json`` to avoid shadowing the stdlib module name
    data = resp.json()

    if search_type == "search":
        json_results = data['results'].get('standardResults', [])
    else:
        json_results = data.get(search_type, [])

    for json_result in json_results:
        result = {
            'url': json_result['link'],
            'title': json_result['title'],
            # ``or ''`` also covers an explicit null description
            'content': html_to_text(json_result.get('description') or ''),
        }

        if search_type == "images":
            # items whose image is not an https URL are dropped entirely
            if not _is_valid_img_src(json_result['image']):
                continue

            result['template'] = 'images.html'
            result['img_src'] = json_result['image']
            if _is_valid_img_src(json_result['thumbnail']):
                # SearXNG image results carry their preview in
                # ``thumbnail_src``; ``thumbnail`` is kept so existing
                # consumers of that key still find it.
                result['thumbnail_src'] = json_result['thumbnail']
                result['thumbnail'] = json_result['thumbnail']

        elif search_type == "videos":
            result['template'] = 'videos.html'

            if _is_valid_img_src(json_result['image']):
                result['thumbnail'] = json_result['image']

            # a single item without a duration must not discard the whole
            # result list with a KeyError
            duration = json_result.get('duration')
            if duration is not None:
                result['duration'] = duration
                result['length'] = duration

        results.append(result)

    return results
|
|
@ -1290,6 +1290,36 @@ engines:
|
|||
# query_str: 'SELECT * from my_table WHERE my_column = %(query)s'
|
||||
# shortcut : psql
|
||||
|
||||
# Presearch: one entry per ``search_type`` value of the presearch engine module.
- name: presearch
  engine: presearch
  search_type: search
  categories: [general, web]
  shortcut: ps

# The images, videos and news entries below are disabled and each sets a
# 4.0 timeout.
- name: presearch images
  engine: presearch
  search_type: images
  categories: [images, web]
  timeout: 4.0
  shortcut: psimg
  disabled: true

- name: presearch videos
  engine: presearch
  search_type: videos
  categories: [videos, web]
  timeout: 4.0
  shortcut: psvid
  disabled: true

- name: presearch news
  engine: presearch
  search_type: news
  categories: [news, web]
  timeout: 4.0
  shortcut: psnews
  disabled: true
|
||||
|
||||
- name: pub.dev
|
||||
engine: xpath
|
||||
shortcut: pd
|
||||
|
|
Loading…
Reference in New Issue