Sync upstream

This commit is contained in:
github-actions[bot] 2024-08-22 00:29:54 +00:00
commit c4fc0609d3
13 changed files with 315 additions and 40 deletions

View file

@ -123,7 +123,7 @@ ${fedora_build}
# jump to SearXNG's working tree and install SearXNG into virtualenv
(${SERVICE_USER})$ cd \"$SEARXNG_SRC\"
(${SERVICE_USER})$ pip install -e .
(${SERVICE_USER})$ pip install --use-pep517 --no-build-isolation -e .
.. END manage.sh update_packages

View file

@ -61,7 +61,7 @@ working tree and release a ``make install`` to get a virtualenv with a
$ make install
PYENV [virtualenv] installing ./requirements*.txt into local/py3
...
PYENV [install] pip install -e 'searx[test]'
PYENV [install] pip install --use-pep517 --no-build-isolation -e 'searx[test]'
...
Successfully installed searxng-2023.7.19+a446dea1b
@ -78,7 +78,7 @@ the check fails if you edit the requirements listed in
...
PYENV [virtualenv] installing ./requirements*.txt into local/py3
...
PYENV [install] pip install -e 'searx[test]'
PYENV [install] pip install --use-pep517 --no-build-isolation -e 'searx[test]'
...
Successfully installed searxng-2023.7.19+a446dea1b

4
manage
View file

@ -300,8 +300,8 @@ pyenv.install() {
( set -e
pyenv
build_msg PYENV "[install] pip install -e 'searx${PY_SETUP_EXTRAS}'"
"${PY_ENV_BIN}/python" -m pip install -e ".${PY_SETUP_EXTRAS}"
build_msg PYENV "[install] pip install --use-pep517 --no-build-isolation -e 'searx${PY_SETUP_EXTRAS}'"
"${PY_ENV_BIN}/python" -m pip install --use-pep517 --no-build-isolation -e ".${PY_SETUP_EXTRAS}"
)
local exit_val=$?
if [ ! $exit_val -eq 0 ]; then

View file

@ -14,10 +14,16 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from datetime import datetime
from flask_babel import gettext
if TYPE_CHECKING:
import logging
logger = logging.getLogger()
about = {
"website": 'https://tineye.com',
"wikidata_id": 'Q2382535',
@ -34,7 +40,7 @@ categories = ['general']
paging = True
safesearch = False
base_url = 'https://tineye.com'
search_string = '/result_json/?page={page}&{query}'
search_string = '/api/v1/result_json/?page={page}&{query}'
FORMAT_NOT_SUPPORTED = gettext(
"Could not read that image url. This may be due to an unsupported file"
@ -120,7 +126,7 @@ def parse_tineye_match(match_json):
crawl_date = backlink_json.get("crawl_date")
if crawl_date:
crawl_date = datetime.fromisoformat(crawl_date[:-3])
crawl_date = datetime.strptime(crawl_date, '%Y-%m-%d')
else:
crawl_date = datetime.min
@ -150,29 +156,15 @@ def parse_tineye_match(match_json):
def response(resp):
"""Parse HTTP response from TinEye."""
results = []
try:
# handle the 422 client side errors, and the possible 400 status code error
if resp.status_code in (400, 422):
json_data = resp.json()
except Exception as exc: # pylint: disable=broad-except
msg = "can't parse JSON response // %s" % exc
logger.error(msg)
json_data = {'error': msg}
# handle error codes from Tineye
if resp.is_error:
if resp.status_code in (400, 422):
message = 'HTTP status: %s' % resp.status_code
error = json_data.get('error')
s_key = json_data.get('suggestions', {}).get('key', '')
if error and s_key:
message = "%s (%s)" % (error, s_key)
elif error:
message = error
suggestions = json_data.get('suggestions', {})
message = f'HTTP Status Code: {resp.status_code}'
if resp.status_code == 422:
s_key = suggestions.get('key', '')
if s_key == "Invalid image URL":
# test https://docs.searxng.org/_static/searxng-wordmark.svg
message = FORMAT_NOT_SUPPORTED
@ -182,16 +174,23 @@ def response(resp):
elif s_key == 'Download Error':
# test https://notexists
message = DOWNLOAD_ERROR
else:
logger.warning("Unknown suggestion key encountered: %s", s_key)
else: # 400
description = suggestions.get('description')
if isinstance(description, list):
message = ','.join(description)
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
# results.append({'answer': message})
logger.error(message)
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
# results.append({'answer': message})
logger.error(message)
return []
return results
# Raise for all other responses
resp.raise_for_status()
resp.raise_for_status()
# append results from matches
results = []
json_data = resp.json()
for match_json in json_data['matches']:

133
searx/engines/yandex.py Normal file
View file

@ -0,0 +1,133 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Yandex (Web, images)"""
from json import loads
from urllib.parse import urlencode
from html import unescape
from lxml import html
from searx.exceptions import SearxEngineCaptchaException
from searx.utils import humanize_bytes, eval_xpath, eval_xpath_list, extract_text, extr
# Engine metadata
# Engine metadata: descriptive information about the Yandex service this
# engine scrapes (results are taken from HTML, no official API, no API key).
about = {
"website": 'https://yandex.com/',
"wikidata_id": 'Q5281',
"official_api_documentation": "?",
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# Engine configuration
# ``categories`` is empty here; the engine entries in settings.yml assign
# ``general`` / ``images`` per instance.
categories = []
paging = True
# Selects the mode of this engine instance.  request() and response() compare
# it against 'web' and 'images'; it is empty by default and is set per engine
# in settings.yml (``search_type: web`` / ``search_type: images``).
search_type = ""
# Search URL
# One endpoint per search_type: 'web' uses base_url_web, 'images' uses
# base_url_images.
base_url_web = 'https://yandex.com/search/site/'
base_url_images = 'https://yandex.com/images/search'
# XPath selectors applied by response() to the HTML of a 'web' search:
# results_xpath matches one node per result, the remaining three are evaluated
# relative to that node.
results_xpath = '//li[contains(@class, "serp-item")]'
url_xpath = './/a[@class="b-serp-item__title-link"]/@href'
title_xpath = './/h3[@class="b-serp-item__title"]/a[@class="b-serp-item__title-link"]/span'
content_xpath = './/div[@class="b-serp-item__content"]//div[@class="b-serp-item__text"]'
def catch_bad_response(resp):
    """Raise if the response URL points at Yandex's captcha page.

    :param resp: HTTP response whose ``url.path`` is inspected.
    :raises SearxEngineCaptchaException: when the path starts with
        ``/showcaptcha``.
    """
    landed_on = resp.url.path
    if not landed_on.startswith('/showcaptcha'):
        return
    raise SearxEngineCaptchaException()
def request(query, params):
    """Fill ``params`` with the cookie and URL for a Yandex search.

    The URL is chosen by the module-level ``search_type`` (``'web'`` or
    ``'images'``); for any other value no ``url`` is set.

    :param query: search terms, sent as the ``text`` query argument.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['cookies']`` / ``params['url']`` are written.
    :returns: the same ``params`` mapping.
    """
    web_args = {
        "tmpl_version": "releases",
        "text": query,
        "web": "1",
        "frame": "1",
        "searchid": "3131712",
    }
    image_args = {
        "text": query,
        "uinfo": "sw-1920-sh-1080-ww-1125-wh-999",
    }

    # The ``p`` argument is only sent from the second page on and is
    # zero-based relative to ``pageno``.
    if params['pageno'] > 1:
        page_index = params["pageno"] - 1
        web_args["p"] = page_index
        image_args["p"] = page_index

    params["cookies"] = {'cookie': "yp=1716337604.sp.family%3A0#1685406411.szm.1:1920x1080:1920x999"}

    if search_type == 'web':
        params['url'] = base_url_web + "?" + urlencode(web_args)
    elif search_type == 'images':
        params['url'] = base_url_images + "?" + urlencode(image_args)

    return params
def response(resp):
    """Turn a Yandex HTTP response into a list of result dicts.

    The module-level ``search_type`` selects the parser:

    - ``'web'``: every node matched by ``results_xpath`` yields a dict with
      the keys ``url``, ``title`` and ``content``.
    - ``'images'``: a JSON object embedded in the page is cut out with
      ``extr`` and every entry below
      ``initialState.serpList.items.entities`` becomes an ``images.html``
      result.

    In both modes ``catch_bad_response`` is called first, so a captcha
    redirect raises before any parsing happens.

    :param resp: HTTP response; ``resp.text`` holds the page and ``resp.url``
        is checked for the captcha path.
    :returns: list of result dicts; an empty list for any other
        ``search_type``.
    """
    if search_type == 'web':
        catch_bad_response(resp)
        dom = html.fromstring(resp.text)
        return [
            {
                'url': extract_text(eval_xpath(result, url_xpath)),
                'title': extract_text(eval_xpath(result, title_xpath)),
                'content': extract_text(eval_xpath(result, content_xpath)),
            }
            for result in eval_xpath_list(dom, results_xpath)
        ]

    if search_type == 'images':
        catch_bad_response(resp)

        html_data = html.fromstring(resp.text)
        html_sample = unescape(html.tostring(html_data, encoding='unicode'))

        # The JSON blob is located by its start marker and one of two known
        # end markers.  "fail" is the sentinel passed as ``default``; when it
        # comes back, retry with the alternative end marker.
        start_marker = '{"location":"/images/search/'
        end_marker = 'advRsyaSearchColumn":null}}'
        content_between_tags = extr(html_sample, start_marker, end_marker, default="fail")
        if content_between_tags == "fail":
            end_marker = 'false}}}'
            content_between_tags = extr(html_sample, start_marker, end_marker)

        # extr drops the markers themselves, so they are put back before parsing.
        json_resp = loads(start_marker + content_between_tags + end_marker)

        results = []
        # Only the entity payloads are needed, not their keys.
        for item_data in json_resp['initialState']['serpList']['items']['entities'].values():
            title = item_data['snippet']['title']
            source = item_data['snippet']['url']
            thumb = item_data['image']

            # All full-size image attributes come from the first "dups" entry.
            first_dup = item_data['viewerData']['dups'][0]
            fullsize_image = first_dup['url']
            height = first_dup['h']
            width = first_dup['w']
            humanized_filesize = humanize_bytes(first_dup['fileSizeInBytes'])

            results.append(
                {
                    'title': title,
                    'url': source,
                    'img_src': fullsize_image,
                    'filesize': humanized_filesize,
                    'thumbnail_src': thumb,
                    'template': 'images.html',
                    'resolution': f'{width} x {height}',
                }
            )

        return results

    return []

View file

@ -1,7 +1,19 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=too-many-branches
"""In addition to rewriting/replacing result URLs, the *hostnames* plugin offers
other features.
"""
.. attention::
The **"Hostname replace"** plugin has been replaced by **"Hostnames
plugin"**, see :pull:`3463` & :pull:`3552`.
The **Hostnames plugin** can be enabled by adding it to the
``enabled_plugins`` **list** in the ``settings.yml`` like so.
.. code:: yaml
enabled_plugins:
- 'Hostnames plugin'
...
- ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
replaced by other hostnames.

View file

@ -1814,6 +1814,22 @@ engines:
engine: unsplash
shortcut: us
- name: yandex
engine: yandex
categories: general
search_type: web
shortcut: yd
disabled: true
inactive: true
- name: yandex images
engine: yandex
categories: images
search_type: images
shortcut: ydi
disabled: true
inactive: true
- name: yandex music
engine: yandex_music
shortcut: ydm

View file

@ -10,6 +10,7 @@ class TestEnginesInit(SearxTestCase): # pylint: disable=missing-class-docstring
def tearDownClass(cls):
settings['outgoing']['using_tor_proxy'] = False
settings['outgoing']['extra_proxy_timeout'] = 0
engines.load_engines([])
def test_initialize_engines_default(self):
engine_list = [

View file

@ -2,6 +2,7 @@
# pylint: disable=missing-module-docstring, invalid-name
from copy import copy
import logging
import searx.search
from searx.search import SearchQuery, EngineRef
@ -46,8 +47,13 @@ class SearchQueryTestCase(SearxTestCase): # pylint: disable=missing-class-docst
class SearchTestCase(SearxTestCase): # pylint: disable=missing-class-docstring
def setUp(self):
    """Import ``searx.webapp`` and expose its Flask app as ``self.app``.

    The ``searx`` logger is switched to ``ERROR`` for the duration of the
    import (presumably to silence messages emitted while the web
    application is loaded -- confirm) and restored to its previous level
    afterwards.
    """
    log = logging.getLogger("searx")
    log_lev = log.level
    log.setLevel(logging.ERROR)
    try:
        from searx import webapp  # pylint: disable=import-outside-toplevel
    finally:
        # Restore the level even if the import raises, so a failing import
        # does not leave the logger muted for the tests that follow.
        log.setLevel(log_lev)

    self.app = webapp.app
@classmethod

102
tests/unit/test_tineye.py Normal file
View file

@ -0,0 +1,102 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from datetime import datetime
from unittest.mock import Mock
from requests import HTTPError
from searx.engines import load_engines, tineye
from tests import SearxTestCase
class TinEyeTests(SearxTestCase):  # pylint: disable=missing-class-docstring
    # Unit tests for ``tineye.response``: error handling for HTTP 400 / 422,
    # propagation of other HTTP errors, and parsing of ``crawl_date``.

    def setUp(self):
        load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}])

    def tearDown(self):
        load_engines([])

    @staticmethod
    def _error_response(status_code, payload):
        """Return a mocked response with the given status and JSON payload
        whose ``raise_for_status`` raises :class:`HTTPError`."""
        resp = Mock()
        resp.json.return_value = payload
        resp.status_code = status_code
        resp.raise_for_status.side_effect = HTTPError()
        return resp

    def _run_and_capture(self, resp):
        """Run ``tineye.response`` and return ``(results, joined log output)``.

        Fails the test when nothing is logged on ``tineye.logger``.
        """
        with self.assertLogs(tineye.logger) as captured:
            results = tineye.response(resp)
        return results, ','.join(captured.output)

    def test_status_code_raises(self):
        resp = Mock()
        resp.status_code = 401
        resp.raise_for_status.side_effect = HTTPError()
        with self.assertRaises(HTTPError):
            tineye.response(resp)

    def test_returns_empty_list_for_422(self):
        results, _ = self._run_and_capture(self._error_response(422, {}))
        self.assertEqual(0, len(results))

    def test_logs_format_for_422(self):
        payload = {"suggestions": {"key": "Invalid image URL"}}
        _, logged = self._run_and_capture(self._error_response(422, payload))
        self.assertIn(tineye.FORMAT_NOT_SUPPORTED, logged)

    def test_logs_signature_for_422(self):
        payload = {"suggestions": {"key": "NO_SIGNATURE_ERROR"}}
        _, logged = self._run_and_capture(self._error_response(422, payload))
        self.assertIn(tineye.NO_SIGNATURE_ERROR, logged)

    def test_logs_download_for_422(self):
        payload = {"suggestions": {"key": "Download Error"}}
        _, logged = self._run_and_capture(self._error_response(422, payload))
        self.assertIn(tineye.DOWNLOAD_ERROR, logged)

    def test_empty_list_for_400(self):
        results, _ = self._run_and_capture(self._error_response(400, {}))
        self.assertEqual(0, len(results))

    def test_logs_description_for_400(self):
        description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645'
        payload = {"suggestions": {"description": [description], "title": "Oops! We're sorry!"}}
        _, logged = self._run_and_capture(self._error_response(400, payload))
        self.assertIn(description, logged)

    def test_crawl_date_parses(self):
        date_str = '2020-05-25'
        expected = datetime.strptime(date_str, '%Y-%m-%d')
        resp = Mock()
        resp.json.return_value = {'matches': [{'backlinks': [{'crawl_date': date_str}]}]}
        resp.status_code = 200
        results = tineye.response(resp)
        self.assertEqual(expected, results[0]['publishedDate'])

View file

@ -1,6 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
import logging
import json
from urllib.parse import ParseResult
from mock import Mock
@ -20,8 +21,13 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=missing-class-docstring,
self.setattr4test(searx.search.processors, 'initialize_processor', dummy)
log = logging.getLogger("searx")
log_lev = log.level
log.setLevel(logging.ERROR)
from searx import webapp # pylint: disable=import-outside-toplevel
log.setLevel(log_lev)
webapp.app.config['TESTING'] = True # to get better error messages
self.app = webapp.app.test_client()

View file

@ -663,8 +663,8 @@ pyenv.install() {
pyenv
fi
for i in ${PYOBJECTS}; do
build_msg PYENV "[install] pip install -e '$i${PY_SETUP_EXTRAS}'"
"${PY_ENV_BIN}/python" -m pip install -e "$i${PY_SETUP_EXTRAS}"
build_msg PYENV "[install] pip install --use-pep517 --no-build-isolation -e '$i${PY_SETUP_EXTRAS}'"
"${PY_ENV_BIN}/python" -m pip install --use-pep517 --no-build-isolation -e "$i${PY_SETUP_EXTRAS}"
done
fi
pyenv.install.OK

View file

@ -501,7 +501,7 @@ pip install -U setuptools
pip install -U wheel
pip install -U pyyaml
cd ${SEARXNG_SRC}
pip install -e .
pip install --use-pep517 --no-build-isolation -e .
EOF
}
@ -569,7 +569,7 @@ pip install -U pip
pip install -U setuptools
pip install -U wheel
pip install -U pyyaml
pip install -U -e .
pip install -U --use-pep517 --no-build-isolation -e .
EOF
rst_para "update instance's settings.yml from ${SEARXNG_SETTINGS_PATH}"
DEFAULT_SELECT=2 \