Mirror of https://github.com/searxng/searxng, synced 2024-01-01 19:24:07 +01:00

Merge remote-tracking branch 'origin/latesto'
Commit d008d78cd6
408 changed files with 96618 additions and 55272 deletions
@@ -3,6 +3,7 @@
"""This module implements functions needed for the autocompleter.

"""
# pylint: disable=use-dict-literal

from json import loads
from urllib.parse import urlencode

@@ -89,17 +90,24 @@ def seznam(query, _lang):
    # seznam search autocompleter
    url = 'https://suggest.seznam.cz/fulltext/cs?{query}'

    resp = get(url.format(query=urlencode(
        {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
    )))
    resp = get(
        url.format(
            query=urlencode(
                {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
            )
        )
    )

    if not resp.ok:
        return []

    data = resp.json()
    return [''.join(
        [part.get('text', '') for part in item.get('text', [])]
    ) for item in data.get('result', []) if item.get('itemType', None) == 'ItemType.TEXT']
    return [
        ''.join([part.get('text', '') for part in item.get('text', [])])
        for item in data.get('result', [])
        if item.get('itemType', None) == 'ItemType.TEXT'
    ]


def startpage(query, lang):
    # startpage autocompleter

@@ -145,6 +153,16 @@ def wikipedia(query, lang):
    return []


def yandex(query, _lang):
    # yandex autocompleter
    url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"

    resp = loads(get(url.format(urlencode(dict(part=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


backends = {
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,

@@ -155,6 +173,7 @@ backends = {
    'qwant': qwant,
    'wikipedia': wikipedia,
    'brave': brave,
    'yandex': yandex,
}
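For orientation, a minimal sketch (not part of the commit) of how a backend from the backends mapping above can be dispatched; the searx.autocomplete import path is assumed, and network access is required:

    # Minimal sketch: dispatch a query to one of the registered autocompleters.
    from searx.autocomplete import backends  # assumed import path

    def suggest(backend_name: str, query: str, lang: str = 'en') -> list:
        # Look up the autocompleter function registered under the given name.
        backend = backends.get(backend_name)
        if backend is None:
            return []
        # Each backend takes (query, lang) and returns a list of suggestion strings.
        return backend(query, lang)

    print(suggest('yandex', 'searxng'))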
@@ -7,8 +7,11 @@
# pylint: disable=C,R


__all__ = ('cached_property',)


try:
    from functools import cached_property  # pylint: disable=unused-import
    from functools import cached_property  # type: ignore
except ImportError:
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
@ -1396,170 +1396,155 @@
|
|||
"sv",
|
||||
"zh"
|
||||
],
|
||||
"qwant": [
|
||||
"bg-BG",
|
||||
"ca-ES",
|
||||
"cs-CZ",
|
||||
"da-DK",
|
||||
"de-AT",
|
||||
"de-CH",
|
||||
"de-DE",
|
||||
"el-GR",
|
||||
"en-AU",
|
||||
"en-CA",
|
||||
"en-GB",
|
||||
"en-IE",
|
||||
"en-MY",
|
||||
"en-NZ",
|
||||
"en-US",
|
||||
"es-AR",
|
||||
"es-CL",
|
||||
"es-ES",
|
||||
"es-MX",
|
||||
"et-EE",
|
||||
"fi-FI",
|
||||
"fr-BE",
|
||||
"fr-CA",
|
||||
"fr-CH",
|
||||
"fr-FR",
|
||||
"hu-HU",
|
||||
"it-CH",
|
||||
"it-IT",
|
||||
"ko-KR",
|
||||
"nb-NO",
|
||||
"nl-BE",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"pt-PT",
|
||||
"ro-RO",
|
||||
"sv-SE",
|
||||
"th-TH",
|
||||
"zh-CN",
|
||||
"zh-HK"
|
||||
],
|
||||
"qwant images": [
|
||||
"bg-BG",
|
||||
"ca-ES",
|
||||
"cs-CZ",
|
||||
"da-DK",
|
||||
"de-AT",
|
||||
"de-CH",
|
||||
"de-DE",
|
||||
"el-GR",
|
||||
"en-AU",
|
||||
"en-CA",
|
||||
"en-GB",
|
||||
"en-IE",
|
||||
"en-MY",
|
||||
"en-NZ",
|
||||
"en-US",
|
||||
"es-AR",
|
||||
"es-CL",
|
||||
"es-ES",
|
||||
"es-MX",
|
||||
"et-EE",
|
||||
"fi-FI",
|
||||
"fr-BE",
|
||||
"fr-CA",
|
||||
"fr-CH",
|
||||
"fr-FR",
|
||||
"hu-HU",
|
||||
"it-CH",
|
||||
"it-IT",
|
||||
"ko-KR",
|
||||
"nb-NO",
|
||||
"nl-BE",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"pt-PT",
|
||||
"ro-RO",
|
||||
"sv-SE",
|
||||
"th-TH",
|
||||
"zh-CN",
|
||||
"zh-HK"
|
||||
],
|
||||
"qwant news": [
|
||||
"bg-BG",
|
||||
"ca-ES",
|
||||
"cs-CZ",
|
||||
"da-DK",
|
||||
"de-AT",
|
||||
"de-CH",
|
||||
"de-DE",
|
||||
"el-GR",
|
||||
"en-AU",
|
||||
"en-CA",
|
||||
"en-GB",
|
||||
"en-IE",
|
||||
"en-MY",
|
||||
"en-NZ",
|
||||
"en-US",
|
||||
"es-AR",
|
||||
"es-CL",
|
||||
"es-ES",
|
||||
"es-MX",
|
||||
"et-EE",
|
||||
"fi-FI",
|
||||
"fr-BE",
|
||||
"fr-CA",
|
||||
"fr-CH",
|
||||
"fr-FR",
|
||||
"hu-HU",
|
||||
"it-CH",
|
||||
"it-IT",
|
||||
"ko-KR",
|
||||
"nb-NO",
|
||||
"nl-BE",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"pt-PT",
|
||||
"ro-RO",
|
||||
"sv-SE",
|
||||
"th-TH",
|
||||
"zh-CN",
|
||||
"zh-HK"
|
||||
],
|
||||
"qwant videos": [
|
||||
"bg-BG",
|
||||
"ca-ES",
|
||||
"cs-CZ",
|
||||
"da-DK",
|
||||
"de-AT",
|
||||
"de-CH",
|
||||
"de-DE",
|
||||
"el-GR",
|
||||
"en-AU",
|
||||
"en-CA",
|
||||
"en-GB",
|
||||
"en-IE",
|
||||
"en-MY",
|
||||
"en-NZ",
|
||||
"en-US",
|
||||
"es-AR",
|
||||
"es-CL",
|
||||
"es-ES",
|
||||
"es-MX",
|
||||
"et-EE",
|
||||
"fi-FI",
|
||||
"fr-BE",
|
||||
"fr-CA",
|
||||
"fr-CH",
|
||||
"fr-FR",
|
||||
"hu-HU",
|
||||
"it-CH",
|
||||
"it-IT",
|
||||
"ko-KR",
|
||||
"nb-NO",
|
||||
"nl-BE",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"pt-PT",
|
||||
"ro-RO",
|
||||
"sv-SE",
|
||||
"th-TH",
|
||||
"zh-CN",
|
||||
"zh-HK"
|
||||
],
|
||||
"qwant": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"qwant images": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"qwant news": {
|
||||
"ca-ES": "ca_ES",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pt-PT": "pt_PT"
|
||||
},
|
||||
"qwant videos": {
|
||||
"bg-BG": "bg_BG",
|
||||
"ca-ES": "ca_ES",
|
||||
"cs-CZ": "cs_CZ",
|
||||
"da-DK": "da_DK",
|
||||
"de-AT": "de_AT",
|
||||
"de-CH": "de_CH",
|
||||
"de-DE": "de_DE",
|
||||
"el-GR": "el_GR",
|
||||
"en-AU": "en_AU",
|
||||
"en-CA": "en_CA",
|
||||
"en-GB": "en_GB",
|
||||
"en-IE": "en_IE",
|
||||
"en-MY": "en_MY",
|
||||
"en-NZ": "en_NZ",
|
||||
"en-US": "en_US",
|
||||
"es-AR": "es_AR",
|
||||
"es-CL": "es_CL",
|
||||
"es-ES": "es_ES",
|
||||
"es-MX": "es_MX",
|
||||
"et-EE": "et_EE",
|
||||
"fi-FI": "fi_FI",
|
||||
"fr-BE": "fr_BE",
|
||||
"fr-CA": "fr_CA",
|
||||
"fr-CH": "fr_CH",
|
||||
"fr-FR": "fr_FR",
|
||||
"hu-HU": "hu_HU",
|
||||
"it-CH": "it_CH",
|
||||
"it-IT": "it_IT",
|
||||
"ko-KR": "ko_KR",
|
||||
"nb-NO": "nb_NO",
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pl-PL": "pl_PL",
|
||||
"pt-PT": "pt_PT",
|
||||
"ro-RO": "ro_RO",
|
||||
"sv-SE": "sv_SE",
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"startpage": {
|
||||
"af": {
|
||||
"alias": "afrikaans"
|
||||
|
|
@ -1952,6 +1937,10 @@
|
|||
"english_name": "Banjar",
|
||||
"name": "Bahasa Banjar"
|
||||
},
|
||||
"blk": {
|
||||
"english_name": "Pa'O",
|
||||
"name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f"
|
||||
},
|
||||
"bm": {
|
||||
"english_name": "Bambara",
|
||||
"name": "Bamanankan"
|
||||
|
|
@ -2352,6 +2341,10 @@
|
|||
"english_name": "Kabiye",
|
||||
"name": "Kab\u0269y\u025b"
|
||||
},
|
||||
"kcg": {
|
||||
"english_name": "Tyap",
|
||||
"name": "Tyap"
|
||||
},
|
||||
"kg": {
|
||||
"english_name": "Kongo",
|
||||
"name": "Kik\u00f4ngo"
|
||||
|
|
@ -2668,6 +2661,10 @@
|
|||
"english_name": "Picard",
|
||||
"name": "Picard"
|
||||
},
|
||||
"pcm": {
|
||||
"english_name": "Nigerian Pidgin",
|
||||
"name": "Naij\u00e1"
|
||||
},
|
||||
"pdc": {
|
||||
"english_name": "Pennsylvania German",
|
||||
"name": "Deitsch"
|
||||
|
|
@ -3214,6 +3211,10 @@
|
|||
"english_name": "Banjar",
|
||||
"name": "Bahasa Banjar"
|
||||
},
|
||||
"blk": {
|
||||
"english_name": "Pa'O",
|
||||
"name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f"
|
||||
},
|
||||
"bm": {
|
||||
"english_name": "Bambara",
|
||||
"name": "Bamanankan"
|
||||
|
|
@ -3614,6 +3615,10 @@
|
|||
"english_name": "Kabiye",
|
||||
"name": "Kab\u0269y\u025b"
|
||||
},
|
||||
"kcg": {
|
||||
"english_name": "Tyap",
|
||||
"name": "Tyap"
|
||||
},
|
||||
"kg": {
|
||||
"english_name": "Kongo",
|
||||
"name": "Kik\u00f4ngo"
|
||||
|
|
@ -3930,6 +3935,10 @@
|
|||
"english_name": "Picard",
|
||||
"name": "Picard"
|
||||
},
|
||||
"pcm": {
|
||||
"english_name": "Nigerian Pidgin",
|
||||
"name": "Naij\u00e1"
|
||||
},
|
||||
"pdc": {
|
||||
"english_name": "Pennsylvania German",
|
||||
"name": "Deitsch"
|
||||
|
|
|
|||
File diff suppressed because it is too large
BIN searx/data/lid.176.ftz (executable file; binary file not shown)
File diff suppressed because it is too large
@@ -1,10 +1,7 @@
{
    "versions": [
        "99.0.1",
        "99.0",
        "98.0.2",
        "98.0.1",
        "98.0"
        "109.0",
        "108.0"
    ],
    "os": [
        "Windows NT 10.0; Win64; x64",
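This list feeds User-Agent generation; as a rough sketch (the file path and the header template are assumptions, not taken from this diff), the data is combined roughly like this:

    import json
    import random

    # Assumed location of the data file shown above and an assumed Firefox
    # User-Agent template; both are illustrative only.
    with open('searx/data/useragents.json', encoding='utf-8') as f:
        data = json.load(f)

    user_agent = 'Mozilla/5.0 ({os}; rv:{v}) Gecko/20100101 Firefox/{v}'.format(
        os=random.choice(data['os']),
        v=random.choice(data['versions']),
    )
    print(user_agent)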
|
|
|||
|
|
@ -114,7 +114,6 @@
|
|||
"Q106645257": "MN m",
|
||||
"Q106645261": "kN m",
|
||||
"Q106645290": "dN m",
|
||||
"Q106647058": "u",
|
||||
"Q1067722": "Fg",
|
||||
"Q106777906": "μS/m",
|
||||
"Q106777917": "S/cm",
|
||||
|
|
@ -154,7 +153,6 @@
|
|||
"Q107164998": "cd mm²/m²",
|
||||
"Q107210119": "g/s",
|
||||
"Q107210344": "mg/s",
|
||||
"Q107213614": "kJ/100g",
|
||||
"Q107226391": "cm⁻¹",
|
||||
"Q1072404": "K",
|
||||
"Q107244316": "mm⁻¹",
|
||||
|
|
@ -209,16 +207,45 @@
|
|||
"Q1091257": "tex",
|
||||
"Q1092296": "a",
|
||||
"Q110143852": "Ω cm",
|
||||
"Q110143896": "cm³/g",
|
||||
"Q1104069": "$",
|
||||
"Q11061003": "μm²",
|
||||
"Q11061005": "nm²",
|
||||
"Q110742003": "dppx",
|
||||
"Q1131660": "st",
|
||||
"Q1137675": "cr",
|
||||
"Q114002440": "𒄀",
|
||||
"Q114002534": "𒃻",
|
||||
"Q114002568": "𒂠",
|
||||
"Q114002639": "𒈨𒊑",
|
||||
"Q114002688": "𒋗𒋛",
|
||||
"Q114002734": "𒊺",
|
||||
"Q114002796": "𒂆",
|
||||
"Q114002897": "𒊬",
|
||||
"Q114002930": "𒀺",
|
||||
"Q114002955": "𒀹𒃷",
|
||||
"Q114002974": "𒃷",
|
||||
"Q114002998": "𒁓",
|
||||
"Q114018694": "𒄥",
|
||||
"Q114018781": "𒁀𒌷𒂵",
|
||||
"Q1140444": "Zb",
|
||||
"Q1140577": "Yb",
|
||||
"Q114589269": "A",
|
||||
"Q1152074": "Pb",
|
||||
"Q1152323": "Tb",
|
||||
"Q115277430": "QB",
|
||||
"Q115280832": "RB",
|
||||
"Q115359862": "qg",
|
||||
"Q115359863": "rg",
|
||||
"Q115359865": "Rg",
|
||||
"Q115359866": "Qg",
|
||||
"Q115359910": "Rm",
|
||||
"Q115533751": "rm",
|
||||
"Q115533764": "qm",
|
||||
"Q115533776": "Qm",
|
||||
"Q116432446": "ᵐ",
|
||||
"Q116432563": "ˢ",
|
||||
"Q116443090": "ʰ",
|
||||
"Q1165799": "mil",
|
||||
"Q11776930": "Mg",
|
||||
"Q11830636": "psf",
|
||||
|
|
@ -237,12 +264,14 @@
|
|||
"Q12257695": "Eb/s",
|
||||
"Q12257696": "EB/s",
|
||||
"Q12261466": "kB/s",
|
||||
"Q12263659": "mgal",
|
||||
"Q12265780": "Pb/s",
|
||||
"Q12265783": "PB/s",
|
||||
"Q12269121": "Yb/s",
|
||||
"Q12269122": "YB/s",
|
||||
"Q12269308": "Zb/s",
|
||||
"Q12269309": "ZB/s",
|
||||
"Q1238720": "vols.",
|
||||
"Q1247300": "cm H₂O",
|
||||
"Q12714022": "cwt",
|
||||
"Q12789864": "GeV",
|
||||
|
|
@ -283,7 +312,6 @@
|
|||
"Q14914907": "th",
|
||||
"Q14916719": "Gpc",
|
||||
"Q14923662": "Pm³",
|
||||
"Q1511773": "LSd",
|
||||
"Q15120301": "l atm",
|
||||
"Q1542309": "xu",
|
||||
"Q1545979": "ft³",
|
||||
|
|
@ -305,7 +333,6 @@
|
|||
"Q17255465": "v_P",
|
||||
"Q173117": "R$",
|
||||
"Q1741429": "kpm",
|
||||
"Q174467": "Lm",
|
||||
"Q174728": "cm",
|
||||
"Q174789": "mm",
|
||||
"Q175821": "μm",
|
||||
|
|
@ -329,13 +356,11 @@
|
|||
"Q182429": "m/s",
|
||||
"Q1826195": "dl",
|
||||
"Q18413919": "cm/s",
|
||||
"Q184172": "F",
|
||||
"Q185078": "a",
|
||||
"Q185153": "erg",
|
||||
"Q185648": "Torr",
|
||||
"Q185759": "span",
|
||||
"Q1872619": "zs",
|
||||
"Q189097": "₧",
|
||||
"Q190095": "Gy",
|
||||
"Q19017495": "mm²",
|
||||
"Q190951": "S$",
|
||||
|
|
@ -351,6 +376,7 @@
|
|||
"Q194339": "B$",
|
||||
"Q1970718": "mam",
|
||||
"Q1972579": "pdl",
|
||||
"Q19877834": "cd-ft",
|
||||
"Q199462": "LE",
|
||||
"Q199471": "Afs",
|
||||
"Q200323": "dm",
|
||||
|
|
@ -389,7 +415,7 @@
|
|||
"Q211256": "mi/h",
|
||||
"Q21154419": "PD",
|
||||
"Q211580": "BTU (th)",
|
||||
"Q212120": "A h",
|
||||
"Q212120": "A⋅h",
|
||||
"Q213005": "G$",
|
||||
"Q2140397": "in³",
|
||||
"Q214377": "ell",
|
||||
|
|
@ -429,7 +455,6 @@
|
|||
"Q23931040": "dam²",
|
||||
"Q23931103": "nmi²",
|
||||
"Q240468": "syr£",
|
||||
"Q2414435": "$b.",
|
||||
"Q242988": "Lib$",
|
||||
"Q2438073": "ag",
|
||||
"Q2448803": "mV",
|
||||
|
|
@ -507,6 +532,7 @@
|
|||
"Q3013059": "ka",
|
||||
"Q304479": "tr",
|
||||
"Q305896": "DPI",
|
||||
"Q3095010": "γ",
|
||||
"Q31889818": "ppq",
|
||||
"Q3194304": "kb",
|
||||
"Q3207456": "mW",
|
||||
|
|
@ -546,7 +572,7 @@
|
|||
"Q3773454": "Mpc",
|
||||
"Q3815076": "Kib",
|
||||
"Q3833309": "£",
|
||||
"Q3858002": "mA h",
|
||||
"Q3858002": "mA⋅h",
|
||||
"Q3867152": "ft/s²",
|
||||
"Q389062": "Tib",
|
||||
"Q3902688": "pl",
|
||||
|
|
@ -607,6 +633,8 @@
|
|||
"Q53393868": "GJ",
|
||||
"Q53393886": "PJ",
|
||||
"Q53393890": "EJ",
|
||||
"Q53393893": "ZJ",
|
||||
"Q53393898": "YJ",
|
||||
"Q53448786": "yHz",
|
||||
"Q53448790": "zHz",
|
||||
"Q53448794": "fHz",
|
||||
|
|
@ -620,6 +648,7 @@
|
|||
"Q53448826": "hHz",
|
||||
"Q53448828": "yJ",
|
||||
"Q53448832": "zJ",
|
||||
"Q53448835": "fJ",
|
||||
"Q53448842": "pJ",
|
||||
"Q53448844": "nJ",
|
||||
"Q53448847": "μJ",
|
||||
|
|
@ -682,6 +711,7 @@
|
|||
"Q53951982": "Mt",
|
||||
"Q53952048": "kt",
|
||||
"Q54006645": "ZWb",
|
||||
"Q54081354": "ZT",
|
||||
"Q54081925": "ZSv",
|
||||
"Q54082468": "ZS",
|
||||
"Q54083144": "ZΩ",
|
||||
|
|
@ -706,8 +736,6 @@
|
|||
"Q56157046": "nmol",
|
||||
"Q56157048": "pmol",
|
||||
"Q56160603": "fmol",
|
||||
"Q56302633": "UM",
|
||||
"Q56317116": "mgal",
|
||||
"Q56317622": "Q_P",
|
||||
"Q56318907": "kbar",
|
||||
"Q56349362": "Bs.S",
|
||||
|
|
@ -1184,10 +1212,10 @@
|
|||
"Q11570": "kg",
|
||||
"Q11573": "m",
|
||||
"Q11574": "s",
|
||||
"Q11579": "K",
|
||||
"Q11582": "L",
|
||||
"Q12129": "pc",
|
||||
"Q12438": "N",
|
||||
"Q16068": "DM",
|
||||
"Q1811": "AU",
|
||||
"Q20764": "Ma",
|
||||
"Q2101": "e",
|
||||
|
|
|
|||
searx/engines/9gag.py (new file, 77 lines)

@@ -0,0 +1,77 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=invalid-name
"""9GAG (social media)"""

from json import loads
from datetime import datetime
from urllib.parse import urlencode

about = {
    "website": 'https://9gag.com/',
    "wikidata_id": 'Q277421',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['social media']
paging = True

search_url = "https://9gag.com/v1/search-posts?{query}"
page_size = 10


def request(query, params):
    query = urlencode({'query': query, 'c': (params['pageno'] - 1) * page_size})

    params['url'] = search_url.format(query=query)

    return params


def response(resp):
    results = []

    json_results = loads(resp.text)['data']

    for result in json_results['posts']:
        result_type = result['type']

        # Get the not cropped version of the thumbnail when the image height is not too important
        if result['images']['image700']['height'] > 400:
            thumbnail = result['images']['imageFbThumbnail']['url']
        else:
            thumbnail = result['images']['image700']['url']

        if result_type == 'Photo':
            results.append(
                {
                    'template': 'images.html',
                    'url': result['url'],
                    'title': result['title'],
                    'content': result['description'],
                    'publishedDate': datetime.utcfromtimestamp(result['creationTs']),
                    'img_src': result['images']['image700']['url'],
                    'thumbnail_src': thumbnail,
                }
            )
        elif result_type == 'Animated':
            results.append(
                {
                    'template': 'videos.html',
                    'url': result['url'],
                    'title': result['title'],
                    'content': result['description'],
                    'publishedDate': datetime.utcfromtimestamp(result['creationTs']),
                    'thumbnail': thumbnail,
                    'iframe_src': result['images'].get('image460sv', {}).get('url'),
                }
            )

    if 'tags' in json_results:
        for suggestion in json_results['tags']:
            results.append({'suggestion': suggestion['key']})

    return results
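As a rough standalone illustration of the request()/response() contract used by the new engine above (not part of the commit; the plain requests call and the file path are assumptions, since searx normally drives engines through its own network layer):

    import importlib.util
    import requests

    # The file name starts with a digit, so load the module from its path
    # instead of using a normal import statement.
    spec = importlib.util.spec_from_file_location('ninegag', 'searx/engines/9gag.py')
    engine = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(engine)

    params = engine.request('cats', {'pageno': 1})   # fills in params['url']
    resp = requests.get(params['url'], timeout=5.0)  # plain GET, for the sketch only
    for item in engine.response(resp):
        print(item.get('title') or item.get('suggestion'))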
@@ -44,6 +44,7 @@ ENGINE_DEFAULT_ARGS = {
    "enable_http": False,
    "using_tor_proxy": False,
    "display_error_messages": True,
    "send_accept_language_header": False,
    "tokens": [],
    "about": {},
}

@@ -80,6 +81,7 @@ engine_shortcuts = {}

    engine_shortcuts[engine.shortcut] = engine.name

:meta hide-value:
"""

@@ -104,8 +106,12 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
    - required attribute is not set :py:func:`is_missing_required_attributes`

    """
    # pylint: disable=too-many-return-statements

    engine_name = engine_data['name']
    engine_name = engine_data.get('name')
    if engine_name is None:
        logger.error('An engine does not have a "name" field')
        return None
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        return None

@@ -116,7 +122,10 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
    engine_data['name'] = engine_name

    # load_module
    engine_module = engine_data['engine']
    engine_module = engine_data.get('engine')
    if engine_module is None:
        logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name))
        return None
    try:
        engine = load_module(engine_module + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):

@@ -149,7 +158,11 @@ def set_loggers(engine, engine_name):
    engine.logger = logger.getChild(engine_name)
    # the engine may have load some other engines
    # may sure the logger is initialized
    for module_name, module in sys.modules.items():
    # use sys.modules.copy() to avoid "RuntimeError: dictionary changed size during iteration"
    # see https://github.com/python/cpython/issues/89516
    # and https://docs.python.org/3.10/library/sys.html#sys.modules
    modules = sys.modules.copy()
    for module_name, module in modules.items():
        if (
            module_name.startswith("searx.engines")
            and module_name != "searx.engines.__init__"

@@ -269,12 +282,12 @@ def is_engine_active(engine: Engine):

def register_engine(engine: Engine):
    if engine.name in engines:
        logger.error('Engine config error: ambigious name: {0}'.format(engine.name))
        logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
        sys.exit(1)
    engines[engine.name] = engine

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)
    engine_shortcuts[engine.shortcut] = engine.name
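The switch to sys.modules.copy() above is the usual snapshot-then-iterate pattern; a tiny self-contained illustration (not from the commit):

    import sys

    # Iterating sys.modules directly can raise
    # "RuntimeError: dictionary changed size during iteration" if anything is
    # imported while the loop runs. Iterating a snapshot avoids that.
    for name, module in sys.modules.copy().items():
        if name.startswith('searx.engines') and name != 'searx.engines.__init__':
            # The loop body is now free to trigger imports without breaking iteration.
            print('already loaded engine module:', name)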
searx/engines/apple_app_store.py (new file, 57 lines)

@@ -0,0 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Apple App Store
"""

from json import loads
from urllib.parse import urlencode
from dateutil.parser import parse

about = {
    "website": 'https://www.apple.com/app-store/',
    "wikidata_id": 'Q368215',
    "official_api_documentation": (
        'https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/'
        'iTuneSearchAPI/UnderstandingSearchResults.html#//apple_ref/doc/uid/TP40017632-CH8-SW1'
    ),
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['files', 'apps']
safesearch = True

search_url = 'https://itunes.apple.com/search?{query}'


def request(query, params):
    explicit = "Yes"

    if params['safesearch'] > 0:
        explicit = "No"

    params['url'] = search_url.format(query=urlencode({'term': query, 'media': 'software', 'explicit': explicit}))

    return params


def response(resp):
    results = []

    json_result = loads(resp.text)

    for result in json_result['results']:
        results.append(
            {
                'url': result['trackViewUrl'],
                'title': result['trackName'],
                'content': result['description'],
                'img_src': result['artworkUrl100'],
                'publishedDate': parse(result['currentVersionReleaseDate']),
                'author': result['sellerName'],
            }
        )

    return results
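The iTunes Search API parameters built by request() above can be tried directly; a small sketch (the requests call is an assumption for the example, searx uses its own HTTP layer):

    from urllib.parse import urlencode
    import requests

    # Same query string the engine builds for a safe-search request.
    url = 'https://itunes.apple.com/search?' + urlencode(
        {'term': 'weather', 'media': 'software', 'explicit': 'No'}
    )
    data = requests.get(url, timeout=5.0).json()
    for app in data.get('results', [])[:5]:
        print(app['trackName'], '-', app['sellerName'])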
113
searx/engines/apple_maps.py
Normal file
113
searx/engines/apple_maps.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Apple Maps"""
|
||||
|
||||
from json import loads
|
||||
from time import time
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.network import get as http_get
|
||||
from searx.engines.openstreetmap import get_key_label
|
||||
|
||||
about = {
|
||||
"website": 'https://www.apple.com/maps/',
|
||||
"wikidata_id": 'Q276101',
|
||||
"official_api_documentation": None,
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": 'JSON',
|
||||
}
|
||||
|
||||
token = {'value': '', 'last_updated': None}
|
||||
|
||||
categories = ['map']
|
||||
paging = False
|
||||
|
||||
search_url = "https://api.apple-mapkit.com/v1/search?{query}&mkjsVersion=5.72.53"
|
||||
|
||||
|
||||
def obtain_token():
|
||||
update_time = time() - (time() % 1800)
|
||||
try:
|
||||
# use duckduckgo's mapkit token
|
||||
token_response = http_get('https://duckduckgo.com/local.js?get_mk_token=1', timeout=2.0)
|
||||
actual_token = http_get(
|
||||
'https://cdn.apple-mapkit.com/ma/bootstrap?apiVersion=2&mkjsVersion=5.72.53&poi=1',
|
||||
timeout=2.0,
|
||||
headers={'Authorization': 'Bearer ' + token_response.text},
|
||||
)
|
||||
token['value'] = loads(actual_token.text)['authInfo']['access_token']
|
||||
token['last_updated'] = update_time
|
||||
# pylint: disable=bare-except
|
||||
except:
|
||||
pass
|
||||
return token
|
||||
|
||||
|
||||
def request(query, params):
|
||||
if time() - (token['last_updated'] or 0) > 1800:
|
||||
obtain_token()
|
||||
|
||||
params['url'] = search_url.format(query=urlencode({'q': query, 'lang': params['language']}))
|
||||
|
||||
params['headers'] = {'Authorization': 'Bearer ' + token['value']}
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
resp_json = loads(resp.text)
|
||||
|
||||
user_language = resp.search_params['language']
|
||||
|
||||
for result in resp_json['results']:
|
||||
boundingbox = None
|
||||
if 'displayMapRegion' in result:
|
||||
box = result['displayMapRegion']
|
||||
boundingbox = [box['southLat'], box['northLat'], box['westLng'], box['eastLng']]
|
||||
|
||||
links = []
|
||||
if 'telephone' in result:
|
||||
telephone = result['telephone']
|
||||
links.append(
|
||||
{
|
||||
'label': get_key_label('phone', user_language),
|
||||
'url': 'tel:' + telephone,
|
||||
'url_label': telephone,
|
||||
}
|
||||
)
|
||||
if result.get('urls'):
|
||||
url = result['urls'][0]
|
||||
links.append(
|
||||
{
|
||||
'label': get_key_label('website', user_language),
|
||||
'url': url,
|
||||
'url_label': url,
|
||||
}
|
||||
)
|
||||
|
||||
results.append(
|
||||
{
|
||||
'template': 'map.html',
|
||||
'type': result.get('poiCategory'),
|
||||
'title': result['name'],
|
||||
'links': links,
|
||||
'latitude': result['center']['lat'],
|
||||
'longitude': result['center']['lng'],
|
||||
'url': result['placecardUrl'],
|
||||
'boundingbox': boundingbox,
|
||||
'geojson': {'type': 'Point', 'coordinates': [result['center']['lng'], result['center']['lat']]},
|
||||
'address': {
|
||||
'name': result['name'],
|
||||
'house_number': result.get('subThoroughfare'),
|
||||
'road': result.get('thoroughfare'),
|
||||
'locality': result.get('locality'),
|
||||
'postcode': result.get('postCode'),
|
||||
'country': result.get('country'),
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
|
@ -3,9 +3,10 @@
|
|||
ArXiV (Scientific preprints)
|
||||
"""
|
||||
|
||||
from lxml import html
|
||||
from lxml import etree
|
||||
from lxml.etree import XPath
|
||||
from datetime import datetime
|
||||
from searx.utils import eval_xpath_list, eval_xpath_getindex
|
||||
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
@ -17,7 +18,7 @@ about = {
|
|||
"results": 'XML-RSS',
|
||||
}
|
||||
|
||||
categories = ['science']
|
||||
categories = ['science', 'scientific publications']
|
||||
paging = True
|
||||
|
||||
base_url = (
|
||||
|
|
@ -27,6 +28,23 @@ base_url = (
|
|||
# engine dependent config
|
||||
number_of_results = 10
|
||||
|
||||
# xpaths
|
||||
arxiv_namespaces = {
|
||||
"atom": "http://www.w3.org/2005/Atom",
|
||||
"arxiv": "http://arxiv.org/schemas/atom",
|
||||
}
|
||||
xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
|
||||
xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
|
||||
xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
|
||||
xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
|
||||
xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
|
||||
xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
|
||||
xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
|
||||
xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
|
||||
xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
|
||||
xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
|
||||
xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
|
||||
|
||||
|
||||
def request(query, params):
|
||||
# basic search
|
||||
|
|
@ -41,30 +59,50 @@ def request(query, params):
|
|||
|
||||
def response(resp):
|
||||
results = []
|
||||
dom = etree.fromstring(resp.content)
|
||||
for entry in eval_xpath_list(dom, xpath_entry):
|
||||
title = eval_xpath_getindex(entry, xpath_title, 0).text
|
||||
|
||||
dom = html.fromstring(resp.content)
|
||||
url = eval_xpath_getindex(entry, xpath_id, 0).text
|
||||
abstract = eval_xpath_getindex(entry, xpath_summary, 0).text
|
||||
|
||||
for entry in eval_xpath_list(dom, '//entry'):
|
||||
title = eval_xpath_getindex(entry, './/title', 0).text
|
||||
authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]
|
||||
|
||||
url = eval_xpath_getindex(entry, './/id', 0).text
|
||||
# doi
|
||||
doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
|
||||
doi = None if doi_element is None else doi_element.text
|
||||
|
||||
content_string = '{doi_content}{abstract_content}'
|
||||
# pdf
|
||||
pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
|
||||
pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')
|
||||
|
||||
abstract = eval_xpath_getindex(entry, './/summary', 0).text
|
||||
# journal
|
||||
journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
|
||||
journal = None if journal_element is None else journal_element.text
|
||||
|
||||
# If a doi is available, add it to the snipppet
|
||||
doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None)
|
||||
doi_content = doi_element.text if doi_element is not None else ''
|
||||
content = content_string.format(doi_content=doi_content, abstract_content=abstract)
|
||||
# tags
|
||||
tag_elements = eval_xpath(entry, xpath_category)
|
||||
tags = [str(tag) for tag in tag_elements]
|
||||
|
||||
if len(content) > 300:
|
||||
content = content[0:300] + "..."
|
||||
# TODO: center snippet on query term
|
||||
# comments
|
||||
comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
|
||||
comments = None if comments_elements is None else comments_elements.text
|
||||
|
||||
publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ')
|
||||
publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')
|
||||
|
||||
res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
|
||||
res_dict = {
|
||||
'template': 'paper.html',
|
||||
'url': url,
|
||||
'title': title,
|
||||
'publishedDate': publishedDate,
|
||||
'content': abstract,
|
||||
'doi': doi,
|
||||
'authors': authors,
|
||||
'journal': journal,
|
||||
'tags': tags,
|
||||
'comments': comments,
|
||||
'pdf_url': pdf_url,
|
||||
}
|
||||
|
||||
results.append(res_dict)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,11 +4,13 @@
|
|||
|
||||
- https://github.com/searx/searx/issues/2019#issuecomment-648227442
|
||||
"""
|
||||
# pylint: disable=too-many-branches
|
||||
|
||||
import re
|
||||
from urllib.parse import urlencode, urlparse, parse_qs
|
||||
from lxml import html
|
||||
from searx.utils import eval_xpath, extract_text, match_language
|
||||
from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex
|
||||
from searx.network import multi_requests, Request
|
||||
|
||||
about = {
|
||||
"website": 'https://www.bing.com',
|
||||
|
|
@ -24,6 +26,7 @@ categories = ['general', 'web']
|
|||
paging = True
|
||||
time_range_support = False
|
||||
safesearch = False
|
||||
send_accept_language_header = True
|
||||
supported_languages_url = 'https://www.bing.com/account/general'
|
||||
language_aliases = {}
|
||||
|
||||
|
|
@ -67,42 +70,71 @@ def request(query, params):
|
|||
logger.debug("headers.Referer --> %s", referer)
|
||||
|
||||
params['url'] = base_url + search_path
|
||||
params['headers']['Accept-Language'] = "en-US,en;q=0.5"
|
||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
|
||||
results = []
|
||||
result_len = 0
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
for result in eval_xpath(dom, '//div[@class="sa_cc"]'):
|
||||
|
||||
# IMO //div[@class="sa_cc"] does no longer match
|
||||
logger.debug('found //div[@class="sa_cc"] --> %s', result)
|
||||
|
||||
link = eval_xpath(result, './/h3/a')[0]
|
||||
url = link.attrib.get('href')
|
||||
title = extract_text(link)
|
||||
content = extract_text(eval_xpath(result, './/p'))
|
||||
|
||||
# append result
|
||||
results.append({'url': url, 'title': title, 'content': content})
|
||||
|
||||
# parse results again if nothing is found yet
|
||||
for result in eval_xpath(dom, '//li[@class="b_algo"]'):
|
||||
|
||||
link = eval_xpath(result, './/h2/a')[0]
|
||||
url_to_resolve = []
|
||||
url_to_resolve_index = []
|
||||
i = 0
|
||||
for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):
|
||||
|
||||
link = eval_xpath_getindex(result, './/h2/a', 0, None)
|
||||
if link is None:
|
||||
continue
|
||||
url = link.attrib.get('href')
|
||||
title = extract_text(link)
|
||||
content = extract_text(eval_xpath(result, './/p'))
|
||||
|
||||
# Make sure that the element is free of <a href> links and <span class='algoSlug_icon'>
|
||||
content = eval_xpath(result, '(.//p)[1]')
|
||||
for p in content:
|
||||
for e in p.xpath('.//a'):
|
||||
e.getparent().remove(e)
|
||||
for e in p.xpath('.//span[@class="algoSlug_icon"]'):
|
||||
e.getparent().remove(e)
|
||||
content = extract_text(content)
|
||||
|
||||
# get the real URL either using the URL shown to user or following the Bing URL
|
||||
if url.startswith('https://www.bing.com/ck/a?'):
|
||||
url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
|
||||
# Bing can shorten the URL either at the end or in the middle of the string
|
||||
if (
|
||||
url_cite.startswith('https://')
|
||||
and '…' not in url_cite
|
||||
and '...' not in url_cite
|
||||
and '›' not in url_cite
|
||||
):
|
||||
# no need for an additional HTTP request
|
||||
url = url_cite
|
||||
else:
|
||||
# resolve the URL with an additional HTTP request
|
||||
url_to_resolve.append(url.replace('&ntb=1', '&ntb=F'))
|
||||
url_to_resolve_index.append(i)
|
||||
url = None # remove the result if the HTTP Bing redirect raise an exception
|
||||
|
||||
# append result
|
||||
results.append({'url': url, 'title': title, 'content': content})
|
||||
# increment result pointer for the next iteration in this loop
|
||||
i += 1
|
||||
|
||||
# resolve all Bing redirections in parallel
|
||||
request_list = [
|
||||
Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
|
||||
]
|
||||
response_list = multi_requests(request_list)
|
||||
for i, redirect_response in enumerate(response_list):
|
||||
if not isinstance(redirect_response, Exception):
|
||||
results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
|
||||
|
||||
# get number_of_results
|
||||
try:
|
||||
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
|
||||
if "-" in result_len_container:
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ categories = ['images', 'web']
|
|||
paging = True
|
||||
safesearch = True
|
||||
time_range_support = True
|
||||
send_accept_language_header = True
|
||||
supported_languages_url = 'https://www.bing.com/account/general'
|
||||
number_of_results = 28
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ about = {
|
|||
categories = ['news']
|
||||
paging = True
|
||||
time_range_support = True
|
||||
send_accept_language_header = True
|
||||
|
||||
# search-url
|
||||
base_url = 'https://www.bing.com/'
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ categories = ['videos', 'web']
|
|||
paging = True
|
||||
safesearch = True
|
||||
time_range_support = True
|
||||
send_accept_language_header = True
|
||||
number_of_results = 28
|
||||
|
||||
base_url = 'https://www.bing.com/'
|
||||
|
|
@ -70,10 +71,6 @@ def request(query, params):
|
|||
if params['time_range'] in time_range_dict:
|
||||
params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
|
||||
|
||||
# bing videos did not like "older" versions < 70.0.1 when selectin other
|
||||
# languages then 'en' .. very strange ?!?!
|
||||
params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1'
|
||||
|
||||
return params
|
||||
|
||||
|
||||
|
|
@ -83,7 +80,7 @@ def response(resp):
|
|||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
for result in dom.xpath('//div[@class="dg_u"]'):
|
||||
for result in dom.xpath('//div[@class="dg_u"]/div[contains(@class, "mc_vtvc")]'):
|
||||
metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
|
||||
info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
|
||||
content = '{0} - {1}'.format(metadata['du'], info)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
"""
|
||||
|
||||
from json import loads
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlencode
|
||||
|
||||
|
|
@ -19,7 +18,7 @@ about = {
|
|||
"results": 'JSON',
|
||||
}
|
||||
|
||||
categories = ['science']
|
||||
categories = ['science', 'scientific publications']
|
||||
paging = True
|
||||
nb_per_page = 10
|
||||
|
||||
|
|
@ -42,39 +41,75 @@ def request(query, params):
|
|||
)
|
||||
params['url'] = base_url + search_path
|
||||
|
||||
logger.debug("query_url --> %s", params['url'])
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
json_data = loads(resp.text)
|
||||
json_data = resp.json()
|
||||
|
||||
for result in json_data['data']:
|
||||
|
||||
source = result['_source']
|
||||
url = None
|
||||
if source.get('urls'):
|
||||
url = source['urls'][0].replace('http://', 'https://', 1)
|
||||
|
||||
if url is None and source.get('doi'):
|
||||
# use the DOI reference
|
||||
url = 'https://doi.org/' + source['doi']
|
||||
|
||||
if url is None and source.get('downloadUrl'):
|
||||
# use the downloadUrl
|
||||
url = source['downloadUrl']
|
||||
|
||||
if url is None and source.get('identifiers'):
|
||||
# try to find an ark id, see
|
||||
# https://www.wikidata.org/wiki/Property:P8091
|
||||
# and https://en.wikipedia.org/wiki/Archival_Resource_Key
|
||||
arkids = [
|
||||
identifier[5:] # 5 is the length of "ark:/"
|
||||
for identifier in source.get('identifiers')
|
||||
if isinstance(identifier, str) and identifier.startswith('ark:/')
|
||||
]
|
||||
if len(arkids) > 0:
|
||||
url = 'https://n2t.net/' + arkids[0]
|
||||
|
||||
if url is None:
|
||||
continue
|
||||
|
||||
publishedDate = None
|
||||
time = source['publishedDate'] or source['depositedDate']
|
||||
if time:
|
||||
date = datetime.fromtimestamp(time / 1000)
|
||||
else:
|
||||
date = None
|
||||
publishedDate = datetime.fromtimestamp(time / 1000)
|
||||
|
||||
metadata = []
|
||||
if source['publisher'] and len(source['publisher']) > 3:
|
||||
metadata.append(source['publisher'])
|
||||
if source['topics']:
|
||||
metadata.append(source['topics'][0])
|
||||
if source['doi']:
|
||||
metadata.append(source['doi'])
|
||||
metadata = ' / '.join(metadata)
|
||||
# sometimes the 'title' is None / filter None values
|
||||
journals = [j['title'] for j in (source.get('journals') or []) if j['title']]
|
||||
|
||||
publisher = source['publisher']
|
||||
if publisher:
|
||||
publisher = source['publisher'].strip("'")
|
||||
|
||||
results.append(
|
||||
{
|
||||
'url': source['urls'][0].replace('http://', 'https://', 1),
|
||||
'template': 'paper.html',
|
||||
'title': source['title'],
|
||||
'content': source['description'],
|
||||
'publishedDate': date,
|
||||
'metadata': metadata,
|
||||
'url': url,
|
||||
'content': source['description'] or '',
|
||||
# 'comments': '',
|
||||
'tags': source['topics'],
|
||||
'publishedDate': publishedDate,
|
||||
'type': (source['types'] or [None])[0],
|
||||
'authors': source['authors'],
|
||||
'editor': ', '.join(source['contributors'] or []),
|
||||
'publisher': publisher,
|
||||
'journal': ', '.join(journals),
|
||||
# 'volume': '',
|
||||
# 'pages' : '',
|
||||
# 'number': '',
|
||||
'doi': source['doi'],
|
||||
'issn': [x for x in [source.get('issn')] if x],
|
||||
'isbn': [x for x in [source.get('isbn')] if x], # exists in the rawRecordXml
|
||||
'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'),
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
searx/engines/crossref.py (new file, 60 lines)

@@ -0,0 +1,60 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Semantic Scholar (Science)
"""
# pylint: disable=use-dict-literal

from urllib.parse import urlencode
from searx.utils import html_to_text

about = {
    "website": 'https://www.crossref.org/',
    "wikidata_id": 'Q5188229',
    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'


def request(query, params):
    params['url'] = search_url + '?' + urlencode(dict(query=query, offset=20 * (params['pageno'] - 1)))
    return params


def response(resp):
    res = resp.json()
    results = []
    for record in res['message']['items']:
        record_type = record['type']
        if record_type == 'book-chapter':
            title = record['container-title'][0]
            if record['title'][0].lower().strip() != title.lower().strip():
                title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')'
            journal = None
        else:
            title = html_to_text(record['title'][0])
            journal = record.get('container-title', [None])[0]
        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
        authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record['type'],
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )
    return results
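For reference, the paging scheme used by request() above (20 records per page via the offset parameter) can be exercised against the public Crossref endpoint; a minimal sketch (requests is assumed for the HTTP call):

    from urllib.parse import urlencode
    import requests

    def crossref_page(query: str, pageno: int) -> list:
        # Mirrors the engine's URL construction: 20 records per page via 'offset'.
        url = 'https://api.crossref.org/works?' + urlencode(
            {'query': query, 'offset': 20 * (pageno - 1)}
        )
        return requests.get(url, timeout=10.0).json()['message']['items']

    for record in crossref_page('open source search', 1)[:5]:
        print(record.get('DOI'), (record.get('title') or ['(no title)'])[0])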
|
|
@@ -30,7 +30,7 @@ number_of_results = 10

time_range_support = True
time_delta_dict = {
    "day": timedelta(days=1),
    "day": timedelta(days=1),
    "week": timedelta(days=7),
    "month": timedelta(days=31),
    "year": timedelta(days=365),

@@ -58,7 +58,7 @@ search_url = (
    'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
).format(
    fields=','.join(result_fields),
    password_protected= 'false',
    password_protected='false',
    private='false',
    sort='relevance',
    limit=number_of_results,

@@ -93,7 +93,7 @@ def request(query, params):
    query_args = {
        'search': query,
        'languages': language_iso639,
        'page': params['pageno'],
        'page': params['pageno'],
    }

    if locale.territory:

@@ -170,7 +170,4 @@ def response(resp):

# get supported languages from their site
def _fetch_supported_languages(resp):
    response_json = resp.json()
    return [
        item['locale']
        for item in response_json['list']
    ]
    return [item['locale'] for item in response_json['list']]
|
|
|||
62
searx/engines/deepl.py
Normal file
62
searx/engines/deepl.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Deepl translation engine"""
|
||||
|
||||
from json import loads
|
||||
|
||||
about = {
|
||||
"website": 'https://deepl.com',
|
||||
"wikidata_id": 'Q43968444',
|
||||
"official_api_documentation": 'https://www.deepl.com/docs-api',
|
||||
"use_official_api": True,
|
||||
"require_api_key": True,
|
||||
"results": 'JSON',
|
||||
}
|
||||
|
||||
engine_type = 'online_dictionary'
|
||||
categories = ['general']
|
||||
|
||||
url = 'https://api-free.deepl.com/v2/translate'
|
||||
api_key = None
|
||||
|
||||
|
||||
def request(_query, params):
|
||||
'''pre-request callback
|
||||
|
||||
params<dict>:
|
||||
|
||||
- ``method`` : POST/GET
|
||||
- ``headers``: {}
|
||||
- ``data``: {} # if method == POST
|
||||
- ``url``: ''
|
||||
- ``category``: 'search category'
|
||||
- ``pageno``: 1 # number of the requested page
|
||||
'''
|
||||
|
||||
params['url'] = url
|
||||
params['method'] = 'POST'
|
||||
params['data'] = {'auth_key': api_key, 'text': params['query'], 'target_lang': params['to_lang'][1]}
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
result = loads(resp.text)
|
||||
translations = result['translations']
|
||||
|
||||
infobox = "<dl>"
|
||||
|
||||
for translation in translations:
|
||||
infobox += f"<dd>{translation['text']}</dd>"
|
||||
|
||||
infobox += "</dl>"
|
||||
|
||||
results.append(
|
||||
{
|
||||
'infobox': 'Deepl',
|
||||
'content': infobox,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
|
@ -19,7 +19,8 @@ list in ``settings.yml``:
|
|||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
|
||||
engine_type = 'offline'
|
||||
engine_type = 'online'
|
||||
send_accept_language_header = True
|
||||
categories = ['general']
|
||||
disabled = True
|
||||
timeout = 2.0
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
"""Docker Hub (IT)
|
||||
|
||||
"""
|
||||
# pylint: disable=use-dict-literal
|
||||
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ from searx.network import get
|
|||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://lite.duckduckgo.com/lite',
|
||||
"website": 'https://lite.duckduckgo.com/lite/',
|
||||
"wikidata_id": 'Q12805',
|
||||
"official_api_documentation": 'https://duckduckgo.com/api',
|
||||
"use_official_api": False,
|
||||
|
|
@ -31,6 +31,7 @@ categories = ['general', 'web']
|
|||
paging = True
|
||||
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
|
||||
time_range_support = True
|
||||
send_accept_language_header = True
|
||||
|
||||
language_aliases = {
|
||||
'ar-SA': 'ar-XA',
|
||||
|
|
@ -45,7 +46,7 @@ language_aliases = {
|
|||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||
|
||||
# search-url
|
||||
url = 'https://lite.duckduckgo.com/lite'
|
||||
url = 'https://lite.duckduckgo.com/lite/'
|
||||
url_ping = 'https://duckduckgo.com/t/sl_l'
|
||||
|
||||
# match query's language to a region code that duckduckgo will accept
|
||||
|
|
@ -72,6 +73,7 @@ def request(query, params):
|
|||
# link again and again ..
|
||||
|
||||
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
params['headers']['Referer'] = 'https://google.com/'
|
||||
|
||||
# initial page does not have an offset
|
||||
if params['pageno'] == 2:
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@ about = {
|
|||
"results": 'JSON',
|
||||
}
|
||||
|
||||
send_accept_language_header = True
|
||||
|
||||
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
||||
|
||||
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']
|
||||
|
|
@ -62,7 +64,6 @@ def request(query, params):
|
|||
params['url'] = URL.format(query=urlencode({'q': query}))
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
language = language.split('-')[0]
|
||||
params['headers']['Accept-Language'] = language
|
||||
return params
|
||||
|
||||
|
||||
|
|
@ -78,7 +79,7 @@ def response(resp):
|
|||
# * book / performing art / film / television / media franchise / concert tour / playwright
|
||||
# * prepared food
|
||||
# * website / software / os / programming language / file format / software engineer
|
||||
# * compagny
|
||||
# * company
|
||||
|
||||
content = ''
|
||||
heading = search_res.get('Heading', '')
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ about = {
|
|||
categories = ['images', 'web']
|
||||
paging = True
|
||||
safesearch = True
|
||||
send_accept_language_header = True
|
||||
|
||||
# search-url
|
||||
images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
|
||||
|
|
|
|||
136
searx/engines/duckduckgo_weather.py
Normal file
136
searx/engines/duckduckgo_weather.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""DuckDuckGo Weather"""
|
||||
|
||||
from json import loads
|
||||
from urllib.parse import quote
|
||||
|
||||
from datetime import datetime
|
||||
from flask_babel import gettext
|
||||
|
||||
about = {
|
||||
"website": 'https://duckduckgo.com/',
|
||||
"wikidata_id": 'Q12805',
|
||||
"official_api_documentation": None,
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
categories = ["others"]
|
||||
|
||||
url = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
|
||||
|
||||
|
||||
def generate_condition_table(condition):
|
||||
res = ""
|
||||
|
||||
res += f"<tr><td><b>{gettext('Condition')}</b></td>" f"<td><b>{condition['summary']}</b></td></tr>"
|
||||
|
||||
res += (
|
||||
f"<tr><td><b>{gettext('Temperature')}</b></td>"
|
||||
f"<td><b>{f_to_c(condition['temperature'])}°C / {condition['temperature']}°F</b></td></tr>"
|
||||
)
|
||||
|
||||
res += (
|
||||
f"<tr><td>{gettext('Feels like')}</td><td>{f_to_c(condition['apparentTemperature'])}°C / "
|
||||
f"{condition['apparentTemperature']}°F</td></tr>"
|
||||
)
|
||||
|
||||
res += (
|
||||
f"<tr><td>{gettext('Wind')}</td><td>{condition['windBearing']}° — "
|
||||
f"{(condition['windSpeed'] * 1.6093440006147):.2f} km/h / {condition['windSpeed']} mph</td></tr>"
|
||||
)
|
||||
|
||||
res += f"<tr><td>{gettext('Visibility')}</td><td>{condition['visibility']} km</td>"
|
||||
|
||||
res += f"<tr><td>{gettext('Humidity')}</td><td>{(condition['humidity'] * 100):.1f}%</td></tr>"
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def generate_day_table(day):
|
||||
res = ""
|
||||
|
||||
res += (
|
||||
f"<tr><td>{gettext('Min temp.')}</td><td>{f_to_c(day['temperatureLow'])}°C / "
|
||||
f"{day['temperatureLow']}°F</td></tr>"
|
||||
)
|
||||
res += (
|
||||
f"<tr><td>{gettext('Max temp.')}</td><td>{f_to_c(day['temperatureHigh'])}°C / "
|
||||
f"{day['temperatureHigh']}°F</td></tr>"
|
||||
)
|
||||
res += f"<tr><td>{gettext('UV index')}</td><td>{day['uvIndex']}</td></tr>"
|
||||
res += (
|
||||
f"<tr><td>{gettext('Sunrise')}</td><td>{datetime.fromtimestamp(day['sunriseTime']).strftime('%H:%M')}</td></tr>"
|
||||
)
|
||||
res += (
|
||||
f"<tr><td>{gettext('Sunset')}</td><td>{datetime.fromtimestamp(day['sunsetTime']).strftime('%H:%M')}</td></tr>"
|
||||
)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def request(query, params):
|
||||
params["url"] = url.format(query=quote(query), lang=params['language'].split('-')[0])
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def f_to_c(temperature):
|
||||
return "%.2f" % ((temperature - 32) / 1.8)
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
if resp.text.strip() == "ddg_spice_forecast();":
|
||||
return []
|
||||
|
||||
result = loads(resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2])
|
||||
|
||||
current = result["currently"]
|
||||
|
||||
title = result['flags']['ddg-location']
|
||||
|
||||
infobox = f"<h3>{gettext('Current condition')}</h3><table><tbody>"
|
||||
|
||||
infobox += generate_condition_table(current)
|
||||
|
||||
infobox += "</tbody></table>"
|
||||
|
||||
last_date = None
|
||||
|
||||
for time in result['hourly']['data']:
|
||||
current_time = datetime.fromtimestamp(time['time'])
|
||||
|
||||
if last_date != current_time.date():
|
||||
if last_date is not None:
|
||||
infobox += "</tbody></table>"
|
||||
|
||||
infobox += f"<h3>{current_time.strftime('%Y-%m-%d')}</h3>"
|
||||
|
||||
infobox += "<table><tbody>"
|
||||
|
||||
for day in result['daily']['data']:
|
||||
if datetime.fromtimestamp(day['time']).date() == current_time.date():
|
||||
infobox += generate_day_table(day)
|
||||
|
||||
infobox += "</tbody></table><table><tbody>"
|
||||
|
||||
last_date = current_time.date()
|
||||
|
||||
infobox += f"<tr><td rowspan=\"7\"><b>{current_time.strftime('%H:%M')}</b></td></tr>"
|
||||
|
||||
infobox += generate_condition_table(time)
|
||||
|
||||
infobox += "</tbody></table>"
|
||||
|
||||
results.append(
|
||||
{
|
||||
"infobox": title,
|
||||
"content": infobox,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
|
@ -7,6 +7,7 @@ import re
|
|||
from urllib.parse import quote, urljoin
|
||||
from lxml import html
|
||||
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
||||
from searx.network import raise_for_httperror
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
@ -47,6 +48,7 @@ def request(query, params):
|
|||
# after the last page of results, spelling corrections are returned after a HTTP redirect
|
||||
# whatever the page number is
|
||||
params['soft_max_redirects'] = 1
|
||||
params['raise_for_httperror'] = False
|
||||
return params
|
||||
|
||||
|
||||
|
|
@ -56,6 +58,11 @@ def response(resp):
|
|||
'''
|
||||
results = []
|
||||
|
||||
if resp.status_code == 404:
|
||||
return results
|
||||
|
||||
raise_for_httperror(resp)
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
number_of_results_element = eval_xpath_getindex(
|
||||
|
|
|
|||
searx/engines/emojipedia.py (new file)
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Emojipedia

Emojipedia is an emoji reference website which documents the meaning and
common usage of emoji characters in the Unicode Standard. It is owned by Zedge
since 2021. Emojipedia is a voting member of The Unicode Consortium.[1]

[1] https://en.wikipedia.org/wiki/Emojipedia
"""

from urllib.parse import urlencode
from lxml import html

from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    extract_text,
)

about = {
    "website": 'https://emojipedia.org',
    "wikidata_id": 'Q22908129',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = []
paging = False
time_range_support = False

base_url = 'https://emojipedia.org'
search_url = base_url + '/search/?{query}'


def request(query, params):
    params['url'] = search_url.format(
        query=urlencode({'q': query}),
    )
    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, "//ol[@class='search-results']/li"):

        extracted_desc = extract_text(eval_xpath_getindex(result, './/p', 0))

        if 'No results found.' in extracted_desc:
            break

        link = eval_xpath_getindex(result, './/h2/a', 0)

        url = base_url + link.attrib.get('href')
        title = extract_text(link)
        content = extracted_desc

        res = {'url': url, 'title': title, 'content': content}

        results.append(res)

    return results

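A quick way to see what the XPath selection in response() above extracts is to run the same expressions over a small HTML fragment with plain lxml. Illustration only, not part of the patch; the fragment below is made up for this sketch:

# Illustration only: the same selectors as response() above, on a hypothetical fragment.
from lxml import html

sample = """
<ol class="search-results">
  <li><h2><a href="/grinning-face/">Grinning Face</a></h2><p>A classic smiley.</p></li>
</ol>
"""
dom = html.fromstring(sample)
for li in dom.xpath("//ol[@class='search-results']/li"):
    link = li.xpath('.//h2/a')[0]
    print({
        'url': 'https://emojipedia.org' + link.get('href'),
        'title': link.text_content().strip(),
        'content': li.xpath('.//p')[0].text_content().strip(),
    })
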
@ -3,7 +3,7 @@
|
|||
"""
|
||||
Gigablast (Web)
|
||||
"""
|
||||
# pylint: disable=invalid-name
|
||||
# pylint: disable=invalid-name, use-dict-literal
|
||||
|
||||
import re
|
||||
from time import time
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ def response(resp):
|
|||
|
||||
search_res = loads(resp.text)
|
||||
|
||||
# check if items are recieved
|
||||
# check if items are received
|
||||
if 'items' not in search_res:
|
||||
return []
|
||||
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ The google WEB engine itself has a special setup option:
|
|||
|
||||
- name: google
|
||||
...
|
||||
use_mobile_ui: true
|
||||
use_mobile_ui: false
|
||||
|
||||
``use_mobile_ui``: (default: ``true``)
|
||||
``use_mobile_ui``: (default: ``false``)
|
||||
Enables to use *mobile endpoint* to bypass the google blocking (see
|
||||
:issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More
|
||||
results` is not affected by Google rate limiting and we can still do requests
|
||||
|
|
@ -45,6 +45,7 @@ categories = ['general', 'web']
|
|||
paging = True
|
||||
time_range_support = True
|
||||
safesearch = True
|
||||
send_accept_language_header = True
|
||||
use_mobile_ui = False
|
||||
supported_languages_url = 'https://www.google.com/preferences?#languages'
|
||||
|
||||
|
|
@ -111,21 +112,14 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
|
|||
# specific xpath variables
|
||||
# ------------------------
|
||||
|
||||
# google results are grouped into <div class="jtfYYd ..." ../>
|
||||
results_xpath = '//div[@class="jtfYYd"]'
|
||||
results_xpath = './/div[@data-sokoban-container]'
|
||||
title_xpath = './/a/h3[1]'
|
||||
href_xpath = './/a[h3]/@href'
|
||||
content_xpath = './/div[@data-content-feature=1]'
|
||||
|
||||
# google *sections* are no usual *results*, we ignore them
|
||||
g_section_with_header = './g-section-with-header'
|
||||
|
||||
# the title is a h3 tag relative to the result group
|
||||
title_xpath = './/h3[1]'
|
||||
|
||||
# in the result group there is <div class="yuRUbf" ../> it's first child is a <a
|
||||
# href=...>
|
||||
href_xpath = './/div[@class="yuRUbf"]//a/@href'
|
||||
|
||||
# in the result group there is <div class="VwiC3b ..." ../> containing the *content*
|
||||
content_xpath = './/div[contains(@class, "VwiC3b")]'
|
||||
|
||||
# Suggestions are links placed in a *card-section*, we extract only the text
|
||||
# from the links not the links itself.
|
||||
|
|
@ -241,16 +235,6 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|||
# language.
|
||||
ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
|
||||
|
||||
# Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5
|
||||
ret_val['headers']['Accept-Language'] = ','.join(
|
||||
[
|
||||
lang_country,
|
||||
language + ';q=0.8,',
|
||||
'en;q=0.6',
|
||||
'*;q=0.5',
|
||||
]
|
||||
)
|
||||
|
||||
return ret_val
|
||||
|
||||
|
||||
|
|
@ -270,7 +254,7 @@ def request(query, params):
|
|||
if use_mobile_ui:
|
||||
additional_parameters = {
|
||||
'asearch': 'arc',
|
||||
'async': 'use_ac:true,_fmt:pc',
|
||||
'async': 'use_ac:true,_fmt:prog',
|
||||
}
|
||||
|
||||
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
||||
|
|
@ -298,6 +282,7 @@ def request(query, params):
|
|||
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
|
||||
params['url'] = query_url
|
||||
|
||||
params['cookies']['CONSENT'] = "YES+"
|
||||
params['headers'].update(lang_info['headers'])
|
||||
if use_mobile_ui:
|
||||
params['headers']['Accept'] = '*/*'
|
||||
|
|
@ -341,14 +326,14 @@ def response(resp):
|
|||
|
||||
# google *sections*
|
||||
if extract_text(eval_xpath(result, g_section_with_header)):
|
||||
logger.debug("ingoring <g-section-with-header>")
|
||||
logger.debug("ignoring <g-section-with-header>")
|
||||
continue
|
||||
|
||||
try:
|
||||
title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
|
||||
if title_tag is None:
|
||||
# this not one of the common google results *section*
|
||||
logger.debug('ingoring item from the result_xpath list: missing title')
|
||||
logger.debug('ignoring item from the result_xpath list: missing title')
|
||||
continue
|
||||
title = extract_text(title_tag)
|
||||
url = eval_xpath_getindex(result, href_xpath, 0, None)
|
||||
|
|
@ -356,7 +341,7 @@ def response(resp):
|
|||
continue
|
||||
content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
|
||||
if content is None:
|
||||
logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title)
|
||||
logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
|
||||
continue
|
||||
|
||||
logger.debug('add link to results: %s', title)
|
||||
|
|
|
|||
|
|
@ -1,28 +1,20 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""This is the implementation of the google images engine.
|
||||
"""This is the implementation of the google images engine using the google
|
||||
internal API used the Google Go Android app.
|
||||
|
||||
.. admonition:: Content-Security-Policy (CSP)
|
||||
This internal API offer results in
|
||||
|
||||
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
||||
``data:`` scheme)::
|
||||
- JSON (_fmt:json)
|
||||
- Protobuf (_fmt:pb)
|
||||
- Protobuf compressed? (_fmt:pc)
|
||||
- HTML (_fmt:html)
|
||||
- Protobuf encoded in JSON (_fmt:jspb).
|
||||
|
||||
Header set Content-Security-Policy "img-src 'self' data: ;"
|
||||
|
||||
.. _data URLs:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
||||
"""
|
||||
|
||||
import re
|
||||
from urllib.parse import urlencode, unquote
|
||||
from lxml import html
|
||||
|
||||
from searx.utils import (
|
||||
eval_xpath,
|
||||
eval_xpath_list,
|
||||
eval_xpath_getindex,
|
||||
extract_text,
|
||||
)
|
||||
from urllib.parse import urlencode
|
||||
from json import loads
|
||||
|
||||
from searx.engines.google import (
|
||||
get_lang_info,
|
||||
|
|
@ -42,90 +34,24 @@ about = {
|
|||
"official_api_documentation": 'https://developers.google.com/custom-search',
|
||||
"use_official_api": False,
|
||||
"require_api_key": False,
|
||||
"results": 'HTML',
|
||||
"results": 'JSON',
|
||||
}
|
||||
|
||||
# engine dependent config
|
||||
categories = ['images', 'web']
|
||||
paging = False
|
||||
paging = True
|
||||
use_locale_domain = True
|
||||
time_range_support = True
|
||||
safesearch = True
|
||||
send_accept_language_header = True
|
||||
|
||||
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
|
||||
|
||||
|
||||
def scrap_out_thumbs(dom):
|
||||
"""Scrap out thumbnail data from <script> tags."""
|
||||
ret_val = {}
|
||||
for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'):
|
||||
_script = script.text
|
||||
# _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....');
|
||||
_thumb_no, _img_data = _script[len("_setImgSrc(") : -2].split(",", 1)
|
||||
_thumb_no = _thumb_no.replace("'", "")
|
||||
_img_data = _img_data.replace("'", "")
|
||||
_img_data = _img_data.replace(r"\/", r"/")
|
||||
ret_val[_thumb_no] = _img_data.replace(r"\x3d", "=")
|
||||
return ret_val
|
||||
|
||||
|
||||
# [0, "-H96xjSoW5DsgM", ["https://encrypted-tbn0.gstatic.com/images?q...", 155, 324]
|
||||
# , ["https://assets.cdn.moviepilot.de/files/d3bf..", 576, 1200],
|
||||
_RE_JS_IMAGE_URL = re.compile(
|
||||
r'"'
|
||||
r'([^"]*)' # -H96xjSoW5DsgM
|
||||
r'",\s*\["'
|
||||
r'https://[^\.]*\.gstatic.com/images[^"]*' # https://encrypted-tbn0.gstatic.com/images?q...
|
||||
r'[^\[]*\["'
|
||||
r'(https?://[^"]*)' # https://assets.cdn.moviepilot.de/files/d3bf...
|
||||
)
|
||||
|
||||
|
||||
def parse_urls_img_from_js(dom):
|
||||
|
||||
# There are two HTML script tags starting with a JS function
|
||||
# 'AF_initDataCallback(...)'
|
||||
#
|
||||
# <script nonce="zscm+Ab/JzBk1Qd4GY6wGQ">
|
||||
# AF_initDataCallback({key: 'ds:0', hash: '1', data:[], sideChannel: {}});
|
||||
# </script>
|
||||
# <script nonce="zscm+Ab/JzBk1Qd4GY6wGQ">
|
||||
# AF_initDataCallback({key: 'ds:1', hash: '2', data:[null,[[["online_chips",[["the big",
|
||||
# ["https://encrypted-tbn0.gstatic.com/images?q...",null,null,true,[null,0],f
|
||||
# ...
|
||||
# </script>
|
||||
#
|
||||
# The second script contains the URLs of the images.
|
||||
|
||||
# The AF_initDataCallback(..) is called with very large dictionary, that
|
||||
# looks like JSON but it is not JSON since it contains JS variables and
|
||||
# constants like 'null' (we can't use a JSON parser for).
|
||||
#
|
||||
# The alternative is to parse the entire <script> and find all image URLs by
|
||||
# a regular expression.
|
||||
|
||||
img_src_script = eval_xpath_getindex(dom, '//script[contains(., "AF_initDataCallback({key: ")]', 1).text
|
||||
data_id_to_img_url = {}
|
||||
for data_id, url in _RE_JS_IMAGE_URL.findall(img_src_script):
|
||||
data_id_to_img_url[data_id] = url
|
||||
return data_id_to_img_url
|
||||
|
||||
|
||||
def get_img_url_by_data_id(data_id_to_img_url, img_node):
|
||||
"""Get full image URL by @data-id from parent element."""
|
||||
|
||||
data_id = eval_xpath_getindex(img_node, '../../../@data-id', 0)
|
||||
img_url = data_id_to_img_url.get(data_id, '')
|
||||
img_url = unquote(img_url.replace(r'\u00', r'%'))
|
||||
|
||||
return img_url
|
||||
|
||||
|
||||
def request(query, params):
|
||||
"""Google-Video search request"""
|
||||
"""Google-Image search request"""
|
||||
|
||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||
|
||||
query_url = (
|
||||
'https://'
|
||||
|
|
@ -139,7 +65,8 @@ def request(query, params):
|
|||
**lang_info['params'],
|
||||
'ie': "utf8",
|
||||
'oe': "utf8",
|
||||
'num': 30,
|
||||
'asearch': 'isch',
|
||||
'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
|
||||
}
|
||||
)
|
||||
)
|
||||
|
|
@ -151,7 +78,8 @@ def request(query, params):
|
|||
params['url'] = query_url
|
||||
|
||||
params['headers'].update(lang_info['headers'])
|
||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||
params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip'
|
||||
params['headers']['Accept'] = '*/*'
|
||||
return params
|
||||
|
||||
|
||||
|
|
@ -161,78 +89,34 @@ def response(resp):
|
|||
|
||||
detect_google_sorry(resp)
|
||||
|
||||
# convert the text to dom
|
||||
dom = html.fromstring(resp.text)
|
||||
img_bas64_map = scrap_out_thumbs(dom)
|
||||
data_id_to_img_url = parse_urls_img_from_js(dom)
|
||||
json_start = resp.text.find('{"ischj":')
|
||||
json_data = loads(resp.text[json_start:])
|
||||
|
||||
# parse results
|
||||
#
|
||||
# root element::
|
||||
# <div id="islmp" ..>
|
||||
# result div per image::
|
||||
# <div jsmodel="tTXmib"> / <div jsaction="..." data-id="..."
|
||||
# The data-id matches to a item in a json-data structure in::
|
||||
# <script nonce="I+vqelcy/01CKiBJi5Z1Ow">AF_initDataCallback({key: 'ds:1', ... data:function(){return [ ...
|
||||
# In this structure the link to the origin PNG, JPG or whatever is given
|
||||
# first link per image-div contains a <img> with the data-iid for bas64 encoded image data::
|
||||
# <img class="rg_i Q4LuWd" data-iid="0"
|
||||
# second link per image-div is the target link::
|
||||
# <a class="VFACy kGQAp" href="https://en.wikipedia.org/wiki/The_Sacrament_of_the_Last_Supper">
|
||||
# the second link also contains two div tags with the *description* and *publisher*::
|
||||
# <div class="WGvvNb">The Sacrament of the Last Supper ...</div>
|
||||
# <div class="fxgdke">en.wikipedia.org</div>
|
||||
for item in json_data["ischj"]["metadata"]:
|
||||
|
||||
root = eval_xpath(dom, '//div[@id="islmp"]')
|
||||
if not root:
|
||||
logger.error("did not find root element id='islmp'")
|
||||
return results
|
||||
result_item = {
|
||||
'url': item["result"]["referrer_url"],
|
||||
'title': item["result"]["page_title"],
|
||||
'content': item["text_in_grid"]["snippet"],
|
||||
'source': item["result"]["site_title"],
|
||||
'img_format': f'{item["original_image"]["width"]} x {item["original_image"]["height"]}',
|
||||
'img_src': item["original_image"]["url"],
|
||||
'thumbnail_src': item["thumbnail"]["url"],
|
||||
'template': 'images.html',
|
||||
}
|
||||
|
||||
root = root[0]
|
||||
for img_node in eval_xpath_list(root, './/img[contains(@class, "rg_i")]'):
|
||||
author = item["result"].get('iptc', {}).get('creator')
|
||||
if author:
|
||||
result_item['author'] = ', '.join(author)
|
||||
|
||||
img_alt = eval_xpath_getindex(img_node, '@alt', 0)
|
||||
copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
|
||||
if copyright_notice:
|
||||
result_item['source'] += ' / ' + copyright_notice
|
||||
|
||||
img_base64_id = eval_xpath(img_node, '@data-iid')
|
||||
if img_base64_id:
|
||||
img_base64_id = img_base64_id[0]
|
||||
thumbnail_src = img_bas64_map[img_base64_id]
|
||||
else:
|
||||
thumbnail_src = eval_xpath(img_node, '@src')
|
||||
if not thumbnail_src:
|
||||
thumbnail_src = eval_xpath(img_node, '@data-src')
|
||||
if thumbnail_src:
|
||||
thumbnail_src = thumbnail_src[0]
|
||||
else:
|
||||
thumbnail_src = ''
|
||||
file_size = item.get('gsa', {}).get('file_size')
|
||||
if file_size:
|
||||
result_item['source'] += ' (%s)' % file_size
|
||||
|
||||
link_node = eval_xpath_getindex(img_node, '../../../a[2]', 0)
|
||||
url = eval_xpath_getindex(link_node, '@href', 0, None)
|
||||
if url is None:
|
||||
logger.error("missing @href in node: %s", html.tostring(link_node))
|
||||
continue
|
||||
|
||||
pub_nodes = eval_xpath(link_node, './div/div')
|
||||
pub_descr = img_alt
|
||||
pub_source = ''
|
||||
if pub_nodes:
|
||||
pub_descr = extract_text(pub_nodes[0])
|
||||
pub_source = extract_text(pub_nodes[1])
|
||||
|
||||
src_url = get_img_url_by_data_id(data_id_to_img_url, img_node)
|
||||
if not src_url:
|
||||
src_url = thumbnail_src
|
||||
|
||||
results.append(
|
||||
{
|
||||
'url': url,
|
||||
'title': img_alt,
|
||||
'content': pub_descr,
|
||||
'source': pub_source,
|
||||
'img_src': src_url,
|
||||
'thumbnail_src': thumbnail_src,
|
||||
'template': 'images.html',
|
||||
}
|
||||
)
|
||||
results.append(result_item)
|
||||
|
||||
return results
|
@ -14,7 +14,6 @@ ignores some parameters from the common :ref:`google API`:
|
|||
# pylint: disable=invalid-name
|
||||
|
||||
import binascii
|
||||
from datetime import datetime
|
||||
import re
|
||||
from urllib.parse import urlencode
|
||||
from base64 import b64decode
|
||||
|
|
@ -71,13 +70,13 @@ time_range_support = True
|
|||
#
|
||||
# safesearch : results are identitical for safesearch=0 and safesearch=2
|
||||
safesearch = False
|
||||
send_accept_language_header = True
|
||||
|
||||
|
||||
def request(query, params):
|
||||
"""Google-News search request"""
|
||||
|
||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||
|
||||
# google news has only one domain
|
||||
lang_info['subdomain'] = 'news.google.com'
|
||||
|
|
@ -98,22 +97,14 @@ def request(query, params):
|
|||
+ lang_info['subdomain']
|
||||
+ '/search'
|
||||
+ "?"
|
||||
+ urlencode(
|
||||
{
|
||||
'q': query,
|
||||
**lang_info['params'],
|
||||
'ie': "utf8",
|
||||
'oe': "utf8",
|
||||
'gl': lang_info['country'],
|
||||
}
|
||||
)
|
||||
+ urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'gl': lang_info['country']})
|
||||
+ ('&ceid=%s' % ceid)
|
||||
) # ceid includes a ':' character which must not be urlencoded
|
||||
params['url'] = query_url
|
||||
|
||||
params['cookies']['CONSENT'] = "YES+"
|
||||
params['headers'].update(lang_info['headers'])
|
||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||
params['headers']['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % datetime.now().strftime("%Y%m%d")
|
||||
|
||||
return params
|
||||
|
||||
|
|
@ -150,7 +141,7 @@ def response(resp):
|
|||
padding = (4 - (len(jslog) % 4)) * "="
|
||||
jslog = b64decode(jslog + padding)
|
||||
except binascii.Error:
|
||||
# URL cant be read, skip this result
|
||||
# URL can't be read, skip this result
|
||||
continue
|
||||
|
||||
# now we have : b'[null, ... null,"https://www.cnn.com/.../index.html"]'
|
||||
|
|
@ -159,24 +150,12 @@ def response(resp):
|
|||
# the first <h3> tag in the <article> contains the title of the link
|
||||
title = extract_text(eval_xpath(result, './article/h3[1]'))
|
||||
|
||||
# the first <div> tag in the <article> contains the content of the link
|
||||
content = extract_text(eval_xpath(result, './article/div[1]'))
|
||||
# The pub_date is mostly a string like 'yesertday', not a real
|
||||
# timezone date or time. Therefore we can't use publishedDate.
|
||||
pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time'))
|
||||
pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a'))
|
||||
|
||||
# the second <div> tag contains origin publisher and the publishing date
|
||||
|
||||
pub_date = extract_text(eval_xpath(result, './article/div[2]//time'))
|
||||
pub_origin = extract_text(eval_xpath(result, './article/div[2]//a'))
|
||||
|
||||
pub_info = []
|
||||
if pub_origin:
|
||||
pub_info.append(pub_origin)
|
||||
if pub_date:
|
||||
# The pub_date is mostly a string like 'yesertday', not a real
|
||||
# timezone date or time. Therefore we can't use publishedDate.
|
||||
pub_info.append(pub_date)
|
||||
pub_info = ', '.join(pub_info)
|
||||
if pub_info:
|
||||
content = pub_info + ': ' + content
|
||||
content = ' / '.join([x for x in [pub_origin, pub_date] if x])
|
||||
|
||||
# The image URL is located in a preceding sibling <img> tag, e.g.:
|
||||
# "https://lh3.googleusercontent.com/DjhQh7DMszk.....z=-p-h100-w100"
|
||||
|
|
|
|||
searx/engines/google_play_apps.py (new file)
@@ -0,0 +1,71 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Google Play Apps
"""

from urllib.parse import urlencode
from lxml import html
from searx.utils import (
    eval_xpath,
    extract_url,
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
)

about = {
    "website": "https://play.google.com/",
    "wikidata_id": "Q79576",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

categories = ["files", "apps"]
send_accept_language_header = True

search_url = "https://play.google.com/store/search?{query}&c=apps"


def request(query, params):
    params["url"] = search_url.format(query=urlencode({"q": query}))
    params['cookies']['CONSENT'] = "YES+"

    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    if eval_xpath(dom, '//div[@class="v6DsQb"]'):
        return []

    spot = eval_xpath_getindex(dom, '//div[@class="ipRz4"]', 0, None)
    if spot is not None:
        url = extract_url(eval_xpath(spot, './a[@class="Qfxief"]/@href'), search_url)
        title = extract_text(eval_xpath(spot, './/div[@class="vWM94c"]'))
        content = extract_text(eval_xpath(spot, './/div[@class="LbQbAe"]'))
        img = extract_text(eval_xpath(spot, './/img[@class="T75of bzqKMd"]/@src'))

        results.append({"url": url, "title": title, "content": content, "img_src": img})

    more = eval_xpath_list(dom, '//c-wiz[@jsrenderer="RBsfwb"]//div[@role="listitem"]', min_len=1)
    for result in more:
        url = extract_url(eval_xpath(result, ".//a/@href"), search_url)
        title = extract_text(eval_xpath(result, './/span[@class="DdYX5"]'))
        content = extract_text(eval_xpath(result, './/span[@class="wMUdtb"]'))
        img = extract_text(
            eval_xpath(
                result,
                './/img[@class="T75of stzEZd" or @class="T75of etjhNc Q8CSx "]/@src',
            )
        )

        results.append({"url": url, "title": title, "content": content, "img_src": img})

    for suggestion in eval_xpath_list(dom, '//c-wiz[@jsrenderer="qyd4Kb"]//div[@class="ULeU3b neq64b"]'):
        results.append({"suggestion": extract_text(eval_xpath(suggestion, './/div[@class="Epkrse "]'))})

    return results

@ -13,10 +13,12 @@ Definitions`_.
|
|||
|
||||
from urllib.parse import urlencode
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from lxml import html
|
||||
|
||||
from searx.utils import (
|
||||
eval_xpath,
|
||||
eval_xpath_getindex,
|
||||
eval_xpath_list,
|
||||
extract_text,
|
||||
)
|
||||
|
|
@ -46,12 +48,13 @@ about = {
|
|||
}
|
||||
|
||||
# engine dependent config
|
||||
categories = ['science']
|
||||
categories = ['science', 'scientific publications']
|
||||
paging = True
|
||||
language_support = True
|
||||
use_locale_domain = True
|
||||
time_range_support = True
|
||||
safesearch = False
|
||||
send_accept_language_header = True
|
||||
|
||||
|
||||
def time_range_url(params):
|
||||
|
|
@ -75,7 +78,6 @@ def request(query, params):
|
|||
|
||||
offset = (params['pageno'] - 1) * 10
|
||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||
|
||||
# subdomain is: scholar.google.xy
|
||||
lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
|
||||
|
|
@ -85,20 +87,13 @@ def request(query, params):
|
|||
+ lang_info['subdomain']
|
||||
+ '/scholar'
|
||||
+ "?"
|
||||
+ urlencode(
|
||||
{
|
||||
'q': query,
|
||||
**lang_info['params'],
|
||||
'ie': "utf8",
|
||||
'oe': "utf8",
|
||||
'start': offset,
|
||||
}
|
||||
)
|
||||
+ urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset})
|
||||
)
|
||||
|
||||
query_url += time_range_url(params)
|
||||
params['url'] = query_url
|
||||
|
||||
params['cookies']['CONSENT'] = "YES+"
|
||||
params['headers'].update(lang_info['headers'])
|
||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||
|
||||
|
|
@ -106,7 +101,43 @@ def request(query, params):
|
|||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
def parse_gs_a(text: Optional[str]):
|
||||
"""Parse the text written in green.
|
||||
|
||||
Possible formats:
|
||||
* "{authors} - {journal}, {year} - {publisher}"
|
||||
* "{authors} - {year} - {publisher}"
|
||||
* "{authors} - {publisher}"
|
||||
"""
|
||||
if text is None or text == "":
|
||||
return None, None, None, None
|
||||
|
||||
s_text = text.split(' - ')
|
||||
authors = s_text[0].split(', ')
|
||||
publisher = s_text[-1]
|
||||
if len(s_text) != 3:
|
||||
return authors, None, publisher, None
|
||||
|
||||
# the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}"
|
||||
# get journal and year
|
||||
journal_year = s_text[1].split(', ')
|
||||
# journal is optional and may contains some coma
|
||||
if len(journal_year) > 1:
|
||||
journal = ', '.join(journal_year[0:-1])
|
||||
if journal == '…':
|
||||
journal = None
|
||||
else:
|
||||
journal = None
|
||||
# year
|
||||
year = journal_year[-1]
|
||||
try:
|
||||
publishedDate = datetime.strptime(year.strip(), '%Y')
|
||||
except ValueError:
|
||||
publishedDate = None
|
||||
return authors, journal, publisher, publishedDate
|
||||
|
||||
|
||||
def response(resp): # pylint: disable=too-many-locals
|
||||
"""Get response from google's search request"""
|
||||
results = []
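Worked illustration (not part of the patch) of the formats handled by parse_gs_a() above, on two hypothetical "green line" strings:

# parse_gs_a('J Doe, A Roe - Nature, 2019 - nature.com')
#   -> (['J Doe', 'A Roe'], 'Nature', 'nature.com', datetime(2019, 1, 1, 0, 0))
# parse_gs_a('J Doe - nature.com')
#   -> (['J Doe'], None, 'nature.com', None)
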
@ -119,30 +150,53 @@ def response(resp):
|
|||
dom = html.fromstring(resp.text)
|
||||
|
||||
# parse results
|
||||
for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'):
|
||||
for result in eval_xpath_list(dom, '//div[@data-cid]'):
|
||||
|
||||
title = extract_text(eval_xpath(result, './h3[1]//a'))
|
||||
title = extract_text(eval_xpath(result, './/h3[1]//a'))
|
||||
|
||||
if not title:
|
||||
# this is a [ZITATION] block
|
||||
continue
|
||||
|
||||
url = eval_xpath(result, './h3[1]//a/@href')[0]
|
||||
content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or ''
|
||||
|
||||
pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]'))
|
||||
if pub_info:
|
||||
content += "[%s]" % pub_info
|
||||
|
||||
pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
|
||||
if pub_type:
|
||||
title = title + " " + pub_type
|
||||
pub_type = pub_type[1:-1].lower()
|
||||
|
||||
url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0)
|
||||
content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]'))
|
||||
authors, journal, publisher, publishedDate = parse_gs_a(
|
||||
extract_text(eval_xpath(result, './/div[@class="gs_a"]'))
|
||||
)
|
||||
if publisher in url:
|
||||
publisher = None
|
||||
|
||||
# cited by
|
||||
comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'))
|
||||
|
||||
# link to the html or pdf document
|
||||
html_url = None
|
||||
pdf_url = None
|
||||
doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None)
|
||||
doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
|
||||
if doc_type == "[PDF]":
|
||||
pdf_url = doc_url
|
||||
else:
|
||||
html_url = doc_url
|
||||
|
||||
results.append(
|
||||
{
|
||||
'template': 'paper.html',
|
||||
'type': pub_type,
|
||||
'url': url,
|
||||
'title': title,
|
||||
'authors': authors,
|
||||
'publisher': publisher,
|
||||
'journal': journal,
|
||||
'publishedDate': publishedDate,
|
||||
'content': content,
|
||||
'comments': comments,
|
||||
'html_url': html_url,
|
||||
'pdf_url': pdf_url,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ language_support = True
|
|||
use_locale_domain = True
|
||||
time_range_support = True
|
||||
safesearch = True
|
||||
send_accept_language_header = True
|
||||
|
||||
RE_CACHE = {}
|
||||
|
||||
|
|
@ -111,22 +112,13 @@ def request(query, params):
|
|||
"""Google-Video search request"""
|
||||
|
||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
||||
|
||||
query_url = (
|
||||
'https://'
|
||||
+ lang_info['subdomain']
|
||||
+ '/search'
|
||||
+ "?"
|
||||
+ urlencode(
|
||||
{
|
||||
'q': query,
|
||||
'tbm': "vid",
|
||||
**lang_info['params'],
|
||||
'ie': "utf8",
|
||||
'oe': "utf8",
|
||||
}
|
||||
)
|
||||
+ urlencode({'q': query, 'tbm': "vid", **lang_info['params'], 'ie': "utf8", 'oe': "utf8"})
|
||||
)
|
||||
|
||||
if params['time_range'] in time_range_dict:
|
||||
|
|
@ -135,6 +127,7 @@ def request(query, params):
|
|||
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
|
||||
params['url'] = query_url
|
||||
|
||||
params['cookies']['CONSENT'] = "YES+"
|
||||
params['headers'].update(lang_info['headers'])
|
||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||
return params
|
||||
|
|
@ -157,7 +150,7 @@ def response(resp):
|
|||
|
||||
# ignore google *sections*
|
||||
if extract_text(eval_xpath(result, g_section_with_header)):
|
||||
logger.debug("ingoring <g-section-with-header>")
|
||||
logger.debug("ignoring <g-section-with-header>")
|
||||
continue
|
||||
|
||||
# ingnore articles without an image id / e.g. news articles
|
||||
|
|
|
|||
|
|
@ -53,19 +53,16 @@ def response(resp):
|
|||
if 'reading' in title_raw:
|
||||
title += ' (' + title_raw['reading'] + ')'
|
||||
alt_forms.append(title)
|
||||
|
||||
#
|
||||
|
||||
result_url = urljoin(BASE_URL, page['slug'])
|
||||
definitions = get_definitions(page)
|
||||
|
||||
# For results, we'll return the URL, all alternative forms (as title),
|
||||
# and all definitions (as description) truncated to 300 characters.
|
||||
content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
|
||||
results.append({
|
||||
'url': result_url,
|
||||
'title': ", ".join(alt_forms),
|
||||
'content': content[:300] + (content[300:] and '...')
|
||||
})
|
||||
results.append(
|
||||
{'url': result_url, 'title': ", ".join(alt_forms), 'content': content[:300] + (content[300:] and '...')}
|
||||
)
|
||||
|
||||
# Like Wordnik, we'll return the first result in an infobox too.
|
||||
if first_result:
|
||||
|
|
@ -93,11 +90,13 @@ def get_definitions(page):
|
|||
extra.append(', '.join(defn_raw['info']).capitalize() + '. ')
|
||||
if defn_raw.get('restrictions'):
|
||||
extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ')
|
||||
definitions.append((
|
||||
', '.join(defn_raw['parts_of_speech']),
|
||||
'; '.join(defn_raw['english_definitions']),
|
||||
''.join(extra)[:-1],
|
||||
))
|
||||
definitions.append(
|
||||
(
|
||||
', '.join(defn_raw['parts_of_speech']),
|
||||
'; '.join(defn_raw['english_definitions']),
|
||||
''.join(extra)[:-1],
|
||||
)
|
||||
)
|
||||
return definitions
|
||||
|
||||
|
||||
|
|
@ -109,12 +108,14 @@ def get_infobox(alt_forms, result_url, definitions):
|
|||
infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>')
|
||||
|
||||
# definitions
|
||||
infobox_content.append('''
|
||||
infobox_content.append(
|
||||
'''
|
||||
<small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
|
||||
and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
|
||||
by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
|
||||
<ul>
|
||||
''')
|
||||
'''
|
||||
)
|
||||
for pos, engdef, extra in definitions:
|
||||
if pos == 'Wikipedia definition':
|
||||
infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
|
||||
|
|
@ -132,5 +133,5 @@ def get_infobox(alt_forms, result_url, definitions):
|
|||
'title': 'Jisho.org',
|
||||
'url': result_url,
|
||||
}
|
||||
]
|
||||
],
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,11 @@ paging = False
|
|||
suggestion_query = ''
|
||||
results_query = ''
|
||||
|
||||
cookies = {}
|
||||
headers = {}
|
||||
'''Some engines might offer different result based on cookies or headers.
|
||||
Possible use-case: To set safesearch cookie or header to moderate.'''
|
||||
|
||||
# parameters for engines with paging support
|
||||
#
|
||||
# number of results on each page
|
||||
|
|
@ -88,6 +93,9 @@ def request(query, params):
|
|||
if paging and search_url.find('{pageno}') >= 0:
|
||||
fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num
|
||||
|
||||
params['cookies'].update(cookies)
|
||||
params['headers'].update(headers)
|
||||
|
||||
params['url'] = search_url.format(**fp)
|
||||
params['query'] = query
|
||||
|
||||
|
|
|
|||
searx/engines/lingva.py (new file)
@@ -0,0 +1,68 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Lingva (alternative Google Translate frontend)"""

from json import loads

about = {
    "website": 'https://lingva.ml',
    "wikidata_id": None,
    "official_api_documentation": 'https://github.com/thedaviddelta/lingva-translate#public-apis',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

engine_type = 'online_dictionary'
categories = ['general']

url = "https://lingva.ml"
search_url = "{url}/api/v1/{from_lang}/{to_lang}/{query}"


def request(_query, params):
    params['url'] = search_url.format(
        url=url, from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query']
    )
    return params


def response(resp):
    results = []

    result = loads(resp.text)
    info = result["info"]
    from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])

    if "typo" in info:
        results.append({"suggestion": from_to_prefix + info["typo"]})

    if 'definitions' in info:  # pylint: disable=too-many-nested-blocks
        for definition in info['definitions']:
            if 'list' in definition:
                for item in definition['list']:
                    if 'synonyms' in item:
                        for synonym in item['synonyms']:
                            results.append({"suggestion": from_to_prefix + synonym})

    infobox = ""

    for translation in info["extraTranslations"]:
        infobox += f"<b>{translation['type']}</b>"

        for word in translation["list"]:
            infobox += f"<dl><dt>{word['word']}</dt>"

            for meaning in word["meanings"]:
                infobox += f"<dd>{meaning}</dd>"

            infobox += "</dl>"

    results.append(
        {
            'infobox': result["translation"],
            'content': infobox,
        }
    )

    return results

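The infobox assembly in response() above can be exercised without the network. Illustration only, not part of the patch; the payload below is hypothetical but follows the shape the code consumes:

# Illustration only: build the infobox from a hypothetical Lingva-style payload.
sample = {
    "translation": "hola",
    "info": {
        "extraTranslations": [
            {"type": "interjection", "list": [{"word": "hola", "meanings": ["hello", "hi"]}]},
        ],
    },
}
infobox = ""
for translation in sample["info"]["extraTranslations"]:
    infobox += f"<b>{translation['type']}</b>"
    for word in translation["list"]:
        infobox += f"<dl><dt>{word['word']}</dt>"
        for meaning in word["meanings"]:
            infobox += f"<dd>{meaning}</dd>"
        infobox += "</dl>"
print({'infobox': sample["translation"], 'content': infobox})
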
searx/engines/metacpan.py (new file)
@@ -0,0 +1,79 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""metacpan
"""

from urllib.parse import urlunparse
from json import dumps

# about
about = {
    "website": 'https://metacpan.org/',
    "wikidata_id": 'Q841507',
    "official_api_documentation": 'https://github.com/metacpan/metacpan-api/blob/master/docs/API-docs.md',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
number_of_results = 20  # Don't put this over 5000
categories = ["it", "packages"]
disabled = True
shortcut = "cpan"
paging = True

query_data_template = {
    'query': {
        'multi_match': {
            'type': 'most_fields',
            'fields': ['documentation', 'documentation.*'],
            'analyzer': 'camelcase',
        }
    },
    'filter': {
        'bool': {
            'must': [
                {'exists': {'field': 'documentation'}},
                {'term': {'status': 'latest'}},
                {'term': {'indexed': 1}},
                {'term': {'authorized': 1}},
            ]
        }
    },
    "sort": [
        {"_score": {"order": "desc"}},
        {"date": {"order": "desc"}},
    ],
    '_source': ['documentation', "abstract"],
    'size': number_of_results,
}
search_url = urlunparse(["https", "fastapi.metacpan.org", "/v1/file/_search", "", "", ""])


def request(query, params):
    params["url"] = search_url
    params["method"] = "POST"
    query_data = query_data_template
    query_data["query"]["multi_match"]["query"] = query
    query_data["from"] = (params["pageno"] - 1) * number_of_results
    params["data"] = dumps(query_data)
    return params


def response(resp):
    results = []

    search_results = resp.json()["hits"]["hits"]
    for result in search_results:
        fields = result["_source"]
        module = fields["documentation"]
        results.append(
            {
                "url": "https://metacpan.org/pod/" + module,
                "title": module,
                "content": fields.get("abstract", ""),
            }
        )

    return results

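For a sense of what request() above sends, here is a sketch of the POST body for a hypothetical query "Plack" on page 2 (illustration only, not part of the patch; the template's filter and sort keys are omitted for brevity):

# Illustration only: the paging offset and multi_match query as filled in by request().
from json import dumps

number_of_results = 20
query_data = {
    'query': {'multi_match': {
        'type': 'most_fields',
        'fields': ['documentation', 'documentation.*'],
        'analyzer': 'camelcase',
        'query': 'Plack',                    # filled in by request()
    }},
    'from': (2 - 1) * number_of_results,     # pageno 2 -> offset 20
    'size': number_of_results,
}
print(dumps(query_data))  # sent as POST data to https://fastapi.metacpan.org/v1/file/_search
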
@ -5,7 +5,7 @@
|
|||
"""
|
||||
|
||||
import re
|
||||
from pymongo import MongoClient # pylint: disable=import-error
|
||||
from pymongo import MongoClient # pyright: ignore # pylint: disable=import-error
|
||||
|
||||
engine_type = 'offline'
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
# import error is ignored because the admin has to install mysql manually to use
|
||||
# the engine
|
||||
import mysql.connector # pylint: disable=import-error
|
||||
import mysql.connector # pyright: ignore # pylint: disable=import-error
|
||||
|
||||
engine_type = 'offline'
|
||||
auth_plugin = 'caching_sha2_password'
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ about = {
|
|||
# engine dependent config
|
||||
categories = ['map']
|
||||
paging = False
|
||||
language_support = True
|
||||
send_accept_language_header = True
|
||||
|
||||
# search-url
|
||||
base_url = 'https://nominatim.openstreetmap.org/'
|
||||
|
|
@ -141,6 +143,8 @@ def request(query, params):
|
|||
params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
|
||||
params['route'] = route_re.match(query)
|
||||
params['headers']['User-Agent'] = searx_useragent()
|
||||
if 'Accept-Language' not in params['headers']:
|
||||
params['headers']['Accept-Language'] = 'en'
|
||||
return params
|
||||
|
||||
|
||||
|
|
@ -202,7 +206,7 @@ def get_wikipedia_image(raw_value):
|
|||
return get_external_url('wikimedia_image', raw_value)
|
||||
|
||||
|
||||
def fetch_wikidata(nominatim_json, user_langage):
|
||||
def fetch_wikidata(nominatim_json, user_language):
|
||||
"""Update nominatim_json using the result of an unique to wikidata
|
||||
|
||||
For result in nominatim_json:
|
||||
|
|
@ -223,10 +227,10 @@ def fetch_wikidata(nominatim_json, user_langage):
|
|||
wd_to_results.setdefault(wd_id, []).append(result)
|
||||
|
||||
if wikidata_ids:
|
||||
user_langage = 'en' if user_langage == 'all' else user_langage
|
||||
user_language = 'en' if user_language == 'all' else user_language.split('-')[0]
|
||||
wikidata_ids_str = " ".join(wikidata_ids)
|
||||
query = wikidata_image_sparql.replace('%WIKIDATA_IDS%', sparql_string_escape(wikidata_ids_str)).replace(
|
||||
'%LANGUAGE%', sparql_string_escape(user_langage)
|
||||
'%LANGUAGE%', sparql_string_escape(user_language)
|
||||
)
|
||||
wikidata_json = send_wikidata_query(query)
|
||||
for wd_result in wikidata_json.get('results', {}).get('bindings', {}):
|
||||
|
|
@ -241,7 +245,7 @@ def fetch_wikidata(nominatim_json, user_langage):
|
|||
# overwrite wikipedia link
|
||||
wikipedia_name = wd_result.get('wikipediaName', {}).get('value')
|
||||
if wikipedia_name:
|
||||
result['extratags']['wikipedia'] = user_langage + ':' + wikipedia_name
|
||||
result['extratags']['wikipedia'] = user_language + ':' + wikipedia_name
|
||||
# get website if not already defined
|
||||
website = wd_result.get('website', {}).get('value')
|
||||
if (
|
||||
|
|
|
|||
|
|
@ -22,9 +22,7 @@ about = {
|
|||
categories = ["videos"]
|
||||
paging = True
|
||||
base_url = "https://peer.tube"
|
||||
supported_languages_url = (
|
||||
'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/views/Search.vue'
|
||||
)
|
||||
supported_languages_url = 'https://peer.tube/api/v1/videos/languages'
|
||||
|
||||
|
||||
# do search-request
|
||||
|
|
@ -84,9 +82,6 @@ def response(resp):
|
|||
|
||||
|
||||
def _fetch_supported_languages(resp):
|
||||
import re
|
||||
|
||||
# https://docs.python.org/3/howto/regex.html#greedy-versus-non-greedy
|
||||
videolanguages = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
|
||||
peertube_languages = [m.group(1) for m in re.finditer(r"\{ id: '([a-z]+)', label:", videolanguages.group(1))]
|
||||
videolanguages = resp.json()
|
||||
peertube_languages = list(videolanguages.keys())
|
||||
return peertube_languages
|
||||
|
|
|
|||
searx/engines/petal_images.py (new file)
@@ -0,0 +1,94 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Petalsearch Images

"""

from json import loads
from urllib.parse import urlencode
from datetime import datetime

from lxml import html

from searx.utils import extract_text

about = {
    "website": 'https://petalsearch.com/',
    "wikidata_id": 'Q104399280',
    "official_api_documentation": False,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['images']
paging = True
time_range_support = False

safesearch = True
safesearch_table = {0: 'off', 1: 'moderate', 2: 'on'}

base_url = 'https://petalsearch.com/'
search_string = 'search?{query}&channel=image&ps=50&pn={page}&region={lang}&ss_mode={safesearch}&ss_type=normal'


def request(query, params):

    search_path = search_string.format(
        query=urlencode({'query': query}),
        page=params['pageno'],
        lang=params['language'].lower(),
        safesearch=safesearch_table[params['safesearch']],
    )

    params['url'] = base_url + search_path

    return params


def response(resp):
    results = []

    tree = html.fromstring(resp.text)
    root = tree.findall('.//script[3]')

    # Convert list to JSON
    json_content = extract_text(root)

    # Manipulate with JSON
    data = loads(json_content)

    for result in data['newImages']:
        url = result['url']
        title = result['title']
        thumbnail_src = result['image']

        pic_dict = result.get('extrainfo')

        date_from_api = pic_dict.get('publish_time')
        width = pic_dict.get('width')
        height = pic_dict.get('height')
        img_src = pic_dict.get('real_url')

        # Continue if img_src is missing
        if img_src is None or '':
            continue

        # Get and convert published date
        if date_from_api is not None:
            publishedDate = datetime.fromtimestamp(int(date_from_api))

        # Append results
        results.append(
            {
                'template': 'images.html',
                'url': url,
                'title': title,
                'img_src': img_src,
                'thumbnail_src': thumbnail_src,
                'width': width,
                'height': height,
                'publishedDate': publishedDate,
            }
        )

    return results

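As a sketch of what request() above produces, the URL built for a hypothetical query, page and language (illustration only, not part of the patch):

# Illustration only: the search URL for a hypothetical query on page 2.
from urllib.parse import urlencode

base_url = 'https://petalsearch.com/'
search_string = 'search?{query}&channel=image&ps=50&pn={page}&region={lang}&ss_mode={safesearch}&ss_type=normal'
print(base_url + search_string.format(
    query=urlencode({'query': 'mountains'}),   # hypothetical query
    page=2,
    lang='en-us',                              # hypothetical language
    safesearch='moderate',                     # safesearch_table[1]
))
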
@ -70,7 +70,7 @@ def response(resp):
|
|||
elif properties.get('osm_type') == 'R':
|
||||
osm_type = 'relation'
|
||||
else:
|
||||
# continue if invalide osm-type
|
||||
# continue if invalid osm-type
|
||||
continue
|
||||
|
||||
url = result_base_url.format(osm_type=osm_type, osm_id=properties.get('osm_id'))
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
# import error is ignored because the admin has to install mysql manually to use
|
||||
# the engine
|
||||
import psycopg2 # pylint: disable=import-error
|
||||
import psycopg2 # pyright: ignore # pylint: disable=import-error
|
||||
|
||||
engine_type = 'offline'
|
||||
host = "127.0.0.1"
|
||||
|
|
|
|||
|
|
@ -3,11 +3,15 @@
|
|||
PubMed (Scholar publications)
|
||||
"""
|
||||
|
||||
from flask_babel import gettext
|
||||
from lxml import etree
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlencode
|
||||
from searx.network import get
|
||||
from searx.utils import (
|
||||
eval_xpath_getindex,
|
||||
eval_xpath_list,
|
||||
extract_text,
|
||||
)
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
@ -22,7 +26,7 @@ about = {
|
|||
"results": 'XML',
|
||||
}
|
||||
|
||||
categories = ['science']
|
||||
categories = ['science', 'scientific publications']
|
||||
|
||||
base_url = (
|
||||
'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'
|
||||
|
|
@ -63,46 +67,61 @@ def response(resp):
|
|||
|
||||
retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args)
|
||||
|
||||
search_results_xml = get(retrieve_url_encoded).content
|
||||
search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation')
|
||||
search_results_response = get(retrieve_url_encoded).content
|
||||
search_results = etree.XML(search_results_response)
|
||||
for entry in eval_xpath_list(search_results, '//PubmedArticle'):
|
||||
medline = eval_xpath_getindex(entry, './MedlineCitation', 0)
|
||||
|
||||
for entry in search_results:
|
||||
title = entry.xpath('.//Article/ArticleTitle')[0].text
|
||||
|
||||
pmid = entry.xpath('.//PMID')[0].text
|
||||
title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text
|
||||
pmid = eval_xpath_getindex(medline, './/PMID', 0).text
|
||||
url = pubmed_url + pmid
|
||||
content = extract_text(
|
||||
eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True
|
||||
)
|
||||
doi = extract_text(
|
||||
eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True
|
||||
)
|
||||
journal = extract_text(
|
||||
eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True
|
||||
)
|
||||
issn = extract_text(
|
||||
eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True
|
||||
)
|
||||
authors = []
|
||||
for author in eval_xpath_list(medline, './Article/AuthorList/Author'):
|
||||
f = eval_xpath_getindex(author, './ForeName', 0, default=None)
|
||||
l = eval_xpath_getindex(author, './LastName', 0, default=None)
|
||||
f = '' if f is None else f.text
|
||||
l = '' if l is None else l.text
|
||||
authors.append((f + ' ' + l).strip())
|
||||
|
||||
try:
|
||||
content = entry.xpath('.//Abstract/AbstractText')[0].text
|
||||
except:
|
||||
content = gettext('No abstract is available for this publication.')
|
||||
res_dict = {
|
||||
'template': 'paper.html',
|
||||
'url': url,
|
||||
'title': title,
|
||||
'content': content,
|
||||
'journal': journal,
|
||||
'issn': [issn],
|
||||
'authors': authors,
|
||||
'doi': doi,
|
||||
}
|
||||
|
||||
# If a doi is available, add it to the snipppet
|
||||
try:
|
||||
doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text
|
||||
content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content)
|
||||
except:
|
||||
pass
|
||||
|
||||
if len(content) > 300:
|
||||
content = content[0:300] + "..."
|
||||
# TODO: center snippet on query term
|
||||
|
||||
res_dict = {'url': url, 'title': title, 'content': content}
|
||||
|
||||
try:
|
||||
publishedDate = datetime.strptime(
|
||||
entry.xpath('.//DateCreated/Year')[0].text
|
||||
+ '-'
|
||||
+ entry.xpath('.//DateCreated/Month')[0].text
|
||||
+ '-'
|
||||
+ entry.xpath('.//DateCreated/Day')[0].text,
|
||||
'%Y-%m-%d',
|
||||
)
|
||||
res_dict['publishedDate'] = publishedDate
|
||||
except:
|
||||
pass
|
||||
accepted_date = eval_xpath_getindex(
|
||||
entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None
|
||||
)
|
||||
if accepted_date is not None:
|
||||
year = eval_xpath_getindex(accepted_date, './Year', 0)
|
||||
month = eval_xpath_getindex(accepted_date, './Month', 0)
|
||||
day = eval_xpath_getindex(accepted_date, './Day', 0)
|
||||
try:
|
||||
publishedDate = datetime.strptime(
|
||||
year.text + '-' + month.text + '-' + day.text,
|
||||
'%Y-%m-%d',
|
||||
)
|
||||
res_dict['publishedDate'] = publishedDate
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
results.append(res_dict)
|
||||
|
||||
return results
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -9,16 +9,16 @@ https://www.qwant.com/ queries.
|
|||
This implementation is used by different qwant engines in the settings.yml::
|
||||
|
||||
- name: qwant
|
||||
categories: general
|
||||
qwant_categ: web
|
||||
...
|
||||
- name: qwant news
|
||||
categories: news
|
||||
qwant_categ: news
|
||||
...
|
||||
- name: qwant images
|
||||
categories: images
|
||||
qwant_categ: images
|
||||
...
|
||||
- name: qwant videos
|
||||
categories: videos
|
||||
qwant_categ: videos
|
||||
...
|
||||
|
||||
"""
|
||||
|
|
@ -30,11 +30,11 @@ from datetime import (
|
|||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
from flask_babel import gettext
|
||||
import babel
|
||||
|
||||
from searx.utils import match_language
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.network import raise_for_httperror
|
||||
|
||||
from searx.locales import get_engine_locale
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
@ -50,13 +50,20 @@ about = {
|
|||
categories = []
|
||||
paging = True
|
||||
supported_languages_url = about['website']
|
||||
qwant_categ = None # web|news|inages|videos
|
||||
|
||||
category_to_keyword = {
|
||||
'general': 'web',
|
||||
'news': 'news',
|
||||
'images': 'images',
|
||||
'videos': 'videos',
|
||||
}
|
||||
safesearch = True
|
||||
safe_search_map = {0: '&safesearch=0', 1: '&safesearch=1', 2: '&safesearch=2'}
|
||||
|
||||
# fmt: off
|
||||
qwant_news_locales = [
|
||||
'ca_ad', 'ca_es', 'ca_fr', 'co_fr', 'de_at', 'de_ch', 'de_de', 'en_au',
|
||||
'en_ca', 'en_gb', 'en_ie', 'en_my', 'en_nz', 'en_us', 'es_ad', 'es_ar',
|
||||
'es_cl', 'es_co', 'es_es', 'es_mx', 'es_pe', 'eu_es', 'eu_fr', 'fc_ca',
|
||||
'fr_ad', 'fr_be', 'fr_ca', 'fr_ch', 'fr_fr', 'it_ch', 'it_it', 'nl_be',
|
||||
'nl_nl', 'pt_ad', 'pt_pt',
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
# search-url
|
||||
url = 'https://api.qwant.com/v3/search/{keyword}?{query}&count={count}&offset={offset}'
|
||||
|
|
@ -64,10 +71,13 @@ url = 'https://api.qwant.com/v3/search/{keyword}?{query}&count={count}&offset={o
|
|||
|
||||
def request(query, params):
|
||||
"""Qwant search request"""
|
||||
keyword = category_to_keyword[categories[0]]
|
||||
|
||||
if not query:
|
||||
return None
|
||||
|
||||
count = 10 # web: count must be equal to 10
|
||||
|
||||
if keyword == 'images':
|
||||
if qwant_categ == 'images':
|
||||
count = 50
|
||||
offset = (params['pageno'] - 1) * count
|
||||
# count + offset must be lower than 250
|
||||
|
|
@ -78,22 +88,18 @@ def request(query, params):
|
|||
offset = min(offset, 40)
|
||||
|
||||
params['url'] = url.format(
|
||||
keyword=keyword,
|
||||
keyword=qwant_categ,
|
||||
query=urlencode({'q': query}),
|
||||
offset=offset,
|
||||
count=count,
|
||||
)
|
||||
|
||||
# add language tag
|
||||
if params['language'] == 'all':
|
||||
params['url'] += '&locale=en_US'
|
||||
else:
|
||||
language = match_language(
|
||||
params['language'],
|
||||
supported_languages,
|
||||
language_aliases,
|
||||
)
|
||||
params['url'] += '&locale=' + language.replace('-', '_')
|
||||
# add quant's locale
|
||||
q_locale = get_engine_locale(params['language'], supported_languages, default='en_US')
|
||||
params['url'] += '&locale=' + q_locale
|
||||
|
||||
# add safesearch option
|
||||
params['url'] += safe_search_map.get(params['safesearch'], '')
|
||||
|
||||
params['raise_for_httperror'] = False
|
||||
return params
|
||||
|
|
@ -103,7 +109,6 @@ def response(resp):
|
|||
"""Get response from Qwant's search request"""
|
||||
# pylint: disable=too-many-locals, too-many-branches, too-many-statements
|
||||
|
||||
keyword = category_to_keyword[categories[0]]
|
||||
results = []
|
||||
|
||||
# load JSON result
|
||||
|
|
@ -125,7 +130,7 @@ def response(resp):
|
|||
# raise for other errors
|
||||
raise_for_httperror(resp)
|
||||
|
||||
if keyword == 'web':
|
||||
if qwant_categ == 'web':
|
||||
# The WEB query contains a list named 'mainline'. This list can contain
|
||||
# different result types (e.g. mainline[0]['type'] returns type of the
|
||||
# result items in mainline[0]['items']
|
||||
|
|
@ -136,7 +141,7 @@ def response(resp):
|
|||
# result['items'].
|
||||
mainline = data.get('result', {}).get('items', [])
|
||||
mainline = [
|
||||
{'type': keyword, 'items': mainline},
|
||||
{'type': qwant_categ, 'items': mainline},
|
||||
]
|
||||
|
||||
# return empty array if there are no results
|
||||
|
|
@ -146,7 +151,7 @@ def response(resp):
|
|||
for row in mainline:
|
||||
|
||||
mainline_type = row.get('type', 'web')
|
||||
if mainline_type != keyword:
|
||||
if mainline_type != qwant_categ:
|
||||
continue
|
||||
|
||||
if mainline_type == 'ads':
|
||||
|
|
@ -238,19 +243,43 @@ def response(resp):
|
|||
return results
|
||||
|
||||
|
||||
# get supported languages from their site
|
||||
def _fetch_supported_languages(resp):
|
||||
# list of regions is embedded in page as a js object
|
||||
response_text = resp.text
|
||||
response_text = response_text[response_text.find('INITIAL_PROPS') :]
|
||||
response_text = response_text[response_text.find('{') : response_text.find('</script>')]
|
||||
|
||||
regions_json = loads(response_text)
|
||||
text = resp.text
|
||||
text = text[text.find('INITIAL_PROPS') :]
|
||||
text = text[text.find('{') : text.find('</script>')]
|
||||
|
||||
supported_languages = []
|
||||
for country, langs in regions_json['locales'].items():
|
||||
for lang in langs['langs']:
|
||||
lang_code = "{lang}-{country}".format(lang=lang, country=country)
|
||||
supported_languages.append(lang_code)
|
||||
q_initial_props = loads(text)
|
||||
q_locales = q_initial_props.get('locales')
|
||||
q_valid_locales = []
|
||||
|
||||
for country, v in q_locales.items():
|
||||
for lang in v['langs']:
|
||||
_locale = "{lang}_{country}".format(lang=lang, country=country)
|
||||
|
||||
if qwant_categ == 'news' and _locale.lower() not in qwant_news_locales:
|
||||
# qwant-news does not support all locales from qwant-web:
|
||||
continue
|
||||
|
||||
q_valid_locales.append(_locale)
|
||||
|
||||
supported_languages = {}
|
||||
|
||||
for q_locale in q_valid_locales:
|
||||
try:
|
||||
locale = babel.Locale.parse(q_locale, sep='_')
|
||||
except babel.core.UnknownLocaleError:
|
||||
print("ERROR: can't determine babel locale of quant's locale %s" % q_locale)
|
||||
continue
|
||||
|
||||
# note: supported_languages (dict)
|
||||
#
|
||||
# dict's key is a string build up from a babel.Locale object / the
|
||||
# notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and
|
||||
# language) notation and dict's values are the locale strings used by
|
||||
# the engine.
|
||||
|
||||
searxng_locale = locale.language + '-' + locale.territory # --> params['language']
|
||||
supported_languages[searxng_locale] = q_locale
|
||||
|
||||
return supported_languages
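The comment block above describes how a Qwant locale string is turned into SearXNG's 'xx-XX' notation via babel. A minimal sketch of that mapping for one hypothetical locale (illustration only, not part of the patch):

# Illustration only: Qwant locale 'de_CH' -> SearXNG locale 'de-CH'.
import babel

q_locale = 'de_CH'
locale = babel.Locale.parse(q_locale, sep='_')
searxng_locale = locale.language + '-' + locale.territory
print(searxng_locale)  # -> 'de-CH'
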
@ -6,6 +6,8 @@
|
|||
from json import dumps, loads
|
||||
from datetime import datetime
|
||||
|
||||
from flask_babel import gettext
|
||||
|
||||
about = {
|
||||
"website": 'https://www.semanticscholar.org/',
|
||||
"wikidata_id": 'Q22908627',
|
||||
|
|
@ -15,6 +17,7 @@ about = {
|
|||
"results": 'JSON',
|
||||
}
|
||||
|
||||
categories = ['science', 'scientific publications']
|
||||
paging = True
|
||||
search_url = 'https://www.semanticscholar.org/api/1/search'
|
||||
paper_url = 'https://www.semanticscholar.org/paper'
|
||||
|
|
@ -45,11 +48,7 @@ def request(query, params):
|
|||
def response(resp):
|
||||
res = loads(resp.text)
|
||||
results = []
|
||||
|
||||
for result in res['results']:
|
||||
item = {}
|
||||
metadata = []
|
||||
|
||||
url = result.get('primaryPaperLink', {}).get('url')
|
||||
if not url and result.get('links'):
|
||||
url = result.get('links')[0]
|
||||
|
|
@ -60,22 +59,47 @@ def response(resp):
|
|||
if not url:
|
||||
url = paper_url + '/%s' % result['id']
|
||||
|
||||
item['url'] = url
|
||||
# publishedDate
|
||||
if 'pubDate' in result:
|
||||
publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
|
||||
else:
|
||||
publishedDate = None
|
||||
|
||||
item['title'] = result['title']['text']
|
||||
item['content'] = result['paperAbstract']['text']
|
||||
# authors
|
||||
authors = [author[0]['name'] for author in result.get('authors', [])]
|
||||
|
||||
metadata = result.get('fieldsOfStudy') or []
|
||||
venue = result.get('venue', {}).get('text')
|
||||
if venue:
|
||||
metadata.append(venue)
|
||||
if metadata:
|
||||
item['metadata'] = ', '.join(metadata)
|
||||
# pick for the first alternate link, but not from the crawler
|
||||
pdf_url = None
|
||||
for doc in result.get('alternatePaperLinks', []):
|
||||
if doc['linkType'] not in ('crawler', 'doi'):
|
||||
pdf_url = doc['url']
|
||||
break
|
||||
|
||||
pubDate = result.get('pubDate')
|
||||
if pubDate:
|
||||
item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
|
||||
# comments
|
||||
comments = None
|
||||
if 'citationStats' in result:
|
||||
comments = gettext(
|
||||
'{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
|
||||
).format(
|
||||
numCitations=result['citationStats']['numCitations'],
|
||||
firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
|
||||
lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
|
||||
)
|
||||
|
||||
results.append(item)
|
||||
results.append(
|
||||
{
|
||||
'template': 'paper.html',
|
||||
'url': url,
|
||||
'title': result['title']['text'],
|
||||
'content': result['paperAbstract']['text'],
|
||||
'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
|
||||
'doi': result.get('doiInfo', {}).get('doi'),
|
||||
'tags': result.get('fieldsOfStudy'),
|
||||
'authors': authors,
|
||||
'pdf_url': pdf_url,
|
||||
'publishedDate': publishedDate,
|
||||
'comments': comments,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
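For clarity, a worked example of the citation note assembled above; the numbers are invented and `gettext` is left out of the sketch:

.. code:: python

    template = '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
    comments = template.format(numCitations=42, firstCitationVelocityYear=2019, lastCitationVelocityYear=2022)
    # --> '42 citations from the year 2019 to 2022'
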
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Słownik Języka Polskiego (general)
|
||||
# lint: pylint
|
||||
"""Słownik Języka Polskiego
|
||||
|
||||
Dictionary of the polish language from PWN (sjp.pwn)
|
||||
"""
|
||||
|
||||
from lxml.html import fromstring
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ about = {
|
|||
"results": 'JSON',
|
||||
}
|
||||
|
||||
categories = ['science']
|
||||
categories = ['science', 'scientific publications']
|
||||
paging = True
|
||||
nb_per_page = 10
|
||||
api_key = 'unset'
|
||||
|
|
@ -41,32 +41,32 @@ def response(resp):
|
|||
json_data = loads(resp.text)
|
||||
|
||||
for record in json_data['records']:
|
||||
content = record['abstract'][0:500]
|
||||
if len(record['abstract']) > len(content):
|
||||
content += "..."
|
||||
published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
|
||||
|
||||
metadata = [
|
||||
record[x]
|
||||
for x in [
|
||||
'publicationName',
|
||||
'identifier',
|
||||
'contentType',
|
||||
]
|
||||
if record.get(x) is not None
|
||||
]
|
||||
|
||||
metadata = ' / '.join(metadata)
|
||||
if record.get('startingPage') and record.get('endingPage') is not None:
|
||||
metadata += " (%(startingPage)s-%(endingPage)s)" % record
|
||||
|
||||
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
|
||||
tags = record.get('genre')
|
||||
if isinstance(tags, str):
|
||||
tags = [tags]
|
||||
results.append(
|
||||
{
|
||||
'title': record['title'],
|
||||
'template': 'paper.html',
|
||||
'url': record['url'][0]['value'].replace('http://', 'https://', 1),
|
||||
'content': content,
|
||||
'title': record['title'],
|
||||
'content': record['abstract'],
|
||||
'comments': record['publicationName'],
|
||||
'tags': tags,
|
||||
'publishedDate': published,
|
||||
'metadata': metadata,
|
||||
'type': record.get('contentType'),
|
||||
'authors': authors,
|
||||
# 'editor': '',
|
||||
'publisher': record.get('publisher'),
|
||||
'journal': record.get('publicationName'),
|
||||
'volume': record.get('volume') or None,
|
||||
'pages': '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x]),
|
||||
'number': record.get('number') or None,
|
||||
'doi': record.get('doi'),
|
||||
'issn': [x for x in [record.get('issn')] if x],
|
||||
'isbn': [x for x in [record.get('isbn')] if x],
|
||||
# 'pdf_url' : ''
|
||||
}
|
||||
)
|
||||
return results
|
||||
|
|
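A small, hypothetical record fragment illustrating how the `pages`, `issn` and `isbn` fields above are assembled; the values are invented for the example:

.. code:: python

    # hypothetical record, only the fields used by the snippet above
    record = {'startingPage': '101', 'endingPage': '110', 'issn': '1234-5678', 'isbn': None}

    pages = '-'.join([x for x in [record.get('startingPage'), record.get('endingPage')] if x])
    issn = [x for x in [record.get('issn')] if x]
    isbn = [x for x in [record.get('isbn')] if x]
    # pages --> '101-110', issn --> ['1234-5678'], isbn --> []
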
|
|||
|
|
@ -62,8 +62,7 @@ sc_code = ''
|
|||
def raise_captcha(resp):
|
||||
|
||||
if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
|
||||
# suspend CAPTCHA for 7 days
|
||||
raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
|
||||
raise SearxEngineCaptchaException()
|
||||
|
||||
|
||||
def get_sc_code(headers):
|
||||
|
|
@ -89,15 +88,14 @@ def get_sc_code(headers):
|
|||
dom = html.fromstring(resp.text)
|
||||
|
||||
try:
|
||||
# href --> '/?sc=adrKJMgF8xwp20'
|
||||
href = eval_xpath(dom, '//a[@class="footer-home__logo"]')[0].get('href')
|
||||
# <input type="hidden" name="sc" value="...">
|
||||
sc_code = eval_xpath(dom, '//input[@name="sc"]/@value')[0]
|
||||
except IndexError as exc:
|
||||
# suspend startpage API --> https://github.com/searxng/searxng/pull/695
|
||||
raise SearxEngineResponseException(
|
||||
suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!"
|
||||
) from exc
|
||||
|
||||
sc_code = href[5:]
|
||||
sc_code_ts = time()
|
||||
logger.debug("new value is: %s", sc_code)
|
||||
|
||||
|
|
@ -209,7 +207,7 @@ def _fetch_supported_languages(resp):
|
|||
# native name, the English name of the writing script used by the language,
|
||||
# or occasionally something else entirely.
|
||||
|
||||
# this cases are so special they need to be hardcoded, a couple of them are mispellings
|
||||
# this cases are so special they need to be hardcoded, a couple of them are misspellings
|
||||
language_names = {
|
||||
'english_uk': 'en-GB',
|
||||
'fantizhengwen': ['zh-TW', 'zh-HK'],
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
|
|||
|
||||
from urllib.parse import urlencode
|
||||
from datetime import datetime
|
||||
from flask_babel import gettext
|
||||
|
||||
about = {
|
||||
"website": 'https://tineye.com',
|
||||
|
|
@ -28,20 +29,41 @@ about = {
|
|||
}
|
||||
|
||||
engine_type = 'online_url_search'
|
||||
""":py:obj:`searx.search.processors.online_url_search`"""
|
||||
|
||||
categories = ['general']
|
||||
paging = True
|
||||
safesearch = False
|
||||
base_url = 'https://tineye.com'
|
||||
search_string = '/result_json/?page={page}&{query}'
|
||||
|
||||
FORMAT_NOT_SUPPORTED = gettext(
|
||||
"Could not read that image url. This may be due to an unsupported file"
|
||||
" format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
|
||||
)
|
||||
"""TinEye error message"""
|
||||
|
||||
NO_SIGNATURE_ERROR = gettext(
|
||||
"The image is too simple to find matches. TinEye requires a basic level of"
|
||||
" visual detail to successfully identify matches."
|
||||
)
|
||||
"""TinEye error message"""
|
||||
|
||||
DOWNLOAD_ERROR = gettext("The image could not be downloaded.")
|
||||
"""TinEye error message"""
|
||||
|
||||
|
||||
def request(query, params):
|
||||
"""Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`."""
|
||||
|
||||
params['raise_for_httperror'] = False
|
||||
|
||||
if params['search_urls']['data:image']:
|
||||
query = params['search_urls']['data:image']
|
||||
elif params['search_urls']['http']:
|
||||
query = params['search_urls']['http']
|
||||
|
||||
logger.debug("query URL: %s", query)
|
||||
query = urlencode({'url': query})
|
||||
|
||||
# see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
|
||||
|
|
@ -59,45 +81,145 @@ def request(query, params):
|
|||
return params
|
||||
|
||||
|
||||
def parse_tineye_match(match_json):
|
||||
"""Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
|
||||
object.
|
||||
|
||||
Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__
|
||||
|
||||
- `image_url`, link to the result image.
|
||||
- `domain`, domain this result was found on.
|
||||
- `score`, a number (0 to 100) that indicates how closely the images match.
|
||||
- `width`, image width in pixels.
|
||||
- `height`, image height in pixels.
|
||||
- `size`, image area in pixels.
|
||||
- `format`, image format.
|
||||
- `filesize`, image size in bytes.
|
||||
- `overlay`, overlay URL.
|
||||
- `tags`, whether this match belongs to a collection or stock domain.
|
||||
|
||||
- `backlinks`, a list of Backlink objects pointing to the original websites
|
||||
and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
|
||||
<https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):
|
||||
|
||||
- `url`, the image URL to the image.
|
||||
- `backlink`, the original website URL.
|
||||
- `crawl_date`, the date the image was crawled.
|
||||
|
||||
"""
|
||||
|
||||
# HINT: there exists an alternative backlink dict in the domains list / e.g.::
|
||||
#
|
||||
# match_json['domains'][0]['backlinks']
|
||||
|
||||
backlinks = []
|
||||
if "backlinks" in match_json:
|
||||
|
||||
for backlink_json in match_json["backlinks"]:
|
||||
if not isinstance(backlink_json, dict):
|
||||
continue
|
||||
|
||||
crawl_date = backlink_json.get("crawl_date")
|
||||
if crawl_date:
|
||||
crawl_date = datetime.fromisoformat(crawl_date[:-3])
|
||||
else:
|
||||
crawl_date = datetime.min
|
||||
|
||||
backlinks.append(
|
||||
{
|
||||
'url': backlink_json.get("url"),
|
||||
'backlink': backlink_json.get("backlink"),
|
||||
'crawl_date': crawl_date,
|
||||
'image_name': backlink_json.get("image_name"),
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
'image_url': match_json.get("image_url"),
|
||||
'domain': match_json.get("domain"),
|
||||
'score': match_json.get("score"),
|
||||
'width': match_json.get("width"),
|
||||
'height': match_json.get("height"),
|
||||
'size': match_json.get("size"),
|
||||
'image_format': match_json.get("format"),
|
||||
'filesize': match_json.get("filesize"),
|
||||
'overlay': match_json.get("overlay"),
|
||||
'tags': match_json.get("tags"),
|
||||
'backlinks': backlinks,
|
||||
}
|
||||
|
||||
|
||||
def response(resp):
|
||||
"""Parse HTTP response from TinEye."""
|
||||
results = []
|
||||
|
||||
# Define wanted results
|
||||
json_data = resp.json()
|
||||
number_of_results = json_data['num_matches']
|
||||
try:
|
||||
json_data = resp.json()
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
msg = "can't parse JSON response // %s" % exc
|
||||
logger.error(msg)
|
||||
json_data = {'error': msg}
|
||||
|
||||
for i in json_data['matches']:
|
||||
image_format = i['format']
|
||||
width = i['width']
|
||||
height = i['height']
|
||||
thumbnail_src = i['image_url']
|
||||
backlink = i['domains'][0]['backlinks'][0]
|
||||
url = backlink['backlink']
|
||||
source = backlink['url']
|
||||
title = backlink['image_name']
|
||||
img_src = backlink['url']
|
||||
# handle error codes from Tineye
|
||||
|
||||
# Get and convert published date
|
||||
api_date = backlink['crawl_date'][:-3]
|
||||
publishedDate = datetime.fromisoformat(api_date)
|
||||
if resp.is_error:
|
||||
if resp.status_code in (400, 422):
|
||||
|
||||
# Append results
|
||||
message = 'HTTP status: %s' % resp.status_code
|
||||
error = json_data.get('error')
|
||||
s_key = json_data.get('suggestions', {}).get('key', '')
|
||||
|
||||
if error and s_key:
|
||||
message = "%s (%s)" % (error, s_key)
|
||||
elif error:
|
||||
message = error
|
||||
|
||||
if s_key == "Invalid image URL":
|
||||
# test https://docs.searxng.org/_static/searxng-wordmark.svg
|
||||
message = FORMAT_NOT_SUPPORTED
|
||||
elif s_key == 'NO_SIGNATURE_ERROR':
|
||||
# test https://pngimg.com/uploads/dot/dot_PNG4.png
|
||||
message = NO_SIGNATURE_ERROR
|
||||
elif s_key == 'Download Error':
|
||||
# test https://notexists
|
||||
message = DOWNLOAD_ERROR
|
||||
|
||||
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
|
||||
# results.append({'answer': message})
|
||||
logger.error(message)
|
||||
|
||||
return results
|
||||
|
||||
resp.raise_for_status()
|
||||
|
||||
# append results from matches
|
||||
|
||||
for match_json in json_data['matches']:
|
||||
|
||||
tineye_match = parse_tineye_match(match_json)
|
||||
if not tineye_match['backlinks']:
|
||||
continue
|
||||
|
||||
backlink = tineye_match['backlinks'][0]
|
||||
results.append(
|
||||
{
|
||||
'template': 'images.html',
|
||||
'url': url,
|
||||
'thumbnail_src': thumbnail_src,
|
||||
'source': source,
|
||||
'title': title,
|
||||
'img_src': img_src,
|
||||
'format': image_format,
|
||||
'widht': width,
|
||||
'height': height,
|
||||
'publishedDate': publishedDate,
|
||||
'url': backlink['backlink'],
|
||||
'thumbnail_src': tineye_match['image_url'],
|
||||
'source': backlink['url'],
|
||||
'title': backlink['image_name'],
|
||||
'img_src': backlink['url'],
|
||||
'format': tineye_match['image_format'],
|
||||
'widht': tineye_match['width'],
|
||||
'height': tineye_match['height'],
|
||||
'publishedDate': backlink['crawl_date'],
|
||||
}
|
||||
)
|
||||
|
||||
# Append number of results
|
||||
results.append({'number_of_results': number_of_results})
|
||||
# append number of results
|
||||
|
||||
number_of_results = json_data.get('num_matches')
|
||||
if number_of_results:
|
||||
results.append({'number_of_results': number_of_results})
|
||||
|
||||
return results
|
||||
|
|
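A minimal sketch of feeding one match object into `parse_tineye_match()` and reading its first backlink, mirroring what `response()` does above; the JSON values are invented for illustration and the function is assumed to be in scope:

.. code:: python

    # hypothetical match as it would appear in json_data['matches']
    match_json = {
        'image_url': 'https://img.tineye.com/result/abc.jpg',
        'domain': 'example.org',
        'format': 'JPEG',
        'width': 800,
        'height': 600,
        'backlinks': [{
            'url': 'https://example.org/cat.jpg',
            'backlink': 'https://example.org/article',
            # the parser strips the last three characters before fromisoformat()
            'crawl_date': '2022-05-25T10:15:30.123456',
            'image_name': 'cat.jpg',
        }],
    }

    tineye_match = parse_tineye_match(match_json)
    backlink = tineye_match['backlinks'][0]
    # backlink['backlink'] --> 'https://example.org/article'
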
|
|||
75  searx/engines/twitter.py  Normal file

@ -0,0 +1,75 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Twitter (microblogging platform)"""

from json import loads
from urllib.parse import urlencode
from datetime import datetime

about = {
    "website": 'https://twitter.com',
    "wikidata_id": None,
    "official_api_documentation": 'https://developer.twitter.com/en/docs/twitter-api',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['social media']

url = "https://api.twitter.com"
search_url = (
    "{url}/2/search/adaptive.json?{query}&tweet_mode=extended&query_source=typed_query&pc=1&spelling_corrections=1"
)


def request(query, params):
    params['url'] = search_url.format(url=url, query=urlencode({'q': query}))

    params['headers'] = {
        # This token is used in the Twitter web interface (twitter.com). Without this header, the API doesn't work.
        # The value of the token has never changed (or maybe once a long time ago).
        # https://github.com/zedeus/nitter/blob/5f31e86e0e8578377fa7d5aeb9631bbb2d35ef1e/src/consts.nim#L5
        'Authorization': (
            "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKb"
            "T3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"
        )
    }

    return params


def response(resp):
    results = []

    json_res = loads(resp.text)['globalObjects']

    for tweet in json_res['tweets'].values():
        text = tweet['full_text']
        display = tweet['display_text_range']

        img_src = tweet.get('extended_entities', {}).get('media', [{}])[0].get('media_url_https')
        if img_src:
            img_src += "?name=thumb"

        results.append(
            {
                'url': 'https://twitter.com/i/web/status/' + tweet['id_str'],
                'title': (text[:40] + '...') if len(text) > 40 else text,
                'content': text[display[0] : display[1]],
                'img_src': img_src,
                'publishedDate': datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y'),
            }
        )

    for user in json_res['users'].values():
        results.append(
            {
                'title': user['name'],
                'content': user['description'],
                'url': 'https://twitter.com/' + user['screen_name'],
                'img_src': user['profile_image_url_https'],
            }
        )

    return results
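A tiny illustration of the title/content handling in `response()`; the tweet dict is invented and only the two fields used above are shown:

.. code:: python

    # hypothetical tweet fragment
    tweet = {
        'full_text': '@searxng a privacy-respecting metasearch engine, give it a try!',
        'display_text_range': [9, 64],  # hypothetical range that skips the leading mention
    }

    text = tweet['full_text']
    display = tweet['display_text_range']
    title = (text[:40] + '...') if len(text) > 40 else text
    content = text[display[0] : display[1]]
    # title   --> the first 40 characters of the tweet followed by '...'
    # content --> 'a privacy-respecting metasearch engine, give it a try!'
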
|
@ -50,7 +50,7 @@ WIKIDATA_PROPERTIES = {
|
|||
# SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
|
||||
# https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
|
||||
# https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
|
||||
# optmization:
|
||||
# optimization:
|
||||
# * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
|
||||
# * https://github.com/blazegraph/database/wiki/QueryHints
|
||||
QUERY_TEMPLATE = """
|
||||
|
|
@ -65,6 +65,7 @@ WHERE
|
|||
mwapi:language "%LANGUAGE%".
|
||||
?item wikibase:apiOutputItem mwapi:item.
|
||||
}
|
||||
hint:Prior hint:runFirst "true".
|
||||
|
||||
%WHERE%
|
||||
|
||||
|
|
@ -93,6 +94,12 @@ WHERE {
|
|||
}
|
||||
"""
|
||||
|
||||
# see the property "dummy value" of https://www.wikidata.org/wiki/Q2013 (Wikidata)
|
||||
# hard coded here to avoid to an additional SPARQL request when the server starts
|
||||
DUMMY_ENTITY_URLS = set(
|
||||
"http://www.wikidata.org/entity/" + wid for wid in ("Q4115189", "Q13406268", "Q15397819", "Q17339402")
|
||||
)
|
||||
|
||||
|
||||
# https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1
|
||||
# https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html
|
||||
|
|
@ -177,7 +184,7 @@ def response(resp):
|
|||
for result in jsonresponse.get('results', {}).get('bindings', []):
|
||||
attribute_result = {key: value['value'] for key, value in result.items()}
|
||||
entity_url = attribute_result['item']
|
||||
if entity_url not in seen_entities:
|
||||
if entity_url not in seen_entities and entity_url not in DUMMY_ENTITY_URLS:
|
||||
seen_entities.add(entity_url)
|
||||
results += get_results(attribute_result, attributes, language)
|
||||
else:
|
||||
|
|
@ -379,7 +386,7 @@ def get_attributes(language):
|
|||
add_amount('P2046') # area
|
||||
add_amount('P281') # postal code
|
||||
add_label('P38') # currency
|
||||
add_amount('P2048') # heigth (building)
|
||||
add_amount('P2048') # height (building)
|
||||
|
||||
# Media
|
||||
for p in [
|
||||
|
|
@ -464,7 +471,6 @@ def get_attributes(language):
|
|||
|
||||
|
||||
class WDAttribute:
|
||||
# pylint: disable=no-self-use
|
||||
__slots__ = ('name',)
|
||||
|
||||
def __init__(self, name):
|
||||
|
|
@ -626,7 +632,6 @@ class WDImageAttribute(WDURLAttribute):
|
|||
|
||||
|
||||
class WDDateAttribute(WDAttribute):
|
||||
# pylint: disable=no-self-use
|
||||
def get_select(self):
|
||||
return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,9 @@ about = {
|
|||
"results": 'JSON',
|
||||
}
|
||||
|
||||
|
||||
send_accept_language_header = True
|
||||
|
||||
# search-url
|
||||
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
|
||||
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
|
||||
|
|
@ -41,9 +44,6 @@ def request(query, params):
|
|||
language = url_lang(params['language'])
|
||||
params['url'] = search_url.format(title=quote(query), language=language)
|
||||
|
||||
if params['language'].lower() in language_variants.get(language, []):
|
||||
params['headers']['Accept-Language'] = params['language'].lower()
|
||||
|
||||
params['headers']['User-Agent'] = searx_useragent()
|
||||
params['raise_for_httperror'] = False
|
||||
params['soft_max_redirects'] = 2
|
||||
|
|
@ -106,9 +106,9 @@ def _fetch_supported_languages(resp):
|
|||
for tr in trs:
|
||||
td = tr.xpath('./td')
|
||||
code = td[3].xpath('./a')[0].text
|
||||
name = td[2].xpath('./a')[0].text
|
||||
name = td[1].xpath('./a')[0].text
|
||||
english_name = td[1].xpath('./a')[0].text
|
||||
articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
|
||||
articles = int(td[4].xpath('./a')[0].text.replace(',', ''))
|
||||
# exclude languages with too few articles
|
||||
if articles >= 100:
|
||||
supported_languages[code] = {"name": name, "english_name": english_name}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ def request(query, params):
|
|||
# replace private user area characters to make text legible
|
||||
def replace_pua_chars(text):
|
||||
pua_chars = {
|
||||
'\uf522': '\u2192', # rigth arrow
|
||||
'\uf522': '\u2192', # right arrow
|
||||
'\uf7b1': '\u2115', # set of natural numbers
|
||||
'\uf7b4': '\u211a', # set of rational numbers
|
||||
'\uf7b5': '\u211d', # set of real numbers
|
||||
|
|
|
|||
136
searx/engines/wttr.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""wttr.in (weather forecast service)"""
|
||||
|
||||
from json import loads
|
||||
from urllib.parse import quote
|
||||
from flask_babel import gettext
|
||||
|
||||
about = {
|
||||
"website": "https://wttr.in",
|
||||
"wikidata_id": "Q107586666",
|
||||
"official_api_documentation": "https://github.com/chubin/wttr.in#json-output",
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
categories = ["others"]
|
||||
|
||||
url = "https://wttr.in/{query}?format=j1&lang={lang}"
|
||||
|
||||
|
||||
def get_weather_condition_key(lang):
|
||||
if lang == "en":
|
||||
return "weatherDesc"
|
||||
|
||||
return "lang_" + lang.lower()
|
||||
|
||||
|
||||
def generate_day_table(day):
|
||||
res = ""
|
||||
|
||||
res += f"<tr><td>{gettext('Average temp.')}</td><td>{day['avgtempC']}°C / {day['avgtempF']}°F</td></tr>"
|
||||
res += f"<tr><td>{gettext('Min temp.')}</td><td>{day['mintempC']}°C / {day['mintempF']}°F</td></tr>"
|
||||
res += f"<tr><td>{gettext('Max temp.')}</td><td>{day['maxtempC']}°C / {day['maxtempF']}°F</td></tr>"
|
||||
res += f"<tr><td>{gettext('UV index')}</td><td>{day['uvIndex']}</td></tr>"
|
||||
res += f"<tr><td>{gettext('Sunrise')}</td><td>{day['astronomy'][0]['sunrise']}</td></tr>"
|
||||
res += f"<tr><td>{gettext('Sunset')}</td><td>{day['astronomy'][0]['sunset']}</td></tr>"
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def generate_condition_table(condition, lang, current=False):
|
||||
res = ""
|
||||
|
||||
if current:
|
||||
key = "temp_"
|
||||
else:
|
||||
key = "temp"
|
||||
|
||||
res += (
|
||||
f"<tr><td><b>{gettext('Condition')}</b></td>"
|
||||
f"<td><b>{condition[get_weather_condition_key(lang)][0]['value']}</b></td></tr>"
|
||||
)
|
||||
res += (
|
||||
f"<tr><td><b>{gettext('Temperature')}</b></td>"
|
||||
f"<td><b>{condition[key+'C']}°C / {condition[key+'F']}°F</b></td></tr>"
|
||||
)
|
||||
res += (
|
||||
f"<tr><td>{gettext('Feels like')}</td><td>{condition['FeelsLikeC']}°C / {condition['FeelsLikeF']}°F</td></tr>"
|
||||
)
|
||||
res += (
|
||||
f"<tr><td>{gettext('Wind')}</td><td>{condition['winddir16Point']} — "
|
||||
f"{condition['windspeedKmph']} km/h / {condition['windspeedMiles']} mph</td></tr>"
|
||||
)
|
||||
res += (
|
||||
f"<tr><td>{gettext('Visibility')}</td><td>{condition['visibility']} km / {condition['visibilityMiles']} mi</td>"
|
||||
)
|
||||
res += f"<tr><td>{gettext('Humidity')}</td><td>{condition['humidity']}%</td></tr>"
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def request(query, params):
|
||||
if query.replace('/', '') in [":help", ":bash.function", ":translation"]:
|
||||
return None
|
||||
|
||||
if params["language"] == "all":
|
||||
params["language"] = "en"
|
||||
else:
|
||||
params["language"] = params["language"].split("-")[0]
|
||||
|
||||
params["url"] = url.format(query=quote(query), lang=params["language"])
|
||||
|
||||
params["raise_for_httperror"] = False
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
if resp.status_code == 404:
|
||||
return []
|
||||
|
||||
result = loads(resp.text)
|
||||
|
||||
current = result["current_condition"][0]
|
||||
location = result['nearest_area'][0]
|
||||
|
||||
forecast_indices = {3: gettext('Morning'), 4: gettext('Noon'), 6: gettext('Evening'), 7: gettext('Night')}
|
||||
|
||||
title = f"{location['areaName'][0]['value']}, {location['region'][0]['value']}"
|
||||
|
||||
infobox = f"<h3>{gettext('Current condition')}</h3><table><tbody>"
|
||||
|
||||
infobox += generate_condition_table(current, resp.search_params['language'], True)
|
||||
|
||||
infobox += "</tbody></table>"
|
||||
|
||||
for day in result["weather"]:
|
||||
infobox += f"<h3>{day['date']}</h3>"
|
||||
|
||||
infobox += "<table><tbody>"
|
||||
|
||||
infobox += generate_day_table(day)
|
||||
|
||||
infobox += "</tbody></table>"
|
||||
|
||||
infobox += "<table><tbody>"
|
||||
|
||||
for time in forecast_indices.items():
|
||||
infobox += f"<tr><td rowspan=\"7\"><b>{time[1]}</b></td></tr>"
|
||||
|
||||
infobox += generate_condition_table(day['hourly'][time[0]], resp.search_params['language'])
|
||||
|
||||
infobox += "</tbody></table>"
|
||||
|
||||
results.append(
|
||||
{
|
||||
"infobox": title,
|
||||
"content": infobox,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
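To make the URL template above concrete, a small example of what `request()` produces for a query; the city and language are chosen only for the illustration:

.. code:: python

    from urllib.parse import quote

    url = "https://wttr.in/{query}?format=j1&lang={lang}"
    # example values: query 'Berlin', language 'de'
    example = url.format(query=quote("Berlin"), lang="de")
    # --> 'https://wttr.in/Berlin?format=j1&lang=de'
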
|
@ -22,6 +22,7 @@ from urllib.parse import urlencode
|
|||
|
||||
from lxml import html
|
||||
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
|
||||
from searx.network import raise_for_httperror
|
||||
|
||||
search_url = None
|
||||
"""
|
||||
|
|
@ -52,7 +53,7 @@ Replacements are:
|
|||
|
||||
0: none, 1: moderate, 2:strict
|
||||
|
||||
If not supported, the URL paramter is an empty string.
|
||||
If not supported, the URL parameter is an empty string.
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -61,6 +62,14 @@ lang_all = 'en'
|
|||
selected.
|
||||
'''
|
||||
|
||||
no_result_for_http_status = []
|
||||
'''Return empty result for these HTTP status codes instead of throwing an error.
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
no_result_for_http_status: []
|
||||
'''
|
||||
|
||||
soft_max_redirects = 0
|
||||
'''Maximum redirects, soft limit. Record an error but don't stop the engine'''
|
||||
|
||||
|
|
@ -105,7 +114,7 @@ time_range_support = False
|
|||
|
||||
time_range_url = '&hours={time_range_val}'
|
||||
'''Time range URL parameter in the in :py:obj:`search_url`. If no time range is
|
||||
requested by the user, the URL paramter is an empty string. The
|
||||
requested by the user, the URL parameter is an empty string. The
|
||||
``{time_range_val}`` replacement is taken from the :py:obj:`time_range_map`.
|
||||
|
||||
.. code:: yaml
|
||||
|
|
@ -177,11 +186,18 @@ def request(query, params):
|
|||
params['url'] = search_url.format(**fargs)
|
||||
params['soft_max_redirects'] = soft_max_redirects
|
||||
|
||||
params['raise_for_httperror'] = False
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
def response(resp): # pylint: disable=too-many-branches
|
||||
'''Scrap *results* from the response (see :ref:`engine results`).'''
|
||||
if no_result_for_http_status and resp.status_code in no_result_for_http_status:
|
||||
return []
|
||||
|
||||
raise_for_httperror(resp)
|
||||
|
||||
results = []
|
||||
dom = html.fromstring(resp.text)
|
||||
is_onion = 'onions' in categories
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
Youtube (Videos)
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from functools import reduce
|
||||
from json import loads, dumps
|
||||
from urllib.parse import quote_plus
|
||||
|
|
@ -37,6 +36,7 @@ base_youtube_url = 'https://www.youtube.com/watch?v='
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
params['cookies']['CONSENT'] = "YES+"
|
||||
if not params['engine_data'].get('next_page_token'):
|
||||
params['url'] = search_url.format(query=quote_plus(query), page=params['pageno'])
|
||||
if params['time_range'] in time_range_dict:
|
||||
|
|
@ -52,7 +52,6 @@ def request(query, params):
|
|||
)
|
||||
params['headers']['Content-Type'] = 'application/json'
|
||||
|
||||
params['headers']['Cookie'] = "CONSENT=YES+cb.%s-17-p0.en+F+941;" % datetime.now().strftime("%Y%m%d")
|
||||
return params
|
||||
|
||||
|
||||
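For reference, the consent cookie built above with a fixed example date, so the resulting header value is visible:

.. code:: python

    from datetime import datetime

    # with an example date of 2022-08-01 the cookie becomes:
    cookie = "CONSENT=YES+cb.%s-17-p0.en+F+941;" % datetime(2022, 8, 1).strftime("%Y%m%d")
    # --> 'CONSENT=YES+cb.20220801-17-p0.en+F+941;'
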
|
|
|
|||
|
|
@ -39,7 +39,7 @@ def init(engine_settings=None):
|
|||
resp = http_get('https://z-lib.org', timeout=5.0)
|
||||
if resp.ok:
|
||||
dom = html.fromstring(resp.text)
|
||||
base_url = "https:" + extract_text(
|
||||
base_url = extract_text(
|
||||
eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href')
|
||||
)
|
||||
logger.debug("using base_url: %s" % base_url)
|
||||
|
|
|
|||
|
|
@ -1,29 +1,19 @@
|
|||
'''
|
||||
searx is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
searx is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||
|
||||
(C) 2017- by Alexandre Flament, <alex@al-f.net>
|
||||
'''
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Exception types raised by SearXNG modules.
|
||||
"""
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
class SearxException(Exception):
|
||||
pass
|
||||
"""Base SearXNG exception."""
|
||||
|
||||
|
||||
class SearxParameterException(SearxException):
|
||||
"""Raised when query miss a required paramater"""
|
||||
|
||||
def __init__(self, name, value):
|
||||
if value == '' or value is None:
|
||||
message = 'Empty ' + name + ' parameter'
|
||||
|
|
@ -69,19 +59,38 @@ class SearxEngineAPIException(SearxEngineResponseException):
|
|||
class SearxEngineAccessDeniedException(SearxEngineResponseException):
|
||||
"""The website is blocking the access"""
|
||||
|
||||
def __init__(self, suspended_time=24 * 3600, message='Access denied'):
|
||||
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
|
||||
"""This settings contains the default suspended time (default 86400 sec / 1
|
||||
day)."""
|
||||
|
||||
def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
|
||||
"""Generic exception to raise when an engine denies access to the results.
|
||||
|
||||
:param suspended_time: How long the engine is going to be suspended in
|
||||
second. Defaults to None.
|
||||
:type suspended_time: int, None
|
||||
:param message: Internal message. Defaults to ``Access denied``
|
||||
:type message: str
|
||||
"""
|
||||
suspended_time = suspended_time or self._get_default_suspended_time()
|
||||
super().__init__(message + ', suspended_time=' + str(suspended_time))
|
||||
self.suspended_time = suspended_time
|
||||
self.message = message
|
||||
|
||||
def _get_default_suspended_time(self):
|
||||
from searx import get_setting # pylint: disable=C0415
|
||||
|
||||
return get_setting(self.SUSPEND_TIME_SETTING)
|
||||
|
||||
|
||||
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
|
||||
"""The website has returned a CAPTCHA
|
||||
"""The website has returned a CAPTCHA."""
|
||||
|
||||
By default, searx stops sending requests to this engine for 1 day.
|
||||
"""
|
||||
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
|
||||
"""This settings contains the default suspended time (default 86400 sec / 1
|
||||
day)."""
|
||||
|
||||
def __init__(self, suspended_time=24 * 3600, message='CAPTCHA'):
|
||||
def __init__(self, suspended_time=None, message='CAPTCHA'):
|
||||
super().__init__(message=message, suspended_time=suspended_time)
|
||||
|
||||
|
||||
|
|
@ -91,7 +100,11 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):

    By default, searx stops sending requests to this engine for 1 hour.
    """

    def __init__(self, suspended_time=3600, message='Too many request'):
    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
    """This setting contains the default suspended time (default 3600 sec / 1
    hour)."""

    def __init__(self, suspended_time=None, message='Too many request'):
        super().__init__(message=message, suspended_time=suspended_time)

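A hedged sketch of how an engine might raise one of these exceptions so that the default suspension is read from the instance settings; the import path and the 429 check are assumptions for the example, only the class and setting names come from the hunks above:

.. code:: python

    from searx.exceptions import SearxEngineTooManyRequestsException  # assumed module path

    def check_rate_limit(resp):
        # sketch, not taken verbatim from any engine
        if resp.status_code == 429:
            # no suspended_time passed: the default is looked up via get_setting()
            # under 'search.suspended_times.SearxEngineTooManyRequests'
            raise SearxEngineTooManyRequestsException()
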
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

from urllib.parse import quote_plus
from searx.data import EXTERNAL_BANGS

LEAF_KEY = chr(16)

@ -39,7 +40,7 @@ def get_bang_definition_and_ac(external_bangs_db, bang):

def resolve_bang_definition(bang_definition, query):
    url, rank = bang_definition.split(chr(1))
    url = url.replace(chr(2), query)
    url = url.replace(chr(2), quote_plus(query))
    if url.startswith('//'):
        url = 'https:' + url
    rank = int(rank) if len(rank) > 0 else 0

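The hunk above swaps the raw query for its URL-encoded form; a quick illustration of the difference, with an invented bang definition that uses the same chr(1)/chr(2) separators:

.. code:: python

    from urllib.parse import quote_plus

    # hypothetical bang definition: url + chr(1) + rank, chr(2) is the query placeholder
    bang_definition = '//example.org/search?q=' + chr(2) + chr(1) + '42'

    url, rank = bang_definition.split(chr(1))
    url = url.replace(chr(2), quote_plus('Wau Holland'))
    # before the change: '//example.org/search?q=Wau Holland'
    # after the change:  '//example.org/search?q=Wau+Holland'
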
|
|
|||
|
|
@ -30,7 +30,7 @@ def get_external_url(url_id, item_id, alternative="default"):
|
|||
"""Return an external URL or None if url_id is not found.
|
||||
|
||||
url_id can take value from data/external_urls.json
|
||||
The "imdb_id" value is automaticaly converted according to the item_id value.
|
||||
The "imdb_id" value is automatically converted according to the item_id value.
|
||||
|
||||
If item_id is None, the raw URL with the $1 is returned.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -77,13 +77,11 @@ class InfoPage:
|
|||
.. _markdown-it-py: https://github.com/executablebooks/markdown-it-py
|
||||
|
||||
"""
|
||||
return MarkdownIt(
|
||||
"commonmark", {"typographer": True}
|
||||
).enable(
|
||||
["replacements", "smartquotes"]
|
||||
).render(self.content)
|
||||
return (
|
||||
MarkdownIt("commonmark", {"typographer": True}).enable(["replacements", "smartquotes"]).render(self.content)
|
||||
)
|
||||
|
||||
def get_ctx(self): # pylint: disable=no-self-use
|
||||
def get_ctx(self):
|
||||
"""Jinja context to render :py:obj:`InfoPage.content`"""
|
||||
|
||||
def _md_link(name, url):
|
||||
|
|
@ -136,6 +134,7 @@ class InfoPageSet: # pylint: disable=too-few-public-methods
|
|||
self.toc: typing.List[str] = [
|
||||
'search-syntax',
|
||||
'about',
|
||||
'donate',
|
||||
]
|
||||
"""list of articles in the online documentation"""
|
||||
|
||||
|
|
@ -158,10 +157,9 @@ class InfoPageSet: # pylint: disable=too-few-public-methods
|
|||
return None
|
||||
|
||||
cache_key = (pagename, locale)
|
||||
page = self.CACHE.get(cache_key)
|
||||
|
||||
if page is not None:
|
||||
return page
|
||||
if cache_key in self.CACHE:
|
||||
return self.CACHE[cache_key]
|
||||
|
||||
# not yet instantiated
|
||||
|
||||
|
|
@ -184,4 +182,6 @@ class InfoPageSet: # pylint: disable=too-few-public-methods
|
|||
if fallback_to_default and page is None:
|
||||
page_locale = self.locale_default
|
||||
page = self.get_page(page_name, self.locale_default)
|
||||
yield page_name, page_locale, page
|
||||
if page is not None:
|
||||
# page is None if the page was deleted by the administrator
|
||||
yield page_name, page_locale, page
|
||||
|
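A hedged sketch of iterating the page set after this change, where pages deleted by the administrator are simply skipped; the import path, the `iter_pages` name and the `title` attribute are assumptions based on the yielded tuple shown above:

.. code:: python

    from searx.infopage import InfoPageSet  # assumed module path

    pages = InfoPageSet()
    for page_name, page_locale, page in pages.iter_pages('en'):  # locale chosen for the example
        # after this change, deleted pages are not yielded, so `page` is never None here
        print(page_name, page.title)
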
|
|
|||
88
searx/infopage/de/about.md
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
# Über SearXNG
|
||||
|
||||
SearXNG ist eine [Metasuchmaschine], welche die Ergebnisse anderer
|
||||
{{link('Suchmaschinen', 'preferences')}} sammelt und aufbereitet ohne dabei
|
||||
Informationen über seine Benutzer zu sammeln oder an andere Suchmaschinen weiter
|
||||
zu geben.
|
||||
|
||||
Das SearXNG Projekt wird von einer offenen Gemeinschaft entwickelt; wenn Sie
|
||||
Fragen haben oder einfach nur über SearXNG plaudern möchten, besuchen Sie uns
|
||||
auf Matrix unter: [#searxng:matrix.org]
|
||||
|
||||
Werden Sie Teil des Projekts und unterstützen Sie SearXNG:
|
||||
|
||||
- Sie können die SearXNG Übersetzungen ergänzen oder korrigieren: [Weblate]
|
||||
- oder folgen Sie den Entwicklungen, senden Sie Beiträge und melden Sie Fehler:
|
||||
[SearXNG Quellen]
|
||||
- Mehr Informationen sind in der [SearXNG Dokumentation] zu finden.
|
||||
|
||||
## Warum sollte ich SearXNG benutzen?
|
||||
|
||||
- SearXNG bietet Ihnen vielleicht nicht so personalisierte Ergebnisse wie
|
||||
Google, aber es erstellt auch kein Profil über Sie.
|
||||
- SearXNG kümmert sich nicht darum, wonach Sie suchen, gibt niemals etwas an
|
||||
Dritte weiter und kann nicht dazu verwendet werden Sie zu kompromittieren.
|
||||
- SearXNG ist freie Software, der Code ist zu 100% offen und jeder ist
|
||||
willkommen ihn zu verbessern.
|
||||
|
||||
Wenn Ihnen die Privatsphäre wichtig ist, Sie ein bewusster Nutzer sind und Sie
|
||||
an die digitale Freiheit glauben, sollten Sie SearXNG zu Ihrer
|
||||
Standardsuchmaschine machen oder eine SearXNG Instanz auf Ihrem eigenen Server
|
||||
betreiben.
|
||||
|
||||
## Wie kann ich SearXNG als Standardsuchmaschine festlegen?
|
||||
|
||||
SearXNG unterstützt [OpenSearch]. Weitere Informationen zum Ändern Ihrer
|
||||
Standardsuchmaschine finden Sie in der Dokumentation zu Ihrem [WEB-Browser]:
|
||||
|
||||
- [Firefox]
|
||||
- [Microsoft Edge] - Hinter dem Link finden sich auch nützliche Hinweise zu
|
||||
Chrome und Safari.
|
||||
- [Chromium]-basierte Browser fügen nur Websites hinzu, zu denen der Benutzer
|
||||
ohne Pfadangabe navigiert.
|
||||
|
||||
Wenn Sie eine Suchmaschine hinzufügen, darf es keine Duplikate mit demselben
|
||||
Namen geben. Wenn Sie auf ein Problem stoßen, bei dem Sie die Suchmaschine
|
||||
nicht hinzufügen können, dann können Sie entweder:
|
||||
|
||||
- das Duplikat entfernen (Standardname: SearXNG) oder
|
||||
- den Eigentümer kontaktieren, damit dieser der Instance einen anderen Namen als
|
||||
den Standardnamen gibt.
|
||||
|
||||
## Wie funktioniert SearXNG?
|
||||
|
||||
SearXNG ist ein Fork der bekannten [searx] [Metasuchmaschine], die durch das
|
||||
[Seeks-Projekt] inspiriert wurde (diese beide Projekte werden heute nicht mehr
|
||||
aktiv weiterentwickelt). SearXNG bietet einen grundlegenden Schutz der
|
||||
Privatsphäre, indem es die Suchanfragen der Benutzer mit Suchen auf anderen
|
||||
Plattformen vermischt ohne dabei Suchdaten zu speichern. SearXNG kann im
|
||||
[WEB-Browser] als weitere oder Standard-Suchmaschine hinzugefügt werden.
|
||||
|
||||
Die {{link('Suchmaschinenstatistik', 'stats')}} enthält einige nützliche
|
||||
Statistiken über die verwendeten Suchmaschinen.
|
||||
|
||||
## Wie kann ich einen eigenen SearXNG Server betreiben?
|
||||
|
||||
Jeder der mit dem Betrieb von WEB-Servern vertraut ist kann sich eine eigene
|
||||
Instanz einrichten; die Software dazu kann über die [SearXNG Quellen] bezogen
|
||||
werden. Weitere Informationen zur Installation und zum Betrieb finden sich in
|
||||
der [SearXNG Dokumentation].
|
||||
|
||||
Fügen Sie Ihre Instanz zu der [Liste der öffentlich zugänglichen
|
||||
Instanzen]({{get_setting('brand.public_instances')}}) hinzu um auch anderen
|
||||
Menschen zu helfen ihre Privatsphäre zurückzugewinnen und das Internet freier zu
|
||||
machen. Je dezentraler das Internet ist, desto mehr Freiheit haben wir!
|
||||
|
||||
|
||||
[SearXNG Quellen]: {{GIT_URL}}
|
||||
[#searxng:matrix.org]: https://matrix.to/#/#searxng:matrix.org
|
||||
[SearXNG Dokumentation]: {{get_setting('brand.docs_url')}}
|
||||
[searx]: https://github.com/searx/searx
|
||||
[Metasuchmaschine]: https://de.wikipedia.org/wiki/Metasuchmaschine
|
||||
[Weblate]: https://translate.codeberg.org/projects/searxng/
|
||||
[Seeks-Projekt]: https://beniz.github.io/seeks/
|
||||
[OpenSearch]: https://github.com/dewitt/opensearch/blob/master/opensearch-1-1-draft-6.md
|
||||
[Firefox]: https://support.mozilla.org/en-US/kb/add-or-remove-search-engine-firefox
|
||||
[Microsoft Edge]: https://support.microsoft.com/en-us/help/4028574/microsoft-edge-change-the-default-search-engine
|
||||
[Chromium]: https://www.chromium.org/tab-to-search
|
||||
[WEB-Browser]: https://de.wikipedia.org/wiki/Webbrowser
|
||||
77
searx/infopage/de/search-syntax.md
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
# Suchbegriffe
|
||||
|
||||
SearXNG verfügt über eine Syntax mit der in einer Suchanfrage die Kategorien,
|
||||
Suchmaschinen, Sprachen und mehr geändert werden können. In den
|
||||
{{link('Eigenschaften','preferences')}} sind die Kategorien, Suchmaschinen und
|
||||
Sprachen zu finden, die zur Verfügung stehen.
|
||||
|
||||
## `!` Suchmaschine und Kategorie auswählen
|
||||
|
||||
Zum Festlegen von Kategorie- und/oder Suchmaschinen dient das Präfix `!`. Um
|
||||
ein paar Beispiele zu geben:
|
||||
|
||||
- in der Wikipedia nach dem Begriff **paris** suchen
|
||||
|
||||
- {{search('!wp paris')}}
|
||||
- {{search('!wikipedia paris')}}
|
||||
|
||||
- in der Kategorie **Karte** nach dem Begriff **paris** suchen:
|
||||
|
||||
- {{search('!map paris')}}
|
||||
|
||||
- in der Kategorie **Bilder** suchen
|
||||
|
||||
- {{search('!images Wau Holland')}}
|
||||
|
||||
Abkürzungen der Suchmaschinen und Kategorien sind ebenfalls möglich und können
|
||||
auch kombiniert werden. So wird z.B. mit {{search('!map !ddg !wp paris')}} in
|
||||
der Kategorie **Karte** als auch mit den Suchmaschinen DuckDuckGo und Wikipedia
|
||||
nach dem Begriff **paris** gesucht.
|
||||
|
||||
## `:` Sprache auswählen
|
||||
|
||||
Um einen Sprachfilter auszuwählen, verwenden Sie das Präfix`:`. Um ein
|
||||
einfaches Beispiel zu geben:
|
||||
|
||||
- Wikipedia mit einer benutzerdefinierten Sprache durchsuchen
|
||||
|
||||
- {{search(':de !wp Wau Holland')}}
|
||||
|
||||
## `!!` external bangs
|
||||
|
||||
SearXNG unterstützt die _external bangs_ von [ddg]. Das Präfix `!!` kann
|
||||
verwendet werden um direkt zu einer externen Suchseite zu springen. Um ein
|
||||
Beispiel zu geben:
|
||||
|
||||
- In Wikipedia mit einer benutzerdefinierten Sprache eine Suche durchführen
|
||||
|
||||
- {{search('!!wde Wau Holland')}}
|
||||
|
||||
Bitte beachten; die Suche wird direkt in der externen Suchmaschine durchgeführt.
|
||||
SearXNG kann die Privatsphäre des Benutzers in diesem Fall nur eingeschränkt
|
||||
schützen, dennoch wird diese Funktion von manchen Benutzern als sehr nützlich
|
||||
empfunden.
|
||||
|
||||
[ddg]: https://duckduckgo.com/bang
|
||||
|
||||
## Besondere Abfragen
|
||||
|
||||
In den {{link('Eigenschaften', 'preferences')}} finden sich Schlüsselwörter für
|
||||
_besondere Abfragen_. Um ein paar Beispiele zu geben:
|
||||
|
||||
- Zufallsgenerator für eine UUID
|
||||
|
||||
- {{search('random uuid')}}
|
||||
|
||||
- Bestimmung des Mittelwerts
|
||||
|
||||
- {{search('avg 123 548 2.04 24.2')}}
|
||||
|
||||
- anzeigen des _user agent_ Ihres WEB-Browsers (muss aktiviert sein)
|
||||
|
||||
- {{search('user-agent')}}
|
||||
|
||||
- Zeichenketten in verschiedene Hash-Digests umwandeln (muss aktiviert sein)
|
||||
|
||||
- {{search('md5 lorem ipsum')}}
|
||||
- {{search('sha512 lorem ipsum')}}
|
||||
|
|
@ -7,7 +7,7 @@ via the search query.
|
|||
|
||||
To set category and/or engine names use a `!` prefix. To give a few examples:
|
||||
|
||||
- search in wikipedia for **paris**
|
||||
- search in Wikipedia for **paris**
|
||||
|
||||
- {{search('!wp paris')}}
|
||||
- {{search('!wikipedia paris')}}
|
||||
|
|
@ -22,29 +22,29 @@ To set category and/or engine names use a `!` prefix. To give a few examples:
|
|||
|
||||
Abbreviations of the engines and languages are also accepted. Engine/category
|
||||
modifiers are chainable and inclusive. E.g. with {{search('!map !ddg !wp
|
||||
paris')}} search in map category and duckduckgo and wikipedia for **paris**.
|
||||
paris')}} search in map category and DuckDuckGo and Wikipedia for **paris**.
|
||||
|
||||
## `:` select language
|
||||
|
||||
To select language filter use a `:` prefix. To give an example:
|
||||
|
||||
- search wikipedia by a custom language
|
||||
- search Wikipedia by a custom language
|
||||
|
||||
- {{search(':fr !wp Wau Holland')}}
|
||||
|
||||
## `!!` external bangs
|
||||
|
||||
SearXNG supports the external bangs from [ddg]. To directly jump to a external
|
||||
search page use the `!!` prefix. To give an example:
|
||||
SearXNG supports the external bangs from [DuckDuckGo]. To directly jump to an
|
||||
external search page use the `!!` prefix. To give an example:
|
||||
|
||||
- search wikipedia by a custom language
|
||||
- search Wikipedia by a custom language
|
||||
|
||||
- {{search('!!wfr Wau Holland')}}
|
||||
|
||||
Please note, your search will be performed directly in the external search
|
||||
engine, SearXNG cannot protect your privacy on this.
|
||||
|
||||
[ddg]: https://duckduckgo.com/bang
|
||||
[DuckDuckGo]: https://duckduckgo.com/bang
|
||||
|
||||
## Special Queries
|
||||
|
||||
|
|
|
|||
82
searx/infopage/id/about.md
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# Tentang SearXNG
|
||||
|
||||
SearXNG adalah sebuah [mesin pencari meta], yang mendapatkan hasil dari
|
||||
{{link('mesin pencari', 'preferences')}} lainnya sambil tidak melacak
|
||||
penggunanya.
|
||||
|
||||
Proyek SearXNG diarahkan oleh sebuah komunitas terbuka, bergabung dengan kami di
|
||||
Matrix jika Anda memiliki pertanyaan atau ingin mengobrol tentang SearXNG di
|
||||
[#searxng:matrix.org]
|
||||
|
||||
Buat SearXNG lebih baik.
|
||||
|
||||
- Anda dapat membuat terjemahan SearXNG lebih baik di [Weblate], atau...
|
||||
- Lacak pengembangan, kirim kontribusi, dan laporkan masalah di [sumber
|
||||
SearXNG].
|
||||
- Untuk mendapatkan informasi lanjut, kunjungi dokumentasi proyek SearXNG di
|
||||
[dokumentasi SearXNG].
|
||||
|
||||
## Kenapa menggunakan SearXNG?
|
||||
|
||||
- SearXNG mungkin tidak menawarkan Anda hasil yang dipersonalisasikan seperti
|
||||
Google, tetapi tidak membuat sebuah profil tentang Anda.
|
||||
- SearXNG tidak peduli apa yang Anda cari, tidak akan membagikan apa pun dengan
|
||||
pihak ketiga, dan tidak dapat digunakan untuk mengkompromikan Anda.
|
||||
- SearXNG adalah perangkat lunak bebas, kodenya 100% terbuka, dan semuanya
|
||||
dipersilakan untuk membuatnya lebih baik.
|
||||
|
||||
Jika Anda peduli dengan privasi, ingin menjadi pengguna yang sadar, ataupun
|
||||
percaya dalam kebebasan digital, buat SearXNG sebagai mesin pencari bawaan atau
|
||||
jalankan di server Anda sendiri!
|
||||
|
||||
## Bagaimana saya dapat membuat SearXNG sebagai mesin pencari bawaan?
|
||||
|
||||
SearXNG mendukung [OpenSearch]. Untuk informasi lanjut tentang mengubah mesin
|
||||
pencari bawaan Anda, lihat dokumentasi peramban Anda:
|
||||
|
||||
- [Firefox]
|
||||
- [Microsoft Edge] - Dibalik tautan, Anda juga akan menemukan beberapa instruksi
|
||||
berguna untuk Chrome dan Safari.
|
||||
- Peramban berbasis [Chromium] hanya menambahkan situs web yang dikunjungi oleh
|
||||
pengguna tanpa sebuah jalur.
|
||||
|
||||
Apabila menambahkan mesin pencari, tidak boleh ada duplikat dengan nama yang
|
||||
sama. Jika Anda menemukan masalah di mana Anda tidak bisa menambahkan mesin
|
||||
pencari, Anda bisa:
|
||||
|
||||
- menghapus duplikat (nama default: SearXNG) atau
|
||||
- menghubungi pemilik untuk memberikan nama yang berbeda dari nama default.
|
||||
|
||||
## Bagaimana caranya SearXNG bekerja?
|
||||
|
||||
SearXNG adalah sebuah *fork* dari [mesin pencari meta] [searx] yang banyak
|
||||
dikenal yang diinspirasi oleh [proyek Seeks]. SearXNG menyediakan privasi dasar
|
||||
dengan mencampur kueri Anda dengan pencarian pada *platform* lainnya tanpa
|
||||
menyimpan data pencarian. SearXNG dapat ditambahkan ke bilah pencarian peramban
|
||||
Anda; lain lagi, SearXNG dapat diatur sebagai mesin pencarian bawaan.
|
||||
|
||||
{{link('Laman statistik', 'stats')}} berisi beberapa statistik penggunaan anonim
|
||||
berguna tentang mesin pencarian yang digunakan.
|
||||
|
||||
## Bagaimana caranya untuk membuat SearXNG milik saya?
|
||||
|
||||
SearXNG menghargai kekhawatiran Anda tentang pencatatan (*log*), jadi ambil
|
||||
kodenya dari [sumber SearXNG] dan jalankan sendiri!
|
||||
|
||||
Tambahkan instansi Anda ke [daftar instansi
|
||||
publik]({{get_setting('brand.public_instances')}}) ini untuk membantu orang lain
|
||||
mendapatkan kembali privasi mereka dan membuat internet lebih bebas. Lebih
|
||||
terdesentralisasinya internet, lebih banyak kebebasan yang kita punya!
|
||||
|
||||
|
||||
[sumber SearXNG]: {{GIT_URL}}
|
||||
[#searxng:matrix.org]: https://matrix.to/#/#searxng:matrix.org
|
||||
[dokumentasi SearXNG]: {{get_setting('brand.docs_url')}}
|
||||
[searx]: https://github.com/searx/searx
|
||||
[mesin pencari meta]: https://id.wikipedia.org/wiki/Mesin_pencari_web#Mesin_Pencari_dan_Mesin_Pencari-meta
|
||||
[Weblate]: https://translate.codeberg.org/projects/searxng/
|
||||
[proyek Seeks]: https://beniz.github.io/seeks/
|
||||
[OpenSearch]: https://github.com/dewitt/opensearch/blob/master/opensearch-1-1-draft-6.md
|
||||
[Firefox]: https://support.mozilla.org/id/kb/add-or-remove-search-engine-firefox
|
||||
[Microsoft Edge]: https://support.microsoft.com/id-id/microsoft-edge/ubah-mesin-pencarian-default-anda-f863c519-5994-a8ed-6859-00fbc123b782
|
||||
[Chromium]: https://www.chromium.org/tab-to-search
|
||||
73
searx/infopage/id/search-syntax.md
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
# Sintaks pencarian
|
||||
|
||||
SearXNG mempunyai sintaks pencarian memungkinkan Anda untuk mengubah kategori,
|
||||
mesin pencari, bahasa dan lainnya. Lihat {{link('preferensi', 'preferences')}}
|
||||
untuk daftar mesin pencari, kategori dan bahasa.
|
||||
|
||||
## `!` pilih mesin pencari dan kategori
|
||||
|
||||
Untuk menetapkan nama kategori dan/atau mesin pencari gunakan awalan `!`.
|
||||
Sebagai contoh:
|
||||
|
||||
- cari di Wikipedia tentang **Jakarta**
|
||||
|
||||
- {{search('!wp Jakarta')}}
|
||||
- {{search('!wikipedia Jakarta')}}
|
||||
|
||||
- cari dalam kategori **peta** untuk **Jakarta**
|
||||
|
||||
- {{search('!map Jakarta')}}
|
||||
|
||||
- pencarian gambar
|
||||
|
||||
- {{search('!images kucing')}}
|
||||
|
||||
Singkatan mesin pencari dan bahasa juga diterima. Pengubah mesin/kategori dapat
|
||||
dirantai dan inklusif. Misalnya dengan pencarian {{search('!map !ddg !wp
|
||||
Jakarta')}} dalam kategori peta dan DuckDuckGo dan Wikipedia tentang
|
||||
**Jakarta**.
|
||||
|
||||
## `:` pilih bahasa
|
||||
|
||||
Untuk memilih saringan bahasa gunakan awalan `:`. Sebagai contoh:
|
||||
|
||||
- cari Wikipedia dengan bahasa lain
|
||||
|
||||
- {{search(':en !wp Jakarta')}}
|
||||
|
||||
## `!!` mesin pencarian (*bangs*) eksternal
|
||||
|
||||
SearXNG mendukung mesin pencarian eksternal (*bangs*) dari [DuckDuckGo]. Untuk
|
||||
langsung lompat ke sebuah laman pencarian eksternal gunakan awalan `!!`.
|
||||
Sebagai contoh:
|
||||
|
||||
- cari Wikipedia dengan bahasa yang lain
|
||||
|
||||
- {{search('!!wen cat')}}
|
||||
|
||||
Diingat, pencarian Anda akan dilakukan secara langsung di mesin pencari
|
||||
eksternal, SearXNG tidak dapat melindungi privasi Anda di sana.
|
||||
|
||||
[DuckDuckGo]: https://duckduckgo.com/bang
|
||||
|
||||
## Kueri Khusus
|
||||
|
||||
Dalam laman {{link('preferensi', 'preferences')}} Anda akan menemukan kata kunci
|
||||
_kueri khusus_. Sebagai contoh:
|
||||
|
||||
- buat sebuah UUID acak
|
||||
|
||||
- {{search('random uuid')}}
|
||||
|
||||
- temukan rata-rata
|
||||
|
||||
- {{search('avg 123 548 2.04 24.2')}}
|
||||
|
||||
- tampilkan _user agent_ (agen pengguna) dari peramban Anda (harus diaktifkan)
|
||||
|
||||
- {{search('user-agent')}}
|
||||
|
||||
- ubah _string_ (teks) ke intisari *hash* yang berbeda (harus diaktifkan)
|
||||
|
||||
- {{search('md5 kucing sphynx')}}
|
||||
- {{search('sha512 kucing sphynx')}}
|
||||
303
searx/locales.py
|
|
@ -9,20 +9,112 @@ import os
|
|||
import pathlib
|
||||
|
||||
from babel import Locale
|
||||
from babel.support import Translations
|
||||
import babel.languages
|
||||
import babel.core
|
||||
import flask_babel
|
||||
import flask
|
||||
from flask.ctx import has_request_context
|
||||
from searx import logger
|
||||
|
||||
LOCALE_NAMES = {
|
||||
"oc": "Occitan",
|
||||
"nl-BE": "Vlaams (Dutch, Belgium)",
|
||||
}
|
||||
"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR'
|
||||
(delimiter is *underline* '-')"""
|
||||
logger = logger.getChild('locales')
|
||||
|
||||
|
||||
# save before monkey patching flask_babel.get_translations
|
||||
_flask_babel_get_translations = flask_babel.get_translations
|
||||
|
||||
LOCALE_NAMES = {}
|
||||
"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see
|
||||
:py:obj:`locales_initialize`).
|
||||
|
||||
:meta hide-value:
|
||||
"""
|
||||
|
||||
RTL_LOCALES: Set[str] = set()
|
||||
"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (delimiter is
|
||||
*underline* '-')"""
|
||||
"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
|
||||
:py:obj:`locales_initialize`)."""
|
||||
|
||||
ADDITIONAL_TRANSLATIONS = {
|
||||
"dv": "ދިވެހި (Dhivehi)",
|
||||
"oc": "Occitan",
|
||||
"szl": "Ślōnski (Silesian)",
|
||||
"pap": "Papiamento",
|
||||
}
|
||||
"""Additional languages SearXNG has translations for but not supported by
|
||||
python-babel (see :py:obj:`locales_initialize`)."""
|
||||
|
||||
LOCALE_BEST_MATCH = {
|
||||
"dv": "si",
|
||||
"oc": 'fr-FR',
|
||||
"szl": "pl",
|
||||
"nl-BE": "nl",
|
||||
"zh-HK": "zh-Hant-TW",
|
||||
"pap": "pt-BR",
|
||||
}
|
||||
"""Map a locale we do not have a translations for to a locale we have a
|
||||
translation for. For example: use the Taiwan version of the translation for Hong
|
||||
Kong."""
|
||||
|
||||
|
||||
def _get_name(locale, language_code):
|
||||
def localeselector():
|
||||
locale = 'en'
|
||||
if has_request_context():
|
||||
value = flask.request.preferences.get_value('locale')
|
||||
if value:
|
||||
locale = value
|
||||
|
||||
# first, set the language that is not supported by babel
|
||||
if locale in ADDITIONAL_TRANSLATIONS:
|
||||
flask.request.form['use-translation'] = locale
|
||||
|
||||
# second, map locale to a value python-babel supports
|
||||
locale = LOCALE_BEST_MATCH.get(locale, locale)
|
||||
|
||||
if locale == '':
|
||||
# if there is an error loading the preferences
|
||||
# the locale is going to be ''
|
||||
locale = 'en'
|
||||
|
||||
# babel uses underscore instead of hyphen.
|
||||
locale = locale.replace('-', '_')
|
||||
return locale
|
||||
|
||||
|
||||
def get_translations():
|
||||
"""Monkey patch of :py:obj:`flask_babel.get_translations`"""
|
||||
if has_request_context():
|
||||
use_translation = flask.request.form.get('use-translation')
|
||||
if use_translation in ADDITIONAL_TRANSLATIONS:
|
||||
babel_ext = flask_babel.current_app.extensions['babel']
|
||||
return Translations.load(babel_ext.translation_directories[0], use_translation)
|
||||
return _flask_babel_get_translations()
|
||||
|
||||
|
||||
def get_locale_descr(locale, locale_name):
|
||||
"""Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
|
||||
|
||||
:param locale: instance of :py:class:`Locale`
|
||||
:param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
|
||||
"""
|
||||
|
||||
native_language, native_territory = _get_locale_descr(locale, locale_name)
|
||||
english_language, english_territory = _get_locale_descr(locale, 'en')
|
||||
|
||||
if native_territory == english_territory:
|
||||
english_territory = None
|
||||
|
||||
if not native_territory and not english_territory:
|
||||
if native_language == english_language:
|
||||
return native_language
|
||||
return native_language + ' (' + english_language + ')'
|
||||
|
||||
result = native_language + ', ' + native_territory + ' (' + english_language
|
||||
if english_territory:
|
||||
return result + ', ' + english_territory + ')'
|
||||
return result + ')'
|
||||
|
||||
|
||||
def _get_locale_descr(locale, language_code):
|
||||
language_name = locale.get_language_name(language_code).capitalize()
|
||||
if language_name and ('a' <= language_name[0] <= 'z'):
|
||||
language_name = language_name.capitalize()
|
||||
|
|
@ -30,39 +122,184 @@ def _get_name(locale, language_code):
|
|||
return language_name, terrirtory_name
|
||||
|
||||
|
||||
def _get_locale_name(locale, locale_name):
|
||||
"""Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
|
||||
def locales_initialize(directory=None):
|
||||
"""Initialize locales environment of the SearXNG session.
|
||||
|
||||
:param locale: instance of :py:class:`Locale`
|
||||
:param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
|
||||
- monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`
|
||||
- init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`
|
||||
"""
|
||||
native_language, native_territory = _get_name(locale, locale_name)
|
||||
english_language, english_territory = _get_name(locale, 'en')
|
||||
if native_territory == english_territory:
|
||||
english_territory = None
|
||||
if not native_territory and not english_territory:
|
||||
if native_language == english_language:
|
||||
return native_language
|
||||
return native_language + ' (' + english_language + ')'
|
||||
result = native_language + ', ' + native_territory + ' (' + english_language
|
||||
if english_territory:
|
||||
return result + ', ' + english_territory + ')'
|
||||
return result + ')'
|
||||
|
||||
directory = directory or pathlib.Path(__file__).parent / 'translations'
|
||||
logger.debug("locales_initialize: %s", directory)
|
||||
flask_babel.get_translations = get_translations
|
||||
|
||||
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
|
||||
locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
|
||||
LOCALE_NAMES[tag] = descr
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
for tag in LOCALE_BEST_MATCH:
|
||||
descr = LOCALE_NAMES.get(tag)
|
||||
if not descr:
|
||||
locale = Locale.parse(tag, sep='-')
|
||||
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
def initialize_locales(directory):
|
||||
"""Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`."""
|
||||
for dirname in sorted(os.listdir(directory)):
|
||||
# Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations
|
||||
if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')):
|
||||
continue
|
||||
locale_name = dirname.replace('_', '-')
|
||||
info = LOCALE_NAMES.get(locale_name)
|
||||
if not info:
|
||||
tag = dirname.replace('_', '-')
|
||||
descr = LOCALE_NAMES.get(tag)
|
||||
if not descr:
|
||||
locale = Locale.parse(dirname)
|
||||
LOCALE_NAMES[locale_name] = _get_locale_name(locale, dirname)
|
||||
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(locale_name)
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
|
||||
initialize_locales(pathlib.Path(__file__).parent / 'translations')
|
||||
def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||
"""Return engine's language (aka locale) string that best fits to argument
|
||||
``searxng_locale``.
|
||||
|
||||
Argument ``engine_locales`` is a python dict that maps *SearXNG locales* to
|
||||
corresponding *engine locales*::
|
||||
|
||||
<engine>: {
|
||||
# SearXNG string : engine-string
|
||||
'ca-ES' : 'ca_ES',
|
||||
'fr-BE' : 'fr_BE',
|
||||
'fr-CA' : 'fr_CA',
|
||||
'fr-CH' : 'fr_CH',
|
||||
'fr' : 'fr_FR',
|
||||
...
|
||||
'pl-PL' : 'pl_PL',
|
||||
'pt-PT' : 'pt_PT'
|
||||
}
|
||||
|
||||
.. hint::
|
||||
|
||||
The *SearXNG locale* string has to be known by babel!
|
||||
|
||||
If there is no direct 1:1 mapping, this functions tries to narrow down
|
||||
engine's language (locale). If no value can be determined by these
|
||||
approximation attempts the ``default`` value is returned.
|
||||
|
||||
Assumptions:
|
||||
|
||||
A. When a user selects a language, the results should be optimized according to
|
||||
the selected language.
|
||||
|
||||
B. When a user selects a language and a territory, the results should be
|
||||
optimized with first priority on territory and second on language.
|
||||
|
||||
First approximation rule (*by territory*):
|
||||
|
||||
When the user selects a locale with a territory (and a language), the
|
||||
territory has priority over the language. If any of the official languages
|
||||
in the territory is supported by the engine (``engine_locales``), it will
|
||||
be used.
|
||||
|
||||
Second approximation rule (*by language*):
|
||||
|
||||
If "First approximation rule" brings no result or the user selects only a
|
||||
language without a territory, check in which territories the language
|
||||
has an official status and if one of these territories is supported by the
|
||||
engine.
|
||||
|
||||
"""
|
||||
# pylint: disable=too-many-branches
|
||||
|
||||
engine_locale = engine_locales.get(searxng_locale)
|
||||
|
||||
if engine_locale is not None:
|
||||
# There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
|
||||
# need to narrow language nor territory.
|
||||
return engine_locale
|
||||
|
||||
try:
|
||||
locale = babel.Locale.parse(searxng_locale, sep='-')
|
||||
except babel.core.UnknownLocaleError:
|
||||
try:
|
||||
locale = babel.Locale.parse(searxng_locale.split('-')[0])
|
||||
except babel.core.UnknownLocaleError:
|
||||
return default
|
||||
|
||||
# SearXNG's selected locale is not supported by the engine ..
|
||||
|
||||
if locale.territory:
|
||||
# Try to narrow by *official* languages in the territory (??-XX).
|
||||
|
||||
for official_language in babel.languages.get_official_languages(locale.territory, de_facto=True):
|
||||
searxng_locale = official_language + '-' + locale.territory
|
||||
engine_locale = engine_locales.get(searxng_locale)
|
||||
if engine_locale is not None:
|
||||
return engine_locale
|
||||
|
||||
# Engine does not support one of the official languages in the territory or
|
||||
# there is only a language selected without a territory.
|
||||
|
||||
# Now let's have a look if the searxng_lang (the language selected by the
|
||||
# user) is an official language in other territories. If so, check if
|
||||
# the engine supports the searxng_lang in this other territory.
|
||||
|
||||
if locale.language:
|
||||
|
||||
searxng_lang = locale.language
|
||||
if locale.script:
|
||||
searxng_lang += '_' + locale.script
|
||||
|
||||
terr_lang_dict = {}
|
||||
for territory, langs in babel.core.get_global("territory_languages").items():
|
||||
if not langs.get(searxng_lang, {}).get('official_status'):
|
||||
continue
|
||||
terr_lang_dict[territory] = langs.get(searxng_lang)
|
||||
|
||||
# first: check fr-FR, de-DE .. is supported by the engine
|
||||
# exception: 'en' --> 'en-US'
|
||||
|
||||
territory = locale.language.upper()
|
||||
if territory == 'EN':
|
||||
territory = 'US'
|
||||
|
||||
if terr_lang_dict.get(territory):
|
||||
searxng_locale = locale.language + '-' + territory
|
||||
engine_locale = engine_locales.get(searxng_locale)
|
||||
if engine_locale is not None:
|
||||
return engine_locale
|
||||
|
||||
# second: sort by population_percent and take first match
|
||||
|
||||
# drawback of "population percent": if there is a territory with a
|
||||
# small number of people (e.g. 100) but the majority speaks the
|
||||
# language, then the percentage might be 100% (--> 100 people) but in
|
||||
# a different territory with more people (e.g. 10,000) where only 10%
|
||||
# speak the language, the total number of speakers is higher (--> 200
|
||||
# people).
|
||||
#
|
||||
# By example: The population of Saint-Martin is 33.000, of which 100%
|
||||
# speak French, but this is less than the 38% of the approximately 2.5
|
||||
# million Belgian citizens
|
||||
#
|
||||
# - 'fr-MF', 'population_percent': 100.0, 'official_status': 'official'
|
||||
# - 'fr-BE', 'population_percent': 38.0, 'official_status': 'official'
|
||||
|
||||
terr_lang_list = []
|
||||
for k, v in terr_lang_dict.items():
|
||||
terr_lang_list.append((k, v))
|
||||
|
||||
for territory, _lang in sorted(terr_lang_list, key=lambda item: item[1]['population_percent'], reverse=True):
|
||||
searxng_locale = locale.language + '-' + territory
|
||||
engine_locale = engine_locales.get(searxng_locale)
|
||||
if engine_locale is not None:
|
||||
return engine_locale
|
||||
|
||||
# No luck: narrowing by "language from territory" and "territory from language"
|
||||
# did not yield a locale supported by the engine.
|
||||
|
||||
if engine_locale is None:
|
||||
engine_locale = default
|
||||
|
||||
return default
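A hedged usage sketch of the narrowing rules described in the docstring above; the ``engine_locales`` mapping below is made up for illustration:

# hypothetical mapping for illustration only
engine_locales = {'ca-ES': 'ca_ES', 'fr-BE': 'fr_BE', 'fr-CA': 'fr_CA', 'fr': 'fr_FR'}

get_engine_locale('fr', engine_locales)                # 'fr_FR' (direct 1:1 match)
get_engine_locale('de', engine_locales, default='en')  # 'en' (nothing to narrow to)
# 'fr-CH' has no direct match and no official language of CH is supported by the
# engine, so it is narrowed to one of the supported French territories (which one
# depends on CLDR population data, e.g. 'fr_BE'):
get_engine_locale('fr-CH', engine_locales)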
|
||||
|
|
|
|||
|
|
@ -160,6 +160,7 @@ def get_reliabilities(engline_name_list, checker_results):
|
|||
# even if there is no exception
|
||||
reliablity = 0
|
||||
else:
|
||||
# pylint: disable=consider-using-generator
|
||||
reliablity = 100 - sum([error['percentage'] for error in errors if not error.get('secondary')])
|
||||
|
||||
reliabilities[engine_name] = {
|
||||
|
|
|
|||
|
|
@ -8,12 +8,13 @@ import concurrent.futures
|
|||
from queue import SimpleQueue
|
||||
from types import MethodType
|
||||
from timeit import default_timer
|
||||
from typing import Iterable, Tuple
|
||||
from typing import Iterable, NamedTuple, Tuple, List, Dict, Union
|
||||
from contextlib import contextmanager
|
||||
|
||||
import httpx
|
||||
import anyio
|
||||
|
||||
from .network import get_network, initialize, check_network_configuration
|
||||
from .network import get_network, initialize, check_network_configuration # pylint:disable=cyclic-import
|
||||
from .client import get_loop
|
||||
from .raise_for_httperror import raise_for_httperror
|
||||
|
||||
|
|
@ -48,9 +49,23 @@ def get_context_network():
|
|||
return THREADLOCAL.__dict__.get('network') or get_network()
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
"""same as requests/requests/api.py request(...)"""
|
||||
@contextmanager
|
||||
def _record_http_time():
|
||||
# pylint: disable=too-many-branches
|
||||
time_before_request = default_timer()
|
||||
start_time = getattr(THREADLOCAL, 'start_time', time_before_request)
|
||||
try:
|
||||
yield start_time
|
||||
finally:
|
||||
# update total_time.
|
||||
# See get_time_for_thread() and reset_time_for_thread()
|
||||
if hasattr(THREADLOCAL, 'total_time'):
|
||||
time_after_request = default_timer()
|
||||
THREADLOCAL.total_time += time_after_request - time_before_request
|
||||
|
||||
|
||||
def _get_timeout(start_time, kwargs):
|
||||
# pylint: disable=too-many-branches
|
||||
|
||||
# timeout (httpx)
|
||||
if 'timeout' in kwargs:
|
||||
|
|
@ -65,45 +80,84 @@ def request(method, url, **kwargs):
|
|||
|
||||
# adjust actual timeout
|
||||
timeout += 0.2 # overhead
|
||||
start_time = getattr(THREADLOCAL, 'start_time', time_before_request)
|
||||
if start_time:
|
||||
timeout -= default_timer() - start_time
|
||||
|
||||
# raise_for_error
|
||||
check_for_httperror = True
|
||||
if 'raise_for_httperror' in kwargs:
|
||||
check_for_httperror = kwargs['raise_for_httperror']
|
||||
del kwargs['raise_for_httperror']
|
||||
return timeout
|
||||
|
||||
# requests compatibility
|
||||
if isinstance(url, bytes):
|
||||
url = url.decode()
|
||||
|
||||
# network
|
||||
network = get_context_network()
|
||||
def request(method, url, **kwargs):
|
||||
"""same as requests/requests/api.py request(...)"""
|
||||
with _record_http_time() as start_time:
|
||||
network = get_context_network()
|
||||
timeout = _get_timeout(start_time, kwargs)
|
||||
future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop())
|
||||
try:
|
||||
return future.result(timeout)
|
||||
except concurrent.futures.TimeoutError as e:
|
||||
raise httpx.TimeoutException('Timeout', request=None) from e
|
||||
|
||||
# do request
|
||||
future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop())
|
||||
try:
|
||||
response = future.result(timeout)
|
||||
except concurrent.futures.TimeoutError as e:
|
||||
raise httpx.TimeoutException('Timeout', request=None) from e
|
||||
|
||||
# requests compatibility
|
||||
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
||||
response.ok = not response.is_error
|
||||
def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response, Exception]]:
|
||||
"""send multiple HTTP requests in parallel. Wait for all requests to finish."""
|
||||
with _record_http_time() as start_time:
|
||||
# send the requests
|
||||
network = get_context_network()
|
||||
loop = get_loop()
|
||||
future_list = []
|
||||
for request_desc in request_list:
|
||||
timeout = _get_timeout(start_time, request_desc.kwargs)
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
network.request(request_desc.method, request_desc.url, **request_desc.kwargs), loop
|
||||
)
|
||||
future_list.append((future, timeout))
|
||||
|
||||
# update total_time.
|
||||
# See get_time_for_thread() and reset_time_for_thread()
|
||||
if hasattr(THREADLOCAL, 'total_time'):
|
||||
time_after_request = default_timer()
|
||||
THREADLOCAL.total_time += time_after_request - time_before_request
|
||||
# read the responses
|
||||
responses = []
|
||||
for future, timeout in future_list:
|
||||
try:
|
||||
responses.append(future.result(timeout))
|
||||
except concurrent.futures.TimeoutError:
|
||||
responses.append(httpx.TimeoutException('Timeout', request=None))
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
responses.append(e)
|
||||
return responses
|
||||
|
||||
# raise an exception
|
||||
if check_for_httperror:
|
||||
raise_for_httperror(response)
|
||||
|
||||
return response
|
||||
class Request(NamedTuple):
|
||||
"""Request description for the multi_requests function"""
|
||||
|
||||
method: str
|
||||
url: str
|
||||
kwargs: Dict[str, str] = {}
|
||||
|
||||
@staticmethod
|
||||
def get(url, **kwargs):
|
||||
return Request('GET', url, kwargs)
|
||||
|
||||
@staticmethod
|
||||
def options(url, **kwargs):
|
||||
return Request('OPTIONS', url, kwargs)
|
||||
|
||||
@staticmethod
|
||||
def head(url, **kwargs):
|
||||
return Request('HEAD', url, kwargs)
|
||||
|
||||
@staticmethod
|
||||
def post(url, **kwargs):
|
||||
return Request('POST', url, kwargs)
|
||||
|
||||
@staticmethod
|
||||
def put(url, **kwargs):
|
||||
return Request('PUT', url, kwargs)
|
||||
|
||||
@staticmethod
|
||||
def patch(url, **kwargs):
|
||||
return Request('PATCH', url, kwargs)
|
||||
|
||||
@staticmethod
|
||||
def delete(url, **kwargs):
|
||||
return Request('DELETE', url, kwargs)
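A hypothetical usage of the ``Request`` helpers and ``multi_requests()`` defined above (URLs are placeholders; each list entry yields either an ``httpx.Response`` or the exception raised for it):

responses = multi_requests([
    Request.get('https://example.org/a'),
    Request.post('https://example.org/b', data={'q': 'searxng'}),
])
for resp in responses:
    if isinstance(resp, Exception):
        continue  # the request failed; the exception is returned in place of a response
    print(resp.status_code)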
|
||||
|
||||
|
||||
def get(url, **kwargs):
|
||||
|
|
|
|||
|
|
@ -26,9 +26,6 @@ else:
|
|||
logger = logger.getChild('searx.network.client')
|
||||
LOOP = None
|
||||
SSLCONTEXTS: Dict[Any, SSLContext] = {}
|
||||
TRANSPORT_KWARGS = {
|
||||
'trust_env': False,
|
||||
}
|
||||
|
||||
|
||||
def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False):
|
||||
|
|
@ -74,7 +71,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
|
|||
rdns = True
|
||||
|
||||
proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
|
||||
verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify
|
||||
verify = get_sslcontexts(proxy_url, None, verify, True, http2) if verify is True else verify
|
||||
return AsyncProxyTransportFixed(
|
||||
proxy_type=proxy_type,
|
||||
proxy_host=proxy_host,
|
||||
|
|
@ -88,12 +85,11 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
|
|||
local_address=local_address,
|
||||
limits=limit,
|
||||
retries=retries,
|
||||
**TRANSPORT_KWARGS,
|
||||
)
|
||||
|
||||
|
||||
def get_transport(verify, http2, local_address, proxy_url, limit, retries):
|
||||
verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify
|
||||
verify = get_sslcontexts(None, None, verify, True, http2) if verify is True else verify
|
||||
return httpx.AsyncHTTPTransport(
|
||||
# pylint: disable=protected-access
|
||||
verify=verify,
|
||||
|
|
@ -102,7 +98,6 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries):
|
|||
proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
|
||||
local_address=local_address,
|
||||
retries=retries,
|
||||
**TRANSPORT_KWARGS,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import httpx
|
|||
|
||||
from searx import logger, searx_debug
|
||||
from .client import new_client, get_loop, AsyncHTTPTransportNoHttp
|
||||
from .raise_for_httperror import raise_for_httperror
|
||||
|
||||
|
||||
logger = logger.getChild('network')
|
||||
|
|
@ -172,7 +173,7 @@ class Network:
|
|||
):
|
||||
continue
|
||||
return False
|
||||
response = await client.get("https://check.torproject.org/api/ip", timeout=10)
|
||||
response = await client.get("https://check.torproject.org/api/ip", timeout=60)
|
||||
if not response.json()["IsTor"]:
|
||||
result = False
|
||||
Network._TOR_CHECK_RESULT[proxies] = result
|
||||
|
|
@ -226,6 +227,27 @@ class Network:
|
|||
kwargs['follow_redirects'] = kwargs.pop('allow_redirects')
|
||||
return kwargs_clients
|
||||
|
||||
@staticmethod
|
||||
def extract_do_raise_for_httperror(kwargs):
|
||||
do_raise_for_httperror = True
|
||||
if 'raise_for_httperror' in kwargs:
|
||||
do_raise_for_httperror = kwargs['raise_for_httperror']
|
||||
del kwargs['raise_for_httperror']
|
||||
return do_raise_for_httperror
|
||||
|
||||
@staticmethod
|
||||
def patch_response(response, do_raise_for_httperror):
|
||||
if isinstance(response, httpx.Response):
|
||||
# requests compatibility (response is not streamed)
|
||||
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
||||
response.ok = not response.is_error
|
||||
|
||||
# raise an exception
|
||||
if do_raise_for_httperror:
|
||||
raise_for_httperror(response)
|
||||
|
||||
return response
|
||||
|
||||
def is_valid_response(self, response):
|
||||
# pylint: disable=too-many-boolean-expressions
|
||||
if (
|
||||
|
|
@ -239,6 +261,7 @@ class Network:
|
|||
async def call_client(self, stream, method, url, **kwargs):
|
||||
retries = self.retries
|
||||
was_disconnected = False
|
||||
do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs)
|
||||
kwargs_clients = Network.extract_kwargs_clients(kwargs)
|
||||
while retries >= 0: # pragma: no cover
|
||||
client = await self.get_client(**kwargs_clients)
|
||||
|
|
@ -248,7 +271,7 @@ class Network:
|
|||
else:
|
||||
response = await client.request(method, url, **kwargs)
|
||||
if self.is_valid_response(response) or retries <= 0:
|
||||
return response
|
||||
return Network.patch_response(response, do_raise_for_httperror)
|
||||
except httpx.RemoteProtocolError as e:
|
||||
if not was_disconnected:
|
||||
# the server has closed the connection:
|
||||
|
|
@ -311,7 +334,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
|
|||
# see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121 # pylint: disable=line-too-long
|
||||
default_params = {
|
||||
'enable_http': False,
|
||||
'verify': True,
|
||||
'verify': settings_outgoing['verify'],
|
||||
'enable_http2': settings_outgoing['enable_http2'],
|
||||
'max_connections': settings_outgoing['pool_connections'],
|
||||
'max_keepalive_connections': settings_outgoing['pool_maxsize'],
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from searx.exceptions import (
|
|||
SearxEngineTooManyRequestsException,
|
||||
SearxEngineAccessDeniedException,
|
||||
)
|
||||
from searx import get_setting
|
||||
|
||||
|
||||
def is_cloudflare_challenge(resp):
|
||||
|
|
@ -33,15 +34,22 @@ def raise_for_cloudflare_captcha(resp):
|
|||
if is_cloudflare_challenge(resp):
|
||||
# https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
|
||||
# suspend for 2 weeks
|
||||
raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
|
||||
raise SearxEngineCaptchaException(
|
||||
message='Cloudflare CAPTCHA', suspended_time=get_setting('search.suspended_times.cf_SearxEngineCaptcha')
|
||||
)
|
||||
|
||||
if is_cloudflare_firewall(resp):
|
||||
raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
|
||||
raise SearxEngineAccessDeniedException(
|
||||
message='Cloudflare Firewall',
|
||||
suspended_time=get_setting('search.suspended_times.cf_SearxEngineAccessDenied'),
|
||||
)
|
||||
|
||||
|
||||
def raise_for_recaptcha(resp):
|
||||
if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
|
||||
raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7)
|
||||
raise SearxEngineCaptchaException(
|
||||
message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
|
||||
)
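For reference, a sketch of the default durations these settings replace (the values mirror the previously hard-coded suspend times shown in this hunk; the authoritative defaults live under ``search.suspended_times`` in ``settings.yml``):

SUSPENDED_TIMES_DEFAULTS = {
    'cf_SearxEngineCaptcha': 3600 * 24 * 15,        # Cloudflare CAPTCHA
    'cf_SearxEngineAccessDenied': 3600 * 24,        # Cloudflare Firewall
    'recaptcha_SearxEngineCaptcha': 3600 * 24 * 7,  # ReCAPTCHA
}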
|
||||
|
||||
|
||||
def raise_for_captcha(resp):
|
||||
|
|
@ -64,9 +72,7 @@ def raise_for_httperror(resp):
|
|||
if resp.status_code and resp.status_code >= 400:
|
||||
raise_for_captcha(resp)
|
||||
if resp.status_code in (402, 403):
|
||||
raise SearxEngineAccessDeniedException(
|
||||
message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24
|
||||
)
|
||||
raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code))
|
||||
if resp.status_code == 429:
|
||||
raise SearxEngineTooManyRequestsException()
|
||||
resp.raise_for_status()
|
||||
|
|
|
|||
|
|
@ -198,7 +198,6 @@ class PluginStore:
|
|||
self.plugins.append(plugin)
|
||||
|
||||
def call(self, ordered_plugin_list, plugin_type, *args, **kwargs):
|
||||
# pylint: disable=no-self-use
|
||||
ret = True
|
||||
for plugin in ordered_plugin_list:
|
||||
if hasattr(plugin, plugin_type):
|
||||
|
|
|
|||
97
searx/plugins/autodetect_search_language.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Plugin to detect the search language from the search query.
|
||||
|
||||
The language detection is done by using the fastText_ library (`python
|
||||
fasttext`_). fastText_ distributes the `language identification model`_, for
|
||||
reference:
|
||||
|
||||
- `FastText.zip: Compressing text classification models`_
|
||||
- `Bag of Tricks for Efficient Text Classification`_
|
||||
|
||||
The `language identification model`_ supports the language codes (ISO-639-3)::
|
||||
|
||||
af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs bxr
|
||||
ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es et eu fa
|
||||
fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia id ie ilo io
|
||||
is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li lmo lo lrc lt lv
|
||||
mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah nap nds ne new nl nn
|
||||
no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru rue sa sah sc scn sco sd
|
||||
sh si sk sl so sq sr su sv sw ta te tg th tk tl tr tt tyv ug uk ur uz vec vep
|
||||
vi vls vo wa war wuu xal xmf yi yo yue zh
|
||||
|
||||
The `language identification model`_ is harmonized with SearXNG's language
|
||||
(locale) model. General conditions of SearXNG's locale model are:
|
||||
|
||||
a. SearXNG's locale of a query is passed to the
|
||||
:py:obj:`searx.locales.get_engine_locale` to get a language and/or region
|
||||
code that is used by an engine.
|
||||
|
||||
b. SearXNG and most of the engines do not support all the languages from
|
||||
the language model, and there might also be a discrepancy in the ISO-639-3 and
|
||||
ISO-639-2 handling (:py:obj:`searx.locales.get_engine_locale`). Furthermore,
|
||||
in SearXNG, locales like ``zh-TW`` (``zh-CN``) are mapped to
|
||||
``zh_Hant`` (``zh_Hans``).
|
||||
|
||||
Conclusion: This plugin only auto-detects the languages a user can select in
|
||||
the language menu (:py:obj:`supported_langs`).
|
||||
|
||||
SearXNG's locale of a query comes from (*highest wins*):
|
||||
|
||||
1. The ``Accept-Language`` header from user's HTTP client.
|
||||
2. The user selects a locale in the preferences.
|
||||
3. The user selects a locale from the menu in the query form (e.g. ``:zh-TW``)
|
||||
4. This plugin is activated in the preferences and the locale (only the language
|
||||
code, without a region code) comes from fastText's language detection.
|
||||
|
||||
Conclusion: There is a conflict between the language selected by the user and
|
||||
the language detected by this plugin. For example, the user
|
||||
explicitly selects the German locale via the search syntax to search for a term
|
||||
that is identified as an English term (try ``:de-DE thermomix``, for example).
|
||||
|
||||
.. hint::
|
||||
|
||||
To SearXNG maintainers; please take into account: under some circumstances
|
||||
the auto-detection of the language of this plugin could be detrimental to
|
||||
users' expectations. It is not recommended to activate this plugin by
|
||||
default. It should always be the user's decision whether to activate this
|
||||
plugin or not.
|
||||
|
||||
.. _fastText: https://fasttext.cc/
|
||||
.. _python fasttext: https://pypi.org/project/fasttext/
|
||||
.. _language identification model: https://fasttext.cc/docs/en/language-identification.html
|
||||
.. _Bag of Tricks for Efficient Text Classification: https://arxiv.org/abs/1607.01759
|
||||
.. _`FastText.zip: Compressing text classification models`: https://arxiv.org/abs/1612.03651
|
||||
|
||||
"""
|
||||
|
||||
from flask_babel import gettext
|
||||
import babel
|
||||
|
||||
from searx.utils import detect_language
|
||||
from searx.languages import language_codes
|
||||
|
||||
name = gettext('Autodetect search language')
|
||||
description = gettext('Automatically detect the query search language and switch to it.')
|
||||
preference_section = 'general'
|
||||
default_on = False
|
||||
|
||||
supported_langs = set()
|
||||
"""Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`)."""
|
||||
|
||||
|
||||
def pre_search(request, search): # pylint: disable=unused-argument
|
||||
lang = detect_language(search.search_query.query, min_probability=0)
|
||||
if lang in supported_langs:
|
||||
search.search_query.lang = lang
|
||||
try:
|
||||
search.search_query.locale = babel.Locale.parse(lang)
|
||||
except babel.core.UnknownLocaleError:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def init(app, settings): # pylint: disable=unused-argument
|
||||
for searxng_locale in language_codes:
|
||||
supported_langs.add(searxng_locale[0].split('-')[0])
|
||||
return True
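A hypothetical interactive check of the detection used by ``pre_search()`` above; ``searx.utils.detect_language`` wraps the fastText model, and the concrete return value shown is only what one would typically expect:

>>> from searx.utils import detect_language
>>> detect_language('Je ne parle pas anglais', min_probability=0)
'fr'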
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
# pyright: basic
|
||||
"""Some bot protection / rate limitation
|
||||
|
||||
To monitore rate limits and protect privacy the IP addresses are getting stored
|
||||
To monitor rate limits and protect privacy the IP addresses are getting stored
|
||||
with a hash so the limiter plugin knows who to block. A redis database is
|
||||
needed to store the hash values.
|
||||
|
||||
|
|
@ -13,11 +13,11 @@ Enable the plugin in ``settings.yml``:
|
|||
- ``redis.url: ...`` check the value, see :ref:`settings redis`
|
||||
"""
|
||||
|
||||
import hmac
|
||||
import re
|
||||
from flask import request
|
||||
|
||||
from searx.shared import redisdb
|
||||
from searx import redisdb
|
||||
from searx.redislib import incr_sliding_window
|
||||
|
||||
name = "Request limiter"
|
||||
description = "Limit the number of request"
|
||||
|
|
@ -36,8 +36,9 @@ re_bot = re.compile(
|
|||
)
|
||||
|
||||
|
||||
def is_accepted_request(inc_get_counter) -> bool:
|
||||
def is_accepted_request() -> bool:
|
||||
# pylint: disable=too-many-return-statements
|
||||
redis_client = redisdb.client()
|
||||
user_agent = request.headers.get('User-Agent', '')
|
||||
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
|
||||
|
||||
|
|
@ -47,83 +48,54 @@ def is_accepted_request(inc_get_counter) -> bool:
|
|||
return True
|
||||
|
||||
if request.path == '/search':
|
||||
c_burst = inc_get_counter(interval=20, keys=[b'IP limit, burst', x_forwarded_for])
|
||||
c_10min = inc_get_counter(interval=600, keys=[b'IP limit, 10 minutes', x_forwarded_for])
|
||||
c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
|
||||
c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
|
||||
if c_burst > 15 or c_10min > 150:
|
||||
logger.debug("to many request") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
if re_bot.match(user_agent):
|
||||
logger.debug("detected bot") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
if request.headers.get('Accept-Language', '').strip() == '':
|
||||
logger.debug("missing Accept-Language") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
if request.headers.get('Connection') == 'close':
|
||||
logger.debug("got Connection=close") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
accept_encoding_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
||||
if 'gzip' not in accept_encoding_list or 'deflate' not in accept_encoding_list:
|
||||
if 'gzip' not in accept_encoding_list and 'deflate' not in accept_encoding_list:
|
||||
logger.debug("suspicious Accept-Encoding") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
if 'text/html' not in request.accept_mimetypes:
|
||||
logger.debug("Accept-Encoding misses text/html") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
if request.args.get('format', 'html') != 'html':
|
||||
c = inc_get_counter(interval=3600, keys=[b'API limit', x_forwarded_for])
|
||||
c = incr_sliding_window(redis_client, 'API limit' + x_forwarded_for, 3600)
|
||||
if c > 4:
|
||||
logger.debug("API limit exceeded") # pylint: disable=undefined-variable
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def create_inc_get_counter(redis_client, secret_key_bytes):
|
||||
lua_script = """
|
||||
local slidingWindow = KEYS[1]
|
||||
local key = KEYS[2]
|
||||
local now = tonumber(redis.call('TIME')[1])
|
||||
local id = redis.call('INCR', 'counter')
|
||||
if (id > 2^46)
|
||||
then
|
||||
redis.call('SET', 'count', 0)
|
||||
end
|
||||
redis.call('ZREMRANGEBYSCORE', key, 0, now - slidingWindow)
|
||||
redis.call('ZADD', key, now, id)
|
||||
local result = redis.call('ZCOUNT', key, 0, now+1)
|
||||
redis.call('EXPIRE', key, slidingWindow)
|
||||
return result
|
||||
"""
|
||||
script_sha = redis_client.script_load(lua_script)
|
||||
|
||||
def inc_get_counter(interval, keys):
|
||||
m = hmac.new(secret_key_bytes, digestmod='sha256')
|
||||
for k in keys:
|
||||
m.update(bytes(str(k), encoding='utf-8') or b'')
|
||||
m.update(b"\0")
|
||||
key = m.digest()
|
||||
return redis_client.evalsha(script_sha, 2, interval, key)
|
||||
|
||||
return inc_get_counter
|
||||
|
||||
|
||||
def create_pre_request(get_aggregation_count):
|
||||
def pre_request():
|
||||
if not is_accepted_request(get_aggregation_count):
|
||||
return '', 429
|
||||
return None
|
||||
|
||||
return pre_request
|
||||
def pre_request():
|
||||
if not is_accepted_request():
|
||||
return 'Too Many Requests', 429
|
||||
return None
|
||||
|
||||
|
||||
def init(app, settings):
|
||||
if not settings['server']['limiter']:
|
||||
return False
|
||||
|
||||
logger.debug("init limiter DB") # pylint: disable=undefined-variable
|
||||
if not redisdb.init():
|
||||
logger.error("init limiter DB failed!!!") # pylint: disable=undefined-variable
|
||||
if not redisdb.client():
|
||||
logger.error("The limiter requires Redis") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
redis_client = redisdb.client()
|
||||
secret_key_bytes = bytes(settings['server']['secret_key'], encoding='utf-8')
|
||||
inc_get_counter = create_inc_get_counter(redis_client, secret_key_bytes)
|
||||
app.before_request(create_pre_request(inc_get_counter))
|
||||
app.before_request(pre_request)
|
||||
return True
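A Redis-free sketch of the sliding-window counting used by ``is_accepted_request()``; the thresholds mirror the plugin, while an in-memory deque stands in for the Redis sorted set behind ``incr_sliding_window``:

import time
from collections import deque

class SlidingWindow:
    def __init__(self, duration: int):
        self.duration = duration
        self.hits = deque()

    def incr(self) -> int:
        now = time.time()
        # drop hits that fell out of the window, then record the new one
        while self.hits and self.hits[0] <= now - self.duration:
            self.hits.popleft()
        self.hits.append(now)
        return len(self.hits)

burst = SlidingWindow(20)     # mirrors 'IP limit, burst'
ten_min = SlidingWindow(600)  # mirrors 'IP limit, 10 minutes'

def accepted() -> bool:
    # same thresholds as above: reject bursts and sustained load
    return not (burst.incr() > 15 or ten_min.incr() > 150)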
|
||||
|
|
|
|||
|
|
@ -42,4 +42,6 @@ def on_result(request, search, result):
|
|||
doi = doi[: -len(suffix)]
|
||||
result['url'] = get_doi_resolver(request.preferences) + doi
|
||||
result['parsed_url'] = urlparse(result['url'])
|
||||
if 'doi' not in result:
|
||||
result['doi'] = doi
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -18,9 +18,7 @@ from flask_babel import gettext
|
|||
|
||||
name = gettext('Search on category select')
|
||||
description = gettext(
|
||||
'Perform search immediately if a category selected. ' 'Disable to select multiple categories. (JavaScript required)'
|
||||
'Perform search immediately if a category selected. Disable to select multiple categories. (JavaScript required)'
|
||||
)
|
||||
default_on = True
|
||||
preference_section = 'ui'
|
||||
|
||||
js_dependencies = ('plugins/js/search_on_category_select.js',)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|||
from flask_babel import gettext
|
||||
import re
|
||||
|
||||
name = gettext('Self Informations')
|
||||
name = gettext('Self Information')
|
||||
description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
|
||||
default_on = True
|
||||
preference_section = 'query'
|
||||
|
|
|
|||
92
searx/plugins/tor_check.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""A plugin to check whether the IP address of the request is a Tor exit-node when the
|
||||
user searches for ``tor-check``. It fetches the tor exit node list from
|
||||
https://check.torproject.org/exit-addresses and parses all the IPs into a list,
|
||||
then checks if the user's IP address is in it.
|
||||
|
||||
Enable in ``settings.yml``:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
enabled_plugins:
|
||||
..
|
||||
- 'Tor check plugin'
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
from flask_babel import gettext
|
||||
from httpx import HTTPError
|
||||
from searx.network import get
|
||||
|
||||
default_on = False
|
||||
|
||||
name = gettext("Tor check plugin")
|
||||
'''Translated name of the plugin'''
|
||||
|
||||
description = gettext(
|
||||
"This plugin checks if the address of the request is a Tor exit-node, and"
|
||||
" informs the user if it is; like check.torproject.org, but from SearXNG."
|
||||
)
|
||||
'''Translated description of the plugin.'''
|
||||
|
||||
preference_section = 'query'
|
||||
'''The preference section where the plugin is shown.'''
|
||||
|
||||
query_keywords = ['tor-check']
|
||||
'''Query keywords shown in the preferences.'''
|
||||
|
||||
query_examples = ''
|
||||
'''Query examples shown in the preferences.'''
|
||||
|
||||
# Regex for exit node addresses in the list.
|
||||
reg = re.compile(r"(?<=ExitAddress )\S+")
|
||||
|
||||
|
||||
def post_search(request, search):
|
||||
|
||||
if search.search_query.pageno > 1:
|
||||
return True
|
||||
|
||||
if search.search_query.query.lower() == "tor-check":
|
||||
|
||||
# Request the list of tor exit nodes.
|
||||
try:
|
||||
resp = get("https://check.torproject.org/exit-addresses")
|
||||
node_list = re.findall(reg, resp.text)
|
||||
|
||||
except HTTPError:
|
||||
# No answer, return error
|
||||
search.result_container.answers["tor"] = {
|
||||
"answer": gettext(
|
||||
"Could not download the list of Tor exit-nodes from: https://check.torproject.org/exit-addresses"
|
||||
)
|
||||
}
|
||||
return True
|
||||
|
||||
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
|
||||
|
||||
if x_forwarded_for:
|
||||
ip_address = x_forwarded_for[0]
|
||||
else:
|
||||
ip_address = request.remote_addr
|
||||
|
||||
if ip_address in node_list:
|
||||
search.result_container.answers["tor"] = {
|
||||
"answer": gettext(
|
||||
"You are using Tor and it looks like you have this external IP address: {ip_address}".format(
|
||||
ip_address=ip_address
|
||||
)
|
||||
)
|
||||
}
|
||||
else:
|
||||
search.result_container.answers["tor"] = {
|
||||
"answer": gettext(
|
||||
"You are not using Tor and you have this external IP address: {ip_address}".format(
|
||||
ip_address=ip_address
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
return True
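A standalone sketch of the exit-address parsing above, run on a made-up excerpt of the list (the file format is assumed from the regular expression):

import re

sample = (
    "ExitNode 0011BD2485AD45D984EC4159C88FC066E5E3300E\n"
    "Published 2024-01-01 01:02:03\n"
    "ExitAddress 192.0.2.10 2024-01-01 01:05:00\n"
)
print(re.findall(r"(?<=ExitAddress )\S+", sample))  # ['192.0.2.10']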
|
||||
|
|
@ -8,6 +8,3 @@ description = gettext(
|
|||
)
|
||||
default_on = False
|
||||
preference_section = 'ui'
|
||||
|
||||
js_dependencies = ('plugins/js/vim_hotkeys.js',)
|
||||
css_dependencies = ('plugins/css/vim_hotkeys.css',)
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class Setting:
|
|||
return self.value
|
||||
|
||||
def save(self, name: str, resp: flask.Response):
|
||||
"""Save cookie ``name`` in the HTTP reponse obect
|
||||
"""Save cookie ``name`` in the HTTP response object
|
||||
|
||||
If needed, it's overwritten in subclasses."""
|
||||
resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
|
||||
|
|
@ -113,7 +113,7 @@ class MultipleChoiceSetting(Setting):
|
|||
self.value.append(choice)
|
||||
|
||||
def save(self, name: str, resp: flask.Response):
|
||||
"""Save cookie ``name`` in the HTTP reponse obect"""
|
||||
"""Save cookie ``name`` in the HTTP response object"""
|
||||
resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
|
||||
|
||||
|
||||
|
|
@ -146,7 +146,7 @@ class SetSetting(Setting):
|
|||
self.values = set(elements)
|
||||
|
||||
def save(self, name: str, resp: flask.Response):
|
||||
"""Save cookie ``name`` in the HTTP reponse obect"""
|
||||
"""Save cookie ``name`` in the HTTP response object"""
|
||||
resp.set_cookie(name, ','.join(self.values), max_age=COOKIE_MAX_AGE)
|
||||
|
||||
|
||||
|
|
@ -193,7 +193,7 @@ class MapSetting(Setting):
|
|||
self.key = data # pylint: disable=attribute-defined-outside-init
|
||||
|
||||
def save(self, name: str, resp: flask.Response):
|
||||
"""Save cookie ``name`` in the HTTP reponse obect"""
|
||||
"""Save cookie ``name`` in the HTTP response object"""
|
||||
if hasattr(self, 'key'):
|
||||
resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)
|
||||
|
||||
|
|
@ -208,11 +208,9 @@ class BooleanChoices:
|
|||
self.default_choices = dict(choices)
|
||||
|
||||
def transform_form_items(self, items):
|
||||
# pylint: disable=no-self-use
|
||||
return items
|
||||
|
||||
def transform_values(self, values):
|
||||
# pylint: disable=no-self-use
|
||||
return values
|
||||
|
||||
def parse_cookie(self, data_disabled: str, data_enabled: str):
|
||||
|
|
@ -241,7 +239,7 @@ class BooleanChoices:
|
|||
return (k for k, v in self.choices.items() if not v)
|
||||
|
||||
def save(self, resp: flask.Response):
|
||||
"""Save cookie in the HTTP reponse obect"""
|
||||
"""Save cookie in the HTTP response object"""
|
||||
disabled_changed = (k for k in self.disabled if self.default_choices[k])
|
||||
enabled_changed = (k for k in self.enabled if not self.default_choices[k])
|
||||
resp.set_cookie('disabled_{0}'.format(self.name), ','.join(disabled_changed), max_age=COOKIE_MAX_AGE)
|
||||
|
|
@ -367,6 +365,16 @@ class Preferences:
|
|||
locked=is_locked('simple_style'),
|
||||
choices=['', 'auto', 'light', 'dark']
|
||||
),
|
||||
'center_alignment': MapSetting(
|
||||
settings['ui']['center_alignment'],
|
||||
locked=is_locked('center_alignment'),
|
||||
map={
|
||||
'0': False,
|
||||
'1': True,
|
||||
'False': False,
|
||||
'True': True
|
||||
}
|
||||
),
|
||||
'advanced_search': MapSetting(
|
||||
settings['ui']['advanced_search'],
|
||||
locked=is_locked('advanced_search'),
|
||||
|
|
@ -433,7 +441,7 @@ class Preferences:
|
|||
"""parse (base64) preferences from request (``flask.request.form['preferences']``)"""
|
||||
bin_data = decompress(urlsafe_b64decode(input_data))
|
||||
dict_data = {}
|
||||
for x, y in parse_qs(bin_data.decode('ascii')).items():
|
||||
for x, y in parse_qs(bin_data.decode('ascii'), keep_blank_values=True).items():
|
||||
dict_data[x] = y[0]
|
||||
self.parse_dict(dict_data)
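A standalone sketch of the decoding step changed above; ``compress`` and ``urlsafe_b64encode`` are assumed to be the counterparts used when the preferences are serialized:

from base64 import urlsafe_b64decode, urlsafe_b64encode
from urllib.parse import parse_qs, urlencode
from zlib import compress, decompress

encoded = urlsafe_b64encode(compress(urlencode({'theme': 'simple', 'query': ''}).encode()))
bin_data = decompress(urlsafe_b64decode(encoded))
# keep_blank_values=True preserves 'query' even though its value is empty
print(parse_qs(bin_data.decode('ascii'), keep_blank_values=True))
# {'theme': ['simple'], 'query': ['']}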
|
||||
|
||||
|
|
@ -488,7 +496,7 @@ class Preferences:
|
|||
return ret_val
|
||||
|
||||
def save(self, resp: flask.Response):
|
||||
"""Save cookie in the HTTP reponse obect"""
|
||||
"""Save cookie in the HTTP response object"""
|
||||
for user_setting_name, user_setting in self.key_value_settings.items():
|
||||
# pylint: disable=unnecessary-dict-index-lookup
|
||||
if self.key_value_settings[user_setting_name].locked:
|
||||
|
|
|
|||
|
|
@ -198,10 +198,10 @@ class BangParser(QueryPartParser):
|
|||
self.raw_text_query.enginerefs.append(EngineRef(value, 'none'))
|
||||
return True
|
||||
|
||||
# check if prefix is equal with categorie name
|
||||
# check if prefix is equal with category name
|
||||
if value in categories:
|
||||
# using all engines for that search, which
|
||||
# are declared under that categorie name
|
||||
# are declared under that category name
|
||||
self.raw_text_query.enginerefs.extend(
|
||||
EngineRef(engine.name, value)
|
||||
for engine in categories[value]
|
||||
|
|
@ -219,7 +219,7 @@ class BangParser(QueryPartParser):
|
|||
self._add_autocomplete(first_char + suggestion)
|
||||
return
|
||||
|
||||
# check if query starts with categorie name
|
||||
# check if query starts with category name
|
||||
for category in categories:
|
||||
if category.startswith(value):
|
||||
self._add_autocomplete(first_char + category.replace(' ', '_'))
|
||||
|
|
@ -311,7 +311,7 @@ class RawTextQuery:
|
|||
|
||||
def getFullQuery(self):
|
||||
"""
|
||||
get full querry including whitespaces
|
||||
get full query including whitespaces
|
||||
"""
|
||||
return '{0} {1}'.format(' '.join(self.query_parts), self.getQuery()).strip()
|
||||
|
||||
|
|
|
|||
70
searx/redisdb.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Implementation of the redis client (redis-py_).
|
||||
|
||||
.. _redis-py: https://github.com/redis/redis-py
|
||||
|
||||
This implementation uses the :ref:`settings redis` setup from ``settings.yml``.
|
||||
A redis DB connect can be tested by::
|
||||
|
||||
>>> from searx import redisdb
|
||||
>>> redisdb.initialize()
|
||||
True
|
||||
>>> db = redisdb.client()
|
||||
>>> db.set("foo", "bar")
|
||||
True
|
||||
>>> db.get("foo")
|
||||
b'bar'
|
||||
>>>
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import pwd
|
||||
import logging
|
||||
import redis
|
||||
from searx import get_setting
|
||||
|
||||
|
||||
OLD_REDIS_URL_DEFAULT_URL = 'unix:///usr/local/searxng-redis/run/redis.sock?db=0'
|
||||
"""This was the default Redis URL in settings.yml."""
|
||||
|
||||
_CLIENT = None
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def client() -> redis.Redis:
|
||||
return _CLIENT
|
||||
|
||||
|
||||
def initialize():
|
||||
global _CLIENT # pylint: disable=global-statement
|
||||
redis_url = get_setting('redis.url')
|
||||
if not redis_url:
|
||||
return False
|
||||
try:
|
||||
# create a client, but no connection is done
|
||||
_CLIENT = redis.Redis.from_url(redis_url)
|
||||
|
||||
# log the parameters as seen by the redis lib, without the password
|
||||
kwargs = _CLIENT.get_connection_kwargs().copy()
|
||||
kwargs.pop('password', None)
|
||||
kwargs = ' '.join([f'{k}={v!r}' for k, v in kwargs.items()])
|
||||
logger.info("connecting to Redis %s", kwargs)
|
||||
|
||||
# check the connection
|
||||
_CLIENT.ping()
|
||||
|
||||
# no error: the redis connection is working
|
||||
logger.info("connected to Redis")
|
||||
return True
|
||||
except redis.exceptions.RedisError as e:
|
||||
_CLIENT = None
|
||||
_pw = pwd.getpwuid(os.getuid())
|
||||
logger.exception("[%s (%s)] can't connect redis DB ...", _pw.pw_name, _pw.pw_uid)
|
||||
if redis_url == OLD_REDIS_URL_DEFAULT_URL and isinstance(e, redis.exceptions.ConnectionError):
|
||||
logger.info(
|
||||
"You can safely ignore the above Redis error if you don't use Redis. "
|
||||
"You can remove this error by setting redis.url to false in your settings.yml."
|
||||
)
|
||||
return False
|
||||
241
searx/redislib.py
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""A collection of convenient functions and redis/lua scripts.
|
||||
|
||||
This code was partial inspired by the `Bullet-Proofing Lua Scripts in RedisPy`_
|
||||
article.
|
||||
|
||||
.. _Bullet-Proofing Lua Scripts in RedisPy:
|
||||
https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/
|
||||
|
||||
"""
|
||||
|
||||
import hmac
|
||||
|
||||
from searx import get_setting
|
||||
|
||||
LUA_SCRIPT_STORAGE = {}
|
||||
"""A global dictionary to cache client's ``Script`` objects, used by
|
||||
:py:obj:`lua_script_storage`"""
|
||||
|
||||
|
||||
def lua_script_storage(client, script):
|
||||
"""Returns a redis :py:obj:`Script
|
||||
<redis.commands.core.CoreCommands.register_script>` instance.
|
||||
|
||||
Due to performance reasons, the ``Script`` object is instantiated only once
|
||||
for a client (``client.register_script(..)``) and is cached in
|
||||
:py:obj:`LUA_SCRIPT_STORAGE`.
|
||||
|
||||
"""
|
||||
|
||||
# redis connection can be closed, let's use the id() of the redis connector
|
||||
# as key in the script-storage:
|
||||
client_id = id(client)
|
||||
|
||||
if LUA_SCRIPT_STORAGE.get(client_id) is None:
|
||||
LUA_SCRIPT_STORAGE[client_id] = {}
|
||||
|
||||
if LUA_SCRIPT_STORAGE[client_id].get(script) is None:
|
||||
LUA_SCRIPT_STORAGE[client_id][script] = client.register_script(script)
|
||||
|
||||
return LUA_SCRIPT_STORAGE[client_id][script]
|
||||
|
||||
|
||||
PURGE_BY_PREFIX = """
|
||||
local prefix = tostring(ARGV[1])
|
||||
for i, name in ipairs(redis.call('KEYS', prefix .. '*')) do
|
||||
redis.call('EXPIRE', name, 0)
|
||||
end
|
||||
"""
|
||||
|
||||
|
||||
def purge_by_prefix(client, prefix: str = "SearXNG_"):
|
||||
"""Purge all keys with ``prefix`` from database.
|
||||
|
||||
Queries all keys in the database by the given prefix and set expire time to
|
||||
zero. The default prefix will drop all keys which has been set by SearXNG
|
||||
(drops SearXNG schema entirely from database).
|
||||
|
||||
The implementation is the lua script from string :py:obj:`PURGE_BY_PREFIX`.
|
||||
The lua script uses EXPIRE_ instead of DEL_: if there are a lot of keys to
|
||||
delete and/or their values are big, `DEL` could take more time and blocks
|
||||
the command loop, while `EXPIRE` returns immediately.
|
||||
|
||||
:param prefix: prefix of the key to delete (default: ``SearXNG_``)
|
||||
:type prefix: str
|
||||
|
||||
.. _EXPIRE: https://redis.io/commands/expire/
|
||||
.. _DEL: https://redis.io/commands/del/
|
||||
|
||||
"""
|
||||
script = lua_script_storage(client, PURGE_BY_PREFIX)
|
||||
script(args=[prefix])
|
||||
|
||||
|
||||
def secret_hash(name: str):
|
||||
"""Creates a hash of the ``name``.
|
||||
|
||||
Combines argument ``name`` with the ``secret_key`` from :ref:`settings
|
||||
server`. This function can be used to get a more anonymised name of a Redis
|
||||
KEY.
|
||||
|
||||
:param name: the name to create a secret hash for
|
||||
:type name: str
|
||||
"""
|
||||
m = hmac.new(bytes(name, encoding='utf-8'), digestmod='sha256')
|
||||
m.update(bytes(get_setting('server.secret_key'), encoding='utf-8'))
|
||||
return m.hexdigest()
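A minimal stdlib-only illustration of the keyed hash above ('my_secret' stands in for ``server.secret_key`` from ``settings.yml``):

import hmac

def secret_hash_sketch(name: str, secret_key: str = 'my_secret') -> str:
    # same construction as above: the name keys the HMAC, the secret is the message
    m = hmac.new(bytes(name, encoding='utf-8'), digestmod='sha256')
    m.update(bytes(secret_key, encoding='utf-8'))
    return m.hexdigest()

# the same name always maps to the same opaque Redis key suffix
assert secret_hash_sketch('127.0.0.1') == secret_hash_sketch('127.0.0.1')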
|
||||
|
||||
|
||||
INCR_COUNTER = """
|
||||
local limit = tonumber(ARGV[1])
|
||||
local expire = tonumber(ARGV[2])
|
||||
local c_name = KEYS[1]
|
||||
|
||||
local c = redis.call('GET', c_name)
|
||||
|
||||
if not c then
|
||||
c = redis.call('INCR', c_name)
|
||||
if expire > 0 then
|
||||
redis.call('EXPIRE', c_name, expire)
|
||||
end
|
||||
else
|
||||
c = tonumber(c)
|
||||
if limit == 0 or c < limit then
|
||||
c = redis.call('INCR', c_name)
|
||||
end
|
||||
end
|
||||
return c
|
||||
"""
|
||||
|
||||
|
||||
def incr_counter(client, name: str, limit: int = 0, expire: int = 0):
|
||||
"""Increment a counter and return the new value.
|
||||
|
||||
If a counter with the redis key ``SearXNG_counter_<name>`` does not exist, it is
|
||||
created and its initial value 1 is returned. The replacement ``<name>`` is a
|
||||
*secret hash* of the value from argument ``name`` (see
|
||||
:py:func:`secret_hash`).
|
||||
|
||||
The implementation of the redis counter is the lua script from string
|
||||
:py:obj:`INCR_COUNTER`.
|
||||
|
||||
:param name: name of the counter
|
||||
:type name: str
|
||||
|
||||
:param expire: live-time of the counter in seconds (default ``0`` means
|
||||
infinite).
|
||||
:type expire: int / see EXPIRE_
|
||||
|
||||
:param limit: limit at which the counter stops incrementing (default ``0`` means no limit)
|
||||
:type limit: int / limit is 2^64 see INCR_
|
||||
|
||||
:return: value of the incremented counter
|
||||
:type return: int
|
||||
|
||||
.. _EXPIRE: https://redis.io/commands/expire/
|
||||
.. _INCR: https://redis.io/commands/incr/
|
||||
|
||||
A simple demo of a counter with expire time and limit::
|
||||
|
||||
>>> for i in range(6):
|
||||
... i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec
|
||||
... time.sleep(1) # from the third call on max has been reached
|
||||
...
|
||||
(0, 1)
|
||||
(1, 2)
|
||||
(2, 3)
|
||||
(3, 3)
|
||||
(4, 3)
|
||||
(5, 1)
|
||||
|
||||
"""
|
||||
script = lua_script_storage(client, INCR_COUNTER)
|
||||
name = "SearXNG_counter_" + secret_hash(name)
|
||||
c = script(args=[limit, expire], keys=[name])
|
||||
return c
|
||||
|
||||
|
||||
def drop_counter(client, name):
|
||||
"""Drop counter with redis key ``SearXNG_counter_<name>``
|
||||
|
||||
The replacement ``<name>`` is a *secret hash* of the value from argument
|
||||
``name`` (see :py:func:`incr_counter` and :py:func:`incr_sliding_window`).
|
||||
"""
|
||||
name = "SearXNG_counter_" + secret_hash(name)
|
||||
client.delete(name)
|
||||
|
||||
|
||||
INCR_SLIDING_WINDOW = """
|
||||
local expire = tonumber(ARGV[1])
|
||||
local name = KEYS[1]
|
||||
local current_time = redis.call('TIME')
|
||||
|
||||
redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire)
|
||||
redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2])
|
||||
local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1)
|
||||
redis.call('EXPIRE', name, expire)
|
||||
return result
|
||||
"""
|
||||
|
||||
|
||||
def incr_sliding_window(client, name: str, duration: int):
|
||||
"""Increment a sliding-window counter and return the new value.
|
||||
|
||||
If a counter with the redis key ``SearXNG_counter_<name>`` does not exist, it is
|
||||
created and its initial value 1 is returned. The replacement ``<name>`` is a
|
||||
*secret hash* of the value from argument ``name`` (see
|
||||
:py:func:`secret_hash`).
|
||||
|
||||
:param name: name of the counter
|
||||
:type name: str
|
||||
|
||||
:param duration: live-time of the sliding window in seconds
|
||||
:type duration: int
|
||||
|
||||
:return: value of the incremented counter
|
||||
:type return: int
|
||||
|
||||
The implementation of the redis counter is the lua script from string
|
||||
:py:obj:`INCR_SLIDING_WINDOW`. The lua script uses `sorted sets in Redis`_
|
||||
to implement a sliding window for the redis key ``SearXNG_counter_<name>``
|
||||
(ZADD_). The current TIME_ is used to score the items in the sorted set and
|
||||
the time window is moved by removing items with a score lower than the current time
|
||||
minus *duration* time (ZREMRANGEBYSCORE_).
|
||||
|
||||
The EXPIRE_ time (the duration of the sliding window) is refreshed on each
|
||||
call (incrementation) and if there is no call in this duration, the sorted
|
||||
set expires from the redis DB.
|
||||
|
||||
The return value is the number of items in the sorted set (ZCOUNT_), which
|
||||
means the number of calls in the sliding window.
|
||||
|
||||
.. _Sorted sets in Redis:
|
||||
https://redis.com/ebook/part-1-getting-started/chapter-1-getting-to-know-redis/1-2-what-redis-data-structures-look-like/1-2-5-sorted-sets-in-redis/
|
||||
.. _TIME: https://redis.io/commands/time/
|
||||
.. _ZADD: https://redis.io/commands/zadd/
|
||||
.. _EXPIRE: https://redis.io/commands/expire/
|
||||
.. _ZREMRANGEBYSCORE: https://redis.io/commands/zremrangebyscore/
|
||||
.. _ZCOUNT: https://redis.io/commands/zcount/
|
||||
|
||||
A simple demo of the sliding window::
|
||||
|
||||
>>> for i in range(5):
|
||||
... incr_sliding_window(client, "foo", 3) # duration 3 sec
|
||||
... time.sleep(1) # from the third call (second) on the window is moved
|
||||
...
|
||||
1
|
||||
2
|
||||
3
|
||||
3
|
||||
3
|
||||
>>> time.sleep(3) # wait until expire
|
||||
>>> incr_sliding_window(client, "foo", 3)
|
||||
1
|
||||
|
||||
"""
|
||||
script = lua_script_storage(client, INCR_SLIDING_WINDOW)
|
||||
name = "SearXNG_counter_" + secret_hash(name)
|
||||
c = script(args=[duration], keys=[name])
|
||||
return c
|
||||
|
|
@ -134,9 +134,9 @@ def result_score(result):
|
|||
if hasattr(engines[result_engine], 'weight'):
|
||||
weight *= float(engines[result_engine].weight)
|
||||
|
||||
occurences = len(result['positions'])
|
||||
occurrences = len(result['positions'])
|
||||
|
||||
return sum((occurences * weight) / position for position in result['positions'])
|
||||
return sum((occurrences * weight) / position for position in result['positions'])
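A worked example of the score above, for a result reported at positions 1 and 3 with a combined engine weight of 1.0 (numbers are illustrative only):

positions = [1, 3]
weight = 1.0
occurrences = len(positions)  # 2
sum((occurrences * weight) / position for position in positions)  # 2/1 + 2/3 = 2.67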
|
||||
|
||||
|
||||
class Timing(NamedTuple):
|
||||
|
|
@ -286,7 +286,7 @@ class ResultContainer:
|
|||
if 'template' not in result:
|
||||
result['template'] = 'default.html'
|
||||
|
||||
# strip multiple spaces and cariage returns from content
|
||||
# strip multiple spaces and carriage returns from content
|
||||
if result.get('content'):
|
||||
result['content'] = WHITESPACE_REGEX.sub(' ', result['content'])
|
||||
|
||||
|
|
@ -315,7 +315,7 @@ class ResultContainer:
|
|||
return merged_result
|
||||
else:
|
||||
# it's an image
|
||||
# it's a duplicate if the parsed_url, template and img_src are differents
|
||||
# it's a duplicate if the parsed_url, template and img_src are different
|
||||
if result.get('img_src', '') == merged_result.get('img_src', ''):
|
||||
return merged_result
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -2,11 +2,12 @@
|
|||
# lint: pylint
|
||||
# pylint: disable=missing-module-docstring, too-few-public-methods
|
||||
|
||||
import typing
|
||||
import threading
|
||||
from timeit import default_timer
|
||||
from uuid import uuid4
|
||||
|
||||
import flask
|
||||
|
||||
from searx import settings
|
||||
from searx.answerers import ask
|
||||
from searx.external_bang import get_bang_url
|
||||
|
|
@ -133,7 +134,7 @@ class Search:
|
|||
|
||||
def search_multiple_requests(self, requests):
|
||||
# pylint: disable=protected-access
|
||||
search_id = uuid4().__str__()
|
||||
search_id = str(uuid4())
|
||||
|
||||
for engine_name, query, request_params in requests:
|
||||
th = threading.Thread( # pylint: disable=invalid-name
|
||||
|
|
@ -181,7 +182,7 @@ class SearchWithPlugins(Search):
|
|||
|
||||
__slots__ = 'ordered_plugin_list', 'request'
|
||||
|
||||
def __init__(self, search_query: SearchQuery, ordered_plugin_list, request: "flask.Request"):
|
||||
def __init__(self, search_query: SearchQuery, ordered_plugin_list, request: flask.Request):
|
||||
super().__init__(search_query)
|
||||
self.ordered_plugin_list = ordered_plugin_list
|
||||
self.result_container.on_result = self._on_result
|
||||
|
|
|
|||
|
|
@ -2,3 +2,5 @@
|
|||
|
||||
from .impl import Checker
|
||||
from .background import initialize, get_result
|
||||
|
||||
__all__ = ('Checker', 'initialize', 'get_result')
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ def run(engine_name_list, verbose):
|
|||
stderr.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n')
|
||||
checker = searx.search.checker.Checker(processor)
|
||||
checker.run()
|
||||
if checker.test_results.succesfull:
|
||||
if checker.test_results.successful:
|
||||
stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{GREEN}OK{RESET_SEQ}\n')
|
||||
if verbose:
|
||||
stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n')
|
||||
|
|
|
|||
|
|
@ -1,26 +1,28 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
# pylint: disable=missing-module-docstring
|
||||
# pyright: strict
|
||||
# pyright: basic
|
||||
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
import threading
|
||||
import os
|
||||
import signal
|
||||
from typing import Dict, Union, List, Any, Tuple
|
||||
from typing import Dict, Union, List, Any, Tuple, Optional
|
||||
from typing_extensions import TypedDict, Literal
|
||||
|
||||
import redis.exceptions
|
||||
|
||||
from searx import logger, settings, searx_debug
|
||||
from searx.redisdb import client as get_redis_client
|
||||
from searx.exceptions import SearxSettingsException
|
||||
from searx.search.processors import PROCESSORS
|
||||
from searx.search.checker import Checker
|
||||
from searx.shared import schedule, storage
|
||||
from searx.search.checker.scheduler import scheduler_function
|
||||
|
||||
|
||||
CHECKER_RESULT = 'CHECKER_RESULT'
|
||||
running = threading.Lock()
|
||||
REDIS_RESULT_KEY = 'SearXNG_checker_result'
|
||||
REDIS_LOCK_KEY = 'SearXNG_checker_lock'
|
||||
|
||||
|
||||
CheckerResult = Union['CheckerOk', 'CheckerErr', 'CheckerOther']
|
||||
|
|
@ -77,20 +79,24 @@ def _get_interval(every: Any, error_msg: str) -> Tuple[int, int]:
    return (every[0], every[1])


def _get_every():
    every = settings.get('checker', {}).get('scheduling', {}).get('every', (300, 1800))
    return _get_interval(every, 'checker.scheduling.every is not a int or list')


def get_result() -> CheckerResult:
    serialized_result = storage.get_str(CHECKER_RESULT)
    if serialized_result is not None:
        return json.loads(serialized_result)
    return {'status': 'unknown'}
    client = get_redis_client()
    if client is None:
        # without Redis, the checker is disabled
        return {'status': 'disabled'}
    serialized_result: Optional[bytes] = client.get(REDIS_RESULT_KEY)
    if serialized_result is None:
        # the Redis key does not exist
        return {'status': 'unknown'}
    return json.loads(serialized_result)


def _set_result(result: CheckerResult):
    storage.set_str(CHECKER_RESULT, json.dumps(result))
    client = get_redis_client()
    if client is None:
        # without Redis, the function does nothing
        return
    client.set(REDIS_RESULT_KEY, json.dumps(result))


def _timestamp():

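The rewritten get_result() and _set_result() above drop the shared-storage helpers and keep the checker status as one JSON string under a single Redis key. A minimal standalone sketch of that read/write pattern with redis-py follows; the connection parameters are assumptions, only the key name and the JSON round-trip come from the hunk above.

# Sketch: persist and read back a checker result the way the new helpers do.
# Assumption: a local Redis instance on the default port; key name copied from the hunk above.
import json
import redis

REDIS_RESULT_KEY = 'SearXNG_checker_result'
client = redis.Redis(host='127.0.0.1', port=6379)

def set_result(result: dict) -> None:
    # store the whole result as one JSON string under one key
    client.set(REDIS_RESULT_KEY, json.dumps(result))

def get_result() -> dict:
    raw = client.get(REDIS_RESULT_KEY)  # bytes, or None if the key does not exist
    if raw is None:
        return {'status': 'unknown'}
    return json.loads(raw)

set_result({'status': 'ok', 'engines': {}, 'timestamp': 0})
print(get_result())
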
@ -98,41 +104,29 @@ def _timestamp():


def run():
    if not running.acquire(blocking=False):  # pylint: disable=consider-using-with
        return
    try:
        logger.info('Starting checker')
        result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()}
        for name, processor in PROCESSORS.items():
            logger.debug('Checking %s engine', name)
            checker = Checker(processor)
            checker.run()
            if checker.test_results.succesfull:
                result['engines'][name] = {'success': True}
            else:
                result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}
        # use a Redis lock to make sure there is no checker running at the same time
        # (this should not happen, this is a safety measure)
        with get_redis_client().lock(REDIS_LOCK_KEY, blocking_timeout=60, timeout=3600):
            logger.info('Starting checker')
            result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()}
            for name, processor in PROCESSORS.items():
                logger.debug('Checking %s engine', name)
                checker = Checker(processor)
                checker.run()
                if checker.test_results.successful:
                    result['engines'][name] = {'success': True}
                else:
                    result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}

        _set_result(result)
        logger.info('Check done')
            _set_result(result)
            logger.info('Check done')
    except redis.exceptions.LockError:
        _set_result({'status': 'error', 'timestamp': _timestamp()})
        logger.exception('Error while running the checker')
    except Exception:  # pylint: disable=broad-except
        _set_result({'status': 'error', 'timestamp': _timestamp()})
        logger.exception('Error while running the checker')
    finally:
        running.release()


def _run_with_delay():
    every = _get_every()
    delay = random.randint(0, every[1] - every[0])
    logger.debug('Start checker in %i seconds', delay)
    time.sleep(delay)
    run()


def _start_scheduling():
    every = _get_every()
    if schedule(every[0], _run_with_delay):
        run()


def _signal_handler(_signum: int, _frame: Any):

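run() now wraps the whole check in a Redis lock obtained through redis-py's lock() helper, so two processes cannot execute the embedded checker at the same time; if the lock cannot be acquired within blocking_timeout, redis-py raises LockError and the new except branch records an error result. A small sketch of that locking pattern, assuming a local Redis instance and using a placeholder task in place of the per-engine checks:

# Sketch of the redis-py locking pattern used above (assumed local Redis instance).
import time
import redis
import redis.exceptions

client = redis.Redis(host='127.0.0.1', port=6379)

def do_long_running_check():
    # placeholder for the actual per-engine checks
    time.sleep(1)

try:
    # wait up to 60s to acquire; Redis releases the lock after 3600s even if we crash
    with client.lock('SearXNG_checker_lock', blocking_timeout=60, timeout=3600):
        do_long_running_check()
except redis.exceptions.LockError:
    # another process holds the lock: give up instead of running a second checker
    print('checker already running elsewhere')
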
@ -147,27 +141,31 @@ def initialize():
    logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
    signal.signal(signal.SIGUSR1, _signal_handler)

    # disabled by default
    _set_result({'status': 'disabled'})

    # special case when debug is activate
    if searx_debug and settings.get('checker', {}).get('off_when_debug', True):
    if searx_debug and settings['checker']['off_when_debug']:
        logger.info('debug mode: checker is disabled')
        return

    # check value of checker.scheduling.every now
    scheduling = settings.get('checker', {}).get('scheduling', None)
    scheduling = settings['checker']['scheduling']
    if scheduling is None or not scheduling:
        logger.info('Checker scheduler is disabled')
        return

    #
    _set_result({'status': 'unknown'})
    # make sure there is a Redis connection
    if get_redis_client() is None:
        logger.error('The checker requires Redis')
        return

    start_after = scheduling.get('start_after', (300, 1800))
    start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
    delay = random.randint(start_after[0], start_after[1])
    logger.info('Start checker in %i seconds', delay)
    t = threading.Timer(delay, _start_scheduling)
    # start the background scheduler
    every_range = _get_interval(scheduling.get('every', (300, 1800)), 'checker.scheduling.every is not a int or list')
    start_after_range = _get_interval(
        scheduling.get('start_after', (300, 1800)), 'checker.scheduling.start_after is not a int or list'
    )
    t = threading.Thread(
        target=scheduler_function,
        args=(start_after_range[0], start_after_range[1], every_range[0], every_range[1], run),
        name='checker_scheduler',
    )
    t.daemon = True
    t.start()

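initialize() no longer arms a threading.Timer; it hands the two configured ranges and the run callback to scheduler_function on a daemon thread, which terminates together with the main process. The sketch below only illustrates that thread wiring; my_scheduler is a hypothetical stand-in, not the real searx.search.checker.scheduler.scheduler_function.

# Illustration only: a stand-in scheduler loop wired up the same way as in initialize().
import random
import threading
import time

def my_scheduler(start_after_from, start_after_to, every_from, every_to, callback):
    # hypothetical stand-in: sleep a random initial delay, then call back periodically
    time.sleep(random.randint(start_after_from, start_after_to))
    while True:
        callback()
        time.sleep(random.randint(every_from, every_to))

def run_check():
    print('checker would run now')

t = threading.Thread(
    target=my_scheduler,
    args=(300, 1800, 300, 1800, run_check),
    name='checker_scheduler',
)
t.daemon = True  # the thread is killed when the main process exits
t.start()
# in a real service the main thread keeps running; a bare script exits immediately
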
@ -10,12 +10,10 @@ from timeit import default_timer
from urllib.parse import urlparse

import re
from langdetect import detect_langs
from langdetect.lang_detect_exception import LangDetectException
import httpx

from searx import network, logger
from searx.utils import gen_useragent
from searx.utils import gen_useragent, detect_language
from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor

@ -174,7 +172,7 @@ class TestResults:
        self.languages.add(language)

    @property
    def succesfull(self):
    def successful(self):
        return len(self.errors) == 0

    def __iter__(self):

@ -208,14 +206,10 @@ class ResultContainerTests:
        self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')')

    def _add_language(self, text: str) -> typing.Optional[str]:
        try:
            r = detect_langs(str(text))  # pylint: disable=E1101
        except LangDetectException:
            return None

        if len(r) > 0 and r[0].prob > 0.95:
            self.languages.add(r[0].lang)
            self.test_results.add_language(r[0].lang)
        langStr = detect_language(text)
        if langStr:
            self.languages.add(langStr)
            self.test_results.add_language(langStr)
        return None

    def _check_result(self, result):

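_add_language() now calls searx.utils.detect_language() instead of running langdetect inline. For comparison, the removed branch (a langdetect guess accepted only above 0.95 probability) can be reproduced as a standalone helper; the sketch below mirrors the deleted lines and says nothing about how the new detect_language() works internally.

# Standalone re-creation of the removed langdetect-based logic, for comparison.
from typing import Optional

from langdetect import detect_langs
from langdetect.lang_detect_exception import LangDetectException

def guess_language(text: str) -> Optional[str]:
    try:
        candidates = detect_langs(str(text))
    except LangDetectException:
        return None
    # keep the guess only when langdetect is at least 95% confident, as the old code did
    if candidates and candidates[0].prob > 0.95:
        return candidates[0].lang
    return None

print(guess_language('The quick brown fox jumps over the lazy dog'))  # likely 'en'
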
@ -317,7 +311,7 @@ class ResultContainerTests:
            self._record_error('No result')

    def one_title_contains(self, title: str):
        """Check one of the title contains `title` (case insensitive comparaison)"""
        """Check one of the title contains `title` (case insensitive comparison)"""
        title = title.lower()
        for result in self.result_container.get_ordered_results():
            if title in result['title'].lower():

searx/search/checker/scheduler.lua (new file, 36 lines)
@ -0,0 +1,36 @@
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- This script is not a string in scheduler.py, so editors can provide syntax highlighting.

-- The Redis KEY is defined here and not in Python on purpose:
-- only this LUA script can read and update this key to avoid lock and concurrency issues.
local redis_key = 'SearXNG_checker_next_call_ts'

local now = redis.call('TIME')[1]
local start_after_from = ARGV[1]
local start_after_to = ARGV[2]
local every_from = ARGV[3]
local every_to = ARGV[4]

local next_call_ts = redis.call('GET', redis_key)

if (next_call_ts == false or next_call_ts == nil) then
    -- the scheduler has never run on this Redis instance, so:
    -- 1/ the scheduler does not run now
    -- 2/ the next call is a random time between start_after_from and start_after_to
    local initial_delay = math.random(start_after_from, start_after_to)
    redis.call('SET', redis_key, now + initial_delay)
    return { false, initial_delay }
end

-- next_call_ts is defined
-- --> if now is lower than next_call_ts then we don't run the embedded checker
-- --> if now is higher then we update next_call_ts and ask to run the embedded checker now.
local call_now = next_call_ts <= now
if call_now then
    -- the checker runs now, define the timestamp of the next call:
    -- this is a random delay between every_from and every_to
    local periodic_delay = math.random(every_from, every_to)
    next_call_ts = redis.call('INCRBY', redis_key, periodic_delay)
end
return { call_now, next_call_ts - now }

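The script compares the Redis server time with a stored next-call timestamp, tells the caller whether to run the checker now, and reports how long to wait before asking again. The following plain-Python sketch only restates that decision logic for readability; it is not the Python side that invokes the script (that module is not part of the hunks shown here), and the 'state' dict is a stand-in for the Redis key.

# Pure-Python illustration of the scheduling decision the Lua script makes.
# 'state' stands in for the Redis key; TIME, SET and INCRBY become plain operations.
import random
import time

def schedule_step(state: dict, start_after: tuple, every: tuple) -> tuple:
    """Return (call_now, seconds_until_next_opportunity)."""
    now = int(time.time())
    next_call_ts = state.get('next_call_ts')
    if next_call_ts is None:
        # first run ever: do not call now, schedule the first call at a random point
        initial_delay = random.randint(*start_after)
        state['next_call_ts'] = now + initial_delay
        return False, initial_delay
    call_now = next_call_ts <= now
    if call_now:
        # the checker runs now: push the next call a random "every" interval into the future
        state['next_call_ts'] = next_call_ts + random.randint(*every)
    return call_now, state['next_call_ts'] - now

state = {}
print(schedule_step(state, (300, 1800), (300, 1800)))  # (False, <initial delay>)
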
Some files were not shown because too many files have changed in this diff.