mirror of https://github.com/searxng/searxng, synced 2024-01-01 19:24:07 +01:00

[refactor] convert wikipedia engine to class

commit e579d72494, parent 1e97bfab70

2 changed files with 45 additions and 29 deletions
@@ -23,6 +23,7 @@ from searx.utils import load_module, gen_useragent, find_language_aliases
 from searx.engine import Engine
 
 _NEXTGEN_ENGINES = {
+    'wikipedia': 'WikipediaEngine',
 }
 """maps module names to class names for engines that are implemented using the new class-based approach"""
 
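This table is the only change to the engine loader: it registers the Wikipedia module's new class so the loader can instantiate it instead of falling back to the legacy module-level interface. The hunks that follow rewrite the Wikipedia engine module itself. As a rough illustration of how such a registry might be consulted, a minimal sketch (this load_engine helper is hypothetical, not code from this commit):

import importlib

def load_engine(module_name: str):
    # Hypothetical sketch: import the engine module as before ...
    module = importlib.import_module('searx.engines.' + module_name)
    # ... then prefer a registered class-based implementation over the
    # legacy module-level request()/response() functions.
    class_name = _NEXTGEN_ENGINES.get(module_name)
    if class_name is not None:
        return getattr(module, class_name)()  # e.g. WikipediaEngine()
    return module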
@@ -1,16 +1,29 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# pyright: basic
 """
  Wikipedia (Web)
 """
 
 from urllib.parse import quote
 from json import loads
+from typing import List
 from lxml.html import fromstring
-from searx.utils import match_language, searx_useragent
+from searx.engine import (
+    About,
+    InfoBox,
+    OnlineEngine,
+    QueryContext,
+    OnlineRequest,
+    Result,
+    Response,
+    StandardResult,
+)
+from searx.utils import match_language, searx_useragent, find_language_aliases
 from searx.network import raise_for_httperror
+import searx.data
 
 # about
-about = {
+about: About = {
     "website": 'https://www.wikipedia.org/',
     "wikidata_id": 'Q52',
     "official_api_documentation": 'https://en.wikipedia.org/api/',
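The new imports from searx.engine carry the typed vocabulary the class is written against. Their definitions are outside this diff; the following sketch only infers their rough shape from how the engine uses them below, and is an assumption rather than the real searx.engine module:

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class OnlineRequest:
    # Shape inferred from the request() method below -- an assumption.
    url: str
    raise_for_httperror: bool = True
    soft_max_redirects: int = 0
    headers: Dict[str, str] = field(default_factory=dict)

    def set_header(self, name: str, value: str) -> None:
        self.headers[name] = value

class OnlineEngine(ABC):
    # Minimal contract implied by WikipediaEngine -- an assumption.
    @abstractmethod
    def request(self, query: str, ctx: 'QueryContext') -> OnlineRequest: ...

    @abstractmethod
    def response(self, resp: 'Response') -> 'List[Result]': ...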
@@ -25,34 +38,36 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}
 
 
-# set language in base_url
-def url_lang(lang):
-    lang_pre = lang.split('-')[0]
-    if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
-        return 'en'
-    return match_language(lang, supported_languages, language_aliases).split('-')[0]
+class WikipediaEngine(OnlineEngine):
+
+    about = about
+    supported_languages = searx.data.ENGINES_LANGUAGES['wikipedia']
+    language_aliases = find_language_aliases(supported_languages)
 
-
-# do search-request
-def request(query, params):
-    if query.islower():
-        query = query.title()
+    def _url_lang(self, lang: str):
+        lang_pre = lang.split('-')[0]
+        if lang_pre == 'all' or lang_pre not in self.supported_languages and lang_pre not in self.language_aliases:
+            return 'en'
+        return match_language(lang, self.supported_languages, self.language_aliases).split('-')[0]
 
-    language = url_lang(params['language'])
-    params['url'] = search_url.format(title=quote(query), language=language)
+    def request(self, query: str, ctx: QueryContext) -> OnlineRequest:
+        if query.islower():
+            query = query.title()
 
-    if params['language'].lower() in language_variants.get(language, []):
-        params['headers']['Accept-Language'] = params['language'].lower()
+        language = self._url_lang(ctx.language)
 
-    params['headers']['User-Agent'] = searx_useragent()
-    params['raise_for_httperror'] = False
-    params['soft_max_redirects'] = 2
+        req = OnlineRequest(
+            url=search_url.format(title=quote(query), language=language),
+            raise_for_httperror=False,
+            soft_max_redirects=2,
+            headers={'User-Agent': searx_useragent()},
+        )
 
-    return params
+        if ctx.language.lower() in language_variants.get(language, []):
+            req.set_header('Accept-Language', ctx.language.lower())
 
+        return req
 
-# get response from search-request
-def response(resp):
-    if resp.status_code == 404:
-        return []
+    def response(self, resp: Response) -> List[Result]:
+        if resp.status_code == 404:
+            return []
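With this change a search is expressed by returning an OnlineRequest value rather than mutating a shared params dict, which makes the request path easy to exercise in isolation. A hedged usage sketch (how QueryContext is constructed is an assumption; only its language attribute is used above):

engine = WikipediaEngine()
ctx = QueryContext(language='zh-TW')  # assumed constructor

req = engine.request('alan turing', ctx)
# The query is title-cased before building the URL, so the request targets
# the 'Alan Turing' page; 'zh-TW' resolves to the 'zh' wikipedia and, being
# a listed variant, also sets the Accept-Language header:
print(req.url)
print(req.headers.get('Accept-Language'))  # 'zh-tw'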
@@ -70,7 +85,7 @@ language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw"
 
         raise_for_httperror(resp)
 
-        results = []
+        results: List[Result] = []
         api_result = loads(resp.text)
 
         # skip disambiguation pages
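The annotation pairs with the "# pyright: basic" pragma added at the top of the file: once results is typed, the checker can flag an untyped dict creeping back in. A hypothetical fragment, assuming Result is a class rather than a dict alias:

results: List[Result] = []
results.append({'url': wikipedia_link, 'title': title})  # flagged: dict is not a Result
results.append(StandardResult(url=wikipedia_link, title=title))  # accepted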
@@ -80,16 +95,16 @@ language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw"
         title = api_result['title']
         wikipedia_link = api_result['content_urls']['desktop']['page']
 
-        results.append({'url': wikipedia_link, 'title': title})
+        results.append(StandardResult(url=wikipedia_link, title=title))
 
         results.append(
-            {
-                'infobox': title,
-                'id': wikipedia_link,
-                'content': api_result.get('extract', ''),
-                'img_src': api_result.get('thumbnail', {}).get('source'),
-                'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
-            }
+            InfoBox(
+                url=wikipedia_link,
+                title=title,
+                content=api_result.get('extract', ''),
+                img_src=api_result.get('thumbnail', {}).get('source'),
+                links=[{'title': 'Wikipedia', 'url': wikipedia_link}],
+            )
         )
 
         return results
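On the response side the untyped dicts give way to StandardResult and InfoBox values, so the parsed output can be type-checked end to end. A sketch of driving it with a canned REST payload; the Response constructor is an assumption (the method only reads .status_code and .text), the 'type' field is a guess at what the elided "# skip disambiguation pages" check inspects, and raise_for_httperror is assumed to accept such a stub:

import json

payload = {
    'type': 'standard',  # assumed to satisfy the disambiguation check
    'title': 'Alan Turing',
    'extract': 'Alan Turing was an English mathematician ...',
    'content_urls': {'desktop': {'page': 'https://en.wikipedia.org/wiki/Alan_Turing'}},
}
resp = Response(status_code=200, text=json.dumps(payload))  # assumed constructor

results = WikipediaEngine().response(resp)
# results[0]: StandardResult(url='https://en.wikipedia.org/wiki/Alan_Turing', title='Alan Turing')
# results[1]: InfoBox carrying the extract; img_src is None without a 'thumbnail' entry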