mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
Merge branch 'searxng:master' into master
This commit is contained in:
commit
1d713bdf1c
10 changed files with 575 additions and 23842 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,12 +1,17 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
Dailymotion (Videos)
|
||||
"""Dailymotion (Videos)
|
||||
|
||||
"""
|
||||
|
||||
from json import loads
|
||||
from datetime import datetime
|
||||
from typing import Set
|
||||
from datetime import datetime, timedelta
|
||||
from urllib.parse import urlencode
|
||||
from searx.utils import match_language, html_to_text
|
||||
import time
|
||||
import babel
|
||||
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.network import raise_for_httperror
|
||||
from searx.utils import html_to_text
|
||||
|
||||
# about
|
||||
about = {
|
||||
|
|
@ -21,23 +26,89 @@ about = {
|
|||
# engine dependent config
|
||||
categories = ['videos']
|
||||
paging = True
|
||||
number_of_results = 10
|
||||
|
||||
time_range_support = True
|
||||
time_delta_dict = {
|
||||
"day": timedelta(days=1),
|
||||
"week": timedelta(days=7),
|
||||
"month": timedelta(days=31),
|
||||
"year": timedelta(days=365),
|
||||
}
|
||||
|
||||
safesearch = True
|
||||
safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''}
|
||||
|
||||
# search-url
|
||||
# see http://www.dailymotion.com/doc/api/obj-video.html
|
||||
search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa
|
||||
supported_languages_url = 'https://api.dailymotion.com/languages'
|
||||
# - https://developers.dailymotion.com/tools/
|
||||
# - https://www.dailymotion.com/doc/api/obj-video.html
|
||||
|
||||
result_fields = [
|
||||
'allow_embed',
|
||||
'description',
|
||||
'title',
|
||||
'created_time',
|
||||
'duration',
|
||||
'url',
|
||||
'thumbnail_360_url',
|
||||
'id',
|
||||
]
|
||||
search_url = (
|
||||
'https://api.dailymotion.com/videos?'
|
||||
'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
|
||||
).format(
|
||||
fields=','.join(result_fields),
|
||||
password_protected= 'false',
|
||||
private='false',
|
||||
sort='relevance',
|
||||
limit=number_of_results,
|
||||
)
|
||||
iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
|
||||
|
||||
# The request query filters by 'languages' & 'country', therefore instead of
|
||||
# fetching only languages we need to fetch locales.
|
||||
supported_languages_url = 'https://api.dailymotion.com/locales'
|
||||
supported_languages_iso639: Set[str] = set()
|
||||
|
||||
|
||||
def init(_engine_settings):
|
||||
global supported_languages_iso639
|
||||
supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages])
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
if params['language'] == 'all':
|
||||
locale = 'en-US'
|
||||
else:
|
||||
locale = match_language(params['language'], supported_languages)
|
||||
|
||||
params['url'] = search_url.format(
|
||||
query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno']
|
||||
)
|
||||
if not query:
|
||||
return False
|
||||
|
||||
language = params['language']
|
||||
if language == 'all':
|
||||
language = 'en-US'
|
||||
locale = babel.Locale.parse(language, sep='-')
|
||||
|
||||
language_iso639 = locale.language
|
||||
if locale.language not in supported_languages_iso639:
|
||||
language_iso639 = 'en'
|
||||
|
||||
query_args = {
|
||||
'search': query,
|
||||
'languages': language_iso639,
|
||||
'page': params['pageno'],
|
||||
}
|
||||
|
||||
if locale.territory:
|
||||
localization = locale.language + '_' + locale.territory
|
||||
if localization in supported_languages:
|
||||
query_args['country'] = locale.territory
|
||||
|
||||
time_delta = time_delta_dict.get(params["time_range"])
|
||||
if time_delta:
|
||||
created_after = datetime.now() - time_delta
|
||||
query_args['created_after'] = datetime.timestamp(created_after)
|
||||
|
||||
query_str = urlencode(query_args)
|
||||
params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '')
|
||||
params['raise_for_httperror'] = False
|
||||
|
||||
return params
|
||||
|
||||
|
|
@ -46,34 +117,51 @@ def request(query, params):
|
|||
def response(resp):
|
||||
results = []
|
||||
|
||||
search_res = loads(resp.text)
|
||||
search_res = resp.json()
|
||||
|
||||
# return empty array if there are no results
|
||||
if 'list' not in search_res:
|
||||
return []
|
||||
# check for an API error
|
||||
if 'error' in search_res:
|
||||
raise SearxEngineAPIException(search_res['error'].get('message'))
|
||||
|
||||
raise_for_httperror(resp)
|
||||
|
||||
# parse results
|
||||
for res in search_res['list']:
|
||||
for res in search_res.get('list', []):
|
||||
|
||||
title = res['title']
|
||||
url = res['url']
|
||||
|
||||
content = html_to_text(res['description'])
|
||||
thumbnail = res['thumbnail_360_url']
|
||||
if len(content) > 300:
|
||||
content = content[:300] + '...'
|
||||
|
||||
publishedDate = datetime.fromtimestamp(res['created_time'], None)
|
||||
|
||||
# http to https
|
||||
length = time.gmtime(res.get('duration'))
|
||||
if length.tm_hour:
|
||||
length = time.strftime("%H:%M:%S", length)
|
||||
else:
|
||||
length = time.strftime("%M:%S", length)
|
||||
|
||||
thumbnail = res['thumbnail_360_url']
|
||||
thumbnail = thumbnail.replace("http://", "https://")
|
||||
|
||||
results.append(
|
||||
{
|
||||
'template': 'videos.html',
|
||||
'url': url,
|
||||
'title': title,
|
||||
'content': content,
|
||||
'publishedDate': publishedDate,
|
||||
'iframe_src': "https://www.dailymotion.com/embed/video/" + res['id'],
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
)
|
||||
item = {
|
||||
'template': 'videos.html',
|
||||
'url': url,
|
||||
'title': title,
|
||||
'content': content,
|
||||
'publishedDate': publishedDate,
|
||||
'length': length,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
# HINT: no matter what the value is, without an API token videos can't be shown
|
||||
# embedded
|
||||
if res['allow_embed']:
|
||||
item['iframe_src'] = iframe_src.format(video_id=res['id'])
|
||||
|
||||
results.append(item)
|
||||
|
||||
# return results
|
||||
return results
|
||||
|
|
@ -81,18 +169,8 @@ def response(resp):
|
|||
|
||||
# get supported languages from their site
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = {}
|
||||
|
||||
response_json = loads(resp.text)
|
||||
|
||||
for language in response_json['list']:
|
||||
supported_languages[language['code']] = {}
|
||||
|
||||
name = language['native_name']
|
||||
if name:
|
||||
supported_languages[language['code']]['name'] = name
|
||||
english_name = language['name']
|
||||
if english_name:
|
||||
supported_languages[language['code']]['english_name'] = english_name
|
||||
|
||||
return supported_languages
|
||||
response_json = resp.json()
|
||||
return [
|
||||
item['locale']
|
||||
for item in response_json['list']
|
||||
]
|
||||
|
|
|
|||
|
|
@ -20,8 +20,10 @@ about = {
|
|||
categories = ['shopping']
|
||||
paging = True
|
||||
|
||||
url = 'https://www.ebay.com'
|
||||
search_url = url + '/sch/i.html?_nkw={query}&_sacat={pageno}'
|
||||
# Set base_url in settings.yml in order to
|
||||
# have the desired local TLD.
|
||||
base_url = None
|
||||
search_url = '/sch/i.html?_nkw={query}&_sacat={pageno}'
|
||||
|
||||
results_xpath = '//li[contains(@class, "s-item")]'
|
||||
url_xpath = './/a[@class="s-item__link"]/@href'
|
||||
|
|
@ -34,7 +36,7 @@ thumbnail_xpath = './/img[@class="s-item__image-img"]/@src'
|
|||
|
||||
|
||||
def request(query, params):
|
||||
params['url'] = search_url.format(query=quote(query), pageno=params['pageno'])
|
||||
params['url'] = f'{base_url}' + search_url.format(query=quote(query), pageno=params['pageno'])
|
||||
return params
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -85,6 +85,11 @@ suggestion_xpath = ''
|
|||
cached_xpath = ''
|
||||
cached_url = ''
|
||||
|
||||
cookies = {}
|
||||
headers = {}
|
||||
'''Some engines might offer different results based on cookies or headers.
|
||||
Possible use-case: To set safesearch cookie or header to moderate.'''
|
||||
|
||||
paging = False
|
||||
'''Engine supports paging [True or False].'''
|
||||
|
||||
|
|
@ -166,6 +171,9 @@ def request(query, params):
|
|||
'safe_search': safe_search,
|
||||
}
|
||||
|
||||
params['cookies'].update(cookies)
|
||||
params['headers'].update(headers)
|
||||
|
||||
params['url'] = search_url.format(**fargs)
|
||||
params['soft_max_redirects'] = soft_max_redirects
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,7 @@
|
|||
# list of language codes
|
||||
# this file is generated automatically by utils/fetch_languages.py
|
||||
language_codes = (
|
||||
('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
|
||||
('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
|
||||
('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'),
|
||||
('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
|
||||
('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'),
|
||||
('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
|
||||
|
|
@ -28,20 +26,15 @@ language_codes = (
|
|||
('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
|
||||
('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
|
||||
('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
|
||||
('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'),
|
||||
('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
|
||||
('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'),
|
||||
('fr', 'Français', '', 'French', '\U0001f310'),
|
||||
('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
|
||||
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
|
||||
('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
|
||||
('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
|
||||
('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'),
|
||||
('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'),
|
||||
('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
|
||||
('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
|
||||
('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
|
||||
('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'),
|
||||
('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
|
||||
('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
|
||||
('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
|
||||
|
|
@ -63,13 +56,10 @@ language_codes = (
|
|||
('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
|
||||
('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
|
||||
('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'),
|
||||
('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'),
|
||||
('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
|
||||
('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'),
|
||||
('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
|
||||
('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
|
||||
('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
|
||||
('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
|
||||
('zh', '中文', '', 'Chinese', '\U0001f310'),
|
||||
('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
|
||||
('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
|
||||
|
|
|
|||
|
|
@ -514,6 +514,7 @@ engines:
|
|||
# - name: ebay
|
||||
# engine: ebay
|
||||
# shortcut: eb
|
||||
# base_url: 'https://www.ebay.com'
|
||||
# disabled: true
|
||||
# timeout: 5
|
||||
|
||||
|
|
|
|||
|
|
@ -88,6 +88,8 @@ class _HTMLTextExtractor(HTMLParser): # pylint: disable=W0223 # (see https://b
|
|||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
self.tags.append(tag)
|
||||
if tag == 'br':
|
||||
self.result.append(' ')
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
if not self.tags:
|
||||
|
|
@ -142,7 +144,7 @@ def html_to_text(html_str: str) -> str:
|
|||
>>> html_to_text('<style>.span { color: red; }</style><span>Example</span>')
|
||||
'Example'
|
||||
"""
|
||||
html_str = html_str.replace('\n', ' ')
|
||||
html_str = html_str.replace('\n', ' ').replace('\r', ' ')
|
||||
html_str = ' '.join(html_str.split())
|
||||
s = _HTMLTextExtractor()
|
||||
try:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue