add get_embeded_stream_url to searx.utils

This commit is contained in:
Austin-Olacsi 2024-09-14 16:28:35 -06:00
parent 231e55f38d
commit 76234479f4
5 changed files with 56 additions and 14 deletions

View file

@ -123,7 +123,6 @@ from typing import Any, TYPE_CHECKING
from urllib.parse import (
urlencode,
urlparse,
parse_qs,
)
from dateutil import parser
@ -137,6 +136,7 @@ from searx.utils import (
eval_xpath_list,
eval_xpath_getindex,
js_variable_to_python,
get_embeded_stream_url,
)
from searx.enginelib.traits import EngineTraits
@ -311,7 +311,7 @@ def _parse_search(resp):
# In my tests, a video tag in the WEB search was most often not a
# video, except for the ones from YouTube.
iframe_src = _get_iframe_src(url)
iframe_src = get_embeded_stream_url(url)
if iframe_src:
item['iframe_src'] = iframe_src
item['template'] = 'videos.html'
@ -328,15 +328,6 @@ def _parse_search(resp):
return result_list
def _get_iframe_src(url):
parsed_url = urlparse(url)
if parsed_url.path == '/watch' and parsed_url.query:
video_id = parse_qs(parsed_url.query).get('v', []) # type: ignore
if video_id:
return 'https://www.youtube-nocookie.com/embed/' + video_id[0] # type: ignore
return None
def _parse_news(json_resp):
result_list = []
@ -392,7 +383,7 @@ def _parse_videos(json_resp):
if result['thumbnail'] is not None:
item['thumbnail'] = result['thumbnail']['src']
iframe_src = _get_iframe_src(url)
iframe_src = get_embeded_stream_url(url)
if iframe_src:
item['iframe_src'] = iframe_src

View file

@ -7,6 +7,7 @@ DuckDuckGo Extra (images, videos, news)
from datetime import datetime
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from searx.utils import get_embeded_stream_url
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
from searx.engines.duckduckgo import (
@ -108,7 +109,7 @@ def _video_result(result):
'title': result['title'],
'content': result['description'],
'thumbnail': result['images'].get('small') or result['images'].get('medium'),
'iframe_src': result['embed_url'],
'iframe_src': get_embeded_stream_url(result['content']),
'source': result['provider'],
'length': result['duration'],
'metadata': result.get('uploader'),

View file

@ -34,6 +34,7 @@ from searx.engines.google import (
detect_google_sorry,
)
from searx.enginelib.traits import EngineTraits
from searx.utils import get_embeded_stream_url
if TYPE_CHECKING:
import logging
@ -125,6 +126,7 @@ def response(resp):
'content': content,
'author': pub_info,
'thumbnail': thumbnail,
'iframe_src': get_embeded_stream_url(url),
'template': 'videos.html',
}
)

View file

@ -57,6 +57,7 @@ from searx.utils import (
eval_xpath,
eval_xpath_list,
extract_text,
get_embeded_stream_url,
)
traits: EngineTraits
@ -297,6 +298,7 @@ def parse_web_api(resp):
'title': title,
'url': res_url,
'content': content,
'iframe_src': get_embeded_stream_url(res_url),
'publishedDate': pub_date,
'thumbnail': thumbnail,
'template': 'videos.html',