add get_embeded_stream_url to searx.utils

2024-01-01 19:24:07 +01:00 · 2024-09-14 16:28:35 -06:00 · 2024-09-14 16:28:35 -06:00 · 76234479f4
commit 76234479f4
parent 231e55f38d
5 changed files with 56 additions and 14 deletions
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -123,7 +123,6 @@ from typing import Any, TYPE_CHECKING
 from urllib.parse import (
    urlencode,
    urlparse,
-    parse_qs,
 )

 from dateutil import parser
@@ -137,6 +136,7 @@ from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    js_variable_to_python,
+    get_embeded_stream_url,
 )
 from searx.enginelib.traits import EngineTraits

@@ -311,7 +311,7 @@ def _parse_search(resp):
            # In my tests a video tag in the WEB search was most often not a
            # video, except the ones from youtube ..

-            iframe_src = _get_iframe_src(url)
+            iframe_src = get_embeded_stream_url(url)
            if iframe_src:
                item['iframe_src'] = iframe_src
                item['template'] = 'videos.html'
@@ -328,15 +328,6 @@ def _parse_search(resp):
    return result_list


-def _get_iframe_src(url):
-    parsed_url = urlparse(url)
-    if parsed_url.path == '/watch' and parsed_url.query:
-        video_id = parse_qs(parsed_url.query).get('v', [])  # type: ignore
-        if video_id:
-            return 'https://www.youtube-nocookie.com/embed/' + video_id[0]  # type: ignore
-    return None
-
-
 def _parse_news(json_resp):
    result_list = []

@@ -392,7 +383,7 @@ def _parse_videos(json_resp):
        if result['thumbnail'] is not None:
            item['thumbnail'] = result['thumbnail']['src']

-        iframe_src = _get_iframe_src(url)
+        iframe_src = get_embeded_stream_url(url)
        if iframe_src:
            item['iframe_src'] = iframe_src

--- a/searx/engines/duckduckgo_extra.py
+++ b/searx/engines/duckduckgo_extra.py
@@ -7,6 +7,7 @@ DuckDuckGo Extra (images, videos, news)
 from datetime import datetime
 from typing import TYPE_CHECKING
 from urllib.parse import urlencode
+from searx.utils import get_embeded_stream_url

 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
 from searx.engines.duckduckgo import (
@@ -108,7 +109,7 @@ def _video_result(result):
        'title': result['title'],
        'content': result['description'],
        'thumbnail': result['images'].get('small') or result['images'].get('medium'),
-        'iframe_src': result['embed_url'],
+        'iframe_src': get_embeded_stream_url(result['content']),
        'source': result['provider'],
        'length': result['duration'],
        'metadata': result.get('uploader'),
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -34,6 +34,7 @@ from searx.engines.google import (
    detect_google_sorry,
 )
 from searx.enginelib.traits import EngineTraits
+from searx.utils import get_embeded_stream_url

 if TYPE_CHECKING:
    import logging
@@ -125,6 +126,7 @@ def response(resp):
                'content': content,
                'author': pub_info,
                'thumbnail': thumbnail,
+                'iframe_src': get_embeded_stream_url(url),
                'template': 'videos.html',
            }
        )
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -57,6 +57,7 @@ from searx.utils import (
    eval_xpath,
    eval_xpath_list,
    extract_text,
+    get_embeded_stream_url,
 )

 traits: EngineTraits
@@ -297,6 +298,7 @@ def parse_web_api(resp):
                        'title': title,
                        'url': res_url,
                        'content': content,
+                        'iframe_src': get_embeded_stream_url(res_url),
                        'publishedDate': pub_date,
                        'thumbnail': thumbnail,
                        'template': 'videos.html',