mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[feat] engine: implementation of German news, Tagesschau
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									2256ba2ffb
								
							
						
					
					
						commit
						e25d1c7288
					
				
					 2 changed files with 106 additions and 0 deletions
				
			
		
							
								
								
									
										101
									
								
								searx/engines/tagesschau.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								searx/engines/tagesschau.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,101 @@
 | 
			
		|||
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""ARD: `Tagesschau API`_

The Tagesschau is a news program of the ARD.  Via the `Tagesschau API`_, current
news and media reports are available in JSON format.  The `Bundesstelle für Open
Data`_ offers a `OpenAPI`_ portal at bundDEV_ where APIs are documented and can
be tested.

This SearXNG engine uses the `/api2u/search`_ API.

.. _/api2u/search: http://tagesschau.api.bund.dev/
.. _bundDEV: https://bund.dev/apis
.. _Bundesstelle für Open Data: https://github.com/bundesAPI
.. _Tagesschau API: https://github.com/AndreasFischer1985/tagesschau-api/blob/main/README_en.md
.. _OpenAPI: https://swagger.io/specification/

"""
from typing import TYPE_CHECKING

from datetime import datetime
from urllib.parse import urlencode
import re

if TYPE_CHECKING:
    import logging

    # ``logger`` is injected into the engine's namespace by the SearXNG
    # engine loader at runtime; this declaration only informs type checkers.
    logger: logging.Logger
			
		||||
# Engine metadata displayed by SearXNG (e.g. on the preferences page).
about = {
    'website': "https://tagesschau.de",
    'wikidata_id': "Q703907",
    'official_api_documentation': None,
    'use_official_api': True,
    'require_api_key': False,
    'results': 'JSON',
    'language': 'de',  # the Tagesschau publishes German-language news only
}

categories = ['general', 'news']
paging = True

# Number of results requested from the API per page.
results_per_page = 10
base_url = "https://www.tagesschau.de"
			
		||||
def request(query, params):
    """Assemble the GET request URL for the ``/api2u/search`` endpoint.

    Stores the final URL in ``params['url']`` and returns ``params``.
    """
    query_string = urlencode(
        {
            'searchText': query,
            'pageSize': results_per_page,
            # the API counts pages from 0, SearXNG counts from 1
            'resultPage': params['pageno'] - 1,
        }
    )

    params['url'] = base_url + '/api2u/search?' + query_string
    return params
 | 
			
		||||
def response(resp):
    """Parse the JSON answer of the search API into SearXNG results.

    Each entry of ``searchResults`` is dispatched by its ``type`` field to
    the matching result builder; entries of unknown type are logged and
    skipped instead of aborting the whole response.
    """
    results = []

    json = resp.json()

    # Use .get() so an answer without 'searchResults' yields no results
    # instead of raising a KeyError.
    for item in json.get('searchResults', []):
        item_type = item.get('type')
        if item_type in ('story', 'webview'):
            results.append(_story(item))
        elif item_type == 'video':
            results.append(_video(item))
        else:
            # fixed typo in the log message: "unknow" --> "unknown"
            logger.error("unknown result type: %s", item_type)

    return results
		||||
 | 
			
		||||
def _story(item):
 | 
			
		||||
    return {
 | 
			
		||||
        'title': item['title'],
 | 
			
		||||
        'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
 | 
			
		||||
        'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
 | 
			
		||||
        'content': item['firstSentence'],
 | 
			
		||||
        'url': item['shareURL'],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _video(item):
 | 
			
		||||
    video_url = item['streams']['h264s']
 | 
			
		||||
    title = item['title']
 | 
			
		||||
 | 
			
		||||
    if "_vapp.mxf" in title:
 | 
			
		||||
        title = title.replace("_vapp.mxf", "")
 | 
			
		||||
        title = re.sub(r"APP\d+ (FC-)?", "", title, count=1)
 | 
			
		||||
 | 
			
		||||
    return {
 | 
			
		||||
        'template': 'videos.html',
 | 
			
		||||
        'title': title,
 | 
			
		||||
        'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
 | 
			
		||||
        'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
 | 
			
		||||
        'content': item.get('firstSentence', ''),
 | 
			
		||||
        'iframe_src': video_url,
 | 
			
		||||
        'url': video_url,
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -1431,6 +1431,11 @@ engines:
 | 
			
		|||
  #      WHERE title LIKE :wildcard OR description LIKE :wildcard
 | 
			
		||||
  #      ORDER BY duration DESC
 | 
			
		||||
 | 
			
		||||
  - name: tagesschau
 | 
			
		||||
    engine: tagesschau
 | 
			
		||||
    shortcut: ts
 | 
			
		||||
    disabled: true
 | 
			
		||||
 | 
			
		||||
  # Requires Tor
 | 
			
		||||
  - name: torch
 | 
			
		||||
    engine: xpath
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue