[mod] engine - simplify region & lang handling, make filters configurable

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2023-09-30 18:41:13 +02:00 committed by MatthieuBarbu
parent 70f6a84aff
commit ba5f9bd4ac
3 changed files with 453 additions and 38 deletions

View File

@ -0,0 +1,13 @@
.. _RadioBrowser engine:

============
RadioBrowser
============

.. contents::
   :depth: 2
   :local:
   :backlinks: entry

.. automodule:: searx.engines.radio_browser
   :members:

View File

@ -4932,6 +4932,343 @@
"zh-HK": "zh_HK" "zh-HK": "zh_HK"
} }
}, },
"radio browser": {
"all_locale": null,
"custom": {
"countrycodes": [
"AD",
"AE",
"AF",
"AG",
"AL",
"AM",
"AO",
"AQ",
"AR",
"AS",
"AT",
"AU",
"AW",
"AZ",
"BA",
"BB",
"BD",
"BE",
"BF",
"BG",
"BH",
"BI",
"BJ",
"BM",
"BN",
"BO",
"BQ",
"BR",
"BS",
"BT",
"BW",
"BY",
"BZ",
"CA",
"CC",
"CD",
"CF",
"CH",
"CI",
"CK",
"CL",
"CM",
"CN",
"CO",
"CR",
"CU",
"CV",
"CW",
"CY",
"CZ",
"DE",
"DK",
"DM",
"DO",
"DZ",
"EC",
"EE",
"EG",
"ES",
"ET",
"FI",
"FJ",
"FK",
"FO",
"FR",
"GA",
"GB",
"GD",
"GE",
"GF",
"GG",
"GH",
"GI",
"GL",
"GN",
"GP",
"GQ",
"GR",
"GS",
"GT",
"GU",
"GW",
"GY",
"HK",
"HN",
"HR",
"HT",
"HU",
"ID",
"IE",
"IL",
"IM",
"IN",
"IO",
"IQ",
"IR",
"IS",
"IT",
"JM",
"JO",
"JP",
"KE",
"KG",
"KH",
"KM",
"KN",
"KP",
"KR",
"KW",
"KY",
"KZ",
"LB",
"LC",
"LK",
"LT",
"LU",
"LV",
"LY",
"MA",
"MC",
"MD",
"ME",
"MG",
"MK",
"ML",
"MM",
"MN",
"MO",
"MQ",
"MT",
"MU",
"MW",
"MX",
"MY",
"MZ",
"NA",
"NC",
"NE",
"NF",
"NG",
"NI",
"NL",
"NO",
"NP",
"NZ",
"OM",
"PA",
"PE",
"PF",
"PH",
"PK",
"PL",
"PM",
"PR",
"PS",
"PT",
"PY",
"QA",
"RE",
"RO",
"RS",
"RU",
"RW",
"SA",
"SC",
"SD",
"SE",
"SG",
"SH",
"SI",
"SJ",
"SK",
"SL",
"SM",
"SN",
"SO",
"SR",
"ST",
"SV",
"SY",
"SZ",
"TC",
"TD",
"TF",
"TG",
"TH",
"TJ",
"TM",
"TN",
"TO",
"TR",
"TT",
"TW",
"TZ",
"UA",
"UG",
"UM",
"US",
"UY",
"UZ",
"VA",
"VC",
"VE",
"VG",
"VI",
"VN",
"VU",
"WF",
"XK",
"YE",
"YT",
"ZA",
"ZM",
"ZW"
]
},
"data_type": "traits_v1",
"languages": {
"af": "afrikaans",
"ak": "akan",
"am": "amharic",
"ar": "arabic",
"ast": "asturian",
"az": "azerbaijani",
"be": "belarusian",
"bg": "bulgarian",
"bm": "bambara",
"bn": "bengali",
"bo": "tibetan",
"br": "breton",
"bs": "bosnian",
"ca": "catalan",
"cs": "czech",
"cv": "chuvash",
"cy": "welsh",
"da": "danish",
"de": "german",
"dsb": "lower sorbian",
"dz": "dzongkha",
"el": "greek",
"en": "english",
"eo": "esperanto",
"es": "spanish",
"et": "estonian",
"eu": "basque",
"fa": "persian",
"fi": "finnish",
"fil": "tagalog",
"fo": "faroese",
"fr": "french",
"ga": "irish",
"gd": "gaelic",
"gl": "galician",
"gsw": "swiss german",
"gu": "gujarati",
"gv": "manx",
"ha": "hausa",
"he": "hebrew",
"hi": "hindi",
"hr": "croatian",
"hsb": "upper sorbian",
"hu": "hungarian",
"hy": "armenian",
"id": "indonesian",
"is": "icelandic",
"it": "italian",
"ja": "japanese",
"jv": "javanese",
"ka": "georgian",
"kk": "kazakh",
"kl": "kalaallisut",
"km": "khmer",
"kn": "kannada",
"ko": "korean",
"ku": "kurdish",
"lb": "luxembourgish",
"ln": "lingala",
"lt": "lithuanian",
"lv": "latvian",
"mg": "malagasy",
"mk": "macedonian",
"ml": "malayalam",
"mn": "mongolian",
"mr": "marathi",
"ms": "malay",
"mt": "maltese",
"my": "burmese",
"nds": "low german",
"ne": "nepali",
"nl": "dutch",
"no": "norwegian",
"oc": "occitan",
"om": "oromo",
"os": "ossetian",
"pa": "panjabi",
"pl": "polish",
"pt": "portuguese",
"qu": "quechua",
"rm": "romansh",
"ro": "romanian",
"ru": "russian",
"rw": "kinyarwanda",
"sa": "sanskrit",
"sc": "sardinian",
"sd": "sindhi",
"si": "sinhala",
"sk": "slovak",
"sl": "slovenian",
"so": "somali",
"sq": "albanian",
"sr": "serbian",
"sv": "swedish",
"sw": "swahili",
"ta": "tamil",
"te": "telugu",
"tg": "tajik",
"th": "thai",
"tk": "turkmen",
"tr": "turkish",
"tt": "tatar",
"uk": "ukrainian",
"ur": "urdu",
"uz": "uzbek",
"vi": "vietnamese",
"wo": "wolof",
"xh": "xhosa",
"yi": "yiddish",
"yue": "cantonese",
"zh": "chinese",
"zh_Hans": "mandarin"
},
"regions": {}
},
"sepiasearch": { "sepiasearch": {
"all_locale": null, "all_locale": null,
"custom": {}, "custom": {},

View File

@ -1,30 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint # lint: pylint
"""Radio browser (music) """Search radio stations from RadioBrowser by `Advanced station search API`_.
.. _Advanced station search API:
https://de1.api.radio-browser.info/#Advanced_station_search
""" """
from urllib.parse import urlencode from urllib.parse import urlencode
import babel import babel
from flask_babel import gettext
from searx.network import get from searx.network import get
from searx.enginelib.traits import EngineTraits from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag, region_tag from searx.locales import language_tag
traits: EngineTraits traits: EngineTraits
about = { about = {
"website": 'https://www.radio-browser.info/', "website": 'https://www.radio-browser.info/',
"wikidata_id": 'Q111664849',
"official_api_documentation": 'https://de1.api.radio-browser.info/', "official_api_documentation": 'https://de1.api.radio-browser.info/',
"use_official_api": True, "use_official_api": True,
"require_api_key": False, "require_api_key": False,
"results": 'JSON', "results": 'JSON',
} }
paging = True paging = True
categories = ['music'] categories = ['music', 'radio']
base_url = "https://de1.api.radio-browser.info" # see https://api.radio-browser.info/ for all nodes base_url = "https://de1.api.radio-browser.info" # see https://api.radio-browser.info/ for all nodes
number_of_results = 10 number_of_results = 10
station_filters = [] # ['countrycode', 'language']
"""A list of filters to be applied to the search of radio stations. By default
no filters are applied. Valid filters are:
``language``
Filter stations by selected language. For instance, the ``de`` from ``:de-AU``
will be translated to `german` and used in the argument ``language=``.
``countrycode``
Filter stations by selected country. The two-letter country code of the station
comes from the region the user selected. For instance, ``:de-AU`` will filter
out all stations not in ``AU``.
.. note::
RadioBrowser has registered a lot of languages and countrycodes unknown to
:py:obj:`babel`. Note also that when searching for radio stations, users are
more likely to search by name than by region or language.
"""
def request(query, params): def request(query, params):
args = { args = {
@ -35,13 +62,17 @@ def request(query, params):
'hidebroken': 'true', 'hidebroken': 'true',
'reverse': 'true', 'reverse': 'true',
} }
lang = traits.get_language(params['searxng_locale'], None)
if lang is not None: if 'language' in station_filters:
lang = traits.get_language(params['searxng_locale']) # type: ignore
if lang:
args['language'] = lang args['language'] = lang
region = traits.get_region(params['searxng_locale'], None) if 'countrycode' in station_filters:
if region is not None: if len(params['searxng_locale'].split('-')) > 1:
args['countrycode'] = region.split('-')[1] countrycode = params['searxng_locale'].split('-')[-1].upper()
if countrycode in traits.custom['countrycodes']: # type: ignore
args['countrycode'] = countrycode
params['url'] = f"{base_url}/json/stations/search?{urlencode(args)}" params['url'] = f"{base_url}/json/stations/search?{urlencode(args)}"
return params return params
@ -50,22 +81,43 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
for result in resp.json(): json_resp = resp.json()
for result in json_resp:
url = result['homepage'] url = result['homepage']
if not url: if not url:
url = result['url_resolved'] url = result['url_resolved']
content = []
tags = ', '.join(result.get('tags', '').split(','))
if tags:
content.append(tags)
for x in ['state', 'country']:
v = result.get(x)
if v:
v = str(v).strip()
content.append(v)
metadata = []
codec = result.get('codec')
if codec and codec.lower() != 'unknown':
metadata.append(f'{codec} ' + gettext('radio'))
for x, y in [
(gettext('bitrate'), 'bitrate'),
(gettext('votes'), 'votes'),
(gettext('clicks'), 'clickcount'),
]:
v = result.get(y)
if v:
v = str(v).strip()
metadata.append(f"{x} {v}")
results.append( results.append(
{ {
'template': 'videos.html',
'url': url, 'url': url,
'title': result['name'], 'title': result['name'],
'thumbnail': result.get('favicon', '').replace("http://", "https://"), 'img_src': result.get('favicon', '').replace("http://", "https://"),
'content': result['country'] 'content': ' | '.join(content),
+ " / " 'metadata': ' | '.join(metadata),
+ result["tags"]
+ f" / {result['votes']} votes"
+ f" / {result['clickcount']} clicks",
'iframe_src': result['url_resolved'].replace("http://", "https://"), 'iframe_src': result['url_resolved'].replace("http://", "https://"),
} }
) )
@ -74,38 +126,51 @@ def response(resp):
def fetch_traits(engine_traits: EngineTraits): def fetch_traits(engine_traits: EngineTraits):
language_list = get(f'{base_url}/json/languages').json() """Fetch languages and countrycodes from RadioBrowser
country_list = get(f'{base_url}/json/countrycodes').json() - ``traits.languages``: `list of languages API`_
- ``traits.custom['countrycodes']``: `list of countries API`_
.. _list of countries API: https://de1.api.radio-browser.info/#List_of_countries
.. _list of languages API: https://de1.api.radio-browser.info/#List_of_languages
"""
# pylint: disable=import-outside-toplevel
from babel.core import get_global
babel_reg_list = get_global("territory_languages").keys()
language_list = get(f'{base_url}/json/languages').json() # type: ignore
country_list = get(f'{base_url}/json/countries').json() # type: ignore
for lang in language_list: for lang in language_list:
babel_lang = lang.get('iso_639')
if not babel_lang:
# the language doesn't have any iso code, and hence can't be parsed # the language doesn't have any iso code, and hence can't be parsed
if not lang['iso_639']: # print(f"ERROR: lang - no iso code in {lang}")
continue continue
try: try:
lang_tag = lang['iso_639'] sxng_tag = language_tag(babel.Locale.parse(babel_lang, sep="-"))
sxng_tag = language_tag(babel.Locale.parse(lang_tag, sep="-"))
except babel.UnknownLocaleError: except babel.UnknownLocaleError:
print("ERROR: %s is unknown by babel" % lang_tag) # print(f"ERROR: language tag {babel_lang} is unknown by babel")
continue continue
eng_tag = lang['name']
conflict = engine_traits.languages.get(sxng_tag) conflict = engine_traits.languages.get(sxng_tag)
if conflict: if conflict:
if conflict != eng_tag:
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
continue continue
engine_traits.languages[sxng_tag] = eng_tag
engine_traits.languages[sxng_tag] = lang['name'] countrycodes = set()
for region in country_list: for region in country_list:
try: if region['iso_3166_1'] not in babel_reg_list:
reg_tag = f"{lang['iso_639']}-{region['name']}" print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel")
sxng_tag = region_tag(babel.Locale.parse(reg_tag, sep="-"))
except babel.UnknownLocaleError:
continue continue
countrycodes.add(region['iso_3166_1'])
conflict = engine_traits.regions.get(sxng_tag) countrycodes = list(countrycodes)
if conflict: countrycodes.sort()
continue engine_traits.custom['countrycodes'] = countrycodes
engine_traits.regions[sxng_tag] = reg_tag