From 62982c8812c31249ab0cf3d0a8ca97eb439abf5e Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 22 Apr 2022 12:09:42 +0200 Subject: [PATCH] [fix] add back missing languages & regions (followup of PR #1071) In PR #1071 the language catalog of dailymotion has been cleaned up, before there had been over 7000 "languages" in the catalog. As a side effect of this clean-up the language & region catalog in SearXNG has been reduced [1]. This patch reduces the ``min_engines_per_lang`` from 13 to 12 to get the missing languages back into the language & region catalog of SearXNG. [1] https://github.com/searxng/searxng/pull/1071/commits/3bb62823ec3af0e67bd2d959bec20c4791ee3bac#diff-f3f00db0f87f95b882624a192e0aac21525638af0b18c9514e765fcf1991678d Requested-by: @tiekoetter in a Matrix chat Signed-off-by: Markus Heiser --- searx/data/engines_languages.json | 6 +++--- searx/languages.py | 18 ++++++++++++------ searxng_extra/update/update_languages.py | 5 +++-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json index fe648499a..ad4629e75 100644 --- a/searx/data/engines_languages.json +++ b/searx/data/engines_languages.json @@ -1714,15 +1714,15 @@ "mt": { "alias": "maltese" }, - "nb": { - "alias": "norsk" - }, "ne": { "alias": "nepali" }, "nl": { "alias": "nederlands" }, + "no": { + "alias": "norsk" + }, "oc": { "alias": "occitan" }, diff --git a/searx/languages.py b/searx/languages.py index 8ffff9c1d..377e7495b 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -2,7 +2,9 @@ # list of language codes # this file is generated automatically by utils/fetch_languages.py language_codes = ( + ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'), ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'), + ('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'), ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'), ('ca-ES', 'Català', 
'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'), ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'), @@ -26,28 +28,29 @@ language_codes = ( ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'), ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'), ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'), + ('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'), ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'), + ('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'), ('fr', 'Français', '', 'French', '\U0001f310'), ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'), ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'), ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'), ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'), ('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'), + ('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'), ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'), ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'), + ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'), + ('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'), ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'), ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'), ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'), ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'), ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'), - ( 'nb-NO', - 'Norsk Bokmål', - 'Norge', - 'Norwegian Bokmål', - '\U0001f1f3\U0001f1f4'), ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'), ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'), ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'), + ('no-NO', 'Norsk', '', 'Norwegian (Bokmål)', '\U0001f1f3\U0001f1f4'), ('pl-PL', 'Polski', 
'Polska', 'Polish', '\U0001f1f5\U0001f1f1'), ('pt', 'Português', '', 'Portuguese', '\U0001f310'), ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'), @@ -56,12 +59,15 @@ language_codes = ( ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'), ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'), ('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'), + ('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'), ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'), + ('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'), ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'), ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'), ('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'), + ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'), ('zh', '中文', '', 'Chinese', '\U0001f310'), ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'), - ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'), + ('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'), ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'), ) diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index 95ee5bd3e..5baf2fc5c 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -117,7 +117,8 @@ def get_territory_name(lang_code): country_name = None locale = get_locale(lang_code) try: - country_name = locale.get_territory_name() + if locale is not None: + country_name = locale.get_territory_name() except FileNotFoundError as exc: print("ERROR: %s --> %s" % (locale, exc)) return country_name @@ -190,7 +191,7 @@ def join_language_lists(engines_languages): # Filter language list so it only includes the most supported languages and countries def filter_language_list(all_languages): - min_engines_per_lang = 13 + min_engines_per_lang = 12 
min_engines_per_country = 7 # pylint: disable=consider-using-dict-items, consider-iterating-dictionary main_engines = [