From f78f9083836be851c224b4334b53b9686835e300 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 8 Oct 2022 11:32:08 +0200 Subject: [PATCH] [mod] Google: fetch engine traits (data_type: supported_languages) Implements a fetch_traits function for the Google engines. .. note:: Does not include migration of the request methode from 'supported_languages' to 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser --- searx/data/engine_traits.json | 1290 ++++++++++++++++++++++++++++++- searx/engines/google.py | 87 +++ searx/engines/google_images.py | 2 +- searx/engines/google_news.py | 1 + searx/engines/google_scholar.py | 1 + searx/engines/google_videos.py | 2 +- 6 files changed, 1366 insertions(+), 17 deletions(-) diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 251b7295a..e5f85bf38 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -2526,11 +2526,263 @@ ] }, "google": { - "all_locale": null, + "all_locale": "ZZ", "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "countryZA", + "ar-AE": "countryAE", + "ar-BH": "countryBH", + "ar-DJ": "countryDJ", + "ar-DZ": "countryDZ", + "ar-EG": "countryEG", + "ar-IL": "countryIL", + "ar-IQ": "countryIQ", + "ar-JO": "countryJO", + "ar-KW": "countryKW", + "ar-LB": "countryLB", + "ar-LY": "countryLY", + "ar-MA": "countryMA", + "ar-OM": "countryOM", + "ar-PS": "countryPS", + "ar-QA": "countryQA", + "ar-SA": "countrySA", + "ar-SO": "countrySO", + "ar-TD": "countryTD", + "ar-TN": "countryTN", + "be-BY": "countryBY", + "bg-BG": "countryBG", + "ca-AD": "countryAD", + "ca-ES": "countryES", + "cs-CZ": "countryCZ", + "da-DK": "countryDK", + "de-AT": "countryAT", + "de-BE": "countryBE", + "de-CH": "countryCH", + "de-DE": "countryDE", + "de-LI": "countryLI", + "de-LU": "countryLU", + "el-CY": "countryCY", + "el-GR": "countryGR", + "en-AG": "countryAG", + "en-AI": "countryAI", + "en-AS": "countryAS", + "en-AU": "countryAU", + "en-BI": "countryBI", + "en-BS": "countryBS", + "en-BW": "countryBW", + "en-BZ": "countryBZ", + "en-CA": "countryCA", + "en-CK": "countryCK", + "en-CM": "countryCM", + "en-DM": "countryDM", + "en-FJ": "countryFJ", + "en-FM": "countryFM", + "en-GB": "countryGB", + "en-GG": "countryGG", + "en-GH": "countryGH", + "en-GI": "countryGI", + "en-GM": "countryGM", + "en-GY": "countryGY", + "en-HK": "countryHK", + "en-IE": "countryIE", + "en-IM": "countryIM", + "en-IN": "countryIN", + "en-JE": "countryJE", + "en-JM": "countryJM", + "en-KE": "countryKE", + "en-KI": "countryKI", + "en-LS": "countryLS", + "en-MG": "countryMG", + "en-MS": "countryMS", + "en-MT": "countryMT", + "en-MU": "countryMU", + "en-MW": "countryMW", + "en-NA": "countryNA", + "en-NG": "countryNG", + "en-NR": "countryNR", + "en-NU": "countryNU", + "en-NZ": "countryNZ", + "en-PG": "countryPG", + "en-PH": "countryPH", + "en-PK": "countryPK", + "en-PN": "countryPN", + "en-PR": "countryPR", + "en-RW": "countryRW", + "en-SB": "countrySB", + "en-SC": "countrySC", + "en-SG": "countrySG", + "en-SH": "countrySH", + "en-SL": "countrySL", + "en-TO": "countryTO", + "en-TT": "countryTT", + "en-TZ": "countryTZ", + "en-UG": "countryUG", + "en-US": "countryUS", + "en-VC": "countryVC", + "en-VG": "countryVG", + "en-VI": "countryVI", + "en-VU": "countryVU", + "en-WS": "countryWS", + "en-ZA": "countryZA", + "en-ZM": "countryZM", + "en-ZW": "countryZW", + "es-AR": "countryAR", + "es-BO": "countryBO", + "es-CL": "countryCL", + "es-CO": "countryCO", + "es-CR": "countryCR", + "es-CU": "countryCU", + "es-DO": "countryDO", + "es-EC": "countryEC", + "es-ES": "countryES", + "es-GT": "countryGT", + "es-HN": "countryHN", + "es-MX": "countryMX", + "es-NI": "countryNI", + "es-PA": "countryPA", + "es-PE": "countryPE", + "es-PR": "countryPR", + "es-PY": "countryPY", + "es-SV": "countrySV", + "es-US": "countryUS", + "es-UY": "countryUY", + "es-VE": "countryVE", + "et-EE": "countryEE", + "fa-AF": "countryAF", + "fi-FI": "countryFI", + "fil-PH": "countryPH", + "fr-BE": "countryBE", + "fr-BF": "countryBF", + "fr-BI": "countryBI", + "fr-BJ": "countryBJ", + "fr-CA": "countryCA", + "fr-CD": "countryCD", + "fr-CF": "countryCF", + "fr-CG": "countryCG", + "fr-CH": "countryCH", + "fr-CI": "countryCI", + "fr-CM": "countryCM", + "fr-DJ": "countryDJ", + "fr-DZ": "countryDZ", + "fr-FR": "countryFR", + "fr-GA": "countryGA", + "fr-HT": "countryHT", + "fr-LU": "countryLU", + "fr-MA": "countryMA", + "fr-MG": "countryMG", + "fr-ML": "countryML", + "fr-MU": "countryMU", + "fr-NE": "countryNE", + "fr-RW": "countryRW", + "fr-SC": "countrySC", + "fr-SN": "countrySN", + "fr-TD": "countryTD", + "fr-TG": "countryTG", + "fr-TN": "countryTN", + "fr-VU": "countryVU", + "he-IL": "countryIL", + "hi-IN": "countryIN", + "hr-BA": "countryBA", + "hr-HR": "countryHR", + "hu-HU": "countryHU", + "hy-AM": "countryAM", + "id-ID": "countryID", + "is-IS": "countryIS", + "it-CH": "countryCH", + "it-IT": "countryIT", + "it-SM": "countrySM", + "ja-JP": "countryJP", + "ko-KR": "countryKR", + "lt-LT": "countryLT", + "lv-LV": "countryLV", + "nb-NO": "countryNO", + "nl-BE": "countryBE", + "nl-NL": "countryNL", + "nl-SR": "countrySR", + "pl-PL": "countryPL", + "pt-AO": "countryAO", + "pt-BR": "countryBR", + "pt-CV": "countryCV", + "pt-MZ": "countryMZ", + "pt-PT": "countryPT", + "pt-ST": "countryST", + "pt-TL": "countryTL", + "ro-MD": "countryMD", + "ro-RO": "countryRO", + "ru-BY": "countryBY", + "ru-KG": "countryKG", + "ru-KZ": "countryKZ", + "ru-RU": "countryRU", + "ru-UA": "countryUA", + "sk-SK": "countrySK", + "sl-SI": "countrySI", + "sr-BA": "countryBA", + "sr-RS": "countryRS", + "sv-FI": "countryFI", + "sv-SE": "countrySE", + "sw-CD": "countryCD", + "sw-KE": "countryKE", + "sw-TZ": "countryTZ", + "sw-UG": "countryUG", + "th-TH": "countryTH", + "tr-CY": "countryCY", + "tr-TR": "countryTR", + "uk-UA": "countryUA", + "vi-VN": "countryVN", + "zh-HK": "countryHK", + "zh-SG": "countrySG", + "zh-TW": "countryTW" + }, "supported_languages": { "af": { "name": "Afrikaans" @@ -2673,11 +2925,263 @@ } }, "google images": { - "all_locale": null, + "all_locale": "ZZ", "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "countryZA", + "ar-AE": "countryAE", + "ar-BH": "countryBH", + "ar-DJ": "countryDJ", + "ar-DZ": "countryDZ", + "ar-EG": "countryEG", + "ar-IL": "countryIL", + "ar-IQ": "countryIQ", + "ar-JO": "countryJO", + "ar-KW": "countryKW", + "ar-LB": "countryLB", + "ar-LY": "countryLY", + "ar-MA": "countryMA", + "ar-OM": "countryOM", + "ar-PS": "countryPS", + "ar-QA": "countryQA", + "ar-SA": "countrySA", + "ar-SO": "countrySO", + "ar-TD": "countryTD", + "ar-TN": "countryTN", + "be-BY": "countryBY", + "bg-BG": "countryBG", + "ca-AD": "countryAD", + "ca-ES": "countryES", + "cs-CZ": "countryCZ", + "da-DK": "countryDK", + "de-AT": "countryAT", + "de-BE": "countryBE", + "de-CH": "countryCH", + "de-DE": "countryDE", + "de-LI": "countryLI", + "de-LU": "countryLU", + "el-CY": "countryCY", + "el-GR": "countryGR", + "en-AG": "countryAG", + "en-AI": "countryAI", + "en-AS": "countryAS", + "en-AU": "countryAU", + "en-BI": "countryBI", + "en-BS": "countryBS", + "en-BW": "countryBW", + "en-BZ": "countryBZ", + "en-CA": "countryCA", + "en-CK": "countryCK", + "en-CM": "countryCM", + "en-DM": "countryDM", + "en-FJ": "countryFJ", + "en-FM": "countryFM", + "en-GB": "countryGB", + "en-GG": "countryGG", + "en-GH": "countryGH", + "en-GI": "countryGI", + "en-GM": "countryGM", + "en-GY": "countryGY", + "en-HK": "countryHK", + "en-IE": "countryIE", + "en-IM": "countryIM", + "en-IN": "countryIN", + "en-JE": "countryJE", + "en-JM": "countryJM", + "en-KE": "countryKE", + "en-KI": "countryKI", + "en-LS": "countryLS", + "en-MG": "countryMG", + "en-MS": "countryMS", + "en-MT": "countryMT", + "en-MU": "countryMU", + "en-MW": "countryMW", + "en-NA": "countryNA", + "en-NG": "countryNG", + "en-NR": "countryNR", + "en-NU": "countryNU", + "en-NZ": "countryNZ", + "en-PG": "countryPG", + "en-PH": "countryPH", + "en-PK": "countryPK", + "en-PN": "countryPN", + "en-PR": "countryPR", + "en-RW": "countryRW", + "en-SB": "countrySB", + "en-SC": "countrySC", + "en-SG": "countrySG", + "en-SH": "countrySH", + "en-SL": "countrySL", + "en-TO": "countryTO", + "en-TT": "countryTT", + "en-TZ": "countryTZ", + "en-UG": "countryUG", + "en-US": "countryUS", + "en-VC": "countryVC", + "en-VG": "countryVG", + "en-VI": "countryVI", + "en-VU": "countryVU", + "en-WS": "countryWS", + "en-ZA": "countryZA", + "en-ZM": "countryZM", + "en-ZW": "countryZW", + "es-AR": "countryAR", + "es-BO": "countryBO", + "es-CL": "countryCL", + "es-CO": "countryCO", + "es-CR": "countryCR", + "es-CU": "countryCU", + "es-DO": "countryDO", + "es-EC": "countryEC", + "es-ES": "countryES", + "es-GT": "countryGT", + "es-HN": "countryHN", + "es-MX": "countryMX", + "es-NI": "countryNI", + "es-PA": "countryPA", + "es-PE": "countryPE", + "es-PR": "countryPR", + "es-PY": "countryPY", + "es-SV": "countrySV", + "es-US": "countryUS", + "es-UY": "countryUY", + "es-VE": "countryVE", + "et-EE": "countryEE", + "fa-AF": "countryAF", + "fi-FI": "countryFI", + "fil-PH": "countryPH", + "fr-BE": "countryBE", + "fr-BF": "countryBF", + "fr-BI": "countryBI", + "fr-BJ": "countryBJ", + "fr-CA": "countryCA", + "fr-CD": "countryCD", + "fr-CF": "countryCF", + "fr-CG": "countryCG", + "fr-CH": "countryCH", + "fr-CI": "countryCI", + "fr-CM": "countryCM", + "fr-DJ": "countryDJ", + "fr-DZ": "countryDZ", + "fr-FR": "countryFR", + "fr-GA": "countryGA", + "fr-HT": "countryHT", + "fr-LU": "countryLU", + "fr-MA": "countryMA", + "fr-MG": "countryMG", + "fr-ML": "countryML", + "fr-MU": "countryMU", + "fr-NE": "countryNE", + "fr-RW": "countryRW", + "fr-SC": "countrySC", + "fr-SN": "countrySN", + "fr-TD": "countryTD", + "fr-TG": "countryTG", + "fr-TN": "countryTN", + "fr-VU": "countryVU", + "he-IL": "countryIL", + "hi-IN": "countryIN", + "hr-BA": "countryBA", + "hr-HR": "countryHR", + "hu-HU": "countryHU", + "hy-AM": "countryAM", + "id-ID": "countryID", + "is-IS": "countryIS", + "it-CH": "countryCH", + "it-IT": "countryIT", + "it-SM": "countrySM", + "ja-JP": "countryJP", + "ko-KR": "countryKR", + "lt-LT": "countryLT", + "lv-LV": "countryLV", + "nb-NO": "countryNO", + "nl-BE": "countryBE", + "nl-NL": "countryNL", + "nl-SR": "countrySR", + "pl-PL": "countryPL", + "pt-AO": "countryAO", + "pt-BR": "countryBR", + "pt-CV": "countryCV", + "pt-MZ": "countryMZ", + "pt-PT": "countryPT", + "pt-ST": "countryST", + "pt-TL": "countryTL", + "ro-MD": "countryMD", + "ro-RO": "countryRO", + "ru-BY": "countryBY", + "ru-KG": "countryKG", + "ru-KZ": "countryKZ", + "ru-RU": "countryRU", + "ru-UA": "countryUA", + "sk-SK": "countrySK", + "sl-SI": "countrySI", + "sr-BA": "countryBA", + "sr-RS": "countryRS", + "sv-FI": "countryFI", + "sv-SE": "countrySE", + "sw-CD": "countryCD", + "sw-KE": "countryKE", + "sw-TZ": "countryTZ", + "sw-UG": "countryUG", + "th-TH": "countryTH", + "tr-CY": "countryCY", + "tr-TR": "countryTR", + "uk-UA": "countryUA", + "vi-VN": "countryVN", + "zh-HK": "countryHK", + "zh-SG": "countrySG", + "zh-TW": "countryTW" + }, "supported_languages": { "af": { "name": "Afrikaans" @@ -2820,11 +3324,263 @@ } }, "google news": { - "all_locale": null, + "all_locale": "ZZ", "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "countryZA", + "ar-AE": "countryAE", + "ar-BH": "countryBH", + "ar-DJ": "countryDJ", + "ar-DZ": "countryDZ", + "ar-EG": "countryEG", + "ar-IL": "countryIL", + "ar-IQ": "countryIQ", + "ar-JO": "countryJO", + "ar-KW": "countryKW", + "ar-LB": "countryLB", + "ar-LY": "countryLY", + "ar-MA": "countryMA", + "ar-OM": "countryOM", + "ar-PS": "countryPS", + "ar-QA": "countryQA", + "ar-SA": "countrySA", + "ar-SO": "countrySO", + "ar-TD": "countryTD", + "ar-TN": "countryTN", + "be-BY": "countryBY", + "bg-BG": "countryBG", + "ca-AD": "countryAD", + "ca-ES": "countryES", + "cs-CZ": "countryCZ", + "da-DK": "countryDK", + "de-AT": "countryAT", + "de-BE": "countryBE", + "de-CH": "countryCH", + "de-DE": "countryDE", + "de-LI": "countryLI", + "de-LU": "countryLU", + "el-CY": "countryCY", + "el-GR": "countryGR", + "en-AG": "countryAG", + "en-AI": "countryAI", + "en-AS": "countryAS", + "en-AU": "countryAU", + "en-BI": "countryBI", + "en-BS": "countryBS", + "en-BW": "countryBW", + "en-BZ": "countryBZ", + "en-CA": "countryCA", + "en-CK": "countryCK", + "en-CM": "countryCM", + "en-DM": "countryDM", + "en-FJ": "countryFJ", + "en-FM": "countryFM", + "en-GB": "countryGB", + "en-GG": "countryGG", + "en-GH": "countryGH", + "en-GI": "countryGI", + "en-GM": "countryGM", + "en-GY": "countryGY", + "en-HK": "countryHK", + "en-IE": "countryIE", + "en-IM": "countryIM", + "en-IN": "countryIN", + "en-JE": "countryJE", + "en-JM": "countryJM", + "en-KE": "countryKE", + "en-KI": "countryKI", + "en-LS": "countryLS", + "en-MG": "countryMG", + "en-MS": "countryMS", + "en-MT": "countryMT", + "en-MU": "countryMU", + "en-MW": "countryMW", + "en-NA": "countryNA", + "en-NG": "countryNG", + "en-NR": "countryNR", + "en-NU": "countryNU", + "en-NZ": "countryNZ", + "en-PG": "countryPG", + "en-PH": "countryPH", + "en-PK": "countryPK", + "en-PN": "countryPN", + "en-PR": "countryPR", + "en-RW": "countryRW", + "en-SB": "countrySB", + "en-SC": "countrySC", + "en-SG": "countrySG", + "en-SH": "countrySH", + "en-SL": "countrySL", + "en-TO": "countryTO", + "en-TT": "countryTT", + "en-TZ": "countryTZ", + "en-UG": "countryUG", + "en-US": "countryUS", + "en-VC": "countryVC", + "en-VG": "countryVG", + "en-VI": "countryVI", + "en-VU": "countryVU", + "en-WS": "countryWS", + "en-ZA": "countryZA", + "en-ZM": "countryZM", + "en-ZW": "countryZW", + "es-AR": "countryAR", + "es-BO": "countryBO", + "es-CL": "countryCL", + "es-CO": "countryCO", + "es-CR": "countryCR", + "es-CU": "countryCU", + "es-DO": "countryDO", + "es-EC": "countryEC", + "es-ES": "countryES", + "es-GT": "countryGT", + "es-HN": "countryHN", + "es-MX": "countryMX", + "es-NI": "countryNI", + "es-PA": "countryPA", + "es-PE": "countryPE", + "es-PR": "countryPR", + "es-PY": "countryPY", + "es-SV": "countrySV", + "es-US": "countryUS", + "es-UY": "countryUY", + "es-VE": "countryVE", + "et-EE": "countryEE", + "fa-AF": "countryAF", + "fi-FI": "countryFI", + "fil-PH": "countryPH", + "fr-BE": "countryBE", + "fr-BF": "countryBF", + "fr-BI": "countryBI", + "fr-BJ": "countryBJ", + "fr-CA": "countryCA", + "fr-CD": "countryCD", + "fr-CF": "countryCF", + "fr-CG": "countryCG", + "fr-CH": "countryCH", + "fr-CI": "countryCI", + "fr-CM": "countryCM", + "fr-DJ": "countryDJ", + "fr-DZ": "countryDZ", + "fr-FR": "countryFR", + "fr-GA": "countryGA", + "fr-HT": "countryHT", + "fr-LU": "countryLU", + "fr-MA": "countryMA", + "fr-MG": "countryMG", + "fr-ML": "countryML", + "fr-MU": "countryMU", + "fr-NE": "countryNE", + "fr-RW": "countryRW", + "fr-SC": "countrySC", + "fr-SN": "countrySN", + "fr-TD": "countryTD", + "fr-TG": "countryTG", + "fr-TN": "countryTN", + "fr-VU": "countryVU", + "he-IL": "countryIL", + "hi-IN": "countryIN", + "hr-BA": "countryBA", + "hr-HR": "countryHR", + "hu-HU": "countryHU", + "hy-AM": "countryAM", + "id-ID": "countryID", + "is-IS": "countryIS", + "it-CH": "countryCH", + "it-IT": "countryIT", + "it-SM": "countrySM", + "ja-JP": "countryJP", + "ko-KR": "countryKR", + "lt-LT": "countryLT", + "lv-LV": "countryLV", + "nb-NO": "countryNO", + "nl-BE": "countryBE", + "nl-NL": "countryNL", + "nl-SR": "countrySR", + "pl-PL": "countryPL", + "pt-AO": "countryAO", + "pt-BR": "countryBR", + "pt-CV": "countryCV", + "pt-MZ": "countryMZ", + "pt-PT": "countryPT", + "pt-ST": "countryST", + "pt-TL": "countryTL", + "ro-MD": "countryMD", + "ro-RO": "countryRO", + "ru-BY": "countryBY", + "ru-KG": "countryKG", + "ru-KZ": "countryKZ", + "ru-RU": "countryRU", + "ru-UA": "countryUA", + "sk-SK": "countrySK", + "sl-SI": "countrySI", + "sr-BA": "countryBA", + "sr-RS": "countryRS", + "sv-FI": "countryFI", + "sv-SE": "countrySE", + "sw-CD": "countryCD", + "sw-KE": "countryKE", + "sw-TZ": "countryTZ", + "sw-UG": "countryUG", + "th-TH": "countryTH", + "tr-CY": "countryCY", + "tr-TR": "countryTR", + "uk-UA": "countryUA", + "vi-VN": "countryVN", + "zh-HK": "countryHK", + "zh-SG": "countrySG", + "zh-TW": "countryTW" + }, "supported_languages": { "af": { "name": "Afrikaans" @@ -2967,11 +3723,263 @@ } }, "google scholar": { - "all_locale": null, + "all_locale": "ZZ", "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "countryZA", + "ar-AE": "countryAE", + "ar-BH": "countryBH", + "ar-DJ": "countryDJ", + "ar-DZ": "countryDZ", + "ar-EG": "countryEG", + "ar-IL": "countryIL", + "ar-IQ": "countryIQ", + "ar-JO": "countryJO", + "ar-KW": "countryKW", + "ar-LB": "countryLB", + "ar-LY": "countryLY", + "ar-MA": "countryMA", + "ar-OM": "countryOM", + "ar-PS": "countryPS", + "ar-QA": "countryQA", + "ar-SA": "countrySA", + "ar-SO": "countrySO", + "ar-TD": "countryTD", + "ar-TN": "countryTN", + "be-BY": "countryBY", + "bg-BG": "countryBG", + "ca-AD": "countryAD", + "ca-ES": "countryES", + "cs-CZ": "countryCZ", + "da-DK": "countryDK", + "de-AT": "countryAT", + "de-BE": "countryBE", + "de-CH": "countryCH", + "de-DE": "countryDE", + "de-LI": "countryLI", + "de-LU": "countryLU", + "el-CY": "countryCY", + "el-GR": "countryGR", + "en-AG": "countryAG", + "en-AI": "countryAI", + "en-AS": "countryAS", + "en-AU": "countryAU", + "en-BI": "countryBI", + "en-BS": "countryBS", + "en-BW": "countryBW", + "en-BZ": "countryBZ", + "en-CA": "countryCA", + "en-CK": "countryCK", + "en-CM": "countryCM", + "en-DM": "countryDM", + "en-FJ": "countryFJ", + "en-FM": "countryFM", + "en-GB": "countryGB", + "en-GG": "countryGG", + "en-GH": "countryGH", + "en-GI": "countryGI", + "en-GM": "countryGM", + "en-GY": "countryGY", + "en-HK": "countryHK", + "en-IE": "countryIE", + "en-IM": "countryIM", + "en-IN": "countryIN", + "en-JE": "countryJE", + "en-JM": "countryJM", + "en-KE": "countryKE", + "en-KI": "countryKI", + "en-LS": "countryLS", + "en-MG": "countryMG", + "en-MS": "countryMS", + "en-MT": "countryMT", + "en-MU": "countryMU", + "en-MW": "countryMW", + "en-NA": "countryNA", + "en-NG": "countryNG", + "en-NR": "countryNR", + "en-NU": "countryNU", + "en-NZ": "countryNZ", + "en-PG": "countryPG", + "en-PH": "countryPH", + "en-PK": "countryPK", + "en-PN": "countryPN", + "en-PR": "countryPR", + "en-RW": "countryRW", + "en-SB": "countrySB", + "en-SC": "countrySC", + "en-SG": "countrySG", + "en-SH": "countrySH", + "en-SL": "countrySL", + "en-TO": "countryTO", + "en-TT": "countryTT", + "en-TZ": "countryTZ", + "en-UG": "countryUG", + "en-US": "countryUS", + "en-VC": "countryVC", + "en-VG": "countryVG", + "en-VI": "countryVI", + "en-VU": "countryVU", + "en-WS": "countryWS", + "en-ZA": "countryZA", + "en-ZM": "countryZM", + "en-ZW": "countryZW", + "es-AR": "countryAR", + "es-BO": "countryBO", + "es-CL": "countryCL", + "es-CO": "countryCO", + "es-CR": "countryCR", + "es-CU": "countryCU", + "es-DO": "countryDO", + "es-EC": "countryEC", + "es-ES": "countryES", + "es-GT": "countryGT", + "es-HN": "countryHN", + "es-MX": "countryMX", + "es-NI": "countryNI", + "es-PA": "countryPA", + "es-PE": "countryPE", + "es-PR": "countryPR", + "es-PY": "countryPY", + "es-SV": "countrySV", + "es-US": "countryUS", + "es-UY": "countryUY", + "es-VE": "countryVE", + "et-EE": "countryEE", + "fa-AF": "countryAF", + "fi-FI": "countryFI", + "fil-PH": "countryPH", + "fr-BE": "countryBE", + "fr-BF": "countryBF", + "fr-BI": "countryBI", + "fr-BJ": "countryBJ", + "fr-CA": "countryCA", + "fr-CD": "countryCD", + "fr-CF": "countryCF", + "fr-CG": "countryCG", + "fr-CH": "countryCH", + "fr-CI": "countryCI", + "fr-CM": "countryCM", + "fr-DJ": "countryDJ", + "fr-DZ": "countryDZ", + "fr-FR": "countryFR", + "fr-GA": "countryGA", + "fr-HT": "countryHT", + "fr-LU": "countryLU", + "fr-MA": "countryMA", + "fr-MG": "countryMG", + "fr-ML": "countryML", + "fr-MU": "countryMU", + "fr-NE": "countryNE", + "fr-RW": "countryRW", + "fr-SC": "countrySC", + "fr-SN": "countrySN", + "fr-TD": "countryTD", + "fr-TG": "countryTG", + "fr-TN": "countryTN", + "fr-VU": "countryVU", + "he-IL": "countryIL", + "hi-IN": "countryIN", + "hr-BA": "countryBA", + "hr-HR": "countryHR", + "hu-HU": "countryHU", + "hy-AM": "countryAM", + "id-ID": "countryID", + "is-IS": "countryIS", + "it-CH": "countryCH", + "it-IT": "countryIT", + "it-SM": "countrySM", + "ja-JP": "countryJP", + "ko-KR": "countryKR", + "lt-LT": "countryLT", + "lv-LV": "countryLV", + "nb-NO": "countryNO", + "nl-BE": "countryBE", + "nl-NL": "countryNL", + "nl-SR": "countrySR", + "pl-PL": "countryPL", + "pt-AO": "countryAO", + "pt-BR": "countryBR", + "pt-CV": "countryCV", + "pt-MZ": "countryMZ", + "pt-PT": "countryPT", + "pt-ST": "countryST", + "pt-TL": "countryTL", + "ro-MD": "countryMD", + "ro-RO": "countryRO", + "ru-BY": "countryBY", + "ru-KG": "countryKG", + "ru-KZ": "countryKZ", + "ru-RU": "countryRU", + "ru-UA": "countryUA", + "sk-SK": "countrySK", + "sl-SI": "countrySI", + "sr-BA": "countryBA", + "sr-RS": "countryRS", + "sv-FI": "countryFI", + "sv-SE": "countrySE", + "sw-CD": "countryCD", + "sw-KE": "countryKE", + "sw-TZ": "countryTZ", + "sw-UG": "countryUG", + "th-TH": "countryTH", + "tr-CY": "countryCY", + "tr-TR": "countryTR", + "uk-UA": "countryUA", + "vi-VN": "countryVN", + "zh-HK": "countryHK", + "zh-SG": "countrySG", + "zh-TW": "countryTW" + }, "supported_languages": { "af": { "name": "Afrikaans" @@ -3114,11 +4122,263 @@ } }, "google videos": { - "all_locale": null, + "all_locale": "ZZ", "custom": {}, "data_type": "supported_languages", - "languages": {}, - "regions": {}, + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "countryZA", + "ar-AE": "countryAE", + "ar-BH": "countryBH", + "ar-DJ": "countryDJ", + "ar-DZ": "countryDZ", + "ar-EG": "countryEG", + "ar-IL": "countryIL", + "ar-IQ": "countryIQ", + "ar-JO": "countryJO", + "ar-KW": "countryKW", + "ar-LB": "countryLB", + "ar-LY": "countryLY", + "ar-MA": "countryMA", + "ar-OM": "countryOM", + "ar-PS": "countryPS", + "ar-QA": "countryQA", + "ar-SA": "countrySA", + "ar-SO": "countrySO", + "ar-TD": "countryTD", + "ar-TN": "countryTN", + "be-BY": "countryBY", + "bg-BG": "countryBG", + "ca-AD": "countryAD", + "ca-ES": "countryES", + "cs-CZ": "countryCZ", + "da-DK": "countryDK", + "de-AT": "countryAT", + "de-BE": "countryBE", + "de-CH": "countryCH", + "de-DE": "countryDE", + "de-LI": "countryLI", + "de-LU": "countryLU", + "el-CY": "countryCY", + "el-GR": "countryGR", + "en-AG": "countryAG", + "en-AI": "countryAI", + "en-AS": "countryAS", + "en-AU": "countryAU", + "en-BI": "countryBI", + "en-BS": "countryBS", + "en-BW": "countryBW", + "en-BZ": "countryBZ", + "en-CA": "countryCA", + "en-CK": "countryCK", + "en-CM": "countryCM", + "en-DM": "countryDM", + "en-FJ": "countryFJ", + "en-FM": "countryFM", + "en-GB": "countryGB", + "en-GG": "countryGG", + "en-GH": "countryGH", + "en-GI": "countryGI", + "en-GM": "countryGM", + "en-GY": "countryGY", + "en-HK": "countryHK", + "en-IE": "countryIE", + "en-IM": "countryIM", + "en-IN": "countryIN", + "en-JE": "countryJE", + "en-JM": "countryJM", + "en-KE": "countryKE", + "en-KI": "countryKI", + "en-LS": "countryLS", + "en-MG": "countryMG", + "en-MS": "countryMS", + "en-MT": "countryMT", + "en-MU": "countryMU", + "en-MW": "countryMW", + "en-NA": "countryNA", + "en-NG": "countryNG", + "en-NR": "countryNR", + "en-NU": "countryNU", + "en-NZ": "countryNZ", + "en-PG": "countryPG", + "en-PH": "countryPH", + "en-PK": "countryPK", + "en-PN": "countryPN", + "en-PR": "countryPR", + "en-RW": "countryRW", + "en-SB": "countrySB", + "en-SC": "countrySC", + "en-SG": "countrySG", + "en-SH": "countrySH", + "en-SL": "countrySL", + "en-TO": "countryTO", + "en-TT": "countryTT", + "en-TZ": "countryTZ", + "en-UG": "countryUG", + "en-US": "countryUS", + "en-VC": "countryVC", + "en-VG": "countryVG", + "en-VI": "countryVI", + "en-VU": "countryVU", + "en-WS": "countryWS", + "en-ZA": "countryZA", + "en-ZM": "countryZM", + "en-ZW": "countryZW", + "es-AR": "countryAR", + "es-BO": "countryBO", + "es-CL": "countryCL", + "es-CO": "countryCO", + "es-CR": "countryCR", + "es-CU": "countryCU", + "es-DO": "countryDO", + "es-EC": "countryEC", + "es-ES": "countryES", + "es-GT": "countryGT", + "es-HN": "countryHN", + "es-MX": "countryMX", + "es-NI": "countryNI", + "es-PA": "countryPA", + "es-PE": "countryPE", + "es-PR": "countryPR", + "es-PY": "countryPY", + "es-SV": "countrySV", + "es-US": "countryUS", + "es-UY": "countryUY", + "es-VE": "countryVE", + "et-EE": "countryEE", + "fa-AF": "countryAF", + "fi-FI": "countryFI", + "fil-PH": "countryPH", + "fr-BE": "countryBE", + "fr-BF": "countryBF", + "fr-BI": "countryBI", + "fr-BJ": "countryBJ", + "fr-CA": "countryCA", + "fr-CD": "countryCD", + "fr-CF": "countryCF", + "fr-CG": "countryCG", + "fr-CH": "countryCH", + "fr-CI": "countryCI", + "fr-CM": "countryCM", + "fr-DJ": "countryDJ", + "fr-DZ": "countryDZ", + "fr-FR": "countryFR", + "fr-GA": "countryGA", + "fr-HT": "countryHT", + "fr-LU": "countryLU", + "fr-MA": "countryMA", + "fr-MG": "countryMG", + "fr-ML": "countryML", + "fr-MU": "countryMU", + "fr-NE": "countryNE", + "fr-RW": "countryRW", + "fr-SC": "countrySC", + "fr-SN": "countrySN", + "fr-TD": "countryTD", + "fr-TG": "countryTG", + "fr-TN": "countryTN", + "fr-VU": "countryVU", + "he-IL": "countryIL", + "hi-IN": "countryIN", + "hr-BA": "countryBA", + "hr-HR": "countryHR", + "hu-HU": "countryHU", + "hy-AM": "countryAM", + "id-ID": "countryID", + "is-IS": "countryIS", + "it-CH": "countryCH", + "it-IT": "countryIT", + "it-SM": "countrySM", + "ja-JP": "countryJP", + "ko-KR": "countryKR", + "lt-LT": "countryLT", + "lv-LV": "countryLV", + "nb-NO": "countryNO", + "nl-BE": "countryBE", + "nl-NL": "countryNL", + "nl-SR": "countrySR", + "pl-PL": "countryPL", + "pt-AO": "countryAO", + "pt-BR": "countryBR", + "pt-CV": "countryCV", + "pt-MZ": "countryMZ", + "pt-PT": "countryPT", + "pt-ST": "countryST", + "pt-TL": "countryTL", + "ro-MD": "countryMD", + "ro-RO": "countryRO", + "ru-BY": "countryBY", + "ru-KG": "countryKG", + "ru-KZ": "countryKZ", + "ru-RU": "countryRU", + "ru-UA": "countryUA", + "sk-SK": "countrySK", + "sl-SI": "countrySI", + "sr-BA": "countryBA", + "sr-RS": "countryRS", + "sv-FI": "countryFI", + "sv-SE": "countrySE", + "sw-CD": "countryCD", + "sw-KE": "countryKE", + "sw-TZ": "countryTZ", + "sw-UG": "countryUG", + "th-TH": "countryTH", + "tr-CY": "countryCY", + "tr-TR": "countryTR", + "uk-UA": "countryUA", + "vi-VN": "countryVN", + "zh-HK": "countryHK", + "zh-SG": "countrySG", + "zh-TW": "countryTW" + }, "supported_languages": { "af": { "name": "Afrikaans" diff --git a/searx/engines/google.py b/searx/engines/google.py index bdb351432..bee7085ec 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -29,6 +29,9 @@ from urllib.parse import urlencode from lxml import html from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex from searx.exceptions import SearxEngineCaptchaException +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits # about about = { @@ -373,3 +376,87 @@ def _fetch_supported_languages(resp): ret_val[code] = {"name": name} return ret_val + + +skip_countries = [ + # official language of google-country not in google-languages + 'AL', # Albanien (sq) + 'AZ', # Aserbaidschan (az) + 'BD', # Bangladesch (bn) + 'BN', # Brunei Darussalam (ms) + 'BT', # Bhutan (dz) + 'ET', # Äthiopien (am) + 'GE', # Georgien (ka, os) + 'GL', # Grönland (kl) + 'KH', # Kambodscha (km) + 'LA', # Laos (lo) + 'LK', # Sri Lanka (si, ta) + 'ME', # Montenegro (sr) + 'MK', # Nordmazedonien (mk, sq) + 'MM', # Myanmar (my) + 'MN', # Mongolei (mn) + 'MV', # Malediven (dv) // dv_MV is unknown by babel + 'MY', # Malaysia (ms) + 'NP', # Nepal (ne) + 'TJ', # Tadschikistan (tg) + 'TM', # Turkmenistan (tk) + 'UZ', # Usbekistan (uz) +] + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from Google.""" + # pylint: disable=import-outside-toplevel + + engine_traits.data_type = 'supported_languages' # deprecated + + import babel + import babel.languages + from searx import network + from searx.locales import language_tag, region_tag, get_offical_locales + + resp = network.get('https://www.google.com/preferences') + if not resp.ok: + print("ERROR: response from Google is not OK.") + + dom = html.fromstring(resp.text) + + lang_map = {'no': 'nb'} + + for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'): + + eng_lang = x.get("value").split('_')[-1] + try: + locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') + except babel.UnknownLocaleError: + print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang)) + continue + sxng_lang = language_tag(locale) + + conflict = engine_traits.languages.get(sxng_lang) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang)) + continue + engine_traits.languages[sxng_lang] = 'lang_' + eng_lang + + # alias languages + engine_traits.languages['zh'] = 'lang_zh-CN' + + for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'): + eng_country = x.get("value") + + if eng_country in skip_countries: + continue + if eng_country == 'ZZ': + engine_traits.all_locale = 'ZZ' + continue + + sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True) + + if not sxng_locales: + print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get('data-name'), eng_country)) + continue + + for sxng_locale in sxng_locales: + engine_traits.regions[region_tag(sxng_locale)] = 'country' + eng_country diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 528f8d21d..219f2adee 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -23,7 +23,7 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +from searx.engines.google import supported_languages_url, _fetch_supported_languages, fetch_traits # pylint: enable=unused-import diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 1ada2d64d..8962af36a 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -28,6 +28,7 @@ from searx.utils import ( # pylint: disable=unused-import from searx.engines.google import ( + fetch_traits, supported_languages_url, _fetch_supported_languages, ) diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index c07cd4cea..38aaf904b 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -31,6 +31,7 @@ from searx.engines.google import ( # pylint: disable=unused-import from searx.engines.google import ( + fetch_traits, supported_languages_url, _fetch_supported_languages, ) diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index fc574bd48..5ab29f9ff 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -38,7 +38,7 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +from searx.engines.google import supported_languages_url, _fetch_supported_languages, fetch_traits # pylint: enable=unused-import