From 203079aecfa9c4f1a20b3bcb17257796c4b86343 Mon Sep 17 00:00:00 2001 From: Joseph Cheung Date: Fri, 24 Feb 2023 08:54:06 +0800 Subject: [PATCH] o --- searx/autocomplete.py | 132 +- searx/data/engine_traits.json | 3802 +++++++++++++++++++++++ searx/engines/google.py | 497 +-- searx/engines/google_images.py | 49 +- searx/engines/google_internal_search.py | 529 ---- searx/engines/google_news.py | 252 +- searx/engines/google_scholar.py | 116 +- searx/engines/google_videos.py | 115 +- 8 files changed, 4525 insertions(+), 967 deletions(-) create mode 100644 searx/data/engine_traits.json delete mode 100644 searx/engines/google_internal_search.py diff --git a/searx/autocomplete.py b/searx/autocomplete.py index aeb697a14..dda1208b4 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -5,20 +5,20 @@ """ # pylint: disable=use-dict-literal -from json import loads +import json from urllib.parse import urlencode -from lxml import etree +import lxml from httpx import HTTPError from searx import settings -from searx.data import ENGINES_LANGUAGES +from searx.engines import ( + engines, + google, +) from searx.network import get as http_get from searx.exceptions import SearxEngineResponseException -# a fetch_supported_languages() for XPath engines isn't available right now -# _brave = ENGINES_LANGUAGES['brave'].keys() - def get(*args, **kwargs): if 'timeout' not in kwargs: @@ -55,34 +55,58 @@ def dbpedia(query, _lang): results = [] if response.ok: - dom = etree.fromstring(response.content) + dom = lxml.etree.fromstring(response.content) results = dom.xpath('//Result/Label//text()') return results -def duckduckgo(query, _lang): - # duckduckgo autocompleter - url = 'https://ac.duckduckgo.com/ac/?{0}&type=list' +def duckduckgo(query, sxng_locale): + """Autocomplete from DuckDuckGo. 
Supports DuckDuckGo's languages""" - resp = loads(get(url.format(urlencode(dict(q=query)))).text) - if len(resp) > 1: - return resp[1] - return [] + traits = engines['duckduckgo'].traits + args = { + 'q': query, + 'kl': traits.get_region(sxng_locale, traits.all_locale), + } + + url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args) + resp = get(url) + + ret_val = [] + if resp.ok: + j = resp.json() + if len(j) > 1: + ret_val = j[1] + return ret_val -def google(query, lang): - # google autocompleter - autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' +def google_complete(query, sxng_locale): + """Autocomplete from Google. Supports Google's languages and subdomains + (:py:obj:`searx.engines.google.get_google_info`) by using the async REST + API:: - response = get(autocomplete_url + urlencode(dict(hl=lang, q=query))) + https://{subdomain}/complete/search?{args} + """ + + google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits) + + url = 'https://{subdomain}/complete/search?{args}' + args = urlencode( + { + 'q': query, + 'client': 'gws-wiz', + 'hl': google_info['params']['hl'], + } + ) results = [] - - if response.ok: - dom = etree.fromstring(response.text) - results = dom.xpath('//suggestion/@data') - + resp = get(url.format(subdomain=google_info['subdomain'], args=args)) + if resp.ok: + json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1] + data = json.loads(json_txt) + for item in data[0]: + results.append(lxml.html.fromstring(item[0]).text_content()) return results @@ -109,9 +133,9 @@ def seznam(query, _lang): ] -def startpage(query, lang): - # startpage autocompleter - lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english') +def startpage(query, sxng_locale): + """Autocomplete from Startpage. 
Supports Startpage's languages""" + lui = engines['startpage'].traits.get_language(sxng_locale, 'english') url = 'https://startpage.com/suggestions?{query}' resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui}))) data = resp.json() @@ -122,20 +146,20 @@ def swisscows(query, _lang): # swisscows autocompleter url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5' - resp = loads(get(url.format(query=urlencode({'query': query}))).text) + resp = json.loads(get(url.format(query=urlencode({'query': query}))).text) return resp -def qwant(query, lang): - # qwant autocompleter (additional parameter : lang=en_en&count=xxx ) - url = 'https://api.qwant.com/api/suggest?{query}' - - resp = get(url.format(query=urlencode({'q': query, 'lang': lang}))) - +def qwant(query, sxng_locale): + """Autocomplete from Qwant. Supports Qwant's regions.""" results = [] + locale = engines['startpage'].traits.get_region(sxng_locale, 'en_US') + url = 'https://api.qwant.com/v3/suggest?{query}' + resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'}))) + if resp.ok: - data = loads(resp.text) + data = resp.json() if data['status'] == 'success': for item in data['data']['items']: results.append(item['value']) @@ -143,21 +167,38 @@ def qwant(query, lang): return results -def wikipedia(query, lang): - # wikipedia autocompleter - url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json' +def wikipedia(query, sxng_locale): + """Autocomplete from Wikipedia. 
Supports Wikipedia's languages (aka netloc).""" + results = [] + eng_traits = engines['wikipedia'].traits + wiki_lang = eng_traits.get_language(sxng_locale, 'en') + wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org') - resp = loads(get(url.format(urlencode(dict(search=query)))).text) - if len(resp) > 1: - return resp[1] - return [] + url = 'https://{wiki_netloc}/w/api.php?{args}' + args = urlencode( + { + 'action': 'opensearch', + 'format': 'json', + 'formatversion': '2', + 'search': query, + 'namespace': '0', + 'limit': '10', + } + ) + resp = get(url.format(args=args, wiki_netloc=wiki_netloc)) + if resp.ok: + data = resp.json() + if len(data) > 1: + results = data[1] + + return results def yandex(query, _lang): # yandex autocompleter url = "https://suggest.yandex.com/suggest-ff.cgi?{0}" - resp = loads(get(url.format(urlencode(dict(part=query)))).text) + resp = json.loads(get(url.format(urlencode(dict(part=query)))).text) if len(resp) > 1: return resp[1] return [] @@ -166,7 +207,7 @@ def yandex(query, _lang): backends = { 'dbpedia': dbpedia, 'duckduckgo': duckduckgo, - 'google': google, + 'google': google_complete, 'seznam': seznam, 'startpage': startpage, 'swisscows': swisscows, @@ -177,12 +218,11 @@ backends = { } -def search_autocomplete(backend_name, query, lang): +def search_autocomplete(backend_name, query, sxng_locale): backend = backends.get(backend_name) if backend is None: return [] - try: - return backend(query, lang) + return backend(query, sxng_locale) except (HTTPError, SearxEngineResponseException): return [] diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json new file mode 100644 index 000000000..69156cc27 --- /dev/null +++ b/searx/data/engine_traits.json @@ -0,0 +1,3802 @@ +{ + "arch linux wiki": { + "all_locale": null, + "custom": { + "title": { + "de": "Spezial:Suche", + "fa": "\u0648\u06cc\u0698\u0647:\u062c\u0633\u062a\u062c\u0648", + "ja": "\u7279\u5225:\u691c\u7d22", + "zh": 
"Special:\u641c\u7d22" + }, + "wiki_netloc": { + "de": "wiki.archlinux.de", + "fa": "wiki.archusers.ir", + "ja": "wiki.archlinux.jp", + "zh": "wiki.archlinuxcn.org" + } + }, + "data_type": "traits_v1", + "languages": { + "ar": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629", + "bg": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438", + "bs": "Bosanski", + "cs": "\u010ce\u0161tina", + "da": "Dansk", + "de": "Deutsch", + "el": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac", + "en": "English", + "es": "Espa\u00f1ol", + "fa": "\u0641\u0627\u0631\u0633\u06cc", + "fi": "Suomi", + "fr": "Fran\u00e7ais", + "he": "\u05e2\u05d1\u05e8\u05d9\u05ea", + "hr": "Hrvatski", + "hu": "Magyar", + "id": "Bahasa Indonesia", + "it": "Italiano", + "ja": "\u65e5\u672c\u8a9e", + "ko": "\ud55c\uad6d\uc5b4", + "lt": "Lietuvi\u0173", + "nl": "Nederlands", + "pl": "Polski", + "pt": "Portugu\u00eas", + "ru": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439", + "sk": "Sloven\u010dina", + "sr": "\u0421\u0440\u043f\u0441\u043a\u0438 / srpski", + "sv": "Svenska", + "th": "\u0e44\u0e17\u0e22", + "tr": "T\u00fcrk\u00e7e", + "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430", + "zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09" + }, + "regions": {} + }, + "bing": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": "kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": 
"zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-AT": "de-AT", + "de-CH": "de-CH", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-CA": "en-CA", + "en-GB": "en-GB", + "en-IN": "en-IN", + "en-MY": "en-MY", + "en-NZ": "en-NZ", + "en-PH": "en-PH", + "en-US": "en-US", + "en-ZA": "en-ZA", + "es-AR": "es-AR", + "es-CL": "es-CL", + "es-ES": "es-ES", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-BE": "fr-BE", + "fr-CA": "fr-CA", + "fr-CH": "fr-CH", + "fr-FR": "fr-FR", + "id-ID": "en-ID", + "it-IT": "it-IT", + "ja-JP": "ja-JP", + "ko-KR": "ko-KR", + "nb-NO": "no-NO", + "nl-BE": "nl-BE", + "nl-NL": "nl-NL", + "pl-PL": "pl-PL", + "pt-BR": "pt-BR", + "ru-RU": "ru-RU", + "sv-SE": "sv-SE", + "tr-TR": "tr-TR", + "zh-CN": "zh-CN", + "zh-HK": "zh-HK", + "zh-TW": "zh-TW" + } + }, + "bing images": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": "kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": "zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-AT": "de-AT", + "de-CH": "de-CH", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-CA": "en-CA", + "en-GB": "en-GB", + "en-IN": "en-IN", + "en-MY": "en-MY", + "en-NZ": "en-NZ", + "en-PH": "en-PH", + "en-US": "en-US", + "en-ZA": "en-ZA", + "es-AR": "es-AR", + "es-CL": "es-CL", + "es-ES": 
"es-ES", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-BE": "fr-BE", + "fr-CA": "fr-CA", + "fr-CH": "fr-CH", + "fr-FR": "fr-FR", + "id-ID": "en-ID", + "it-IT": "it-IT", + "ja-JP": "ja-JP", + "ko-KR": "ko-KR", + "nb-NO": "no-NO", + "nl-BE": "nl-BE", + "nl-NL": "nl-NL", + "pl-PL": "pl-PL", + "pt-BR": "pt-BR", + "ru-RU": "ru-RU", + "sv-SE": "sv-SE", + "tr-TR": "tr-TR", + "zh-CN": "zh-CN", + "zh-HK": "zh-HK", + "zh-TW": "zh-TW" + } + }, + "bing news": { + "all_locale": "en-WW", + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": "kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": "zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-GB": "en-GB", + "en-US": "en-US", + "es-CL": "es-CL", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-CA": "fr-CA", + "fr-FR": "fr-FR", + "it-IT": "it-IT", + "pt-BR": "pt-BR", + "zh-CN": "zh-CN" + } + }, + "bing videos": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": 
"kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": "zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-AT": "de-AT", + "de-CH": "de-CH", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-CA": "en-CA", + "en-GB": "en-GB", + "en-IN": "en-IN", + "en-MY": "en-MY", + "en-NZ": "en-NZ", + "en-PH": "en-PH", + "en-US": "en-US", + "en-ZA": "en-ZA", + "es-AR": "es-AR", + "es-CL": "es-CL", + "es-ES": "es-ES", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-BE": "fr-BE", + "fr-CA": "fr-CA", + "fr-CH": "fr-CH", + "fr-FR": "fr-FR", + "id-ID": "en-ID", + "it-IT": "it-IT", + "ja-JP": "ja-JP", + "ko-KR": "ko-KR", + "nb-NO": "no-NO", + "nl-BE": "nl-BE", + "nl-NL": "nl-NL", + "pl-PL": "pl-PL", + "pt-BR": "pt-BR", + "ru-RU": "ru-RU", + "sv-SE": "sv-SE", + "tr-TR": "tr-TR", + "zh-CN": "zh-CN", + "zh-HK": "zh-HK", + "zh-TW": "zh-TW" + } + }, + "dailymotion": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "fr": "fr", + "id": "id", + "it": "it", + "ja": "ja", + "ko": "ko", + "ms": "ms", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ro": "ro", + "ru": "ru", + "th": "th", + "tr": "tr", + "vi": "vi", + "zh": "zh" + }, + "regions": { + "ar-AE": "ar_AE", + "ar-EG": "ar_EG", + "ar-SA": "ar_SA", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-HK": "en_HK", + "en-IE": "en_IE", + "en-IN": "en_IN", + "en-NG": "en_NG", + "en-PH": "en_PH", + "en-PK": "en_PK", + "en-SG": "en_SG", + "en-US": "en_US", + "en-ZA": "en_ZA", + "es-AR": "es_AR", + "es-ES": 
"es_ES", + "es-MX": "es_MX", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-CI": "fr_CI", + "fr-FR": "fr_FR", + "fr-MA": "fr_MA", + "fr-SN": "fr_SN", + "fr-TN": "fr_TN", + "id-ID": "id_ID", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ja-JP": "ja_JP", + "ko-KR": "ko_KR", + "ms-MY": "ms_MY", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-BR": "pt_BR", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "ru-RU": "ru_RU", + "th-TH": "th_TH", + "tr-TR": "tr_TR", + "vi-VN": "vi_VN", + "zh-CN": "zh_CN", + "zh-TW": "zh_TW" + } + }, + "duckduckgo": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", + "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": "nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": 
"pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": "zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": "fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", + "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, + "duckduckgo images": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": 
"es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", + "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": "nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": "pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": "zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": 
"fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", + "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, + "duckduckgo weather": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", + "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": 
"nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": "pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": "zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": "fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", + "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, + "google": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": 
"www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": "www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": "www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": "www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + 
"JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + "LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + "MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + "NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + 
"SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", + "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": "www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", 
+ "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + 
"fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google images": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": "www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": 
"www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": "www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": "www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + "JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + 
"LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + "MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + "NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + "SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", 
+ "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": "www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": 
"BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + 
"hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google news": { + "all_locale": "ZZ", + "custom": { + "ceid": { + "ar-AE": "AE:ar", + "ar-EG": "EG:ar", + "ar-LB": "LB:ar", + "ar-SA": "SA:ar", + "bg-BG": "BG:bg", + "bn-BD": "BD:bn", + "bn-IN": "IN:bn", + "cs-CZ": "CZ:cs", + "de-AT": "AT:de", + "de-CH": "CH:de", + "de-DE": "DE:de", + "el-GR": "GR:el", + "en-AU": "AU:en", + "en-BW": "BW:en", + "en-CA": "CA:en", + "en-GB": "GB:en", + "en-GH": "GH:en", + "en-IE": "IE:en", + "en-IL": "IL:en", + "en-IN": "IN:en", + "en-KE": "KE:en", + "en-MY": "MY:en", + "en-NA": "NA:en", + "en-NG": "NG:en", + "en-NZ": "NZ:en", + "en-PH": "PH:en", + "en-PK": "PK:en", + "en-SG": "SG:en", + "en-TZ": "TZ:en", + "en-UG": "UG:en", + "en-US": "US:en", + "en-ZA": "ZA:en", + "en-ZW": "ZW:en", + "es-AR": "AR:es-419", + "es-CL": "CL:es-419", + "es-CO": "CO:es-419", + "es-CU": "CU:es-419", + "es-ES": "ES:es", + "es-MX": "MX:es-419", + "es-PE": "PE:es-419", + "es-US": "US:es-419", + "es-VE": "VE:es-419", + "fr-BE": "BE:fr", + "fr-CA": "CA:fr", + "fr-CH": "CH:fr", + "fr-FR": "FR:fr", + "fr-MA": "MA:fr", + "fr-SN": "SN:fr", + "he-IL": "IL:he", + "hi-IN": "IN:hi", + "hu-HU": "HU:hu", + "id-ID": "ID:id", + "it-IT": "IT:it", + "ja-JP": 
"JP:ja", + "ko-KR": "KR:ko", + "lt-LT": "LT:lt", + "lv-LV": "LV:lv", + "ml-IN": "IN:ml", + "mr-IN": "IN:mr", + "nb-NO": "NO:no", + "nl-BE": "BE:nl", + "nl-NL": "NL:nl", + "pl-PL": "PL:pl", + "pt-BR": "BR:pt-419", + "pt-PT": "PT:pt-150", + "ro-RO": "RO:ro", + "ru-RU": "RU:ru", + "ru-UA": "UA:ru", + "sk-SK": "SK:sk", + "sl-SI": "SI:sl", + "sr-RS": "RS:sr", + "sv-SE": "SE:sv", + "ta-IN": "IN:ta", + "te-IN": "IN:te", + "th-TH": "TH:th", + "tr-TR": "TR:tr", + "uk-UA": "UA:uk", + "vi-VN": "VN:vi", + "zh-CN": "CN:zh-Hans", + "zh-HK": "HK:zh-Hant", + "zh-TW": "TW:zh-Hant" + }, + "supported_domains": {} + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + 
"cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": 
"ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google scholar": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": "www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": "www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": 
"www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": "www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + "JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + "LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + 
"MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + "NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + "SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", + "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": 
"www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + 
"en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": 
"LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google videos": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": "www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": "www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": "www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": 
"www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + "JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + "LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + "MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + 
"NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + "SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", + "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": "www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": 
"lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + 
"en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": 
"RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "peertube": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ca": "ca", + "cs": "cs", + "de": "de", + "el": "el", + "en": "en", + "eo": "eo", + "es": "es", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gd": "gd", + "it": "it", + "ja": "ja", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ru": "ru", + "sv": "sv", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh" + }, + "regions": {} + }, + "qwant": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "qwant images": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + 
"en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "qwant news": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "ca-ES": "ca_ES", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "it-CH": "it_CH", + "it-IT": "it_IT", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pt-PT": "pt_PT" + } + }, + "qwant videos": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + 
"zh-HK": "zh_HK" + } + }, + "sepiasearch": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ca": "ca", + "cs": "cs", + "de": "de", + "el": "el", + "en": "en", + "eo": "eo", + "es": "es", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gd": "gd", + "it": "it", + "ja": "ja", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ru": "ru", + "sv": "sv", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh" + }, + "regions": {} + }, + "startpage": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "af": "afrikaans", + "am": "amharic", + "ar": "arabic", + "az": "azerbaijani", + "be": "belarusian", + "bg": "bulgarian", + "bn": "bengali", + "bs": "bosnian", + "ca": "catalan", + "cs": "czech", + "cy": "welsh", + "da": "dansk", + "de": "deutsch", + "el": "greek", + "en": "english", + "eo": "esperanto", + "es": "espanol", + "et": "estonian", + "eu": "basque", + "fa": "persian", + "fi": "suomi", + "fo": "faroese", + "fr": "francais", + "fy": "frisian", + "ga": "irish", + "gd": "gaelic", + "gl": "galician", + "gu": "gujarati", + "he": "hebrew", + "hi": "hindi", + "hr": "croatian", + "hu": "hungarian", + "ia": "interlingua", + "id": "indonesian", + "is": "icelandic", + "it": "italiano", + "ja": "nihongo", + "jv": "javanese", + "ka": "georgian", + "kn": "kannada", + "ko": "hangul", + "la": "latin", + "lt": "lithuanian", + "lv": "latvian", + "mai": "bihari", + "mk": "macedonian", + "ml": "malayalam", + "mr": "marathi", + "ms": "malay", + "mt": "maltese", + "nb": "norsk", + "ne": "nepali", + "nl": "nederlands", + "oc": "occitan", + "pa": "punjabi", + "pl": "polski", + "pt": "portugues", + "ro": "romanian", + "ru": "russian", + "si": "sinhalese", + "sk": "slovak", + "sl": "slovenian", + "sq": "albanian", + "sr": "serbian", + "su": "sudanese", + "sv": "svenska", + "sw": "swahili", + "ta": "tamil", + "te": "telugu", + "th": "thai", + "ti": "tigrinya", + "tl": "tagalog", + "tr": "turkce", + "uk": "ukrainian", + "ur": "urdu", 
+ "uz": "uzbek", + "vi": "vietnamese", + "xh": "xhosa", + "zh": "jiantizhongwen", + "zh_Hant": "fantizhengwen", + "zu": "zulu" + }, + "regions": { + "ar-EG": "ar_EG", + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en-GB_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "en-ZA": "en_ZA", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-US": "es_US", + "es-UY": "es_UY", + "fi-FI": "fi_FI", + "fil-PH": "fil_PH", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hi-IN": "hi_IN", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ja-JP": "ja_JP", + "ko-KR": "ko_KR", + "ms-MY": "ms_MY", + "nb-NO": "no_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-BR": "pt-BR_BR", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "ru-BY": "ru_BY", + "ru-RU": "ru_RU", + "sv-SE": "sv_SE", + "tr-TR": "tr_TR", + "uk-UA": "uk_UA", + "zh-CN": "zh-CN_CN", + "zh-HK": "zh-TW_HK", + "zh-TW": "zh-TW_TW" + } + }, + "wikidata": { + "all_locale": null, + "custom": { + "wiki_netloc": {} + }, + "data_type": "traits_v1", + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "ckb": "ckb", + "cs": "cs", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "fa": "fa", + "fi": "fi", + "fil": "tl", + "fo": "fo", + "fr": "fr", + "fy": "fy", + "gl": "gl", + "gsw": "als", + "gu": "gu", + "he": "he", + "hi": "hi", + "hsb": "hsb", + "hu": "hu", + "hy": "hy", + "id": "id", + "is": "is", + "it": "it", + "ja": "ja", + "jv": "jv", + "ka": "ka", + "kn": "kn", + "ko": "ko", + "lb": "lb", + "lt": "lt", + "lv": "lv", + "mai": "mai", + "mk": "mk", + "ml": "ml", + "mn": "mn", + "mr": "mr", + "ne": "ne", + "no": "no", + "or": "or", + 
"os": "os", + "pa": "pa", + "pl": "pl", + "ps": "ps", + "pt": "pt", + "qu": "qu", + "ro": "ro", + "ru": "ru", + "sa": "sa", + "sah": "sah", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "ur": "ur", + "uz": "uz", + "vi": "vi", + "yi": "yi", + "zh": "zh", + "zh_Hant": "zh-classical" + }, + "regions": {} + }, + "wikipedia": { + "all_locale": null, + "custom": { + "wiki_netloc": { + "af": "af.wikipedia.org", + "als": "als.wikipedia.org", + "am": "am.wikipedia.org", + "ar": "ar.wikipedia.org", + "as": "as.wikipedia.org", + "az": "az.wikipedia.org", + "be": "be.wikipedia.org", + "bg": "bg.wikipedia.org", + "bn": "bn.wikipedia.org", + "bs": "bs.wikipedia.org", + "ca": "ca.wikipedia.org", + "ckb": "ckb.wikipedia.org", + "cs": "cs.wikipedia.org", + "da": "da.wikipedia.org", + "de": "de.wikipedia.org", + "el": "el.wikipedia.org", + "en": "en.wikipedia.org", + "es": "es.wikipedia.org", + "et": "et.wikipedia.org", + "fa": "fa.wikipedia.org", + "fi": "fi.wikipedia.org", + "fo": "fo.wikipedia.org", + "fr": "fr.wikipedia.org", + "fy": "fy.wikipedia.org", + "gl": "gl.wikipedia.org", + "gu": "gu.wikipedia.org", + "he": "he.wikipedia.org", + "hi": "hi.wikipedia.org", + "hsb": "hsb.wikipedia.org", + "hu": "hu.wikipedia.org", + "hy": "hy.wikipedia.org", + "id": "id.wikipedia.org", + "is": "is.wikipedia.org", + "it": "it.wikipedia.org", + "ja": "ja.wikipedia.org", + "jv": "jv.wikipedia.org", + "ka": "ka.wikipedia.org", + "kn": "kn.wikipedia.org", + "ko": "ko.wikipedia.org", + "lb": "lb.wikipedia.org", + "lt": "lt.wikipedia.org", + "lv": "lv.wikipedia.org", + "mai": "mai.wikipedia.org", + "mk": "mk.wikipedia.org", + "ml": "ml.wikipedia.org", + "mn": "mn.wikipedia.org", + "mr": "mr.wikipedia.org", + "ne": "ne.wikipedia.org", + "no": "no.wikipedia.org", + "or": "or.wikipedia.org", + "os": "os.wikipedia.org", + "pa": "pa.wikipedia.org", + "pl": "pl.wikipedia.org", + "ps": 
"ps.wikipedia.org", + "pt": "pt.wikipedia.org", + "qu": "qu.wikipedia.org", + "ro": "ro.wikipedia.org", + "ru": "ru.wikipedia.org", + "sa": "sa.wikipedia.org", + "sah": "sah.wikipedia.org", + "sd": "sd.wikipedia.org", + "si": "si.wikipedia.org", + "sk": "sk.wikipedia.org", + "sl": "sl.wikipedia.org", + "sq": "sq.wikipedia.org", + "sr": "sr.wikipedia.org", + "ta": "ta.wikipedia.org", + "te": "te.wikipedia.org", + "th": "th.wikipedia.org", + "tl": "tl.wikipedia.org", + "tr": "tr.wikipedia.org", + "uk": "uk.wikipedia.org", + "ur": "ur.wikipedia.org", + "uz": "uz.wikipedia.org", + "vi": "vi.wikipedia.org", + "yi": "yi.wikipedia.org", + "zh": "zh.wikipedia.org", + "zh-classical": "zh-classical.wikipedia.org" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "ckb": "ckb", + "cs": "cs", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "fa": "fa", + "fi": "fi", + "fil": "tl", + "fo": "fo", + "fr": "fr", + "fy": "fy", + "gl": "gl", + "gsw": "als", + "gu": "gu", + "he": "he", + "hi": "hi", + "hsb": "hsb", + "hu": "hu", + "hy": "hy", + "id": "id", + "is": "is", + "it": "it", + "ja": "ja", + "jv": "jv", + "ka": "ka", + "kn": "kn", + "ko": "ko", + "lb": "lb", + "lt": "lt", + "lv": "lv", + "mai": "mai", + "mk": "mk", + "ml": "ml", + "mn": "mn", + "mr": "mr", + "ne": "ne", + "no": "no", + "or": "or", + "os": "os", + "pa": "pa", + "pl": "pl", + "ps": "ps", + "pt": "pt", + "qu": "qu", + "ro": "ro", + "ru": "ru", + "sa": "sa", + "sah": "sah", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "ur": "ur", + "uz": "uz", + "vi": "vi", + "yi": "yi", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh-classical" + }, + "regions": {} + }, + "yahoo": { + "all_locale": "any", + "custom": {}, + "data_type": 
"traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "cs": "cs", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "fi": "fi", + "fr": "fr", + "he": "he", + "hr": "hr", + "hu": "hu", + "it": "it", + "ja": "ja", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "nl": "nl", + "no": "no", + "pl": "pl", + "pt": "pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sv": "sv", + "th": "th", + "tr": "tr", + "zh_Hans": "zh_chs", + "zh_Hant": "zh_cht" + }, + "regions": {} + } +} \ No newline at end of file diff --git a/searx/engines/google.py b/searx/engines/google.py index bdb351432..fdde9024a 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -1,34 +1,39 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google WEB engine. Some of this -implementations are shared by other engines: +"""This is the implementation of the Google WEB engine. Some of this +implementations (manly the :py:obj:`get_google_info`) are shared by other +engines: - :ref:`google images engine` - :ref:`google news engine` - :ref:`google videos engine` - -The google WEB engine itself has a special setup option: - -.. code:: yaml - - - name: google - ... - use_mobile_ui: false - -``use_mobile_ui``: (default: ``false``) - Enables to use *mobile endpoint* to bypass the google blocking (see - :issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More - results` is not affected by Google rate limiting and we can still do requests - while actively blocked by the original Google search. By activate - ``use_mobile_ui`` this behavior is simulated by adding the parameter - ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`. 
+- :ref:`google scholar engine` +- :ref:`google autocomplete` """ +from typing import TYPE_CHECKING + +import re from urllib.parse import urlencode from lxml import html -from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +import babel +import babel.core +import babel.languages + +from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +from searx.locales import language_tag, region_tag, get_offical_locales +from searx import network from searx.exceptions import SearxEngineCaptchaException +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + # about about = { @@ -45,64 +50,6 @@ categories = ['general', 'web'] paging = True time_range_support = True safesearch = True -send_accept_language_header = True -use_mobile_ui = False -supported_languages_url = 'https://www.google.com/preferences?#languages' - -# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests -google_domains = { - 'BG': 'google.bg', # Bulgaria - 'CZ': 'google.cz', # Czech Republic - 'DE': 'google.de', # Germany - 'DK': 'google.dk', # Denmark - 'AT': 'google.at', # Austria - 'CH': 'google.ch', # Switzerland - 'GR': 'google.gr', # Greece - 'AU': 'google.com.au', # Australia - 'CA': 'google.ca', # Canada - 'GB': 'google.co.uk', # United Kingdom - 'ID': 'google.co.id', # Indonesia - 'IE': 'google.ie', # Ireland - 'IN': 'google.co.in', # India - 'MY': 'google.com.my', # Malaysia - 'NZ': 'google.co.nz', # New Zealand - 'PH': 'google.com.ph', # Philippines - 'SG': 'google.com.sg', # Singapore - 'US': 'google.com', # United States (google.us) redirects to .com - 'ZA': 'google.co.za', # South Africa - 'AR': 'google.com.ar', # Argentina - 'CL': 'google.cl', # Chile - 'ES': 'google.es', # Spain - 'MX': 'google.com.mx', # Mexico - 'EE': 'google.ee', # Estonia - 'FI': 'google.fi', # Finland - 'BE': 'google.be', # Belgium - 'FR': 
'google.fr', # France - 'IL': 'google.co.il', # Israel - 'HR': 'google.hr', # Croatia - 'HU': 'google.hu', # Hungary - 'IT': 'google.it', # Italy - 'JP': 'google.co.jp', # Japan - 'KR': 'google.co.kr', # South Korea - 'LT': 'google.lt', # Lithuania - 'LV': 'google.lv', # Latvia - 'NO': 'google.no', # Norway - 'NL': 'google.nl', # Netherlands - 'PL': 'google.pl', # Poland - 'BR': 'google.com.br', # Brazil - 'PT': 'google.pt', # Portugal - 'RO': 'google.ro', # Romania - 'RU': 'google.ru', # Russia - 'SK': 'google.sk', # Slovakia - 'SI': 'google.si', # Slovenia - 'SE': 'google.se', # Sweden - 'TH': 'google.co.th', # Thailand - 'TR': 'google.com.tr', # Turkey - 'UA': 'google.com.ua', # Ukraine - 'CN': 'google.com.hk', # There is no google.cn, we use .com.hk for zh-CN - 'HK': 'google.com.hk', # Hong Kong - 'TW': 'google.com.tw', # Taiwan -} time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} @@ -115,47 +62,50 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} results_xpath = './/div[@data-sokoban-container]' title_xpath = './/a/h3[1]' href_xpath = './/a[h3]/@href' -content_xpath = './/div[@data-content-feature=1]' +content_xpath = './/div[@data-content-feature]' # google *sections* are no usual *results*, we ignore them g_section_with_header = './g-section-with-header' - # Suggestions are links placed in a *card-section*, we extract only the text # from the links not the links itself. suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a' +# UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for +# # celebrities like '!google natasha allegri' +# # or '!google chris evans' +UI_ASYNC = 'use_ac:true,_fmt:prog' +"""Format of the response from UI's async request.""" -def get_lang_info(params, lang_list, custom_aliases, supported_any_language): - """Composing various language properties for the google engines. 
+ +def get_google_info(params, eng_traits): + """Composing various (language) properties for the google engines (:ref:`google + API`). This function is called by the various google engines (:ref:`google web engine`, :ref:`google images engine`, :ref:`google news engine` and :ref:`google videos engine`). - :param dict param: request parameters of the engine + :param dict param: Request parameters of the engine. At least + a ``searxng_locale`` key should be in the dictionary. - :param list lang_list: list of supported languages of the engine - :py:obj:`ENGINES_LANGUAGES[engine-name] ` - - :param dict lang_list: custom aliases for non standard language codes - (used when calling :py:func:`searx.utils.match_language`) - - :param bool supported_any_language: When a language is not specified, the - language interpretation is left up to Google to decide how the search - results should be delivered. This argument is ``True`` for the google - engine and ``False`` for the other engines (google-images, -news, - -scholar, -videos). + :param eng_traits: Engine's traits fetched from google preferences + (:py:obj:`searx.enginelib.traits.EngineTraits`) :rtype: dict :returns: Py-Dictionary with the key/value pairs: language: - Return value from :py:func:`searx.utils.match_language` + The language code that is used by google (e.g. ``lang_en`` or + ``lang_zh-TW``) country: - The country code (e.g. US, AT, CA, FR, DE ..) + The country code that is used by google (e.g. ``US`` or ``TW``) + + locale: + A instance of :py:obj:`babel.core.Locale` build from the + ``searxng_locale`` value. subdomain: Google subdomain :py:obj:`google_domains` that fits to the country @@ -165,52 +115,67 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): Py-Dictionary with additional request arguments (can be passed to :py:func:`urllib.parse.urlencode`). + - ``hl`` parameter: specifies the interface language of user interface. 
+ - ``lr`` parameter: restricts search results to documents written in + a particular language. + - ``cr`` parameter: restricts search results to documents + originating in a particular country. + - ``ie`` parameter: sets the character encoding scheme that should + be used to interpret the query string ('utf8'). + - ``oe`` parameter: sets the character encoding scheme that should + be used to decode the XML result ('utf8'). + headers: Py-Dictionary with additional HTTP headers (can be passed to request's headers) + + - ``Accept: '*/*`` + """ + ret_val = { 'language': None, 'country': None, 'subdomain': None, 'params': {}, 'headers': {}, + 'cookies': {}, + 'locale': None, } - # language ... + sxng_locale = params.get('searxng_locale', 'all') + try: + locale = babel.Locale.parse(sxng_locale, sep='-') + except babel.core.UnknownLocaleError: + locale = None - _lang = params['language'] - _any_language = _lang.lower() == 'all' - if _any_language: - _lang = 'en-US' - language = match_language(_lang, lang_list, custom_aliases) - ret_val['language'] = language + eng_lang = eng_traits.get_language(sxng_locale, 'lang_en') + lang_code = eng_lang.split('_')[-1] # lang_zh-TW --> zh-TW / lang_en --> en + country = eng_traits.get_region(sxng_locale, eng_traits.all_locale) - # country ... + # Test zh_hans & zh_hant --> in the topmost links in the result list of list + # TW and HK you should a find wiktionary.org zh_hant link. In the result + # list of zh-CN should not be no hant link instead you should find + # zh.m.wikipedia.org/zh somewhere in the top. - _l = _lang.split('-') - if len(_l) == 2: - country = _l[1] - else: - country = _l[0].upper() - if country == 'EN': - country = 'US' + # '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5 + # '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5 + + ret_val['language'] = eng_lang ret_val['country'] = country - - # subdomain ... - - ret_val['subdomain'] = 'www.' 
+ google_domains.get(country.upper(), 'google.com') - - # params & headers - - lang_country = '%s-%s' % (language, country) # (en-US, en-EN, de-DE, de-AU, fr-FR ..) + ret_val['locale'] = locale + ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com') # hl parameter: - # https://developers.google.com/custom-search/docs/xml_results#hlsp The - # Interface Language: + # The hl parameter specifies the interface language (host language) of + # your user interface. To improve the performance and the quality of your + # search results, you are strongly encouraged to set this parameter + # explicitly. + # https://developers.google.com/custom-search/docs/xml_results#hlsp + # The Interface Language: # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages - ret_val['params']['hl'] = lang_list.get(lang_country, language) + ret_val['params']['hl'] = lang_code # lr parameter: # The lr (language restrict) parameter restricts search results to @@ -218,22 +183,72 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): # https://developers.google.com/custom-search/docs/xml_results#lrsp # Language Collection Values: # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections + # + # To select 'all' languages an empty 'lr' value is used. + # + # Different to other google services, Google Schloar supports to select more + # than one language. The languages are seperated by a pipe '|' (logical OR). + # By example: &lr=lang_zh-TW%7Clang_de selects articles written in + # traditional chinese OR german language. - if _any_language and supported_any_language: + ret_val['params']['lr'] = eng_lang + if sxng_locale == 'all': + ret_val['params']['lr'] = '' - # interpretation is left up to Google (based on whoogle) - # - # - add parameter ``source=lnt`` - # - don't use parameter ``lr`` - # - don't add a ``Accept-Language`` HTTP header. 
+ # cr parameter: + # The cr parameter restricts search results to documents originating in a + # particular country. + # https://developers.google.com/custom-search/docs/xml_results#crsp - ret_val['params']['source'] = 'lnt' + ret_val['params']['cr'] = 'country' + country + if sxng_locale == 'all': + ret_val['params']['cr'] = '' - else: + # gl parameter: (mandatory by Geeogle News) + # The gl parameter value is a two-letter country code. For WebSearch + # results, the gl parameter boosts search results whose country of origin + # matches the parameter value. See the Country Codes section for a list of + # valid values. + # Specifying a gl parameter value in WebSearch requests should improve the + # relevance of results. This is particularly true for international + # customers and, even more specifically, for customers in English-speaking + # countries other than the United States. + # https://developers.google.com/custom-search/docs/xml_results#glsp - # restricts search results to documents written in a particular - # language. - ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language) + ret_val['params']['gl'] = country + + # ie parameter: + # The ie parameter sets the character encoding scheme that should be used + # to interpret the query string. The default ie value is latin1. + # https://developers.google.com/custom-search/docs/xml_results#iesp + + ret_val['params']['ie'] = 'utf8' + + # oe parameter: + # The oe parameter sets the character encoding scheme that should be used + # to decode the XML result. The default oe value is latin1. + # https://developers.google.com/custom-search/docs/xml_results#oesp + + ret_val['params']['oe'] = 'utf8' + + # num parameter: + # The num parameter identifies the number of search results to return. + # The default num value is 10, and the maximum value is 20. If you request + # more than 20 results, only 20 results will be returned. 
+ # https://developers.google.com/custom-search/docs/xml_results#numsp + + # HINT: seems to have no effect (tested in google WEB & Images) + # ret_val['params']['num'] = 20 + + # HTTP headers + + ret_val['headers']['Accept'] = '*/*' + + # Cookies + + # - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746 + # - https://github.com/searxng/searxng/issues/1555 + ret_val['cookies']['CONSENT'] = "YES+" return ret_val @@ -245,33 +260,34 @@ def detect_google_sorry(resp): def request(query, params): """Google search request""" - + # pylint: disable=line-too-long offset = (params['pageno'] - 1) * 10 - - lang_info = get_lang_info(params, supported_languages, language_aliases, True) - - additional_parameters = {} - if use_mobile_ui: - additional_parameters = { - 'asearch': 'arc', - 'async': 'use_ac:true,_fmt:prog', - } + google_info = get_google_info(params, traits) # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium query_url = ( 'https://' - + lang_info['subdomain'] + + google_info['subdomain'] + '/search' + "?" 
+ urlencode( { 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start': offset, + **google_info['params'], 'filter': '0', - **additional_parameters, + 'start': offset, + # 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i', + # 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG', + # 'cs' : 1, + # 'sa': 'N', + # 'yv': 3, + # 'prmd': 'vin', + # 'ei': 'GASaY6TxOcy_xc8PtYeY6AE', + # 'sa': 'N', + # 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg' + # formally known as use_mobile_ui + 'asearch': 'arc', + 'async': UI_ASYNC, } ) ) @@ -282,25 +298,45 @@ def request(query, params): query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) params['url'] = query_url - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - if use_mobile_ui: - params['headers']['Accept'] = '*/*' - else: - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params +# (function(){var s=' ... +# ... 
DX/Ff5XSpSgdU32xSlKDJ//9k\x3d';var ii=['dimg_21'];_setImagesSrc(ii,s);})(); +RE_DATA_IMAGE = re.compile(r"'(data:image[^']*)'[^']*ii=\['([^']*)'\];_setImagesSrc") + + +def _parse_data_images(dom): + data_image_map = {} + for _script in eval_xpath_list(dom, "//script[@nonce]"): + script = _script.text + if not script: + continue + script = RE_DATA_IMAGE.search(script) + if not script: + continue + data_image_map[script.group(2)] = script.group(1).replace(r'\x3d', '=') + logger.debug('data:image objects --> %s', list(data_image_map.keys())) + return data_image_map + + def response(resp): """Get response from google's search request""" - + # pylint: disable=too-many-branches, too-many-statements detect_google_sorry(resp) results = [] # convert the text to dom dom = html.fromstring(resp.text) + + data_image_map = [] + if '_fmt:html' in UI_ASYNC: + # in this format images are embedded by a bse64 encoded 'data:image' + data_image_map = _parse_data_images(dom) + # results --> answer answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]') if answer_list: @@ -309,20 +345,9 @@ def response(resp): else: logger.debug("did not find 'answer'") - # results --> number_of_results - if not use_mobile_ui: - try: - _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0) - _digit = ''.join([n for n in _txt if n.isdigit()]) - number_of_results = int(_digit) - results.append({'number_of_results': number_of_results}) - except Exception as e: # pylint: disable=broad-except - logger.debug("did not 'number_of_results'") - logger.error(e, exc_info=True) - # parse results - for result in eval_xpath_list(dom, results_xpath): + for result in eval_xpath_list(dom, results_xpath): # pylint: disable=too-many-nested-blocks # google *sections* if extract_text(eval_xpath(result, g_section_with_header)): @@ -339,13 +364,31 @@ def response(resp): url = eval_xpath_getindex(result, href_xpath, 0, None) if url is None: continue - content = extract_text(eval_xpath_getindex(result, 
content_xpath, 0, default=None), allow_none=True) - if content is None: + + content = [] + img_list = [] + for content_feature in eval_xpath(result, content_xpath): + val = content_feature.attrib['data-content-feature'] + if val in ['1', '2']: + txt = extract_text(content_feature, allow_none=True) + if txt: + content.append(txt) + elif '0' in val: + img = content_feature.xpath('.//img/@src') + if img: + img = img[0] + if img.startswith('data:image'): + img_id = content_feature.xpath('.//img/@id') + if img_id: + img = data_image_map.get(img_id[0]) + img_list.append(img) + + if not content: logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title) continue - - logger.debug('add link to results: %s', title) - results.append({'url': url, 'title': title, 'content': content}) + content = ' / '.join(content) + img_src = img_list[0] if img_list else None + results.append({'url': url, 'title': title, 'content': content, 'img_src': img_src}) except Exception as e: # pylint: disable=broad-except logger.error(e, exc_info=True) @@ -361,15 +404,107 @@ def response(resp): # get supported languages from their site -def _fetch_supported_languages(resp): - ret_val = {} + + +skip_countries = [ + # official language of google-country not in google-languages + 'AL', # Albanien (sq) + 'AZ', # Aserbaidschan (az) + 'BD', # Bangladesch (bn) + 'BN', # Brunei Darussalam (ms) + 'BT', # Bhutan (dz) + 'ET', # Äthiopien (am) + 'GE', # Georgien (ka, os) + 'GL', # Grönland (kl) + 'KH', # Kambodscha (km) + 'LA', # Laos (lo) + 'LK', # Sri Lanka (si, ta) + 'ME', # Montenegro (sr) + 'MK', # Nordmazedonien (mk, sq) + 'MM', # Myanmar (my) + 'MN', # Mongolei (mn) + 'MV', # Malediven (dv) // dv_MV is unknown by babel + 'MY', # Malaysia (ms) + 'NP', # Nepal (ne) + 'TJ', # Tadschikistan (tg) + 'TM', # Turkmenistan (tk) + 'UZ', # Usbekistan (uz) +] + + +def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True): + """Fetch languages from Google.""" + # 
pylint: disable=import-outside-toplevel, too-many-branches + + engine_traits.custom['supported_domains'] = {} + + resp = network.get('https://www.google.com/preferences') + if not resp.ok: + raise RuntimeError("Response from Google's preferences is not OK.") + dom = html.fromstring(resp.text) - radio_buttons = eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]') + # supported language codes - for x in radio_buttons: - name = x.get("data-name") - code = x.get("value").split('_')[-1] - ret_val[code] = {"name": name} + lang_map = {'no': 'nb'} + for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'): - return ret_val + eng_lang = x.get("value").split('_')[-1] + try: + locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') + except babel.UnknownLocaleError: + print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang)) + continue + sxng_lang = language_tag(locale) + + conflict = engine_traits.languages.get(sxng_lang) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang)) + continue + engine_traits.languages[sxng_lang] = 'lang_' + eng_lang + + # alias languages + engine_traits.languages['zh'] = 'lang_zh-CN' + + # supported region codes + + for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'): + eng_country = x.get("value") + + if eng_country in skip_countries: + continue + if eng_country == 'ZZ': + engine_traits.all_locale = 'ZZ' + continue + + sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True) + + if not sxng_locales: + print("ERROR: can't map from google country %s (%s) to a babel region." 
% (x.get('data-name'), eng_country)) + continue + + for sxng_locale in sxng_locales: + engine_traits.regions[region_tag(sxng_locale)] = eng_country + + # alias regions + engine_traits.regions['zh-CN'] = 'HK' + + # supported domains + + if add_domains: + resp = network.get('https://www.google.com/supported_domains') + if not resp.ok: + raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.") + + for domain in resp.text.split(): + domain = domain.strip() + if not domain or domain in [ + '.google.com', + ]: + continue + region = domain.split('.')[-1].upper() + engine_traits.custom['supported_domains'][region] = 'www' + domain + if region == 'HK': + # There is no google.cn, we use .com.hk for zh-CN + engine_traits.custom['supported_domains']['CN'] = 'www' + domain diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 528f8d21d..e6445b1c4 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,31 +1,38 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google images engine using the google -internal API used the Google Go Android app. +"""This is the implementation of the Google Images engine using the internal +Google API used by the Google Go Android app. This internal API offer results in -- JSON (_fmt:json) -- Protobuf (_fmt:pb) -- Protobuf compressed? (_fmt:pc) -- HTML (_fmt:html) -- Protobuf encoded in JSON (_fmt:jspb). +- JSON (``_fmt:json``) +- Protobuf_ (``_fmt:pb``) +- Protobuf_ compressed? (``_fmt:pc``) +- HTML (``_fmt:html``) +- Protobuf_ encoded in JSON (``_fmt:jspb``). +.. 
_Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers """ +from typing import TYPE_CHECKING + from urllib.parse import urlencode from json import loads +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, detect_google_sorry, ) -# pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +if TYPE_CHECKING: + import logging + from searx.enginelib.traits import EngineTraits + + logger: logging.Logger + traits: EngineTraits -# pylint: enable=unused-import # about about = { @@ -40,7 +47,6 @@ about = { # engine dependent config categories = ['images', 'web'] paging = True -use_locale_domain = True time_range_support = True safesearch = True send_accept_language_header = True @@ -51,20 +57,18 @@ filter_mapping = {0: 'images', 1: 'active', 2: 'active'} def request(query, params): """Google-Image search request""" - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + google_info = get_google_info(params, traits) query_url = ( 'https://' - + lang_info['subdomain'] + + google_info['subdomain'] + '/search' + "?" 
+ urlencode( { 'q': query, 'tbm': "isch", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", + **google_info['params'], 'asearch': 'isch', 'async': '_fmt:json,p:1,ijn:' + str(params['pageno']), } @@ -77,9 +81,8 @@ def request(query, params): query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) params['url'] = query_url - params['headers'].update(lang_info['headers']) - params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip' - params['headers']['Accept'] = '*/*' + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params @@ -111,7 +114,11 @@ def response(resp): copyright_notice = item["result"].get('iptc', {}).get('copyright_notice') if copyright_notice: - result_item['source'] += ' / ' + copyright_notice + result_item['source'] += ' | ' + copyright_notice + + freshness_date = item["result"].get("freshness_date") + if freshness_date: + result_item['source'] += ' | ' + freshness_date file_size = item.get('gsa', {}).get('file_size') if file_size: diff --git a/searx/engines/google_internal_search.py b/searx/engines/google_internal_search.py deleted file mode 100644 index 2fa07e1ab..000000000 --- a/searx/engines/google_internal_search.py +++ /dev/null @@ -1,529 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -# lint: pylint -"""This is the implementation of the google WEB engine using the google internal API used on the mobile UI. -This internal API offer results in -- JSON (_fmt:json) -- Protobuf (_fmt:pb) -- Protobuf compressed? (_fmt:pc) -- HTML (_fmt:html) -- Protobuf encoded in JSON (_fmt:jspb). 
- -Some of this implementations are shared by other engines: -The implementation is shared by other engines: - -- :ref:`google images internal engine` -- :ref:`google news internal engine` -- :ref:`google videos internal engine` - -""" - -from urllib.parse import urlencode -from json import loads, dumps -from datetime import datetime, timedelta -from dateutil.tz import tzoffset -from babel.dates import format_datetime -import babel -from searx.utils import html_to_text - -# pylint: disable=unused-import -from searx.engines.google import ( - get_lang_info, - detect_google_sorry, - supported_languages_url, - time_range_dict, - filter_mapping, - _fetch_supported_languages, -) - -# pylint: enable=unused-import - -# about -about = { - "website": 'https://www.google.com', - "wikidata_id": 'Q9366', - "official_api_documentation": 'https://developers.google.com/custom-search/', - "use_official_api": False, - "require_api_key": False, - "results": 'JSON', -} - -# engine dependent config -categories = None -paging = True -time_range_support = True -safesearch = True -send_accept_language_header = True - -# configuration -include_image_results = True -include_twitter_results = False - - -def get_query_url_general(query, lang_info, query_params): - return ( - 'https://' - + lang_info['subdomain'] - + '/search' - + "?" - + urlencode( - { - 'q': query, - **query_params, - } - ) - ) - - -def get_query_url_images(query, lang_info, query_params): - # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium - return ( - 'https://' - + lang_info['subdomain'] - + '/search' - + "?" - + urlencode( - { - 'q': query, - 'tbm': "isch", - **query_params, - } - ) - ) - - -def get_query_url_news(query, lang_info, query_params): - return ( - 'https://' - + lang_info['subdomain'] - + '/search' - + "?" 
- + urlencode( - { - 'q': query, - 'tbm': "nws", - **query_params, - } - ) - ) - - -CATEGORY_TO_GET_QUERY_URL = { - 'general': get_query_url_general, - 'images': get_query_url_images, - 'news': get_query_url_news, -} - -CATEGORY_RESULT_COUNT_PER_PAGE = { - 'general': 10, - 'images': 100, - 'news': 10, -} - - -def request(query, params): - """Google search request""" - - result_count_per_page = CATEGORY_RESULT_COUNT_PER_PAGE[categories[0]] # pylint: disable=unsubscriptable-object - - offset = (params['pageno'] - 1) * result_count_per_page - - lang_info = get_lang_info(params, supported_languages, language_aliases, True) - - query_params = { - **lang_info['params'], - 'ie': 'utf8', - 'oe': 'utf8', - 'start': offset, - 'num': result_count_per_page, - 'filter': '0', - 'asearch': 'arc', - 'async': 'use_ac:true,_fmt:json', - } - - get_query_url = CATEGORY_TO_GET_QUERY_URL[categories[0]] # pylint: disable=unsubscriptable-object - - # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium - query_url = get_query_url(query, lang_info, query_params) - - if params['time_range'] in time_range_dict: - query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) - if params['safesearch']: - query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) - params['url'] = query_url - - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = '*/*' - - return params - - -def parse_search_feature_proto(search_feature_proto): - result_index = search_feature_proto["feature_metadata"]["logging_tree_ref_feature_metadata_extension"][ - "result_index" - ] - image_result_data = search_feature_proto["payload"]["image_result_data"] - title = html_to_text(image_result_data["page_title"]) - content = html_to_text(image_result_data.get("snippet", "")) - url = image_result_data["coupled_url"] - img_src = image_result_data["url"] - thumbnail_src = "https://encrypted-tbn0.gstatic.com/images?q=tbn:" + 
image_result_data["encrypted_docid"] - img_format = f'{image_result_data["full_image_size"]["width"]} * {image_result_data["full_image_size"]["height"]}' - - iptc = image_result_data.get("iptc_info", {}).get("iptc", {}) - copyright_notice = iptc.get("copyright_notice") - creator = iptc.get("creator") - if isinstance(creator, list): - creator = ", ".join(creator) - if creator and copyright_notice and creator != copyright_notice: - author = f'{creator} ; {copyright_notice}' - else: - author = creator - return { - "template": "images.html", - "title": title, - "content": content, - "url": url, - "img_src": img_src, - "thumbnail_src": thumbnail_src, - 'img_format': img_format, - "author": author, - "result_index": result_index, - } - - -class ParseResultGroupItem: - """Parse result_group_search_feature_proto.search_feature_proto""" - - def __init__(self, locale): - """Parse one tier 1 result""" - self.locale = locale - self.item_types = { - "EXPLORE_UNIVERSAL_BLOCK": self.explore_universal_block, - "HOST_CLUSTER": self.host_cluster, - "NAVIGATIONAL_RESULT_GROUP": self.navigational_result_group, - "VIDEO_RESULT": self.video_result, - "VIDEO_UNIVERSAL_GROUP": self.video_universal_group, - "WEB_RESULT": self.web_result, - "WEB_ANSWERS_CARD_BLOCK": self.web_answers_card_block, - "IMAGE_RESULT_GROUP": self.image_result_group, - "TWITTER_RESULT_GROUP": self.twitter_result_group, - "NEWS_WHOLEPAGE": self.news_wholepage, - # WHOLEPAGE_PAGE_GROUP - found for keyword what is t in English language - # EXPLORE_UNIVERSAL_BLOCK - # TRAVEL_ANSWERS_RESULT - # TOP_STORIES : news.html template - # ONEBOX_BLOCK: for example, result of math forumla, weather ... 
- } - - def explore_universal_block(self, item_to_parse): - results = [] - for item in item_to_parse["explore_universal_unit_sfp_interface"]: - explore_unit = item["explore_block_extension"]["payload"]["explore_unit"] - if "lookup_key" in explore_unit: - results.append( - {'suggestion': html_to_text(explore_unit["lookup_key"]["aquarium_query"]), 'result_index': -1} - ) - elif "label" in explore_unit: - results.append({'suggestion': html_to_text(explore_unit["label"]["text"]), 'result_index': -1}) - return results - - def host_cluster(self, item_to_parse): - results = [] - for navigational_result in item_to_parse["results"]: - result_index = navigational_result["web_result_inner"]["feature_metadata"][ - "logging_tree_ref_feature_metadata_extension" - ]["result_index"] - url = None - title = None - content = None - - for item in navigational_result["payload"]["sub_features"]["sub_feature"]: - payload = item["search_feature_proto"]["payload"] - if "primary_link" in payload: - primary_link = payload["primary_link"] - title = html_to_text(primary_link["title"]) - url = primary_link["url"] - if "snippet_text" in payload: - content = html_to_text(payload["snippet_text"]) - results.append({'url': url, 'title': title, 'content': content, 'result_index': result_index}) - # to do: parse additional results - return results - - def navigational_result_group(self, item_to_parse): - results = [] - navigational_result = item_to_parse["navigational_result"] - result_index = navigational_result["navigational_result_inner"]["feature_metadata"][ - "logging_tree_ref_feature_metadata_extension" - ]["result_index"] - url = None - title = None - content = None - - for item in navigational_result["payload"]["sub_features"]["sub_feature"]: - payload = item["search_feature_proto"]["payload"] - if "primary_link" in payload: - primary_link = payload["primary_link"] - title = html_to_text(primary_link["title"]) - url = primary_link["url"] - if "snippet_text" in payload: - content = 
html_to_text(payload["snippet_text"]) - results.append({'url': url, 'title': title, 'content': content, 'result_index': result_index}) - - for item in item_to_parse["megasitelinks"]["results"]: - result_data = item["payload"]["result_data"] - url = result_data["url"] - title = html_to_text(result_data["result_title"]) - content = html_to_text(result_data["snippet"]) - result_index = item["feature_metadata"]["logging_tree_ref_feature_metadata_extension"]["result_index"] - results.append({'url': url, 'title': title, 'content': content, 'result_index': result_index}) - - return results - - def video_result(self, item_to_parse): - result_index = item_to_parse["feature_metadata"]["logging_tree_ref_feature_metadata_extension"]["result_index"] - url = None - title = None - - for item in item_to_parse["payload"]["sub_features"]["sub_feature"]: - payload = item["search_feature_proto"]["payload"] - if "primary_link" in payload: - primary_link = payload["primary_link"] - title = html_to_text(primary_link["title"]) - url = primary_link["url"] - - return [{'url': url, 'title': title, 'result_index': result_index}] - - def video_universal_group(self, item_to_parse): - results = [] - - for item in item_to_parse["video_universal_group_element"]: - video_result = item["video_result"] - result_index = video_result["feature_metadata"]["logging_tree_ref_feature_metadata_extension"][ - "result_index" - ] - video_result_data = video_result["payload"]["video_result_data"] - url = video_result_data["url"] - title = html_to_text(video_result_data["title"]) - content = html_to_text(video_result_data["snippet"]) - results.append({'url': url, 'title': title, 'content': content, 'result_index': result_index}) - - return results - - def web_result(self, item_to_parse): - result_index = item_to_parse["web_result_inner"]["feature_metadata"][ - "logging_tree_ref_feature_metadata_extension" - ]["result_index"] - url = None - title = None - content = None - - for item in 
item_to_parse["payload"]["sub_features"]["sub_feature"]: - payload = item["search_feature_proto"]["payload"] - if "primary_link" in payload: - primary_link = payload["primary_link"] - title = html_to_text(primary_link["title"]) - url = primary_link["url"] - if "snippet_text" in payload: - content = html_to_text(payload["snippet_text"]) - - return [{'url': url, 'title': title, 'content': content, 'result_index': result_index}] - - def web_answers_card_block(self, item_to_parse): - results = [] - - for item in item_to_parse["web_answers_card_block_elements"]: - answer = None - url = None - for item_webanswers in item["webanswers_container"]["webanswers_container_elements"]: - if ( - "web_answers_result" in item_webanswers - and "text" in item_webanswers["web_answers_result"]["payload"] - ): - answer = html_to_text(item_webanswers["web_answers_result"]["payload"]["text"]) - if "web_answers_standard_result" in item_webanswers: - primary_link = item_webanswers["web_answers_standard_result"]["payload"]["standard_result"][ - "primary_link" - ] - url = primary_link["url"] - - results.append({'answer': answer, 'url': url, 'result_index': -1}) - - return results - - def twitter_result_group(self, item_to_parse): - results = [] - if not include_twitter_results: - return results - - result_index = item_to_parse["twitter_carousel_header"]["feature_metadata"][ - "logging_tree_ref_feature_metadata_extension" - ]["result_index"] - for item in item_to_parse["twitter_cards"]: - profile_payload = item["profile_link"]["payload"]["author"] - results.append( - { - "title": profile_payload["display_name"], - "url": profile_payload["profile_page_url"], - "result_index": result_index, - } - ) - - return results - - def image_result_group(self, item_to_parse): - results = [] - if not include_image_results: - return results - - for item in item_to_parse["image_result_group_element"]: - results.append(parse_search_feature_proto(item["image_result"])) - return results - - def 
news_wholepage(self, item_to_parse): - """Parse a news search result""" - - def iter_snippets(): - """Iterate over all the results, yield result_index, snippet to deal with nested structured""" - result_index = 0 - for item in item_to_parse["element"]: - if "news_singleton_result_group" in item: - payload = item["news_singleton_result_group"]["result"]["payload"]["liquid_item_data"] - yield result_index, payload["article"]["stream_simplified_snippet"] - result_index += 1 - continue - - if "top_coverage" in item: - for element in item["top_coverage"]["element"]: - yield result_index, element["result"]["payload"]["liquid_item_data"]["article"][ - "stream_simplified_snippet" - ] - result_index += 1 - continue - - if "news_sports_hub_result_group" in item: - for element in item["news_sports_hub_result_group"]["element"]: - yield result_index, element["result"]["payload"]["liquid_item_data"]["article"][ - "stream_simplified_snippet" - ] - result_index += 1 - continue - - if "news_topic_hub_refinements_result_group" in item: - for ref_list in item["news_topic_hub_refinements_result_group"]["refinements"]["refinement_list"]: - for result in ref_list["results"]: - yield result_index, result["payload"]["liquid_item_data"]["article"][ - "stream_simplified_snippet" - ] - result_index += 1 - continue - - print("unknow news", item) - - results = [] - for result_index, snippet in iter_snippets(): - publishedDate = snippet["date"]["timestamp"] - url = snippet["url"]["result_url"] - title = html_to_text(snippet["title"]["text"]) - content = html_to_text(snippet["snippet"]["snippet"]) - img_src = snippet.get("thumbnail_info", {}).get("sffe_50k_thumbnail_url") - results.append( - { - 'url': url, - 'title': title, - 'content': content, - 'img_src': img_src, - 'publishedDate': datetime.fromtimestamp(publishedDate), - "result_index": result_index, - } - ) - return results - - -class ParseResultItem: # pylint: disable=too-few-public-methods - """Parse 
result_search_feature_proto.search_feature_proto""" - - def __init__(self, locale): - self.locale = locale - self.item_types = { - "LOCAL_TIME": self.local_time, - "IMAGE_RESULT": self.image_result, - } - - def local_time(self, item_to_parse): - """Query like 'time in auckland' or 'time' - Note: localized_location reveal the location of the server - """ - seconds_utc = item_to_parse["payload"]["current_time"]["seconds_utc"] - timezones_0 = item_to_parse["payload"]["target_location"]["timezones"][0] - iana_timezone = timezones_0["iana_timezone"] - localized_location = timezones_0["localized_location"] - # parse timezone_abbrev_specific to create result_tz - # timezone_abbrev_specific for India is "UTC+5:30" and for New York is "UTC−4" - # the values for offsets are respectively ["5", "30", "0"] and ["-4": "0"] - timezone_abbrev_specific = timezones_0["timezone_abbrev_specific"] - offsets = timezone_abbrev_specific.replace("UTC", "").replace("GMT", "").replace("−", "-").split(":") - offsets.append("0") - result_tz = tzoffset(iana_timezone, timedelta(hours=int(offsets[0]), minutes=int(offsets[1]))) - result_dt = datetime.fromtimestamp(seconds_utc, tz=result_tz) - result_dt_str = format_datetime(result_dt, 'long', tzinfo=result_tz, locale=self.locale) - answer = f"{result_dt_str} ( {localized_location} )" - return [{'answer': answer, 'result_index': -1}] - - def image_result(self, item_to_parse): - return [parse_search_feature_proto(item_to_parse)] - - -def parse_web_results_list(json_data, locale): - results = [] - - tier_1_search_results = json_data["arcResponse"]["search_results"]["tier_1_search_results"] - results_list = tier_1_search_results["result_list"]["item"] - - if "spell_suggestion" in tier_1_search_results: - spell_suggestion = tier_1_search_results["spell_suggestion"] - if "spell_column" in spell_suggestion: - for spell_suggestion in tier_1_search_results["spell_suggestion"]["spell_column"]: - for spell_link in spell_suggestion["spell_link"]: - 
results.append({'correction': spell_link["raw_corrected_query"], 'result_index': -1}) - elif "full_page" in spell_suggestion: - results.append({'correction': spell_suggestion["full_page"]["raw_query"], 'result_index': -1}) - - parseResultItem = ParseResultItem(locale) - parseResultGroupItem = ParseResultGroupItem(locale) - for item in results_list: - if "result_group" in item: - result_item = item["result_group"] - result_item_extension = result_item["result_group_extension"] - elif "result" in item: - result_item = item["result"] - result_item_extension = result_item["result_extension"] - one_namespace_type = result_item_extension["one_namespace_type"] - if one_namespace_type in parseResultGroupItem.item_types and "result_group_search_feature_proto" in result_item: - search_feature_proto = result_item["result_group_search_feature_proto"]["search_feature_proto"] - results = results + parseResultGroupItem.item_types[one_namespace_type](search_feature_proto) - elif one_namespace_type in parseResultItem.item_types and "result_search_feature_proto" in result_item: - search_feature_proto = result_item["result_search_feature_proto"]["search_feature_proto"] - results = results + parseResultItem.item_types[one_namespace_type](search_feature_proto) - elif "result_group_search_feature_proto" in result_item: - print(dumps(one_namespace_type)) - - return sorted(results, key=lambda d: d['result_index']) - - -def response(resp): - """Get response from google's search request""" - - detect_google_sorry(resp) - - language = resp.search_params["language"] - locale = 'en' - try: - locale = babel.Locale.parse(language, sep='-') - except babel.core.UnknownLocaleError: - pass - - # only the 2nd line has the JSON content - response_2nd_line = resp.text.split("\n", 1)[1] - json_data = loads(response_2nd_line) - - return parse_web_results_list(json_data, locale) diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 1ada2d64d..ae55ca9cb 100644 --- 
a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -1,24 +1,40 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google news engine. The google news API -ignores some parameters from the common :ref:`google API`: +"""This is the implementation of the Google News engine. -- num_ : the number of search results is ignored +Google News has a different region handling compared to Google WEB. + +- the ``ceid`` argument has to be set (:py:obj:`ceid_list`) +- the hl_ argument has to be set correctly (and different to Google WEB) +- the gl_ argument is mandatory + +If one of this argument is not set correctly, the request is redirected to +CONSENT dialog:: + + https://consent.google.com/m?continue= + +The google news API ignores some parameters from the common :ref:`google API`: + +- num_ : the number of search results is ignored / there is no paging all + results for a query term are in the first response. - save_ : is ignored / Google-News results are always *SafeSearch* +.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp +.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp .. 
_save: https://developers.google.com/custom-search/docs/xml_results#safesp - """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING import binascii import re from urllib.parse import urlencode from base64 import b64decode from lxml import html +import babel +from searx import locales from searx.utils import ( eval_xpath, eval_xpath_list, @@ -26,18 +42,19 @@ from searx.utils import ( extract_text, ) -# pylint: disable=unused-import +from searx.engines.google import fetch_traits as _fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - supported_languages_url, - _fetch_supported_languages, -) - -# pylint: enable=unused-import - -from searx.engines.google import ( - get_lang_info, + get_google_info, detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -49,70 +66,77 @@ about = { "results": 'HTML', } -# compared to other google engines google-news has a different time range -# support. The time range is included in the search term. -time_range_dict = { - 'day': 'when:1d', - 'week': 'when:7d', - 'month': 'when:1m', - 'year': 'when:1y', -} - # engine dependent config - categories = ['news'] paging = False -use_locale_domain = True -time_range_support = True +time_range_support = False # Google-News results are always *SafeSearch*. 
Option 'safesearch' is set to # False here, otherwise checker will report safesearch-errors:: # # safesearch : results are identitical for safesearch=0 and safesearch=2 -safesearch = False -send_accept_language_header = True +safesearch = True +# send_accept_language_header = True def request(query, params): """Google-News search request""" - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + sxng_locale = params.get('searxng_locale', 'en-US') + ceid = locales.get_engine_locale(sxng_locale, traits.custom['ceid'], default='US:en') + google_info = get_google_info(params, traits) + google_info['subdomain'] = 'news.google.com' # google news has only one domain - # google news has only one domain - lang_info['subdomain'] = 'news.google.com' + ceid_region, ceid_lang = ceid.split(':') + ceid_lang, ceid_suffix = ( + ceid_lang.split('-') + + [ + None, + ] + )[:2] - ceid = "%s:%s" % (lang_info['country'], lang_info['language']) + google_info['params']['hl'] = ceid_lang - # google news redirects en to en-US - if lang_info['params']['hl'] == 'en': - lang_info['params']['hl'] = 'en-US' + if ceid_suffix and ceid_suffix not in ['Hans', 'Hant']: - # Very special to google-news compared to other google engines, the time - # range is included in the search term. - if params['time_range']: - query += ' ' + time_range_dict[params['time_range']] + if ceid_region.lower() == ceid_lang: + google_info['params']['hl'] = ceid_lang + '-' + ceid_region + else: + google_info['params']['hl'] = ceid_lang + '-' + ceid_suffix + + elif ceid_region.lower() != ceid_lang: + + if ceid_region in ['AT', 'BE', 'CH', 'IL', 'SA', 'IN', 'BD', 'PT']: + google_info['params']['hl'] = ceid_lang + else: + google_info['params']['hl'] = ceid_lang + '-' + ceid_region + + google_info['params']['lr'] = 'lang_' + ceid_lang.split('-')[0] + google_info['params']['gl'] = ceid_region query_url = ( 'https://' - + lang_info['subdomain'] - + '/search' - + "?" 
- + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'gl': lang_info['country']}) + + google_info['subdomain'] + + "/search?" + + urlencode( + { + 'q': query, + **google_info['params'], + } + ) + # ceid includes a ':' character which must not be urlencoded + ('&ceid=%s' % ceid) - ) # ceid includes a ':' character which must not be urlencoded + ) + params['url'] = query_url - - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params def response(resp): """Get response from google's search request""" results = [] - detect_google_sorry(resp) # convert the text to dom @@ -152,8 +176,8 @@ def response(resp): # The pub_date is mostly a string like 'yesertday', not a real # timezone date or time. Therefore we can't use publishedDate. 
- pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time')) - pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a')) + pub_date = extract_text(eval_xpath(result, './article//time')) + pub_origin = extract_text(eval_xpath(result, './article//a[@data-n-tid]')) content = ' / '.join([x for x in [pub_origin, pub_date] if x]) @@ -174,3 +198,127 @@ def response(resp): # return results return results + + +ceid_list = [ + 'AE:ar', + 'AR:es-419', + 'AT:de', + 'AU:en', + 'BD:bn', + 'BE:fr', + 'BE:nl', + 'BG:bg', + 'BR:pt-419', + 'BW:en', + 'CA:en', + 'CA:fr', + 'CH:de', + 'CH:fr', + 'CL:es-419', + 'CN:zh-Hans', + 'CO:es-419', + 'CU:es-419', + 'CZ:cs', + 'DE:de', + 'EG:ar', + 'ES:es', + 'ET:en', + 'FR:fr', + 'GB:en', + 'GH:en', + 'GR:el', + 'HK:zh-Hant', + 'HU:hu', + 'ID:en', + 'ID:id', + 'IE:en', + 'IL:en', + 'IL:he', + 'IN:bn', + 'IN:en', + 'IN:hi', + 'IN:ml', + 'IN:mr', + 'IN:ta', + 'IN:te', + 'IT:it', + 'JP:ja', + 'KE:en', + 'KR:ko', + 'LB:ar', + 'LT:lt', + 'LV:en', + 'LV:lv', + 'MA:fr', + 'MX:es-419', + 'MY:en', + 'NA:en', + 'NG:en', + 'NL:nl', + 'NO:no', + 'NZ:en', + 'PE:es-419', + 'PH:en', + 'PK:en', + 'PL:pl', + 'PT:pt-150', + 'RO:ro', + 'RS:sr', + 'RU:ru', + 'SA:ar', + 'SE:sv', + 'SG:en', + 'SI:sl', + 'SK:sk', + 'SN:fr', + 'TH:th', + 'TR:tr', + 'TW:zh-Hant', + 'TZ:en', + 'UA:ru', + 'UA:uk', + 'UG:en', + 'US:en', + 'US:es-419', + 'VE:es-419', + 'VN:vi', + 'ZA:en', + 'ZW:en', +] +"""List of region/language combinations supported by Google News. 
Values of the +``ceid`` argument of the Google News REST API.""" + + +_skip_values = [ + 'ET:en', # english (ethiopia) + 'ID:en', # english (indonesia) + 'LV:en', # english (latvia) +] + +_ceid_locale_map = {'NO:no': 'nb-NO'} + + +def fetch_traits(engine_traits: EngineTraits): + _fetch_traits(engine_traits, add_domains=False) + + engine_traits.custom['ceid'] = {} + + for ceid in ceid_list: + if ceid in _skip_values: + continue + + region, lang = ceid.split(':') + x = lang.split('-') + if len(x) > 1: + if x[1] not in ['Hant', 'Hans']: + lang = x[0] + + sxng_locale = _ceid_locale_map.get(ceid, lang + '-' + region) + try: + locale = babel.Locale.parse(sxng_locale, sep='-') + except babel.UnknownLocaleError: + print("ERROR: %s -> %s is unknown by babel" % (ceid, sxng_locale)) + continue + + engine_traits.custom['ceid'][locales.region_tag(locale)] = ceid diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index c07cd4cea..6f33d1e1a 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -1,19 +1,18 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Google (Scholar) +"""This is the implementation of the Google Scholar engine. -For detailed description of the *REST-full* API see: `Query Parameter -Definitions`_. - -.. _Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions +Compared to other Google services the Scholar engine has a simple GET REST-API +and there does not exist an `async` API. Even though the API is slightly vintage we +can make use of the :ref:`google API` to assemble the arguments of the GET +request.
""" -# pylint: disable=invalid-name +from typing import TYPE_CHECKING +from typing import Optional from urllib.parse import urlencode from datetime import datetime -from typing import Optional from lxml import html from searx.utils import ( @@ -23,19 +22,21 @@ from searx.utils import ( extract_text, ) +from searx.exceptions import SearxEngineCaptchaException + +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, - detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits -# pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url, - _fetch_supported_languages, -) +if TYPE_CHECKING: + import logging -# pylint: enable=unused-import + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -51,53 +52,62 @@ about = { categories = ['science', 'scientific publications'] paging = True language_support = True -use_locale_domain = True time_range_support = True safesearch = False send_accept_language_header = True -def time_range_url(params): - """Returns a URL query component for a google-Scholar time range based on - ``params['time_range']``. Google-Scholar does only support ranges in years. - To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*) - are mapped to *year*. If no range is set, an empty string is returned. - Example:: +def time_range_args(params): + """Returns a dictionary with time range arguments based on + ``params['time_range']``. + + Google Scholar supports a detailed search by year. Searching by *last + month* or *last week* (as offered by SearXNG) is uncommon for scientific + publications and is not supported by Google Scholar. + + To limit the result list when the user selects a range, all the SearXNG + ranges (*day*, *week*, *month*, *year*) are mapped to *year*. If no range + is set, an empty dictionary of arguments is returned.
Example: when + the user selects a time range (current year minus one in 2022): + + .. code:: python + + { 'as_ylo' : 2021 } - &as_ylo=2019 """ - # as_ylo=2016&as_yhi=2019 - ret_val = '' + ret_val = {} if params['time_range'] in time_range_dict: - ret_val = urlencode({'as_ylo': datetime.now().year - 1}) - return '&' + ret_val + ret_val['as_ylo'] = datetime.now().year - 1 + return ret_val + + +def detect_google_captcha(dom): + """In case of a CAPTCHA Google Scholar opens its own *not a Robot* dialog and is + not redirected to ``sorry.google.com``. + """ + if eval_xpath(dom, "//form[@id='gs_captcha_f']"): + raise SearxEngineCaptchaException() def request(query, params): """Google-Scholar search request""" - offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info(params, supported_languages, language_aliases, False) - + google_info = get_google_info(params, traits) # subdomain is: scholar.google.xy - lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.") + google_info['subdomain'] = google_info['subdomain'].replace("www.", "scholar.") - query_url = ( - 'https://' - + lang_info['subdomain'] - + '/scholar' - + "?" - + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset}) - ) + args = { + 'q': query, + **google_info['params'], + 'start': (params['pageno'] - 1) * 10, + 'as_sdt': '2007', # include patents / to disable set '0,5' + 'as_vis': '0', # include citations / to disable set '1' + } + args.update(time_range_args(params)) - query_url += time_range_url(params) - params['url'] = query_url - - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - - # params['google_subdomain'] = subdomain + params['url'] = 'https://' + google_info['subdomain'] + '/scholar?'
+ urlencode(args) + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params @@ -138,19 +148,15 @@ def parse_gs_a(text: Optional[str]): def response(resp): # pylint: disable=too-many-locals - """Get response from google's search request""" + """Parse response from Google Scholar""" results = [] - detect_google_sorry(resp) - - # which subdomain ? - # subdomain = resp.search_params.get('google_subdomain') - # convert the text to dom dom = html.fromstring(resp.text) + detect_google_captcha(dom) # parse results - for result in eval_xpath_list(dom, '//div[@data-cid]'): + for result in eval_xpath_list(dom, '//div[@data-rp]'): title = extract_text(eval_xpath(result, './/h3[1]//a')) @@ -158,7 +164,7 @@ def response(resp): # pylint: disable=too-many-locals # this is a [ZITATION] block continue - pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]')) + pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]')) if pub_type: pub_type = pub_type[1:-1].lower() diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index fc574bd48..985189df5 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google videos engine. +"""This is the implementation of the Google Videos engine. .. 
admonition:: Content-Security-Policy (CSP) @@ -14,9 +14,8 @@ """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING -import re from urllib.parse import urlencode from lxml import html @@ -27,20 +26,22 @@ from searx.utils import ( extract_text, ) +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, filter_mapping, - g_section_with_header, - title_xpath, suggestion_xpath, detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits -# pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +if TYPE_CHECKING: + import logging -# pylint: enable=unused-import + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -55,70 +56,32 @@ about = { # engine dependent config categories = ['videos', 'web'] -paging = False +paging = True language_support = True -use_locale_domain = True time_range_support = True safesearch = True -send_accept_language_header = True - -RE_CACHE = {} - - -def _re(regexpr): - """returns compiled regular expression""" - RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr)) - return RE_CACHE[regexpr] - - -def scrap_out_thumbs_src(dom): - ret_val = {} - thumb_name = 'dimg_' - for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'): - _script = script.text - # "dimg_35":"https://i.ytimg.c....", - _dimurl = _re("s='([^']*)").findall(_script) - for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script): - v = v.replace(r'\u003d', '=') - v = v.replace(r'\u0026', '&') - ret_val[k] = v - logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) - return ret_val - - -def scrap_out_thumbs(dom): - """Scrap out thumbnail data from