mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[mod] DuckDuckGo: fetch engine traits (data_type: supported_languages)
Implements a fetch_traits function for the DuckDuckGo engines. .. note:: Does not include migration of the request method from 'supported_languages' to 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									ef143729a0
								
							
						
					
					
						commit
						dba8977b09
					
				
					 4 changed files with 266 additions and 13 deletions
				
			
		|  | @ -2124,11 +2124,73 @@ | |||
|     ] | ||||
|   }, | ||||
|   "ddg definitions": { | ||||
|     "all_locale": null, | ||||
|     "all_locale": "wt-wt", | ||||
|     "custom": {}, | ||||
|     "data_type": "supported_languages", | ||||
|     "languages": {}, | ||||
|     "regions": {}, | ||||
|     "regions": { | ||||
|       "ar-SA": "xa-ar", | ||||
|       "bg-BG": "bg-bg", | ||||
|       "ca-ES": "es-ca", | ||||
|       "cs-CZ": "cz-cs", | ||||
|       "da-DK": "dk-da", | ||||
|       "de-AT": "at-de", | ||||
|       "de-CH": "ch-de", | ||||
|       "de-DE": "de-de", | ||||
|       "el-GR": "gr-el", | ||||
|       "en-AU": "au-en", | ||||
|       "en-CA": "ca-en", | ||||
|       "en-GB": "uk-en", | ||||
|       "en-IE": "ie-en", | ||||
|       "en-IL": "il-en", | ||||
|       "en-IN": "in-en", | ||||
|       "en-MY": "my-en", | ||||
|       "en-NZ": "nz-en", | ||||
|       "en-PH": "ph-en", | ||||
|       "en-PK": "pk-en", | ||||
|       "en-SG": "sg-en", | ||||
|       "en-US": "us-en", | ||||
|       "en-ZA": "za-en", | ||||
|       "es-AR": "ar-es", | ||||
|       "es-CL": "cl-es", | ||||
|       "es-CO": "co-es", | ||||
|       "es-ES": "es-es", | ||||
|       "es-MX": "mx-es", | ||||
|       "es-PE": "pe-es", | ||||
|       "es-US": "us-es", | ||||
|       "et-EE": "ee-et", | ||||
|       "fi-FI": "fi-fi", | ||||
|       "fr-BE": "be-fr", | ||||
|       "fr-CA": "ca-fr", | ||||
|       "fr-CH": "ch-fr", | ||||
|       "fr-FR": "fr-fr", | ||||
|       "hr-HR": "hr-hr", | ||||
|       "hu-HU": "hu-hu", | ||||
|       "id-ID": "id-en", | ||||
|       "it-IT": "it-it", | ||||
|       "ja-JP": "jp-jp", | ||||
|       "ko-KR": "kr-kr", | ||||
|       "lt-LT": "lt-lt", | ||||
|       "lv-LV": "lv-lv", | ||||
|       "nb-NO": "no-no", | ||||
|       "nl-BE": "be-nl", | ||||
|       "nl-NL": "nl-nl", | ||||
|       "pl-PL": "pl-pl", | ||||
|       "pt-BR": "br-pt", | ||||
|       "pt-PT": "pt-pt", | ||||
|       "ro-RO": "ro-ro", | ||||
|       "ru-RU": "ru-ru", | ||||
|       "sk-SK": "sk-sk", | ||||
|       "sl-SI": "sl-sl", | ||||
|       "sv-SE": "se-sv", | ||||
|       "th-TH": "th-en", | ||||
|       "tr-TR": "tr-tr", | ||||
|       "uk-UA": "ua-uk", | ||||
|       "vi-VN": "vn-en", | ||||
|       "zh-CN": "cn-zh", | ||||
|       "zh-HK": "hk-tzh", | ||||
|       "zh-TW": "tw-tzh" | ||||
|     }, | ||||
|     "supported_languages": [ | ||||
|       "ar-XA", | ||||
|       "bg-BG", | ||||
|  | @ -2196,11 +2258,73 @@ | |||
|     ] | ||||
|   }, | ||||
|   "duckduckgo": { | ||||
|     "all_locale": null, | ||||
|     "all_locale": "wt-wt", | ||||
|     "custom": {}, | ||||
|     "data_type": "supported_languages", | ||||
|     "languages": {}, | ||||
|     "regions": {}, | ||||
|     "regions": { | ||||
|       "ar-SA": "xa-ar", | ||||
|       "bg-BG": "bg-bg", | ||||
|       "ca-ES": "es-ca", | ||||
|       "cs-CZ": "cz-cs", | ||||
|       "da-DK": "dk-da", | ||||
|       "de-AT": "at-de", | ||||
|       "de-CH": "ch-de", | ||||
|       "de-DE": "de-de", | ||||
|       "el-GR": "gr-el", | ||||
|       "en-AU": "au-en", | ||||
|       "en-CA": "ca-en", | ||||
|       "en-GB": "uk-en", | ||||
|       "en-IE": "ie-en", | ||||
|       "en-IL": "il-en", | ||||
|       "en-IN": "in-en", | ||||
|       "en-MY": "my-en", | ||||
|       "en-NZ": "nz-en", | ||||
|       "en-PH": "ph-en", | ||||
|       "en-PK": "pk-en", | ||||
|       "en-SG": "sg-en", | ||||
|       "en-US": "us-en", | ||||
|       "en-ZA": "za-en", | ||||
|       "es-AR": "ar-es", | ||||
|       "es-CL": "cl-es", | ||||
|       "es-CO": "co-es", | ||||
|       "es-ES": "es-es", | ||||
|       "es-MX": "mx-es", | ||||
|       "es-PE": "pe-es", | ||||
|       "es-US": "us-es", | ||||
|       "et-EE": "ee-et", | ||||
|       "fi-FI": "fi-fi", | ||||
|       "fr-BE": "be-fr", | ||||
|       "fr-CA": "ca-fr", | ||||
|       "fr-CH": "ch-fr", | ||||
|       "fr-FR": "fr-fr", | ||||
|       "hr-HR": "hr-hr", | ||||
|       "hu-HU": "hu-hu", | ||||
|       "id-ID": "id-en", | ||||
|       "it-IT": "it-it", | ||||
|       "ja-JP": "jp-jp", | ||||
|       "ko-KR": "kr-kr", | ||||
|       "lt-LT": "lt-lt", | ||||
|       "lv-LV": "lv-lv", | ||||
|       "nb-NO": "no-no", | ||||
|       "nl-BE": "be-nl", | ||||
|       "nl-NL": "nl-nl", | ||||
|       "pl-PL": "pl-pl", | ||||
|       "pt-BR": "br-pt", | ||||
|       "pt-PT": "pt-pt", | ||||
|       "ro-RO": "ro-ro", | ||||
|       "ru-RU": "ru-ru", | ||||
|       "sk-SK": "sk-sk", | ||||
|       "sl-SI": "sl-sl", | ||||
|       "sv-SE": "se-sv", | ||||
|       "th-TH": "th-en", | ||||
|       "tr-TR": "tr-tr", | ||||
|       "uk-UA": "ua-uk", | ||||
|       "vi-VN": "vn-en", | ||||
|       "zh-CN": "cn-zh", | ||||
|       "zh-HK": "hk-tzh", | ||||
|       "zh-TW": "tw-tzh" | ||||
|     }, | ||||
|     "supported_languages": [ | ||||
|       "ar-XA", | ||||
|       "bg-BG", | ||||
|  | @ -2268,11 +2392,73 @@ | |||
|     ] | ||||
|   }, | ||||
|   "duckduckgo images": { | ||||
|     "all_locale": null, | ||||
|     "all_locale": "wt-wt", | ||||
|     "custom": {}, | ||||
|     "data_type": "supported_languages", | ||||
|     "languages": {}, | ||||
|     "regions": {}, | ||||
|     "regions": { | ||||
|       "ar-SA": "xa-ar", | ||||
|       "bg-BG": "bg-bg", | ||||
|       "ca-ES": "es-ca", | ||||
|       "cs-CZ": "cz-cs", | ||||
|       "da-DK": "dk-da", | ||||
|       "de-AT": "at-de", | ||||
|       "de-CH": "ch-de", | ||||
|       "de-DE": "de-de", | ||||
|       "el-GR": "gr-el", | ||||
|       "en-AU": "au-en", | ||||
|       "en-CA": "ca-en", | ||||
|       "en-GB": "uk-en", | ||||
|       "en-IE": "ie-en", | ||||
|       "en-IL": "il-en", | ||||
|       "en-IN": "in-en", | ||||
|       "en-MY": "my-en", | ||||
|       "en-NZ": "nz-en", | ||||
|       "en-PH": "ph-en", | ||||
|       "en-PK": "pk-en", | ||||
|       "en-SG": "sg-en", | ||||
|       "en-US": "us-en", | ||||
|       "en-ZA": "za-en", | ||||
|       "es-AR": "ar-es", | ||||
|       "es-CL": "cl-es", | ||||
|       "es-CO": "co-es", | ||||
|       "es-ES": "es-es", | ||||
|       "es-MX": "mx-es", | ||||
|       "es-PE": "pe-es", | ||||
|       "es-US": "us-es", | ||||
|       "et-EE": "ee-et", | ||||
|       "fi-FI": "fi-fi", | ||||
|       "fr-BE": "be-fr", | ||||
|       "fr-CA": "ca-fr", | ||||
|       "fr-CH": "ch-fr", | ||||
|       "fr-FR": "fr-fr", | ||||
|       "hr-HR": "hr-hr", | ||||
|       "hu-HU": "hu-hu", | ||||
|       "id-ID": "id-en", | ||||
|       "it-IT": "it-it", | ||||
|       "ja-JP": "jp-jp", | ||||
|       "ko-KR": "kr-kr", | ||||
|       "lt-LT": "lt-lt", | ||||
|       "lv-LV": "lv-lv", | ||||
|       "nb-NO": "no-no", | ||||
|       "nl-BE": "be-nl", | ||||
|       "nl-NL": "nl-nl", | ||||
|       "pl-PL": "pl-pl", | ||||
|       "pt-BR": "br-pt", | ||||
|       "pt-PT": "pt-pt", | ||||
|       "ro-RO": "ro-ro", | ||||
|       "ru-RU": "ru-ru", | ||||
|       "sk-SK": "sk-sk", | ||||
|       "sl-SI": "sl-sl", | ||||
|       "sv-SE": "se-sv", | ||||
|       "th-TH": "th-en", | ||||
|       "tr-TR": "tr-tr", | ||||
|       "uk-UA": "ua-uk", | ||||
|       "vi-VN": "vn-en", | ||||
|       "zh-CN": "cn-zh", | ||||
|       "zh-HK": "hk-tzh", | ||||
|       "zh-TW": "tw-tzh" | ||||
|     }, | ||||
|     "supported_languages": [ | ||||
|       "ar-XA", | ||||
|       "bg-BG", | ||||
|  |  | |||
|  | @ -3,9 +3,8 @@ | |||
| """DuckDuckGo Lite | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| 
 | ||||
| from lxml.html import fromstring | ||||
| import json | ||||
| from lxml import html | ||||
| 
 | ||||
| from searx.utils import ( | ||||
|     dict_subset, | ||||
|  | @ -14,7 +13,10 @@ from searx.utils import ( | |||
|     extract_text, | ||||
|     match_language, | ||||
| ) | ||||
| from searx.network import get | ||||
| from searx import network | ||||
| from searx.enginelib.traits import EngineTraits | ||||
| 
 | ||||
| traits: EngineTraits | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|  | @ -120,13 +122,13 @@ def request(query, params): | |||
| def response(resp): | ||||
| 
 | ||||
|     headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie']) | ||||
|     get(url_ping, headers=headers_ping) | ||||
|     network.get(url_ping, headers=headers_ping) | ||||
| 
 | ||||
|     if resp.status_code == 303: | ||||
|         return [] | ||||
| 
 | ||||
|     results = [] | ||||
|     doc = fromstring(resp.text) | ||||
|     doc = html.fromstring(resp.text) | ||||
| 
 | ||||
|     result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table') | ||||
|     if not len(result_table) >= 3: | ||||
|  | @ -180,7 +182,70 @@ def _fetch_supported_languages(resp): | |||
|     response_page = response_page[response_page.find('regions:{') + 8 :] | ||||
|     response_page = response_page[: response_page.find('}') + 1] | ||||
| 
 | ||||
|     regions_json = loads(response_page) | ||||
|     regions_json = json.loads(response_page) | ||||
|     supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) | ||||
| 
 | ||||
|     return list(supported_languages) | ||||
| 
 | ||||
| 
 | ||||
def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch regions from DuckDuckGo and store them in ``engine_traits``.

    Downloads DuckDuckGo's ``u588.js``, extracts the JSON object that follows
    the ``regions:`` key and maps every DuckDuckGo region tag (written as
    ``<territory>-<language>``, e.g. ``us-en``) to a SearXNG region tag built
    by ``searx.locales.region_tag`` from the matching babel locale.  The
    result is written to ``engine_traits.regions`` as
    ``{<sxng_tag>: <ddg_tag>}``; ``engine_traits.all_locale`` is set to
    ``'wt-wt'``.

    :param engine_traits: traits object that is updated in place.
    :raises RuntimeError: if the HTTP response from DuckDuckGo is not OK.
    :raises ValueError: if the ``regions:{`` marker is missing from the
        response, or the extracted text is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError``).
    """
    # pylint: disable=import-outside-toplevel

    engine_traits.data_type = 'supported_languages'  # deprecated

    import babel
    from searx.locales import region_tag

    engine_traits.all_locale = 'wt-wt'

    resp = network.get('https://duckduckgo.com/util/u588.js')
    if not resp.ok:
        # Parsing an error page would only fail later with a misleading JSON
        # error, so stop here with a clear message.
        raise RuntimeError("Response from DuckDuckGo is not OK.")

    marker = 'regions:{'
    start = resp.text.find(marker)
    if start == -1:
        # str.find() returns -1 when the marker is absent; slicing from a
        # bogus offset would hide the real problem.
        raise ValueError("'regions:{' not found in the response from DuckDuckGo")

    # Keep the opening brace of the object (hence ``- 1``) and cut right after
    # the first closing brace; this assumes the regions object is flat.
    js_code = resp.text[start + len(marker) - 1 :]
    regions = json.loads(js_code[: js_code.find('}') + 1])

    # DuckDuckGo tags that can not be converted by simply swapping territory
    # and language; 'skip' marks tags that are ignored.
    reg_map = {
        'tw-tzh': 'zh_TW',
        'hk-tzh': 'zh_HK',
        'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
        'es-ca': 'ca_ES',
        'id-en': 'id_ID',
        'no-no': 'nb_NO',
        'jp-jp': 'ja_JP',
        'kr-kr': 'ko_KR',
        'xa-ar': 'ar_SA',
        'sl-sl': 'sl_SI',
        'th-en': 'th_TH',
        'vn-en': 'vi_VN',
    }

    for eng_tag, name in regions.items():

        if eng_tag == 'wt-wt':
            # this tag is the engine's "all regions" value (all_locale, set
            # above), not a region of its own
            continue

        region = reg_map.get(eng_tag)
        if region == 'skip':
            continue

        if not region:
            # default rule: '<territory>-<lang>' --> '<lang>_<TERRITORY>';
            # a malformed tag yields an identifier that babel rejects below
            eng_territory, _, eng_lang = eng_tag.partition('-')
            region = eng_lang + '_' + eng_territory.upper()

        try:
            sxng_tag = region_tag(babel.Locale.parse(region))
        except (babel.UnknownLocaleError, ValueError):
            # Locale.parse raises ValueError for malformed identifiers and
            # UnknownLocaleError when babel has no data for the locale
            print(f"ERROR: {name} ({eng_tag}) -> {region} is unknown by babel")
            continue

        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            # first mapping wins; report only if the tags really differ
            if conflict != eng_tag:
                print(f"CONFLICT: babel {sxng_tag} --> {conflict}, {eng_tag}")
            continue
        engine_traits.regions[sxng_tag] = eng_tag
|  |  | |||
|  | @ -11,6 +11,7 @@ from lxml import html | |||
| from searx.data import WIKIDATA_UNITS | ||||
| from searx.engines.duckduckgo import language_aliases | ||||
| from searx.engines.duckduckgo import (  # pylint: disable=unused-import | ||||
|     fetch_traits, | ||||
|     _fetch_supported_languages, | ||||
|     supported_languages_url, | ||||
| ) | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ from urllib.parse import urlencode | |||
| from searx.exceptions import SearxEngineAPIException | ||||
| from searx.engines.duckduckgo import get_region_code | ||||
| from searx.engines.duckduckgo import (  # pylint: disable=unused-import | ||||
|     fetch_traits, | ||||
|     _fetch_supported_languages, | ||||
|     supported_languages_url, | ||||
| ) | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser