forked from zaclys/searxng
		
	[mod] DuckDuckGo: fetch engine traits (data_type: supported_languages)
Implements a fetch_traits function for the DuckDuckGo engines. .. note:: Does not include migration of the request method from 'supported_languages' to 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									ef143729a0
								
							
						
					
					
						commit
						dba8977b09
					
				
					 4 changed files with 266 additions and 13 deletions
				
			
		| 
						 | 
				
			
			@ -2124,11 +2124,73 @@
 | 
			
		|||
    ]
 | 
			
		||||
  },
 | 
			
		||||
  "ddg definitions": {
 | 
			
		||||
    "all_locale": null,
 | 
			
		||||
    "all_locale": "wt-wt",
 | 
			
		||||
    "custom": {},
 | 
			
		||||
    "data_type": "supported_languages",
 | 
			
		||||
    "languages": {},
 | 
			
		||||
    "regions": {},
 | 
			
		||||
    "regions": {
 | 
			
		||||
      "ar-SA": "xa-ar",
 | 
			
		||||
      "bg-BG": "bg-bg",
 | 
			
		||||
      "ca-ES": "es-ca",
 | 
			
		||||
      "cs-CZ": "cz-cs",
 | 
			
		||||
      "da-DK": "dk-da",
 | 
			
		||||
      "de-AT": "at-de",
 | 
			
		||||
      "de-CH": "ch-de",
 | 
			
		||||
      "de-DE": "de-de",
 | 
			
		||||
      "el-GR": "gr-el",
 | 
			
		||||
      "en-AU": "au-en",
 | 
			
		||||
      "en-CA": "ca-en",
 | 
			
		||||
      "en-GB": "uk-en",
 | 
			
		||||
      "en-IE": "ie-en",
 | 
			
		||||
      "en-IL": "il-en",
 | 
			
		||||
      "en-IN": "in-en",
 | 
			
		||||
      "en-MY": "my-en",
 | 
			
		||||
      "en-NZ": "nz-en",
 | 
			
		||||
      "en-PH": "ph-en",
 | 
			
		||||
      "en-PK": "pk-en",
 | 
			
		||||
      "en-SG": "sg-en",
 | 
			
		||||
      "en-US": "us-en",
 | 
			
		||||
      "en-ZA": "za-en",
 | 
			
		||||
      "es-AR": "ar-es",
 | 
			
		||||
      "es-CL": "cl-es",
 | 
			
		||||
      "es-CO": "co-es",
 | 
			
		||||
      "es-ES": "es-es",
 | 
			
		||||
      "es-MX": "mx-es",
 | 
			
		||||
      "es-PE": "pe-es",
 | 
			
		||||
      "es-US": "us-es",
 | 
			
		||||
      "et-EE": "ee-et",
 | 
			
		||||
      "fi-FI": "fi-fi",
 | 
			
		||||
      "fr-BE": "be-fr",
 | 
			
		||||
      "fr-CA": "ca-fr",
 | 
			
		||||
      "fr-CH": "ch-fr",
 | 
			
		||||
      "fr-FR": "fr-fr",
 | 
			
		||||
      "hr-HR": "hr-hr",
 | 
			
		||||
      "hu-HU": "hu-hu",
 | 
			
		||||
      "id-ID": "id-en",
 | 
			
		||||
      "it-IT": "it-it",
 | 
			
		||||
      "ja-JP": "jp-jp",
 | 
			
		||||
      "ko-KR": "kr-kr",
 | 
			
		||||
      "lt-LT": "lt-lt",
 | 
			
		||||
      "lv-LV": "lv-lv",
 | 
			
		||||
      "nb-NO": "no-no",
 | 
			
		||||
      "nl-BE": "be-nl",
 | 
			
		||||
      "nl-NL": "nl-nl",
 | 
			
		||||
      "pl-PL": "pl-pl",
 | 
			
		||||
      "pt-BR": "br-pt",
 | 
			
		||||
      "pt-PT": "pt-pt",
 | 
			
		||||
      "ro-RO": "ro-ro",
 | 
			
		||||
      "ru-RU": "ru-ru",
 | 
			
		||||
      "sk-SK": "sk-sk",
 | 
			
		||||
      "sl-SI": "sl-sl",
 | 
			
		||||
      "sv-SE": "se-sv",
 | 
			
		||||
      "th-TH": "th-en",
 | 
			
		||||
      "tr-TR": "tr-tr",
 | 
			
		||||
      "uk-UA": "ua-uk",
 | 
			
		||||
      "vi-VN": "vn-en",
 | 
			
		||||
      "zh-CN": "cn-zh",
 | 
			
		||||
      "zh-HK": "hk-tzh",
 | 
			
		||||
      "zh-TW": "tw-tzh"
 | 
			
		||||
    },
 | 
			
		||||
    "supported_languages": [
 | 
			
		||||
      "ar-XA",
 | 
			
		||||
      "bg-BG",
 | 
			
		||||
| 
						 | 
				
			
			@ -2196,11 +2258,73 @@
 | 
			
		|||
    ]
 | 
			
		||||
  },
 | 
			
		||||
  "duckduckgo": {
 | 
			
		||||
    "all_locale": null,
 | 
			
		||||
    "all_locale": "wt-wt",
 | 
			
		||||
    "custom": {},
 | 
			
		||||
    "data_type": "supported_languages",
 | 
			
		||||
    "languages": {},
 | 
			
		||||
    "regions": {},
 | 
			
		||||
    "regions": {
 | 
			
		||||
      "ar-SA": "xa-ar",
 | 
			
		||||
      "bg-BG": "bg-bg",
 | 
			
		||||
      "ca-ES": "es-ca",
 | 
			
		||||
      "cs-CZ": "cz-cs",
 | 
			
		||||
      "da-DK": "dk-da",
 | 
			
		||||
      "de-AT": "at-de",
 | 
			
		||||
      "de-CH": "ch-de",
 | 
			
		||||
      "de-DE": "de-de",
 | 
			
		||||
      "el-GR": "gr-el",
 | 
			
		||||
      "en-AU": "au-en",
 | 
			
		||||
      "en-CA": "ca-en",
 | 
			
		||||
      "en-GB": "uk-en",
 | 
			
		||||
      "en-IE": "ie-en",
 | 
			
		||||
      "en-IL": "il-en",
 | 
			
		||||
      "en-IN": "in-en",
 | 
			
		||||
      "en-MY": "my-en",
 | 
			
		||||
      "en-NZ": "nz-en",
 | 
			
		||||
      "en-PH": "ph-en",
 | 
			
		||||
      "en-PK": "pk-en",
 | 
			
		||||
      "en-SG": "sg-en",
 | 
			
		||||
      "en-US": "us-en",
 | 
			
		||||
      "en-ZA": "za-en",
 | 
			
		||||
      "es-AR": "ar-es",
 | 
			
		||||
      "es-CL": "cl-es",
 | 
			
		||||
      "es-CO": "co-es",
 | 
			
		||||
      "es-ES": "es-es",
 | 
			
		||||
      "es-MX": "mx-es",
 | 
			
		||||
      "es-PE": "pe-es",
 | 
			
		||||
      "es-US": "us-es",
 | 
			
		||||
      "et-EE": "ee-et",
 | 
			
		||||
      "fi-FI": "fi-fi",
 | 
			
		||||
      "fr-BE": "be-fr",
 | 
			
		||||
      "fr-CA": "ca-fr",
 | 
			
		||||
      "fr-CH": "ch-fr",
 | 
			
		||||
      "fr-FR": "fr-fr",
 | 
			
		||||
      "hr-HR": "hr-hr",
 | 
			
		||||
      "hu-HU": "hu-hu",
 | 
			
		||||
      "id-ID": "id-en",
 | 
			
		||||
      "it-IT": "it-it",
 | 
			
		||||
      "ja-JP": "jp-jp",
 | 
			
		||||
      "ko-KR": "kr-kr",
 | 
			
		||||
      "lt-LT": "lt-lt",
 | 
			
		||||
      "lv-LV": "lv-lv",
 | 
			
		||||
      "nb-NO": "no-no",
 | 
			
		||||
      "nl-BE": "be-nl",
 | 
			
		||||
      "nl-NL": "nl-nl",
 | 
			
		||||
      "pl-PL": "pl-pl",
 | 
			
		||||
      "pt-BR": "br-pt",
 | 
			
		||||
      "pt-PT": "pt-pt",
 | 
			
		||||
      "ro-RO": "ro-ro",
 | 
			
		||||
      "ru-RU": "ru-ru",
 | 
			
		||||
      "sk-SK": "sk-sk",
 | 
			
		||||
      "sl-SI": "sl-sl",
 | 
			
		||||
      "sv-SE": "se-sv",
 | 
			
		||||
      "th-TH": "th-en",
 | 
			
		||||
      "tr-TR": "tr-tr",
 | 
			
		||||
      "uk-UA": "ua-uk",
 | 
			
		||||
      "vi-VN": "vn-en",
 | 
			
		||||
      "zh-CN": "cn-zh",
 | 
			
		||||
      "zh-HK": "hk-tzh",
 | 
			
		||||
      "zh-TW": "tw-tzh"
 | 
			
		||||
    },
 | 
			
		||||
    "supported_languages": [
 | 
			
		||||
      "ar-XA",
 | 
			
		||||
      "bg-BG",
 | 
			
		||||
| 
						 | 
				
			
			@ -2268,11 +2392,73 @@
 | 
			
		|||
    ]
 | 
			
		||||
  },
 | 
			
		||||
  "duckduckgo images": {
 | 
			
		||||
    "all_locale": null,
 | 
			
		||||
    "all_locale": "wt-wt",
 | 
			
		||||
    "custom": {},
 | 
			
		||||
    "data_type": "supported_languages",
 | 
			
		||||
    "languages": {},
 | 
			
		||||
    "regions": {},
 | 
			
		||||
    "regions": {
 | 
			
		||||
      "ar-SA": "xa-ar",
 | 
			
		||||
      "bg-BG": "bg-bg",
 | 
			
		||||
      "ca-ES": "es-ca",
 | 
			
		||||
      "cs-CZ": "cz-cs",
 | 
			
		||||
      "da-DK": "dk-da",
 | 
			
		||||
      "de-AT": "at-de",
 | 
			
		||||
      "de-CH": "ch-de",
 | 
			
		||||
      "de-DE": "de-de",
 | 
			
		||||
      "el-GR": "gr-el",
 | 
			
		||||
      "en-AU": "au-en",
 | 
			
		||||
      "en-CA": "ca-en",
 | 
			
		||||
      "en-GB": "uk-en",
 | 
			
		||||
      "en-IE": "ie-en",
 | 
			
		||||
      "en-IL": "il-en",
 | 
			
		||||
      "en-IN": "in-en",
 | 
			
		||||
      "en-MY": "my-en",
 | 
			
		||||
      "en-NZ": "nz-en",
 | 
			
		||||
      "en-PH": "ph-en",
 | 
			
		||||
      "en-PK": "pk-en",
 | 
			
		||||
      "en-SG": "sg-en",
 | 
			
		||||
      "en-US": "us-en",
 | 
			
		||||
      "en-ZA": "za-en",
 | 
			
		||||
      "es-AR": "ar-es",
 | 
			
		||||
      "es-CL": "cl-es",
 | 
			
		||||
      "es-CO": "co-es",
 | 
			
		||||
      "es-ES": "es-es",
 | 
			
		||||
      "es-MX": "mx-es",
 | 
			
		||||
      "es-PE": "pe-es",
 | 
			
		||||
      "es-US": "us-es",
 | 
			
		||||
      "et-EE": "ee-et",
 | 
			
		||||
      "fi-FI": "fi-fi",
 | 
			
		||||
      "fr-BE": "be-fr",
 | 
			
		||||
      "fr-CA": "ca-fr",
 | 
			
		||||
      "fr-CH": "ch-fr",
 | 
			
		||||
      "fr-FR": "fr-fr",
 | 
			
		||||
      "hr-HR": "hr-hr",
 | 
			
		||||
      "hu-HU": "hu-hu",
 | 
			
		||||
      "id-ID": "id-en",
 | 
			
		||||
      "it-IT": "it-it",
 | 
			
		||||
      "ja-JP": "jp-jp",
 | 
			
		||||
      "ko-KR": "kr-kr",
 | 
			
		||||
      "lt-LT": "lt-lt",
 | 
			
		||||
      "lv-LV": "lv-lv",
 | 
			
		||||
      "nb-NO": "no-no",
 | 
			
		||||
      "nl-BE": "be-nl",
 | 
			
		||||
      "nl-NL": "nl-nl",
 | 
			
		||||
      "pl-PL": "pl-pl",
 | 
			
		||||
      "pt-BR": "br-pt",
 | 
			
		||||
      "pt-PT": "pt-pt",
 | 
			
		||||
      "ro-RO": "ro-ro",
 | 
			
		||||
      "ru-RU": "ru-ru",
 | 
			
		||||
      "sk-SK": "sk-sk",
 | 
			
		||||
      "sl-SI": "sl-sl",
 | 
			
		||||
      "sv-SE": "se-sv",
 | 
			
		||||
      "th-TH": "th-en",
 | 
			
		||||
      "tr-TR": "tr-tr",
 | 
			
		||||
      "uk-UA": "ua-uk",
 | 
			
		||||
      "vi-VN": "vn-en",
 | 
			
		||||
      "zh-CN": "cn-zh",
 | 
			
		||||
      "zh-HK": "hk-tzh",
 | 
			
		||||
      "zh-TW": "tw-tzh"
 | 
			
		||||
    },
 | 
			
		||||
    "supported_languages": [
 | 
			
		||||
      "ar-XA",
 | 
			
		||||
      "bg-BG",
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,9 +3,8 @@
 | 
			
		|||
"""DuckDuckGo Lite
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from json import loads
 | 
			
		||||
 | 
			
		||||
from lxml.html import fromstring
 | 
			
		||||
import json
 | 
			
		||||
from lxml import html
 | 
			
		||||
 | 
			
		||||
from searx.utils import (
 | 
			
		||||
    dict_subset,
 | 
			
		||||
| 
						 | 
				
			
			@ -14,7 +13,10 @@ from searx.utils import (
 | 
			
		|||
    extract_text,
 | 
			
		||||
    match_language,
 | 
			
		||||
)
 | 
			
		||||
from searx.network import get
 | 
			
		||||
from searx import network
 | 
			
		||||
from searx.enginelib.traits import EngineTraits
 | 
			
		||||
 | 
			
		||||
traits: EngineTraits
 | 
			
		||||
 | 
			
		||||
# about
 | 
			
		||||
about = {
 | 
			
		||||
| 
						 | 
				
			
			@ -120,13 +122,13 @@ def request(query, params):
 | 
			
		|||
def response(resp):
 | 
			
		||||
 | 
			
		||||
    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
 | 
			
		||||
    get(url_ping, headers=headers_ping)
 | 
			
		||||
    network.get(url_ping, headers=headers_ping)
 | 
			
		||||
 | 
			
		||||
    if resp.status_code == 303:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    results = []
 | 
			
		||||
    doc = fromstring(resp.text)
 | 
			
		||||
    doc = html.fromstring(resp.text)
 | 
			
		||||
 | 
			
		||||
    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
 | 
			
		||||
    if not len(result_table) >= 3:
 | 
			
		||||
| 
						 | 
				
			
			@ -180,7 +182,70 @@ def _fetch_supported_languages(resp):
 | 
			
		|||
    response_page = response_page[response_page.find('regions:{') + 8 :]
 | 
			
		||||
    response_page = response_page[: response_page.find('}') + 1]
 | 
			
		||||
 | 
			
		||||
    regions_json = loads(response_page)
 | 
			
		||||
    regions_json = json.loads(response_page)
 | 
			
		||||
    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
 | 
			
		||||
 | 
			
		||||
    return list(supported_languages)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch regions from DuckDuckGo.

    Downloads DuckDuckGo's ``u588.js`` script, cuts the ``regions:{...}``
    object out of it, parses it as JSON and fills ``engine_traits.regions``
    with a mapping from the tag returned by :py:obj:`searx.locales.region_tag`
    to DuckDuckGo's own region code (e.g. ``us-en``).

    :param engine_traits: traits object that is updated in place; its
        ``data_type``, ``all_locale`` and ``regions`` attributes are written.
    :raises ValueError: if the ``regions:{...}`` object can not be located in
        the response, or if it is not valid JSON (``json.JSONDecodeError`` is a
        subclass of ``ValueError``).
    """
    # pylint: disable=import-outside-toplevel

    engine_traits.data_type = 'supported_languages'  # deprecated

    import babel
    from searx.locales import region_tag

    engine_traits.all_locale = 'wt-wt'

    resp = network.get('https://duckduckgo.com/util/u588.js')
    if not resp.ok:
        # Best effort: report the problem but still try to parse the body.
        print("ERROR: response from DuckDuckGo is not OK.")

    # Locate the JS object literal that follows 'regions:'.  str.find()
    # returns -1 when the marker is missing; check that explicitly instead of
    # silently slicing at a meaningless offset.
    marker = 'regions:{'
    start = resp.text.find(marker)
    if start == -1:
        raise ValueError("DuckDuckGo: marker 'regions:{' not found in response")
    start += len(marker) - 1  # keep the opening '{'

    end = resp.text.find('}', start)
    if end == -1:
        raise ValueError("DuckDuckGo: closing '}' of the regions object not found in response")

    regions = json.loads(resp.text[start : end + 1])

    # DuckDuckGo codes that can't be converted to a babel locale identifier by
    # the generic "<territory>-<language>" rule applied below.
    reg_map = {
        'tw-tzh': 'zh_TW',
        'hk-tzh': 'zh_HK',
        'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
        'es-ca': 'ca_ES',
        'id-en': 'id_ID',
        'no-no': 'nb_NO',
        'jp-jp': 'ja_JP',
        'kr-kr': 'ko_KR',
        'xa-ar': 'ar_SA',
        'sl-sl': 'sl_SI',
        'th-en': 'th_TH',
        'vn-en': 'vi_VN',
    }

    for eng_tag, name in regions.items():

        if eng_tag == 'wt-wt':
            # 'wt-wt' is the code used for "all regions", it is not a region
            engine_traits.all_locale = 'wt-wt'
            continue

        region = reg_map.get(eng_tag)
        if region == 'skip':
            continue

        if not region:
            # generic rule: "<territory>-<language>" --> "<language>_<TERRITORY>"
            eng_territory, eng_lang = eng_tag.split('-')
            region = eng_lang + '_' + eng_territory.upper()

        try:
            sxng_tag = region_tag(babel.Locale.parse(region))
        except babel.UnknownLocaleError:
            print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
            continue

        # The first engine code registered for a tag wins; a different code
        # arriving later for the same tag is only reported.
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,6 +11,7 @@ from lxml import html
 | 
			
		|||
from searx.data import WIKIDATA_UNITS
 | 
			
		||||
from searx.engines.duckduckgo import language_aliases
 | 
			
		||||
from searx.engines.duckduckgo import (  # pylint: disable=unused-import
 | 
			
		||||
    fetch_traits,
 | 
			
		||||
    _fetch_supported_languages,
 | 
			
		||||
    supported_languages_url,
 | 
			
		||||
)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,6 +8,7 @@ from urllib.parse import urlencode
 | 
			
		|||
from searx.exceptions import SearxEngineAPIException
 | 
			
		||||
from searx.engines.duckduckgo import get_region_code
 | 
			
		||||
from searx.engines.duckduckgo import (  # pylint: disable=unused-import
 | 
			
		||||
    fetch_traits,
 | 
			
		||||
    _fetch_supported_languages,
 | 
			
		||||
    supported_languages_url,
 | 
			
		||||
)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue