mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[fix] fix match_language issue to make zh-TW match to zh-Hant-TW
pybabel separates locales with underscores but we use hyphens everywhere babel doesn't directly touch
This commit is contained in:
		
							parent
							
								
									4cc1ee8565
								
							
						
					
					
						commit
						66b7be0965
					
				
					 3 changed files with 32 additions and 6 deletions
				
			
		|  | @ -369,6 +369,16 @@ def _get_lang_to_lc_dict(lang_list): | |||
|     return value | ||||
| 
 | ||||
| 
 | ||||
| # babel's get_global contains all sorts of miscellaneous locale and territory related data | ||||
| # see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py | ||||
| def _get_from_babel(lang_code, key): | ||||
|     match = get_global(key).get(lang_code.replace('-', '_')) | ||||
|     # for some keys, such as territory_aliases, match may be a list | ||||
|     if isinstance(match, str): | ||||
|         return match.replace('_', '-') | ||||
|     return match | ||||
| 
 | ||||
| 
 | ||||
| def _match_language(lang_code, lang_list=[], custom_aliases={}):  # pylint: disable=W0102 | ||||
|     """auxiliary function to match lang_code in lang_list""" | ||||
|     # replace language code with a custom alias if necessary | ||||
|  | @ -379,9 +389,11 @@ def _match_language(lang_code, lang_list=[], custom_aliases={}):  # pylint: disa | |||
|         return lang_code | ||||
| 
 | ||||
|     # try to get the most likely country for this language | ||||
|     subtags = get_global('likely_subtags').get(lang_code) | ||||
|     subtags = _get_from_babel(lang_code, 'likely_subtags') | ||||
|     if subtags: | ||||
|         subtag_parts = subtags.split('_') | ||||
|         if subtags in lang_list: | ||||
|             return subtags | ||||
|         subtag_parts = subtags.split('-') | ||||
|         new_code = subtag_parts[0] + '-' + subtag_parts[-1] | ||||
|         if new_code in custom_aliases: | ||||
|             new_code = custom_aliases[new_code] | ||||
|  | @ -402,16 +414,22 @@ def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US | |||
|     locale_parts = locale_code.split('-') | ||||
|     lang_code = locale_parts[0] | ||||
| 
 | ||||
|     # if locale_code has script, try matching without it | ||||
|     if len(locale_parts) > 2: | ||||
|         language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases) | ||||
|         if language: | ||||
|             return language | ||||
| 
 | ||||
|     # try to get language using an equivalent country code | ||||
|     if len(locale_parts) > 1: | ||||
|         country_alias = get_global('territory_aliases').get(locale_parts[-1]) | ||||
|         country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases') | ||||
|         if country_alias: | ||||
|             language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases) | ||||
|             if language: | ||||
|                 return language | ||||
| 
 | ||||
|     # try to get language using an equivalent language code | ||||
|     alias = get_global('language_aliases').get(lang_code) | ||||
|     alias = _get_from_babel(lang_code, 'language_aliases') | ||||
|     if alias: | ||||
|         language = _match_language(alias, lang_list, custom_aliases) | ||||
|         if language: | ||||
|  |  | |||
|  | @ -92,6 +92,14 @@ class TestUtils(SearxTestCase): | |||
|         self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback') | ||||
|         self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp') | ||||
| 
 | ||||
|         # handle script tags | ||||
|         self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN') | ||||
|         self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW') | ||||
|         self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN') | ||||
|         self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW') | ||||
|         self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN') | ||||
|         self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW') | ||||
| 
 | ||||
|         aliases = {'en-GB': 'en-UK', 'he': 'iw'} | ||||
| 
 | ||||
|         # guess country | ||||
|  |  | |||
|  | @ -211,12 +211,12 @@ class ViewsTestCase(SearxTestCase): | |||
|         result = self.app.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'}) | ||||
|         self.assertEqual(result.status_code, 200) | ||||
|         self.assertIn( | ||||
|             b'<option value="zh_TW" selected="selected">', | ||||
|             b'<option value="zh_Hant_TW" selected="selected">', | ||||
|             result.data, | ||||
|             'Interface locale ignored browser preference.' | ||||
|         ) | ||||
|         self.assertIn( | ||||
|             b'<option value="zh-TW" selected="selected">', | ||||
|             b'<option value="zh_Hant_TW" selected="selected">', | ||||
|             result.data, | ||||
|             'Search language ignored browser preference.' | ||||
|         ) | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Marc Abonce Seguin
						Marc Abonce Seguin