Merge pull request #1866 from return42/fix-news

bugfix: google-news and bing-news have changed the language parameter

commit a5d3585a0c

10 changed files with 28391 additions and 27527 deletions

Makefile (13 lines changed)
@@ -27,6 +27,7 @@ help:
 	@echo  '  uninstall - uninstall (./local)'
 	@echo  '  gh-pages  - build docs & deploy on gh-pages branch'
 	@echo  '  clean     - drop builds and environments'
+	@echo  '  project   - re-build generic files of the searx project'
 	@echo  ''
 	@$(MAKE) -s -f utils/makefile.include make-help
 	@echo  ''
@@ -67,6 +68,18 @@ docs-live:  pyenvinstall sphinx-live
 $(GH_PAGES)::
 	@echo "doc available at --> $(DOCS_URL)"
 
+# update project files
+# --------------------
+
+PHONY += project engines-languages
+
+project: searx/data/engines_languages.json
+
+searx/data/engines_languages.json:  pyenvinstall
+	$(PY_ENV_ACT); python utils/fetch_languages.py
+	mv engines_languages.json searx/data/engines_languages.json
+	mv languages.py searx/languages.py
+
 # test
 # ----
 
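The new "project" target simply chains the language fetcher with two file moves. A rough Python equivalent of that recipe (a sketch only, not part of the commit; the paths come from the Makefile lines above, and the virtualenv activation is omitted):

import shutil
import subprocess

# run the generator; fetch_languages.py deliberately writes its two output
# files into the current working directory so a failed run cannot clobber
# the real data files (the Makefile additionally activates the project's
# virtualenv first, which is skipped here)
subprocess.run(['python', 'utils/fetch_languages.py'], check=True)

# then move the freshly generated files into place, as the Makefile recipe does
shutil.move('engines_languages.json', 'searx/data/engines_languages.json')
shutil.move('languages.py', 'searx/languages.py')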
requirements-dev.txt

@@ -5,6 +5,7 @@ mock==2.0.0
 nose2[coverage_plugin]
 cov-core==1.15.0
 pep8==1.7.0
+pylint
 plone.testing==5.0.0
 splinter==0.11.0
 transifex-client==0.12.2
										
											
File diff suppressed because it is too large (presumably the regenerated searx/data/engines_languages.json, which accounts for most of the 28391 additions and 27527 deletions).
searx/engines/bing.py

@@ -110,13 +110,18 @@ def response(resp):
 
 # get supported languages from their site
 def _fetch_supported_languages(resp):
-    supported_languages = []
-    dom = html.fromstring(resp.text)
-    options = eval_xpath(dom, '//div[@id="limit-languages"]//input')
-    for option in options:
-        code = eval_xpath(option, './@id')[0].replace('_', '-')
-        if code == 'nb':
-            code = 'no'
-        supported_languages.append(code)
+    lang_tags = set()
 
-    return supported_languages
+    setmkt = re.compile('setmkt=([^&]*)')
+    dom = html.fromstring(resp.text)
+    lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]")
+
+    for a in lang_links:
+        href = eval_xpath(a, './@href')[0]
+        match = setmkt.search(href)
+        l_tag = match.groups()[0]
+        _lang, _nation = l_tag.split('-', 1)
+        l_tag = _lang.lower() + '-' + _nation.upper()
+        lang_tags.add(l_tag)
+
+    return list(lang_tags)
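The rewritten parser no longer reads the ids of Bing's language checkboxes; it collects the setmkt= market tags from the region links and normalizes them to the ll-CC form. A self-contained sketch of the same parsing steps (the sample HTML is invented, and plain lxml calls stand in for searx's eval_xpath helper):

import re
from lxml import html

SAMPLE = """
<ul>
  <li><a href="/search?q=test&amp;setmkt=de-de&amp;setlang=de">Deutsch (Deutschland)</a></li>
  <li><a href="/search?q=test&amp;setmkt=en-us&amp;setlang=en">English (United States)</a></li>
  <li><a href="/search?q=test&amp;setmkt=fr-fr&amp;setlang=fr">Français (France)</a></li>
</ul>
"""

setmkt = re.compile('setmkt=([^&]*)')
dom = html.fromstring(SAMPLE)
lang_tags = set()

for a in dom.xpath("//li/a[contains(@href, 'setmkt')]"):
    href = a.get('href')
    l_tag = setmkt.search(href).groups()[0]                # e.g. 'de-de'
    _lang, _nation = l_tag.split('-', 1)
    lang_tags.add(_lang.lower() + '-' + _nation.upper())   # normalize to 'de-DE'

print(sorted(lang_tags))   # ['de-DE', 'en-US', 'fr-FR']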
searx/engines/bing_images.py

@@ -18,6 +18,8 @@ import re
 from searx.url_utils import urlencode
 from searx.utils import match_language
 
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
+
 # engine dependent config
 categories = ['images']
 paging = True
@@ -103,22 +105,3 @@ def response(resp):
             continue
 
     return results
-
-
-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    supported_languages = []
-    dom = html.fromstring(resp.text)
-
-    regions_xpath = '//div[@id="region-section-content"]' \
-                    + '//ul[@class="b_vList"]/li/a/@href'
-
-    regions = dom.xpath(regions_xpath)
-    for region in regions:
-        code = re.search('setmkt=[^\&]+', region).group()[7:]
-        if code == 'nb-NO':
-            code = 'no-NO'
-
-        supported_languages.append(code)
-
-    return supported_languages
searx/engines/bing_news.py

@@ -15,9 +15,10 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get, match_language
-from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 from searx.url_utils import urlencode, urlparse, parse_qsl
 
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
+
 # engine dependent config
 categories = ['news']
 paging = True
@@ -58,6 +59,7 @@ def _get_url(query, language, offset, time_range):
             offset=offset,
             interval=time_range_dict[time_range])
     else:
+        # e.g. setmkt=de-de&setlang=de
         search_path = search_string.format(
             query=urlencode({'q': query, 'setmkt': language}),
             offset=offset)
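For context, the setmkt market tag ends up in the query string built by _get_url(). A rough illustration of the resulting URL (the search path template and base URL here are illustrative stand-ins, not copied from searx):

from urllib.parse import urlencode

query, language, offset = 'searx', 'de-de', 1
search_string = 'news/search?{query}&first={offset}'     # illustrative template

search_path = search_string.format(
    query=urlencode({'q': query, 'setmkt': language}),    # -> q=searx&setmkt=de-de
    offset=offset)

print('https://www.bing.com/' + search_path)
# https://www.bing.com/news/search?q=searx&setmkt=de-de&first=1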
searx/engines/bing_videos.py

@@ -12,10 +12,10 @@
 
 from json import loads
 from lxml import html
-from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url
 from searx.url_utils import urlencode
 from searx.utils import match_language
 
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 
 categories = ['videos']
 paging = True
@@ -67,6 +67,10 @@ def request(query, params):
     if params['time_range'] in time_range_dict:
         params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
 
+    # bing videos did not like "older" versions < 70.0.1 when selecting
+    # languages other than 'en' .. very strange ?!?!
+    params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1'
+
     return params
 
 
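Outside of searx, the same workaround amounts to pinning the User-Agent of the outgoing request. A minimal sketch with the requests library (not searx code; searx sets the header on params['headers'] inside its request() hook as shown above):

import requests

headers = {
    # Bing Videos reportedly misbehaved with older Firefox UA strings when a
    # non-English market was selected, so a recent UA is forced here
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1',
}

resp = requests.get('https://www.bing.com/videos/search',
                    params={'q': 'searx', 'setmkt': 'de-de'},
                    headers=headers)
print(resp.status_code)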
searx/engines/google_news.py

@@ -54,7 +54,7 @@ def request(query, params):
     if params['language'] != 'all':
         language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
         if language:
-            params['url'] += '&lr=lang_' + language
+            params['url'] += '&hl=' + language
 
     return params
 
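So for google-news the UI language now travels in the hl parameter rather than the lr=lang_ result restriction. A hedged sketch of the effect (the base URL is a stand-in, not searx's actual search_url):

from urllib.parse import urlencode

def build_url(query, language):
    lang = language.split('-')[0]        # 'de-DE' -> 'de', as in the hunk above
    url = 'https://news.example/search?' + urlencode({'q': query})
    if lang:
        url += '&hl=' + lang             # old behaviour: url += '&lr=lang_' + lang
    return url

print(build_url('searx', 'de-DE'))       # .../search?q=searx&hl=de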
searx/languages.py

@@ -3,9 +3,11 @@
 # this file is generated automatically by utils/update_search_languages.py
 
 language_codes = (
+    (u"af-NA", u"Afrikaans", u"", u"Afrikaans"),
     (u"ar-SA", u"العربية", u"", u"Arabic"),
+    (u"be-BY", u"Беларуская", u"", u"Belarusian"),
     (u"bg-BG", u"Български", u"", u"Bulgarian"),
-    (u"ca-ES", u"Català", u"", u"Catalan"),
+    (u"ca-AD", u"Català", u"", u"Catalan"),
     (u"cs-CZ", u"Čeština", u"", u"Czech"),
     (u"da-DK", u"Dansk", u"", u"Danish"),
     (u"de", u"Deutsch", u"", u"German"),
@@ -17,11 +19,15 @@ language_codes = (
     (u"en-AU", u"English", u"Australia", u"English"),
     (u"en-CA", u"English", u"Canada", u"English"),
     (u"en-GB", u"English", u"United Kingdom", u"English"),
+    (u"en-IE", u"English", u"Ireland", u"English"),
     (u"en-IN", u"English", u"India", u"English"),
-    (u"en-MY", u"English", u"Malaysia", u"English"),
+    (u"en-NZ", u"English", u"New Zealand", u"English"),
+    (u"en-PH", u"English", u"Philippines", u"English"),
+    (u"en-SG", u"English", u"Singapore", u"English"),
     (u"en-US", u"English", u"United States", u"English"),
     (u"es", u"Español", u"", u"Spanish"),
     (u"es-AR", u"Español", u"Argentina", u"Spanish"),
+    (u"es-CL", u"Español", u"Chile", u"Spanish"),
     (u"es-ES", u"Español", u"España", u"Spanish"),
     (u"es-MX", u"Español", u"México", u"Spanish"),
     (u"et-EE", u"Eesti", u"", u"Estonian"),
@@ -35,6 +41,7 @@ language_codes = (
     (u"he-IL", u"עברית", u"", u"Hebrew"),
     (u"hr-HR", u"Hrvatski", u"", u"Croatian"),
     (u"hu-HU", u"Magyar", u"", u"Hungarian"),
+    (u"hy-AM", u"Հայերեն", u"", u"Armenian"),
     (u"id-ID", u"Indonesia", u"", u"Indonesian"),
     (u"is-IS", u"Íslenska", u"", u"Icelandic"),
     (u"it-IT", u"Italiano", u"", u"Italian"),
@@ -42,7 +49,7 @@ language_codes = (
     (u"ko-KR", u"한국어", u"", u"Korean"),
     (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
     (u"lv-LV", u"Latviešu", u"", u"Latvian"),
-    (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
+    (u"ms-MY", u"Melayu", u"", u"Malay"),
     (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"),
     (u"nl", u"Nederlands", u"", u"Dutch"),
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
@@ -55,8 +62,9 @@ language_codes = (
     (u"ru-RU", u"Русский", u"", u"Russian"),
     (u"sk-SK", u"Slovenčina", u"", u"Slovak"),
     (u"sl-SI", u"Slovenščina", u"", u"Slovenian"),
-    (u"sr-RS", u"Српски", u"", u"Serbian"),
+    (u"sr-RS", u"Srpski", u"", u"Serbian"),
     (u"sv-SE", u"Svenska", u"", u"Swedish"),
+    (u"sw-KE", u"Kiswahili", u"", u"Swahili"),
     (u"th-TH", u"ไทย", u"", u"Thai"),
     (u"tr-TR", u"Türkçe", u"", u"Turkish"),
     (u"uk-UA", u"Українська", u"", u"Ukrainian"),
utils/fetch_languages.py

@@ -5,7 +5,7 @@
 # Output files (engines_languages.json and languages.py)
 # are written in current directory to avoid overwriting in case something goes wrong.
 
-from json import dump
+import json
 import io
 from sys import path
 from babel import Locale, UnknownLocaleError
@@ -22,19 +22,22 @@ languages_file = 'languages.py'
 
 # Fetchs supported languages for each engine and writes json file with those.
 def fetch_supported_languages():
+
     engines_languages = {}
-    for engine_name in engines:
+    names = list(engines)
+    names.sort()
+
+    for engine_name in names:
+        print("fetching languages of engine %s" % engine_name)
+
         if hasattr(engines[engine_name], 'fetch_supported_languages'):
-            try:
-                engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
-                if type(engines_languages[engine_name]) == list:
-                    engines_languages[engine_name] = sorted(engines_languages[engine_name])
-            except Exception as e:
-                print(e)
+            engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
+            if type(engines_languages[engine_name]) == list:
+                engines_languages[engine_name] = sorted(engines_languages[engine_name])
 
     # write json file
-    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
-        dump(engines_languages, f, ensure_ascii=False, indent=4, separators=(',', ': '))
+    with open(engines_languages_file, 'w', encoding='utf-8') as f:
+        json.dump(engines_languages, f, indent=2, sort_keys=True)
 
     return engines_languages
 
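The point of json.dump(..., indent=2, sort_keys=True) together with iterating the engine names in sorted order is deterministic output, so regenerating engines_languages.json via "make project" produces minimal git diffs. A small sketch of that idea (sample data, not searx code):

import json

engines_languages = {
    'google news': ['de', 'en', 'fr'],
    'bing news': ['de-DE', 'en-US'],
}

with open('engines_languages.json', 'w', encoding='utf-8') as f:
    # sort_keys=True orders the engines; the per-engine lists are sorted
    # before being stored, mirroring fetch_supported_languages() above
    json.dump(engines_languages, f, indent=2, sort_keys=True)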
Author: Markus Heiser