mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[fix] update_external_bangs: BANGS_URL 'https://duckduckgo.com/bang.js'
JSON file which contains the bangs / this file is no longer versioned. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									e97e1f9110
								
							
						
					
					
						commit
						cff0097289
					
				
					 1 changed file with 22 additions and 38 deletions
				
			
		|  | @ -2,50 +2,42 @@ | |||
| # lint: pylint | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs | ||||
| (:origin:`CI Update data ... <.github/workflows/data-update.yml>`). | ||||
| from :py:obj:`BANGS_URL`. | ||||
| 
 | ||||
| https://duckduckgo.com/newbang loads: | ||||
| 
 | ||||
| * a javascript which provides the bang version ( https://duckduckgo.com/bv1.js ) | ||||
| * a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example ) | ||||
| 
 | ||||
| This script loads the javascript, then the bangs. | ||||
| 
 | ||||
| The javascript URL may change in the future ( for example | ||||
| https://duckduckgo.com/bv2.js ), but most probably it will require an update of | ||||
| RE_BANG_VERSION | ||||
| - :origin:`CI Update data ... <.github/workflows/data-update.yml>` | ||||
| 
 | ||||
| """ | ||||
| # pylint: disable=C0116 | ||||
| 
 | ||||
| from pathlib import Path | ||||
| import json | ||||
| import re | ||||
| from os.path import join | ||||
| 
 | ||||
| import httpx | ||||
| 
 | ||||
| from searx import searx_dir  # pylint: disable=E0401 C0413 | ||||
| from searx import searx_dir | ||||
| from searx.external_bang import LEAF_KEY | ||||
| 
 | ||||
| # from https://duckduckgo.com/newbang | ||||
| URL_BV1 = 'https://duckduckgo.com/bv1.js' | ||||
| RE_BANG_VERSION = re.compile(r'\/bang\.v([0-9]+)\.js') | ||||
| 
 | ||||
| BANGS_URL = 'https://duckduckgo.com/bang.js' | ||||
| """JSON file which contains the bangs.""" | ||||
| 
 | ||||
| BANGS_DATA_FILE = Path(searx_dir) / 'data' / 'external_bangs.json' | ||||
| 
 | ||||
| HTTPS_COLON = 'https:' | ||||
| HTTP_COLON = 'http:' | ||||
| 
 | ||||
| 
 | ||||
| def get_bang_url(): | ||||
|     response = httpx.get(URL_BV1) | ||||
| def main(): | ||||
|     print(f'fetch bangs from {BANGS_URL}') | ||||
|     response = httpx.get(BANGS_URL) | ||||
|     response.raise_for_status() | ||||
| 
 | ||||
|     r = RE_BANG_VERSION.findall(response.text) | ||||
|     return f'https://duckduckgo.com/bang.v{r[0]}.js', r[0] | ||||
| 
 | ||||
| 
 | ||||
| def fetch_ddg_bangs(url): | ||||
|     response = httpx.get(url) | ||||
|     response.raise_for_status() | ||||
|     return json.loads(response.content.decode()) | ||||
|     ddg_bangs = json.loads(response.content.decode()) | ||||
|     trie = parse_ddg_bangs(ddg_bangs) | ||||
|     output = { | ||||
|         'version': 0, | ||||
|         'trie': trie, | ||||
|     } | ||||
|     with open(BANGS_DATA_FILE, 'w', encoding="utf8") as f: | ||||
|         json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4) | ||||
| 
 | ||||
| 
 | ||||
| def merge_when_no_leaf(node): | ||||
|  | @ -151,13 +143,5 @@ def parse_ddg_bangs(ddg_bangs): | |||
|     return bang_trie | ||||
| 
 | ||||
| 
 | ||||
| def get_bangs_filename(): | ||||
|     return join(join(searx_dir, "data"), "external_bangs.json") | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     bangs_url, bangs_version = get_bang_url() | ||||
|     print(f'fetch bangs from {bangs_url}') | ||||
|     output = {'version': bangs_version, 'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url))} | ||||
|     with open(get_bangs_filename(), 'w', encoding="utf8") as fp: | ||||
|         json.dump(output, fp, ensure_ascii=False, indent=4) | ||||
|     main() | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser