forked from zaclys/searxng

[mod] add external_urls.json and wikidata_units.json

parent d3d50eff66
commit ed6696e6bf

5 changed files with 1290 additions and 1 deletion

searx/data/__init__.py
@@ -2,7 +2,8 @@ import json
 from pathlib import Path
 
 
-__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader']
+__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'EXTERNAL_URLS', 'WIKIDATA_UNITS',
+            'bangs_loader', 'ahmia_blacklist_loader']
 data_dir = Path(__file__).parent
 
 
@@ -24,3 +25,5 @@ def ahmia_blacklist_loader():
 ENGINES_LANGUAGES = load('engines_languages.json')
 CURRENCIES = load('currencies.json')
 USER_AGENTS = load('useragents.json')
+EXTERNAL_URLS = load('external_urls.json')
+WIKIDATA_UNITS = load('wikidata_units.json')
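
Both new constants follow the existing pattern in searx.data: each JSON file is parsed once at import time and exposed as a module-level constant. A minimal sketch of how another module can read them; the 'youtube_video' key comes from external_urls.json below, while 'Q11573' is only an assumed example entry of wikidata_units.json (whose diff is suppressed further down):

    from searx.data import EXTERNAL_URLS, WIKIDATA_UNITS

    # URL templates keyed by url_id (see searx/data/external_urls.json below).
    print(EXTERNAL_URLS['youtube_video']['urls']['default'])
    # -> https://www.youtube.com/watch?v=$1

    # Unit symbols keyed by Wikidata entity id (see utils/fetch_wikidata_units.py below).
    print(WIKIDATA_UNITS.get('Q11573'))   # assumed entry: Q11573 (metre) -> 'm'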
							
								
								
									
searx/data/external_urls.json (new file, 156 lines)

{
    "facebook_profile": {
        "category_name": "Facebook",
        "url_name": "Facebook profile",
        "urls": {
            "default": "https://facebook.com/$1"
        }
    },
    "youtube_channel": {
        "category_name": "YouTube",
        "url_name": "YouTube channel",
        "urls": {
            "default": "https://www.youtube.com/channel/$1"
        }
    },
    "youtube_video": {
        "category_name": "YouTube",
        "url_name": "YouTube video",
        "urls": {
            "default": "https://www.youtube.com/watch?v=$1"
        }
    },
    "twitter_profile": {
        "category_name": "Twitter",
        "url_name": "Twitter profile",
        "urls": {
            "default": "https://twitter.com/$1"
        }
    },
    "instagram_profile": {
        "category_name": "Instagram",
        "url_name": "Instagram profile",
        "urls": {
            "default": "https://www.instagram.com/$1"
        }
    },
    "imdb_title": {
        "category_name": "IMDB",
        "url_name": "IMDB title",
        "urls": {
            "default": "https://www.imdb.com/title/$1"
        }
    },
    "imdb_name": {
        "category_name": "IMDB",
        "url_name": "IMDB name",
        "urls": {
            "default": "https://www.imdb.com/name/$1"
        }
    },
    "imdb_character": {
        "category_name": "IMDB",
        "url_name": "IMDB character",
        "urls": {
            "default": "https://www.imdb.com/character/$1"
        }
    },
    "imdb_company": {
        "category_name": "IMDB",
        "url_name": "IMDB company",
        "urls": {
            "default": "https://www.imdb.com/company/$1"
        }
    },
    "imdb_event": {
        "category_name": "IMDB",
        "url_name": "IMDB event",
        "urls": {
            "default": "https://www.imdb.com/event/$1"
        }
    },
    "rotten_tomatoes": {
        "category_name": "Rotten tomatoes",
        "url_name": "Rotten tomatoes title",
        "urls": {
            "default": "https://www.rottentomatoes.com/$1"
        }
    },
    "spotify_artist_id": {
        "category_name": "Spotify",
        "url_name": "Spotify artist",
        "urls": {
            "default": "https://open.spotify.com/artist/$1"
        }
    },
    "itunes_artist_id": {
        "category_name": "iTunes",
        "url_name": "iTunes artist",
        "urls": {
            "default": "https://music.apple.com/us/artist/$1"
        }
    },
    "soundcloud_id": {
        "category_name": "Soundcloud",
        "url_name": "Soundcloud artist",
        "urls": {
            "default": "https://soundcloud.com/$1"
        }
    },
    "netflix_id": {
        "category_name": "Netflix",
        "url_name": "Netflix movie",
        "urls": {
            "default": "https://www.netflix.com/watch/$1"
        }
    },
    "github_profile": {
        "category_name": "Github",
        "url_name": "Github profile",
        "urls": {
            "default": "https://wwww.github.com/$1"
        }
    },
    "musicbrainz_artist": {
        "category_name": "Musicbrainz",
        "url_name": "Musicbrainz artist",
        "urls": {
            "default": "http://musicbrainz.org/artist/$1"
        }
    },
    "musicbrainz_work": {
        "category_name": "Musicbrainz",
        "url_name": "Musicbrainz work",
        "urls": {
            "default": "http://musicbrainz.org/work/$1"
        }
    },
    "musicbrainz_release_group": {
        "category_name": "Musicbrainz",
        "url_name": "Musicbrainz release group",
        "urls": {
            "default": "http://musicbrainz.org/release-group/$1"
        }
    },
    "musicbrainz_label": {
        "category_name": "Musicbrainz",
        "url_name": "Musicbrainz label",
        "urls": {
            "default": "http://musicbrainz.org/label/$1"
        }
    },
    "wikimedia_image": {
        "category_name": "Wikipedia",
        "url_name": "Wikipedia image",
        "urls": {
            "default": "https://commons.wikimedia.org/wiki/Special:FilePath/$1?width=500&height=400"
        }
    },
    "map": {
        "category_name": "Map",
        "url_name": "geo map",
        "urls": {
            "default": "https://www.openstreetmap.org/?lat=${latitude}&lon=${longitude}&zoom=${zoom}&layers=M"
        }
    }
}
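
Every entry shares the same shape: category_name and url_name for display, plus a urls map whose values are templates with a $1 placeholder for the item id (the map entry instead uses ${latitude}, ${longitude} and ${zoom}). A small sketch of the substitution, using a hypothetical resolve() helper; the helper actually added by this commit is get_external_url() in searx/external_urls.py below:

    from searx.data import EXTERNAL_URLS

    def resolve(url_id, item_id, alternative='default'):
        # Hypothetical helper: look up an entry and fill the $1 placeholder.
        entry = EXTERNAL_URLS[url_id]
        return entry['url_name'], entry['urls'][alternative].replace('$1', item_id)

    print(resolve('twitter_profile', 'SOME_HANDLE'))   # SOME_HANDLE is a placeholder
    # -> ('Twitter profile', 'https://twitter.com/SOME_HANDLE')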
							
								
								
									
searx/data/wikidata_units.json (new file, 1006 lines)
(file diff suppressed because it is too large)
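
The diff of this file is not shown, but from the fetch script added below it is a flat JSON object mapping Wikidata entity ids to their unit symbols (property P5061, English values). The entries here are assumed, purely illustrative examples of that shape, written as a Python literal:

    # Assumed shape of searx/data/wikidata_units.json (illustrative entries only):
    WIKIDATA_UNITS = {
        "Q11570": "kg",   # kilogram
        "Q11573": "m",    # metre
        "Q25235": "h",    # hour
    }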
											
										
									
								
							
							
								
								
									
searx/external_urls.py (new file, 77 lines)

import math

from searx.data import EXTERNAL_URLS


IMDB_PREFIX_TO_URL_ID = {
    'tt': 'imdb_title',
    'mn': 'imdb_name',
    'ch': 'imdb_character',
    'co': 'imdb_company',
    'ev': 'imdb_event'
}


def get_imdb_url_id(imdb_item_id):
    id_prefix = imdb_item_id[:2]
    return IMDB_PREFIX_TO_URL_ID.get(id_prefix)


def get_external_url(url_id, item_id, alternative="default"):
    """Return an external URL or None if url_id is not found.

    url_id can take value from data/external_urls.json
    The "imdb_id" value is automatically converted according to the item_id value.

    If item_id is None, the raw URL with the $1 is returned.
    """
    if url_id == 'imdb_id' and item_id is not None:
        url_id = get_imdb_url_id(item_id)

    url_description = EXTERNAL_URLS.get(url_id)
    if url_description:
        url_template = url_description["urls"].get(alternative)
        if url_template is not None:
            if item_id is not None:
                return url_template.replace('$1', item_id)
            else:
                return url_template
    return None


def get_earth_coordinates_url(latitude, longitude, osm_zoom, alternative='default'):
    url = get_external_url('map', None, alternative)\
        .replace('${latitude}', str(latitude))\
        .replace('${longitude}', str(longitude))\
        .replace('${zoom}', str(osm_zoom))
    return url


def area_to_osm_zoom(area):
    """Convert an area in km² into an OSM zoom. Less reliable if the shape is not round.

    logarithmic regression using these data:
     * 9596961 -> 4 (China)
     * 3287263 -> 5 (India)
     * 643801 -> 6 (France)
     * 6028 -> 9
     * 1214 -> 10
     * 891 -> 12
     * 12 -> 13

    In WolframAlpha:
        >>> log fit {9596961,15},{3287263, 14},{643801,13},{6028,10},{1214,9},{891,7},{12,6}

    with 15 = 19-4 (China); 14 = 19-5 (India) and so on

    Args:
        area (int,float,str): area in km²

    Returns:
        int: OSM zoom, or 19 if area is not a number
    """
    try:
        amount = float(area)
        return max(0, min(19, round(19 - 0.688297 * math.log(226.878 * amount))))
    except ValueError:
        return 19
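
A minimal usage sketch of the new helpers; the item ids and coordinates are illustrative, and the expected values follow from the templates and the regression data above:

    from searx.external_urls import (get_external_url, get_earth_coordinates_url,
                                     area_to_osm_zoom)

    # 'imdb_id' is dispatched on the two-letter prefix: 'tt...' maps to imdb_title.
    print(get_external_url('imdb_id', 'tt0000001'))
    # -> https://www.imdb.com/title/tt0000001

    # With item_id=None the raw template, $1 included, is returned.
    print(get_external_url('spotify_artist_id', None))
    # -> https://open.spotify.com/artist/$1

    # An unknown url_id yields None instead of raising.
    print(get_external_url('no_such_id', 'x'))   # -> None

    # A France-sized area (643801 km²) maps to zoom 6 per the regression data.
    zoom = area_to_osm_zoom(643801)
    print(get_earth_coordinates_url(48.85, 2.35, zoom))
    # -> https://www.openstreetmap.org/?lat=48.85&lon=2.35&zoom=6&layers=M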
							
								
								
									
utils/fetch_wikidata_units.py (new file, 47 lines)

#!/usr/bin/env python

import json
import collections

# set path
from sys import path
from os.path import realpath, dirname, join
path.append(realpath(dirname(realpath(__file__)) + '/../'))

from searx import searx_dir
from searx.engines.wikidata import send_wikidata_query


SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol ?P2370 ?P2370Unit ?P2442 ?P2442Unit
WHERE
{
?item wdt:P31/wdt:P279 wd:Q47574.
?item wdt:P5061 ?symbol.
FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item
"""


def get_data():
    def get_key(unit):
        return unit['item']['value'].replace('http://www.wikidata.org/entity/', '')

    def get_value(unit):
        return unit['symbol']['value']

    result = send_wikidata_query(SARQL_REQUEST)
    if result is not None:
        # sort the units by entity name
        # so different fetches keep the file unchanged.
        list(result['results']['bindings']).sort(key=get_key)
        return collections.OrderedDict([(get_key(unit), get_value(unit)) for unit in result['results']['bindings']])


def get_wikidata_units_filename():
    return join(join(searx_dir, "data"), "wikidata_units.json")


with open(get_wikidata_units_filename(), 'w') as f:
    json.dump(get_data(), f, indent=4, ensure_ascii=False)
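
Running the script (e.g. python utils/fetch_wikidata_units.py) regenerates searx/data/wikidata_units.json in place. A sketch of how the generated mapping might then be consumed, assuming SPARQL results that return unit entities as full URIs; get_unit_symbol() is a hypothetical helper, not part of this commit:

    from searx.data import WIKIDATA_UNITS

    def get_unit_symbol(unit_uri):
        # Wikidata returns units as entity URIs; strip the prefix to get the bare Q-id.
        qid = unit_uri.replace('http://www.wikidata.org/entity/', '')
        return WIKIDATA_UNITS.get(qid, '')

    print(get_unit_symbol('http://www.wikidata.org/entity/Q11573'))
    # -> 'm', assuming the metre entity was included in the fetched data
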
Alexandre Flament