mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #878 from tiekoetter/fix-wikidata
Fix wikidata info box images
This commit is contained in:
		
						commit
						ae8e3f3543
					
				
					 1 changed file with 59 additions and 12 deletions
				
			
		|  | @ -1,10 +1,11 @@ | ||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|  | # lint: pylint | ||||||
|  | """Wikidata | ||||||
| """ | """ | ||||||
|  Wikidata | # pylint: disable=missing-class-docstring | ||||||
| """ |  | ||||||
| 
 | 
 | ||||||
| 
 | from hashlib import md5 | ||||||
| from urllib.parse import urlencode | from urllib.parse import urlencode, unquote | ||||||
| from json import loads | from json import loads | ||||||
| 
 | 
 | ||||||
| from dateutil.parser import isoparse | from dateutil.parser import isoparse | ||||||
|  | @ -185,7 +186,51 @@ def response(resp): | ||||||
|     return results |     return results | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | _IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/" | ||||||
|  | _IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/thumb/" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_thumbnail(img_src): | ||||||
|  |     """Get Thumbnail image from wikimedia commons | ||||||
|  | 
 | ||||||
|  |     Images from commons.wikimedia.org are (HTTP) redirected to | ||||||
|  |     upload.wikimedia.org.  The redirected URL can be calculated by this | ||||||
|  |     function. | ||||||
|  | 
 | ||||||
|  |     - https://stackoverflow.com/a/33691240 | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     logger.debug('get_thumbnail(): %s', img_src) | ||||||
|  |     if not img_src is None and _IMG_SRC_DEFAULT_URL_PREFIX in img_src.split()[0]: | ||||||
|  |         img_src_name = unquote(img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[0].replace("%20", "_")) | ||||||
|  |         img_src_name_first = img_src_name | ||||||
|  |         img_src_name_second = img_src_name | ||||||
|  | 
 | ||||||
|  |         if ".svg" in img_src_name.split()[0]: | ||||||
|  |             img_src_name_second = img_src_name + ".png" | ||||||
|  | 
 | ||||||
|  |         img_src_size = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[1] | ||||||
|  |         img_src_size = img_src_size[img_src_size.index("=") + 1 : img_src_size.index("&")] | ||||||
|  |         img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest() | ||||||
|  |         img_src = ( | ||||||
|  |             _IMG_SRC_NEW_URL_PREFIX | ||||||
|  |             + img_src_name_md5[0] | ||||||
|  |             + "/" | ||||||
|  |             + img_src_name_md5[0:2] | ||||||
|  |             + "/" | ||||||
|  |             + img_src_name_first | ||||||
|  |             + "/" | ||||||
|  |             + img_src_size | ||||||
|  |             + "px-" | ||||||
|  |             + img_src_name_second | ||||||
|  |         ) | ||||||
|  |         logger.debug('get_thumbnail() redirected: %s', img_src) | ||||||
|  | 
 | ||||||
|  |     return img_src | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def get_results(attribute_result, attributes, language): | def get_results(attribute_result, attributes, language): | ||||||
|  |     # pylint: disable=too-many-branches | ||||||
|     results = [] |     results = [] | ||||||
|     infobox_title = attribute_result.get('itemLabel') |     infobox_title = attribute_result.get('itemLabel') | ||||||
|     infobox_id = attribute_result['item'] |     infobox_id = attribute_result['item'] | ||||||
|  | @ -194,7 +239,7 @@ def get_results(attribute_result, attributes, language): | ||||||
|     infobox_attributes = [] |     infobox_attributes = [] | ||||||
|     infobox_content = attribute_result.get('itemDescription', []) |     infobox_content = attribute_result.get('itemDescription', []) | ||||||
|     img_src = None |     img_src = None | ||||||
|     img_src_priority = 100 |     img_src_priority = 0 | ||||||
| 
 | 
 | ||||||
|     for attribute in attributes: |     for attribute in attributes: | ||||||
|         value = attribute.get_str(attribute_result, language) |         value = attribute.get_str(attribute_result, language) | ||||||
|  | @ -220,8 +265,8 @@ def get_results(attribute_result, attributes, language): | ||||||
|                 # this attribute is an image. |                 # this attribute is an image. | ||||||
|                 # replace the current image only the priority is lower |                 # replace the current image only the priority is lower | ||||||
|                 # (the infobox contain only one image). |                 # (the infobox contain only one image). | ||||||
|                 if attribute.priority < img_src_priority: |                 if attribute.priority > img_src_priority: | ||||||
|                     img_src = value |                     img_src = get_thumbnail(value) | ||||||
|                     img_src_priority = attribute.priority |                     img_src_priority = attribute.priority | ||||||
|             elif attribute_type == WDGeoAttribute: |             elif attribute_type == WDGeoAttribute: | ||||||
|                 # geocoordinate link |                 # geocoordinate link | ||||||
|  | @ -278,6 +323,7 @@ def get_query(query, language): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def get_attributes(language): | def get_attributes(language): | ||||||
|  |     # pylint: disable=too-many-statements | ||||||
|     attributes = [] |     attributes = [] | ||||||
| 
 | 
 | ||||||
|     def add_value(name): |     def add_value(name): | ||||||
|  | @ -418,7 +464,7 @@ def get_attributes(language): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class WDAttribute: | class WDAttribute: | ||||||
| 
 |     # pylint: disable=no-self-use | ||||||
|     __slots__ = ('name',) |     __slots__ = ('name',) | ||||||
| 
 | 
 | ||||||
|     def __init__(self, name): |     def __init__(self, name): | ||||||
|  | @ -439,7 +485,7 @@ class WDAttribute: | ||||||
|     def get_group_by(self): |     def get_group_by(self): | ||||||
|         return "" |         return "" | ||||||
| 
 | 
 | ||||||
|     def get_str(self, result, language): |     def get_str(self, result, language):  # pylint: disable=unused-argument | ||||||
|         return result.get(self.name + 's') |         return result.get(self.name + 's') | ||||||
| 
 | 
 | ||||||
|     def __repr__(self): |     def __repr__(self): | ||||||
|  | @ -580,6 +626,7 @@ class WDImageAttribute(WDURLAttribute): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class WDDateAttribute(WDAttribute): | class WDDateAttribute(WDAttribute): | ||||||
|  |     # pylint: disable=no-self-use | ||||||
|     def get_select(self): |     def get_select(self): | ||||||
|         return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name) |         return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name) | ||||||
| 
 | 
 | ||||||
|  | @ -600,7 +647,7 @@ class WDDateAttribute(WDAttribute): | ||||||
|     def get_group_by(self): |     def get_group_by(self): | ||||||
|         return self.get_select() |         return self.get_select() | ||||||
| 
 | 
 | ||||||
|     def format_8(self, value, locale): |     def format_8(self, value, locale):  # pylint: disable=unused-argument | ||||||
|         # precision: less than a year |         # precision: less than a year | ||||||
|         return value |         return value | ||||||
| 
 | 
 | ||||||
|  | @ -673,7 +720,7 @@ class WDDateAttribute(WDAttribute): | ||||||
|                     else: |                     else: | ||||||
|                         value = t[0] |                         value = t[0] | ||||||
|                 return format_method(value, language) |                 return format_method(value, language) | ||||||
|             except Exception: |             except Exception:  # pylint: disable=broad-except | ||||||
|                 return value |                 return value | ||||||
|         return value |         return value | ||||||
| 
 | 
 | ||||||
|  | @ -687,7 +734,7 @@ def debug_explain_wikidata_query(query, method='GET'): | ||||||
|     return http_response.content |     return http_response.content | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def init(engine_settings=None): | def init(engine_settings=None):  # pylint: disable=unused-argument | ||||||
|     # WIKIDATA_PROPERTIES : add unit symbols |     # WIKIDATA_PROPERTIES : add unit symbols | ||||||
|     WIKIDATA_PROPERTIES.update(WIKIDATA_UNITS) |     WIKIDATA_PROPERTIES.update(WIKIDATA_UNITS) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser