Merge pull request #113 from dalf/master

[fix] the bang was included in the search string, [mod] infoboxes modifications
Adam Tauber 2014-10-12 12:43:46 +02:00
commit f3d884ef45
3 changed files with 64 additions and 35 deletions

View File

@@ -116,15 +116,22 @@ def response(resp):
     if len(heading)>0:
         # TODO get infobox.meta.value where .label='article_title'
-        results.append({
-               'infobox': heading,
-               'id': infobox_id,
-               'entity': entity,
-               'content': content,
-               'img_src' : image,
-               'attributes': attributes,
-               'urls': urls,
-               'relatedTopics': relatedTopics
-               })
+        if image==None and len(attributes)==0 and len(urls)==1 and len(relatedTopics)==0 and len(content)==0:
+            results.append({
+                   'url': urls[0]['url'],
+                   'title': heading,
+                   'content': content
+                   })
+        else:
+            results.append({
+                   'infobox': heading,
+                   'id': infobox_id,
+                   'entity': entity,
+                   'content': content,
+                   'img_src' : image,
+                   'attributes': attributes,
+                   'urls': urls,
+                   'relatedTopics': relatedTopics
+                   })
 
     return results
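
The net effect in this engine: an infobox that carries nothing but a single link now degrades to an ordinary result. A minimal sketch of that branch in isolation, with the engine's variables stubbed with illustrative values (the names match the diff; the data is made up):

    # Illustrative stub values; in the engine these come from the parsed API response.
    heading = 'Example'
    content = ''
    image = None
    attributes = []
    urls = [{'title': 'Official site', 'url': 'https://example.org'}]
    relatedTopics = []

    # Same condition as the diff: no image, no attributes, no related topics,
    # no content, exactly one URL -> emit a plain link result.
    if image == None and len(attributes) == 0 and len(urls) == 1 \
            and len(relatedTopics) == 0 and len(content) == 0:
        result = {'url': urls[0]['url'], 'title': heading, 'content': content}
    # result == {'url': 'https://example.org', 'title': 'Example', 'content': ''}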

View File

@@ -2,7 +2,7 @@ import json
 from requests import get
 from urllib import urlencode
 
-resultCount=2
+resultCount=1
 urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
 urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}'
 urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
@@ -33,17 +33,20 @@ def response(resp):
     return results
 
 def getDetail(jsonresponse, wikidata_id, language):
-    result = jsonresponse.get('entities', {}).get(wikidata_id, {})
-
-    title = result.get('labels', {}).get(language, {}).get('value', None)
-    if title == None:
-        title = result.get('labels', {}).get('en', {}).get('value', wikidata_id)
-
     results = []
     urls = []
     attributes = []
-    description = result.get('descriptions', {}).get(language, {}).get('value', '')
-    if description == '':
+
+    result = jsonresponse.get('entities', {}).get(wikidata_id, {})
+
+    title = result.get('labels', {}).get(language, {}).get('value', None)
+    if title == None:
+        title = result.get('labels', {}).get('en', {}).get('value', None)
+    if title == None:
+        return results
+
+    description = result.get('descriptions', {}).get(language, {}).get('value', None)
+    if description == None:
         description = result.get('descriptions', {}).get('en', {}).get('value', '')
 
     claims = result.get('claims', {})
@@ -52,11 +55,16 @@ def getDetail(jsonresponse, wikidata_id, language):
         urls.append({ 'title' : 'Official site', 'url': official_website })
         results.append({ 'title': title, 'url' : official_website })
 
+    wikipedia_link_count = 0
     if language != 'en':
-        add_url(urls, 'Wikipedia (' + language + ')', get_wikilink(result, language + 'wiki'))
+        wikipedia_link_count += add_url(urls, 'Wikipedia (' + language + ')', get_wikilink(result, language + 'wiki'))
     wikipedia_en_link = get_wikilink(result, 'enwiki')
-    add_url(urls, 'Wikipedia (en)', wikipedia_en_link)
+    wikipedia_link_count += add_url(urls, 'Wikipedia (en)', wikipedia_en_link)
+    if wikipedia_link_count == 0:
+        misc_language = get_wiki_firstlanguage(result, 'wiki')
+        if misc_language != None:
+            add_url(urls, 'Wikipedia (' + misc_language + ')', get_wikilink(result, misc_language + 'wiki'))
 
     if language != 'en':
         add_url(urls, 'Wiki voyage (' + language + ')', get_wikilink(result, language + 'wikivoyage'))
     add_url(urls, 'Wiki voyage (en)', get_wikilink(result, 'enwikivoyage'))
@@ -105,14 +113,20 @@ def getDetail(jsonresponse, wikidata_id, language):
     if date_of_death != None:
         attributes.append({'label' : 'Date of death', 'value' : date_of_death})
 
-    results.append({
-            'infobox' : title,
-            'id' : wikipedia_en_link,
-            'content' : description,
-            'attributes' : attributes,
-            'urls' : urls
-            })
+    if len(attributes)==0 and len(urls)==2 and len(description)==0:
+        results.append({
+                'url': urls[0]['url'],
+                'title': title,
+                'content': description
+                })
+    else:
+        results.append({
+                'infobox' : title,
+                'id' : wikipedia_en_link,
+                'content' : description,
+                'attributes' : attributes,
+                'urls' : urls
+                })
 
     return results
@@ -120,7 +134,9 @@ def getDetail(jsonresponse, wikidata_id, language):
 def add_url(urls, title, url):
     if url != None:
         urls.append({'title' : title, 'url' : url})
+        return 1
+    else:
+        return 0
 
 def get_mainsnak(claims, propertyName):
     propValue = claims.get(propertyName, {})
@@ -213,3 +229,9 @@ def get_wikilink(result, wikiid):
     elif url.startswith('//'):
         url = 'https:' + url
     return url
+
+def get_wiki_firstlanguage(result, wikipatternid):
+    for k in result.get('sitelinks', {}).keys():
+        if k.endswith(wikipatternid) and len(k)==(2+len(wikipatternid)):
+            return k[0:2]
+    return None
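
Two helpers make the Wikipedia fallback above work: add_url now reports whether it actually appended a link (1 or 0), so getDetail can count the Wikipedia links it added, and get_wiki_firstlanguage picks the first sitelink whose key is a two-letter language code followed by the given suffix. A quick illustrative call against a hand-built result dict (shape assumed from the code above):

    # 'commonswiki' ends with 'wiki' but fails the length check
    # (2 + len('wiki') == 6), so only the two-letter 'frwiki' sitelink qualifies.
    result = {'sitelinks': {'frwiki': {'site': 'frwiki', 'title': 'Searx'},
                            'commonswiki': {'site': 'commonswiki', 'title': 'Searx'}}}
    get_wiki_firstlanguage(result, 'wiki')   # returns 'fr'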

View File

@@ -311,9 +311,6 @@ class Search(object):
         if not self.request_data.get('q'):
             raise Exception('noquery')
 
-        # set query
-        self.query = self.request_data['q']
-
         # set pagenumber
         pageno_param = self.request_data.get('pageno', '1')
         if not pageno_param.isdigit() or int(pageno_param) < 1:
@@ -322,8 +319,11 @@ class Search(object):
         self.pageno = int(pageno_param)
 
         # parse query, if tags are set, which change the serch engine or search-language
-        query_obj = Query(self.query, self.blocked_engines)
+        query_obj = Query(self.request_data['q'], self.blocked_engines)
         query_obj.parse_query()
 
+        # set query
+        self.query = query_obj.getSearchQuery()
+
         # get last selected language in query, if possible
         # TODO support search with multible languages
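
This last hunk is the bang fix named in the commit title: self.query used to be set from the raw 'q' parameter before parsing, so a bang such as '!wp' stayed in the string sent to the engines. Now the raw string feeds the Query parser first and self.query is read back via getSearchQuery() afterwards. A hedged sketch of the intended behaviour (the exact semantics of getSearchQuery() are inferred from this diff, not verified against the Query class):

    # Hypothetical session; blocked_engines is empty for simplicity.
    query_obj = Query('!wp duck', [])
    query_obj.parse_query()
    query_obj.getSearchQuery()   # expected: 'duck' -- the '!wp' bang selects
                                 # the engine and is stripped from the search string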