2022-03-31 18:45:39 +00:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
"""
|
|
|
|
Jisho (the Japanese-English dictionary)
|
|
|
|
"""
|
|
|
|
|
|
|
|
from urllib.parse import urlencode, urljoin
|
|
|
|
|
|
|
|
# about
# Descriptive metadata for this engine: where the service lives, which API
# documentation it follows, and the format/language of what it returns.
about = {
    "website": "https://jisho.org",
    "wikidata_id": "Q24568389",
    "official_api_documentation": (
        "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api"
    ),
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
    "language": "ja",
}
|
|
|
|
|
|
|
|
# Engine settings: result category and paging support.
categories = ['dictionaries']
paging = False

# All endpoints hang off the same host.
URL = 'https://jisho.org'
# Prefix for the per-word page; the entry slug is joined onto it.
BASE_URL = f'{URL}/word/'
# ``{query}`` is filled with an already url-encoded ``keyword=...`` pair.
SEARCH_URL = f'{URL}/api/v1/search/words?{{query}}'
|
|
|
|
|
|
|
|
|
|
|
|
def request(query, params):
    """Fill in the search URL for *query* and hand *params* back.

    The query is url-encoded as the ``keyword`` parameter and substituted
    into ``SEARCH_URL``; the result is stored under ``params['url']``.

    :param query: the user's search terms.
    :param params: mutable request parameters; updated in place.
    :return: the same *params* object.
    """
    encoded_keyword = urlencode({'keyword': query})
    params['url'] = SEARCH_URL.format(query=encoded_keyword)
    # NOTE(review): ``logger`` is not defined in this module's visible code;
    # presumably it is provided by the hosting framework -- confirm.
    logger.debug(f"query_url --> {params['url']}")
    return params
|
|
|
|
|
|
|
|
|
|
|
|
def response(resp):
    """Convert a Jisho API reply into a list of result dicts.

    Each kept entry under the reply's ``data`` key produces one regular
    result with ``url``, ``title`` (all written forms, comma separated) and
    ``content`` (all English definitions, cut to 300 characters).  The first
    kept entry additionally produces an infobox via ``get_infobox``.

    Entries are skipped when the first part of speech of their first sense
    is ``'Wikipedia definition'``, or when they carry no usable written form.

    :param resp: response object exposing ``json()``.
    :return: list of result dicts; empty when the reply has no ``data``.
    """
    results = []
    first_result = True

    search_results = resp.json()

    for page in search_results.get('data', []):
        # Entries that are purely from Wikipedia are excluded.
        parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
        if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
            # Previously a no-op ``pass``: the entry was not actually skipped.
            continue

        # Process alternative forms
        alt_forms = []
        for title_raw in page.get('japanese', []):
            if 'word' in title_raw:
                title = title_raw['word']
                if 'reading' in title_raw:
                    title += ' (' + title_raw['reading'] + ')'
            elif 'reading' in title_raw:
                title = title_raw['reading']
            else:
                # Neither a word nor a reading: nothing to display.
                continue
            alt_forms.append(title)

        if not alt_forms:
            # Without any form there is no title, and the infobox builder
            # would fail on its first-form lookup.
            continue

        result_url = urljoin(BASE_URL, page['slug'])
        definitions = get_definitions(page)

        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
        results.append({
            'url': result_url,
            'title': ", ".join(alt_forms),
            # Append an ellipsis only when something was actually cut off.
            'content': content[:300] + (content[300:] and '...'),
        })

        # Like Wordnik, we'll return the first result in an infobox too.
        if first_result:
            first_result = False
            results.append(get_infobox(alt_forms, result_url, definitions))

    return results
|
2022-04-02 13:21:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_definitions(page):
    """Build one ``(parts_of_speech, english_definitions, notes)`` tuple per sense.

    * ``parts_of_speech`` -- the sense's parts of speech joined with ``', '``.
    * ``english_definitions`` -- the English glosses joined with ``'; '``.
    * ``notes`` -- extra remarks as sentences ending in ``'.'``, or ``''``
      when the sense has none.

    The ``tags`` / ``info`` / ``restrictions`` fields are not documented by
    the API; how they are combined here is based on the author's assumptions.

    :param page: one entry of the API reply; must contain ``senses``.
    :return: list of 3-tuples of strings, in sense order.
    """
    collected = []
    for sense in page['senses']:
        tags = sense.get('tags')
        info = sense.get('info')
        restrictions = sense.get('restrictions')

        notes = []
        if tags and info:
            # e.g. "usually written as kana: <kana>" -- only the first tag
            # and the first info item are used.
            notes.append(', '.join((tags[0], info[0])))
        elif tags:
            # abbreviation, archaism, etc.
            notes.append(', '.join(tags))
        elif info:
            # Casing of info text is inconsistent, so normalise it.
            notes.append(', '.join(info).capitalize())
        if restrictions:
            notes.append('Only applies to: ' + ', '.join(restrictions))

        collected.append((
            ', '.join(sense['parts_of_speech']),
            '; '.join(sense['english_definitions']),
            ('. '.join(notes) + '.') if notes else '',
        ))
    return collected
|
|
|
|
|
|
|
|
|
|
|
|
def get_infobox(alt_forms, result_url, definitions):
    """Build the infobox result for one dictionary entry.

    The infobox content is HTML markup assembled here, so every piece of
    text taken from the arguments is escaped (``&``, ``<``, ``>``) before it
    is placed between the tags.

    :param alt_forms: written forms of the entry; the first one becomes the
        infobox title, the rest are listed as "Other forms".  Must not be
        empty (``IndexError`` otherwise).
    :param result_url: link to the entry's page.
    :param definitions: iterable of ``(pos, engdef, extra)`` string tuples.
    :return: dict with ``infobox`` (title), ``content`` (HTML string) and
        ``urls`` (a single link titled ``'Jisho.org'``).
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    infobox_content = []
    # title & alt_forms
    infobox_title = alt_forms[0]
    if len(alt_forms) > 1:
        other_forms = ", ".join(escape(form, quote=False) for form in alt_forms[1:])
        infobox_content.append(f'<p><i>Other forms:</i> {other_forms}</p>')

    # definitions
    infobox_content.append('''
        <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
        and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
        by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
        <ul>
        ''')
    for pos, engdef, extra in definitions:
        if pos == 'Wikipedia definition':
            # Close the current list so the differing attribution sits
            # directly above the Wikipedia-sourced items.
            infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
        pos_html = f'<i>{escape(pos, quote=False)}</i>: ' if pos else ''
        extra_html = f' ({escape(extra, quote=False)})' if extra else ''
        infobox_content.append(f'<li>{pos_html}{escape(engdef, quote=False)}{extra_html}</li>')
    infobox_content.append('</ul>')

    return {
        'infobox': infobox_title,
        'content': ''.join(infobox_content),
        'urls': [
            {
                'title': 'Jisho.org',
                'url': result_url,
            }
        ],
    }
|