# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Jisho (the Japanese-English dictionary)
"""

from urllib.parse import urlencode, urljoin

# about
about = {
    "website": 'https://jisho.org',
    "wikidata_id": 'Q24568389',
    "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
    "language": 'ja',
}

categories = ['dictionaries']
paging = False

URL = 'https://jisho.org'
BASE_URL = 'https://jisho.org/word/'
SEARCH_URL = URL + '/api/v1/search/words?{query}'


def request(query, params):
    """Store the Jisho search URL for ``query`` in ``params['url']``.

    The query is URL-encoded as the ``keyword`` parameter of ``SEARCH_URL``.
    Returns the same ``params`` dict that was passed in.
    """
    query = urlencode({'keyword': query})
    params['url'] = SEARCH_URL.format(query=query)
    # NOTE(review): ``logger`` is not defined in this file; presumably it is
    # injected into the module by the engine loader -- confirm.
    logger.debug(f"query_url --> {params['url']}")
    return params


def response(resp):
    """Convert a Jisho API response into a list of result dicts.

    ``resp.json()`` is expected to return a dict whose ``data`` key holds the
    list of dictionary entries (a missing key yields no results).

    For every entry that is kept, one result with ``url``, ``title`` and
    ``content`` is appended. The first kept entry is additionally returned
    as an infobox (see ``get_infobox``).
    """
    results = []
    first_result = True

    search_results = resp.json()

    for page in search_results.get('data', []):
        # Entries that are purely from Wikipedia are excluded: an entry is
        # treated as such when the first part of speech of its first sense
        # is 'Wikipedia definition'.
        parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
        if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
            # Must be ``continue``: a bare ``pass`` would fall through and
            # still emit the entry, contradicting the exclusion above.
            continue

        # Process alternative forms
        alt_forms = []
        for title_raw in page['japanese']:
            if 'word' not in title_raw:
                # Form without a 'word' key: only the reading is available.
                alt_forms.append(title_raw['reading'])
            else:
                title = title_raw['word']
                if 'reading' in title_raw:
                    title += ' (' + title_raw['reading'] + ')'
                alt_forms.append(title)

        result_url = urljoin(BASE_URL, page['slug'])
        definitions = get_definitions(page)

        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
        results.append({
            'url': result_url,
            'title': ", ".join(alt_forms),
            # Append an ellipsis only when something was actually cut off.
            'content': content[:300] + (content[300:] and '...'),
        })

        # Like Wordnik, we'll return the first result in an infobox too.
        if first_result:
            first_result = False
            results.append(get_infobox(alt_forms, result_url, definitions))

    return results


def get_definitions(page):
    """Extract the definitions of one entry.

    Returns a list with one ``(parts_of_speech, english_definitions, extra)``
    tuple of strings per item in ``page['senses']``. ``extra`` is built from
    the sense's ``tags``, ``info`` and ``restrictions`` and is an empty
    string when none of them are present.
    """
    # Process definitions
    definitions = []
    for defn_raw in page['senses']:
        extra = []
        # Extra data. Since they're not documented, this implementation is
        # based solely on the author's assumptions.
        if defn_raw.get('tags'):
            if defn_raw.get('info'):
                # "usually written as kana: <kana>"
                extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ')
            else:
                # abbreviation, archaism, etc.
                extra.append(', '.join(defn_raw['tags']) + '. ')
        elif defn_raw.get('info'):
            # inconsistent
            extra.append(', '.join(defn_raw['info']).capitalize() + '. ')
        if defn_raw.get('restrictions'):
            extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ')
        definitions.append((
            ', '.join(defn_raw['parts_of_speech']),
            '; '.join(defn_raw['english_definitions']),
            # Every piece ends with '. '; drop the final trailing space.
            ''.join(extra)[:-1],
        ))
    return definitions


def get_infobox(alt_forms, result_url, definitions):
    """Build the infobox dict for one entry.

    ``alt_forms`` must be non-empty: its first item becomes the infobox
    title and any remaining items are listed as "Other forms".
    ``definitions`` is a list of ``(pos, engdef, extra)`` tuples as produced
    by ``get_definitions``. ``result_url`` is used as the single link.

    NOTE(review): the strings are interpolated into HTML without escaping;
    whether the consumer escapes or sanitises them is not visible here.
    """
    infobox_content = []

    # title & alt_forms
    infobox_title = alt_forms[0]
    if len(alt_forms) > 1:
        infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>')

    # definitions
    infobox_content.append('''
        <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
        and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
        by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
        <ul>
        ''')
    for pos, engdef, extra in definitions:
        if pos == 'Wikipedia definition':
            # Close the JMdict list and open a separately attributed one.
            infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
        pos = f'<i>{pos}</i>: ' if pos else ''
        extra = f' ({extra})' if extra else ''
        infobox_content.append(f'<li>{pos}{engdef}{extra}</li>')
    infobox_content.append('</ul>')

    return {
        'infobox': infobox_title,
        'content': ''.join(infobox_content),
        'urls': [
            {
                'title': 'Jisho.org',
                'url': result_url,
            }
        ],
    }