mirror of https://github.com/searxng/searxng

parent d1334beb4f
commit 934ae4e086

2 changed files with 131 additions and 0 deletions
searx/engines/jisho.py  (Normal file, 125 additions)

@@ -0,0 +1,125 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Jisho (the Japanese-English dictionary)
"""

import json
from urllib.parse import urlencode, urljoin

# about
about = {
    "website": 'https://jisho.org',
    "wikidata_id": 'Q24568389',
    "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['dictionaries']
paging = False

URL = 'https://jisho.org'
BASE_URL = 'https://jisho.org/word/'
SEARCH_URL = URL + '/api/v1/search/words?{query}'


def request(query, params):
    query = urlencode({'keyword': query})
    params['url'] = SEARCH_URL.format(query=query)
    logger.debug(f"query_url --> {params['url']}")
    return params


def response(resp):
    results = []
    infoboxed = False

    search_results = json.loads(resp.text)
    pages = search_results.get('data', [])

    for page in pages:
        # Entries that are purely from Wikipedia are excluded.
        if page['senses'][0]['parts_of_speech'][0] != 'Wikipedia definition':
            # Process alternative forms
            japanese = page['japanese']
            alt_forms = []
            for title_raw in japanese:
                if 'word' not in title_raw:
                    alt_forms.append(title_raw['reading'])
                else:
                    title = title_raw['word']
                    if 'reading' in title_raw:
                        title += ' (' + title_raw['reading'] + ')'
                    alt_forms.append(title)
            # Process definitions
            definitions = []
            def_raw = page['senses']
            for defn_raw in def_raw:
                extra = ''
                if not infoboxed:
                    # Extra data. Since these fields are not documented, this implementation
                    # is based solely on the author's assumptions.
                    if defn_raw['tags'] != []:
                        if defn_raw['info'] != []:
                            extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. '  # "usually written as kana: <kana>"
                        else:
                            extra += ', '.join(defn_raw['tags']) + '. '  # abbreviation, archaism, etc.
                    elif defn_raw['info'] != []:
                        extra += ', '.join(defn_raw['info']).capitalize() + '. '  # inconsistent
                    if defn_raw['restrictions'] != []:
                        extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
                    extra = extra[:-1]
                definitions.append((
                    ', '.join(defn_raw['parts_of_speech']),
                    '; '.join(defn_raw['english_definitions']),
                    extra
                ))
            content = ''
            infobox_content = '''
                <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
                and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
                by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
                '''
            for pos, engdef, extra in definitions:
                if pos == 'Wikipedia definition':
                    infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
                if pos == '':
                    infobox_content += f"<li>{engdef}"
                else:
                    infobox_content += f"<li><i>{pos}</i>: {engdef}"
                if extra != '':
                    infobox_content += f" ({extra})"
                infobox_content += '</li>'
                content += f"{engdef}. "
            infobox_content += '</ul>'

            # For results, we'll return the URL, all alternative forms (as title),
            # and all definitions (as description) truncated to 300 characters.
            results.append({
                'url': urljoin(BASE_URL, page['slug']),
                'title': ", ".join(alt_forms),
                'content': content[:300] + (content[300:] and '...')
            })

            # Like Wordnik, we'll return the first result in an infobox too.
            if not infoboxed:
                infoboxed = True
                infobox_urls = []
                infobox_urls.append({
                    'title': 'Jisho.org',
                    'url': urljoin(BASE_URL, page['slug'])
                })
                infobox = {
                    'infobox': alt_forms[0],
                    'urls': infobox_urls
                }
                alt_forms.pop(0)
                alt_content = ''
                if len(alt_forms) > 0:
                    alt_content = '<p><i>Other forms:</i> '
                    alt_content += ", ".join(alt_forms)
                    alt_content += '</p>'
                infobox['content'] = alt_content + infobox_content
                results.append(infobox)

    return results
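The search endpoint the engine calls is only described in a forum thread (see `about` above), so it can help to look at the raw payload the code parses. Below is a minimal standard-library sketch; the keyword `house` is arbitrary, and the printed keys simply mirror the ones `response()` reads, so this illustrates the assumed API shape rather than any official documentation:

# Sketch: fetch the same endpoint request() builds and print the fields response() reads.
# Standard library only; assumes network access to jisho.org.
import json
from urllib.parse import urlencode
from urllib.request import urlopen

url = 'https://jisho.org/api/v1/search/words?' + urlencode({'keyword': 'house'})
with urlopen(url, timeout=4.0) as f:
    data = json.load(f).get('data', [])

for page in data[:3]:
    # Keys the engine consumes: 'slug', 'japanese' (word/reading pairs) and 'senses'
    sense = page['senses'][0]
    print(page['slug'], page['japanese'][0], sense['parts_of_speech'], sense['english_definitions'])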
searx/settings.yml  (6 additions)

@@ -798,6 +798,12 @@ engines:
    timeout: 3.0
    disabled: true

  - name: jisho
    engine: jisho
    shortcut: js
    timeout: 4.0
    disabled: true

  - name: kickass
    engine: kickass
    shortcut: kc
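The settings entry registers the engine under the `!js` shortcut with `disabled: true`, so it stays off until enabled in settings.yml or in the preferences UI. Independent of that, the module's two entry points can be exercised outside a running instance. A minimal sketch, assuming a searxng checkout with its dependencies on the path; `FakeResponse` is a hypothetical stub, and the `logger` attribute is assigned by hand here because the engine loader normally injects a per-engine logger into the module:

# Sketch: exercise request()/response() without a running searxng instance.
# Assumes a searxng checkout on PYTHONPATH; FakeResponse is an illustrative stub.
import json
import logging
from searx.engines import jisho

jisho.logger = logging.getLogger('jisho')  # normally injected by the engine loader

params = jisho.request('house', {})        # fills params['url'] from SEARCH_URL
print(params['url'])

class FakeResponse:
    # response() only reads the .text attribute
    text = json.dumps({'data': [{
        'slug': 'house',
        'japanese': [{'word': '家', 'reading': 'いえ'}],
        'senses': [{'parts_of_speech': ['Noun'], 'english_definitions': ['house'],
                    'tags': [], 'info': [], 'restrictions': []}],
    }]})

for result in jisho.response(FakeResponse()):
    print(result.get('title') or result.get('infobox'), result.get('url', ''))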