From 934ae4e086a26d1c9c8d25946b43789e55696478 Mon Sep 17 00:00:00 2001
From: Austin Huang <im@austinhuang.me>
Date: Thu, 31 Mar 2022 14:45:39 -0400
Subject: [PATCH 1/4] (feat) add jisho.org

Closes #1016
---
 searx/engines/jisho.py | 125 +++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml     |   6 ++
 2 files changed, 131 insertions(+)
 create mode 100644 searx/engines/jisho.py
diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py
new file mode 100644
index 000000000..6fab054e0
--- /dev/null
+++ b/searx/engines/jisho.py
@@ -0,0 +1,125 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Jisho (the Japanese-English dictionary)
+"""
+
+import json
+from urllib.parse import urlencode, urljoin
+
+# about
+about = {
+    "website": 'https://jisho.org',
+    "wikidata_id": 'Q24568389',
+    "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+categories = ['dictionaries']
+paging = False
+
+URL = 'https://jisho.org'
+BASE_URL = 'https://jisho.org/word/'
+SEARCH_URL = URL + '/api/v1/search/words?{query}'
+
+
+def request(query, params):
+    query = urlencode({'keyword': query})
+    params['url'] = SEARCH_URL.format(query=query)
+    logger.debug(f"query_url --> {params['url']}")
+    return params
+
+
+def response(resp):
+    results = []
+    infoboxed = False
+
+    search_results = json.loads(resp.text)
+    pages = search_results.get('data', [])
+
+    for page in pages:
+        # Entries that are purely from Wikipedia are excluded.
+        if page['senses'][0]['parts_of_speech'][0] != 'Wikipedia definition':
+            # Process alternative forms
+            japanese = page['japanese']
+            alt_forms = []
+            for title_raw in japanese:
+                if 'word' not in title_raw:
+                    alt_forms.append(title_raw['reading'])
+                else:
+                    title = title_raw['word']
+                    if 'reading' in title_raw:
+                        title += ' (' + title_raw['reading'] + ')'
+                    alt_forms.append(title)
+            # Process definitions
+            definitions = []
+            def_raw = page['senses']
+            for defn_raw in def_raw:
+                extra = ''
+                if not infoboxed:
+                    # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
+                    if defn_raw['tags'] != []:
+                        if defn_raw['info'] != []:
+                            extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
+                        else:
+                            extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
+                    elif defn_raw['info'] != []:
+                        extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
+                    if defn_raw['restrictions'] != []:
+                        extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
+                    extra = extra[:-1]
+                definitions.append((
+                    ', '.join(defn_raw['parts_of_speech']),
+                    '; '.join(defn_raw['english_definitions']),
+                    extra
+                ))
+            content = ''
+            infobox_content = '''
+                <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
+                and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
+                by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
+                '''
+            for pos, engdef, extra in definitions:
+                if pos == 'Wikipedia definition':
+                    infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
+                if pos == '':
+                    infobox_content += f"<li>{engdef}"
+                else:
+                    infobox_content += f"<li><i>{pos}</i>: {engdef}"
+                if extra != '':
+                    infobox_content += f" ({extra})"
+                infobox_content += '</li>'
+                content += f"{engdef}. "
+            infobox_content += '</ul>'
+            
+            # For results, we'll return the URL, all alternative forms (as title),
+            # and all definitions (as description) truncated to 300 characters.
+            results.append({
+                'url': urljoin(BASE_URL, page['slug']),
+                'title': ", ".join(alt_forms),
+                'content': content[:300] + (content[300:] and '...')
+            })
+
+            # Like Wordnik, we'll return the first result in an infobox too.
+            if not infoboxed:
+                infoboxed = True
+                infobox_urls = []
+                infobox_urls.append({
+                    'title': 'Jisho.org',
+                    'url': urljoin(BASE_URL, page['slug'])
+                })
+                infobox = {
+                    'infobox': alt_forms[0],
+                    'urls': infobox_urls
+                }
+                alt_forms.pop(0)
+                alt_content = ''
+                if len(alt_forms) > 0:
+                    alt_content = '<p><i>Other forms:</i> '
+                    alt_content += ", ".join(alt_forms)
+                    alt_content += '</p>'
+                infobox['content'] = alt_content + infobox_content
+                results.append(infobox)
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 9e9f1f27a..48b074545 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -798,6 +798,12 @@ engines:
     timeout: 3.0
     disabled: true
 
+  - name: jisho
+    engine: jisho
+    shortcut: js
+    timeout: 4.0
+    disabled: true
+
   - name: kickass
     engine: kickass
     shortcut: kc

From a399248f56e6975c78f617defc5ce7df2f62a828 Mon Sep 17 00:00:00 2001
From: Austin Huang <im@austinhuang.me>
Date: Fri, 1 Apr 2022 09:18:19 -0400
Subject: [PATCH 2/4] update jisho.py according to suggestions

---
 searx/engines/jisho.py | 165 +++++++++++++++++++++--------------------
 1 file changed, 84 insertions(+), 81 deletions(-)

diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py
index 6fab054e0..c1324635b 100644
--- a/searx/engines/jisho.py
+++ b/searx/engines/jisho.py
@@ -14,9 +14,11 @@ about = {
     "use_official_api": True,
     "require_api_key": False,
     "results": 'JSON',
+    "language": 'ja',
 }
 
 categories = ['dictionaries']
+engine_type = 'online_dictionary'
 paging = False
 
 URL = 'https://jisho.org'
@@ -35,91 +37,92 @@ def response(resp):
     results = []
     infoboxed = False
 
-    search_results = json.loads(resp.text)
+    search_results = resp.json()
     pages = search_results.get('data', [])
 
     for page in pages:
         # Entries that are purely from Wikipedia are excluded.
-        if page['senses'][0]['parts_of_speech'][0] != 'Wikipedia definition':
-            # Process alternative forms
-            japanese = page['japanese']
-            alt_forms = []
-            for title_raw in japanese:
-                if 'word' not in title_raw:
-                    alt_forms.append(title_raw['reading'])
-                else:
-                    title = title_raw['word']
-                    if 'reading' in title_raw:
-                        title += ' (' + title_raw['reading'] + ')'
-                    alt_forms.append(title)
-            # Process definitions
-            definitions = []
-            def_raw = page['senses']
-            for defn_raw in def_raw:
-                extra = ''
-                if not infoboxed:
-                    # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
-                    if defn_raw['tags'] != []:
-                        if defn_raw['info'] != []:
-                            extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
-                        else:
-                            extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
-                    elif defn_raw['info'] != []:
-                        extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
-                    if defn_raw['restrictions'] != []:
-                        extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
-                    extra = extra[:-1]
-                definitions.append((
-                    ', '.join(defn_raw['parts_of_speech']),
-                    '; '.join(defn_raw['english_definitions']),
-                    extra
-                ))
-            content = ''
-            infobox_content = '''
-                <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
-                and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
-                by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
-                '''
-            for pos, engdef, extra in definitions:
-                if pos == 'Wikipedia definition':
-                    infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
-                if pos == '':
-                    infobox_content += f"<li>{engdef}"
-                else:
-                    infobox_content += f"<li><i>{pos}</i>: {engdef}"
-                if extra != '':
-                    infobox_content += f" ({extra})"
-                infobox_content += '</li>'
-                content += f"{engdef}. "
-            infobox_content += '</ul>'
-            
-            # For results, we'll return the URL, all alternative forms (as title),
-            # and all definitions (as description) truncated to 300 characters.
-            results.append({
-                'url': urljoin(BASE_URL, page['slug']),
-                'title': ", ".join(alt_forms),
-                'content': content[:300] + (content[300:] and '...')
-            })
-
-            # Like Wordnik, we'll return the first result in an infobox too.
+        if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition':
+            pass
+        # Process alternative forms
+        japanese = page['japanese']
+        alt_forms = []
+        for title_raw in japanese:
+            if 'word' not in title_raw:
+                alt_forms.append(title_raw['reading'])
+            else:
+                title = title_raw['word']
+                if 'reading' in title_raw:
+                    title += ' (' + title_raw['reading'] + ')'
+                alt_forms.append(title)
+        # Process definitions
+        definitions = []
+        def_raw = page['senses']
+        for defn_raw in def_raw:
+            extra = ''
             if not infoboxed:
-                infoboxed = True
-                infobox_urls = []
-                infobox_urls.append({
-                    'title': 'Jisho.org',
-                    'url': urljoin(BASE_URL, page['slug'])
-                })
-                infobox = {
-                    'infobox': alt_forms[0],
-                    'urls': infobox_urls
-                }
-                alt_forms.pop(0)
-                alt_content = ''
-                if len(alt_forms) > 0:
-                    alt_content = '<p><i>Other forms:</i> '
-                    alt_content += ", ".join(alt_forms)
-                    alt_content += '</p>'
-                infobox['content'] = alt_content + infobox_content
-                results.append(infobox)
+                # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
+                if defn_raw['tags'] != []:
+                    if defn_raw['info'] != []:
+                        extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
+                    else:
+                        extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
+                elif defn_raw['info'] != []:
+                    extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
+                if defn_raw['restrictions'] != []:
+                    extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
+                extra = extra[:-1]
+            definitions.append((
+                ', '.join(defn_raw['parts_of_speech']),
+                '; '.join(defn_raw['english_definitions']),
+                extra
+            ))
+        content = ''
+        infobox_content = '''
+            <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
+            and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
+            by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
+            '''
+        for pos, engdef, extra in definitions:
+            if pos == 'Wikipedia definition':
+                infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
+            if pos == '':
+                infobox_content += f"<li>{engdef}"
+            else:
+                infobox_content += f"<li><i>{pos}</i>: {engdef}"
+            if extra != '':
+                infobox_content += f" ({extra})"
+            infobox_content += '</li>'
+            content += f"{engdef}. "
+        infobox_content += '</ul>'
+        
+        # For results, we'll return the URL, all alternative forms (as title),
+        # and all definitions (as description) truncated to 300 characters.
+        results.append({
+            'url': urljoin(BASE_URL, page['slug']),
+            'title': ", ".join(alt_forms),
+            'content': content[:300] + (content[300:] and '...')
+        })
+
+        # Like Wordnik, we'll return the first result in an infobox too.
+        if not infoboxed:
+            infoboxed = True
+            infobox_urls = []
+            infobox_urls.append({
+                'title': 'Jisho.org',
+                'url': urljoin(BASE_URL, page['slug'])
+            })
+            infobox = {
+                'infobox': alt_forms[0],
+                'urls': infobox_urls
+            }
+            alt_forms.pop(0)
+            alt_content = ''
+            if len(alt_forms) > 0:
+                alt_content = '<p><i>Other forms:</i> '
+                alt_content += ", ".join(alt_forms)
+                alt_content += '</p>'
+            infobox['content'] = alt_content + infobox_content
+            results.append(infobox)
 
     return results

From 19fa0095a0ab12ed1f7a79d91edf862faf6fdfcf Mon Sep 17 00:00:00 2001
From: Austin Huang <im@austinhuang.me>
Date: Fri, 1 Apr 2022 09:23:24 -0400
Subject: [PATCH 3/4] (fix) satisfy the linter, and btw reduce timeout

---
 searx/engines/jisho.py | 1 -
 searx/settings.yml     | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py
index c1324635b..a34d8e421 100644
--- a/searx/engines/jisho.py
+++ b/searx/engines/jisho.py
@@ -3,7 +3,6 @@
 Jisho (the Japanese-English dictionary)
 """
 
-import json
 from urllib.parse import urlencode, urljoin
 
 # about
diff --git a/searx/settings.yml b/searx/settings.yml
index 48b074545..eee0e1d7d 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -801,7 +801,7 @@ engines:
   - name: jisho
     engine: jisho
     shortcut: js
-    timeout: 4.0
+    timeout: 3.0
     disabled: true
 
   - name: kickass

From 74c7aee9ec52e6b954e48817501a334f23a40e25 Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Sat, 2 Apr 2022 15:21:58 +0200
Subject: [PATCH 4/4] jisho : code refactoring

---
 searx/engines/jisho.py | 143 ++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 67 deletions(-)

diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py
index a34d8e421..87bbe983d 100644
--- a/searx/engines/jisho.py
+++ b/searx/engines/jisho.py
@@ -17,7 +17,6 @@ about = {
 }
 
 categories = ['dictionaries']
-engine_type = 'online_dictionary'
 paging = False
 
 URL = 'https://jisho.org'
@@ -34,19 +33,19 @@ def request(query, params):
 
 def response(resp):
     results = []
-    infoboxed = False
+    first_result = True
 
     search_results = resp.json()
-    pages = search_results.get('data', [])
 
-    for page in pages:
+    for page in search_results.get('data', []):
         # Entries that are purely from Wikipedia are excluded.
-        if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition':
+        parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
+        if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
             pass
+
         # Process alternative forms
-        japanese = page['japanese']
         alt_forms = []
-        for title_raw in japanese:
+        for title_raw in page['japanese']:
             if 'word' not in title_raw:
                 alt_forms.append(title_raw['reading'])
             else:
@@ -54,74 +53,84 @@ def response(resp):
                 if 'reading' in title_raw:
                     title += ' (' + title_raw['reading'] + ')'
                 alt_forms.append(title)
-        # Process definitions
-        definitions = []
-        def_raw = page['senses']
-        for defn_raw in def_raw:
-            extra = ''
-            if not infoboxed:
-                # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
-                if defn_raw['tags'] != []:
-                    if defn_raw['info'] != []:
-                        extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
-                    else:
-                        extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
-                elif defn_raw['info'] != []:
-                    extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
-                if defn_raw['restrictions'] != []:
-                    extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
-                extra = extra[:-1]
-            definitions.append((
-                ', '.join(defn_raw['parts_of_speech']),
-                '; '.join(defn_raw['english_definitions']),
-                extra
-            ))
-        content = ''
-        infobox_content = '''
-            <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
-            and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
-            by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
-            '''
-        for pos, engdef, extra in definitions:
-            if pos == 'Wikipedia definition':
-                infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
-            if pos == '':
-                infobox_content += f"<li>{engdef}"
-            else:
-                infobox_content += f"<li><i>{pos}</i>: {engdef}"
-            if extra != '':
-                infobox_content += f" ({extra})"
-            infobox_content += '</li>'
-            content += f"{engdef}. "
-        infobox_content += '</ul>'
         
+        # Resolve the entry URL and its definitions once; the result and the infobox both reuse them.
+        result_url = urljoin(BASE_URL, page['slug'])
+        definitions = get_definitions(page)
+
         # For results, we'll return the URL, all alternative forms (as title),
         # and all definitions (as description) truncated to 300 characters.
+        content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
         results.append({
-            'url': urljoin(BASE_URL, page['slug']),
+            'url': result_url,
             'title': ", ".join(alt_forms),
             'content': content[:300] + (content[300:] and '...')
         })
 
         # Like Wordnik, we'll return the first result in an infobox too.
-        if not infoboxed:
-            infoboxed = True
-            infobox_urls = []
-            infobox_urls.append({
-                'title': 'Jisho.org',
-                'url': urljoin(BASE_URL, page['slug'])
-            })
-            infobox = {
-                'infobox': alt_forms[0],
-                'urls': infobox_urls
-            }
-            alt_forms.pop(0)
-            alt_content = ''
-            if len(alt_forms) > 0:
-                alt_content = '<p><i>Other forms:</i> '
-                alt_content += ", ".join(alt_forms)
-                alt_content += '</p>'
-            infobox['content'] = alt_content + infobox_content
-            results.append(infobox)
+        if first_result:
+            first_result = False
+            results.append(get_infobox(alt_forms, result_url, definitions))
 
     return results
+
+
+def get_definitions(page):
+    # Process definitions
+    definitions = []
+    for defn_raw in page['senses']:
+        extra = []
+        # Extra data. Since they're not documented, this implementation is based solely on the author's assumptions.
+        if defn_raw.get('tags'):
+            if defn_raw.get('info'):
+                # "usually written as kana: <kana>"
+                extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ')
+            else:
+                # abbreviation, archaism, etc.
+                extra.append(', '.join(defn_raw['tags']) + '. ')
+        elif defn_raw.get('info'):
+            # inconsistent
+            extra.append(', '.join(defn_raw['info']).capitalize() + '. ')
+        if defn_raw.get('restrictions'):
+            extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ')
+        definitions.append((
+            ', '.join(defn_raw['parts_of_speech']),
+            '; '.join(defn_raw['english_definitions']),
+            ''.join(extra)[:-1],
+        ))
+    return definitions
+
+
+def get_infobox(alt_forms, result_url, definitions):
+    infobox_content = []
+    # title & alt_forms
+    infobox_title = alt_forms[0]
+    if len(alt_forms) > 1:
+        infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>')
+
+    # definitions
+    infobox_content.append('''
+        <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
+        and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
+        by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
+        <ul>
+    ''')
+    for pos, engdef, extra in definitions:
+        if pos == 'Wikipedia definition':
+            infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
+        pos = f'<i>{pos}</i>: ' if pos else ''
+        extra = f' ({extra})' if extra else ''
+        infobox_content.append(f'<li>{pos}{engdef}{extra}</li>')
+    infobox_content.append('</ul>')
+
+    # Assemble the infobox result: title, HTML content and a link back to the entry.
+    return {
+        'infobox': infobox_title,
+        'content': ''.join(infobox_content),
+        'urls': [
+            {
+                'title': 'Jisho.org',
+                'url': result_url,
+            }
+        ]
+    }