Merge pull request #830 from davidar/se

Semantic Scholar
2024-01-01 19:24:07 +01:00 · 2017-01-18 23:50:11 +01:00 · 2017-01-18 23:50:11 +01:00 · 57149661e4
commit 57149661e4
parent b1d49bacb0 7814d4b796
2 changed files with 15 additions and 4 deletions
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -31,8 +31,6 @@ if xpath_results is a string element, then it's already done
 def extract_text(xpath_results):
    if type(xpath_results) == list:
        # it's list of result : concat everything using recursive call
-        if not xpath_results:
-            raise Exception('Empty url resultset')
        result = ''
        for e in xpath_results:
            result = result + extract_text(e)
@@ -48,6 +46,8 @@ def extract_text(xpath_results):


 def extract_url(xpath_results, search_url):
+    if xpath_results == []:
+        raise Exception('Empty url resultset')
    url = extract_text(xpath_results)

    if url.startswith('//'):
@@ -103,8 +103,8 @@ def response(resp):
    if results_xpath:
        for result in dom.xpath(results_xpath):
            url = extract_url(result.xpath(url_xpath), search_url)
-            title = extract_text(result.xpath(title_xpath)[0])
-            content = extract_text(result.xpath(content_xpath)[0])
+            title = extract_text(result.xpath(title_xpath))
+            content = extract_text(result.xpath(content_xpath))
            results.append({'url': url, 'title': title, 'content': content})
    else:
        for url, title, content in zip(
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -462,6 +462,17 @@ engines:
 #        - ...
 #    disabled : True

+  - name : semantic scholar
+    engine : xpath
+    paging : True
+    search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false
+    results_xpath : //article
+    url_xpath : .//div[@class="search-result-title"]/a/@href
+    title_xpath : .//div[@class="search-result-title"]/a
+    content_xpath : .//div[@class="search-result-abstract"]
+    shortcut : se
+    categories : science
+
  - name : spotify
    engine : spotify
    shortcut : stf