From cf09b500f35fd1bca3fc9cc853bd7ea932220e4e Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Sun, 3 Apr 2016 22:03:41 +0200 Subject: [PATCH 1/7] Add support for dokuwiki engine --- searx/engines/doku.py | 83 +++++++++++++++++++++++++++++++ tests/unit/engines/test_doku.py | 86 +++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 searx/engines/doku.py create mode 100644 tests/unit/engines/test_doku.py diff --git a/searx/engines/doku.py b/searx/engines/doku.py new file mode 100644 index 000000000..18abe75e5 --- /dev/null +++ b/searx/engines/doku.py @@ -0,0 +1,83 @@ +# Doku Wiki +# +# @website https://www.dokuwiki.org/ +# @provide-api yes +# (https://www.dokuwiki.org/devel:xmlrpc) +# +# @using-api no +# @results HTML +# @stable yes +# @parse (general) url, title, content + +from urllib import urlencode +from lxml.html import fromstring +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' +paging = False +language_support = False +number_of_results = 5 + +# search-url +# Doku is OpenSearch compatible +base_url = 'http://localhost:8090' +search_url = '/?do=search'\ + '&id={query}' +# TODO '&startRecord={offset}'\ +# TODO '&maximumRecords={limit}'\ + +# do search-request +def request(query, params): + + params['url'] = base_url +\ + search_url.format(query=urlencode({'query': query})) + + return params + + +# get response from search-request +def response(resp): + results = [] + + doc = fromstring(resp.text) + + # parse results + # Quickhits + for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): + try: + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + except: + continue + + if not res_url: + continue + + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + + # append result + results.append({'title': title, + 'content': "", + 'url': base_url + res_url}) + + # Search results + for r in 
doc.xpath('//dl[@class="search_results"]/*'): + try: + if r.tag == "dt": + res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] + title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) + elif r.tag == "dd": + content = extract_text(r.xpath('.')) + + # append result + results.append({'title': title, + 'content': content, + 'url': base_url + res_url}) + except: + continue + + if not res_url: + continue + + # return results + return results diff --git a/tests/unit/engines/test_doku.py b/tests/unit/engines/test_doku.py new file mode 100644 index 000000000..331671eeb --- /dev/null +++ b/tests/unit/engines/test_doku.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import doku +from searx.testing import SearxTestCase + + +class TestDokuEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + params = doku.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + + def test_response(self): + self.assertRaises(AttributeError, doku.response, None) + self.assertRaises(AttributeError, doku.response, []) + self.assertRaises(AttributeError, doku.response, '') + self.assertRaises(AttributeError, doku.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(doku.response(response), []) + + html = u""" +
+

Pages trouvées :

+ +
+
+ """ + response = mock.Mock(text=html) + results = doku.response(response) + self.assertEqual(doku.response(response), [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}]) + + html = u""" +
+
xvnc: 40 Occurrences trouvées
+
er = /usr/bin/Xvnc + server_args = -inetd -query localhost -once -geometry 640x480 -depth 8 -Secur... er = /usr/bin/Xvnc + server_args = -inetd -query localhost -once -geometry 800x600 -depth 8 -Secur... er = /usr/bin/Xvnc + server_args = -inetd -query localhost -once -geometry 1024x768 -depth 8 -Secu... er = /usr/bin/Xvnc + server_args = -inetd -query localhost -once -geometry 1280x1024 -depth 8 -Sec
+
postfix_mysql_tls_sasl_1404: 14 Occurrences trouvées
+
tdepasse + hosts = 127.0.0.1 + dbname = postfix + query = SELECT goto FROM alias WHERE address='%s' AND a... tdepasse + hosts = 127.0.0.1 + dbname = postfix + query = SELECT domain FROM domain WHERE domain='%s' + #optional query to use when relaying for backup MX + #query = SELECT domain FROM domain WHERE domain='%s' and backupmx =
tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine: 13 Occurrences trouvées
z gdm (ubuntu) tapez sudo /etc/init.d/gdm stop +X -query 192.168.1.2 +</code> +:) +Si vous désirez, sur la mê... ans une console (tjs sur le vieil ordi) +<code> +X -query 192.168.1.2 :1 +</code> +Un écran de login devrait ... ure. +<note tip>Rajouter "-once" à la commande "X -query 192.168.1.2 :1" permet de quitter la session et r... d'une ubuntu/kubuntu\\ +Testez d'abord que le //X -query ...// fonctionne, dans une console (CTRL-ALT-F1)
+
bind9: 12 Occurrences trouvées
+
printcmd +;; Got answer: +;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 13427 +;; flags: qr aa rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 1, ADDITIONAL: 1 + +[...] + +;; Query time: 1 msec +;; SERVER: 127.0.0.1#53(127.0.0.1) +;... ne énorme diminution du temps mis par la requête (Query time) , entre la première et la deuxième requête.
+
+ """ + response = mock.Mock(text=html) + results = doku.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 4) + self.assertEqual(results[0]['title'], 'xvnc') +# FIXME self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') +# FIXME self.assertEqual(results[0]['content'], 'This should be the content.') From f2d1a530fb8126f66967edc24132eac13dae394d Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Sun, 3 Apr 2016 22:05:03 +0200 Subject: [PATCH 2/7] Add ubuntu-fr wiki with new doku engine --- searx/settings.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/searx/settings.yml b/searx/settings.yml index 462a0bcc2..439910d92 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -337,6 +337,11 @@ engines: # number_of_results : 5 # timeout : 3.0 + - name : ubuntuwiki + engine : doku + shortcut : uw + base_url : 'http://doc.ubuntu-fr.org' + locales: en : English bg : Български (Bulgarian) From f26f0dab2e4e6a6f77ea9f04f36fe2eb2d6893df Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Mon, 4 Apr 2016 13:38:22 +0200 Subject: [PATCH 3/7] Fix pep8 E302 Cf. http://legacy.python.org/dev/peps/pep-0008/#blank-lines --- searx/engines/doku.py | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/engines/doku.py b/searx/engines/doku.py index 18abe75e5..233fd2233 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -27,6 +27,7 @@ search_url = '/?do=search'\ # TODO '&startRecord={offset}'\ # TODO '&maximumRecords={limit}'\ + # do search-request def request(query, params): From bb29a910f270648e685a54619b6b1595452bb557 Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Mon, 4 Apr 2016 13:38:57 +0200 Subject: [PATCH 4/7] Fix pep8 about too long lines Code is refactored and example data are truncated. 
--- tests/unit/engines/test_doku.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/unit/engines/test_doku.py b/tests/unit/engines/test_doku.py index 331671eeb..22ddb7a7f 100644 --- a/tests/unit/engines/test_doku.py +++ b/tests/unit/engines/test_doku.py @@ -34,17 +34,20 @@ class TestDokuEngine(SearxTestCase): """ response = mock.Mock(text=html) results = doku.response(response) - self.assertEqual(doku.response(response), [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}]) + expected = [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}] + self.assertEqual(doku.response(response), expected) html = u"""
xvnc: 40 Occurrences trouvées
er = /usr/bin/Xvnc - server_args = -inetd -query localhost -once -geometry 640x480 -depth 8 -Secur... er = /usr/bin/Xvnc - server_args = -inetd -query localhost -once -geometry 800x600 -depth 8 -Secur... er = /usr/bin/Xvnc - server_args = -inetd -query localhost -once -geometry 1024x768 -depth 8 -Secu... er = /usr/bin/Xvnc - server_args = -inetd -query localhost -once -geometry 1280x1024 -depth 8 -Sec
-
postfix_mysql_tls_sasl_1404: 14 Occurrences trouvées
+ server_args = -inetd -query localhost -geometry 640x480 ... er = /usr/bin/Xvnc + server_args = -inetd -query localhost -geometry 800x600 ... er = /usr/bin/Xvnc + server_args = -inetd -query localhost -geometry 1024x768 ... er = /usr/bin/Xvnc + server_args = -inetd -query localhost -geometry 1280x1024 -depth 8 -Sec +
postfix_mysql_tls_sasl_1404: 14 Occurrences trouvées
tdepasse hosts = 127.0.0.1 dbname = postfix @@ -53,17 +56,7 @@ class TestDokuEngine(SearxTestCase): dbname = postfix query = SELECT domain FROM domain WHERE domain='%s' #optional query to use when relaying for backup MX - #query = SELECT domain FROM domain WHERE domain='%s' and backupmx =
tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine: 13 Occurrences trouvées
z gdm (ubuntu) tapez sudo /etc/init.d/gdm stop -X -query 192.168.1.2 -</code> -:) -Si vous désirez, sur la mê... ans une console (tjs sur le vieil ordi) -<code> -X -query 192.168.1.2 :1 -</code> -Un écran de login devrait ... ure. -<note tip>Rajouter "-once" à la commande "X -query 192.168.1.2 :1" permet de quitter la session et r... d'une ubuntu/kubuntu\\ -Testez d'abord que le //X -query ...// fonctionne, dans une console (CTRL-ALT-F1)
+ #query = SELECT domain FROM domain WHERE domain='%s' and backupmx =
bind9: 12 Occurrences trouvées
printcmd ;; Got answer: @@ -74,13 +67,13 @@ Testez d'abord que le //X -query ...// fonct ;; Query time: 1 msec ;; SERVER: 127.0.0.1#53(127.0.0.1) -;... ne énorme diminution du temps mis par la requête (Query time) , entre la première et la deuxième requête.
+;... par la requête (Query time) , entre la première et la deuxième requête.
""" response = mock.Mock(text=html) results = doku.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 4) + self.assertEqual(len(results), 3) self.assertEqual(results[0]['title'], 'xvnc') # FIXME self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') # FIXME self.assertEqual(results[0]['content'], 'This should be the content.') From b0d42e8cf9b588f1b2c81d29ca0f9adb9983553f Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Tue, 5 Apr 2016 13:30:59 +0200 Subject: [PATCH 5/7] Add myself as author As requested by the contribution guide: https://asciimoo.github.io/searx/dev/contribution_guide.html --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index c5047438a..974fbeb15 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -42,3 +42,4 @@ generally made searx better: - Noemi Vanyi - Kang-min Liu - Kirill Isakov +- Guilhem Bonnefille From 2733a92383f7f8127cdf4871c8091b0489ba7356 Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Tue, 5 Apr 2016 13:31:49 +0200 Subject: [PATCH 6/7] Fix query encoding --- searx/engines/doku.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/searx/engines/doku.py b/searx/engines/doku.py index 233fd2233..93867fd0d 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -23,7 +23,7 @@ number_of_results = 5 # Doku is OpenSearch compatible base_url = 'http://localhost:8090' search_url = '/?do=search'\ - '&id={query}' + '&{query}' # TODO '&startRecord={offset}'\ # TODO '&maximumRecords={limit}'\ @@ -32,7 +32,7 @@ search_url = '/?do=search'\ def request(query, params): params['url'] = base_url +\ - search_url.format(query=urlencode({'query': query})) + search_url.format(query=urlencode({'id': query})) return params From 51cb832601499dedb38285d09c9db222a2bcab1d Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Sat, 9 Apr 2016 22:21:25 +0200 Subject: [PATCH 7/7] Comment out ubuntu-fr as it is not a general 
search engine --- searx/settings.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/searx/settings.yml b/searx/settings.yml index 439910d92..40f569e9f 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -337,10 +337,12 @@ engines: # number_of_results : 5 # timeout : 3.0 - - name : ubuntuwiki - engine : doku - shortcut : uw - base_url : 'http://doc.ubuntu-fr.org' +# Doku engine lets you access any Doku wiki instance: +# A public one or a private/corporate one. +# - name : ubuntuwiki +# engine : doku +# shortcut : uw +# base_url : 'http://doc.ubuntu-fr.org' locales: en : English