Merge pull request #531 from guyou/add-doku-engine

Add doku engine
This commit is contained in:
Adam Tauber 2016-04-12 13:54:12 +02:00
commit 5544fdb756
4 changed files with 171 additions and 0 deletions

View File

@ -42,3 +42,4 @@ generally made searx better:
- Noemi Vanyi
- Kang-min Liu
- Kirill Isakov
- Guilhem Bonnefille

84
searx/engines/doku.py Normal file
View File

@ -0,0 +1,84 @@
# Doku Wiki
#
# @website https://www.dokuwiki.org/
# @provide-api yes
# (https://www.dokuwiki.org/devel:xmlrpc)
#
# @using-api no
# @results HTML
# @stable yes
# @parse (general) url, title, content
from urllib import urlencode
from lxml.html import fromstring
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
paging = False            # the DokuWiki search page offers no pagination
language_support = False
number_of_results = 5

# search-url
# Doku is OpenSearch compatible
# NOTE(review): default points at a local instance; real deployments override
# base_url per-engine in settings.yml (see the commented example there)
base_url = 'http://localhost:8090'
search_url = '/?do=search'\
    '&{query}'
# TODO '&startRecord={offset}'\
# TODO '&maximumRecords={limit}'\
# build the search request
def request(query, params):
    """Fill ``params['url']`` with the DokuWiki search URL for *query*.

    The query string is urlencoded as the ``id`` parameter, which is how
    DokuWiki's ``?do=search`` endpoint expects it.
    """
    encoded_query = urlencode({'id': query})
    params['url'] = base_url + search_url.format(query=encoded_query)
    return params
# parse the search response
def response(resp):
    """Parse a DokuWiki search result page into a list of result dicts.

    Returns dicts with ``title``, ``content`` and ``url`` keys.  Two page
    sections are scraped: the "quickhits" list (page-title matches, no
    snippet) and the full-text ``search_results`` definition list, where
    each ``<dt>`` carries the link/title and the following ``<dd>`` the
    matching text snippet.
    """
    results = []
    doc = fromstring(resp.text)

    # Quickhits: title-only matches, content left empty
    for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
        try:
            res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
        except IndexError:
            # list item without a wiki link -- nothing to extract
            continue
        if not res_url:
            continue
        title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
        results.append({'title': title,
                        'content': "",
                        'url': base_url + res_url})

    # Full-text results: <dt> sets url/title, the following <dd> provides
    # the snippet.  Track state across iterations; skip a <dd> that has no
    # preceding <dt> (previously a NameError hidden by a bare except).
    res_url = None
    title = None
    for r in doc.xpath('//dl[@class="search_results"]/*'):
        try:
            if r.tag == "dt":
                res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
                title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
            elif r.tag == "dd" and res_url:
                content = extract_text(r.xpath('.'))
                results.append({'title': title,
                                'content': content,
                                'url': base_url + res_url})
        except IndexError:
            # <dt> without a wiki link -- ignore it and its <dd>
            res_url = None
            continue

    return results

View File

@ -337,6 +337,13 @@ engines:
# number_of_results : 5
# timeout : 3.0
# Doku engine lets you access any Doku wiki instance:
# A public one or a private/corporate one.
# - name : ubuntuwiki
# engine : doku
# shortcut : uw
# base_url : 'http://doc.ubuntu-fr.org'
locales:
en : English
bg : Български (Bulgarian)

View File

@ -0,0 +1,79 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import doku
from searx.testing import SearxTestCase
class TestDokuEngine(SearxTestCase):
    """Unit tests for the doku engine (searx/engines/doku.py)."""

    def test_request(self):
        # the engine must place the (urlencoded) query inside params['url']
        query = 'test_query'
        dicto = defaultdict(dict)
        params = doku.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])

    def test_response(self):
        # objects without a .text attribute must raise, not fail silently
        self.assertRaises(AttributeError, doku.response, None)
        self.assertRaises(AttributeError, doku.response, [])
        self.assertRaises(AttributeError, doku.response, '')
        self.assertRaises(AttributeError, doku.response, '[]')

        # an empty page yields an empty result list
        response = mock.Mock(text='<html></html>')
        self.assertEqual(doku.response(response), [])

        # "quickhits" section: title-only match, empty content expected
        html = u"""
        <div class="search_quickresult">
        <h3>Pages trouvées :</h3>
        <ul class="search_quickhits">
        <li> <a href="/xfconf-query" class="wikilink1" title="xfconf-query">xfconf-query</a></li>
        </ul>
        <div class="clearer"></div>
        </div>
        """
        response = mock.Mock(text=html)
        results = doku.response(response)
        expected = [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}]
        self.assertEqual(doku.response(response), expected)

        # full-text section: <dt> carries link/title, <dd> the snippet
        html = u"""
        <dl class="search_results">
        <dt><a href="/xvnc?s[]=query" class="wikilink1" title="xvnc">xvnc</a>: 40 Occurrences trouvées</dt>
        <dd>er = /usr/bin/Xvnc
        server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 640x480 ... er = /usr/bin/Xvnc
        server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 800x600 ... er = /usr/bin/Xvnc
        server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 1024x768 ... er = /usr/bin/Xvnc
        server_args = -inetd -<strong class="search_hit">query</strong> localhost -geometry 1280x1024 -depth 8 -Sec</dd>
        <dt><a href="/postfix_mysql_tls_sasl_1404?s[]=query"
        class="wikilink1"
        title="postfix_mysql_tls_sasl_1404">postfix_mysql_tls_sasl_1404</a>: 14 Occurrences trouvées</dt>
        <dd>tdepasse
        hosts = 127.0.0.1
        dbname = postfix
        <strong class="search_hit">query</strong> = SELECT goto FROM alias WHERE address='%s' AND a... tdepasse
        hosts = 127.0.0.1
        dbname = postfix
        <strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s'
        #optional <strong class="search_hit">query</strong> to use when relaying for backup MX
        #<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s' and backupmx =</dd>
        <dt><a href="/bind9?s[]=query" class="wikilink1" title="bind9">bind9</a>: 12 Occurrences trouvées</dt>
        <dd> printcmd
        ;; Got answer:
        ;; -&gt;&gt;HEADER&lt;&lt;- opcode: <strong class="search_hit">QUERY</strong>, status: NOERROR, id: 13427
        ;; flags: qr aa rd ra; <strong class="search_hit">QUERY</strong>: 1, ANSWER: 1, AUTHORITY: 1, ADDITIONAL: 1
        [...]
        ;; <strong class="search_hit">Query</strong> time: 1 msec
        ;; SERVER: 127.0.0.1#53(127.0.0.1)
        ;... par la requête (<strong class="search_hit">Query</strong> time) , entre la première et la deuxième requête.</dd>
        </dl>
        """
        response = mock.Mock(text=html)
        results = doku.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 3)
        self.assertEqual(results[0]['title'], 'xvnc')
        # FIXME self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
        # FIXME self.assertEqual(results[0]['content'], 'This should be the content.')