[enh] engine types

This commit is contained in:
asciimoo 2013-10-15 19:11:43 +02:00
parent c3b7ed8687
commit d793c2733c
7 changed files with 34 additions and 29 deletions

View file

@ -6,7 +6,7 @@ import grequests
engine_dir = dirname(realpath(__file__))
engines = []
engines = {}
for filename in listdir(engine_dir):
modname = splitext(filename)[0]
@ -16,14 +16,16 @@ for filename in listdir(engine_dir):
engine = load_source(modname, filepath)
if not hasattr(engine, 'request') or not hasattr(engine, 'response'):
continue
engines.append(engine)
engines[modname] = engine
def default_request_params():
    """Return a fresh dict of default request parameters.

    The dict and its nested 'headers' and 'data' dicts are created anew on
    every call, so changes made to one returned value never show up in
    another.
    """
    return {'method': 'GET', 'headers': {}, 'data': {}, 'url': ''}
def make_callback(engine_name, results, callback):
    """Build a response hook that collects one engine's parsed results.

    engine_name -- value stored under the 'engine' key of every result
    results     -- list the returned hook appends to
    callback    -- callable that takes the response and returns an
                   iterable of result dicts

    Returns a function taking the response plus arbitrary keyword
    arguments (which are ignored).
    """
    def process_callback(response, **kwargs):
        for result in callback(response):
            # Tag the result in place so its originating engine is known.
            result['engine'] = engine_name
            results.append(result)
    return process_callback
def search(query, request):
@ -31,11 +33,11 @@ def search(query, request):
requests = []
results = []
user_agent = request.headers.get('User-Agent', '')
for engine in engines:
for ename, engine in engines.items():
headers = default_request_params()
headers['User-Agent'] = user_agent
request_params = engine.request(query, headers)
callback = make_callback(results, engine.response)
callback = make_callback(ename, results, engine.response)
if request_params['method'] == 'GET':
req = grequests.get(request_params['url']
,headers=headers

View file

@ -1,14 +1,19 @@
from lxml import html
from json import loads
def request(query, params):
    """Point the request parameters at DuckDuckGo's d.js endpoint.

    query  -- search terms, inserted into the URL's q= parameter
    params -- request-parameter dict; only its 'url' entry is set here

    Returns the same params dict that was passed in.
    """
    # NOTE(review): query is interpolated verbatim. If callers pass raw user
    # input, characters such as '&', '#' or spaces will corrupt the query
    # string -- confirm whether it is URL-encoded upstream.
    params['url'] = 'https://duckduckgo.com/d.js?q=%s&l=us-en&p=1&s=0' % query
    return params
def response(resp):
    """Extract search results from a DuckDuckGo d.js reply.

    resp -- object whose 'text' attribute holds the reply body

    Returns a list of dicts with 'title', 'content' and 'url' keys, taken
    from the 't', 'a' and 'u' fields of each entry. Entries with a missing
    or empty 't' are skipped. An empty list is returned when the body
    contains no '[{' marker.
    """
    text = resp.text
    start = text.find('[{')
    if start == -1:
        # Without the marker the slice below would be empty and loads()
        # would raise; treat it as "no results" instead.
        return []
    # The slice drops the two characters after the array, and [:-1] drops
    # its final element. Presumably these are the ");" closing the
    # JavaScript call and a non-result trailer entry -- TODO confirm.
    search_res = loads(text[start:-2])[:-1]
    return [{'title': r['t'], 'content': r['a'], 'url': r['u']}
            for r in search_res
            if r.get('t')]

View file

@ -1,5 +1,4 @@
import json
from searx import base_result_template
def request(query, params):
params['url'] = 'http://api.duckduckgo.com/?q=%s&format=json&pretty=0' % query
@ -10,10 +9,11 @@ def response(resp):
search_res = json.loads(resp.text)
results = []
if 'Definition' in search_res:
res = {'title' : search_res.get('Heading', '')
,'content' : search_res.get('Definition', '')
,'url' : search_res.get('AbstractURL', '')
}
results.append(base_result_template.format(**res))
if search_res.get('AbstractURL'):
res = {'title' : search_res.get('Heading', '')
,'content' : search_res.get('Definition', '')
,'url' : search_res.get('AbstractURL', '')
}
results.append(res)
return results