[fix] gigablast params ++ json response format

This commit is contained in:
Adam Tauber 2016-01-31 13:24:09 +01:00
parent e061c6e059
commit 37035b7a40
1 changed file with 12 additions and 16 deletions

View File

@@ -10,11 +10,11 @@
@parse url, title, content @parse url, title, content
""" """
from urllib import urlencode
from cgi import escape from cgi import escape
from lxml import etree from json import loads
from random import randint from random import randint
from time import time from time import time
from urllib import urlencode
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
@@ -27,11 +27,11 @@ safesearch = True
base_url = 'https://gigablast.com/' base_url = 'https://gigablast.com/'
search_string = 'search?{query}'\ search_string = 'search?{query}'\
'&n={number_of_results}'\ '&n={number_of_results}'\
'&c=main'\
'&s={offset}'\ '&s={offset}'\
'&format=xml'\ '&format=json'\
'&qh=0'\ '&qh=0'\
'&rxiyd={rxiyd}'\ '&rxiwd={rxiwd}'\
'&rand={rand}'\
'&qlang={lang}'\ '&qlang={lang}'\
'&ff={safesearch}' '&ff={safesearch}'
@@ -59,8 +59,8 @@ def request(query, params):
search_path = search_string.format(query=urlencode({'q': query}), search_path = search_string.format(query=urlencode({'q': query}),
offset=offset, offset=offset,
number_of_results=number_of_results, number_of_results=number_of_results,
rxiyd=randint(10000, 10000000), rxiwd=1,
rand=int(time()), # rand=int(time()),
lang=language, lang=language,
safesearch=safesearch) safesearch=safesearch)
@@ -73,18 +73,14 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
dom = etree.fromstring(resp.content)
# parse results # parse results
for result in dom.xpath(results_xpath): response_json = loads(resp.text)
url = result.xpath(url_xpath)[0].text
title = result.xpath(title_xpath)[0].text
content = escape(result.xpath(content_xpath)[0].text)
for result in response_json['results']:
# append result # append result
results.append({'url': url, results.append({'url': result['url'],
'title': title, 'title': escape(result['title']),
'content': content}) 'content': escape(result['sum'])})
# return results # return results
return results return results