From b1234ee88947f2ce61a7df361c706622a3ec2a32 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 17 Nov 2014 10:19:23 +0100 Subject: [PATCH] [fix] startpage engine compatibility --- searx/engines/startpage.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 2adbfb3e4..16da728cd 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -1,8 +1,8 @@ -## Startpage (Web) -# +# Startpage (Web) +# # @website https://startpage.com # @provide-api no (nothing found) -# +# # @using-api no # @results HTML # @stable no (HTML can change) @@ -17,8 +17,11 @@ import re # engine dependent config categories = ['general'] -# there is a mechanism to block "bot" search (probably the parameter qid), require storing of qid's between mulitble search-calls -#paging = False +# there is a mechanism to block "bot" search +# (probably the parameter qid), require +# storing of qid's between mulitble search-calls + +# paging = False language_support = True # search-url @@ -40,11 +43,12 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['data'] = {'query': query, - 'startat': offset} + 'startat': offset} # set language if specified if params['language'] != 'all': - params['data']['with_language'] = 'lang_' + params['language'].split('_')[0] + params['data']['with_language'] = ('lang_' + + params['language'].split('_')[0]) return params @@ -54,10 +58,13 @@ def response(resp): results = [] dom = html.fromstring(resp.content) - + # parse results for result in dom.xpath(results_xpath): - link = result.xpath(link_xpath)[0] + links = result.xpath(link_xpath) + if not links: + continue + link = links[0] url = link.attrib.get('href') title = escape(link.text_content()) @@ -66,13 +73,14 @@ def response(resp): continue if result.xpath('./p[@class="desc"]'): - content = escape(result.xpath('./p[@class="desc"]')[0].text_content()) + content = escape(result.xpath('./p[@class="desc"]')[0] + .text_content()) else: content = '' # append result - results.append({'url': url, - 'title': title, + results.append({'url': url, + 'title': title, 'content': content}) # return results