forked from zaclys/searxng

fix stackoverflow and add comments

commit a46bbb4042 (parent 80f98d6041)
Author: Thomas Pointhuber

2 changed files with 38 additions and 11 deletions
Stackoverflow engine module:

@@ -1,30 +1,58 @@
+## Stackoverflow (It)
+#
+# @website     https://stackoverflow.com/
+# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
+#
+# @using-api   no
+# @results     HTML
+# @stable      no (HTML can change)
+# @parse       url, title, content
+
 from urlparse import urljoin
 from cgi import escape
 from urllib import urlencode
 from lxml import html
 
+# engine dependent config
 categories = ['it']
-
-url = 'http://stackoverflow.com/'
-search_url = url+'search?{query}&page={pageno}'
-result_xpath = './/div[@class="excerpt"]//text()'
-
 paging = True
 
+# search-url
+url = 'http://stackoverflow.com/'
+search_url = url+'search?{query}&page={pageno}'
+
+# specific xpath variables
+results_xpath = '//div[contains(@class,"question-summary")]'
+link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
+title_xpath = './/text()'
+content_xpath = './/div[@class="excerpt"]//text()'
+
 
+# do search-request
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       pageno=params['pageno'])
+
     return params
 
 
+# get response from search-request
 def response(resp):
     results = []
+
     dom = html.fromstring(resp.text)
-    for result in dom.xpath('//div[@class="question-summary search-result"]'):
-        link = result.xpath('.//div[@class="result-link"]//a')[0]
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(' '.join(link.xpath('.//text()')))
-        content = escape(' '.join(result.xpath(result_xpath)))
-        results.append({'url': href, 'title': title, 'content': content})
+        title = escape(' '.join(link.xpath(title_xpath)))
+        content = escape(' '.join(result.xpath(content_xpath)))
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    # return results
     return results
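For reference, the changed request/response logic can be exercised outside searx. Below is a minimal standalone sketch, using Python 3 stand-ins for the Python 2 imports above (urllib.parse in place of urllib/urlparse, html.escape in place of cgi.escape) and an invented HTML fragment in place of a live Stackoverflow response:

from urllib.parse import urlencode, urljoin
from html import escape
from lxml import html

url = 'http://stackoverflow.com/'
search_url = url + 'search?{query}&page={pageno}'

# xpath variables copied from the engine above
results_xpath = '//div[contains(@class,"question-summary")]'
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
title_xpath = './/text()'
content_xpath = './/div[@class="excerpt"]//text()'

# request(): the engine only fills in the URL template
print(search_url.format(query=urlencode({'q': 'python xpath'}), pageno=2))
# -> http://stackoverflow.com/search?q=python+xpath&page=2

# response(): parse an invented sample fragment; the alternation in
# link_xpath accepts both "result-link" and "summary" markup variants
sample = '''
<div class="question-summary search-result">
  <div class="summary">
    <h3><a href="/questions/1/example">Example question</a></h3>
    <div class="excerpt">Example excerpt text.</div>
  </div>
</div>'''

dom = html.fromstring(sample)
for result in dom.xpath(results_xpath):
    link = result.xpath(link_xpath)[0]
    href = urljoin(url, link.attrib.get('href'))
    title = escape(' '.join(link.xpath(title_xpath)))
    content = escape(' '.join(result.xpath(content_xpath)))
    print({'url': href, 'title': title, 'content': content})

Note that results_xpath matches on contains(@class,"question-summary") rather than an exact class string, so the loop above finds the sample div even though its class attribute also carries "search-result".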
Engine settings:

@@ -90,7 +90,6 @@ engines:
 
   - name : stackoverflow
     engine : stackoverflow
-    categories : it
     shortcut : st
 
   - name : startpage
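The dropped "categories : it" line is redundant after this commit: the engine module itself now declares categories = ['it'] under its engine dependent config, so the settings entry no longer needs to repeat it. A hypothetical sketch of that fallback (not searx's actual loader code), assuming a settings value overrides the module default when present:

from types import SimpleNamespace

# stand-in for the engine module above, which sets categories = ['it']
engine_module = SimpleNamespace(categories=['it'])

# the settings entry after this commit: no 'categories' key anymore
engine_settings = {'name': 'stackoverflow',
                   'engine': 'stackoverflow',
                   'shortcut': 'st'}

# a settings value wins when present; the module default applies otherwise
categories = engine_settings.get('categories', engine_module.categories)
print(categories)  # -> ['it'], taken from the engine module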