forked from zaclys/searxng

[enh] removing result html tags

parent 14a53e3430
commit 17bf00ee42

4 changed files with 7 additions and 6 deletions
@@ -1,5 +1,6 @@
 from json import loads
 from urllib import urlencode
+from searx.utils import html_to_text
 
 url = 'https://duckduckgo.com/'
 search_url = url + 'd.js?{query}&l=us-en&p=1&s=0'
@@ -16,7 +17,7 @@ def response(resp):
         if not r.get('t'):
             continue
         results.append({'title': r['t']
-                       ,'content': r['a']
+                       ,'content': html_to_text(r['a'])
                        ,'url': r['u']
                        })
     return results
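Note: the DuckDuckGo engine now runs the result abstract through searx.utils.html_to_text instead of passing the raw HTML snippet along. As a rough sketch of what such a helper does (the actual searx.utils implementation may differ), a tag stripper can be built on Python 2's HTMLParser:

    # Sketch only; illustrates the kind of tag stripping the engine now relies on.
    from HTMLParser import HTMLParser

    class _TextExtractor(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.parts = []

        def handle_data(self, data):
            self.parts.append(data)

    def html_to_text(markup):
        parser = _TextExtractor()
        parser.feed(markup)
        parser.close()
        return ' '.join(''.join(parser.parts).split())

    print(html_to_text('<b>DuckDuckGo</b> is a search engine'))
    # -> DuckDuckGo is a search engine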
@@ -1,4 +1,4 @@
-from urllib import quote
+from urllib import urlencode
 from lxml import html
 from urlparse import urlparse
 from cgi import escape
@@ -8,7 +8,7 @@ search_url = base_url+'do/search'
 
 def request(query, params):
     global search_url
-    query = quote(query.replace(' ', '+'), safe='+')
+    query = urlencode({'q': query})[2:]
     params['url'] = search_url
     params['method'] = 'POST'
     params['data'] = {'query': query}
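Note on the request() change above: the hand-rolled quote() call is swapped for urlencode. The [2:] slice works because urlencode({'q': query}) returns a string of the form 'q=<encoded query>', so dropping the first two characters leaves only the encoded value:

    from urllib import urlencode  # Python 2

    print(urlencode({'q': 'foo bar&baz'}))       # q=foo+bar%26baz
    print(urlencode({'q': 'foo bar&baz'})[2:])   # foo+bar%26baz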
@@ -1,6 +1,7 @@
 from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
+from cgi import escape
 
 categories = ['social media']
 
@@ -21,6 +22,6 @@ def response(resp):
         link = tweet.xpath('.//small[@class="time"]//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()'))
-        content = ''.join(map(html.tostring, tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//*')))
+        content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()')))
         results.append({'url': url, 'title': title, 'content': content})
     return results
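In the Twitter engine, the tweet body was previously rebuilt by serializing child elements with html.tostring, which left markup in the result content; it is now assembled from text nodes only and escaped. A small illustration with made-up markup:

    from lxml import html
    from cgi import escape

    # Hypothetical tweet markup, for illustration only.
    tweet = html.fromstring('<p class="tweet-text">hello <a href="#">world</a></p>')

    # Text nodes only, escaped for safe display; no tags survive.
    content = escape(''.join(tweet.xpath('.//text()')))
    print(content)  # hello world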
@@ -46,12 +46,11 @@ def request(query, params):
 def response(resp):
     results = []
     dom = html.fromstring(resp.text)
-    query = resp.search_params['query']
     if results_xpath:
         for result in dom.xpath(results_xpath):
             url = extract_url(result.xpath(url_xpath))
             title = ' '.join(result.xpath(title_xpath))
-            content = escape(' '.join(result.xpath(content_xpath))).replace(query, '<b>{0}</b>'.format(query))
+            content = escape(' '.join(result.xpath(content_xpath)))
             results.append({'url': url, 'title': title, 'content': content})
     else:
         for content, url, title in zip(dom.xpath(content_xpath), map(extract_url, dom.xpath(url_xpath)), dom.xpath(title_xpath)):
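The generic xpath engine keeps cgi.escape but drops the post-escape <b>…</b> query highlighting, so its content is plain escaped text as well. For reference, escape neutralizes markup characters rather than removing them:

    from cgi import escape  # Python 2

    print(escape('10 < 20 & <b>bold</b>'))
    # -> 10 &lt; 20 &amp; &lt;b&gt;bold&lt;/b&gt;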
asciimoo