forked from zaclys/searxng
		
	[fix] google news engine change follow-up
parent a764ebb4b1
commit a2c94895c1
2 changed files with 57 additions and 131 deletions
searx/engines/google_news.py
@@ -1,41 +1,56 @@
 """
  Google (News)
 
- @website     https://www.google.com
- @provide-api yes (https://developers.google.com/web-search/docs/),
-              deprecated!
+ @website     https://news.google.com
+ @provide-api no
 
- @using-api   yes
- @results     JSON
- @stable      yes (but deprecated)
+ @using-api   no
+ @results     HTML
+ @stable      no
  @parse       url, title, content, publishedDate
 """
 
+from lxml import html
 from urllib import urlencode
-from json import loads
-from dateutil import parser
 
 # search-url
 categories = ['news']
 paging = True
 language_support = True
+safesearch = True
+time_range_support = True
+number_of_results = 10
 
-# engine dependent config
-url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={lang}'
+search_url = 'https://www.google.com/search'\
+    '?{query}'\
+    '&tbm=nws'\
+    '&gws_rd=cr'\
+    '&{search_options}'
+time_range_attr = "qdr:{range}"
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm'}
 
 
 # do search-request
 def request(query, params):
-    offset = (params['pageno'] - 1) * 8
 
-    language = 'en-US'
+    search_options = {
+        'start': (params['pageno'] - 1) * number_of_results
+    }
+
+    if params['time_range'] in time_range_dict:
+        search_options['tbs'] = time_range_attr.format(range=time_range_dict[params['time_range']])
+
+    if safesearch and params['safesearch']:
+        search_options['safe'] = 'on'
+
+    params['url'] = search_url.format(query=urlencode({'q': query}),
+                                      search_options=urlencode(search_options))
+
     if params['language'] != 'all':
-        language = params['language'].replace('_', '-')
-
-    params['url'] = search_url.format(offset=offset,
-                                      query=urlencode({'q': query}),
-                                      lang=language)
+        language_array = params['language'].lower().split('_')
+        params['url'] += '&lr=lang_' + language_array[0]
 
     return params
 
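As a reference for the request() changes above, here is a minimal standalone sketch (not part of the commit) of the URL the rewritten request() assembles. The engine itself targets Python 2 (from urllib import urlencode); this sketch uses the Python 3 equivalent from urllib.parse, and the query and params values are made up.

    # Sketch only: reproduces the URL-building logic of the new request()
    # with hypothetical inputs; in searx the params dict is filled in by
    # the search framework.
    from urllib.parse import urlencode

    search_url = 'https://www.google.com/search'\
        '?{query}'\
        '&tbm=nws'\
        '&gws_rd=cr'\
        '&{search_options}'
    time_range_attr = 'qdr:{range}'
    time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'}
    safesearch = True
    number_of_results = 10

    # made-up sample values mimicking what searx passes in
    params = {'pageno': 2, 'time_range': 'week',
              'safesearch': 1, 'language': 'de_DE'}

    # page 2 -> start=10; one week back -> tbs=qdr:w; safe search on
    search_options = {'start': (params['pageno'] - 1) * number_of_results}
    if params['time_range'] in time_range_dict:
        search_options['tbs'] = time_range_attr.format(
            range=time_range_dict[params['time_range']])
    if safesearch and params['safesearch']:
        search_options['safe'] = 'on'

    url = search_url.format(query=urlencode({'q': 'searx'}),
                            search_options=urlencode(search_options))
    if params['language'] != 'all':
        url += '&lr=lang_' + params['language'].lower().split('_')[0]

    print(url)
    # https://www.google.com/search?q=searx&tbm=nws&gws_rd=cr&start=10&tbs=qdr%3Aw&safe=on&lr=lang_de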
@@ -44,24 +59,21 @@ def request(query, params):
 def response(resp):
     results = []
 
-    search_res = loads(resp.text)
-
-    # return empty array if there are no results
-    if not search_res.get('responseData', {}).get('results'):
-        return []
+    dom = html.fromstring(resp.text)
 
     # parse results
-    for result in search_res['responseData']['results']:
-        # parse publishedDate
-        publishedDate = parser.parse(result['publishedDate'])
-        if 'url' not in result:
-            continue
+    for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
+        r = {
+            'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
+            'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
+            'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
+        }
 
-        # append result
-        results.append({'url': result['unescapedUrl'],
-                        'title': result['titleNoFormatting'],
-                        'publishedDate': publishedDate,
-                        'content': result['content']})
+        img = result.xpath('.//img/@src')[0]
+        if img and not img.startswith('data'):
+            r['img_src'] = img
+
+        results.append(r)
 
     # return results
     return results
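And a matching sketch (again, not part of the commit) of the new response() selectors applied to a hand-written HTML fragment that mimics the class names Google served at the time ('g', '_cnc', 'st'); the real markup is more complex and changes without notice.

    # Sketch only: same xpath expressions as the new response(), run on a
    # made-up fragment instead of a live Google News result page.
    from lxml import html

    fragment = '''
    <div class="g">
      <div class="_cnc">
        <a href="https://example.com/story"><h3>Example headline</h3></a>
      </div>
      <div class="st">Example snippet text.</div>
      <img src="https://example.com/thumb.jpg">
    </div>
    '''

    dom = html.fromstring(fragment)
    for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
        r = {
            'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
            'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
            'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
        }
        img = result.xpath('.//img/@src')[0]
        if img and not img.startswith('data'):
            r['img_src'] = img
        print(r)
    # {'url': 'https://example.com/story', 'title': 'Example headline',
    #  'content': 'Example snippet text.', 'img_src': 'https://example.com/thumb.jpg'}

Two things worth noting: the docstring still advertises publishedDate under @parse, but the new parser no longer extracts it; and result.xpath('.//img/@src')[0] assumes every result block contains an img element, so a block without one would raise an IndexError.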
The diff of the second changed file is suppressed because one or more lines are too long.
Adam Tauber