forked from zaclys/searxng
		
	fix Microsoft Academic engine
This commit is contained in:
		
							parent
							
								
									2f69eaeb2f
								
							
						
					
					
						commit
						988cf38196
					
				
					 2 changed files with 76 additions and 9 deletions
				
			
		
							
								
								
									
										75
									
								
								searx/engines/microsoft_academic.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								searx/engines/microsoft_academic.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,75 @@ | |||
| """ | ||||
| Microsoft Academic (Science) | ||||
| 
 | ||||
| @website     https://academic.microsoft.com | ||||
| @provide-api yes | ||||
| @using-api   no | ||||
| @results     JSON | ||||
| @stable      no | ||||
| @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from datetime import datetime | ||||
| from json import loads | ||||
| from uuid import uuid4 | ||||
| 
 | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
# Default category of the engine. Microsoft Academic returns scholarly
# publications, not pictures, so it belongs under "science" (the same
# category the engine is registered with in settings.yml).
categories = ['science']
# The engine supports paging: request() reads params['pageno'].
paging = True
# Endpoint template; {query} is replaced by the url-encoded query string
# built in request().
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
| 
 | ||||
| 
 | ||||
def request(query, params):
    """Fill ``params`` with a POST search request for Microsoft Academic.

    Sets ``params['url']`` (with a freshly generated ``correlationId``),
    two cookies (``msacademic`` and ``ai_user``), the HTTP method and the
    form data describing the search.

    :param query: search terms; sent wrapped in ``@`` characters.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['cookies']`` must already be a dict.
    :returns: the same ``params`` dict, updated in place.
    """
    # Number of results requested per page; also the step of the offset.
    page_size = 10

    correlation_id = uuid4()
    msacademic = uuid4()
    time_now = datetime.now()

    params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
    params['cookies']['msacademic'] = str(msacademic)
    params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
    params['method'] = 'POST'
    params['data'] = {
        'Query': '@{query}@'.format(query=query),
        'Limit': page_size,
        # NOTE(review): Offset is assumed to count results, not pages, so it
        # must advance by a full page per pageno; sending ``pageno - 1``
        # unscaled makes consecutive pages overlap by all but one result.
        'Offset': (params['pageno'] - 1) * page_size,
        'Filters': '',
        'OrderBy': '',
        'SortAscending': False,
    }

    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    """Convert the JSON body of ``resp`` into a list of result dicts.

    Every entry of the ``results`` array in the decoded body yields one dict
    with the keys ``url``, ``title`` and ``content``; title and content are
    passed through ``html_to_text``.

    :param resp: response object whose ``text`` attribute holds the JSON.
    :returns: list of result dicts, in the order the entries were received.
    """
    payload = loads(resp.text)

    return [
        {
            'url': _get_url(entry),
            'title': html_to_text(entry['e']['dn']),
            'content': html_to_text(_get_content(entry)),
        }
        for entry in payload['results']
    ]
| 
 | ||||
| 
 | ||||
| def _get_url(result): | ||||
|     if 's' in result['e']: | ||||
|         return result['e']['s'][0]['u'] | ||||
|     return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id']) | ||||
| 
 | ||||
| 
 | ||||
| def _get_content(result): | ||||
|     if 'd' in result['e']: | ||||
|         content = result['e']['d'] | ||||
|         if len(content) > 300: | ||||
|             return content[:300] + '...' | ||||
|         return content | ||||
| 
 | ||||
|     return '' | ||||
|  | @ -398,15 +398,7 @@ engines: | |||
|     shortcut : lo | ||||
| 
 | ||||
|   - name : microsoft academic | ||||
|     engine : json_engine | ||||
|     paging : True | ||||
|     search_url : https://academic.microsoft.com/api/search/GetEntityResults?query=%40{query}%40&filters=&offset={pageno}&limit=8&correlationId=undefined | ||||
|     results_query : results | ||||
|     url_query : u | ||||
|     title_query : dn | ||||
|     content_query : d | ||||
|     page_size : 8 | ||||
|     first_page_num : 0 | ||||
|     engine : microsoft_academic | ||||
|     categories : science | ||||
|     shortcut : ma | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Noémi Ványi
						Noémi Ványi