forked from zaclys/searxng
		
	
						commit
						98e6b4d830
					
				
					 3 changed files with 231 additions and 0 deletions
				
			
		
							
								
								
									
										96
									
								
								searx/engines/bing_videos.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								searx/engines/bing_videos.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,96 @@ | |||
| """ | ||||
|  Bing (Videos) | ||||
| 
 | ||||
|  @website     https://www.bing.com/videos | ||||
|  @provide-api yes (http://datamarket.azure.com/dataset/bing/search) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no | ||||
|  @parse       url, title, content, thumbnail | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| 
 | ||||
# engine capabilities
categories = ['videos']
paging = True
safesearch = True
time_range_support = True
number_of_results = 10

# URL template; {query}, {offset} and {number_of_results} are substituted
# by request()
search_url = (
    'https://www.bing.com/videos/asyncv2?{query}&async=content&'
    'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5'
)

# optional filter appended to the URL when a known time range is requested
time_range_string = '&qft=+filterui:videoage-lt{interval}'

# searx time range name -> value substituted for {interval}
# NOTE(review): values look like minutes (1440 = 24 * 60) -- confirm
time_range_dict = {
    'day': '1440',
    'week': '10080',
    'month': '43200',
    'year': '525600',
}

# safesearch definitions: searx safesearch level -> value of the ADLT cookie
safesearch_types = {
    2: 'STRICT',
    1: 'DEMOTE',
    0: 'OFF',
}
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
def request(query, params):
    """Fill ``params`` with the URL and cookies for a Bing video search.

    Reads ``params['pageno']``, ``params['safesearch']``,
    ``params['language']`` and (optionally) ``params['time_range']``; writes
    ``params['url']`` and the ``SRCHHPGUSR`` / ``_EDGE_S`` entries of
    ``params['cookies']``.

    :param query: search terms, URL-encoded here as the ``q`` parameter
    :param params: request parameter dict, modified in place
    :returns: the same ``params`` dict
    """
    # ``first`` is 1-based; the page stride must match the ``count`` sent in
    # the URL, so derive it from number_of_results instead of a literal 10
    offset = (params['pageno'] - 1) * number_of_results + 1

    # safesearch cookie; unknown levels fall back to 'DEMOTE'
    params['cookies']['SRCHHPGUSR'] = \
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

    # language cookie
    params['cookies']['_EDGE_S'] = 'mkt=' + params['language'].lower() + '&F=1'

    # query and paging
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=offset,
                                      number_of_results=number_of_results)

    # time range: only appended for the ranges listed in time_range_dict;
    # .get() keeps a params dict without a 'time_range' entry from failing
    time_range = params.get('time_range')
    if time_range in time_range_dict:
        params['url'] += time_range_string.format(interval=time_range_dict[time_range])

    return params
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
def response(resp):
    """Parse a Bing video result page into a list of searx result dicts.

    Each ``<div class="dg_u">`` element yields at most one result with the
    keys ``url``, ``title``, ``content``, ``thumbnail`` and ``template``.
    A tile that lacks a usable external URL is skipped instead of aborting
    the whole page.

    :param resp: response object; only its ``text`` attribute is read
    :returns: list of result dicts, at most ``number_of_results`` long
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="dg_u"]'):
        url = None

        # preferred source: the JSON event payload carrying the page url
        payload = result.xpath('.//div[@class="sa_wrapper"]/@data-eventpayload')
        if payload:
            try:
                url = loads(payload[0]).get('purl')
            except (ValueError, AttributeError):
                # malformed JSON or a non-object payload: fall back to the link
                url = None

        if not url:
            hrefs = result.xpath('./a/@href')

            # discard results that do not return an external url
            # very recent results sometimes don't return the video's url
            if not hrefs or hrefs[0].startswith('/videos/search?'):
                continue
            url = hrefs[0]

        title = extract_text(result.xpath('./a//div[@class="tl"]'))
        content = extract_text(result.xpath('.//div[@class="pubInfo"]'))

        # a missing thumbnail should not drop the result; use an empty string
        thumbnails = result.xpath('.//div[@class="vthumb"]/img/@src')
        thumbnail = thumbnails[0] if thumbnails else ''

        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'thumbnail': thumbnail,
                        'template': 'videos.html'})

        # first page ignores requested number of results
        if len(results) >= number_of_results:
            break

    return results
|  | @ -81,6 +81,10 @@ engines: | |||
|     engine : bing_news | ||||
|     shortcut : bin | ||||
| 
 | ||||
|   - name : bing videos | ||||
|     engine : bing_videos | ||||
|     shortcut : biv | ||||
| 
 | ||||
|   - name : bitbucket | ||||
|     engine : xpath | ||||
|     paging : True | ||||
|  |  | |||
							
								
								
									
										131
									
								
								tests/unit/engines/test_bing_videos.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								tests/unit/engines/test_bing_videos.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,131 @@ | |||
| # -*- coding: utf-8 -*- | ||||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import bing_videos | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
class TestBingVideosEngine(SearxTestCase):
    """Unit tests for ``request`` and ``response`` of the bing_videos engine."""

    def test_request(self):
        """``request`` sets the URL and both cookies from the given params."""
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['language'] = 'fr-FR'
        dicto['safesearch'] = 0
        dicto['time_range'] = ''
        params = bing_videos.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('bing.com', params['url'])
        self.assertIn('SRCHHPGUSR', params['cookies'])
        self.assertIn('OFF', params['cookies']['SRCHHPGUSR'])
        self.assertIn('_EDGE_S', params['cookies'])
        self.assertIn('fr-fr', params['cookies']['_EDGE_S'])

        # second page, with a time range and strict safesearch
        dicto['pageno'] = 2
        dicto['time_range'] = 'day'
        dicto['safesearch'] = 2
        params = bing_videos.request(query, dicto)
        self.assertIn('first=11', params['url'])
        self.assertIn('1440', params['url'])
        self.assertIn('SRCHHPGUSR', params['cookies'])
        self.assertIn('STRICT', params['cookies']['SRCHHPGUSR'])

    def test_response(self):
        """``response`` rejects non-response inputs and parses result tiles."""
        # inputs without a ``text`` attribute
        self.assertRaises(AttributeError, bing_videos.response, None)
        self.assertRaises(AttributeError, bing_videos.response, [])
        self.assertRaises(AttributeError, bing_videos.response, '')
        self.assertRaises(AttributeError, bing_videos.response, '[]')

        # a document without result tiles yields no results
        response = mock.Mock(text='<html></html>')
        self.assertEqual(bing_videos.response(response), [])

        # The url comes from the JSON event payload even though the link is
        # internal. The quotes inside the attribute must be written as &quot;
        # or the attribute value ends right after the opening brace.
        html = """
        <div>
            <div class="dg_u">
                <a class="dv_i" href="/videos/search?abcde">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_1.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 1
                            </div>
                        </div>
                    </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 1</div>
                        </div>
                    </div>
                </a>
                <div class="sa_wrapper"
                    data-eventpayload="{&quot;purl&quot;: &quot;https://url.com/1&quot;}">
                </div>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = bing_videos.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'Title 1')
        self.assertEqual(results[0]['url'], 'https://url.com/1')
        self.assertEqual(results[0]['content'], 'Content 1')
        self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg')

        # no payload: the external link is used and the tile that only has an
        # internal /videos/search? link is discarded
        html = """
        <div>
            <div class="dg_u">
                <a class="dv_i" href="https://url.com/1">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_1.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 1
                            </div>
                        </div>
                    </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 1</div>
                        </div>
                    </div>
                </a>
            </div>
            <div class="dg_u">
                <a class="dv_i" href="/videos/search?abcde">
                    <div class="vthblock">
                        <div class="vthumb">
                            <img src="thumb_2.jpg" />
                        </div>
                        <div>
                            <div class="tl">
                                Title 2
                            </div>
                        </div>
                    </div>
                    <div class="videoInfoPanel">
                        <div class="pubInfo">
                            <div>Content 2</div>
                        </div>
                    </div>
                </a>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = bing_videos.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'Title 1')
        self.assertEqual(results[0]['url'], 'https://url.com/1')
        self.assertEqual(results[0]['content'], 'Content 1')
        self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg')
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber