mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #1186 from kvch/fix-bing-videos
Fix Bing videos engine
This commit is contained in:
		
						commit
						360f8fab97
					
				
					 2 changed files with 40 additions and 85 deletions
				
			
		|  | @ -69,22 +69,11 @@ def response(resp): | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     for result in dom.xpath('//div[@class="dg_u"]'): |     for result in dom.xpath('//div[@class="dg_u"]'): | ||||||
| 
 |         url = result.xpath('./div[@class="mc_vtvc"]/a/@href')[0] | ||||||
|         # try to extract the url |         url = 'https://bing.com' + url | ||||||
|         url_container = result.xpath('.//div[@class="sa_wrapper"]/@data-eventpayload') |         title = extract_text(result.xpath('./div/a/div/div[@class="mc_vtvc_title"]/@title')) | ||||||
|         if len(url_container) > 0: |         content = extract_text(result.xpath('./div/a/div/div/div/div/text()')) | ||||||
|             url = loads(url_container[0])['purl'] |         thumbnail = result.xpath('./div/a/div/div/img/@src')[0] | ||||||
|         else: |  | ||||||
|             url = result.xpath('./a/@href')[0] |  | ||||||
| 
 |  | ||||||
|             # discard results that do not return an external url |  | ||||||
|             # very recent results sometimes don't return the video's url |  | ||||||
|             if url.startswith('/videos/search?'): |  | ||||||
|                 continue |  | ||||||
| 
 |  | ||||||
|         title = extract_text(result.xpath('./a//div[@class="tl"]')) |  | ||||||
|         content = extract_text(result.xpath('.//div[@class="pubInfo"]')) |  | ||||||
|         thumbnail = result.xpath('.//div[@class="vthumb"]/img/@src')[0] |  | ||||||
| 
 | 
 | ||||||
|         results.append({'url': url, |         results.append({'url': url, | ||||||
|                         'title': title, |                         'title': title, | ||||||
|  | @ -92,7 +81,6 @@ def response(resp): | ||||||
|                         'thumbnail': thumbnail, |                         'thumbnail': thumbnail, | ||||||
|                         'template': 'videos.html'}) |                         'template': 'videos.html'}) | ||||||
| 
 | 
 | ||||||
|         # first page ignores requested number of results |  | ||||||
|         if len(results) >= number_of_results: |         if len(results) >= number_of_results: | ||||||
|             break |             break | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -47,78 +47,45 @@ class TestBingVideosEngine(SearxTestCase): | ||||||
|         self.assertEqual(bing_videos.response(response), []) |         self.assertEqual(bing_videos.response(response), []) | ||||||
| 
 | 
 | ||||||
|         html = """ |         html = """ | ||||||
|         <div> |  | ||||||
|         <div class="dg_u"> |         <div class="dg_u"> | ||||||
|                 <a class="dv_i" href="/videos/search?abcde"> |             <div id="mc_vtvc_1" class="mc_vtvc"> | ||||||
|                     <div class="vthblock"> |                 <a class="mc_vtvc_link" href="/video"> | ||||||
|                         <div class="vthumb"> |                     <div class="mc_vtvc_th"> | ||||||
|  |                         <div class="cico"> | ||||||
|                             <img src="thumb_1.jpg" /> |                             <img src="thumb_1.jpg" /> | ||||||
|                         </div> |                         </div> | ||||||
|                         <div> |                         <div class="mc_vtvc_ban_lo"> | ||||||
|                             <div class="tl"> |                             <div class="vtbc"> | ||||||
|                                 Title 1 |                                 <div class="mc_bc_w b_smText"> | ||||||
|  |                                     <div class="mc_bc pivot bpi_2"> | ||||||
|  |                                         <span title=""> | ||||||
|  |                                              <span class="mv_vtvc_play cipg "></span> | ||||||
|  |                                         </span> | ||||||
|  |                                     </div> | ||||||
|  |                                     <div class="mc_bc items">10:06</div> | ||||||
|                                 </div> |                                 </div> | ||||||
|                             </div> |                             </div> | ||||||
|                         </div> |                         </div> | ||||||
|                     <div class="videoInfoPanel"> |  | ||||||
|                         <div class="pubInfo"> |  | ||||||
|                             <div>Content 1</div> |  | ||||||
|                         </div> |                         </div> | ||||||
|  |                         <div class="mc_vtvc_meta"> | ||||||
|  |                         <div class="mc_vtvc_title" title="Title 1"></div> | ||||||
|  |                         <div class="mc_vtvc_meta_block_area"> | ||||||
|  |                         <div class="mc_vtvc_meta_block"> | ||||||
|  |                             <div class="mc_vtvc_meta_row"> | ||||||
|  |                                 <span>65,696,000+ views</span> | ||||||
|  |                                 <span>1 year ago</span> | ||||||
|                             </div> |                             </div> | ||||||
|                 </a> |                             <div class="mc_vtvc_meta_row mc_vtvc_meta_row_channel">Content 1</div> | ||||||
|                 <div class="sa_wrapper" |                             <div class="mc_vtvc_meta_row"><span> | ||||||
|                     data-eventpayload="{&quot;purl&quot;: &quot;https://url.com/1&quot;}"> |                                 <div class="cico mc_vtvc_src_ico"> | ||||||
|  |                                     <div></div> | ||||||
|  |                                 </div> | ||||||
|  |                                 <span>YouTube</span> | ||||||
|  |                             </span></div> | ||||||
|                         </div> |                         </div> | ||||||
|                         </div> |                         </div> | ||||||
|                     </div> |                     </div> | ||||||
|         """ |                     <div class="vrhdata"></div> | ||||||
|         response = mock.Mock(text=html) |  | ||||||
|         results = bing_videos.response(response) |  | ||||||
|         self.assertEqual(type(results), list) |  | ||||||
|         self.assertEqual(len(results), 1) |  | ||||||
|         self.assertEqual(results[0]['title'], 'Title 1') |  | ||||||
|         self.assertEqual(results[0]['url'], 'https://url.com/1') |  | ||||||
|         self.assertEqual(results[0]['content'], 'Content 1') |  | ||||||
|         self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg') |  | ||||||
| 
 |  | ||||||
|         html = """ |  | ||||||
|         <div> |  | ||||||
|             <div class="dg_u"> |  | ||||||
|                 <a class="dv_i" href="https://url.com/1"> |  | ||||||
|                     <div class="vthblock"> |  | ||||||
|                         <div class="vthumb"> |  | ||||||
|                             <img src="thumb_1.jpg" /> |  | ||||||
|                         </div> |  | ||||||
|                         <div> |  | ||||||
|                             <div class="tl"> |  | ||||||
|                                 Title 1 |  | ||||||
|                             </div> |  | ||||||
|                         </div> |  | ||||||
|                     </div> |  | ||||||
|                     <div class="videoInfoPanel"> |  | ||||||
|                         <div class="pubInfo"> |  | ||||||
|                             <div>Content 1</div> |  | ||||||
|                         </div> |  | ||||||
|                     </div> |  | ||||||
|                 </a> |  | ||||||
|             </div> |  | ||||||
|             <div class="dg_u"> |  | ||||||
|                 <a class="dv_i" href="/videos/search?abcde"> |  | ||||||
|                     <div class="vthblock"> |  | ||||||
|                         <div class="vthumb"> |  | ||||||
|                             <img src="thumb_2.jpg" /> |  | ||||||
|                         </div> |  | ||||||
|                         <div> |  | ||||||
|                             <div class="tl"> |  | ||||||
|                                 Title 2 |  | ||||||
|                             </div> |  | ||||||
|                         </div> |  | ||||||
|                     </div> |  | ||||||
|                     <div class="videoInfoPanel"> |  | ||||||
|                         <div class="pubInfo"> |  | ||||||
|                             <div>Content 2</div> |  | ||||||
|                         </div> |  | ||||||
|                     </div> |  | ||||||
|                     </a> |                     </a> | ||||||
|                 </div> |                 </div> | ||||||
|             </div> |             </div> | ||||||
|  | @ -128,6 +95,6 @@ class TestBingVideosEngine(SearxTestCase): | ||||||
|         self.assertEqual(type(results), list) |         self.assertEqual(type(results), list) | ||||||
|         self.assertEqual(len(results), 1) |         self.assertEqual(len(results), 1) | ||||||
|         self.assertEqual(results[0]['title'], 'Title 1') |         self.assertEqual(results[0]['title'], 'Title 1') | ||||||
|         self.assertEqual(results[0]['url'], 'https://url.com/1') |         self.assertEqual(results[0]['url'], 'https://bing.com/video') | ||||||
|         self.assertEqual(results[0]['content'], 'Content 1') |         self.assertEqual(results[0]['content'], 'Content 1') | ||||||
|         self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg') |         self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg') | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber