forked from zaclys/searxng
		
	Bing News unit tests
I have no idea why coverage reports 97% with 2 missed branches. If anyone has an idea...
This commit is contained in:
		
							parent
							
								
									dad0434f34
								
							
						
					
					
						commit
						efde2c21c8
					
				
					 3 changed files with 249 additions and 17 deletions
				
			
		|  | @ -15,6 +15,7 @@ from lxml import html | |||
| from datetime import datetime, timedelta | ||||
| from dateutil import parser | ||||
| import re | ||||
| from searx.engines.xpath import extract_text | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['news'] | ||||
|  | @ -42,6 +43,7 @@ def request(query, params): | |||
|     params['cookies']['_FP'] = "ui=en-US" | ||||
| 
 | ||||
|     params['url'] = base_url + search_path | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
|  | @ -55,44 +57,37 @@ def response(resp): | |||
|     for result in dom.xpath('//div[@class="sn_r"]'): | ||||
|         link = result.xpath('.//div[@class="newstitle"]/a')[0] | ||||
|         url = link.attrib.get('href') | ||||
|         title = ' '.join(link.xpath('.//text()')) | ||||
|         contentXPath = result.xpath('.//div[@class="sn_txt"]/div' | ||||
|                                     '//span[@class="sn_snip"]//text()') | ||||
|         title = extract_text(link) | ||||
|         contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]') | ||||
|         if contentXPath is not None: | ||||
|             content = escape(' '.join(contentXPath)) | ||||
|             content = escape(extract_text(contentXPath)) | ||||
| 
 | ||||
|         # parse publishedDate | ||||
|         publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div' | ||||
|                                           '//span[contains(@class,"sn_ST")]' | ||||
|                                           '//span[contains(@class,"sn_tm")]' | ||||
|                                           '//text()') | ||||
|                                           '//span[contains(@class,"sn_tm")]') | ||||
| 
 | ||||
|         if publishedDateXPath is not None: | ||||
|             publishedDate = escape(' '.join(publishedDateXPath)) | ||||
|             publishedDate = escape(extract_text(publishedDateXPath)) | ||||
| 
 | ||||
|         if re.match("^[0-9]+ minute(s|) ago$", publishedDate): | ||||
|             timeNumbers = re.findall(r'\d+', publishedDate) | ||||
|             publishedDate = datetime.now()\ | ||||
|                 - timedelta(minutes=int(timeNumbers[0])) | ||||
|             publishedDate = datetime.now() - timedelta(minutes=int(timeNumbers[0])) | ||||
|         elif re.match("^[0-9]+ hour(s|) ago$", publishedDate): | ||||
|             timeNumbers = re.findall(r'\d+', publishedDate) | ||||
|             publishedDate = datetime.now()\ | ||||
|                 - timedelta(hours=int(timeNumbers[0])) | ||||
|         elif re.match("^[0-9]+ hour(s|)," | ||||
|                       " [0-9]+ minute(s|) ago$", publishedDate): | ||||
|             publishedDate = datetime.now() - timedelta(hours=int(timeNumbers[0])) | ||||
|         elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate): | ||||
|             timeNumbers = re.findall(r'\d+', publishedDate) | ||||
|             publishedDate = datetime.now()\ | ||||
|                 - timedelta(hours=int(timeNumbers[0]))\ | ||||
|                 - timedelta(minutes=int(timeNumbers[1])) | ||||
|         elif re.match("^[0-9]+ day(s|) ago$", publishedDate): | ||||
|             timeNumbers = re.findall(r'\d+', publishedDate) | ||||
|             publishedDate = datetime.now()\ | ||||
|                 - timedelta(days=int(timeNumbers[0])) | ||||
|             publishedDate = datetime.now() - timedelta(days=int(timeNumbers[0])) | ||||
|         else: | ||||
|             try: | ||||
|                 # FIXME use params['language'] to parse either mm/dd or dd/mm | ||||
|                 publishedDate = parser.parse(publishedDate, dayfirst=False) | ||||
|             except TypeError: | ||||
|                 # FIXME | ||||
|                 publishedDate = datetime.now() | ||||
| 
 | ||||
|         # append result | ||||
|  |  | |||
							
								
								
									
										236
									
								
								searx/tests/engines/test_bing_news.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										236
									
								
								searx/tests/engines/test_bing_news.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,236 @@ | |||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import bing_news | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
| class TestBingNewsEngine(SearxTestCase): | ||||
| 
 | ||||
|     def test_request(self): | ||||
|         query = 'test_query' | ||||
|         dicto = defaultdict(dict) | ||||
|         dicto['pageno'] = 1 | ||||
|         dicto['language'] = 'fr_FR' | ||||
|         params = bing_news.request(query, dicto) | ||||
|         self.assertIn('url', params) | ||||
|         self.assertIn(query, params['url']) | ||||
|         self.assertIn('bing.com', params['url']) | ||||
|         self.assertIn('fr', params['url']) | ||||
|         self.assertIn('_FP', params['cookies']) | ||||
|         self.assertIn('en', params['cookies']['_FP']) | ||||
| 
 | ||||
|         dicto['language'] = 'all' | ||||
|         params = bing_news.request(query, dicto) | ||||
|         self.assertIn('en', params['url']) | ||||
|         self.assertIn('_FP', params['cookies']) | ||||
|         self.assertIn('en', params['cookies']['_FP']) | ||||
| 
 | ||||
|     def test_response(self): | ||||
|         self.assertRaises(AttributeError, bing_news.response, None) | ||||
|         self.assertRaises(AttributeError, bing_news.response, []) | ||||
|         self.assertRaises(AttributeError, bing_news.response, '') | ||||
|         self.assertRaises(AttributeError, bing_news.response, '[]') | ||||
| 
 | ||||
|         response = mock.Mock(content='<html></html>') | ||||
|         self.assertEqual(bing_news.response(response), []) | ||||
| 
 | ||||
|         response = mock.Mock(content='<html></html>') | ||||
|         self.assertEqual(bing_news.response(response), []) | ||||
| 
 | ||||
|         html = """ | ||||
|         <div class="sn_r"> | ||||
|             <div class="newstitle"> | ||||
|                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                     Title | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_img"> | ||||
|                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_txt"> | ||||
|                 <div class="sn_oi"> | ||||
|                     <span class="sn_snip">Article Content</span> | ||||
|                     <span class="sn_ST"> | ||||
|                         <cite class="sn_src">metronews.fr</cite> | ||||
|                          ·  | ||||
|                         <span class="sn_tm">44 minutes ago</span> | ||||
|                     </span> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </div> | ||||
|         """ | ||||
|         response = mock.Mock(content=html) | ||||
|         results = bing_news.response(response) | ||||
|         self.assertEqual(type(results), list) | ||||
|         self.assertEqual(len(results), 1) | ||||
|         self.assertEqual(results[0]['title'], 'Title') | ||||
|         self.assertEqual(results[0]['url'], 'http://url.of.article/') | ||||
|         self.assertEqual(results[0]['content'], 'Article Content') | ||||
| 
 | ||||
|         html = """ | ||||
|         <div class="sn_r"> | ||||
|             <div class="newstitle"> | ||||
|                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                     Title | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_img"> | ||||
|                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_txt"> | ||||
|                 <div class="sn_oi"> | ||||
|                     <span class="sn_snip">Article Content</span> | ||||
|                     <span class="sn_ST"> | ||||
|                         <cite class="sn_src">metronews.fr</cite> | ||||
|                          ·  | ||||
|                         <span class="sn_tm">44 minutes ago</span> | ||||
|                     </span> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </div> | ||||
|         <div class="sn_r"> | ||||
|             <div class="newstitle"> | ||||
|                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                     Title | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_img"> | ||||
|                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_txt"> | ||||
|                 <div class="sn_oi"> | ||||
|                     <span class="sn_snip">Article Content</span> | ||||
|                     <span class="sn_ST"> | ||||
|                         <cite class="sn_src">metronews.fr</cite> | ||||
|                          ·  | ||||
|                         <span class="sn_tm">3 hours, 44 minutes ago</span> | ||||
|                     </span> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </div> | ||||
|         <div class="sn_r"> | ||||
|             <div class="newstitle"> | ||||
|                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                     Title | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_img"> | ||||
|                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_txt"> | ||||
|                 <div class="sn_oi"> | ||||
|                     <span class="sn_snip">Article Content</span> | ||||
|                     <span class="sn_ST"> | ||||
|                         <cite class="sn_src">metronews.fr</cite> | ||||
|                          ·  | ||||
|                         <span class="sn_tm">44 hours ago</span> | ||||
|                     </span> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </div> | ||||
|         <div class="sn_r"> | ||||
|             <div class="newstitle"> | ||||
|                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                     Title | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_img"> | ||||
|                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_txt"> | ||||
|                 <div class="sn_oi"> | ||||
|                     <span class="sn_snip">Article Content</span> | ||||
|                     <span class="sn_ST"> | ||||
|                         <cite class="sn_src">metronews.fr</cite> | ||||
|                          ·  | ||||
|                         <span class="sn_tm">2 days ago</span> | ||||
|                     </span> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </div> | ||||
|         <div class="sn_r"> | ||||
|             <div class="newstitle"> | ||||
|                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                     Title | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_img"> | ||||
|                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_txt"> | ||||
|                 <div class="sn_oi"> | ||||
|                     <span class="sn_snip">Article Content</span> | ||||
|                     <span class="sn_ST"> | ||||
|                         <cite class="sn_src">metronews.fr</cite> | ||||
|                          ·  | ||||
|                         <span class="sn_tm">27/01/2015</span> | ||||
|                     </span> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </div> | ||||
|         <div class="sn_r"> | ||||
|             <div class="newstitle"> | ||||
|                 <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                     Title | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_img"> | ||||
|                 <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                     <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|                 </a> | ||||
|             </div> | ||||
|             <div class="sn_txt"> | ||||
|                 <div class="sn_oi"> | ||||
|                     <span class="sn_snip">Article Content</span> | ||||
|                     <span class="sn_ST"> | ||||
|                         <cite class="sn_src">metronews.fr</cite> | ||||
|                          ·  | ||||
|                         <span class="sn_tm">Il y a 3 heures</span> | ||||
|                     </span> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </div> | ||||
|         """ | ||||
|         response = mock.Mock(content=html) | ||||
|         results = bing_news.response(response) | ||||
|         self.assertEqual(type(results), list) | ||||
|         self.assertEqual(len(results), 6) | ||||
| 
 | ||||
|         html = """ | ||||
|         <div class="newstitle"> | ||||
|             <a href="http://url.of.article/" target="_blank" h="ID=news,5022.1"> | ||||
|                 Title | ||||
|             </a> | ||||
|         </div> | ||||
|         <div class="sn_img"> | ||||
|             <a href="http://url.of.article2/" target="_blank" h="ID=news,5024.1"> | ||||
|                 <img class="rms_img" height="80" id="emb1" src="/image.src" title="Title" width="80" /> | ||||
|             </a> | ||||
|         </div> | ||||
|         <div class="sn_txt"> | ||||
|             <div class="sn_oi"> | ||||
|                 <span class="sn_snip">Article Content</span> | ||||
|                 <span class="sn_ST"> | ||||
|                     <cite class="sn_src">metronews.fr</cite> | ||||
|                      ·  | ||||
|                     <span class="sn_tm">44 minutes ago</span> | ||||
|                 </span> | ||||
|             </div> | ||||
|         </div> | ||||
|         """ | ||||
|         response = mock.Mock(content=html) | ||||
|         results = bing_news.response(response) | ||||
|         self.assertEqual(type(results), list) | ||||
|         self.assertEqual(len(results), 0) | ||||
|  | @ -1,5 +1,6 @@ | |||
| from searx.tests.engines.test_bing import *  # noqa | ||||
| from searx.tests.engines.test_bing_images import *  # noqa | ||||
| from searx.tests.engines.test_bing_news import *  # noqa | ||||
| from searx.tests.engines.test_dailymotion import *  # noqa | ||||
| from searx.tests.engines.test_deezer import *  # noqa | ||||
| from searx.tests.engines.test_deviantart import *  # noqa | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Cqoicebordel
						Cqoicebordel