mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Remove content field from ArchWiki results; reformat code in archlinux.py
The content field in Arch Wiki search results is of no real use: more often than not it contains no usable information, and it includes too many markup tags, which make the text unreadable. It is safe to remove it.
This commit is contained in:
		
							parent
							
								
									d748b8419a
								
							
						
					
					
						commit
						8b7dc2acb9
					
				
					 2 changed files with 13 additions and 21 deletions
				
			
		|  | @ -8,7 +8,7 @@ | |||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse		url, title, content | ||||
|  @parse        url, title | ||||
| """ | ||||
| 
 | ||||
| from urlparse import urljoin | ||||
|  | @ -26,7 +26,6 @@ base_url = 'https://wiki.archlinux.org' | |||
| # xpath queries | ||||
| xpath_results = '//ul[@class="mw-search-results"]/li' | ||||
| xpath_link = './/div[@class="mw-search-result-heading"]/a' | ||||
| xpath_content = './/div[@class="searchresult"]' | ||||
| 
 | ||||
| 
 | ||||
| # cut 'en' from 'en_US', 'de' from 'de_CH', and so on | ||||
|  | @ -135,10 +134,8 @@ def response(resp): | |||
|         link = result.xpath(xpath_link)[0] | ||||
|         href = urljoin(base_url, link.attrib.get('href')) | ||||
|         title = escape(extract_text(link)) | ||||
|         content = escape(extract_text(result.xpath(xpath_content))) | ||||
| 
 | ||||
|         results.append({'url': href, | ||||
|                         'title': title, | ||||
|                         'content': content}) | ||||
|                         'title': title}) | ||||
| 
 | ||||
|     return results | ||||
|  |  | |||
|  | @ -18,7 +18,7 @@ class TestArchLinuxEngine(SearxTestCase): | |||
|     def test_request(self): | ||||
|         query = 'test_query' | ||||
|         dic = defaultdict(dict) | ||||
|         dic['pageno'] = 0 | ||||
|         dic['pageno'] = 1 | ||||
|         dic['language'] = 'en_US' | ||||
|         params = archlinux.request(query, dic) | ||||
|         self.assertTrue('url' in params) | ||||
|  | @ -31,10 +31,8 @@ class TestArchLinuxEngine(SearxTestCase): | |||
|             self.assertTrue(domain in params['url']) | ||||
| 
 | ||||
|     def test_response(self): | ||||
|         response = mock.Mock(text='<html></html>') | ||||
|         response.search_params = { | ||||
|             'language': 'en_US' | ||||
|         } | ||||
|         response = mock.Mock(text='<html></html>', | ||||
|                              search_params={'language': 'en_US'}) | ||||
|         self.assertEqual(archlinux.response(response), []) | ||||
| 
 | ||||
|         html = """ | ||||
|  | @ -79,18 +77,15 @@ class TestArchLinuxEngine(SearxTestCase): | |||
|         expected = [ | ||||
|             { | ||||
|                 'title': 'ATI', | ||||
|                 'url': 'https://wiki.archlinux.org/index.php/ATI', | ||||
|                 'content': 'Lorem ipsum dolor sit amet' | ||||
|                 'url': 'https://wiki.archlinux.org/index.php/ATI' | ||||
|             }, | ||||
|             { | ||||
|                 'title': 'Frequently asked questions', | ||||
|                 'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions', | ||||
|                 'content': 'CPUs with AMDs instruction set "AMD64"' | ||||
|                 'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions' | ||||
|             }, | ||||
|             { | ||||
|                 'title': 'CPU frequency scaling', | ||||
|                 'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling', | ||||
|                 'content': 'ondemand for AMD and older Intel CPU' | ||||
|                 'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling' | ||||
|             } | ||||
|         ] | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Kirill Isakov
						Kirill Isakov