mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[enh] yandex engine added
This commit is contained in:
		
							parent
							
								
									357fc47811
								
							
						
					
					
						commit
						fafc564874
					
				
					 2 changed files with 60 additions and 0 deletions
				
			
		
							
								
								
									
										55
									
								
								searx/engines/yandex.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								searx/engines/yandex.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,55 @@
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					 Yahoo (Web)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 @website     https://yandex.ru/
 | 
				
			||||||
 | 
					 @provide-api ?
 | 
				
			||||||
 | 
					 @using-api   no
 | 
				
			||||||
 | 
					 @results     HTML (using search portal)
 | 
				
			||||||
 | 
					 @stable      no (HTML can change)
 | 
				
			||||||
 | 
					 @parse       url, title, content
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from urllib import urlencode
 | 
				
			||||||
 | 
					from lxml import html
 | 
				
			||||||
 | 
					from searx.search import logger
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					logger = logger.getChild('yandex engine')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# engine dependent config
 | 
				
			||||||
 | 
					categories = ['general']
 | 
				
			||||||
 | 
					paging = True
 | 
				
			||||||
 | 
					language_support = True  # TODO
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# search-url
 | 
				
			||||||
 | 
					base_url = 'https://yandex.ru/'
 | 
				
			||||||
 | 
					search_url = 'search/?{query}&p={page}'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]'
 | 
				
			||||||
 | 
					url_xpath = './/h2/a/@href'
 | 
				
			||||||
 | 
					title_xpath = './/h2/a//text()'
 | 
				
			||||||
 | 
					content_xpath = './/div[@class="serp-item__text"]//text()'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def request(query, params):
 | 
				
			||||||
 | 
					    params['url'] = base_url + search_url.format(page=params['pageno']-1,
 | 
				
			||||||
 | 
					                                                 query=urlencode({'text': query}))
 | 
				
			||||||
 | 
					    return params
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# get response from search-request
 | 
				
			||||||
 | 
					def response(resp):
 | 
				
			||||||
 | 
					    dom = html.fromstring(resp.text)
 | 
				
			||||||
 | 
					    results = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for result in dom.xpath(results_xpath):
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            res = {'url': result.xpath(url_xpath)[0],
 | 
				
			||||||
 | 
					                   'title': ''.join(result.xpath(title_xpath)),
 | 
				
			||||||
 | 
					                   'content': ''.join(result.xpath(content_xpath))}
 | 
				
			||||||
 | 
					        except:
 | 
				
			||||||
 | 
					            logger.exception('yandex parse crash')
 | 
				
			||||||
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        results.append(res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return results
 | 
				
			||||||
| 
						 | 
					@ -274,6 +274,11 @@ engines:
 | 
				
			||||||
    engine : yahoo
 | 
					    engine : yahoo
 | 
				
			||||||
    shortcut : yh
 | 
					    shortcut : yh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - name : yandex
 | 
				
			||||||
 | 
					    engine : yandex
 | 
				
			||||||
 | 
					    shortcut : ya
 | 
				
			||||||
 | 
					    disabled : True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  - name : yahoo news
 | 
					  - name : yahoo news
 | 
				
			||||||
    engine : yahoo_news
 | 
					    engine : yahoo_news
 | 
				
			||||||
    shortcut : yhn
 | 
					    shortcut : yhn
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue