mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[fix] startpage engine: XPath expressions adapted for new HTML layout
Startpage has changed its HTML layout: classes like ``w-gl__result__main`` no longer exist, and the structure of the result items has changed slightly. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									d577817646
								
							
						
					
					
						commit
						dbed8da284
					
				
					 1 changed file with 5 additions and 11 deletions
				
			
		|  | @ -142,9 +142,6 @@ search_url = base_url + '/sp/search' | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] | # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] | ||||||
| # not ads: div[@class="result"] are the direct childs of div[@id="results"] | # not ads: div[@class="result"] are the direct childs of div[@id="results"] | ||||||
| results_xpath = '//div[@class="w-gl__result__main"]' |  | ||||||
| link_xpath = './/a[@class="w-gl__result-title result-link"]' |  | ||||||
| content_xpath = './/p[@class="w-gl__description"]' |  | ||||||
| search_form_xpath = '//form[@id="search"]' | search_form_xpath = '//form[@id="search"]' | ||||||
| """XPath of Startpage's origin search form | """XPath of Startpage's origin search form | ||||||
| 
 | 
 | ||||||
|  | @ -334,8 +331,8 @@ def _response_cat_web(dom): | ||||||
|     results = [] |     results = [] | ||||||
| 
 | 
 | ||||||
|     # parse results |     # parse results | ||||||
|     for result in eval_xpath(dom, results_xpath): |     for result in eval_xpath(dom, '//div[@class="w-gl"]/div[contains(@class, "result")]'): | ||||||
|         links = eval_xpath(result, link_xpath) |         links = eval_xpath(result, './/a[contains(@class, "result-title result-link")]') | ||||||
|         if not links: |         if not links: | ||||||
|             continue |             continue | ||||||
|         link = links[0] |         link = links[0] | ||||||
|  | @ -349,12 +346,9 @@ def _response_cat_web(dom): | ||||||
|         if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): |         if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): | ||||||
|             continue |             continue | ||||||
| 
 | 
 | ||||||
|         title = extract_text(link) |         title = extract_text(eval_xpath(link, 'h2')) | ||||||
| 
 |         content = eval_xpath(result, './/p[contains(@class, "description")]') | ||||||
|         if eval_xpath(result, content_xpath): |         content = extract_text(content, allow_none=True) or '' | ||||||
|             content: str = extract_text(eval_xpath(result, content_xpath))  # type: ignore |  | ||||||
|         else: |  | ||||||
|             content = '' |  | ||||||
| 
 | 
 | ||||||
|         published_date = None |         published_date = None | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser