forked from zaclys/searxng
		
	[fix] google-videos engine: ignore news articles
In the video search, Google sometimes also includes news articles. E.g. in the DE language, when you search for `!gov paris`, Google adds an article from a German newspaper (FAZ); I assume these are sponsored links (not tagged as advertisements?). Those links do not have an image, so this patch ignores *video links* without an image ID. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									cfb5eda4d1
								
							
						
					
					
						commit
						6e06618e0c
					
				
					 1 changed files with 5 additions and 7 deletions
				
			
		|  | @ -154,25 +154,23 @@ def response(resp): | |||
|     # parse results | ||||
|     for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'): | ||||
| 
 | ||||
|         # google *sections* | ||||
|         # ignore google *sections* | ||||
|         if extract_text(eval_xpath(result, g_section_with_header)): | ||||
|             logger.debug("ingoring <g-section-with-header>") | ||||
|             continue | ||||
| 
 | ||||
|         title = extract_text(eval_xpath_getindex(result, title_xpath, 0)) | ||||
|         url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0) | ||||
| 
 | ||||
|         # <img id="vidthumb1" ...> | ||||
|         # ingnore articles without an image id / e.g. news articles | ||||
|         img_id = eval_xpath_getindex(result, './/g-img/img/@id', 0, default=None) | ||||
|         if img_id is None: | ||||
|             logger.error("no img_id for: %s" % result) | ||||
|             logger.error("no img_id found in item %s (news article?)", len(results) + 1) | ||||
|             continue | ||||
| 
 | ||||
|         img_src = vidthumb_imgdata.get(img_id, None) | ||||
|         if not img_src: | ||||
|             logger.error("no vidthumb imgdata for: %s" % img_id) | ||||
|             img_src = thumbs_src.get(img_id, "") | ||||
| 
 | ||||
|         title = extract_text(eval_xpath_getindex(result, title_xpath, 0)) | ||||
|         url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0) | ||||
|         length = extract_text(eval_xpath( | ||||
|             result, './/div[contains(@class, "P7xzyf")]/span/span')) | ||||
|         c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0) | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser