forked from zaclys/searxng
		
	[fix] google engine - "some results are invalids: invalid content"
Fix google issues listed in `/stats?engine=google` with the message::
    some results are invalids: invalid content
The log is::
    DEBUG   searx                         : result: invalid content: {'url': 'https://de.wikipedia.org/wiki/Foo', 'title': 'Foo - Wikipedia', 'content': None, 'engine': 'google'}
    WARNING searx.engines.google          : ErrorContext('searx/search/processors/abstract.py', 111, 'result_container.extend(self.engine_name, search_results)', None, 'some results are invalids: invalid content', ()) True
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									f0102a95c9
								
							
						
					
					
						commit
						1a0760c10a
					
				
					 1 changed files with 7 additions and 5 deletions
				
			
		| 
						 | 
					@ -353,20 +353,22 @@ def response(resp):
 | 
				
			||||||
            title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
 | 
					            title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
 | 
				
			||||||
            if title_tag is None:
 | 
					            if title_tag is None:
 | 
				
			||||||
                # this not one of the common google results *section*
 | 
					                # this not one of the common google results *section*
 | 
				
			||||||
                logger.debug('ingoring <div class="g" ../> section: missing title')
 | 
					                logger.debug('ingoring item from the result_xpath list: missing title')
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            title = extract_text(title_tag)
 | 
					            title = extract_text(title_tag)
 | 
				
			||||||
            url = eval_xpath_getindex(result, href_xpath, 0, None)
 | 
					            url = eval_xpath_getindex(result, href_xpath, 0, None)
 | 
				
			||||||
            if url is None:
 | 
					            if url is None:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
 | 
					            content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
 | 
				
			||||||
 | 
					            if content is None:
 | 
				
			||||||
 | 
					                logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title)
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            logger.debug('add link to results: %s', title)
 | 
				
			||||||
            results.append({'url': url, 'title': title, 'content': content})
 | 
					            results.append({'url': url, 'title': title, 'content': content})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        except Exception as e:  # pylint: disable=broad-except
 | 
					        except Exception as e:  # pylint: disable=broad-except
 | 
				
			||||||
            logger.error(e, exc_info=True)
 | 
					            logger.error(e, exc_info=True)
 | 
				
			||||||
            # from lxml import etree
 | 
					 | 
				
			||||||
            # logger.debug(etree.tostring(result, pretty_print=True))
 | 
					 | 
				
			||||||
            # import pdb
 | 
					 | 
				
			||||||
            # pdb.set_trace()
 | 
					 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # parse suggestion
 | 
					    # parse suggestion
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue