mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Closes #2103
This commit is contained in:
		
							parent
							
								
									a1ce141c99
								
							
						
					
					
						commit
						f678388dbc
					
				
					 1 changed files with 22 additions and 3 deletions
				
			
		| 
						 | 
				
			
			@ -24,6 +24,7 @@ Definitions`_.
 | 
			
		|||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import urllib
 | 
			
		||||
from lxml import html
 | 
			
		||||
from flask_babel import gettext
 | 
			
		||||
from searx import logger
 | 
			
		||||
| 
						 | 
				
			
			@ -77,6 +78,19 @@ def scrap_out_thumbs(dom):
 | 
			
		|||
    return ret_val
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def scrap_img_by_id(script, data_id):
 | 
			
		||||
    """Get full image URL by data-id in parent element
 | 
			
		||||
    """
 | 
			
		||||
    img_url = ''
 | 
			
		||||
    _script = script.split('\n')
 | 
			
		||||
    for i, line in enumerate(_script):
 | 
			
		||||
        if 'gstatic.com/images' in line and data_id in line:
 | 
			
		||||
            url_line = _script[i + 1]
 | 
			
		||||
            img_url = url_line.split('"')[1]
 | 
			
		||||
            img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%'))
 | 
			
		||||
    return img_url
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def request(query, params):
 | 
			
		||||
    """Google-Video search request"""
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -133,6 +147,7 @@ def response(resp):
 | 
			
		|||
    # convert the text to dom
 | 
			
		||||
    dom = html.fromstring(resp.text)
 | 
			
		||||
    img_bas64_map = scrap_out_thumbs(dom)
 | 
			
		||||
    img_src_script = eval_xpath(dom, '//script[contains(., "AF_initDataCallback({key: ")]')[1].text
 | 
			
		||||
 | 
			
		||||
    # parse results
 | 
			
		||||
    #
 | 
			
		||||
| 
						 | 
				
			
			@ -142,8 +157,7 @@ def response(resp):
 | 
			
		|||
    #     <div jsmodel="tTXmib"> / <div jsaction="..." data-id="..."
 | 
			
		||||
    #     The data-id matches to a item in a json-data structure in::
 | 
			
		||||
    #         <script nonce="I+vqelcy/01CKiBJi5Z1Ow">AF_initDataCallback({key: 'ds:1', ... data:function(){return [ ...
 | 
			
		||||
    #     In this structure the ling to the origin PNG, JPG or whatever is given
 | 
			
		||||
    #     (we do not blow out the link there, you could still implement that)
 | 
			
		||||
    #     In this structure the link to the origin PNG, JPG or whatever is given
 | 
			
		||||
    # first link per image-div contains a <img> with the data-iid for bas64 encoded image data::
 | 
			
		||||
    #      <img class="rg_i Q4LuWd" data-iid="0"
 | 
			
		||||
    # second link per image-div is the target link::
 | 
			
		||||
| 
						 | 
				
			
			@ -186,12 +200,17 @@ def response(resp):
 | 
			
		|||
                pub_descr = extract_text(pub_nodes[0])
 | 
			
		||||
                pub_source = extract_text(pub_nodes[1])
 | 
			
		||||
 | 
			
		||||
            img_src_id = eval_xpath(img_node, '../../../@data-id')[0]
 | 
			
		||||
            src_url = scrap_img_by_id(img_src_script, img_src_id)
 | 
			
		||||
            if not src_url:
 | 
			
		||||
                src_url = thumbnail_src
 | 
			
		||||
 | 
			
		||||
            results.append({
 | 
			
		||||
                'url': url,
 | 
			
		||||
                'title': img_alt,
 | 
			
		||||
                'content': pub_descr,
 | 
			
		||||
                'source': pub_source,
 | 
			
		||||
                'img_src': url,
 | 
			
		||||
                'img_src': src_url,
 | 
			
		||||
                # 'img_format': img_format,
 | 
			
		||||
                'thumbnail_src': thumbnail_src,
 | 
			
		||||
                'template': 'images.html'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue