mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[fix] engine - duckduckgo vqd edge-case
This commit is contained in:
		
							parent
							
								
									d013f51a25
								
							
						
					
					
						commit
						102502a4f0
					
				
					 1 changed file with 40 additions and 27 deletions
				
			
		| 
						 | 
					@ -57,13 +57,13 @@ url = 'https://lite.duckduckgo.com/lite/'
 | 
				
			||||||
# url_ping = 'https://duckduckgo.com/t/sl_l'
 | 
					# url_ping = 'https://duckduckgo.com/t/sl_l'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 | 
					time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 | 
				
			||||||
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def cache_vqd(query, value):
 | 
					def cache_vqd(query, value):
 | 
				
			||||||
    """Caches a ``vqd`` value from a query.
 | 
					    """Caches a ``vqd`` token from a query, if token is None the cached value
 | 
				
			||||||
 | 
					    is deleted.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    The vqd value depends on the query string and is needed for the follow up
 | 
					    The vqd token depends on the query string and is needed for the follow up
 | 
				
			||||||
    pages or the images loaded by a XMLHttpRequest:
 | 
					    pages or the images loaded by a XMLHttpRequest:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
 | 
					    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
 | 
				
			||||||
| 
						 | 
					@ -72,18 +72,27 @@ def cache_vqd(query, value):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    c = redisdb.client()
 | 
					    c = redisdb.client()
 | 
				
			||||||
    if c:
 | 
					    if c:
 | 
				
			||||||
        logger.debug("cache vqd value: %s", value)
 | 
					 | 
				
			||||||
        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
 | 
					        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
 | 
				
			||||||
        c.set(key, value, ex=600)
 | 
					        if value is not None:
 | 
				
			||||||
 | 
					            logger.debug("cache vqd value: %s", value)
 | 
				
			||||||
 | 
					            c.set(key, value, ex=600)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            # remove from cache
 | 
				
			||||||
 | 
					            c.delete(key)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_vqd_value(query):
 | 
				
			||||||
 | 
					    res = get('https://lite.duckduckgo.com/lite/?' + urlencode({'q': query}))
 | 
				
			||||||
 | 
					    doc = lxml.html.fromstring(res.text)
 | 
				
			||||||
 | 
					    return eval_xpath_getindex(doc, "//input[@name='vqd']/@value", 0, None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_vqd(query):
 | 
					def get_vqd(query):
 | 
				
			||||||
    """Returns the ``vqd`` that fits to the *query*.  If there is no ``vqd`` cached
 | 
					    """Returns the ``vqd`` that fits to the *query*.  If there is no ``vqd`` cached
 | 
				
			||||||
    (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
 | 
					    (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd token from the
 | 
				
			||||||
    response.
 | 
					    response.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    value = None
 | 
					 | 
				
			||||||
    c = redisdb.client()
 | 
					    c = redisdb.client()
 | 
				
			||||||
    if c:
 | 
					    if c:
 | 
				
			||||||
        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
 | 
					        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
 | 
				
			||||||
| 
						 | 
					@ -93,12 +102,20 @@ def get_vqd(query):
 | 
				
			||||||
            logger.debug("re-use cached vqd value: %s", value)
 | 
					            logger.debug("re-use cached vqd value: %s", value)
 | 
				
			||||||
            return value
 | 
					            return value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    query_url = 'https://lite.duckduckgo.com/lite/?{args}'.format(args=urlencode({'q': query}))
 | 
					    value = _get_vqd_value(query)
 | 
				
			||||||
    res = get(query_url)
 | 
					    if not value:
 | 
				
			||||||
    doc = lxml.html.fromstring(res.text)
 | 
					        # seems we got a CAPTCHA for this query string, send a dummy request to
 | 
				
			||||||
    value = doc.xpath("//input[@name='vqd']/@value")[0]
 | 
					        # release the captcha and then fetch the vqd value for the query string
 | 
				
			||||||
    logger.debug("new vqd value: %s", value)
 | 
					        # again.
 | 
				
			||||||
    cache_vqd(query, value)
 | 
					        logger.warning("vqd token will no longer work, trying to get a new one by sending another query")
 | 
				
			||||||
 | 
					        _get_vqd_value(f'{query[:3]} duckduckgo')
 | 
				
			||||||
 | 
					        value = _get_vqd_value(query)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if not value:
 | 
				
			||||||
 | 
					        logger.error("was not able to fetch a valid vqd token from DDG")
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        logger.debug("new vqd value: %s", value)
 | 
				
			||||||
 | 
					        cache_vqd(query, value)
 | 
				
			||||||
    return value
 | 
					    return value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -241,10 +258,10 @@ def request(query, params):
 | 
				
			||||||
    # initial page does not have additional data in the input form
 | 
					    # initial page does not have additional data in the input form
 | 
				
			||||||
    if params['pageno'] > 1:
 | 
					    if params['pageno'] > 1:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        params['data']['o'] = form_data.get('o', 'json')
 | 
					        params['data']['o'] = 'json'
 | 
				
			||||||
        params['data']['api'] = form_data.get('api', 'd.js')
 | 
					        params['data']['api'] = 'd.js'
 | 
				
			||||||
        params['data']['nextParams'] = form_data.get('nextParams', '')
 | 
					        params['data']['nextParams'] = ''
 | 
				
			||||||
        params['data']['v'] = form_data.get('v', 'l')
 | 
					        params['data']['v'] = 'l'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params['data']['kl'] = eng_region
 | 
					    params['data']['kl'] = eng_region
 | 
				
			||||||
    params['cookies']['kl'] = eng_region
 | 
					    params['cookies']['kl'] = eng_region
 | 
				
			||||||
| 
						 | 
					@ -274,23 +291,19 @@ def response(resp):
 | 
				
			||||||
        # the layout of the HTML tables is different.
 | 
					        # the layout of the HTML tables is different.
 | 
				
			||||||
        result_table = result_table[1]
 | 
					        result_table = result_table[1]
 | 
				
			||||||
    elif not len(result_table) >= 3:
 | 
					    elif not len(result_table) >= 3:
 | 
				
			||||||
        # no more results
 | 
					        # no more results / if we have the vqd token in cache, it's no longer
 | 
				
			||||||
 | 
					        # valid and has to be deleted
 | 
				
			||||||
 | 
					        cache_vqd(resp.search_params['data']['q'], None)
 | 
				
			||||||
        return []
 | 
					        return []
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        result_table = result_table[2]
 | 
					        result_table = result_table[2]
 | 
				
			||||||
        # update form data from response
 | 
					        # update form data from response
 | 
				
			||||||
        form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
 | 
					        form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
 | 
				
			||||||
        if len(form):
 | 
					        if len(form):
 | 
				
			||||||
 | 
					            value = eval_xpath_getindex(form[0], "//input[@name='vqd']/@value", 0, None)
 | 
				
			||||||
            form = form[0]
 | 
					 | 
				
			||||||
            form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
 | 
					 | 
				
			||||||
            form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
 | 
					 | 
				
			||||||
            form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
 | 
					 | 
				
			||||||
            logger.debug('form_data: %s', form_data)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
 | 
					 | 
				
			||||||
            query = resp.search_params['data']['q']
 | 
					            query = resp.search_params['data']['q']
 | 
				
			||||||
            cache_vqd(query, value)
 | 
					            if value:
 | 
				
			||||||
 | 
					                cache_vqd(query, value)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    tr_rows = eval_xpath(result_table, './/tr')
 | 
					    tr_rows = eval_xpath(result_table, './/tr')
 | 
				
			||||||
    # In the last <tr> is the form of the 'previous/next page' links
 | 
					    # In the last <tr> is the form of the 'previous/next page' links
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue