mirror of https://github.com/searxng/searxng.git
[fix] engine - duckduckgo vqd edge-case
This commit is contained in:
parent
d013f51a25
commit
102502a4f0
|
@ -57,13 +57,13 @@ url = 'https://lite.duckduckgo.com/lite/'
|
||||||
# url_ping = 'https://duckduckgo.com/t/sl_l'
|
# url_ping = 'https://duckduckgo.com/t/sl_l'
|
||||||
|
|
||||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||||
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
|
||||||
|
|
||||||
|
|
||||||
def cache_vqd(query, value):
|
def cache_vqd(query, value):
|
||||||
"""Caches a ``vqd`` value from a query.
|
"""Caches a ``vqd`` token from a query, if token is None the cached value
|
||||||
|
is deleted.
|
||||||
|
|
||||||
The vqd value depends on the query string and is needed for the follow up
|
The vqd token depends on the query string and is needed for the follow up
|
||||||
pages or the images loaded by a XMLHttpRequest:
|
pages or the images loaded by a XMLHttpRequest:
|
||||||
|
|
||||||
- DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
|
- DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
|
||||||
|
@ -72,18 +72,27 @@ def cache_vqd(query, value):
|
||||||
"""
|
"""
|
||||||
c = redisdb.client()
|
c = redisdb.client()
|
||||||
if c:
|
if c:
|
||||||
logger.debug("cache vqd value: %s", value)
|
|
||||||
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
||||||
|
if value is not None:
|
||||||
|
logger.debug("cache vqd value: %s", value)
|
||||||
c.set(key, value, ex=600)
|
c.set(key, value, ex=600)
|
||||||
|
else:
|
||||||
|
# remove from cache
|
||||||
|
c.delete(key)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_vqd_value(query):
|
||||||
|
res = get('https://lite.duckduckgo.com/lite/?' + urlencode({'q': query}))
|
||||||
|
doc = lxml.html.fromstring(res.text)
|
||||||
|
return eval_xpath_getindex(doc, "//input[@name='vqd']/@value", 0, None)
|
||||||
|
|
||||||
|
|
||||||
def get_vqd(query):
|
def get_vqd(query):
|
||||||
"""Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
|
"""Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
|
||||||
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
|
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd token from the
|
||||||
response.
|
response.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
value = None
|
|
||||||
c = redisdb.client()
|
c = redisdb.client()
|
||||||
if c:
|
if c:
|
||||||
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
||||||
|
@ -93,10 +102,18 @@ def get_vqd(query):
|
||||||
logger.debug("re-use cached vqd value: %s", value)
|
logger.debug("re-use cached vqd value: %s", value)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
query_url = 'https://lite.duckduckgo.com/lite/?{args}'.format(args=urlencode({'q': query}))
|
value = _get_vqd_value(query)
|
||||||
res = get(query_url)
|
if not value:
|
||||||
doc = lxml.html.fromstring(res.text)
|
# seems we got a CAPTCHA for this query string, send a dummy request to
|
||||||
value = doc.xpath("//input[@name='vqd']/@value")[0]
|
# release the captcha and then fetch the vqd value for the query string
|
||||||
|
# again.
|
||||||
|
logger.warning("vqd token will no longer work, trying to get a new one by sending another query")
|
||||||
|
_get_vqd_value(f'{query[:3]} duckduckgo')
|
||||||
|
value = _get_vqd_value(query)
|
||||||
|
|
||||||
|
if not value:
|
||||||
|
logger.error("was not able to fetch a valid vqd token from DDG")
|
||||||
|
else:
|
||||||
logger.debug("new vqd value: %s", value)
|
logger.debug("new vqd value: %s", value)
|
||||||
cache_vqd(query, value)
|
cache_vqd(query, value)
|
||||||
return value
|
return value
|
||||||
|
@ -241,10 +258,10 @@ def request(query, params):
|
||||||
# initial page does not have additional data in the input form
|
# initial page does not have additional data in the input form
|
||||||
if params['pageno'] > 1:
|
if params['pageno'] > 1:
|
||||||
|
|
||||||
params['data']['o'] = form_data.get('o', 'json')
|
params['data']['o'] = 'json'
|
||||||
params['data']['api'] = form_data.get('api', 'd.js')
|
params['data']['api'] = 'd.js'
|
||||||
params['data']['nextParams'] = form_data.get('nextParams', '')
|
params['data']['nextParams'] = ''
|
||||||
params['data']['v'] = form_data.get('v', 'l')
|
params['data']['v'] = 'l'
|
||||||
|
|
||||||
params['data']['kl'] = eng_region
|
params['data']['kl'] = eng_region
|
||||||
params['cookies']['kl'] = eng_region
|
params['cookies']['kl'] = eng_region
|
||||||
|
@ -274,22 +291,18 @@ def response(resp):
|
||||||
# the layout of the HTML tables is different.
|
# the layout of the HTML tables is different.
|
||||||
result_table = result_table[1]
|
result_table = result_table[1]
|
||||||
elif not len(result_table) >= 3:
|
elif not len(result_table) >= 3:
|
||||||
# no more results
|
# no more results / if we have the vqd token in cache, it's no longer
|
||||||
|
# valid and has to be deleted
|
||||||
|
cache_vqd(resp.search_params['data']['q'], None)
|
||||||
return []
|
return []
|
||||||
else:
|
else:
|
||||||
result_table = result_table[2]
|
result_table = result_table[2]
|
||||||
# update form data from response
|
# update form data from response
|
||||||
form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
|
form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
|
||||||
if len(form):
|
if len(form):
|
||||||
|
value = eval_xpath_getindex(form[0], "//input[@name='vqd']/@value", 0, None)
|
||||||
form = form[0]
|
|
||||||
form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
|
|
||||||
form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
|
|
||||||
form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
|
|
||||||
logger.debug('form_data: %s', form_data)
|
|
||||||
|
|
||||||
value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
|
|
||||||
query = resp.search_params['data']['q']
|
query = resp.search_params['data']['q']
|
||||||
|
if value:
|
||||||
cache_vqd(query, value)
|
cache_vqd(query, value)
|
||||||
|
|
||||||
tr_rows = eval_xpath(result_table, './/tr')
|
tr_rows = eval_xpath(result_table, './/tr')
|
||||||
|
|
Loading…
Reference in New Issue