Revert "[fix] engine - duckduckgo vqd edge-case"

This reverts commit 102502a4f09e78682cd4f030605be394bc33282c.
This commit is contained in:
Markus Heiser 2023-09-22 08:53:19 +02:00 committed by MatthieuBarbu
parent d493ad30be
commit deeacad697
1 changed file with 27 additions and 40 deletions

View File

@ -57,13 +57,13 @@ url = 'https://lite.duckduckgo.com/lite/'
# url_ping = 'https://duckduckgo.com/t/sl_l' # url_ping = 'https://duckduckgo.com/t/sl_l'
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
def cache_vqd(query, value): def cache_vqd(query, value):
"""Caches a ``vqd`` token from a query, if token is None the cached value """Caches a ``vqd`` value from a query.
is deleted.
The vqd token depends on the query string and is needed for the follow up The vqd value depends on the query string and is needed for the follow up
pages or the images loaded by a XMLHttpRequest: pages or the images loaded by a XMLHttpRequest:
- DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...` - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
@ -72,27 +72,18 @@ def cache_vqd(query, value):
""" """
c = redisdb.client() c = redisdb.client()
if c: if c:
logger.debug("cache vqd value: %s", value)
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query) key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
if value is not None: c.set(key, value, ex=600)
logger.debug("cache vqd value: %s", value)
c.set(key, value, ex=600)
else:
# remove from cache
c.delete(key)
def _get_vqd_value(query):
res = get('https://lite.duckduckgo.com/lite/?' + urlencode({'q': query}))
doc = lxml.html.fromstring(res.text)
return eval_xpath_getindex(doc, "//input[@name='vqd']/@value", 0, None)
def get_vqd(query): def get_vqd(query):
"""Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached """Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd token from the (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
response. response.
""" """
value = None
c = redisdb.client() c = redisdb.client()
if c: if c:
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query) key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
@ -102,20 +93,12 @@ def get_vqd(query):
logger.debug("re-use cached vqd value: %s", value) logger.debug("re-use cached vqd value: %s", value)
return value return value
value = _get_vqd_value(query) query_url = 'https://lite.duckduckgo.com/lite/?{args}'.format(args=urlencode({'q': query}))
if not value: res = get(query_url)
# seems we got a CAPTCHA for this query string, send a dummy request to doc = lxml.html.fromstring(res.text)
# release the captcha and then fetch the vqd value for the query string value = doc.xpath("//input[@name='vqd']/@value")[0]
# again. logger.debug("new vqd value: %s", value)
logger.warning("vqd token will no longer work, trying to get a new one by sending another query") cache_vqd(query, value)
_get_vqd_value(f'{query[:3]} duckduckgo')
value = _get_vqd_value(query)
if not value:
logger.error("was not able to fetch a valid vqd token from DDG")
else:
logger.debug("new vqd value: %s", value)
cache_vqd(query, value)
return value return value
@ -258,10 +241,10 @@ def request(query, params):
# initial page does not have additional data in the input form # initial page does not have additional data in the input form
if params['pageno'] > 1: if params['pageno'] > 1:
params['data']['o'] = 'json' params['data']['o'] = form_data.get('o', 'json')
params['data']['api'] = 'd.js' params['data']['api'] = form_data.get('api', 'd.js')
params['data']['nextParams'] = '' params['data']['nextParams'] = form_data.get('nextParams', '')
params['data']['v'] = 'l' params['data']['v'] = form_data.get('v', 'l')
params['data']['kl'] = eng_region params['data']['kl'] = eng_region
params['cookies']['kl'] = eng_region params['cookies']['kl'] = eng_region
@ -291,19 +274,23 @@ def response(resp):
# the layout of the HTML tables is different. # the layout of the HTML tables is different.
result_table = result_table[1] result_table = result_table[1]
elif not len(result_table) >= 3: elif not len(result_table) >= 3:
# no more results / if we have the vqd token in cache, it's no longer # no more results
# valid and has to be deleted
cache_vqd(resp.search_params['data']['q'], None)
return [] return []
else: else:
result_table = result_table[2] result_table = result_table[2]
# update form data from response # update form data from response
form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..') form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
if len(form): if len(form):
value = eval_xpath_getindex(form[0], "//input[@name='vqd']/@value", 0, None)
form = form[0]
form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
logger.debug('form_data: %s', form_data)
value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
query = resp.search_params['data']['q'] query = resp.search_params['data']['q']
if value: cache_vqd(query, value)
cache_vqd(query, value)
tr_rows = eval_xpath(result_table, './/tr') tr_rows = eval_xpath(result_table, './/tr')
# In the last <tr> is the form of the 'previous/next page' links # In the last <tr> is the form of the 'previous/next page' links