mirror of https://github.com/searxng/searxng.git
[fix] engine qwant (web-lite) - ignore advertisements
Closes: https://github.com/searxng/searxng/issues/2812 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
3ac7c40b6a
commit
043dcbf7c5
|
@@ -159,9 +159,12 @@ def parse_web_lite(resp):
|
||||||
dom = lxml.html.fromstring(resp.text)
|
dom = lxml.html.fromstring(resp.text)
|
||||||
|
|
||||||
for item in eval_xpath_list(dom, '//section/article'):
|
for item in eval_xpath_list(dom, '//section/article'):
|
||||||
|
if eval_xpath(item, "./span[contains(@class, 'tooltip')]"):
|
||||||
|
# ignore randomly interspersed advertisements
|
||||||
|
continue
|
||||||
results.append(
|
results.append(
|
||||||
{
|
{
|
||||||
'url': extract_text(eval_xpath(item, './span')),
|
'url': extract_text(eval_xpath(item, "./span[contains(@class, 'url partner')]")),
|
||||||
'title': extract_text(eval_xpath(item, './h2/a')),
|
'title': extract_text(eval_xpath(item, './h2/a')),
|
||||||
'content': extract_text(eval_xpath(item, './p')),
|
'content': extract_text(eval_xpath(item, './p')),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue