From 30c12a206a21531030d369bc5b4b1ff243eeca83 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 19 Sep 2023 12:55:21 +0200 Subject: [PATCH] [fix] engine qwant (web-lite) - ignore advertising ads Closes: https://github.com/searxng/searxng/issues/2812 Signed-off-by: Markus Heiser --- searx/engines/qwant.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 654a76337..168eb860e 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -159,9 +159,12 @@ def parse_web_lite(resp): dom = lxml.html.fromstring(resp.text) for item in eval_xpath_list(dom, '//section/article'): + if eval_xpath(item, "./span[contains(@class, 'tooltip')]"): + # ignore randomly interspersed advertising adds + continue results.append( { - 'url': extract_text(eval_xpath(item, './span')), + 'url': extract_text(eval_xpath(item, "./span[contains(@class, 'url partner')]")), 'title': extract_text(eval_xpath(item, './h2/a')), 'content': extract_text(eval_xpath(item, './p')), }