From 9c22381234ae8f42a953d4dbd9c286e98366829e Mon Sep 17 00:00:00 2001
From: allixx <1695323+allixx@users.noreply.github.com>
Date: Tue, 19 Dec 2023 11:21:54 +0300
Subject: [PATCH] [fix] do highlight replacement at once
Highlights all search queries in a search result in one go.
Fixes the case where the search query contains a word from the highlight HTML code,
which causes broken HTML to appear in search results.
Closes #3057
---
searx/webutils.py | 6 ++----
tests/unit/test_webutils.py | 5 +++++
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/searx/webutils.py b/searx/webutils.py
index bfc6b22f7..8cdcab84b 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -290,10 +290,8 @@ def highlight_content(content, query):
if len(qs) > 0:
queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
if len(queries) > 0:
- for q in set(queries):
- content = re.sub(
- regex_highlight_cjk(q), f'{q}'.replace('\\', r'\\'), content
- )
+ regex = re.compile("|".join(map(regex_highlight_cjk, queries)))
+ return regex.sub(lambda match: f'{match.group(0)}'.replace('\\', r'\\'), content)
return content
diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py
index 244d2b180..b4395539b 100644
--- a/tests/unit/test_webutils.py
+++ b/tests/unit/test_webutils.py
@@ -57,6 +57,11 @@ class TestWebUtils(SearxTestCase):
]
),
),
+ (
+ 'a class',
+ 'a string with class.',
+ 'a string with class.',
+ ),
)
for query, content, expected in data:
self.assertEqual(webutils.highlight_content(content, query), expected)