diff --git a/searx/webutils.py b/searx/webutils.py
index 150b376fa..7b9a8045c 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -124,13 +124,14 @@ def contains_cjko(s: str) -> bool:
Returns:
bool: True if the input s contains the characters and False otherwise.
"""
- unicode_ranges = ('\u4e00-\u9fff' # Chinese characters
- '\u3040-\u309f' # Japanese hiragana
- '\u30a0-\u30ff' # Japanese katakana
- '\u4e00-\u9faf' # Japanese kanji
- '\uac00-\ud7af' # Korean hangul syllables
- '\u1100-\u11ff' # Korean hangul jamo
- )
+ unicode_ranges = (
+ '\u4e00-\u9fff' # Chinese characters
+ '\u3040-\u309f' # Japanese hiragana
+ '\u30a0-\u30ff' # Japanese katakana
+ '\u4e00-\u9faf' # Japanese kanji
+ '\uac00-\ud7af' # Korean hangul syllables
+ '\u1100-\u11ff' # Korean hangul jamo
+ )
return bool(re.search(fr'[{unicode_ranges}]', s))
@@ -168,7 +169,9 @@ def highlight_content(content, query):
querysplit = query.split()
queries = []
for qs in querysplit:
- queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
+ qs = qs.replace("'", "").replace('"', '').replace(" ", "")
+ if len(qs) > 0:
+ queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
if len(queries) > 0:
for q in set(queries):
content = re.sub(regex_highlight_cjk(q), f'{q}', content)
diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py
index 31a0f86ce..acf1aeeb7 100644
--- a/tests/unit/test_webutils.py
+++ b/tests/unit/test_webutils.py
@@ -28,32 +28,33 @@ class TestWebUtils(SearxTestCase):
content = 'a'
query = 'test'
- self.assertEqual(webutils.highlight_content(content, query), content)
+ self.assertEqual(webutils.highlight_content(content, query), 'a')
query = 'a test'
- self.assertEqual(webutils.highlight_content(content, query), content)
+ self.assertEqual(webutils.highlight_content(content, query), 'a')
data = (
('" test "', 'a test string', 'a test string'),
- ('"a"', 'this is a test string', 'this is a test string'),
+ ('"a"', 'this is a test string', 'this is a test string'),
(
'a test',
'this is a test string that matches entire query',
- 'this is a test string that matches entire query',
+ 'this is a test string that matches entire query',
),
(
'this a test',
'this is a string to test.',
(
- 'this is a '
- 'string to test.'
+ 'this is a string to test.'
),
),
(
'match this "exact phrase"',
'this string contains the exact phrase we want to match',
- (
- 'this string contains the exact'
- ' phrase we want to match'
+ ''.join(
+ [
+ 'this string contains the exact ',
+ 'phrase we want to match',
+ ]
),
),
)