A small fix, and updated the tests for content highlighting

This commit is contained in:
ahmad-alkadri 2023-01-15 15:08:11 +00:00 committed by Ahmad Alkadri
parent 6c421110b5
commit 99b5272d9a
2 changed files with 21 additions and 17 deletions

View File

@ -124,7 +124,8 @@ def contains_cjko(s: str) -> bool:
Returns: Returns:
bool: True if the input s contains the characters and False otherwise. bool: True if the input s contains the characters and False otherwise.
""" """
unicode_ranges = ('\u4e00-\u9fff' # Chinese characters unicode_ranges = (
'\u4e00-\u9fff' # Chinese characters
'\u3040-\u309f' # Japanese hiragana '\u3040-\u309f' # Japanese hiragana
'\u30a0-\u30ff' # Japanese katakana '\u30a0-\u30ff' # Japanese katakana
'\u4e00-\u9faf' # Japanese kanji '\u4e00-\u9faf' # Japanese kanji
@ -168,6 +169,8 @@ def highlight_content(content, query):
querysplit = query.split() querysplit = query.split()
queries = [] queries = []
for qs in querysplit: for qs in querysplit:
qs = qs.replace("'", "").replace('"', '').replace(" ", "")
if len(qs) > 0:
queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U)) queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
if len(queries) > 0: if len(queries) > 0:
for q in set(queries): for q in set(queries):

View File

@ -28,9 +28,9 @@ class TestWebUtils(SearxTestCase):
content = 'a' content = 'a'
query = 'test' query = 'test'
self.assertEqual(webutils.highlight_content(content, query), content) self.assertEqual(webutils.highlight_content(content, query), 'a')
query = 'a test' query = 'a test'
self.assertEqual(webutils.highlight_content(content, query), content) self.assertEqual(webutils.highlight_content(content, query), '<span class="highlight">a</span>')
data = ( data = (
('" test "', 'a test string', 'a <span class="highlight">test</span> string'), ('" test "', 'a test string', 'a <span class="highlight">test</span> string'),
@ -38,22 +38,23 @@ class TestWebUtils(SearxTestCase):
( (
'a test', 'a test',
'this is a test string that matches entire query', 'this is a test string that matches entire query',
'this is <span class="highlight">a test</span> string that matches entire query', 'this is <span class="highlight">a</span> <span class="highlight">test</span> string that matches entire query',
), ),
( (
'this a test', 'this a test',
'this is a string to test.', 'this is a string to test.',
( (
'<span class="highlight">this</span> is<span class="highlight"> a </span>' '<span class="highlight">this</span> is <span class="highlight">a</span> string to <span class="highlight">test</span>.'
'string to <span class="highlight">test</span>.'
), ),
), ),
( (
'match this "exact phrase"', 'match this "exact phrase"',
'this string contains the exact phrase we want to match', 'this string contains the exact phrase we want to match',
( ''.join(
'<span class="highlight">this</span> string contains the <span class="highlight">exact</span>' [
' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>' '<span class="highlight">this</span> string contains the <span class="highlight">exact</span> ',
'<span class="highlight">phrase</span> we want to <span class="highlight">match</span>',
]
), ),
), ),
) )