NOTE(review): this patch was stored collapsed onto a single line. Line breaks
were restored from the hunk headers (old/new line counts match). Leading
indentation was reconstructed from Python block structure and should be
confirmed against the target files before applying with strict whitespace.
The lines surrounding "Test text" inside the html literal are empty in the
stored copy; presumably they held HTML markup -- confirm against the upstream
commit.

diff --git a/searx/tests/test_utils.py b/searx/tests/test_utils.py
index 817fd4372..abe411c2b 100644
--- a/searx/tests/test_utils.py
+++ b/searx/tests/test_utils.py
@@ -10,6 +10,11 @@ class TestUtils(SearxTestCase):
         self.assertIsNotNone(utils.gen_useragent())
         self.assertTrue(utils.gen_useragent().startswith('Mozilla'))
 
+    def test_searx_useragent(self):
+        self.assertIsInstance(utils.searx_useragent(), str)
+        self.assertIsNotNone(utils.searx_useragent())
+        self.assertTrue(utils.searx_useragent().startswith('searx'))
+
     def test_highlight_content(self):
         self.assertEqual(utils.highlight_content(0, None), None)
         self.assertEqual(utils.highlight_content(None, None), None)
@@ -29,6 +34,23 @@ class TestUtils(SearxTestCase):
         query = 'a test'
         self.assertEqual(utils.highlight_content(content, query), content)
 
+    def test_html_to_text(self):
+        html = """
+
+
+
+
+
+
+
+        Test text
+
+
+        """
+        self.assertIsInstance(utils.html_to_text(html), unicode)
+        self.assertIsNotNone(utils.html_to_text(html))
+        self.assertEqual(utils.html_to_text(html), "Test text")
+
 
 class TestHTMLTextExtractor(SearxTestCase):
 
diff --git a/searx/utils.py b/searx/utils.py
index f15f8a4bc..59d4b85be 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -115,7 +115,7 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(name)
 
     def get_text(self):
-        return u''.join(self.result)
+        return u''.join(self.result).strip()
 
 
 def html_to_text(html):