[fix] html tag removal

2024-01-01 19:24:07 +01:00 · 2014-01-23 11:08:08 +01:00 · 2014-01-23 11:08:08 +01:00 · 59eeeaab87
commit 59eeeaab87
parent ba0f818e89
1 changed file with 2 additions and 1 deletion
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -2,6 +2,7 @@ from lxml import html
 from urllib import urlencode, unquote
 from urlparse import urlparse, urljoin
 from lxml.etree import _ElementStringResult
+from searx.utils import html_to_text

 search_url = None
 url_xpath = None
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
        return ''.join(xpath_results)
    else:
        # it's a element
-        return xpath_results.text_content()
+        return html_to_text(xpath_results.text_content())


 def extract_url(xpath_results):