From eea673831bc83cd9ca85a8c86898df217cd91a28 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 18 Oct 2023 14:34:18 +0200 Subject: [PATCH] [fix] HTMLParser: undocumented not implemented method In Python versions <py3.10 the HTMLParser class has an undocumented, not implemented error() method [1][2]. To behave the same as in higher versions (>=py3.10), an error method is implemented which throws an AssertionError exception like the higher Python versions do [3]. [1] https://github.com/python/cpython/issues/76025 [2] https://bugs.python.org/issue31844 [3] https://github.com/python/cpython/pull/8562 Signed-off-by: Markus Heiser --- searx/utils.py | 14 +++++++++++++- tests/unit/test_utils.py | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/searx/utils.py b/searx/utils.py index 7f6017617..c009c3144 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -15,6 +15,7 @@ from numbers import Number from os.path import splitext, join from random import choice from html.parser import HTMLParser +from html import escape from urllib.parse import urljoin, urlparse from markdown_it import MarkdownIt @@ -88,7 +89,7 @@ class _HTMLTextExtractorException(Exception): """Internal exception raised when the HTML is invalid""" -class _HTMLTextExtractor(HTMLParser): # pylint: disable=W0223 # (see https://bugs.python.org/issue31844) +class _HTMLTextExtractor(HTMLParser): """Internal class to extract text from HTML""" def __init__(self): @@ -137,6 +138,11 @@ class _HTMLTextExtractor(HTMLParser): # pylint: disable=W0223 # (see https://b def get_text(self): return ''.join(self.result).strip() + def error(self, message): + # error handle is needed in str: """Extract text from a HTML string @@ -153,12 +159,18 @@ def html_to_text(html_str: str) -> str: >>> html_to_text('Example') 'Example' + + >>> html_to_text(r'regexp: (?