Lazy load fasttext-predict

This commit is contained in:
Alexandre Flament 2022-12-26 08:32:58 +00:00
parent ec4237586d
commit f3515041af
1 changed files with 6 additions and 6 deletions

View File

@ -15,7 +15,6 @@ from os.path import splitext, join
from random import choice from random import choice
from html.parser import HTMLParser from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse from urllib.parse import urljoin, urlparse
import fasttext
from lxml import html from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
@ -51,12 +50,9 @@ _STORAGE_UNIT_VALUE: Dict[str, int] = {
_XPATH_CACHE: Dict[str, XPath] = {} _XPATH_CACHE: Dict[str, XPath] = {}
_LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {} _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
_FASTTEXT_MODEL: Optional[fasttext.FastText._FastText] = None _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
"""fasttext model to predict laguage of a search term""" """fasttext model to predict laguage of a search term"""
# Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
fasttext.FastText.eprint = lambda x: None
class _NotSetClass: # pylint: disable=too-few-public-methods class _NotSetClass: # pylint: disable=too-few-public-methods
"""Internal class for this module, do not create instance of this class. """Internal class for this module, do not create instance of this class.
@ -630,9 +626,13 @@ def eval_xpath_getindex(elements: ElementBase, xpath_spec: XPathSpecType, index:
return default return default
def _get_fasttext_model() -> fasttext.FastText._FastText: def _get_fasttext_model() -> "fasttext.FastText._FastText":
global _FASTTEXT_MODEL # pylint: disable=global-statement global _FASTTEXT_MODEL # pylint: disable=global-statement
if _FASTTEXT_MODEL is None: if _FASTTEXT_MODEL is None:
import fasttext # pylint: disable=import-outside-toplevel
# Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
fasttext.FastText.eprint = lambda x: None
_FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz')) _FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz'))
return _FASTTEXT_MODEL return _FASTTEXT_MODEL