[pylint] add scripts from searxng_extra/update to pylint

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-01-03 12:58:48 +01:00
parent ffea5d8ef5
commit 295876abaa
5 changed files with 49 additions and 38 deletions

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""This script saves `Ahmia's blacklist`_ for onion sites. """This script saves `Ahmia's blacklist`_ for onion sites.
@ -21,9 +22,7 @@ def fetch_ahmia_blacklist():
resp = requests.get(URL, timeout=3.0) resp = requests.get(URL, timeout=3.0)
if resp.status_code != 200: if resp.status_code != 200:
raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
else: return resp.text.split()
blacklist = resp.text.split()
return blacklist
def get_ahmia_blacklist_filename(): def get_ahmia_blacklist_filename():
@ -32,5 +31,5 @@ def get_ahmia_blacklist_filename():
if __name__ == '__main__': if __name__ == '__main__':
blacklist = fetch_ahmia_blacklist() blacklist = fetch_ahmia_blacklist()
with open(get_ahmia_blacklist_filename(), "w") as f: with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f:
f.write('\n'.join(blacklist)) f.write('\n'.join(blacklist))

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine. """Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
@ -7,13 +8,15 @@ Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
<.github/workflows/data-update.yml>`). <.github/workflows/data-update.yml>`).
""" """
# pylint: disable=invalid-name
import re import re
import unicodedata import unicodedata
import json import json
# set path # set path
from sys import path from os.path import join
from os.path import realpath, dirname, join
from searx import searx_dir from searx import searx_dir
from searx.locales import LOCALE_NAMES from searx.locales import LOCALE_NAMES

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch website description from websites and from """Fetch website description from websites and from
@ -8,6 +9,8 @@ Output file: :origin:`searx/data/engine_descriptions.json`.
""" """
# pylint: disable=invalid-name, global-statement
import json import json
from urllib.parse import urlparse from urllib.parse import urlparse
from os.path import join from os.path import join
@ -109,7 +112,7 @@ def get_wikipedia_summary(lang, pageid):
response.raise_for_status() response.raise_for_status()
api_result = json.loads(response.text) api_result = json.loads(response.text)
return api_result.get('extract') return api_result.get('extract')
except: except Exception: # pylint: disable=broad-except
return None return None
@ -141,7 +144,7 @@ def get_website_description(url, lang1, lang2=None):
try: try:
response = searx.network.get(url, headers=headers, timeout=10) response = searx.network.get(url, headers=headers, timeout=10)
response.raise_for_status() response.raise_for_status()
except Exception: except Exception: # pylint: disable=broad-except
return (None, None) return (None, None)
try: try:

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch firefox useragent signatures """Fetch firefox useragent signatures
@ -9,20 +10,21 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
""" """
import json import json
import requests
import re import re
from os.path import dirname, join from os.path import join
from urllib.parse import urlparse, urljoin from urllib.parse import urlparse, urljoin
from distutils.version import LooseVersion, StrictVersion from distutils.version import LooseVersion
import requests
from lxml import html from lxml import html
from searx import searx_dir from searx import searx_dir
URL = 'https://ftp.mozilla.org/pub/firefox/releases/' URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
RELEASE_PATH = '/pub/firefox/releases/' RELEASE_PATH = '/pub/firefox/releases/'
NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$') NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$') # BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$') # ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
# #
useragents = { useragents = {
@ -39,20 +41,19 @@ def fetch_firefox_versions():
resp = requests.get(URL, timeout=2.0) resp = requests.get(URL, timeout=2.0)
if resp.status_code != 200: if resp.status_code != 200:
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
else: dom = html.fromstring(resp.text)
dom = html.fromstring(resp.text) versions = []
versions = []
for link in dom.xpath('//a/@href'): for link in dom.xpath('//a/@href'):
url = urlparse(urljoin(URL, link)) url = urlparse(urljoin(URL, link))
path = url.path path = url.path
if path.startswith(RELEASE_PATH): if path.startswith(RELEASE_PATH):
version = path[len(RELEASE_PATH) : -1] version = path[len(RELEASE_PATH) : -1]
if NORMAL_REGEX.match(version): if NORMAL_REGEX.match(version):
versions.append(LooseVersion(version)) versions.append(LooseVersion(version))
list.sort(versions, reverse=True) list.sort(versions, reverse=True)
return versions return versions
def fetch_firefox_last_versions(): def fetch_firefox_last_versions():

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""This script generates languages.py from intersecting each engine's supported """This script generates languages.py from intersecting each engine's supported
languages. languages.
@ -9,6 +11,8 @@ Output files: :origin:`searx/data/engines_languages.json` and
""" """
# pylint: disable=invalid-name
import json import json
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
@ -28,7 +32,7 @@ languages_file = Path(searx_dir) / 'languages.py'
def fetch_supported_languages(): def fetch_supported_languages():
set_timeout_for_thread(10.0) set_timeout_for_thread(10.0)
engines_languages = dict() engines_languages = {}
names = list(engines) names = list(engines)
names.sort() names.sort()
@ -36,7 +40,7 @@ def fetch_supported_languages():
if hasattr(engines[engine_name], 'fetch_supported_languages'): if hasattr(engines[engine_name], 'fetch_supported_languages'):
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
if type(engines_languages[engine_name]) == list: if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck
engines_languages[engine_name] = sorted(engines_languages[engine_name]) engines_languages[engine_name] = sorted(engines_languages[engine_name])
print("fetched languages from %s engines" % len(engines_languages)) print("fetched languages from %s engines" % len(engines_languages))
@ -59,7 +63,7 @@ def get_locale(lang_code):
# Join all language lists. # Join all language lists.
def join_language_lists(engines_languages): def join_language_lists(engines_languages):
language_list = dict() language_list = {}
for engine_name in engines_languages: for engine_name in engines_languages:
for lang_code in engines_languages[engine_name]: for lang_code in engines_languages[engine_name]:
@ -95,7 +99,7 @@ def join_language_lists(engines_languages):
'name': language_name, 'name': language_name,
'english_name': english_name, 'english_name': english_name,
'counter': set(), 'counter': set(),
'countries': dict(), 'countries': {},
} }
# add language with country if not in list # add language with country if not in list
@ -123,6 +127,7 @@ def join_language_lists(engines_languages):
def filter_language_list(all_languages): def filter_language_list(all_languages):
min_engines_per_lang = 13 min_engines_per_lang = 13
min_engines_per_country = 7 min_engines_per_country = 7
# pylint: disable=consider-using-dict-items, consider-iterating-dictionary
main_engines = [ main_engines = [
engine_name engine_name
for engine_name in engines.keys() for engine_name in engines.keys()
@ -142,7 +147,7 @@ def filter_language_list(all_languages):
} }
def _copy_lang_data(lang, country_name=None): def _copy_lang_data(lang, country_name=None):
new_dict = dict() new_dict = {}
new_dict['name'] = all_languages[lang]['name'] new_dict['name'] = all_languages[lang]['name']
new_dict['english_name'] = all_languages[lang]['english_name'] new_dict['english_name'] = all_languages[lang]['english_name']
if country_name: if country_name:
@ -150,10 +155,10 @@ def filter_language_list(all_languages):
return new_dict return new_dict
# for each language get country codes supported by most engines or at least one country code # for each language get country codes supported by most engines or at least one country code
filtered_languages_with_countries = dict() filtered_languages_with_countries = {}
for lang, lang_data in filtered_languages.items(): for lang, lang_data in filtered_languages.items():
countries = lang_data['countries'] countries = lang_data['countries']
filtered_countries = dict() filtered_countries = {}
# get language's country codes with enough supported engines # get language's country codes with enough supported engines
for lang_country, country_data in countries.items(): for lang_country, country_data in countries.items():
@ -215,7 +220,7 @@ def write_languages_file(languages):
language_codes = tuple(language_codes) language_codes = tuple(language_codes)
with open(languages_file, 'w') as new_file: with open(languages_file, 'w', encoding='utf-8') as new_file:
file_content = "{file_headers} {language_codes},\n)\n".format( file_content = "{file_headers} {language_codes},\n)\n".format(
# fmt: off # fmt: off
file_headers = '\n'.join(file_headers), file_headers = '\n'.join(file_headers),
@ -228,7 +233,7 @@ def write_languages_file(languages):
if __name__ == "__main__": if __name__ == "__main__":
load_engines(settings['engines']) load_engines(settings['engines'])
engines_languages = fetch_supported_languages() _engines_languages = fetch_supported_languages()
all_languages = join_language_lists(engines_languages) _all_languages = join_language_lists(_engines_languages)
filtered_languages = filter_language_list(all_languages) _filtered_languages = filter_language_list(_all_languages)
write_languages_file(filtered_languages) write_languages_file(_filtered_languages)