From e32979aa3e9ebb96ceba8a06c3bb46da547022a9 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 5 Jul 2020 10:47:47 +0200 Subject: [PATCH] [docs] update_external_bangs.py - implement script to document bangs Function 'write_external_bang_doc(..)' generates documentation of the external bangs in folder 'docs/admin/external_bang'. Documentation will be updated when external bangs are updated:: ./manage pyenv.cmd searx_extra/update/update_external_bangs.py Signed-off-by: Markus Heiser --- docs/admin/external_bang.rst | 19 +++ docs/admin/index.rst | 1 + docs/user/search_syntax.rst | 12 +- searx_extra/update/update_external_bangs.py | 173 ++++++++++++++++++-- 4 files changed, 190 insertions(+), 15 deletions(-) create mode 100644 docs/admin/external_bang.rst diff --git a/docs/admin/external_bang.rst b/docs/admin/external_bang.rst new file mode 100644 index 000000000..cacabf281 --- /dev/null +++ b/docs/admin/external_bang.rst @@ -0,0 +1,19 @@ +.. _external bang: + +=============== +External !!bang +=============== + +.. warning:: + + *External Bangs* are shortcuts that quickly redirect you to search results on + other sites. + + **The search is done on the external site without protecting your privacy.** + +.. toctree:: + :maxdepth: 2 + :caption: Contents + :glob: + + external_bang/*/index diff --git a/docs/admin/index.rst b/docs/admin/index.rst index 3139db99c..2168fc13b 100644 --- a/docs/admin/index.rst +++ b/docs/admin/index.rst @@ -14,6 +14,7 @@ Administrator documentation installation-docker update-searx engines/index + external_bang api architecture filtron diff --git a/docs/user/search_syntax.rst b/docs/user/search_syntax.rst index 57cb51951..7da85f47f 100644 --- a/docs/user/search_syntax.rst +++ b/docs/user/search_syntax.rst @@ -9,13 +9,17 @@ Searx allows you to modify the default categories, engines and search language via the search query. 
Prefix ``!`` - to set Category/engine + to set category or :ref:`engine ` + +Prefix: ``!!`` + to take you to search results on other sites (also known as :ref:`bang <external bang>`) Prefix: ``:`` to set language Prefix: ``?`` - to add engines and categories to the currently selected categories + to add :ref:`engines ` and categories to the + currently selected categories Abbrevations of the engines and languages are also accepted. Engine/category modifiers are chainable and inclusive (e.g. with :search:`!it !ddg !wp qwer @@ -28,6 +32,10 @@ categories and languages. Examples ======== +*Bangs* take you to search results on Wikipedia: + +- :search:`!!w gallileo ` + Search in wikipedia for ``qwer``: - :search:`!wp qwer ` or diff --git a/searx_extra/update/update_external_bangs.py b/searx_extra/update/update_external_bangs.py index c366fe76b..eb77ac2fa 100755 --- a/searx_extra/update/update_external_bangs.py +++ b/searx_extra/update/update_external_bangs.py @@ -3,7 +3,8 @@ """ Update searx/data/external_bangs.json using the duckduckgo bangs. 
-https://duckduckgo.com/newbang loads +https://duckduckgo.com/newbang loads: + * a javascript which provides the bang version ( https://duckduckgo.com/bv1.js ) * a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example ) @@ -14,14 +15,18 @@ but most probably it will requires to update RE_BANG_VERSION """ # pylint: disable=C0116 +import sys import json import re -from os.path import join import httpx +from fspath import FSPath from searx import searx_dir # pylint: disable=E0401 C0413 - +from searx.external_bang import ( + get_bang_definition_and_ac, + resolve_bang_definition, +) # from https://duckduckgo.com/newbang URL_BV1 = 'https://duckduckgo.com/bv1.js' @@ -100,6 +105,7 @@ def optimize_leaf(parent, parent_key, node): def parse_ddg_bangs(ddg_bangs): bang_trie = {} bang_urls = {} + bang_doc = {} for bang_definition in ddg_bangs: # bang_list @@ -129,6 +135,7 @@ def parse_ddg_bangs(ddg_bangs): # bang name bang = bang_definition['t'] + bang_doc[bang] = (bang_url, bang_definition) # bang_trie t = bang_trie @@ -140,19 +147,159 @@ def parse_ddg_bangs(ddg_bangs): merge_when_no_leaf(bang_trie) optimize_leaf(None, None, bang_trie) - return bang_trie + return bang_trie, bang_doc -def get_bangs_filename(): - return join(join(searx_dir, "data"), "external_bangs.json") +def sort_by_category(bang_doc): + + ret_val = {} + for bang_name, (bang_url, bang_definition) in bang_doc.items(): + + # add category + categ = bang_definition.get('c', 'no category') + ret_val[categ] = ret_val.get(categ, {}) + + # add sub-category + sub_categ = bang_definition.get('sc', 'no sub-category') + ret_val[categ][sub_categ] = ret_val[categ].get(sub_categ, {}) + + # add bang name to sub-category dict and set tuple: bang_url, + # bang_definition + ret_val[categ][sub_categ][bang_name] = bang_url, bang_definition + + return ret_val + +FILE_INTRO = """\ +.. Do not edit this file, this file was created by: +.. +.. 
./manage pyenv.cmd searx_extra/update/update_external_bangs.py +""" + +CATEGORY_INDEX=""" +.. toctree:: + :maxdepth: 2 + :caption: Contents + :glob: +""" + +TABLE_INTRO = """ +.. flat-table:: Bangs in *{categ_name} / {sub_categ_name}* + :header-rows: 1 + :stub-columns: 1 + :widths: 2 1 4 6 + + * - Bang + - Rank + - Description + - URL +""" + +TABLE_ROW = """ + * - `!!{bang_name} <{url}>`__ + - {rank} + - {bang_description} + - ``{url}`` +""" + +def write_external_bang_doc(external_bangs_db, bang_doc): + """Generate documentation of the external bangs in + ``docs/admin/external_bang``. + + """ + # pylint: disable=too-many-locals + def rst_title(name, tag="=", topline=True): + ret_val = "\n%s" % (name,) + if topline: + ret_val = "\n" + len(name) * tag + ret_val + ret_val += "\n" + len(name) * tag + "\n" + return ret_val + + def get_valid_filename(fname): + fname = str(fname).strip().replace(' ', '_') + return re.sub(r'(?u)[^-\w.]', '', fname) + + folder = FSPath(searx_dir + "/../docs/admin/external_bang") + print(f're-create external bang documentation in: {folder}') + folder.delete() + folder.makedirs() + + bang_doc = sort_by_category(bang_doc) + categ_list = list(bang_doc) + categ_list.sort(key=lambda v: v.lower()) + + for categ_name in categ_list: + categ = bang_doc[categ_name] + + # for each category create on folder with index.rst in + + categ_folder = folder / get_valid_filename(categ_name) + categ_folder.makedirs() + + sub_categ_list = list(categ) + sub_categ_list.sort(key=lambda v: v.lower()) + + fname = categ_folder / 'index.rst' + print(f"create file: {fname}") + with open(fname, 'w') as out: + out.write(FILE_INTRO.format(**locals())) + out.write(rst_title(categ_name, topline=True)) + out.write(CATEGORY_INDEX.format(**locals())) + for sub_categ_name in sub_categ_list: + out.write("\n %s" % get_valid_filename(sub_categ_name)) + + for sub_categ_name in sub_categ_list: + sub_categ = categ[sub_categ_name] + + # for each sub-category create on reST-file + + 
fname = categ_folder / get_valid_filename(sub_categ_name + ".rst") + print(f"create file: {fname}") + with open(fname, 'w') as out: + out.write(FILE_INTRO.format(**locals())) + out.write(rst_title(sub_categ_name)) + out.write(TABLE_INTRO.format(**locals())) + + bang_name_list = list(sub_categ) + bang_name_list.sort(key=lambda v: v.lower()) + + # for each bang create on table row + + for bang_name in bang_name_list: + # pylint: disable=possibly-unused-variable + bang_url, bang_definition = sub_categ[bang_name] + bang_description = bang_definition.get('s','...') + bang_node, bang_ac_list = get_bang_definition_and_ac(external_bangs_db, bang_name) + bang_ac_list = ', '.join(bang_ac_list) or '...' + + try: + url, rank = resolve_bang_definition(bang_node, '') + url = url.strip() + except AttributeError: + # There is one defect entry in external_bangs_db where + # the bang_node is a dict and not a string ... why? + sys.stderr.write("ignore error with bang:: '%s'\n" % (bang_node, )) + continue + + out.write(TABLE_ROW.format(**locals())) if __name__ == '__main__': - bangs_url, bangs_version = get_bang_url() - print(f'fetch bangs from {bangs_url}') - output = { - 'version': bangs_version, - 'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url)) + + _bangs_url, _bangs_version = get_bang_url() + + print(f'fetch & parse bangs from {_bangs_url}') + _bang_trie, _bang_doc = parse_ddg_bangs(fetch_ddg_bangs(_bangs_url)) + + # generate JSON file for: searx.data.EXTERNAL_BANGS + + _fname = FSPath(searx_dir + "/data/external_bangs.json") + print(f'update file: {_fname}') + _external_bangs_db = { + 'version': _bangs_version, + 'trie': _bang_trie, } - with open(get_bangs_filename(), 'w') as fp: - json.dump(output, fp, ensure_ascii=False, indent=4) + with open(_fname, 'w') as fp: + json.dump(_external_bangs_db, fp, ensure_ascii=False, indent=4) + + # generate documentation + write_external_bang_doc(_external_bangs_db, _bang_doc)