mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
[docs] update_external_bangs.py - implement script to document bangs
Function 'write_external_bang_doc(..)' generates documentation of the external bangs in folder 'docs/admin/external_bang'. Documentation will be updated when external bangs are updated:: ./manage pyenv.cmd searx_extra/update/update_external_bangs.py Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
92c68fe636
commit
e32979aa3e
4 changed files with 190 additions and 15 deletions
19
docs/admin/external_bang.rst
Normal file
19
docs/admin/external_bang.rst
Normal file
|
@ -0,0 +1,19 @@
|
|||
.. _external bang:
|
||||
|
||||
===============
|
||||
External !!bang
|
||||
===============
|
||||
|
||||
.. warning::
|
||||
|
||||
*External Bangs* are shortcuts that quickly redirect you to search results on
|
||||
other sites.
|
||||
|
||||
**The search is done on the external site without protecting your privacy.**
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents
|
||||
:glob:
|
||||
|
||||
external_bang/*/index
|
|
@ -14,6 +14,7 @@ Administrator documentation
|
|||
installation-docker
|
||||
update-searx
|
||||
engines/index
|
||||
external_bang
|
||||
api
|
||||
architecture
|
||||
filtron
|
||||
|
|
|
@ -9,13 +9,17 @@ Searx allows you to modify the default categories, engines and search language
|
|||
via the search query.
|
||||
|
||||
Prefix ``!``
|
||||
to set Category/engine
|
||||
to set category or :ref:`engine <configured engines>`
|
||||
|
||||
Prefix: ``!!``
|
||||
to take you to search results on other sites (also known as :ref:`bang <external bang>`)
|
||||
|
||||
Prefix: ``:``
|
||||
to set language
|
||||
|
||||
Prefix: ``?``
|
||||
to add engines and categories to the currently selected categories
|
||||
to add :ref:`engines <configured engines>` and categories to the
|
||||
currently selected categories
|
||||
|
||||
Abbreviations of the engines and languages are also accepted. Engine/category
|
||||
modifiers are chainable and inclusive (e.g. with :search:`!it !ddg !wp qwer
|
||||
|
@ -28,6 +32,10 @@ categories and languages.
|
|||
Examples
|
||||
========
|
||||
|
||||
A *bang* takes you to search results on Wikipedia:
|
||||
|
||||
- :search:`!!w gallileo <?q=%21%21w%20gallileo>`
|
||||
|
||||
Search in wikipedia for ``qwer``:
|
||||
|
||||
- :search:`!wp qwer <?q=%21wp%20qwer>` or
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
"""
|
||||
Update searx/data/external_bangs.json using the duckduckgo bangs.
|
||||
|
||||
https://duckduckgo.com/newbang loads
|
||||
https://duckduckgo.com/newbang loads:
|
||||
|
||||
* a javascript which provides the bang version ( https://duckduckgo.com/bv1.js )
|
||||
* a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example )
|
||||
|
||||
|
@ -14,14 +15,18 @@ but most probably it will requires to update RE_BANG_VERSION
|
|||
"""
|
||||
# pylint: disable=C0116
|
||||
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
from os.path import join
|
||||
|
||||
import httpx
|
||||
from fspath import FSPath
|
||||
|
||||
from searx import searx_dir # pylint: disable=E0401 C0413
|
||||
|
||||
from searx.external_bang import (
|
||||
get_bang_definition_and_ac,
|
||||
resolve_bang_definition,
|
||||
)
|
||||
|
||||
# from https://duckduckgo.com/newbang
|
||||
URL_BV1 = 'https://duckduckgo.com/bv1.js'
|
||||
|
@ -100,6 +105,7 @@ def optimize_leaf(parent, parent_key, node):
|
|||
def parse_ddg_bangs(ddg_bangs):
|
||||
bang_trie = {}
|
||||
bang_urls = {}
|
||||
bang_doc = {}
|
||||
|
||||
for bang_definition in ddg_bangs:
|
||||
# bang_list
|
||||
|
@ -129,6 +135,7 @@ def parse_ddg_bangs(ddg_bangs):
|
|||
|
||||
# bang name
|
||||
bang = bang_definition['t']
|
||||
bang_doc[bang] = (bang_url, bang_definition)
|
||||
|
||||
# bang_trie
|
||||
t = bang_trie
|
||||
|
@ -140,19 +147,159 @@ def parse_ddg_bangs(ddg_bangs):
|
|||
merge_when_no_leaf(bang_trie)
|
||||
optimize_leaf(None, None, bang_trie)
|
||||
|
||||
return bang_trie
|
||||
return bang_trie, bang_doc
|
||||
|
||||
|
||||
def get_bangs_filename():
    """Return the path of ``external_bangs.json`` inside the ``data`` folder
    below ``searx_dir``.

    """
    # a single join() call yields the same path as the nested join() calls
    return join(searx_dir, "data", "external_bangs.json")
|
||||
def sort_by_category(bang_doc):
    """Group the bangs by category and sub-category.

    :param bang_doc: dict that maps a bang name to a tuple
        ``(bang_url, bang_definition)``, where ``bang_definition`` is a dict.
    :returns: nested dict of the form
        ``{category: {sub_category: {bang_name: (bang_url, bang_definition)}}}``

    The category is read from key ``c`` of the bang definition (default
    ``'no category'``), the sub-category from key ``sc`` (default
    ``'no sub-category'``).

    """
    ret_val = {}
    for bang_name, (bang_url, bang_definition) in bang_doc.items():
        categ = bang_definition.get('c', 'no category')
        sub_categ = bang_definition.get('sc', 'no sub-category')

        # setdefault() creates the (sub-)category dict on first use and returns
        # the existing one otherwise
        bangs = ret_val.setdefault(categ, {}).setdefault(sub_categ, {})
        bangs[bang_name] = (bang_url, bang_definition)

    return ret_val
|
||||
|
||||
# reST comment placed at the top of every generated file
FILE_INTRO = """\
.. Do not edit this file, this file was created by:
..
.. ./manage pyenv.cmd searx_extra/update/update_external_bangs.py
"""

# toctree of a category's index.rst.  The toctree entries are appended by
# write_external_bang_doc() and must use the same indentation as the options.
CATEGORY_INDEX = """
.. toctree::
   :maxdepth: 2
   :caption: Contents
   :glob:
"""

# Head of the table with the bangs of one sub-category.  In reST the options
# and the content of a directive have to be indented relative to the directive.
TABLE_INTRO = """
.. flat-table:: Bangs in *{categ_name} / {sub_categ_name}*
   :header-rows: 1
   :stub-columns: 1
   :widths: 2 1 4 6

   * - Bang
     - Rank
     - Description
     - URL
"""

# One table row per bang, indentation has to match the rows in TABLE_INTRO
TABLE_ROW = """
   * - `!!{bang_name} <{url}>`__
     - {rank}
     - {bang_description}
     - ``{url}``
"""


def write_external_bang_doc(external_bangs_db, bang_doc):
    """Generate documentation of the external bangs in
    ``docs/admin/external_bang``.

    For each category one folder with an ``index.rst`` (a toctree of the
    sub-categories) is created and for each sub-category one reST file with a
    table of its bangs.

    :param external_bangs_db: bang database that is passed to
        ``get_bang_definition_and_ac`` to look up the bang definitions.
    :param bang_doc: dict that maps a bang name to a tuple
        ``(bang_url, bang_definition)``, as expected by ``sort_by_category``.

    Bangs whose definition can't be resolved (``AttributeError``) are reported
    on stderr and left out of the table.

    """
    # pylint: disable=too-many-locals

    def rst_title(name, tag="=", topline=True):
        """Return ``name`` as reST title, underlined (and, with ``topline``,
        also overlined) by ``tag``."""
        line = len(name) * tag
        ret_val = "\n%s" % (name,)
        if topline:
            ret_val = "\n" + line + ret_val
        ret_val += "\n" + line + "\n"
        return ret_val

    def get_valid_filename(fname):
        """Replace spaces by underscores and drop all characters except word
        characters, ``-`` and ``.``."""
        fname = str(fname).strip().replace(' ', '_')
        return re.sub(r'(?u)[^-\w.]', '', fname)

    def lower(value):
        return value.lower()

    folder = FSPath(searx_dir + "/../docs/admin/external_bang")
    print(f're-create external bang documentation in: {folder}')
    folder.delete()
    folder.makedirs()

    bang_doc = sort_by_category(bang_doc)

    for categ_name in sorted(bang_doc, key=lower):
        categ = bang_doc[categ_name]

        # for each category create one folder with an index.rst in

        categ_folder = folder / get_valid_filename(categ_name)
        categ_folder.makedirs()

        sub_categ_list = sorted(categ, key=lower)

        fname = categ_folder / 'index.rst'
        print(f"create file: {fname}")
        # explicit encoding: names and descriptions may contain non-ASCII
        # characters and the locale's default encoding may not be UTF-8
        with open(fname, 'w', encoding='utf-8') as out:
            out.write(FILE_INTRO)
            out.write(rst_title(categ_name, topline=True))
            out.write(CATEGORY_INDEX)
            for sub_categ_name in sub_categ_list:
                # same indentation as the toctree options in CATEGORY_INDEX
                out.write("\n   %s" % get_valid_filename(sub_categ_name))

        for sub_categ_name in sub_categ_list:
            sub_categ = categ[sub_categ_name]

            # for each sub-category create one reST-file

            fname = categ_folder / get_valid_filename(sub_categ_name + ".rst")
            print(f"create file: {fname}")
            with open(fname, 'w', encoding='utf-8') as out:
                out.write(FILE_INTRO)
                out.write(rst_title(sub_categ_name))
                out.write(TABLE_INTRO.format(
                    categ_name=categ_name,
                    sub_categ_name=sub_categ_name,
                ))

                # for each bang create one table row

                for bang_name in sorted(sub_categ, key=lower):
                    _, bang_definition = sub_categ[bang_name]
                    bang_description = bang_definition.get('s', '...')
                    bang_node, _ = get_bang_definition_and_ac(external_bangs_db, bang_name)

                    try:
                        url, rank = resolve_bang_definition(bang_node, '')
                    except AttributeError:
                        # There is one defect entry in external_bangs_db where
                        # the bang_node is a dict and not a string ... why?
                        sys.stderr.write("ignore error with bang:: '%s'\n" % (bang_node, ))
                        continue

                    out.write(TABLE_ROW.format(
                        bang_name=bang_name,
                        url=url.strip(),
                        rank=rank,
                        bang_description=bang_description,
                    ))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Fetch the bang definitions once; parse_ddg_bangs() returns the trie that
    # is stored in the JSON file and the data needed for the documentation.
    _bangs_url, _bangs_version = get_bang_url()

    print(f'fetch & parse bangs from {_bangs_url}')
    _bang_trie, _bang_doc = parse_ddg_bangs(fetch_ddg_bangs(_bangs_url))

    # generate JSON file for: searx.data.EXTERNAL_BANGS

    _fname = FSPath(searx_dir + "/data/external_bangs.json")
    print(f'update file: {_fname}')
    _external_bangs_db = {
        'version': _bangs_version,
        'trie': _bang_trie,
    }
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # encoding must not depend on the locale
    with open(_fname, 'w', encoding='utf-8') as fp:
        json.dump(_external_bangs_db, fp, ensure_ascii=False, indent=4)

    # generate documentation

    write_external_bang_doc(_external_bangs_db, _bang_doc)
|
||||
|
|
Loading…
Add table
Reference in a new issue