diff --git a/docs/admin/api.rst b/docs/admin/api.rst index 8f4552f9c..8bd563ba7 100644 --- a/docs/admin/api.rst +++ b/docs/admin/api.rst @@ -84,9 +84,9 @@ HTML of the site. URL of the SearXNG instance and values are customizable. .. code:: html
- - - - - + + + + +
diff --git a/docs/dev/engines/online/gitlab.rst b/docs/dev/engines/online/gitlab.rst new file mode 100644 index 000000000..5f0d3e3d1 --- /dev/null +++ b/docs/dev/engines/online/gitlab.rst @@ -0,0 +1,8 @@ +.. _gitlab engine: + +====== +GitLab +====== + +.. automodule:: searx.engines.gitlab + :members: diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py index 7a484d6d5..8f8e9839c 100644 --- a/searx/botdetection/link_token.py +++ b/searx/botdetection/link_token.py @@ -28,7 +28,7 @@ And in the HTML template from flask a stylesheet link is needed (the value of + type="text/css" > .. _X-Forwarded-For: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index c103d43a3..e9bc47ade 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -3,40 +3,40 @@ "all_locale": "", "custom": { "content": [ - "book_nonfiction", + "book_comic", "book_fiction", + "book_nonfiction", "book_unknown", "magazine", - "book_comic", - "standards_document", - "musical_score" + "musical_score", + "other", + "standards_document" ], "ext": [ - "pdf", + "azw3", + "cbr", + "cbz", + "djvu", "epub", "fb2", "mobi", - "cbr", - "djvu", - "txt", - "cbz", - "azw3" + "pdf", + "txt" ], "sort": [ "", - "newest", - "oldest", "largest", - "smallest", + "newest", "newest_added", - "oldest_added" + "oldest", + "oldest_added", + "smallest" ] }, "data_type": "traits_v1", "languages": { "af": "af", "ar": "ar", - "az": "az", "be": "be", "bg": "bg", "bn": "bn", @@ -86,11 +86,14 @@ "ro": "ro", "ru": "ru", "rw": "rw", + "sa": "sa", + "se": "se", "sk": "sk", "sl": "sl", "sr": "sr", "sv": "sv", "ta": "ta", + "th": "th", "tr": "tr", "uk": "uk", "ur": "ur", @@ -119,6 +122,7 @@ "ar": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629", "bg": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438", "bs": "Bosanski", + "ca": "Catal\u00e0", "cs": "\u010ce\u0161tina", "da": "Dansk", "de": "Deutsch", @@ -1764,8 +1768,10 @@ "id": "id", "it": "it", "ja-JP": "ja-jp", + "ko": "ko", "lt": "lt", "lv": "lv", + "ms": "ms", "nb": "nb", "nl": "nl", "pl": "pl", @@ -1775,10 +1781,15 @@ "sk": "sk", "sl": "sl", "sq-AL": "sq-al", + "sr_Cyrl": "sr-cyrl", + "sr_Latn": "sr-latn", "sv": "sv", "sw-KE": "sw-ke", + "th": "th", "tr": "tr", "uk": "uk", + "vi": "vi", + "zh_Hans": "zh-hans", "zh_Hant": "zh-hant" } }, @@ -1858,8 +1869,10 @@ "id": "id", "it": "it", "ja-JP": "ja-jp", + "ko": "ko", "lt": "lt", "lv": "lv", + "ms": "ms", "nb": "nb", "nl": "nl", "pl": "pl", @@ -1869,10 +1882,15 @@ "sk": "sk", "sl": "sl", "sq-AL": "sq-al", + "sr_Cyrl": "sr-cyrl", + "sr_Latn": "sr-latn", "sv": "sv", "sw-KE": "sw-ke", + "th": "th", "tr": "tr", "uk": "uk", + "vi": "vi", + "zh_Hans": "zh-hans", "zh_Hant": "zh-hant" } }, @@ -1952,8 +1970,10 @@ "id": "id", "it": "it", "ja-JP": "ja-jp", + "ko": "ko", "lt": "lt", "lv": "lv", + "ms": "ms", "nb": "nb", "nl": "nl", "pl": "pl", @@ -1963,10 +1983,15 @@ "sk": "sk", "sl": "sl", "sq-AL": "sq-al", + "sr_Cyrl": "sr-cyrl", + "sr_Latn": "sr-latn", "sv": "sv", "sw-KE": "sw-ke", + "th": "th", "tr": "tr", "uk": "uk", + "vi": "vi", + "zh_Hans": "zh-hans", "zh_Hant": "zh-hant" } }, @@ -2046,8 +2071,10 @@ "id": "id", "it": "it", "ja-JP": "ja-jp", + "ko": "ko", "lt": "lt", "lv": "lv", + "ms": "ms", "nb": "nb", "nl": "nl", "pl": "pl", @@ -2057,10 +2084,15 @@ "sk": "sk", "sl": "sl", "sq-AL": "sq-al", + "sr_Cyrl": "sr-cyrl", + "sr_Latn": "sr-latn", "sv": "sv", "sw-KE": "sw-ke", + "th": "th", "tr": "tr", "uk": "uk", + "vi": "vi", + "zh_Hans": "zh-hans", "zh_Hant": "zh-hant" } }, @@ -6151,7 +6183,6 @@ "BY", "BZ", "CA", - "CC", "CD", "CF", "CG", @@ -6249,6 +6280,7 @@ "MN", "MO", "MQ", + "MS", "MT", "MU", "MW", @@ -6403,7 +6435,6 @@ "la": "latin", "lb": "luxembourgish", "ln": "lingala", - "lo": "lao", "lt": "lithuanian", "lv": "latvian", "mg": "malagasy", @@ -6436,6 +6467,7 @@ "si": "sinhala", "sk": "slovak", "sl": "slovenian", + "sn": "shona", "so": "somali", "sq": "albanian", "sr": "serbian", @@ -6784,6 +6816,7 @@ "bat-smg", "bbc", "bcl", + "bdr", "be", "be-tarask", "bew", diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py index a290dd06e..bc74b3c86 100644 --- a/searx/engines/annas_archive.py +++ b/searx/engines/annas_archive.py @@ -184,3 +184,8 @@ def fetch_traits(engine_traits: EngineTraits): for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"): engine_traits.custom['sort'].append(x.get("value")) + + # for better diff; sort the persistence of these traits + engine_traits.custom['content'].sort() + engine_traits.custom['ext'].sort() + engine_traits.custom['sort'].sort() diff --git a/searx/engines/brave.py b/searx/engines/brave.py index 4c43386ed..6f7e342e7 100644 --- a/searx/engines/brave.py +++ b/searx/engines/brave.py @@ -430,7 +430,8 @@ def fetch_traits(engine_traits: EngineTraits): ui_lang = option.get('value') try: - if '-' in ui_lang and not ui_lang.startswith("zh-"): + l = babel.Locale.parse(ui_lang, sep='-') + if l.territory: sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-')) else: sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep='-')) @@ -453,7 +454,7 @@ def fetch_traits(engine_traits: EngineTraits): if not resp.ok: # type: ignore print("ERROR: response from Brave is not OK.") - country_js = resp.text[resp.text.index("options:{all") + len('options:') :] + country_js = resp.text[resp.text.index("options:{all") + len('options:') :] # type: ignore country_js = country_js[: country_js.index("},k={default")] country_tags = js_variable_to_python(country_js) diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 193785182..5743f4142 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -54,7 +54,6 @@ def response(resp): excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0] content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False) - # it is better to emit
instead of |, but html tags are verboten content = content.strip().replace('\n', ' | ') content = ' '.join(content.split()) diff --git a/searx/engines/gitlab.py b/searx/engines/gitlab.py new file mode 100644 index 000000000..b5ab7df2a --- /dev/null +++ b/searx/engines/gitlab.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Engine to search in collaborative software platforms based on GitLab_ with +the `GitLab REST API`_. + +.. _GitLab: https://about.gitlab.com/install/ +.. _GitLab REST API: https://docs.gitlab.com/ee/api/ + +Configuration +============= + +The engine has the following mandatory setting: + +- :py:obj:`base_url` + +Optional settings are: + +- :py:obj:`api_path` + +.. code:: yaml + + - name: gitlab + engine: gitlab + base_url: https://gitlab.com + shortcut: gl + about: + website: https://gitlab.com/ + wikidata_id: Q16639197 + + - name: gnome + engine: gitlab + base_url: https://gitlab.gnome.org + shortcut: gn + about: + website: https://gitlab.gnome.org + wikidata_id: Q44316 + +Implementations +=============== + +""" + +from urllib.parse import urlencode +from dateutil import parser + +about = { + "website": None, + "wikidata_id": None, + "official_api_documentation": "https://docs.gitlab.com/ee/api/", + "use_official_api": True, + "require_api_key": False, + "results": "JSON", +} + +categories = ['it', 'repos'] +paging = True + +base_url: str = "" +"""Base URL of the GitLab host.""" + +api_path: str = 'api/v4/projects' +"""The path the `project API `_. + +The default path should work fine usually. +""" + + +def request(query, params): + args = {'search': query, 'page': params['pageno']} + params['url'] = f"{base_url}/{api_path}?{urlencode(args)}" + + return params + + +def response(resp): + results = [] + + for item in resp.json(): + results.append( + { + 'template': 'packages.html', + 'url': item.get('web_url'), + 'title': item.get('name'), + 'content': item.get('description'), + 'thumbnail': item.get('avatar_url'), + 'package_name': item.get('name'), + 'maintainer': item.get('namespace', {}).get('name'), + 'publishedDate': parser.parse(item.get('last_activity_at') or item.get("created_at")), + 'tags': item.get('tag_list', []), + 'popularity': item.get('star_count'), + 'homepage': item.get('readme_url'), + 'source_code_url': item.get('http_url_to_repo'), + } + ) + + return results diff --git a/searx/engines/google.py b/searx/engines/google.py index f9215783c..5fb5e9a76 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -441,7 +441,7 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True): try: locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') except babel.UnknownLocaleError: - print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang)) + print("INFO: google UI language %s (%s) is unknown by babel" % (eng_lang, x.text.split("(")[0].strip())) continue sxng_lang = language_tag(locale) diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 989fe1445..c30018d85 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -49,7 +49,11 @@ from flask_babel import gettext import babel import lxml -from searx.exceptions import SearxEngineAPIException, SearxEngineTooManyRequestsException +from searx.exceptions import ( + SearxEngineAPIException, + SearxEngineTooManyRequestsException, + SearxEngineCaptchaException, +) from searx.network import raise_for_httperror from searx.enginelib.traits import EngineTraits @@ -187,6 +191,8 @@ def parse_web_api(resp): error_code = data.get('error_code') if error_code == 24: raise SearxEngineTooManyRequestsException() + if search_results.get("data", {}).get("error_data", {}).get("captchaUrl") is not None: + raise SearxEngineCaptchaException() msg = ",".join(data.get('message', ['unknown'])) raise SearxEngineAPIException(f"{msg} ({error_code})") diff --git a/searx/engines/radio_browser.py b/searx/engines/radio_browser.py index c20580616..a8f07a638 100644 --- a/searx/engines/radio_browser.py +++ b/searx/engines/radio_browser.py @@ -165,10 +165,12 @@ def fetch_traits(engine_traits: EngineTraits): countrycodes = set() for region in country_list: - if region['iso_3166_1'] not in babel_reg_list: + # country_list contains duplicates that differ only in upper/lower case + _reg = region['iso_3166_1'].upper() + if _reg not in babel_reg_list: print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel") continue - countrycodes.add(region['iso_3166_1']) + countrycodes.add(_reg) countrycodes = list(countrycodes) countrycodes.sort() diff --git a/searx/locales.py b/searx/locales.py index ea9af9438..d7592df3d 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -152,7 +152,7 @@ def locales_initialize(): def region_tag(locale: babel.Locale) -> str: """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).""" if not locale.territory: - raise ValueError('%s missed a territory') + raise ValueError('babel.Locale %s: missed a territory' % locale) return locale.language + '-' + locale.territory diff --git a/searx/settings.yml b/searx/settings.yml index 4417485bc..3a09ca076 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -807,24 +807,21 @@ engines: timeout: 10 - name: gitlab - engine: json_engine - paging: true - search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno} - url_query: web_url - title_query: name_with_namespace - content_query: description - page_size: 20 - categories: [it, repos] + engine: gitlab + base_url: https://gitlab.com shortcut: gl - timeout: 10.0 disabled: true about: - website: https://about.gitlab.com/ + website: https://gitlab.com/ wikidata_id: Q16639197 - official_api_documentation: https://docs.gitlab.com/ee/api/ - use_official_api: false - require_api_key: false - results: JSON + + # - name: gnome + # engine: gitlab + # base_url: https://gitlab.gnome.org + # shortcut: gn + # about: + # website: https://gitlab.gnome.org + # wikidata_id: Q44316 - name: github engine: github @@ -903,26 +900,6 @@ engines: shortcut: mi disabled: true - - name: gpodder - engine: json_engine - shortcut: gpod - timeout: 4.0 - paging: false - search_url: https://gpodder.net/search.json?q={query} - url_query: url - title_query: title - content_query: description - page_size: 19 - categories: music - disabled: true - about: - website: https://gpodder.net - wikidata_id: Q3093354 - official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/ - use_official_api: false - requires_api_key: false - results: JSON - - name: habrahabr engine: xpath paging: true @@ -1869,25 +1846,6 @@ engines: about: website: https://wiby.me/ - - name: alexandria - engine: json_engine - shortcut: alx - categories: general - paging: true - search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} - results_query: results - title_query: title - url_query: url - content_query: snippet - timeout: 1.5 - disabled: true - about: - website: https://alexandria.org/ - official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md - use_official_api: true - require_api_key: false - results: JSON - - name: wikibooks engine: mediawiki weight: 0.5 diff --git a/searx/templates/simple/base.html b/searx/templates/simple/base.html index 50065a353..efd0ffb2b 100644 --- a/searx/templates/simple/base.html +++ b/searx/templates/simple/base.html @@ -1,7 +1,7 @@ - + @@ -13,12 +13,12 @@ {% block title %}{% endblock %}{{ instance_name }} {% block meta %}{% endblock %} {% if rtl %} - + {% else %} - + {% endif %} {% if get_setting('server.limiter') or get_setting('server.public_instance') %} - + {% endif %} {% block styles %}{% endblock %} @@ -26,11 +26,11 @@ {% block head %} - + {% endblock %} - +
@@ -66,7 +66,7 @@