From d6dfada1a9c77cae13596c441f7d1cb1c2b30899 Mon Sep 17 00:00:00 2001
From: ta
Date: Sat, 13 Aug 2022 06:04:31 +0700
Subject: [PATCH 1/2] add neeva web engine

Neeva is "the world's first ad-free, private search engine" and uses
data from Apple, Bing, Yelp and "others". They claim to crawl "hundreds
of millions" of URLs a day
(https://twitter.com/Neeva/status/1536447373903335426).
---
Reviewer note: search_url already contains the {time_range} placeholder,
but this commit defines neither time_range_support nor time_range_url;
both are added by PATCH 2/2.
Reviewer note: the class names used in the XPath selectors end in
hash-like suffixes (e.g. "web-index__component-2rKiM"); presumably they
are build-generated and may change - verify against the live markup.
Reviewer note: official_api_documentation is left as a bare key, which
YAML loads as null.

 searx/settings.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/searx/settings.yml b/searx/settings.yml
index ec03819a5..52b75082c 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -960,6 +960,30 @@ engines:
   #   collection: 'reviews'  # name of the db collection
   #   key: 'name'  # key in the collection to search for

+  - name: neeva
+    engine: xpath
+    shortcut: nv
+    search_url: https://neeva.com/search?q={query}&c=All&src=Pagination&page={pageno}{time_range}
+    results_xpath: //div[@class="web-index__component-2rKiM"] | //li[@class="web-rich-deep-links__deepLink-SIbD4"]
+    url_xpath: .//a[@class="lib-doc-title__link-1b9rC"]/@href | ./h2/a/@href
+    title_xpath: .//a[@class="lib-doc-title__link-1b9rC"] | ./h2/a
+    content_xpath: >
+      .//div[@class="lib-doc-snippet__component-3ewW6"]/text() |
+      .//div[@class="lib-doc-snippet__component-3ewW6"]/*[not(self::a)] |
+      ./p
+    content_html_to_text: true
+    suggestion_xpath: //span[@class="result-related-searches__link-2ho_u"]
+    paging: true
+    disabled: true
+    categories: [general, web]
+    timeout: 5.0
+    about:
+      website: https://neeva.com
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
+
   - name: npm
     engine: json_engine
     paging: true

From 8fa84ee9f4b2946cc55b8934832923b344d70180 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9on=20Tiek=C3=B6tter?=
Date: Sat, 13 Aug 2022 01:59:58 +0200
Subject: [PATCH 2/2] Fix time range support and add soft_max_redirects

Add custom time_range_url and time_range_map
Set soft_max_redirects = 2 to prevent
"ErrorContext('searx/search/processors/online.py', 116, 'count_error(', None, '2 redirects, maximum: 0', ('200', 'OK', 'neeva.com')) True"
---
Reviewer note: time_range_url is a URL fragment with the brackets
pre-encoded as %5B / %5D; presumably it is substituted for the
{time_range} placeholder in search_url, with {time_range_val} taken
from time_range_map - confirm against the xpath engine.

 searx/settings.yml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/searx/settings.yml b/searx/settings.yml
index 52b75082c..6cae30c48 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -963,6 +963,13 @@ engines:
   - name: neeva
     engine: xpath
     shortcut: nv
+    time_range_support: true
+    time_range_url: '&alf%5Bfreshness%5D={time_range_val}'
+    time_range_map:
+      day: 'Day'
+      week: 'Week'
+      month: 'Month'
+      year: 'Year'
     search_url: https://neeva.com/search?q={query}&c=All&src=Pagination&page={pageno}{time_range}
     results_xpath: //div[@class="web-index__component-2rKiM"] | //li[@class="web-rich-deep-links__deepLink-SIbD4"]
     url_xpath: .//a[@class="lib-doc-title__link-1b9rC"]/@href | ./h2/a/@href
@@ -977,6 +984,7 @@ engines:
     disabled: true
     categories: [general, web]
     timeout: 5.0
+    soft_max_redirects: 2
     about:
       website: https://neeva.com
       official_api_documentation: