[mod] engines - add Onesearch engine

Merged from @e-foundation's onesearch-engine branch [1] and slightly modified / see [2]. [1] https://github.com/e-foundation/searx/tree/onesearch-engine [2] https://github.com/searx/searx/pull/3065 Author: Israel Yago Pereira <israelyago@e.email> Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-01-01 19:24:07 +01:00 · 2021-10-29 17:02:14 -03:00 · 2021-10-29 17:02:14 -03:00 · c4a8c77026
commit c4a8c77026
parent d0e21a01b4
2 changed files with 73 additions and 0 deletions
--- a/searx/engines/onesearch.py
+++ b/searx/engines/onesearch.py
@ -0,0 +1,67 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+
+"""OneSearch (Yahoo & Verizon)
+
+- https://www.onesearch.com
+
+OneSearch is literally just Bing results flanked by ads that don’t track you
+despite being from a company that makes money tracking you [1].
+
+According to the OneSearch privacy policy, search results will only be
+personalized based on location, which it will collect from IP addresses.
+OneSearch says that it will separate IP addresses from users and their search
+results [2].
+
+[1] https://lifehacker.com/is-yahoos-new-onesearch-engine-good-for-privacy-1841042875
+[2] https://www.theverge.com/2020/1/14/21065640/verizon-onesearch-privacy-tracking-yahoo-breach-hack
+"""
+
+import re
+from urllib.parse import quote_plus, unquote
+from lxml.html import fromstring
+from searx.utils import (
+    eval_xpath,
+    extract_text,
+)
+
+about = {  # engine metadata: upstream website, Wikidata item, API usage and result format
+    "website": 'https://www.onesearch.com',
+    "wikidata_id": 'Q109682354',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+# engine dependent config
+categories = ['general']
+paging = True  # request() reads params['pageno'] to compute the result offset
+
+URL = 'https://www.onesearch.com/yhs/search;?p=%s&b=%d'  # %s: query, %d: result offset (both filled in by request())
+
+def request(query, params):
+    starting_from = (params['pageno'] * 10) - 9
+    params['url'] = URL % (query, starting_from)
+    return params
+
+def response(resp):
+
+    results = []
+    doc = fromstring(resp.text)
+
+    titles_tags = eval_xpath(
+        doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]')
+    contents = eval_xpath(
+        doc, '//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p')
+    onesearch_urls = eval_xpath(
+        doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href')
+
+    for title_tag, content, onesearch_url in zip(titles_tags, contents, onesearch_urls):
+        matches = re.search(r'RU=(.*?)\/', onesearch_url)
+        results.append({
+            'title': title_tag.text_content(),
+            'content': extract_text(content),
+            'url': unquote(matches.group(1)),
+        })
+
+    return results
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -1698,6 +1698,12 @@ engines:
      require_api_key: false
      results: HTML

+  - name: onesearch
+    shortcut: one
+    engine: onesearch
+    categories: general
+    disabled: true
+
 # Doku engine lets you access any Doku wiki instance:
 # A public one or a private/corporate one.
 #  - name: ubuntuwiki