Mirror of https://github.com/searxng/searxng, synced 2024-01-01 19:24:07 +01:00
[mod] engines - add Onesearch engine
Merged from @e-foundation's onesearch-engine branch [1] and slightly modified, see [2].

[1] https://github.com/e-foundation/searx/tree/onesearch-engine
[2] https://github.com/searx/searx/pull/3065

Author: Israel Yago Pereira <israelyago@e.email>
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
parent d0e21a01b4
commit c4a8c77026
2 changed files with 73 additions and 0 deletions
searx/engines/onesearch.py (new file, 67 lines)
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""OneSearch (Yahoo & Verizon)

- https://www.onesearch.com

OneSearch is literally just Bing results flanked by ads that don’t track you
despite being from a company that makes money tracking you [1].

According to the OneSearch privacy policy, search results will only be
personalized based on location, which it will collect from IP addresses.
OneSearch says that it will separate IP addresses from users and their search
results [2].

[1] https://lifehacker.com/is-yahoos-new-onesearch-engine-good-for-privacy-1841042875
[2] https://www.theverge.com/2020/1/14/21065640/verizon-onesearch-privacy-tracking-yahoo-breach-hack
"""

import re
from urllib.parse import unquote

from lxml.html import fromstring

from searx.utils import (
    eval_xpath,
    extract_text,
)

about = {
    "website": 'https://www.onesearch.com',
    "wikidata_id": 'Q109682354',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['general']
paging = True

URL = 'https://www.onesearch.com/yhs/search;?p=%s&b=%d'

def request(query, params):
    starting_from = (params['pageno'] * 10) - 9
    params['url'] = URL % (query, starting_from)
    return params

def response(resp):
    results = []
    doc = fromstring(resp.text)

    titles_tags = eval_xpath(
        doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]')
    contents = eval_xpath(
        doc, '//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p')
    onesearch_urls = eval_xpath(
        doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href')

    for title_tag, content, onesearch_url in zip(titles_tags, contents, onesearch_urls):
        matches = re.search(r'RU=(.*?)\/', onesearch_url)
        results.append({
            'title': title_tag.text_content(),
            'content': extract_text(content),
            'url': unquote(matches.group(1)),
        })

    return results
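For context, request() only has to map searx's 1-based pageno onto OneSearch's b result-offset parameter (1, 11, 21, ...) and fill the URL template. A minimal standalone sketch of that arithmetic follows; the query value 'privacy' is made up for demonstration.

    # Sketch of the URL building done in request(); 'privacy' is just a sample query.
    URL = 'https://www.onesearch.com/yhs/search;?p=%s&b=%d'

    def build_url(query, pageno):
        starting_from = (pageno * 10) - 9   # page 1 -> b=1, page 2 -> b=11, page 3 -> b=21
        return URL % (query, starting_from)

    for page in (1, 2, 3):
        print(build_url('privacy', page))
    # https://www.onesearch.com/yhs/search;?p=privacy&b=1
    # https://www.onesearch.com/yhs/search;?p=privacy&b=11
    # https://www.onesearch.com/yhs/search;?p=privacy&b=21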
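response() does two things: it scrapes the result blocks with the three XPath expressions above, and it unwraps the Yahoo-style redirect links, whose real target is percent-encoded in the RU= segment of each href. The standalone sketch below walks through both steps on invented markup; plain lxml .xpath() and text_content() calls stand in for the searx.utils helpers, the HTML and redirect URL are made up for illustration, and a fallback is added for the case where the RU= pattern is absent (the engine code itself has no such guard).

    import re
    from urllib.parse import unquote

    from lxml.html import fromstring

    # Invented, minimal result markup shaped to match the engine's XPath
    # expressions; a real OneSearch page is considerably more complex.
    HTML = '''
    <div class="algo">
      <h3 class="title">
        <a href="https://r.onesearch.example/_ylt=abc/RU=https%3a%2f%2fexample.org%2fpage/RK=2/RS=xyz">Example title</a>
      </h3>
      <div class="compText"><p>Example snippet text.</p></div>
    </div>
    '''

    doc = fromstring(HTML)

    title = doc.xpath('//div[contains(@class, "algo")]//h3[contains(@class, "title")]')[0]
    content = doc.xpath('//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p')[0]
    href = doc.xpath('//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href')[0]

    # The target URL sits percent-encoded between 'RU=' and the next literal '/'.
    matches = re.search(r'RU=(.*?)\/', href)
    result = {
        'title': title.text_content().strip(),
        'content': content.text_content().strip(),
        'url': unquote(matches.group(1)) if matches else href,  # fall back if the pattern is absent
    }
    print(result)
    # {'title': 'Example title', 'content': 'Example snippet text.', 'url': 'https://example.org/page'}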
searx/settings.yml (6 additions)

@@ -1698,6 +1698,12 @@ engines:
       require_api_key: false
       results: HTML
 
+  - name: onesearch
+    shortcut: one
+    engine: onesearch
+    categories: general
+    disabled: true
+
 # Doku engine lets you access to any Doku wiki instance:
 # A public one or a privete/corporate one.
 # - name: ubuntuwiki