diff --git a/searx/engines/onesearch.py b/searx/engines/onesearch.py
new file mode 100644
index 000000000..813fb488c
--- /dev/null
+++ b/searx/engines/onesearch.py
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+
+"""OneSearch (Yahoo & Verizon)
+
+- https://www.onesearch.com
+
+OneSearch is literally just Bing results flanked by ads that don’t track you
+despite being from a company that makes money tracking you [1].
+
+According to the OneSearch privacy policy, search results will only be
+personalized based on location, which it will collect from IP addresses.
+OneSearch says that it will separate IP addresses from users and their search
+results [2].
+
+[1] https://lifehacker.com/is-yahoos-new-onesearch-engine-good-for-privacy-1841042875
+[2] https://www.theverge.com/2020/1/14/21065640/verizon-onesearch-privacy-tracking-yahoo-breach-hack
+"""
+
+import re
+from urllib.parse import quote_plus, unquote
+from lxml.html import fromstring
+from searx.utils import (
+    eval_xpath,
+    extract_text,
+)
+
+about = {
+    "website": 'https://www.onesearch.com',
+    "wikidata_id": 'Q109682354',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+# engine dependent config
+categories = ['general']
+paging = True
+
+# ``p`` takes the (URL-quoted) search terms, ``b`` the start offset
+URL = 'https://www.onesearch.com/yhs/search;?p=%s&b=%d'
+
+# the target URL is taken from between ``RU=`` and the next ``/`` of a result link
+RE_TARGET_URL = re.compile(r'RU=(.*?)\/')
+
+
+def request(query, params):
+    """Set ``params['url']`` to the search URL for ``query`` and return ``params``.
+
+    The page to fetch is read from ``params['pageno']``.
+    """
+    # the ``b`` argument advances in steps of ten: page 1 -> 1, page 2 -> 11, ...
+    starting_from = (params['pageno'] * 10) - 9
+    # quote the query: a raw ``&``, ``#`` or ``+`` would otherwise truncate the
+    # search terms or smuggle extra arguments into the URL
+    params['url'] = URL % (quote_plus(query), starting_from)
+    return params
+
+
+def response(resp):
+    """Parse the HTML in ``resp.text`` into a list of result dicts.
+
+    Each dict has the keys ``title``, ``content`` and ``url``.
+    """
+    results = []
+    doc = fromstring(resp.text)
+
+    # NOTE: titles, snippets and links are collected separately and paired by
+    # position, so a hit that lacks one of them shifts the pairs that follow.
+    titles_tags = eval_xpath(
+        doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]')
+    contents = eval_xpath(
+        doc, '//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p')
+    onesearch_urls = eval_xpath(
+        doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href')
+
+    for title_tag, content, onesearch_url in zip(titles_tags, contents, onesearch_urls):
+        matches = RE_TARGET_URL.search(onesearch_url)
+        # no ``RU=`` part to decode: fall back to the raw href instead of
+        # raising AttributeError on ``None.group`` and losing the whole page
+        url = unquote(matches.group(1)) if matches else onesearch_url
+        results.append({
+            'title': title_tag.text_content(),
+            'content': extract_text(content),
+            'url': url,
+        })
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index c4ea6ebdb..1215e2147 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1698,6 +1698,12 @@ engines:
       require_api_key: false
       results: HTML
 
+  - name: onesearch
+    shortcut: one
+    engine: onesearch
+    categories: general
+    disabled: true
+
   # Doku engine lets you access to any Doku wiki instance:
   # A public one or a privete/corporate one.
   # - name: ubuntuwiki