[feat] engine: implementation of brave goggles

This commit is contained in:
Hackurei 2023-12-05 17:57:29 -07:00
parent 3829c253ff
commit b230827533
2 changed files with 30 additions and 6 deletions

View File

@ -30,6 +30,13 @@ Configured ``brave`` engines:
... ...
brave_category: news brave_category: news
- name: brave.goggles
time_range_support: true
paging: true
...
brave_category: goggles
.. _brave regions: .. _brave regions:
@ -95,7 +102,7 @@ Implementations
""" """
from typing import TYPE_CHECKING from typing import Any, TYPE_CHECKING
from urllib.parse import ( from urllib.parse import (
urlencode, urlencode,
@ -133,12 +140,14 @@ about = {
base_url = "https://search.brave.com/" base_url = "https://search.brave.com/"
categories = [] categories = []
brave_category = 'search' brave_category = 'search'
"""Brave supports common web-search, video search, image and video search. Goggles = Any
"""Brave supports common web-search, videos, images, news, and goggles search.
- ``search``: Common WEB search - ``search``: Common WEB search
- ``videos``: search for videos - ``videos``: search for videos
- ``images``: search for images - ``images``: search for images
- ``news``: search for news - ``news``: search for news
- ``goggles``: Common WEB search with custom rules
""" """
brave_spellcheck = False brave_spellcheck = False
@ -151,7 +160,7 @@ in SearXNG, the spellchecking is disabled by default.
send_accept_language_header = True send_accept_language_header = True
paging = False paging = False
"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI """Brave only supports paging in :py:obj:`brave_category` ``search`` (UI
category All).""" category All) and in the goggles category."""
max_page = 10 max_page = 10
"""Tested 9 pages maximum (``&offset=8``), to be safe max is set to 10. Trying """Tested 9 pages maximum (``&offset=8``), to be safe max is set to 10. Trying
to do more won't return any result and you will most likely be flagged as a bot. to do more won't return any result and you will most likely be flagged as a bot.
@ -183,12 +192,15 @@ def request(query, params):
if brave_spellcheck: if brave_spellcheck:
args['spellcheck'] = '1' args['spellcheck'] = '1'
if brave_category == 'search': if brave_category in ('search', 'goggles'):
if params.get('pageno', 1) - 1: if params.get('pageno', 1) - 1:
args['offset'] = params.get('pageno', 1) - 1 args['offset'] = params.get('pageno', 1) - 1
if time_range_map.get(params['time_range']): if time_range_map.get(params['time_range']):
args['tf'] = time_range_map.get(params['time_range']) args['tf'] = time_range_map.get(params['time_range'])
if brave_category == 'goggles':
args['goggles_id'] = Goggles
params["url"] = f"{base_url}{brave_category}?{urlencode(args)}" params["url"] = f"{base_url}{brave_category}?{urlencode(args)}"
# set properties in the cookies # set properties in the cookies
@ -209,7 +221,7 @@ def request(query, params):
def response(resp): def response(resp):
if brave_category == 'search': if brave_category in ('search', 'goggles'):
return _parse_search(resp) return _parse_search(resp)
datastr = "" datastr = ""
@ -252,7 +264,9 @@ def _parse_search(resp):
if url is None or title_tag is None or not urlparse(url).netloc: # partial url likely means it's an ad if url is None or title_tag is None or not urlparse(url).netloc: # partial url likely means it's an ad
continue continue
content_tag = eval_xpath_getindex(result, './/div[@class="snippet-description"]', 0, default='') content_tag = eval_xpath_getindex(
result, './/div[@class="snippet-description desktop-default-regular"]', 0, default=''
)
img_src = eval_xpath_getindex(result, './/img[contains(@class, "thumb")]/@src', 0, default='') img_src = eval_xpath_getindex(result, './/img[contains(@class, "thumb")]/@src', 0, default='')
item = { item = {

View File

@ -2116,6 +2116,16 @@ engines:
categories: news categories: news
brave_category: news brave_category: news
# - name: brave.goggles
# engine: brave
# network: brave
# shortcut: brgog
# time_range_support: true
# paging: true
# categories: [general, web]
# brave_category: goggles
# Goggles: # required! Must be set to a URL ending in .goggle
- name: lib.rs - name: lib.rs
shortcut: lrs shortcut: lrs
engine: xpath engine: xpath