[feat] engine: implementation of brave goggles

This commit is contained in:
Hackurei 2023-12-05 17:57:29 -07:00
parent 3829c253ff
commit b230827533
2 changed files with 30 additions and 6 deletions

View File

@ -30,6 +30,13 @@ Configured ``brave`` engines:
...
brave_category: news
- name: brave.goggles
brave_category: goggles
time_range_support: true
paging: true
...
brave_category: goggles
.. _brave regions:
@ -95,7 +102,7 @@ Implementations
"""
from typing import TYPE_CHECKING
from typing import Any, TYPE_CHECKING
from urllib.parse import (
urlencode,
@ -133,12 +140,14 @@ about = {
base_url = "https://search.brave.com/"
categories = []
brave_category = 'search'
"""Brave supports common web-search, video search, image and video search.
Goggles = Any
"""Brave supports common web-search, videos, images, news, and goggles search.
- ``search``: Common WEB search
- ``videos``: search for videos
- ``images``: search for images
- ``news``: search for news
- ``goggles``: Common WEB search with custom rules
"""
brave_spellcheck = False
@ -151,7 +160,7 @@ in SearXNG, the spellchecking is disabled by default.
send_accept_language_header = True
paging = False
"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI
category All)."""
category All) and in the goggles category."""
max_page = 10
"""Tested 9 pages maximum (``&offset=8``), to be safe max is set to 10. Trying
to do more won't return any result and you will most likely be flagged as a bot.
@ -183,12 +192,15 @@ def request(query, params):
if brave_spellcheck:
args['spellcheck'] = '1'
if brave_category == 'search':
if brave_category in ('search', 'goggles'):
if params.get('pageno', 1) - 1:
args['offset'] = params.get('pageno', 1) - 1
if time_range_map.get(params['time_range']):
args['tf'] = time_range_map.get(params['time_range'])
if brave_category == 'goggles':
args['goggles_id'] = Goggles
params["url"] = f"{base_url}{brave_category}?{urlencode(args)}"
# set properties in the cookies
@ -209,7 +221,7 @@ def request(query, params):
def response(resp):
if brave_category == 'search':
if brave_category in ('search', 'goggles'):
return _parse_search(resp)
datastr = ""
@ -252,7 +264,9 @@ def _parse_search(resp):
if url is None or title_tag is None or not urlparse(url).netloc: # partial url likely means it's an ad
continue
content_tag = eval_xpath_getindex(result, './/div[@class="snippet-description"]', 0, default='')
content_tag = eval_xpath_getindex(
result, './/div[@class="snippet-description desktop-default-regular"]', 0, default=''
)
img_src = eval_xpath_getindex(result, './/img[contains(@class, "thumb")]/@src', 0, default='')
item = {

View File

@ -2116,6 +2116,16 @@ engines:
categories: news
brave_category: news
# - name: brave.goggles
# engine: brave
# network: brave
# shortcut: brgog
# time_range_support: true
# paging: true
# categories: [general, web]
# brave_category: goggles
# Goggles: # required! This should be a URL ending in .goggle
- name: lib.rs
shortcut: lrs
engine: xpath