mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
feat(engines): Add google_cs
This commit is contained in:
parent
666cd1f635
commit
80d79a85aa
3 changed files with 257 additions and 0 deletions
|
@ -30,6 +30,14 @@ Google WEB
|
|||
.. automodule:: searx.engines.google
|
||||
:members:
|
||||
|
||||
.. _google custom search engine:
|
||||
|
||||
Google Custom Search
|
||||
====================
|
||||
|
||||
.. automodule:: searx.engines.google_cs
|
||||
:members:
|
||||
|
||||
.. _google images engine:
|
||||
|
||||
Google Images
|
||||
|
|
241
searx/engines/google_cs.py
Normal file
241
searx/engines/google_cs.py
Normal file
|
@ -0,0 +1,241 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""
|
||||
Google Custom Search API engine
|
||||
|
||||
This engine use Google's paid search API, which requires an API key and do not subject to CAPTCHA.
|
||||
The search API has 100 queries/day free tier, and an initial cap of 10k search/day which can be raised by submitting
|
||||
a request. The search will use a different algorithm than what Google.com provides.
|
||||
|
||||
Setting up
|
||||
----------
|
||||
|
||||
1. Create a `Google Cloud project <https://console.cloud.google.com/projectcreate>`_
|
||||
2. *(optional)* Attach a billing account to the project to enable search quota above the free tier
|
||||
3. Enable the `Custom Search API <https://console.cloud.google.com/apis/library/customsearch.googleapis.com>`_
|
||||
4. Create an `API key <https://console.cloud.google.com/apis/credentials>`_
|
||||
5. *(optional)* Limit the API key to :guilabel:`Custom Search API` and public IP address of the Searx server
|
||||
6. Create a `custom search engine <https://programmablesearchengine.google.com>`_.
|
||||
|
||||
* Enable :guilabel:`Image search`
|
||||
* Enable :guilabel:`Search the entire web`
|
||||
* Other options are not required, including paid element API key
|
||||
|
||||
7. Add the information to :file:`searx.yml`
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
engines:
|
||||
- name: google custom search
|
||||
engine: google_cs
|
||||
shortcut: gocs
|
||||
api_key: Enter API key from step 4
|
||||
cx: Enter search engine ID from step 6
|
||||
|
||||
8. *(optional)* Protect the engine with :doc:`/admin/engines/private-engines` to prevent costly mistakes
|
||||
|
||||
"""
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.engines.google import get_lang_info
|
||||
from searx.exceptions import SearxEngineAPIException, SearxEngineTooManyRequestsException
|
||||
from searx.network import raise_for_httperror
|
||||
|
||||
about = {
|
||||
"website": 'https://www.google.com',
|
||||
"wikidata_id": 'Q9366',
|
||||
"official_api_documentation": 'https://developers.google.com/custom-search/v1/overview',
|
||||
"use_official_api": True,
|
||||
"require_api_key": True,
|
||||
"results": 'HTML',
|
||||
}
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general', 'web', 'images']
|
||||
paging = True
|
||||
time_range_support = True
|
||||
safesearch = True
|
||||
send_accept_language_header = True
|
||||
|
||||
# search-url
|
||||
base_url = "https://customsearch.googleapis.com/customsearch/v1?{query}"
|
||||
api_key = None
|
||||
cx = None
|
||||
number_of_results = 10 # 1 - 10
|
||||
|
||||
MAX_SEARCH_RESULT = 100
|
||||
|
||||
time_range_map = {
|
||||
'day': 'd[1]',
|
||||
'week': 'w[1]',
|
||||
'month': 'm[1]',
|
||||
'year': 'y[1]',
|
||||
}
|
||||
|
||||
# https://developers.google.com/custom-search/docs/json_api_reference#international-values
|
||||
supported_languages = {
|
||||
"af": {"Name": "Afrikaans"},
|
||||
"sq": {"Name": "Albanian"},
|
||||
"sm": {"Name": "Amharic"},
|
||||
"ar": {"Name": "Arabic"},
|
||||
"az": {"Name": "Azerbaijani"},
|
||||
"eu": {"Name": "Basque"},
|
||||
"be": {"Name": "Belarusian"},
|
||||
"bn": {"Name": "Bengali"},
|
||||
"bh": {"Name": "Bihari"},
|
||||
"bs": {"Name": "Bosnian"},
|
||||
"bg": {"Name": "Bulgarian"},
|
||||
"ca": {"Name": "Catalan"},
|
||||
"zh-CN": {"Name": "Chinese (Simplified)"},
|
||||
"zh-TW": {"Name": "Chinese (Traditional)"},
|
||||
"hr": {"Name": "Croatian"},
|
||||
"cs": {"Name": "Czech"},
|
||||
"da": {"Name": "Danish"},
|
||||
"nl": {"Name": "Dutch"},
|
||||
"en": {"Name": "English"},
|
||||
"eo": {"Name": "Esperanto"},
|
||||
"et": {"Name": "Estonian"},
|
||||
"fo": {"Name": "Faroese"},
|
||||
"fi": {"Name": "Finnish"},
|
||||
"fr": {"Name": "French"},
|
||||
"fy": {"Name": "Frisian"},
|
||||
"gl": {"Name": "Galician"},
|
||||
"ka": {"Name": "Georgian"},
|
||||
"de": {"Name": "German"},
|
||||
"el": {"Name": "Greek"},
|
||||
"gu": {"Name": "Gujarati"},
|
||||
"iw": {"Name": "Hebrew"},
|
||||
"hi": {"Name": "Hindi"},
|
||||
"hu": {"Name": "Hungarian"},
|
||||
"is": {"Name": "Icelandic"},
|
||||
"id": {"Name": "Indonesian"},
|
||||
"ia": {"Name": "Interlingua"},
|
||||
"ga": {"Name": "Irish"},
|
||||
"it": {"Name": "Italian"},
|
||||
"ja": {"Name": "Japanese"},
|
||||
"jw": {"Name": "Javanese"},
|
||||
"kn": {"Name": "Kannada"},
|
||||
"ko": {"Name": "Korean"},
|
||||
"la": {"Name": "Latin"},
|
||||
"lv": {"Name": "Latvian"},
|
||||
"lt": {"Name": "Lithuanian"},
|
||||
"mk": {"Name": "Macedonian"},
|
||||
"ms": {"Name": "Malay"},
|
||||
"ml": {"Name": "Malayam"},
|
||||
"mt": {"Name": "Maltese"},
|
||||
"mr": {"Name": "Marathi"},
|
||||
"ne": {"Name": "Nepali"},
|
||||
"no": {"Name": "Norwegian"},
|
||||
"nn": {"Name": "Norwegian (Nynorsk)"},
|
||||
"oc": {"Name": "Occitan"},
|
||||
"fa": {"Name": "Persian"},
|
||||
"pl": {"Name": "Polish"},
|
||||
"pt-BR": {"Name": "Portuguese (Brazil)"},
|
||||
"pt-PT": {"Name": "Portuguese (Portugal)"},
|
||||
"pa": {"Name": "Punjabi"},
|
||||
"ro": {"Name": "Romanian"},
|
||||
"ru": {"Name": "Russian"},
|
||||
"gd": {"Name": "Scots Gaelic"},
|
||||
"sr": {"Name": "Serbian"},
|
||||
"si": {"Name": "Sinhalese"},
|
||||
"sk": {"Name": "Slovak"},
|
||||
"sl": {"Name": "Slovenian"},
|
||||
"es": {"Name": "Spanish"},
|
||||
"su": {"Name": "Sudanese"},
|
||||
"sw": {"Name": "Swahili"},
|
||||
"sv": {"Name": "Swedish"},
|
||||
"tl": {"Name": "Tagalog"},
|
||||
"ta": {"Name": "Tamil"},
|
||||
"te": {"Name": "Telugu"},
|
||||
"th": {"Name": "Thai"},
|
||||
"ti": {"Name": "Tigrinya"},
|
||||
"tr": {"Name": "Turkish"},
|
||||
"uk": {"Name": "Ukrainian"},
|
||||
"ur": {"Name": "Urdu"},
|
||||
"uz": {"Name": "Uzbek"},
|
||||
"vi": {"Name": "Vietnamese"},
|
||||
"cy": {"Name": "Welsh"},
|
||||
"xh": {"Name": "Xhosa"},
|
||||
"zu": {"Name": "Zulu"},
|
||||
}
|
||||
|
||||
|
||||
def request(query, params):
|
||||
start = (params['pageno'] * number_of_results) + 1
|
||||
|
||||
if start > MAX_SEARCH_RESULT:
|
||||
raise PageTooLargeException
|
||||
|
||||
query = {
|
||||
'key': api_key,
|
||||
'cx': cx,
|
||||
'q': query,
|
||||
'safe': 'active' if params['safesearch'] > 0 else 'off',
|
||||
'num': number_of_results,
|
||||
'start': start,
|
||||
}
|
||||
|
||||
if params['category'] == 'images':
|
||||
query['searchType'] = 'image'
|
||||
|
||||
if params.get('time_range', None) in time_range_map:
|
||||
query['dateRestrict'] = time_range_map[params['time_range']]
|
||||
|
||||
lang_info = get_lang_info(params, supported_languages, {}, True)
|
||||
query['gl'] = lang_info['country'].lower()
|
||||
query['hl'] = lang_info['params']['hl']
|
||||
if 'lr' in lang_info['params']:
|
||||
query['lr'] = lang_info['params']['lr']
|
||||
|
||||
params['url'] = base_url.format(query=urlencode(query))
|
||||
params['raise_for_httperror'] = False
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
result = resp.json()
|
||||
|
||||
if resp.status_code == 403:
|
||||
try:
|
||||
if result['errors'][0]['reason'] == 'quotaExceeded':
|
||||
raise SearxEngineTooManyRequestsException(message=result['message'])
|
||||
except (KeyError, IndexError):
|
||||
pass
|
||||
|
||||
raise_for_httperror(resp)
|
||||
|
||||
metadata = [
|
||||
{'number_of_results': min(MAX_SEARCH_RESULT, int(result['searchInformation']['totalResults'], 10))},
|
||||
]
|
||||
search_type = result['queries']['request'][0].get('searchType', '')
|
||||
|
||||
if 'spelling' in result:
|
||||
metadata.append({'correction': result['spelling']['correctedQuery']})
|
||||
|
||||
return metadata + [_convert_result(search, search_type) for search in result.get('items', [])]
|
||||
|
||||
|
||||
def _convert_result(search, search_type=''):
|
||||
"""Convert `result JSON <https://developers.google.com/custom-search/v1/reference/rest/v1/Search#Result>`_
|
||||
to Searx result"""
|
||||
out = {
|
||||
"url": search['link'],
|
||||
"title": search['title'],
|
||||
"content": search.get('snippet', ''),
|
||||
}
|
||||
|
||||
if search_type == 'image' and 'image' in search:
|
||||
out['template'] = 'images.html'
|
||||
out['img_src'] = search['link']
|
||||
out['thumbnail_src'] = search['image']['thumbnailLink']
|
||||
out['img_format'] = f"{search['image']['width']} x {search['image']['height']} {search['fileFormat']}"
|
||||
out['url'] = search['image']['contextLink']
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class PageTooLargeException(SearxEngineAPIException):
|
||||
"""Requested page size is over Google's maximum limit"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__('Page size too large')
|
|
@ -707,6 +707,14 @@ engines:
|
|||
# additional_tests:
|
||||
# android: *test_android
|
||||
|
||||
# - name: google custom search
|
||||
# engine: google_cs
|
||||
# shortcut: gocs
|
||||
# API Key and custom search ID required
|
||||
# see https://docs.searxng.org/src/searx.engines.google.html#google-custom-search
|
||||
# api_key:
|
||||
# cx:
|
||||
|
||||
# - name: google italian
|
||||
# engine: google
|
||||
# shortcut: goit
|
||||
|
|
Loading…
Add table
Reference in a new issue