forked from zaclys/searxng
Merge pull request #165 from return42/patch-google
improve & document google engine
This commit is contained in:
commit
f3e56836d6
|
@ -0,0 +1,55 @@
|
||||||
|
.. _google engines:
|
||||||
|
|
||||||
|
==============
|
||||||
|
Google Engines
|
||||||
|
==============
|
||||||
|
|
||||||
|
.. contents:: Contents
|
||||||
|
:depth: 2
|
||||||
|
:local:
|
||||||
|
:backlinks: entry
|
||||||
|
|
||||||
|
|
||||||
|
.. _google API:
|
||||||
|
|
||||||
|
google API
|
||||||
|
==========
|
||||||
|
|
||||||
|
.. _Query Parameter Definitions:
|
||||||
|
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
||||||
|
|
||||||
|
For detailed description of the *REST-full* API see: `Query Parameter
|
||||||
|
Definitions`_. Not all parameters can be applied and some engines are *special*
|
||||||
|
(e.g. :ref:`google news engine`).
|
||||||
|
|
||||||
|
.. _google web engine:
|
||||||
|
|
||||||
|
Google WEB
|
||||||
|
==========
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.google
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. _google images engine:
|
||||||
|
|
||||||
|
Google Images
|
||||||
|
=============
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.google_images
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. _google videos engine:
|
||||||
|
|
||||||
|
Google Videos
|
||||||
|
=============
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.google_videos
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. _google news engine:
|
||||||
|
|
||||||
|
Google News
|
||||||
|
===========
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.google_news
|
||||||
|
:members:
|
|
@ -1,12 +1,28 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
"""Google (Web)
|
"""This is the implementation of the google WEB engine. Some of these
|
||||||
|
implementations are shared by other engines:
|
||||||
|
|
||||||
For detailed description of the *REST-full* API see: `Query Parameter
|
- :ref:`google images engine`
|
||||||
Definitions`_.
|
- :ref:`google news engine`
|
||||||
|
- :ref:`google videos engine`
|
||||||
|
|
||||||
|
The google WEB engine itself has a special setup option:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: google
|
||||||
|
...
|
||||||
|
use_mobile_ui: true
|
||||||
|
|
||||||
|
``use_mobile_ui``: (default: ``true``)
|
||||||
|
Enables the use of the *mobile endpoint* to bypass the google blocking (see
|
||||||
|
:issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More
|
||||||
|
results` is not affected by Google rate limiting and we can still do requests
|
||||||
|
while actively blocked by the original Google search. By activating
|
||||||
|
``use_mobile_ui`` this behavior is simulated by adding the parameter
|
||||||
|
``async=use_ac:true,_fmt:pc`` to the :py:func:`request`.
|
||||||
|
|
||||||
.. _Query Parameter Definitions:
|
|
||||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# pylint: disable=invalid-name, missing-function-docstring
|
# pylint: disable=invalid-name, missing-function-docstring
|
||||||
|
@ -137,8 +153,9 @@ spelling_suggestion_xpath = '//div[@class="med"]/p/a'
|
||||||
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
||||||
"""Composing various language properties for the google engines.
|
"""Composing various language properties for the google engines.
|
||||||
|
|
||||||
This function is called by the various google engines (google itself,
|
This function is called by the various google engines (:ref:`google web
|
||||||
google-images, -news, -scholar, -videos).
|
engine`, :ref:`google images engine`, :ref:`google news engine` and
|
||||||
|
:ref:`google videos engine`).
|
||||||
|
|
||||||
:param dict param: request parameters of the engine
|
:param dict param: request parameters of the engine
|
||||||
|
|
||||||
|
@ -146,7 +163,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
||||||
:py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
|
:py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
|
||||||
|
|
||||||
:param dict lang_list: custom aliases for non standard language codes
|
:param dict lang_list: custom aliases for non standard language codes
|
||||||
(used when calling :py:func:`searx.utils.match_language)
|
(used when calling :py:func:`searx.utils.match_language`)
|
||||||
|
|
||||||
:param bool supported_any_language: When a language is not specified, the
|
:param bool supported_any_language: When a language is not specified, the
|
||||||
language interpretation is left up to Google to decide how the search
|
language interpretation is left up to Google to decide how the search
|
||||||
|
@ -159,7 +176,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
||||||
Py-Dictionary with the key/value pairs:
|
Py-Dictionary with the key/value pairs:
|
||||||
|
|
||||||
language:
|
language:
|
||||||
Return value from :py:func:`searx.utils.match_language
|
Return value from :py:func:`searx.utils.match_language`
|
||||||
|
|
||||||
country:
|
country:
|
||||||
The country code (e.g. US, AT, CA, FR, DE ..)
|
The country code (e.g. US, AT, CA, FR, DE ..)
|
||||||
|
@ -270,8 +287,7 @@ def request(query, params):
|
||||||
additional_parameters = {}
|
additional_parameters = {}
|
||||||
if use_mobile_ui:
|
if use_mobile_ui:
|
||||||
additional_parameters = {
|
additional_parameters = {
|
||||||
'asearch': "arc",
|
'async': 'use_ac:true,_fmt:pc',
|
||||||
'async': 'arc_id:srp_510,ffilt:all,ve_name:MoreResultsContainer,next_id:srp_5,use_ac:true,_id:arc-srp_510,_pms:qs,_fmt:pc' # pylint: disable=line-too-long
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
||||||
|
@ -312,9 +328,10 @@ def response(resp):
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
# results --> answer
|
# results --> answer
|
||||||
answer = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]//text()')
|
answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
|
||||||
if answer:
|
if answer_list:
|
||||||
results.append({'answer': ' '.join(answer)})
|
answer_list = [_.xpath("normalize-space()") for _ in answer_list]
|
||||||
|
results.append({'answer': ' '.join(answer_list)})
|
||||||
else:
|
else:
|
||||||
logger.debug("did not find 'answer'")
|
logger.debug("did not find 'answer'")
|
||||||
|
|
||||||
|
|
|
@ -1,19 +1,14 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
"""Google (Images)
|
"""This is the implementation of the google images engine.
|
||||||
|
|
||||||
For detailed description of the *REST-full* API see: `Query Parameter
|
.. admonition:: Content-Security-Policy (CSP)
|
||||||
Definitions`_.
|
|
||||||
|
|
||||||
.. _admonition:: Content-Security-Policy (CSP)
|
|
||||||
|
|
||||||
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
||||||
``data:` scheme).::
|
``data:`` scheme)::
|
||||||
|
|
||||||
Header set Content-Security-Policy "img-src 'self' data: ;"
|
Header set Content-Security-Policy "img-src 'self' data: ;"
|
||||||
|
|
||||||
.. _Query Parameter Definitions:
|
|
||||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
|
||||||
.. _data URLs:
|
.. _data URLs:
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,16 +1,11 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
"""Google (News)
|
"""This is the implementation of the google news engine. The google news API
|
||||||
|
ignores some parameters from the common :ref:`google API`:
|
||||||
For detailed description of the *REST-full* API see: `Query Parameter
|
|
||||||
Definitions`_. Not all parameters can be appied:
|
|
||||||
|
|
||||||
- num_ : the number of search results is ignored
|
- num_ : the number of search results is ignored
|
||||||
- save_ : is ignored / Google-News results are always *SafeSearch*
|
- save_ : is ignored / Google-News results are always *SafeSearch*
|
||||||
|
|
||||||
.. _Query Parameter Definitions:
|
|
||||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
|
||||||
|
|
||||||
.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
|
.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
|
||||||
.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
|
.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
|
||||||
|
|
||||||
|
|
|
@ -1,19 +1,14 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
"""Google (Video)
|
"""This is the implementation of the google videos engine.
|
||||||
|
|
||||||
For detailed description of the *REST-full* API see: `Query Parameter
|
.. admonition:: Content-Security-Policy (CSP)
|
||||||
Definitions`_. Not all parameters can be appied.
|
|
||||||
|
|
||||||
.. _admonition:: Content-Security-Policy (CSP)
|
|
||||||
|
|
||||||
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
||||||
``data:` scheme).::
|
``data:`` scheme)::
|
||||||
|
|
||||||
Header set Content-Security-Policy "img-src 'self' data: ;"
|
Header set Content-Security-Policy "img-src 'self' data: ;"
|
||||||
|
|
||||||
.. _Query Parameter Definitions:
|
|
||||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
|
||||||
.. _data URLs:
|
.. _data URLs:
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
||||||
|
|
||||||
|
|
|
@ -583,6 +583,7 @@ engines:
|
||||||
- name: google
|
- name: google
|
||||||
engine: google
|
engine: google
|
||||||
shortcut: go
|
shortcut: go
|
||||||
|
# see https://searxng.github.io/searxng/src/searx.engines.google.html#module-searx.engines.google
|
||||||
use_mobile_ui: true
|
use_mobile_ui: true
|
||||||
# additional_tests:
|
# additional_tests:
|
||||||
# android: *test_android
|
# android: *test_android
|
||||||
|
|
Loading…
Reference in New Issue