mirror of https://github.com/searxng/searxng.git
Merge pull request #165 from return42/patch-google
improve & document google engine
This commit is contained in:
commit
f3e56836d6
|
@ -0,0 +1,55 @@
|
|||
.. _google engines:
|
||||
|
||||
==============
|
||||
Google Engines
|
||||
==============
|
||||
|
||||
.. contents:: Contents
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
|
||||
.. _google API:
|
||||
|
||||
google API
|
||||
==========
|
||||
|
||||
.. _Query Parameter Definitions:
|
||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
||||
|
||||
For a detailed description of the *RESTful* API see: `Query Parameter
|
||||
Definitions`_. Not all parameters can be applied and some engines are *special*
|
||||
(e.g. :ref:`google news engine`).
|
||||
|
||||
.. _google web engine:
|
||||
|
||||
Google WEB
|
||||
==========
|
||||
|
||||
.. automodule:: searx.engines.google
|
||||
:members:
|
||||
|
||||
.. _google images engine:
|
||||
|
||||
Google Images
|
||||
=============
|
||||
|
||||
.. automodule:: searx.engines.google_images
|
||||
:members:
|
||||
|
||||
.. _google videos engine:
|
||||
|
||||
Google Videos
|
||||
=============
|
||||
|
||||
.. automodule:: searx.engines.google_videos
|
||||
:members:
|
||||
|
||||
.. _google news engine:
|
||||
|
||||
Google News
|
||||
===========
|
||||
|
||||
.. automodule:: searx.engines.google_news
|
||||
:members:
|
|
@ -1,12 +1,28 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Google (Web)
|
||||
"""This is the implementation of the google WEB engine. Some of these
|
||||
implementations are shared by other engines:
|
||||
|
||||
For detailed description of the *REST-full* API see: `Query Parameter
|
||||
Definitions`_.
|
||||
- :ref:`google images engine`
|
||||
- :ref:`google news engine`
|
||||
- :ref:`google videos engine`
|
||||
|
||||
The google WEB engine itself has a special setup option:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: google
|
||||
...
|
||||
use_mobile_ui: true
|
||||
|
||||
``use_mobile_ui``: (default: ``true``)
|
||||
Enables the *mobile endpoint* to bypass the google blocking (see
|
||||
:issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More
|
||||
results` is not affected by Google rate limiting and we can still do requests
|
||||
while actively blocked by the original Google search. By activating
|
||||
``use_mobile_ui`` this behavior is simulated by adding the parameter
|
||||
``async=use_ac:true,_fmt:pc`` to the :py:func:`request`.
|
||||
|
||||
.. _Query Parameter Definitions:
|
||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
||||
"""
|
||||
|
||||
# pylint: disable=invalid-name, missing-function-docstring
|
||||
|
@ -137,8 +153,9 @@ spelling_suggestion_xpath = '//div[@class="med"]/p/a'
|
|||
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
||||
"""Composing various language properties for the google engines.
|
||||
|
||||
This function is called by the various google engines (google itself,
|
||||
google-images, -news, -scholar, -videos).
|
||||
This function is called by the various google engines (:ref:`google web
|
||||
engine`, :ref:`google images engine`, :ref:`google news engine` and
|
||||
:ref:`google videos engine`).
|
||||
|
||||
:param dict params: request parameters of the engine
|
||||
|
||||
|
@ -146,7 +163,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|||
:py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
|
||||
|
||||
:param dict custom_aliases: custom aliases for non standard language codes
|
||||
(used when calling :py:func:`searx.utils.match_language)
|
||||
(used when calling :py:func:`searx.utils.match_language`)
|
||||
|
||||
:param bool supported_any_language: When a language is not specified, the
|
||||
language interpretation is left up to Google to decide how the search
|
||||
|
@ -159,7 +176,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|||
Py-Dictionary with the key/value pairs:
|
||||
|
||||
language:
|
||||
Return value from :py:func:`searx.utils.match_language
|
||||
Return value from :py:func:`searx.utils.match_language`
|
||||
|
||||
country:
|
||||
The country code (e.g. US, AT, CA, FR, DE ..)
|
||||
|
@ -270,8 +287,7 @@ def request(query, params):
|
|||
additional_parameters = {}
|
||||
if use_mobile_ui:
|
||||
additional_parameters = {
|
||||
'asearch': "arc",
|
||||
'async': 'arc_id:srp_510,ffilt:all,ve_name:MoreResultsContainer,next_id:srp_5,use_ac:true,_id:arc-srp_510,_pms:qs,_fmt:pc' # pylint: disable=line-too-long
|
||||
'async': 'use_ac:true,_fmt:pc',
|
||||
}
|
||||
|
||||
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
||||
|
@ -312,9 +328,10 @@ def response(resp):
|
|||
dom = html.fromstring(resp.text)
|
||||
|
||||
# results --> answer
|
||||
answer = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]//text()')
|
||||
if answer:
|
||||
results.append({'answer': ' '.join(answer)})
|
||||
answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
|
||||
if answer_list:
|
||||
answer_list = [_.xpath("normalize-space()") for _ in answer_list]
|
||||
results.append({'answer': ' '.join(answer_list)})
|
||||
else:
|
||||
logger.debug("did not find 'answer'")
|
||||
|
||||
|
|
|
@ -1,19 +1,14 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Google (Images)
|
||||
"""This is the implementation of the google images engine.
|
||||
|
||||
For detailed description of the *REST-full* API see: `Query Parameter
|
||||
Definitions`_.
|
||||
|
||||
.. _admonition:: Content-Security-Policy (CSP)
|
||||
.. admonition:: Content-Security-Policy (CSP)
|
||||
|
||||
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
||||
``data:` scheme).::
|
||||
``data:`` scheme)::
|
||||
|
||||
Header set Content-Security-Policy "img-src 'self' data: ;"
|
||||
Header set Content-Security-Policy "img-src 'self' data: ;"
|
||||
|
||||
.. _Query Parameter Definitions:
|
||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
||||
.. _data URLs:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
||||
"""
|
||||
|
|
|
@ -1,16 +1,11 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Google (News)
|
||||
|
||||
For detailed description of the *REST-full* API see: `Query Parameter
|
||||
Definitions`_. Not all parameters can be appied:
|
||||
"""This is the implementation of the google news engine. The google news API
|
||||
ignores some parameters from the common :ref:`google API`:
|
||||
|
||||
- num_ : the number of search results is ignored
|
||||
- safe_ : is ignored / Google-News results are always *SafeSearch*
|
||||
|
||||
.. _Query Parameter Definitions:
|
||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
||||
|
||||
.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
|
||||
.. _safe: https://developers.google.com/custom-search/docs/xml_results#safesp
|
||||
|
||||
|
|
|
@ -1,19 +1,14 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Google (Video)
|
||||
"""This is the implementation of the google videos engine.
|
||||
|
||||
For detailed description of the *REST-full* API see: `Query Parameter
|
||||
Definitions`_. Not all parameters can be appied.
|
||||
|
||||
.. _admonition:: Content-Security-Policy (CSP)
|
||||
.. admonition:: Content-Security-Policy (CSP)
|
||||
|
||||
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
||||
``data:` scheme).::
|
||||
``data:`` scheme)::
|
||||
|
||||
Header set Content-Security-Policy "img-src 'self' data: ;"
|
||||
|
||||
.. _Query Parameter Definitions:
|
||||
https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
||||
.. _data URLs:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
||||
|
||||
|
|
|
@ -583,6 +583,7 @@ engines:
|
|||
- name: google
|
||||
engine: google
|
||||
shortcut: go
|
||||
# see https://searxng.github.io/searxng/src/searx.engines.google.html#module-searx.engines.google
|
||||
use_mobile_ui: true
|
||||
# additional_tests:
|
||||
# android: *test_android
|
||||
|
|
Loading…
Reference in New Issue