searxngRebrandZaclys/searx/engines/bing.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This is the implementation of the Bing-WEB engine. Some of this
implementations are shared by other engines:

- :ref:`bing images engine`
- :ref:`bing news engine`
- :ref:`bing videos engine`

On the `preference page`_ Bing offers a lot of languages and regions (see section
'Search results languages' and 'Country/region').  However, the abundant choice
does not correspond to reality, where Bing has a full-text indexer only for a
limited number of languages.  For example: you can select a language like Māori
but you never get a result in this language.

What comes a bit closer to the truth are the `search-APIs`_ but they don't seem
to be completely correct either (if you take a closer look you will find some
inaccuracies there too):

- :py:obj:`searx.engines.bing.bing_traits_url`
- :py:obj:`searx.engines.bing_videos.bing_traits_url`
- :py:obj:`searx.engines.bing_images.bing_traits_url`
- :py:obj:`searx.engines.bing_news.bing_traits_url`

.. _preference page: https://www.bing.com/account/general
.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/

"""
# pylint: disable=too-many-branches, invalid-name

from typing import TYPE_CHECKING
import datetime
import re
import uuid
from urllib.parse import urlencode
from lxml import html
import babel
import babel.languages

from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits

# `logger` is only declared for type checkers here; this module never assigns
# it, yet the functions below call `logger.debug(...)`.
# NOTE(review): presumably the engine loader injects it at runtime -- confirm.
if TYPE_CHECKING:
    import logging

    logger: logging.Logger

# Annotation only: no value is assigned in this module.  `request()` calls
# `traits.get_region()` / `traits.get_language()` on it.
traits: EngineTraits

# Meta information about the upstream service.  Results are scraped from the
# HTML pages ("results": 'HTML'), the official API is not used and no API key
# is required.
about = {
    "website": 'https://www.bing.com',
    "wikidata_id": 'Q182496',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# Engine option; its purpose is described by the string that follows it.
send_accept_language_header = True
"""Bing tries to guess user's language and territory from the HTTP
Accept-Language.  Optionally the user can select a search-language (can be
different to the UI language) and a region (market code)."""

# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True
# Maps the numeric safesearch level found in params['safesearch'] to the value
# of the ADLT field that set_bing_cookies() writes into the SRCHHPGUSR cookie
# (unknown levels fall back to 'DEMOTE' there).
safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}  # cookie: ADLT=STRICT

# Endpoint that request() extends with the urlencoded query arguments.
base_url = 'https://www.bing.com/search'
"""Bing (Web) search URL"""

# Page that fetch_traits() downloads to read the language and market codes.
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
"""Bing (Web) search API description"""


def _get_offset_from_pageno(pageno):
    """Return the 1-based index of the first result on page *pageno*.

    Every page is counted as ten results: page 1 starts at 1, page 2 at 11, ...
    """
    results_per_page = 10
    preceding_pages = pageno - 1
    return preceding_pages * results_per_page + 1


def set_bing_cookies(params, engine_language, engine_region, SID):
    """Write the cookies of a Bing request into ``params['cookies']``.

    Four cookies are set: ``_EDGE_V``, ``_EDGE_S``, ``_EDGE_CD`` and
    ``SRCHHPGUSR``.  The value of the last three is logged at debug level.

    :param params: request parameters; ``params['cookies']`` is modified in
        place and ``params['safesearch']`` is read.
    :param engine_language: language code; used lower-cased as UI language and
        unchanged as ``SRCHLANG``.
    :param engine_region: region (market) code; used lower-cased.
    :param SID: session ID placed into the ``_EDGE_S`` cookie.
    """
    cookies = params['cookies']
    cookies['_EDGE_V'] = '1'

    market = engine_region.lower()  # search region, e.g. zh-cn
    ui_language = engine_language.lower()  # UI language, e.g. en-us

    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
    edge_s = '&'.join(('F=1', 'SID=%s' % SID, 'mkt=%s' % market, 'ui=%s' % ui_language))
    cookies['_EDGE_S'] = edge_s
    logger.debug("cookie _EDGE_S=%s", cookies['_EDGE_S'])

    # "_EDGE_CD": "m=zh-tw",
    edge_cd = '&'.join(('m=%s' % market, 'u=%s' % ui_language))
    cookies['_EDGE_CD'] = edge_cd + ';'
    logger.debug("cookie _EDGE_CD=%s", cookies['_EDGE_CD'])

    # Trying to set ADLT cookie here seems not to have any effect, I assume
    # there is some age verification by a cookie (and/or session ID) needed,
    # to disable the SafeSearch.
    adult_filter = safesearch_types.get(params['safesearch'], 'DEMOTE')
    cookies['SRCHHPGUSR'] = '&'.join(('SRCHLANG=%s' % engine_language, 'ADLT=%s' % adult_filter))
    logger.debug("cookie SRCHHPGUSR=%s", cookies['SRCHHPGUSR'])


def request(query, params):
    """Assemble a Bing-Web request.

    Sets the Bing cookies, adds a ``Referer`` header for follow-up pages and
    stores the final search URL in ``params['url']``.

    :param query: search term, sent as ``q`` and ``pq``.
    :param params: request parameters; ``searxng_locale``, ``time_range``,
        ``safesearch`` and (optionally) ``pageno`` are read, ``cookies``,
        ``headers`` and ``url`` are written.
    :return: the updated ``params``.
    """
    locale = params['searxng_locale']
    engine_region = traits.get_region(locale, 'en-US')
    engine_language = traits.get_language(locale, 'en')

    # one ID for the _EDGE_S cookie and a second one for the 'cvid' URL argument
    sid = uuid.uuid1().hex.upper()
    cvid = uuid.uuid1().hex.upper()
    set_bing_cookies(params, engine_language, engine_region, sid)

    pageno = int(params.get('pageno', 1))
    url_args = {'q': query, 'pq': query, 'cvid': cvid, 'qs': 'n', 'sp': '-1'}

    if pageno > 1:
        # the referer is built before 'first' / 'FORM' are added, i.e. it
        # looks like the URL of the initial query
        referer = base_url + '?' + urlencode(url_args)
        params['headers']['Referer'] = referer
        logger.debug("headers.Referer --> %s", referer)

    url_args['first'] = _get_offset_from_pageno(pageno)
    if pageno >= 2:
        url_args['FORM'] = 'PERE' if pageno == 2 else 'PERE%s' % (pageno - 2)

    # 'day', 'week' and 'month' have a fixed filter value, 'year' is expressed
    # as a span of day numbers counted from 1970-01-01
    fixed_ranges = {'day': 'ex1:"ez1"', 'week': 'ex1:"ez2"', 'month': 'ex1:"ez3"'}
    time_range = params['time_range']
    time_filter = ''
    if time_range:
        url_args['filt'] = 'custom'
        if time_range == 'year':
            today_no = (datetime.date.today() - datetime.date(1970, 1, 1)).days
            time_filter = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
        else:
            time_filter = fixed_ranges.get(time_range, '')

    search_url = base_url + '?' + urlencode(url_args)
    if time_filter:
        # the filter expression is appended as is (not urlencoded)
        search_url += '&filters=' + time_filter
    params['url'] = search_url
    return params


def response(resp):
    """Parse a Bing-Web result page into a list of result dicts.

    For every ``li.b_algo`` item of ``ol#b_results`` one dict with the keys
    ``url``, ``title`` and ``content`` is created.  Links pointing to Bing's
    redirect (``https://www.bing.com/ck/a?``) are replaced either by the URL
    shown in the ``<cite>`` element (when it is not shortened) or by the
    ``Location`` header of the redirect, which is fetched with one additional
    request per link.

    :param resp: HTTP response of the search request; ``resp.text`` and
        ``resp.search_params`` are read.
    :return: list of result dicts followed by one
        ``{'number_of_results': n}`` entry, or an empty list when the requested
        page starts beyond the number of results reported by Bing.
    """
    # pylint: disable=too-many-locals,import-outside-toplevel

    from searx.network import Request, multi_requests  # see https://github.com/searxng/searxng/issues/762

    results = []
    result_len = 0

    dom = html.fromstring(resp.text)

    url_to_resolve = []
    url_to_resolve_index = []
    for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):

        link = eval_xpath_getindex(result, './/h2/a', 0, None)
        if link is None:
            continue
        url = link.attrib.get('href')
        if not url:
            # an anchor without a target can't be turned into a result
            continue
        title = extract_text(link)

        content = eval_xpath(result, '(.//p)[1]')
        for p in content:
            # Make sure that the element is free of <a href> links
            for e in p.xpath('.//a'):
                e.getparent().remove(e)
        content = extract_text(content)

        # get the real URL either using the URL shown to user or following the Bing URL
        if url.startswith('https://www.bing.com/ck/a?'):
            url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
            # Bing can shorten the URL either at the end or in the middle of the string
            if (
                url_cite
                and url_cite.startswith('https://')
                and '…' not in url_cite
                and '...' not in url_cite
                and '›' not in url_cite
            ):
                # no need for an additional HTTP request
                url = url_cite
            else:
                # resolve the URL with an additional HTTP request; the result
                # about to be appended will sit at position len(results)
                url_to_resolve.append(url.replace('&ntb=1', '&ntb=F'))
                url_to_resolve_index.append(len(results))
                url = None  # remove the result if the HTTP Bing redirect raise an exception

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # resolve all Bing redirections in parallel
    request_list = [
        Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
    ]
    response_list = multi_requests(request_list)
    for index, redirect_response in zip(url_to_resolve_index, response_list):
        if isinstance(redirect_response, Exception):
            continue
        # A response without a Location header (i.e. not a redirect) is handled
        # like a failed request: the URL of the result stays None.
        location = redirect_response.headers.get('location')
        if location:
            results[index]['url'] = location

    # get number_of_results
    try:
        result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
        if "-" in result_len_container:

            # Remove the part "from-to" for paginated request ...
            result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]

        result_len_container = re.sub('[^0-9]', '', result_len_container)

        if result_len_container:
            result_len = int(result_len_container)

    except Exception as e:  # pylint: disable=broad-except
        logger.debug('result error :\n%s', e)

    # The requested page starts beyond the reported number of results: what
    # Bing returned is not the requested page, so nothing is reported.
    if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
        return []

    results.append({'number_of_results': result_len})
    return results


def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Web.

    Reads the language codes from the third table and the market codes from
    the first table of the page at ``bing_traits_url`` and stores them in
    ``engine_traits``.
    """
    # The second table ('//table[2]/tbody/tr/td[2]', country codes) is not used.
    _fetch_traits(
        engine_traits,
        bing_traits_url,
        xpath_language_codes='//table[3]/tbody/tr/td[2]',
        xpath_market_codes='//table[1]/tbody/tr/td[3]',
    )


def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
    """Fill ``engine_traits`` with the languages and regions listed at ``url``.

    The page is downloaded and the text of the table cells selected by the two
    XPath expressions is mapped to SearXNG tags with the help of babel.
    Problems (response not OK, code unknown to babel, two engine codes for the
    same SearXNG tag) are reported with ``print``; in case of a conflict the
    first mapping is kept.

    :param engine_traits: traits object whose ``languages``, ``regions`` and
        ``all_locale`` are updated in place.
    :param url: URL of the page that lists the language and market codes.
    :param xpath_language_codes: XPath selecting the cells with language codes.
    :param xpath_market_codes: XPath selecting the cells with market codes.
    """
    # pylint: disable=too-many-locals,import-outside-toplevel

    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    # insert alias to map from a language (zh) to a language + script (zh_Hans)
    engine_traits.languages['zh'] = 'zh-hans'

    resp = get(url)

    if not resp.ok:  # type: ignore
        print("ERROR: response from Bing is not OK.")

    dom = html.fromstring(resp.text)  # type: ignore

    # engine codes that need to be renamed before babel can parse them
    map_lang = {'jp': 'ja'}
    for td in eval_xpath(dom, xpath_language_codes):
        eng_lang = td.text

        if eng_lang in ('en-gb', 'pt-br'):
            # language 'en' is already in the list and a language 'en-gb' can't
            # be handled in SearXNG, same with pt-br which is covered by pt-pt.
            continue

        babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang))
        except babel.UnknownLocaleError:
            print("ERROR: language (%s) is unknown by babel" % (eng_lang))
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            # the tag is already mapped: keep the first mapping and report
            # only if the engine codes differ
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang

    # market codes that are replaced by another locale before babel parses them
    map_region = {
        'en-ID': 'id_ID',
        'no-NO': 'nb_NO',
    }

    for td in eval_xpath(dom, xpath_market_codes):
        eng_region = td.text
        babel_region = map_region.get(eng_region, eng_region).replace('-', '_')

        if eng_region == 'en-WW':
            # not passed to babel, stored as the engine's 'all' locale instead
            engine_traits.all_locale = eng_region
            continue

        try:
            sxng_tag = region_tag(babel.Locale.parse(babel_region))
        except babel.UnknownLocaleError:
            print("ERROR: region (%s) is unknown by babel" % (eng_region))
            continue
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            # same rule as for the languages: the first mapping wins
            if conflict != eng_region:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
            continue
        engine_traits.regions[sxng_tag] = eng_region
-												[enh] engines: add about variable

move meta information from comment to the about variable
so the preferences, the documentation can show these information

											
										
										
											2021-01-13 10:31:25 +00:00
+								# SPDX-License-Identifier: AGPL-3.0-or-later
-												[pylint] Bing (Web) engine

Fix remarks from pylint and improved code-style.  In preparation for a bug-fix
of the Bing (Web) engine I add this engine to the pylint-list.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 10:40:12 +00:00
+								# lint: pylint
-												[doc] add a description of bing engines (web, news, video, images)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-27 17:21:17 +00:00
+								"""This is the implementation of the Bing-WEB engine. Some of this
 								implementations are shared by other engines:
-												[fix] bing engine: fix paging support, show inital page.

Follow up queries for the pages needed to be fixed.

- Split search-term in one for initial query and one for following queries.
- Set some headers in HTTP requests, bing needs for paging support.
- IMO //div[@class="sa_cc"] does no longer match in a bing response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 12:41:12 +00:00
-												[doc] add a description of bing engines (web, news, video, images)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-27 17:21:17 +00:00
+								- :ref:`bing images engine`
 								- :ref:`bing news engine`
 								- :ref:`bing videos engine`
 								On the `preference page`_ Bing offers a lot of languages an regions (see section
 								'Search results languages' and 'Country/region').  However, the abundant choice
 								does not correspond to reality, where Bing has a full-text indexer only for a
 								limited number of languages.  By example: you can select a language like Māori
 								but you never get a result in this language.
 								What comes a bit closer to the truth are the `search-APIs`_ but they don`t seem
 								to be completely correct either (if you take a closer look you will find some
 								inaccuracies there too):
-												update bing engines and fix bing_news

											
										
										
											2014-09-01 12:38:59 +00:00
-												[doc] add a description of bing engines (web, news, video, images)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-27 17:21:17 +00:00
+								- :py:obj:`searx.engines.bing.bing_traits_url`
 								- :py:obj:`searx.engines.bing_videos.bing_traits_url`
 								- :py:obj:`searx.engines.bing_images.bing_traits_url`
 								- :py:obj:`searx.engines.bing_news.bing_traits_url`
 								.. _preference page: https://www.bing.com/account/general
 								.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/
 								"""
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								# pylint: disable=too-many-branches, invalid-name
 								from typing import TYPE_CHECKING
 								import datetime
-												Fix bing engine results count (#1387)

This PR fixes the result count from bing which was throwing an (hidden) error and add a validation to avoid reading more results than avalaible.

For example :
If there is 100 results from some search and we try to get results from 120 to 130, Bing will send back the results from 0 to 10 and no error. If we compare results count with the first parameter of the request we can avoid this "invalid" results.
											
										
										
											2019-08-05 14:15:40 +00:00
+								import re
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								import uuid
 								from urllib.parse import urlencode
-												Improves PEP8 compatibility.

											
										
										
											2014-02-05 19:24:31 +00:00
+								from lxml import html
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								import babel
 								import babel.languages
-												[enh] bing engine added

											
										
										
											2013-10-24 21:52:57 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
 								from searx.locales import language_tag, region_tag
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								from searx.enginelib.traits import EngineTraits
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								if TYPE_CHECKING:
 								    import logging
 								    logger: logging.Logger
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								traits: EngineTraits
-												[enh] engines: add about variable

move meta information from comment to the about variable
so the preferences, the documentation can show these information

											
										
										
											2021-01-13 10:31:25 +00:00
+								about = {
 								    "website": 'https://www.bing.com',
 								    "wikidata_id": 'Q182496',
 								    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
 								    "use_official_api": False,
 								    "require_api_key": False,
 								    "results": 'HTML',
 								}
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								send_accept_language_header = True
 								"""Bing tries to guess user's language and territory from the HTTP
 								Accept-Language.  Optional the user can select a search-language (can be
 								different to the UI language) and a region (market code)."""
-												update bing engines and fix bing_news

											
										
										
											2014-09-01 12:38:59 +00:00
+								# engine dependent config
-												[enh] add more categories

											
										
										
											2021-12-22 15:58:52 +00:00
+								categories = ['general', 'web']
-												[enh] bing, google paging support

											
										
										
											2014-01-29 20:14:38 +00:00
+								paging = True
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								time_range_support = True
 								safesearch = True
 								safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}  # cookie: ADLT=STRICT
-												[fix] bing engine: fix paging support, show inital page.

Follow up queries for the pages needed to be fixed.

- Split search-term in one for initial query and one for following queries.
- Set some headers in HTTP requests, bing needs for paging support.
- IMO //div[@class="sa_cc"] does no longer match in a bing response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 12:41:12 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								base_url = 'https://www.bing.com/search'
 								"""Bing (Web) search URL"""
-												[fix] bing engine: fix paging support, show inital page.

Follow up queries for the pages needed to be fixed.

- Split search-term in one for initial query and one for following queries.
- Set some headers in HTTP requests, bing needs for paging support.
- IMO //div[@class="sa_cc"] does no longer match in a bing response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 12:41:12 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
 								"""Bing (Web) search API description"""
-												[enh] bing engine added

											
										
										
											2013-10-24 21:52:57 +00:00
-												[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-27 08:26:22 +00:00
-												Fix bing engine results count (#1387)

This PR fixes the result count from bing which was throwing an (hidden) error and add a validation to avoid reading more results than avalaible.

For example :
If there is 100 results from some search and we try to get results from 120 to 130, Bing will send back the results from 0 to 10 and no error. If we compare results count with the first parameter of the request we can avoid this "invalid" results.
											
										
										
											2019-08-05 14:15:40 +00:00
+								def _get_offset_from_pageno(pageno):
 								    return (pageno - 1) * 10 + 1
-												[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-27 08:26:22 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								def set_bing_cookies(params, engine_language, engine_region, SID):
 								    # set cookies
 								    # -----------
 								    params['cookies']['_EDGE_V'] = '1'
 								    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
 								    _EDGE_S = [
 								        'F=1',
 								        'SID=%s' % SID,
 								        'mkt=%s' % engine_region.lower(),
 								        'ui=%s' % engine_language.lower(),
 								    ]
 								    params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
 								    logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])
 								    # "_EDGE_CD": "m=zh-tw",
-												update bing engines and fix bing_news

											
										
										
											2014-09-01 12:38:59 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    _EDGE_CD = [  # pylint: disable=invalid-name
 								        'm=%s' % engine_region.lower(),  # search region: zh-cn
 								        'u=%s' % engine_language.lower(),  # UI: en-us
 								    ]
 								    params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
 								    logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])
-												[fix] bing engine: fix paging support, show inital page.

Follow up queries for the pages needed to be fixed.

- Split search-term in one for initial query and one for following queries.
- Set some headers in HTTP requests, bing needs for paging support.
- IMO //div[@class="sa_cc"] does no longer match in a bing response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 12:41:12 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    SRCHHPGUSR = [  # pylint: disable=invalid-name
 								        'SRCHLANG=%s' % engine_language,
 								        # Trying to set ADLT cookie here seems not to have any effect, I assume
 								        # there is some age verification by a cookie (and/or session ID) needed,
 								        # to disable the SafeSearch.
 								        'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
 								    ]
 								    params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
 								    logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
-												[fix] bing engine: fix paging support, show inital page.

Follow up queries for the pages needed to be fixed.

- Split search-term in one for initial query and one for following queries.
- Set some headers in HTTP requests, bing needs for paging support.
- IMO //div[@class="sa_cc"] does no longer match in a bing response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 12:41:12 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								def request(query, params):
 								    """Assemble a Bing-Web request."""
-												[fix] use english as default language in bing

If no language is specified, bing returns results with multiple languages
for one query which isn't really useful. Setting english as default
insted if nothing.

											
										
										
											2016-12-30 17:17:14 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
 								    engine_language = traits.get_language(params['searxng_locale'], 'en')
-												update bing engines and fix bing_news

											
										
										
											2014-09-01 12:38:59 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    SID = uuid.uuid1().hex.upper()
 								    CVID = uuid.uuid1().hex.upper()
-												fix bing "garbage" results (issue #1275)

											
										
										
											2018-05-20 23:10:22 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    set_bing_cookies(params, engine_language, engine_region, SID)
 								    # build URL query
 								    # ---------------
 								    # query term
 								    page = int(params.get('pageno', 1))
 								    query_params = {
 								        # fmt: off
 								        'q': query,
 								        'pq': query,
 								        'cvid': CVID,
 								        'qs': 'n',
 								        'sp': '-1'
 								        # fmt: on
 								    }
 								    # page
 								    if page > 1:
 								        referer = base_url + '?' + urlencode(query_params)
-												[fix] bing engine: fix paging support, show inital page.

Follow up queries for the pages needed to be fixed.

- Split search-term in one for initial query and one for following queries.
- Set some headers in HTTP requests, bing needs for paging support.
- IMO //div[@class="sa_cc"] does no longer match in a bing response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 12:41:12 +00:00
+								        params['headers']['Referer'] = referer
-												[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-27 08:26:22 +00:00
+								        logger.debug("headers.Referer --> %s", referer)
-												[enh] bing engine added

											
										
										
											2013-10-24 21:52:57 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    query_params['first'] = _get_offset_from_pageno(page)
 								    if page == 2:
 								        query_params['FORM'] = 'PERE'
 								    elif page > 2:
 								        query_params['FORM'] = 'PERE%s' % (page - 2)
 								    filters = ''
 								    if params['time_range']:
 								        query_params['filt'] = 'custom'
 								        if params['time_range'] == 'day':
 								            filters = 'ex1:"ez1"'
 								        elif params['time_range'] == 'week':
 								            filters = 'ex1:"ez2"'
 								        elif params['time_range'] == 'month':
 								            filters = 'ex1:"ez3"'
 								        elif params['time_range'] == 'year':
 								            epoch_1970 = datetime.date(1970, 1, 1)
 								            today_no = (datetime.date.today() - epoch_1970).days
 								            filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
 								    params['url'] = base_url + '?' + urlencode(query_params)
 								    if filters:
 								        params['url'] = params['url'] + '&filters=' + filters
-												[fix] bing engine: fix paging support, show inital page.

Follow up queries for the pages needed to be fixed.

- Split search-term in one for initial query and one for following queries.
- Set some headers in HTTP requests, bing needs for paging support.
- IMO //div[@class="sa_cc"] does no longer match in a bing response.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-18 12:41:12 +00:00
+								    return params
-												[enh] bing engine added

											
										
										
											2013-10-24 21:52:57 +00:00
-												[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-27 08:26:22 +00:00
-												[enh] bing engine added

											
										
										
											2013-10-24 21:52:57 +00:00
def response(resp):
    """Parse a Bing-WEB HTML result page into a list of result items.

    :param resp: HTTP response of the search request.  ``resp.text`` is parsed
        as HTML and ``resp.search_params`` is read for the request ``headers``
        (reused when resolving redirects) and the ``pageno``.
    :returns: A list of dicts with the keys ``url``, ``title`` and ``content``,
        followed by one final ``{'number_of_results': <int>}`` item.  An empty
        list is returned when the requested page offset lies beyond the result
        count reported by Bing.

    Result links that point to Bing's ``/ck/a`` redirector are replaced by the
    real target: either by the URL shown in the ``<cite>`` element or, when
    that one is shortened, by the ``Location`` header of an additional HTTP
    request (all of these requests are sent in parallel).  A result whose
    redirect can't be resolved keeps ``url = None``.
    """
    # pylint: disable=too-many-locals,import-outside-toplevel
    from searx.network import Request, multi_requests  # see https://github.com/searxng/searxng/issues/762

    results = []
    result_len = 0

    dom = html.fromstring(resp.text)

    # Bing redirect URLs that need an additional HTTP request and, at the same
    # position, the index in ``results`` of the item each URL belongs to.
    url_to_resolve = []
    url_to_resolve_index = []

    for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):

        link = eval_xpath_getindex(result, './/h2/a', 0, None)
        if link is None:
            continue

        url = link.attrib.get('href')
        if not url:
            # an anchor without a target can't become a result (and would
            # break the str methods used on ``url`` below)
            continue

        title = extract_text(link)

        content = eval_xpath(result, '(.//p)[1]')
        for p in content:
            # Make sure that the element is free of <a href> links
            for e in p.xpath('.//a'):
                e.getparent().remove(e)
        content = extract_text(content)

        # get the real URL either using the URL shown to user or following the Bing URL
        if url.startswith('https://www.bing.com/ck/a?'):
            url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
            # Bing can shorten the URL either at the end or in the middle of the string
            if (
                url_cite
                and url_cite.startswith('https://')
                and '…' not in url_cite
                and '...' not in url_cite
                and '›' not in url_cite
            ):
                # no need for an additional HTTP request
                url = url_cite
            else:
                # resolve the URL with an additional HTTP request; the item is
                # appended right below, so its index is the current length
                url_to_resolve.append(url.replace('&ntb=1', '&ntb=F'))
                url_to_resolve_index.append(len(results))
                url = None  # stays None if the redirect can't be resolved

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # resolve all Bing redirections in parallel
    request_list = [
        Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
    ]
    response_list = multi_requests(request_list)

    for result_index, redirect_response in zip(url_to_resolve_index, response_list):
        if isinstance(redirect_response, Exception):
            continue
        # a response without a Location header must not abort the whole page
        location = redirect_response.headers.get('location')
        if location:
            results[result_index]['url'] = location

    # get number_of_results
    try:
        result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
        if "-" in result_len_container:
            # Remove the part "from-to" for paginated request ...
            result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]

        result_len_container = re.sub('[^0-9]', '', result_len_container)

        if result_len_container:
            result_len = int(result_len_container)

    except Exception as e:  # pylint: disable=broad-except
        logger.debug('result error :\n%s', e)

    # Drop the page when its offset lies beyond the result count Bing reports
    if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
        return []

    results.append({'number_of_results': result_len})
    return results
-												[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

											
										
										
											2016-11-06 02:51:38 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
def fetch_traits(engine_traits: EngineTraits):
    """Populate *engine_traits* with the languages and regions of Bing-Web.

    Delegates to :py:obj:`_fetch_traits`, handing over the Bing-Web traits URL
    together with the XPath selectors of the table cells that hold the
    language codes and the market codes.
    """
    # The country codes ('//table[2]/tbody/tr/td[2]') are not evaluated.
    _fetch_traits(
        engine_traits,
        bing_traits_url,
        xpath_language_codes='//table[3]/tbody/tr/td[2]',
        xpath_market_codes='//table[1]/tbody/tr/td[3]',
    )
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request method from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
-												[fix] engine & network issues / documentation and type annotations

This patch fixes some quirks and issues related to the engines and the network.
Each engine has its own network and this network was broken for the following
engines[1]:

- archlinux
- bing
- dailymotion
- duckduckgo
- google
- peertube
- startpage
- wikipedia

Since the files have been touched anyway, the type annotations of the engine
modules have also been completed so that error messages from the type checker are
no longer reported.

Related and (partial) fixed issue:

- [1] https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
- [2] https://github.com/searxng/searxng/issues/2513
- [3] https://github.com/searxng/searxng/issues/2515

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2023-06-25 10:37:31 +00:00
+								    # pylint: disable=too-many-locals,import-outside-toplevel
 								    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    # insert alias to map from a language (zh) to a language + script (zh_Hans)
 								    engine_traits.languages['zh'] = 'zh-hans'
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[fix] engine & network issues / documentation and type annotations

This patch fixes some quirks and issues related to the engines and the network.
Each engine has its own network and this network was broken for the following
engines[1]:

- archlinux
- bing
- dailymotion
- duckduckgo
- google
- peertube
- startpage
- wikipedia

Since the files have been touched anyway, the type annotaions of the engine
modules has also been completed so that error messages from the type checker are
no longer reported.

Related and (partial) fixed issue:

- [1] https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
- [2] https://github.com/searxng/searxng/issues/2513
- [3] https://github.com/searxng/searxng/issues/2515

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2023-06-25 10:37:31 +00:00
+								    resp = get(url)
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[fix] engine & network issues / documentation and type annotations

This patch fixes some quirks and issues related to the engines and the network.
Each engine has its own network and this network was broken for the following
engines[1]:

- archlinux
- bing
- dailymotion
- duckduckgo
- google
- peertube
- startpage
- wikipedia

Since the files have been touched anyway, the type annotaions of the engine
modules has also been completed so that error messages from the type checker are
no longer reported.

Related and (partial) fixed issue:

- [1] https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
- [2] https://github.com/searxng/searxng/issues/2513
- [3] https://github.com/searxng/searxng/issues/2515

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2023-06-25 10:37:31 +00:00
+								    if not resp.ok:  # type: ignore
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								        print("ERROR: response from peertube is not OK.")
-												[fix] engine & network issues / documentation and type annotations

This patch fixes some quirks and issues related to the engines and the network.
Each engine has its own network and this network was broken for the following
engines[1]:

- archlinux
- bing
- dailymotion
- duckduckgo
- google
- peertube
- startpage
- wikipedia

Since the files have been touched anyway, the type annotaions of the engine
modules has also been completed so that error messages from the type checker are
no longer reported.

Related and (partial) fixed issue:

- [1] https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
- [2] https://github.com/searxng/searxng/issues/2513
- [3] https://github.com/searxng/searxng/issues/2515

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2023-06-25 10:37:31 +00:00
+								    dom = html.fromstring(resp.text)  # type: ignore
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    map_lang = {'jp': 'ja'}
 								    for td in eval_xpath(dom, xpath_language_codes):
 								        eng_lang = td.text
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								        if eng_lang in ('en-gb', 'pt-br'):
 								            # language 'en' is already in the list and a language 'en-gb' can't
 								            # be handled in SearXNG, same with pt-br which is covered by pt-pt.
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								            continue
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								        babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								        try:
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								            sxng_tag = language_tag(babel.Locale.parse(babel_lang))
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								        except babel.UnknownLocaleError:
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								            print("ERROR: language (%s) is unknown by babel" % (eng_lang))
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request method from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								            continue
 								        conflict = engine_traits.languages.get(sxng_tag)
 								        if conflict:
 								            if conflict != eng_lang:
 								                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
 								            continue
 								        engine_traits.languages[sxng_tag] = eng_lang
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    map_region = {
 								        'en-ID': 'id_ID',
 								        'no-NO': 'nb_NO',
 								    }
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request method from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								    for td in eval_xpath(dom, xpath_market_codes):
 								        eng_region = td.text
 								        babel_region = map_region.get(eng_region, eng_region).replace('-', '_')
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request method from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								        if eng_region == 'en-WW':
 								            engine_traits.all_locale = eng_region
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request method from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								            continue
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								        try:
 								            sxng_tag = region_tag(babel.Locale.parse(babel_region))
 								        except babel.UnknownLocaleError:
 								            print("ERROR: region (%s) is unknown by babel" % (eng_region))
-												[mod] bing: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Bing engines.

.. note::

   Does not include migration of the request method from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-03 08:06:17 +00:00
+								            continue
-												[mod] bing: add time_range support & upgrade to data_type: traits_v1

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-15 19:17:39 +00:00
+								        conflict = engine_traits.regions.get(sxng_tag)
 								        if conflict:
 								            if conflict != eng_region:
 								                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
 								            continue
 								        engine_traits.regions[sxng_tag] = eng_region