From 079636c0795aafed9306703a4decdc92447ed57f Mon Sep 17 00:00:00 2001 From: jazzzooo <38244149+jazzzooo@users.noreply.github.com> Date: Wed, 20 Sep 2023 08:39:42 -0700 Subject: [PATCH] [fix] engine - bing fix search, pagination, remove safesearch --- searx/engines/bing.py | 114 +++++------------------------------ searx/engines/bing_images.py | 9 +-- searx/engines/bing_news.py | 7 +-- searx/engines/bing_videos.py | 9 +-- 4 files changed, 23 insertions(+), 116 deletions(-) diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 9086623ea..2a56a7fa6 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -30,9 +30,8 @@ inaccuracies there too): from typing import TYPE_CHECKING import base64 -import datetime import re -import uuid +import time from urllib.parse import parse_qs, urlencode, urlparse from lxml import html import babel @@ -58,17 +57,10 @@ about = { "results": 'HTML', } -send_accept_language_header = True -"""Bing tries to guess user's language and territory from the HTTP -Accept-Language. Optional the user can select a search-language (can be -different to the UI language) and a region (market code).""" - # engine dependent config categories = ['general', 'web'] paging = True time_range_support = True -safesearch = True -safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # cookie: ADLT=STRICT base_url = 'https://www.bing.com/search' """Bing (Web) search URL""" @@ -77,105 +69,29 @@ bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-s """Bing (Web) search API description""" -def _get_offset_from_pageno(pageno): - return (pageno - 1) * 10 + 1 +def _page_offset(pageno): + return (int(pageno) - 1) * 10 + 1 -def set_bing_cookies(params, engine_language, engine_region, SID): - - # set cookies - # ----------- - - params['cookies']['_EDGE_V'] = '1' - - # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw - _EDGE_S = [ - 'F=1', - 'SID=%s' % SID, - 'mkt=%s' % engine_region.lower(), - 'ui=%s' % engine_language.lower(), - ] - params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S) - logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S']) - - # "_EDGE_CD": "m=zh-tw", - - _EDGE_CD = [ # pylint: disable=invalid-name - 'm=%s' % engine_region.lower(), # search region: zh-cn - 'u=%s' % engine_language.lower(), # UI: en-us - ] - - params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';' - logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD']) - - SRCHHPGUSR = [ # pylint: disable=invalid-name - 'SRCHLANG=%s' % engine_language, - # Trying to set ADLT cookie here seems not to have any effect, I assume - # there is some age verification by a cookie (and/or session ID) needed, - # to disable the SafeSearch. - 'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'), - ] - params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR) - logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR']) +def set_bing_cookies(params, engine_language, engine_region): + params['cookies']['_EDGE_CD'] = f'm={engine_region.lower()}&u={engine_language.lower()};' def request(query, params): """Assemble a Bing-Web request.""" - engine_region = traits.get_region(params['searxng_locale'], 'en-US') - engine_language = traits.get_language(params['searxng_locale'], 'en') + engine_region = traits.get_region(params['searxng_locale'], 'en-us') + engine_language = traits.get_language(params['searxng_locale'], 'en-us') + set_bing_cookies(params, engine_language, engine_region) - SID = uuid.uuid1().hex.upper() - CVID = uuid.uuid1().hex.upper() + query_params = {'q': query, 'first': _page_offset(params.get('pageno', 1))} + params['url'] = f'{base_url}?{urlencode(query_params)}' - set_bing_cookies(params, engine_language, engine_region, SID) + unix_day = int(time.time() / 86400) + time_ranges = {'day': '1', 'week': '2', 'month': '3', 'year': f'5_{unix_day-365}_{unix_day}'} + if params.get('time_range') in time_ranges: + params['url'] += f'&filters=ex1:"ez{time_ranges[params["time_range"]]}"' - # build URL query - # --------------- - - # query term - page = int(params.get('pageno', 1)) - query_params = { - # fmt: off - 'q': query, - 'pq': query, - 'cvid': CVID, - 'qs': 'n', - 'sp': '-1' - # fmt: on - } - - # page - if page > 1: - referer = base_url + '?' + urlencode(query_params) - params['headers']['Referer'] = referer - logger.debug("headers.Referer --> %s", referer) - - query_params['first'] = _get_offset_from_pageno(page) - - if page == 2: - query_params['FORM'] = 'PERE' - elif page > 2: - query_params['FORM'] = 'PERE%s' % (page - 2) - - filters = '' - if params['time_range']: - query_params['filt'] = 'custom' - - if params['time_range'] == 'day': - filters = 'ex1:"ez1"' - elif params['time_range'] == 'week': - filters = 'ex1:"ez2"' - elif params['time_range'] == 'month': - filters = 'ex1:"ez3"' - elif params['time_range'] == 'year': - epoch_1970 = datetime.date(1970, 1, 1) - today_no = (datetime.date.today() - epoch_1970).days - filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no) - - params['url'] = base_url + '?' + urlencode(query_params) - if filters: - params['url'] = params['url'] + '&filters=' + filters return params @@ -236,7 +152,7 @@ def response(resp): except Exception as e: # pylint: disable=broad-except logger.debug('result error :\n%s', e) - if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len: + if result_len and _page_offset(resp.search_params.get("pageno", 0)) > result_len: # Avoid reading more results than avalaible. # For example, if there is 100 results from some search and we try to get results from 120 to 130, # Bing will send back the results from 0 to 10 and no error. diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index bd3a34aa5..25b4e4f41 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -6,7 +6,6 @@ from typing import TYPE_CHECKING -import uuid import json from urllib.parse import urlencode @@ -17,7 +16,6 @@ from searx.engines.bing import ( set_bing_cookies, _fetch_traits, ) -from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import if TYPE_CHECKING: import logging @@ -61,11 +59,10 @@ time_map = { def request(query, params): """Assemble a Bing-Image request.""" - engine_region = traits.get_region(params['searxng_locale'], 'en-US') - engine_language = traits.get_language(params['searxng_locale'], 'en') + engine_region = traits.get_region(params['searxng_locale'], 'en-us') + engine_language = traits.get_language(params['searxng_locale'], 'en-us') - SID = uuid.uuid1().hex.upper() - set_bing_cookies(params, engine_language, engine_region, SID) + set_bing_cookies(params, engine_language, engine_region) # build URL query # - example: https://www.bing.com/images/async?q=foo&first=155&count=35 diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 18992e2d1..81c8df0f3 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -6,7 +6,6 @@ # pylint: disable=invalid-name from typing import TYPE_CHECKING -import uuid from urllib.parse import urlencode from lxml import html @@ -16,7 +15,6 @@ from searx.engines.bing import ( set_bing_cookies, _fetch_traits, ) -from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import if TYPE_CHECKING: import logging @@ -70,10 +68,9 @@ def request(query, params): sxng_locale = params['searxng_locale'] engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale) - engine_language = traits.get_language(sxng_locale, 'en') + engine_language = traits.get_language(sxng_locale, 'en-us') - SID = uuid.uuid1().hex.upper() - set_bing_cookies(params, engine_language, engine_region, SID) + set_bing_cookies(params, engine_language, engine_region) # build URL query # diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 8ee0bb66e..d4cb6058b 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -5,7 +5,6 @@ # pylint: disable=invalid-name from typing import TYPE_CHECKING -import uuid import json from urllib.parse import urlencode @@ -16,7 +15,6 @@ from searx.engines.bing import ( set_bing_cookies, _fetch_traits, ) -from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import if TYPE_CHECKING: import logging @@ -60,11 +58,10 @@ time_map = { def request(query, params): """Assemble a Bing-Video request.""" - engine_region = traits.get_region(params['searxng_locale'], 'en-US') - engine_language = traits.get_language(params['searxng_locale'], 'en') + engine_region = traits.get_region(params['searxng_locale'], 'en-us') + engine_language = traits.get_language(params['searxng_locale'], 'en-us') - SID = uuid.uuid1().hex.upper() - set_bing_cookies(params, engine_language, engine_region, SID) + set_bing_cookies(params, engine_language, engine_region) # build URL query #