mirror of https://github.com/searxng/searxng.git
[fix] engine - bing fix search, pagination, remove safesearch
This commit is contained in:
parent
5ce1792432
commit
079636c079
|
@ -30,9 +30,8 @@ inaccuracies there too):
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
import base64
|
import base64
|
||||||
import datetime
|
|
||||||
import re
|
import re
|
||||||
import uuid
|
import time
|
||||||
from urllib.parse import parse_qs, urlencode, urlparse
|
from urllib.parse import parse_qs, urlencode, urlparse
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import babel
|
import babel
|
||||||
|
@ -58,17 +57,10 @@ about = {
|
||||||
"results": 'HTML',
|
"results": 'HTML',
|
||||||
}
|
}
|
||||||
|
|
||||||
send_accept_language_header = True
|
|
||||||
"""Bing tries to guess user's language and territory from the HTTP
|
|
||||||
Accept-Language. Optional the user can select a search-language (can be
|
|
||||||
different to the UI language) and a region (market code)."""
|
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general', 'web']
|
categories = ['general', 'web']
|
||||||
paging = True
|
paging = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
safesearch = True
|
|
||||||
safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # cookie: ADLT=STRICT
|
|
||||||
|
|
||||||
base_url = 'https://www.bing.com/search'
|
base_url = 'https://www.bing.com/search'
|
||||||
"""Bing (Web) search URL"""
|
"""Bing (Web) search URL"""
|
||||||
|
@ -77,105 +69,29 @@ bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-s
|
||||||
"""Bing (Web) search API description"""
|
"""Bing (Web) search API description"""
|
||||||
|
|
||||||
|
|
||||||
def _get_offset_from_pageno(pageno):
|
def _page_offset(pageno):
|
||||||
return (pageno - 1) * 10 + 1
|
return (int(pageno) - 1) * 10 + 1
|
||||||
|
|
||||||
|
|
||||||
def set_bing_cookies(params, engine_language, engine_region, SID):
|
def set_bing_cookies(params, engine_language, engine_region):
|
||||||
|
params['cookies']['_EDGE_CD'] = f'm={engine_region.lower()}&u={engine_language.lower()};'
|
||||||
# set cookies
|
|
||||||
# -----------
|
|
||||||
|
|
||||||
params['cookies']['_EDGE_V'] = '1'
|
|
||||||
|
|
||||||
# _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
|
|
||||||
_EDGE_S = [
|
|
||||||
'F=1',
|
|
||||||
'SID=%s' % SID,
|
|
||||||
'mkt=%s' % engine_region.lower(),
|
|
||||||
'ui=%s' % engine_language.lower(),
|
|
||||||
]
|
|
||||||
params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
|
|
||||||
logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])
|
|
||||||
|
|
||||||
# "_EDGE_CD": "m=zh-tw",
|
|
||||||
|
|
||||||
_EDGE_CD = [ # pylint: disable=invalid-name
|
|
||||||
'm=%s' % engine_region.lower(), # search region: zh-cn
|
|
||||||
'u=%s' % engine_language.lower(), # UI: en-us
|
|
||||||
]
|
|
||||||
|
|
||||||
params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
|
|
||||||
logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])
|
|
||||||
|
|
||||||
SRCHHPGUSR = [ # pylint: disable=invalid-name
|
|
||||||
'SRCHLANG=%s' % engine_language,
|
|
||||||
# Trying to set ADLT cookie here seems not to have any effect, I assume
|
|
||||||
# there is some age verification by a cookie (and/or session ID) needed,
|
|
||||||
# to disable the SafeSearch.
|
|
||||||
'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
|
|
||||||
]
|
|
||||||
params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
|
|
||||||
logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Assemble a Bing-Web request."""
|
"""Assemble a Bing-Web request."""
|
||||||
|
|
||||||
engine_region = traits.get_region(params['searxng_locale'], 'en-US')
|
engine_region = traits.get_region(params['searxng_locale'], 'en-us')
|
||||||
engine_language = traits.get_language(params['searxng_locale'], 'en')
|
engine_language = traits.get_language(params['searxng_locale'], 'en-us')
|
||||||
|
set_bing_cookies(params, engine_language, engine_region)
|
||||||
|
|
||||||
SID = uuid.uuid1().hex.upper()
|
query_params = {'q': query, 'first': _page_offset(params.get('pageno', 1))}
|
||||||
CVID = uuid.uuid1().hex.upper()
|
params['url'] = f'{base_url}?{urlencode(query_params)}'
|
||||||
|
|
||||||
set_bing_cookies(params, engine_language, engine_region, SID)
|
unix_day = int(time.time() / 86400)
|
||||||
|
time_ranges = {'day': '1', 'week': '2', 'month': '3', 'year': f'5_{unix_day-365}_{unix_day}'}
|
||||||
|
if params.get('time_range') in time_ranges:
|
||||||
|
params['url'] += f'&filters=ex1:"ez{time_ranges[params["time_range"]]}"'
|
||||||
|
|
||||||
# build URL query
|
|
||||||
# ---------------
|
|
||||||
|
|
||||||
# query term
|
|
||||||
page = int(params.get('pageno', 1))
|
|
||||||
query_params = {
|
|
||||||
# fmt: off
|
|
||||||
'q': query,
|
|
||||||
'pq': query,
|
|
||||||
'cvid': CVID,
|
|
||||||
'qs': 'n',
|
|
||||||
'sp': '-1'
|
|
||||||
# fmt: on
|
|
||||||
}
|
|
||||||
|
|
||||||
# page
|
|
||||||
if page > 1:
|
|
||||||
referer = base_url + '?' + urlencode(query_params)
|
|
||||||
params['headers']['Referer'] = referer
|
|
||||||
logger.debug("headers.Referer --> %s", referer)
|
|
||||||
|
|
||||||
query_params['first'] = _get_offset_from_pageno(page)
|
|
||||||
|
|
||||||
if page == 2:
|
|
||||||
query_params['FORM'] = 'PERE'
|
|
||||||
elif page > 2:
|
|
||||||
query_params['FORM'] = 'PERE%s' % (page - 2)
|
|
||||||
|
|
||||||
filters = ''
|
|
||||||
if params['time_range']:
|
|
||||||
query_params['filt'] = 'custom'
|
|
||||||
|
|
||||||
if params['time_range'] == 'day':
|
|
||||||
filters = 'ex1:"ez1"'
|
|
||||||
elif params['time_range'] == 'week':
|
|
||||||
filters = 'ex1:"ez2"'
|
|
||||||
elif params['time_range'] == 'month':
|
|
||||||
filters = 'ex1:"ez3"'
|
|
||||||
elif params['time_range'] == 'year':
|
|
||||||
epoch_1970 = datetime.date(1970, 1, 1)
|
|
||||||
today_no = (datetime.date.today() - epoch_1970).days
|
|
||||||
filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
|
|
||||||
|
|
||||||
params['url'] = base_url + '?' + urlencode(query_params)
|
|
||||||
if filters:
|
|
||||||
params['url'] = params['url'] + '&filters=' + filters
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -236,7 +152,7 @@ def response(resp):
|
||||||
except Exception as e: # pylint: disable=broad-except
|
except Exception as e: # pylint: disable=broad-except
|
||||||
logger.debug('result error :\n%s', e)
|
logger.debug('result error :\n%s', e)
|
||||||
|
|
||||||
if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
|
if result_len and _page_offset(resp.search_params.get("pageno", 0)) > result_len:
|
||||||
# Avoid reading more results than avalaible.
|
# Avoid reading more results than avalaible.
|
||||||
# For example, if there is 100 results from some search and we try to get results from 120 to 130,
|
# For example, if there is 100 results from some search and we try to get results from 120 to 130,
|
||||||
# Bing will send back the results from 0 to 10 and no error.
|
# Bing will send back the results from 0 to 10 and no error.
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
import uuid
|
|
||||||
import json
|
import json
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
@ -17,7 +16,6 @@ from searx.engines.bing import (
|
||||||
set_bing_cookies,
|
set_bing_cookies,
|
||||||
_fetch_traits,
|
_fetch_traits,
|
||||||
)
|
)
|
||||||
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import logging
|
import logging
|
||||||
|
@ -61,11 +59,10 @@ time_map = {
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Assemble a Bing-Image request."""
|
"""Assemble a Bing-Image request."""
|
||||||
|
|
||||||
engine_region = traits.get_region(params['searxng_locale'], 'en-US')
|
engine_region = traits.get_region(params['searxng_locale'], 'en-us')
|
||||||
engine_language = traits.get_language(params['searxng_locale'], 'en')
|
engine_language = traits.get_language(params['searxng_locale'], 'en-us')
|
||||||
|
|
||||||
SID = uuid.uuid1().hex.upper()
|
set_bing_cookies(params, engine_language, engine_region)
|
||||||
set_bing_cookies(params, engine_language, engine_region, SID)
|
|
||||||
|
|
||||||
# build URL query
|
# build URL query
|
||||||
# - example: https://www.bing.com/images/async?q=foo&first=155&count=35
|
# - example: https://www.bing.com/images/async?q=foo&first=155&count=35
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
# pylint: disable=invalid-name
|
# pylint: disable=invalid-name
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
import uuid
|
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
@ -16,7 +15,6 @@ from searx.engines.bing import (
|
||||||
set_bing_cookies,
|
set_bing_cookies,
|
||||||
_fetch_traits,
|
_fetch_traits,
|
||||||
)
|
)
|
||||||
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import logging
|
import logging
|
||||||
|
@ -70,10 +68,9 @@ def request(query, params):
|
||||||
|
|
||||||
sxng_locale = params['searxng_locale']
|
sxng_locale = params['searxng_locale']
|
||||||
engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
|
engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
|
||||||
engine_language = traits.get_language(sxng_locale, 'en')
|
engine_language = traits.get_language(sxng_locale, 'en-us')
|
||||||
|
|
||||||
SID = uuid.uuid1().hex.upper()
|
set_bing_cookies(params, engine_language, engine_region)
|
||||||
set_bing_cookies(params, engine_language, engine_region, SID)
|
|
||||||
|
|
||||||
# build URL query
|
# build URL query
|
||||||
#
|
#
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
# pylint: disable=invalid-name
|
# pylint: disable=invalid-name
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
import uuid
|
|
||||||
import json
|
import json
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
@ -16,7 +15,6 @@ from searx.engines.bing import (
|
||||||
set_bing_cookies,
|
set_bing_cookies,
|
||||||
_fetch_traits,
|
_fetch_traits,
|
||||||
)
|
)
|
||||||
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import logging
|
import logging
|
||||||
|
@ -60,11 +58,10 @@ time_map = {
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Assemble a Bing-Video request."""
|
"""Assemble a Bing-Video request."""
|
||||||
|
|
||||||
engine_region = traits.get_region(params['searxng_locale'], 'en-US')
|
engine_region = traits.get_region(params['searxng_locale'], 'en-us')
|
||||||
engine_language = traits.get_language(params['searxng_locale'], 'en')
|
engine_language = traits.get_language(params['searxng_locale'], 'en-us')
|
||||||
|
|
||||||
SID = uuid.uuid1().hex.upper()
|
set_bing_cookies(params, engine_language, engine_region)
|
||||||
set_bing_cookies(params, engine_language, engine_region, SID)
|
|
||||||
|
|
||||||
# build URL query
|
# build URL query
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue