mirror of https://github.com/searxng/searxng.git
[mod] improve engine startpage to reduce the frequency of CAPTCHA
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
9100a48541
commit
79c499d145
|
@ -83,6 +83,7 @@ Startpage's category (for Web-search, News, Videos, ..) is set by
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
import re
|
import re
|
||||||
|
from urllib.parse import urlencode
|
||||||
from unicodedata import normalize, combining
|
from unicodedata import normalize, combining
|
||||||
from time import time
|
from time import time
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
@ -161,7 +162,7 @@ search_form_xpath = '//form[@id="search"]'
|
||||||
# timestamp of the last fetch of 'sc' code
|
# timestamp of the last fetch of 'sc' code
|
||||||
sc_code_ts = 0
|
sc_code_ts = 0
|
||||||
sc_code = ''
|
sc_code = ''
|
||||||
sc_code_cache_sec = 30
|
sc_code_cache_sec = 3600
|
||||||
"""Time in seconds the sc-code is cached in memory, see :py:obj:`get_sc_code`."""
|
"""Time in seconds the sc-code is cached in memory, see :py:obj:`get_sc_code`."""
|
||||||
|
|
||||||
|
|
||||||
|
@ -275,42 +276,46 @@ def _request_cat_web(query, params):
|
||||||
args['language'] = engine_language
|
args['language'] = engine_language
|
||||||
args['lui'] = engine_language
|
args['lui'] = engine_language
|
||||||
|
|
||||||
args['abp'] = '1'
|
# args['abp'] = '1'
|
||||||
if params['pageno'] > 1:
|
if params['pageno'] > 1:
|
||||||
args['page'] = params['pageno']
|
args['page'] = params['pageno']
|
||||||
|
|
||||||
# build cookie
|
# build cookie
|
||||||
lang_homepage = 'en'
|
lang_homepage = 'en'
|
||||||
cookie = OrderedDict()
|
cookie = OrderedDict()
|
||||||
|
cookie['connect_to_server'] = 'us'
|
||||||
cookie['date_time'] = 'world'
|
cookie['date_time'] = 'world'
|
||||||
cookie['disable_family_filter'] = safesearch_dict[params['safesearch']]
|
cookie['disable_family_filter'] = safesearch_dict[params['safesearch']]
|
||||||
cookie['disable_open_in_new_window'] = '0'
|
cookie['disable_open_in_new_window'] = '0'
|
||||||
cookie['enable_post_method'] = '1' # hint: POST
|
cookie['enable_post_method'] = '0' # hint: GET
|
||||||
cookie['enable_proxy_safety_suggest'] = '1'
|
cookie['enable_proxy_safety_suggest'] = '1'
|
||||||
cookie['enable_stay_control'] = '1'
|
cookie['enable_stay_control'] = '1'
|
||||||
cookie['instant_answers'] = '1'
|
cookie['instant_answers'] = '1'
|
||||||
cookie['lang_homepage'] = 's/device/%s/' % lang_homepage
|
cookie['lang_homepage'] = 's/device/%s' % lang_homepage
|
||||||
cookie['num_of_results'] = '10'
|
|
||||||
cookie['suggestions'] = '1'
|
|
||||||
cookie['wt_unit'] = 'celsius'
|
|
||||||
|
|
||||||
if engine_language:
|
if engine_language:
|
||||||
cookie['language'] = engine_language
|
cookie['language'] = engine_language
|
||||||
cookie['language_ui'] = engine_language
|
cookie['language_ui'] = engine_language
|
||||||
|
cookie['num_of_results'] = '10'
|
||||||
if engine_region:
|
if engine_region:
|
||||||
cookie['search_results_region'] = engine_region
|
cookie['search_results_region'] = engine_region
|
||||||
|
cookie['suggestions'] = '1'
|
||||||
|
cookie['wt_unit'] = 'celsius'
|
||||||
|
|
||||||
params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()])
|
params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()])
|
||||||
logger.debug('cookie preferences: %s', params['cookies']['preferences'])
|
logger.debug('cookie preferences: %s', params['cookies']['preferences'])
|
||||||
|
|
||||||
|
# GET request
|
||||||
|
params['method'] = 'GET'
|
||||||
|
# https://www.startpage.com/do/search?sc=CmEL6wNu8t5j20&query=foo&cat=web&qloc=eyJsYXQiOiBudWxsLCAibG5nIjogbnVsbCwgInR5cGUiOiAibm9uZSJ9
|
||||||
|
params['url'] = search_url + '?' + urlencode(args)
|
||||||
|
|
||||||
# POST request
|
# POST request
|
||||||
logger.debug("data: %s", args)
|
# logger.debug("data: %s", args)
|
||||||
params['data'] = args
|
# params['data'] = args
|
||||||
params['method'] = 'POST'
|
# params['method'] = 'GET'
|
||||||
params['url'] = search_url
|
# params['url'] = search_url
|
||||||
params['headers']['Origin'] = base_url
|
# params['headers']['Origin'] = base_url
|
||||||
params['headers']['Referer'] = base_url + '/'
|
# params['headers']['Referer'] = base_url + '/'
|
||||||
# is the Accept header needed?
|
# is the Accept header needed?
|
||||||
# params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
# params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue