forked from zaclys/searxng
[fix] pep8
This commit is contained in:
parent
9f5cd28dba
commit
52eba0c721
|
@ -99,9 +99,9 @@ time_range_dict = {
|
||||||
|
|
||||||
# Filter results. 0: None, 1: Moderate, 2: Strict
|
# Filter results. 0: None, 1: Moderate, 2: Strict
|
||||||
filter_mapping = {
|
filter_mapping = {
|
||||||
0 : 'off',
|
0: 'off',
|
||||||
1 : 'medium',
|
1: 'medium',
|
||||||
2 : 'high'
|
2: 'high'
|
||||||
}
|
}
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
|
@ -111,7 +111,7 @@ filter_mapping = {
|
||||||
results_xpath = '//div[@class="g"]'
|
results_xpath = '//div[@class="g"]'
|
||||||
|
|
||||||
# google *sections* are not usual *results*, we ignore them
|
# google *sections* are not usual *results*, we ignore them
|
||||||
g_section_with_header='./g-section-with-header'
|
g_section_with_header = './g-section-with-header'
|
||||||
|
|
||||||
# the title is a h3 tag relative to the result group
|
# the title is a h3 tag relative to the result group
|
||||||
title_xpath = './/h3[1]'
|
title_xpath = './/h3[1]'
|
||||||
|
@ -131,6 +131,7 @@ suggestion_xpath = '//div[contains(@class, "card-section")]//a'
|
||||||
# *spelling suggestions*, we use them anyway.
|
# *spelling suggestions*, we use them anyway.
|
||||||
spelling_suggestion_xpath = '//div[@class="med"]/p/a'
|
spelling_suggestion_xpath = '//div[@class="med"]/p/a'
|
||||||
|
|
||||||
|
|
||||||
def extract_text_from_dom(result, xpath):
|
def extract_text_from_dom(result, xpath):
|
||||||
"""returns extract_text on the first result selected by the xpath or None"""
|
"""returns extract_text on the first result selected by the xpath or None"""
|
||||||
r = eval_xpath(result, xpath)
|
r = eval_xpath(result, xpath)
|
||||||
|
@ -138,6 +139,7 @@ def extract_text_from_dom(result, xpath):
|
||||||
return extract_text(r[0])
|
return extract_text(r[0])
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_lang_country(params, lang_list, custom_aliases):
|
def get_lang_country(params, lang_list, custom_aliases):
|
||||||
"""Returns a tuple with *language* on its first and *country* on its second
|
"""Returns a tuple with *language* on its first and *country* on its second
|
||||||
position."""
|
position."""
|
||||||
|
@ -159,6 +161,7 @@ def get_lang_country(params, lang_list, custom_aliases):
|
||||||
|
|
||||||
return language, country, lang_country
|
return language, country, lang_country
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Google search request"""
|
"""Google search request"""
|
||||||
|
|
||||||
|
@ -170,7 +173,7 @@ def request(query, params):
|
||||||
subdomain = 'www.' + google_domains.get(country.upper(), 'google.com')
|
subdomain = 'www.' + google_domains.get(country.upper(), 'google.com')
|
||||||
|
|
||||||
# https://www.google.de/search?q=corona&hl=de-DE&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
# https://www.google.de/search?q=corona&hl=de-DE&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
||||||
query_url = 'https://'+ subdomain + '/search' + "?" + urlencode({
|
query_url = 'https://' + subdomain + '/search' + "?" + urlencode({
|
||||||
'q': query,
|
'q': query,
|
||||||
'hl': lang_country,
|
'hl': lang_country,
|
||||||
'lr': "lang_" + language,
|
'lr': "lang_" + language,
|
||||||
|
@ -190,16 +193,17 @@ def request(query, params):
|
||||||
# en-US,en;q=0.8,en;q=0.5
|
# en-US,en;q=0.8,en;q=0.5
|
||||||
params['headers']['Accept-Language'] = (
|
params['headers']['Accept-Language'] = (
|
||||||
lang_country + ',' + language + ';q=0.8,' + language + ';q=0.5'
|
lang_country + ',' + language + ';q=0.8,' + language + ';q=0.5'
|
||||||
)
|
)
|
||||||
logger.debug("HTTP header Accept-Language --> %s",
|
logger.debug("HTTP header Accept-Language --> %s",
|
||||||
params['headers']['Accept-Language'])
|
params['headers']['Accept-Language'])
|
||||||
params['headers']['Accept'] = (
|
params['headers']['Accept'] = (
|
||||||
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
)
|
)
|
||||||
#params['google_subdomain'] = subdomain
|
# params['google_subdomain'] = subdomain
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
"""Get response from google's search request"""
|
"""Get response from google's search request"""
|
||||||
results = []
|
results = []
|
||||||
|
@ -249,16 +253,16 @@ def response(resp):
|
||||||
url = eval_xpath(result, href_xpath)[0]
|
url = eval_xpath(result, href_xpath)[0]
|
||||||
content = extract_text_from_dom(result, content_xpath)
|
content = extract_text_from_dom(result, content_xpath)
|
||||||
results.append({
|
results.append({
|
||||||
'url': url,
|
'url': url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'content': content
|
'content': content
|
||||||
})
|
})
|
||||||
except Exception as e: # pylint: disable=broad-except
|
except Exception as e: # pylint: disable=broad-except
|
||||||
logger.error(e, exc_info=True)
|
logger.error(e, exc_info=True)
|
||||||
#from lxml import etree
|
# from lxml import etree
|
||||||
#logger.debug(etree.tostring(result, pretty_print=True))
|
# logger.debug(etree.tostring(result, pretty_print=True))
|
||||||
#import pdb
|
# import pdb
|
||||||
#pdb.set_trace()
|
# pdb.set_trace()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# parse suggestion
|
# parse suggestion
|
||||||
|
@ -272,6 +276,7 @@ def response(resp):
|
||||||
# return results
|
# return results
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
# get supported languages from their site
|
# get supported languages from their site
|
||||||
def _fetch_supported_languages(resp):
|
def _fetch_supported_languages(resp):
|
||||||
ret_val = {}
|
ret_val = {}
|
||||||
|
|
|
@ -33,15 +33,15 @@ from searx.engines.xpath import extract_text
|
||||||
|
|
||||||
# pylint: disable=unused-import
|
# pylint: disable=unused-import
|
||||||
from searx.engines.google import (
|
from searx.engines.google import (
|
||||||
supported_languages_url
|
supported_languages_url,
|
||||||
, _fetch_supported_languages
|
_fetch_supported_languages,
|
||||||
)
|
)
|
||||||
# pylint: enable=unused-import
|
# pylint: enable=unused-import
|
||||||
|
|
||||||
from searx.engines.google import (
|
from searx.engines.google import (
|
||||||
get_lang_country
|
get_lang_country,
|
||||||
, google_domains
|
google_domains,
|
||||||
, time_range_dict
|
time_range_dict,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logger.getChild('google images')
|
logger = logger.getChild('google images')
|
||||||
|
@ -56,11 +56,12 @@ time_range_support = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
|
|
||||||
filter_mapping = {
|
filter_mapping = {
|
||||||
0 : 'images',
|
0: 'images',
|
||||||
1 : 'active',
|
1: 'active',
|
||||||
2 : 'active'
|
2: 'active'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def scrap_out_thumbs(dom):
|
def scrap_out_thumbs(dom):
|
||||||
"""Scrap out thumbnail data from <script> tags.
|
"""Scrap out thumbnail data from <script> tags.
|
||||||
"""
|
"""
|
||||||
|
@ -68,13 +69,14 @@ def scrap_out_thumbs(dom):
|
||||||
for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'):
|
for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'):
|
||||||
_script = script.text
|
_script = script.text
|
||||||
# _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....');
|
# _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....');
|
||||||
_thumb_no, _img_data = _script[len("_setImgSrc("):-2].split(",",1)
|
_thumb_no, _img_data = _script[len("_setImgSrc("):-2].split(",", 1)
|
||||||
_thumb_no = _thumb_no.replace("'","")
|
_thumb_no = _thumb_no.replace("'", "")
|
||||||
_img_data = _img_data.replace("'","")
|
_img_data = _img_data.replace("'", "")
|
||||||
_img_data = _img_data.replace(r"\/", r"/")
|
_img_data = _img_data.replace(r"\/", r"/")
|
||||||
ret_val[_thumb_no] = _img_data.replace(r"\x3d", "=")
|
ret_val[_thumb_no] = _img_data.replace(r"\x3d", "=")
|
||||||
return ret_val
|
return ret_val
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Google-Image search request"""
|
"""Google-Image search request"""
|
||||||
|
|
||||||
|
@ -84,10 +86,10 @@ def request(query, params):
|
||||||
)
|
)
|
||||||
subdomain = 'www.' + google_domains.get(country.upper(), 'google.com')
|
subdomain = 'www.' + google_domains.get(country.upper(), 'google.com')
|
||||||
|
|
||||||
query_url = 'https://'+ subdomain + '/search' + "?" + urlencode({
|
query_url = 'https://' + subdomain + '/search' + "?" + urlencode({
|
||||||
'q': query,
|
'q': query,
|
||||||
'tbm': "isch",
|
'tbm': "isch",
|
||||||
'hl': lang_country,
|
'hl': lang_country,
|
||||||
'lr': "lang_" + language,
|
'lr': "lang_" + language,
|
||||||
'ie': "utf8",
|
'ie': "utf8",
|
||||||
'oe': "utf8",
|
'oe': "utf8",
|
||||||
|
@ -108,8 +110,8 @@ def request(query, params):
|
||||||
"HTTP Accept-Language --> %s", params['headers']['Accept-Language'])
|
"HTTP Accept-Language --> %s", params['headers']['Accept-Language'])
|
||||||
params['headers']['Accept'] = (
|
params['headers']['Accept'] = (
|
||||||
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
)
|
)
|
||||||
#params['google_subdomain'] = subdomain
|
# params['google_subdomain'] = subdomain
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -196,10 +198,10 @@ def response(resp):
|
||||||
})
|
})
|
||||||
except Exception as e: # pylint: disable=broad-except
|
except Exception as e: # pylint: disable=broad-except
|
||||||
logger.error(e, exc_info=True)
|
logger.error(e, exc_info=True)
|
||||||
#from lxml import etree
|
# from lxml import etree
|
||||||
#logger.debug(etree.tostring(img_node, pretty_print=True))
|
# logger.debug(etree.tostring(img_node, pretty_print=True))
|
||||||
#import pdb
|
# import pdb
|
||||||
#pdb.set_trace()
|
# pdb.set_trace()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
Loading…
Reference in New Issue