mirror of https://github.com/searxng/searxng.git
[enh] reduce the number of http outgoing connections.
Engines that still use http: gigablast, bing images (for thumbnails), 1x and the dbpedia autocompleter.
parent bbd83f5a51
commit 78edc16e66
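The changes below all follow the same pattern: URLs coming back from the upstream APIs are rewritten to https before they are put into results, so the browser does not open extra plain-http connections for thumbnails and links. A minimal sketch of that idea, not taken from the commit itself (the helper name and the sample URL are made up):

# illustrative sketch only, not code from this commit
def force_https(url):
    # assumes the upstream API returns absolute http:// or https:// URLs
    return url.replace("http://", "https://", 1)

print(force_https("http://static.example.com/thumb/42.jpg"))
# -> https://static.example.com/thumb/42.jpg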
@@ -111,7 +111,7 @@ def searx_bang(full_query):
 
 
 def dbpedia(query):
-    # dbpedia autocompleter
+    # dbpedia autocompleter, no HTTPS
     autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'  # noqa
 
     response = get(autocomplete_url
@@ -139,7 +139,7 @@ def duckduckgo(query):
 
 def google(query):
     # google autocompleter
-    autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&'  # noqa
+    autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'  # noqa
 
     response = get(autocomplete_url
                    + urlencode(dict(q=query)))
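For context, the suggestion request is sent to the endpoint above with the query urlencoded onto it. Roughly, the final URL is built like this (standalone sketch; the Python 3 import shown here and the query 'searx' are illustrative, the original module may import urlencode differently):

from urllib.parse import urlencode

# google autocompleter endpoint, now requested over HTTPS
autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'

print(autocomplete_url + urlencode(dict(q='searx')))
# -> https://suggestqueries.google.com/complete/search?client=toolbar&q=searx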
@@ -60,6 +60,9 @@ def response(resp):
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
         embedded = embedded_url.format(videoid=res['id'])
 
+        # http to https
+        thumbnail = thumbnail.replace("http://", "https://")
+
         results.append({'template': 'videos.html',
                         'url': url,
                         'title': title,
@@ -22,7 +22,7 @@ paging = True
 
 # search-url
 base_url = 'https://www.deviantart.com/'
-search_url = base_url+'search?offset={offset}&{query}'
+search_url = base_url+'browse/all/?offset={offset}&{query}'
 
 
 # do search-request
@@ -56,6 +56,12 @@ def response(resp):
         thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
         img_src = regex.sub('/', thumbnail_src)
 
+        # http to https, remove domain sharding
+        thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
+        thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
+
+        url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
+
         # append result
         results.append({'url': url,
                         'title': title,
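The first substitution above collapses deviantart's sharded thumbnail hosts (th01, th02, fc05 and so on) onto a single https host, and the last one rewrites the result URL's scheme while keeping its subdomain. A quick worked example with made-up input URLs:

import re

# hypothetical sharded thumbnail URL for illustration
thumbnail_src = "http://th05.deviantart.net/fs70/200H/f/2015/example.jpg"
thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
print(thumbnail_src)
# -> https://th01.deviantart.net/fs70/200H/f/2015/example.jpg

# hypothetical result URL; the captured subdomain is kept via \1
url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/",
             "http://someartist.deviantart.com/art/example-123")
print(url)
# -> https://someartist.deviantart.com/art/example-123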
@@ -58,6 +58,9 @@ def response(resp):
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
         publishedDate = parser.parse(pubdate)
 
+        # http to https
+        thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com")
+
         # append result
         results.append({'url': url,
                         'title': title,
@@ -17,7 +17,7 @@ categories = ['general']
 paging = True
 number_of_results = 5
 
-# search-url
+# search-url, invalid HTTPS certificate
 base_url = 'http://gigablast.com/'
 search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
 
@@ -56,6 +56,9 @@ def response(resp):
             continue
         thumbnail_src = result['tbUrl']
 
+        # http to https
+        thumbnail_src = thumbnail_src.replace("http://", "https://")
+
         # append result
         results.append({'url': href,
                         'title': title,
@@ -19,8 +19,8 @@ import re
 categories = ['images']
 paging = False
 
-# search-url
-base_url = 'http://1x.com'
+# search-url, no HTTPS
+base_url = 'https://1x.com'
 search_url = base_url+'/backend/search.php?{query}'
 
 
@@ -75,7 +75,7 @@ class TestDeviantartEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'Title of image')
         self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
         self.assertNotIn('content', results[0])
-        self.assertEqual(results[0]['thumbnail_src'], 'http://url.of.thumbnail')
+        self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')
 
         html = """
         <span class="tt-fh-tc" style="width: 202px;">
@@ -65,7 +65,7 @@ class TestGoogleImagesEngine(SearxTestCase):
         self.assertEqual(len(results), 1)
         self.assertEqual(results[0]['title'], 'This is the title')
         self.assertEqual(results[0]['url'], 'http://this.is.the.url')
-        self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.url')
+        self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url')
         self.assertEqual(results[0]['img_src'], 'http://image.url.jpg')
         self.assertEqual(results[0]['content'], '<b>test</b>')
 
@@ -51,7 +51,7 @@ class TestWww1xEngine(SearxTestCase):
         results = www1x.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['url'], 'http://1x.com/photo/123456')
-        self.assertEqual(results[0]['thumbnail_src'], 'http://1x.com/images/user/testimage-123456.jpg')
+        self.assertEqual(results[0]['url'], 'https://1x.com/photo/123456')
+        self.assertEqual(results[0]['thumbnail_src'], 'https://1x.com/images/user/testimage-123456.jpg')
         self.assertEqual(results[0]['content'], '')
         self.assertEqual(results[0]['template'], 'images.html')