forked from zaclys/searxng
Merge branch 'master' of https://github.com/asciimoo/searx
This commit is contained in:
commit
51278ee0be
@ -16,7 +16,10 @@ update_dev_packages() {
|
|||||||
|
|
||||||
pep8_check() {
|
pep8_check() {
|
||||||
echo '[!] Running pep8 check'
|
echo '[!] Running pep8 check'
|
||||||
pep8 --max-line-length=120 "$SEARX_DIR" "$BASE_DIR/tests"
|
# ignored rules:
|
||||||
|
# E402 module level import not at top of file
|
||||||
|
# W503 line break before binary operator
|
||||||
|
pep8 --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests"
|
||||||
}
|
}
|
||||||
|
|
||||||
unit_tests() {
|
unit_tests() {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
babel==2.2.0
|
babel==2.2.0
|
||||||
flake8==2.5.1
|
|
||||||
mock==1.0.1
|
mock==1.0.1
|
||||||
nose2[coverage-plugin]
|
nose2[coverage-plugin]
|
||||||
|
pep8==1.7.0
|
||||||
plone.testing==4.0.15
|
plone.testing==4.0.15
|
||||||
robotframework-selenium2library==1.7.4
|
robotframework-selenium2library==1.7.4
|
||||||
robotsuite==1.7.0
|
robotsuite==1.7.0
|
||||||
|
@ -114,8 +114,7 @@ def dbpedia(query):
|
|||||||
# dbpedia autocompleter, no HTTPS
|
# dbpedia autocompleter, no HTTPS
|
||||||
autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
|
autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
|
||||||
|
|
||||||
response = get(autocomplete_url
|
response = get(autocomplete_url + urlencode(dict(QueryString=query)))
|
||||||
+ urlencode(dict(QueryString=query)))
|
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
@ -141,8 +140,7 @@ def google(query):
|
|||||||
# google autocompleter
|
# google autocompleter
|
||||||
autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
|
autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
|
||||||
|
|
||||||
response = get(autocomplete_url
|
response = get(autocomplete_url + urlencode(dict(q=query)))
|
||||||
+ urlencode(dict(q=query)))
|
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ def request(query, params):
|
|||||||
c=c)
|
c=c)
|
||||||
|
|
||||||
if params['pageno'] != 1:
|
if params['pageno'] != 1:
|
||||||
params['url'] += '&page={pageno}'.format(pageno=(params['pageno']-1))
|
params['url'] += '&page={pageno}'.format(pageno=(params['pageno'] - 1))
|
||||||
|
|
||||||
# let Blekko know we don't want profiling
|
# let Blekko know we don't want profiling
|
||||||
params['cookies']['tag_lesslogging'] = '1'
|
params['cookies']['tag_lesslogging'] = '1'
|
||||||
|
@ -29,7 +29,7 @@ search_url = url + '/search?q={search_term}&p={pageno}'
|
|||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(search_term=quote(query),
|
params['url'] = search_url.format(search_term=quote(query),
|
||||||
pageno=params['pageno']-1)
|
pageno=params['pageno'] - 1)
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ paging = True
|
|||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://www.deviantart.com/'
|
base_url = 'https://www.deviantart.com/'
|
||||||
search_url = base_url+'browse/all/?offset={offset}&{query}'
|
search_url = base_url + 'browse/all/?offset={offset}&{query}'
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
|
@ -22,7 +22,7 @@ paging = True
|
|||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://digg.com/'
|
base_url = 'https://digg.com/'
|
||||||
search_url = base_url+'api/search/{query}.json?position={position}&format=html'
|
search_url = base_url + 'api/search/{query}.json?position={position}&format=html'
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
results_xpath = '//article'
|
results_xpath = '//article'
|
||||||
|
@ -88,7 +88,7 @@ def response(resp):
|
|||||||
for result in search_res['results']:
|
for result in search_res['results']:
|
||||||
if result['news']:
|
if result['news']:
|
||||||
# timestamp (milliseconds since 1970)
|
# timestamp (milliseconds since 1970)
|
||||||
publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0) # noqa
|
publishedDate = datetime.datetime.fromtimestamp(result['date'] / 1000.0) # noqa
|
||||||
|
|
||||||
# append news result
|
# append news result
|
||||||
results.append({'url': result['url'],
|
results.append({'url': result['url'],
|
||||||
|
@ -209,29 +209,29 @@ def response(resp):
|
|||||||
parsed_url = urlparse(url, google_hostname)
|
parsed_url = urlparse(url, google_hostname)
|
||||||
|
|
||||||
# map result
|
# map result
|
||||||
if ((parsed_url.netloc == google_hostname and parsed_url.path.startswith(maps_path))
|
if parsed_url.netloc == google_hostname:
|
||||||
or (parsed_url.netloc.startswith(map_hostname_start))):
|
# TODO fix inside links
|
||||||
x = result.xpath(map_near)
|
continue
|
||||||
if len(x) > 0:
|
# if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start):
|
||||||
# map : near the location
|
# print "yooooo"*30
|
||||||
results = results + parse_map_near(parsed_url, x, google_hostname)
|
# x = result.xpath(map_near)
|
||||||
else:
|
# if len(x) > 0:
|
||||||
# map : detail about a location
|
# # map : near the location
|
||||||
results = results + parse_map_detail(parsed_url, result, google_hostname)
|
# results = results + parse_map_near(parsed_url, x, google_hostname)
|
||||||
|
# else:
|
||||||
|
# # map : detail about a location
|
||||||
|
# results = results + parse_map_detail(parsed_url, result, google_hostname)
|
||||||
|
# # google news
|
||||||
|
# elif parsed_url.path == search_path:
|
||||||
|
# # skipping news results
|
||||||
|
# pass
|
||||||
|
|
||||||
# google news
|
# # images result
|
||||||
elif (parsed_url.netloc == google_hostname
|
# elif parsed_url.path == images_path:
|
||||||
and parsed_url.path == search_path):
|
# # only thumbnail image provided,
|
||||||
# skipping news results
|
# # so skipping image results
|
||||||
pass
|
# # results = results + parse_images(result, google_hostname)
|
||||||
|
# pass
|
||||||
# images result
|
|
||||||
elif (parsed_url.netloc == google_hostname
|
|
||||||
and parsed_url.path == images_path):
|
|
||||||
# only thumbnail image provided,
|
|
||||||
# so skipping image results
|
|
||||||
# results = results + parse_images(result, google_hostname)
|
|
||||||
pass
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# normal result
|
# normal result
|
||||||
|
@ -20,7 +20,7 @@ paging = True
|
|||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
url = 'https://searchcode.com/'
|
url = 'https://searchcode.com/'
|
||||||
search_url = url+'api/codesearch_I/?{query}&p={pageno}'
|
search_url = url + 'api/codesearch_I/?{query}&p={pageno}'
|
||||||
|
|
||||||
# special code-endings which are not recognised by the file ending
|
# special code-endings which are not recognised by the file ending
|
||||||
code_endings = {'cs': 'c#',
|
code_endings = {'cs': 'c#',
|
||||||
@ -32,7 +32,7 @@ code_endings = {'cs': 'c#',
|
|||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||||
pageno=params['pageno']-1)
|
pageno=params['pageno'] - 1)
|
||||||
|
|
||||||
# Disable SSL verification
|
# Disable SSL verification
|
||||||
# error: (60) SSL certificate problem: unable to get local issuer
|
# error: (60) SSL certificate problem: unable to get local issuer
|
||||||
|
@ -19,13 +19,13 @@ paging = True
|
|||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
url = 'https://searchcode.com/'
|
url = 'https://searchcode.com/'
|
||||||
search_url = url+'api/search_IV/?{query}&p={pageno}'
|
search_url = url + 'api/search_IV/?{query}&p={pageno}'
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||||
pageno=params['pageno']-1)
|
pageno=params['pageno'] - 1)
|
||||||
|
|
||||||
# Disable SSL verification
|
# Disable SSL verification
|
||||||
# error: (60) SSL certificate problem: unable to get local issuer
|
# error: (60) SSL certificate problem: unable to get local issuer
|
||||||
|
@ -22,7 +22,7 @@ paging = True
|
|||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
url = 'https://stackoverflow.com/'
|
url = 'https://stackoverflow.com/'
|
||||||
search_url = url+'search?{query}&page={pageno}'
|
search_url = url + 'search?{query}&page={pageno}'
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
results_xpath = '//div[contains(@class,"question-summary")]'
|
results_xpath = '//div[contains(@class,"question-summary")]'
|
||||||
|
@ -90,8 +90,8 @@ def response(resp):
|
|||||||
|
|
||||||
# check if search result starts with something like: "2 Sep 2014 ... "
|
# check if search result starts with something like: "2 Sep 2014 ... "
|
||||||
if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
|
if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
|
||||||
date_pos = content.find('...')+4
|
date_pos = content.find('...') + 4
|
||||||
date_string = content[0:date_pos-5]
|
date_string = content[0:date_pos - 5]
|
||||||
published_date = parser.parse(date_string, dayfirst=True)
|
published_date = parser.parse(date_string, dayfirst=True)
|
||||||
|
|
||||||
# fix content string
|
# fix content string
|
||||||
@ -99,8 +99,8 @@ def response(resp):
|
|||||||
|
|
||||||
# check if search result starts with something like: "5 days ago ... "
|
# check if search result starts with something like: "5 days ago ... "
|
||||||
elif re.match("^[0-9]+ days? ago \.\.\. ", content):
|
elif re.match("^[0-9]+ days? ago \.\.\. ", content):
|
||||||
date_pos = content.find('...')+4
|
date_pos = content.find('...') + 4
|
||||||
date_string = content[0:date_pos-5]
|
date_string = content[0:date_pos - 5]
|
||||||
|
|
||||||
# calculate datetime
|
# calculate datetime
|
||||||
published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
|
published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
|
||||||
|
@ -295,7 +295,7 @@ def get_geolink(claims, propertyName, defaultValue=''):
|
|||||||
if precision < 0.0003:
|
if precision < 0.0003:
|
||||||
zoom = 19
|
zoom = 19
|
||||||
else:
|
else:
|
||||||
zoom = int(15 - precision*8.8322 + precision*precision*0.625447)
|
zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447)
|
||||||
|
|
||||||
url = url_map\
|
url = url_map\
|
||||||
.replace('{latitude}', str(value.get('latitude', 0)))\
|
.replace('{latitude}', str(value.get('latitude', 0)))\
|
||||||
@ -318,6 +318,6 @@ def get_wikilink(result, wikiid):
|
|||||||
|
|
||||||
def get_wiki_firstlanguage(result, wikipatternid):
|
def get_wiki_firstlanguage(result, wikipatternid):
|
||||||
for k in result.get('sitelinks', {}).keys():
|
for k in result.get('sitelinks', {}).keys():
|
||||||
if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)):
|
if k.endswith(wikipatternid) and len(k) == (2 + len(wikipatternid)):
|
||||||
return k[0:2]
|
return k[0:2]
|
||||||
return None
|
return None
|
||||||
|
@ -22,7 +22,7 @@ paging = False
|
|||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://1x.com'
|
base_url = 'https://1x.com'
|
||||||
search_url = base_url+'/backend/search.php?{query}'
|
search_url = base_url + '/backend/search.php?{query}'
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
|
@ -43,7 +43,7 @@ def extract_url(xpath_results, search_url):
|
|||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
# add http or https to this kind of url //example.com/
|
# add http or https to this kind of url //example.com/
|
||||||
parsed_search_url = urlparse(search_url)
|
parsed_search_url = urlparse(search_url)
|
||||||
url = parsed_search_url.scheme+url
|
url = parsed_search_url.scheme + url
|
||||||
elif url.startswith('/'):
|
elif url.startswith('/'):
|
||||||
# fix relative url to the search engine
|
# fix relative url to the search engine
|
||||||
url = urljoin(search_url, url)
|
url = urljoin(search_url, url)
|
||||||
@ -69,7 +69,7 @@ def normalize_url(url):
|
|||||||
p = parsed_url.path
|
p = parsed_url.path
|
||||||
mark = p.find('/**')
|
mark = p.find('/**')
|
||||||
if mark != -1:
|
if mark != -1:
|
||||||
return unquote(p[mark+3:]).decode('utf-8')
|
return unquote(p[mark + 3:]).decode('utf-8')
|
||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ content_xpath = './/div[@class="serp-item__text"]//text()'
|
|||||||
def request(query, params):
|
def request(query, params):
|
||||||
lang = params['language'].split('_')[0]
|
lang = params['language'].split('_')[0]
|
||||||
host = base_url.format(tld=language_map.get(lang) or default_tld)
|
host = base_url.format(tld=language_map.get(lang) or default_tld)
|
||||||
params['url'] = host + search_url.format(page=params['pageno']-1,
|
params['url'] = host + search_url.format(page=params['pageno'] - 1,
|
||||||
query=urlencode({'text': query}))
|
query=urlencode({'text': query}))
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -103,10 +103,10 @@ def load_single_https_ruleset(rules_path):
|
|||||||
# into a valid python regex group
|
# into a valid python regex group
|
||||||
rule_from = ruleset.attrib['from'].replace('$', '\\')
|
rule_from = ruleset.attrib['from'].replace('$', '\\')
|
||||||
if rule_from.endswith('\\'):
|
if rule_from.endswith('\\'):
|
||||||
rule_from = rule_from[:-1]+'$'
|
rule_from = rule_from[:-1] + '$'
|
||||||
rule_to = ruleset.attrib['to'].replace('$', '\\')
|
rule_to = ruleset.attrib['to'].replace('$', '\\')
|
||||||
if rule_to.endswith('\\'):
|
if rule_to.endswith('\\'):
|
||||||
rule_to = rule_to[:-1]+'$'
|
rule_to = rule_to[:-1] + '$'
|
||||||
|
|
||||||
# TODO, not working yet because of the hack above,
|
# TODO, not working yet because of the hack above,
|
||||||
# currently doing that in webapp.py
|
# currently doing that in webapp.py
|
||||||
|
Loading…
Reference in New Issue
Block a user