This commit is contained in:
a01200356 2016-01-18 11:29:45 -06:00
commit 51278ee0be
19 changed files with 52 additions and 51 deletions

View File

@ -16,7 +16,10 @@ update_dev_packages() {
pep8_check() { pep8_check() {
echo '[!] Running pep8 check' echo '[!] Running pep8 check'
pep8 --max-line-length=120 "$SEARX_DIR" "$BASE_DIR/tests" # ignored rules:
# E402 module level import not at top of file
# W503 line break before binary operator
pep8 --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests"
} }
unit_tests() { unit_tests() {

View File

@ -1,7 +1,7 @@
babel==2.2.0 babel==2.2.0
flake8==2.5.1
mock==1.0.1 mock==1.0.1
nose2[coverage-plugin] nose2[coverage-plugin]
pep8==1.7.0
plone.testing==4.0.15 plone.testing==4.0.15
robotframework-selenium2library==1.7.4 robotframework-selenium2library==1.7.4
robotsuite==1.7.0 robotsuite==1.7.0

View File

@ -114,8 +114,7 @@ def dbpedia(query):
# dbpedia autocompleter, no HTTPS # dbpedia autocompleter, no HTTPS
autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
response = get(autocomplete_url response = get(autocomplete_url + urlencode(dict(QueryString=query)))
+ urlencode(dict(QueryString=query)))
results = [] results = []
@ -141,8 +140,7 @@ def google(query):
# google autocompleter # google autocompleter
autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
response = get(autocomplete_url response = get(autocomplete_url + urlencode(dict(q=query)))
+ urlencode(dict(q=query)))
results = [] results = []

View File

@ -37,7 +37,7 @@ def request(query, params):
c=c) c=c)
if params['pageno'] != 1: if params['pageno'] != 1:
params['url'] += '&page={pageno}'.format(pageno=(params['pageno']-1)) params['url'] += '&page={pageno}'.format(pageno=(params['pageno'] - 1))
# let Blekko know we don't want profiling # let Blekko know we don't want profiling
params['cookies']['tag_lesslogging'] = '1' params['cookies']['tag_lesslogging'] = '1'

View File

@ -29,7 +29,7 @@ search_url = url + '/search?q={search_term}&p={pageno}'
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(search_term=quote(query), params['url'] = search_url.format(search_term=quote(query),
pageno=params['pageno']-1) pageno=params['pageno'] - 1)
return params return params

View File

@ -24,7 +24,7 @@ paging = True
# search-url # search-url
base_url = 'https://www.deviantart.com/' base_url = 'https://www.deviantart.com/'
search_url = base_url+'browse/all/?offset={offset}&{query}' search_url = base_url + 'browse/all/?offset={offset}&{query}'
# do search-request # do search-request

View File

@ -22,7 +22,7 @@ paging = True
# search-url # search-url
base_url = 'https://digg.com/' base_url = 'https://digg.com/'
search_url = base_url+'api/search/{query}.json?position={position}&format=html' search_url = base_url + 'api/search/{query}.json?position={position}&format=html'
# specific xpath variables # specific xpath variables
results_xpath = '//article' results_xpath = '//article'

View File

@ -88,7 +88,7 @@ def response(resp):
for result in search_res['results']: for result in search_res['results']:
if result['news']: if result['news']:
# timestamp (milliseconds since 1970) # timestamp (milliseconds since 1970)
publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0) # noqa publishedDate = datetime.datetime.fromtimestamp(result['date'] / 1000.0) # noqa
# append news result # append news result
results.append({'url': result['url'], results.append({'url': result['url'],

View File

@ -209,29 +209,29 @@ def response(resp):
parsed_url = urlparse(url, google_hostname) parsed_url = urlparse(url, google_hostname)
# map result # map result
if ((parsed_url.netloc == google_hostname and parsed_url.path.startswith(maps_path)) if parsed_url.netloc == google_hostname:
or (parsed_url.netloc.startswith(map_hostname_start))): # TODO fix inside links
x = result.xpath(map_near) continue
if len(x) > 0: # if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start):
# map : near the location # print "yooooo"*30
results = results + parse_map_near(parsed_url, x, google_hostname) # x = result.xpath(map_near)
else: # if len(x) > 0:
# map : detail about a location # # map : near the location
results = results + parse_map_detail(parsed_url, result, google_hostname) # results = results + parse_map_near(parsed_url, x, google_hostname)
# else:
# # map : detail about a location
# results = results + parse_map_detail(parsed_url, result, google_hostname)
# # google news
# elif parsed_url.path == search_path:
# # skipping news results
# pass
# google news # # images result
elif (parsed_url.netloc == google_hostname # elif parsed_url.path == images_path:
and parsed_url.path == search_path): # # only thumbnail image provided,
# skipping news results # # so skipping image results
pass # # results = results + parse_images(result, google_hostname)
# pass
# images result
elif (parsed_url.netloc == google_hostname
and parsed_url.path == images_path):
# only thumbnail image provided,
# so skipping image results
# results = results + parse_images(result, google_hostname)
pass
else: else:
# normal result # normal result

View File

@ -20,7 +20,7 @@ paging = True
# search-url # search-url
url = 'https://searchcode.com/' url = 'https://searchcode.com/'
search_url = url+'api/codesearch_I/?{query}&p={pageno}' search_url = url + 'api/codesearch_I/?{query}&p={pageno}'
# special code-endings which are not recognised by the file ending # special code-endings which are not recognised by the file ending
code_endings = {'cs': 'c#', code_endings = {'cs': 'c#',
@ -32,7 +32,7 @@ code_endings = {'cs': 'c#',
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno']-1) pageno=params['pageno'] - 1)
# Disable SSL verification # Disable SSL verification
# error: (60) SSL certificate problem: unable to get local issuer # error: (60) SSL certificate problem: unable to get local issuer

View File

@ -19,13 +19,13 @@ paging = True
# search-url # search-url
url = 'https://searchcode.com/' url = 'https://searchcode.com/'
search_url = url+'api/search_IV/?{query}&p={pageno}' search_url = url + 'api/search_IV/?{query}&p={pageno}'
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno']-1) pageno=params['pageno'] - 1)
# Disable SSL verification # Disable SSL verification
# error: (60) SSL certificate problem: unable to get local issuer # error: (60) SSL certificate problem: unable to get local issuer

View File

@ -22,7 +22,7 @@ paging = True
# search-url # search-url
url = 'https://stackoverflow.com/' url = 'https://stackoverflow.com/'
search_url = url+'search?{query}&page={pageno}' search_url = url + 'search?{query}&page={pageno}'
# specific xpath variables # specific xpath variables
results_xpath = '//div[contains(@class,"question-summary")]' results_xpath = '//div[contains(@class,"question-summary")]'

View File

@ -90,8 +90,8 @@ def response(resp):
# check if search result starts with something like: "2 Sep 2014 ... " # check if search result starts with something like: "2 Sep 2014 ... "
if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
date_pos = content.find('...')+4 date_pos = content.find('...') + 4
date_string = content[0:date_pos-5] date_string = content[0:date_pos - 5]
published_date = parser.parse(date_string, dayfirst=True) published_date = parser.parse(date_string, dayfirst=True)
# fix content string # fix content string
@ -99,8 +99,8 @@ def response(resp):
# check if search result starts with something like: "5 days ago ... " # check if search result starts with something like: "5 days ago ... "
elif re.match("^[0-9]+ days? ago \.\.\. ", content): elif re.match("^[0-9]+ days? ago \.\.\. ", content):
date_pos = content.find('...')+4 date_pos = content.find('...') + 4
date_string = content[0:date_pos-5] date_string = content[0:date_pos - 5]
# calculate datetime # calculate datetime
published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))

View File

@ -295,7 +295,7 @@ def get_geolink(claims, propertyName, defaultValue=''):
if precision < 0.0003: if precision < 0.0003:
zoom = 19 zoom = 19
else: else:
zoom = int(15 - precision*8.8322 + precision*precision*0.625447) zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447)
url = url_map\ url = url_map\
.replace('{latitude}', str(value.get('latitude', 0)))\ .replace('{latitude}', str(value.get('latitude', 0)))\
@ -318,6 +318,6 @@ def get_wikilink(result, wikiid):
def get_wiki_firstlanguage(result, wikipatternid): def get_wiki_firstlanguage(result, wikipatternid):
for k in result.get('sitelinks', {}).keys(): for k in result.get('sitelinks', {}).keys():
if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)): if k.endswith(wikipatternid) and len(k) == (2 + len(wikipatternid)):
return k[0:2] return k[0:2]
return None return None

View File

@ -22,7 +22,7 @@ paging = False
# search-url # search-url
base_url = 'https://1x.com' base_url = 'https://1x.com'
search_url = base_url+'/backend/search.php?{query}' search_url = base_url + '/backend/search.php?{query}'
# do search-request # do search-request

View File

@ -43,7 +43,7 @@ def extract_url(xpath_results, search_url):
if url.startswith('//'): if url.startswith('//'):
# add http or https to this kind of url //example.com/ # add http or https to this kind of url //example.com/
parsed_search_url = urlparse(search_url) parsed_search_url = urlparse(search_url)
url = parsed_search_url.scheme+url url = parsed_search_url.scheme + url
elif url.startswith('/'): elif url.startswith('/'):
# fix relative url to the search engine # fix relative url to the search engine
url = urljoin(search_url, url) url = urljoin(search_url, url)
@ -69,7 +69,7 @@ def normalize_url(url):
p = parsed_url.path p = parsed_url.path
mark = p.find('/**') mark = p.find('/**')
if mark != -1: if mark != -1:
return unquote(p[mark+3:]).decode('utf-8') return unquote(p[mark + 3:]).decode('utf-8')
return url return url

View File

@ -38,7 +38,7 @@ content_xpath = './/div[@class="serp-item__text"]//text()'
def request(query, params): def request(query, params):
lang = params['language'].split('_')[0] lang = params['language'].split('_')[0]
host = base_url.format(tld=language_map.get(lang) or default_tld) host = base_url.format(tld=language_map.get(lang) or default_tld)
params['url'] = host + search_url.format(page=params['pageno']-1, params['url'] = host + search_url.format(page=params['pageno'] - 1,
query=urlencode({'text': query})) query=urlencode({'text': query}))
return params return params

View File

@ -103,10 +103,10 @@ def load_single_https_ruleset(rules_path):
# into a valid python regex group # into a valid python regex group
rule_from = ruleset.attrib['from'].replace('$', '\\') rule_from = ruleset.attrib['from'].replace('$', '\\')
if rule_from.endswith('\\'): if rule_from.endswith('\\'):
rule_from = rule_from[:-1]+'$' rule_from = rule_from[:-1] + '$'
rule_to = ruleset.attrib['to'].replace('$', '\\') rule_to = ruleset.attrib['to'].replace('$', '\\')
if rule_to.endswith('\\'): if rule_to.endswith('\\'):
rule_to = rule_to[:-1]+'$' rule_to = rule_to[:-1] + '$'
# TODO, not working yet because of the hack above, # TODO, not working yet because of the hack above,
# currently doing that in webapp.py # currently doing that in webapp.py