From b3ab221b9808ba2b7b01d417210af9b9527e661c Mon Sep 17 00:00:00 2001 From: stepshal Date: Mon, 11 Jul 2016 20:29:47 +0700 Subject: [PATCH] Fix anomalous backslash in string --- searx/engines/currency_convert.py | 2 +- searx/engines/deviantart.py | 2 +- searx/engines/google.py | 4 ++-- searx/engines/startpage.py | 10 ++++---- searx/engines/swisscows.py | 8 +++---- searx/engines/tokyotoshokan.py | 2 +- searx/engines/www500px.py | 2 +- searx/engines/yahoo_news.py | 2 +- searx/plugins/https_rewrite.py | 2 +- searx/results.py | 2 +- searx/utils.py | 2 +- tests/unit/engines/test_dailymotion.py | 2 +- tests/unit/engines/test_deezer.py | 4 ++-- tests/unit/engines/test_flickr.py | 10 ++++---- tests/unit/engines/test_flickr_noapi.py | 2 +- tests/unit/engines/test_ina.py | 24 +++++++++---------- tests/unit/engines/test_mediawiki.py | 2 +- tests/unit/engines/test_mixcloud.py | 2 +- tests/unit/engines/test_searchcode_code.py | 2 +- tests/unit/engines/test_searchcode_doc.py | 2 +- tests/unit/engines/test_wolframalpha_noapi.py | 6 ++--- 21 files changed, 47 insertions(+), 47 deletions(-) diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index b0ffb490a..bc839cfb5 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -9,7 +9,7 @@ categories = [] url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa +parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa db = 1 diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 135aeb324..c7816b9bc 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -47,7 +47,7 @@ def response(resp): dom = html.fromstring(resp.text) - regex = re.compile('\/200H\/') + regex = re.compile(r'\/200H\/') # parse results for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'): diff --git a/searx/engines/google.py b/searx/engines/google.py index 6018ad1b2..fd5e7b54c 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -300,9 +300,9 @@ def parse_map_detail(parsed_url, result, google_hostname): results = [] # try to parse the geoloc - m = re.search('@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path) + m = re.search(r'@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path) if m is None: - m = re.search('ll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query) + m = re.search(r'll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query) if m is not None: # geoloc found (ignored) diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 52dd0b92f..d8b702c4d 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -68,15 +68,15 @@ def response(resp): url = link.attrib.get('href') # block google-ad url's - if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url): + if re.match(r"^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url): continue # block startpage search url's - if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): + if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): continue # block ixquick search url's - if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url): + if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url): continue title = escape(extract_text(link)) @@ -89,7 +89,7 @@ def response(resp): published_date = None # check if search result starts with something like: "2 Sep 2014 ... " - if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): + if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): date_pos = content.find('...') + 4 date_string = content[0:date_pos - 5] published_date = parser.parse(date_string, dayfirst=True) @@ -98,7 +98,7 @@ def response(resp): content = content[date_pos:] # check if search result starts with something like: "5 days ago ... " - elif re.match("^[0-9]+ days? ago \.\.\. ", content): + elif re.match(r"^[0-9]+ days? ago \.\.\. ", content): date_pos = content.find('...') + 4 date_string = content[0:date_pos - 5] diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py index 864436a52..1a94ed64e 100644 --- a/searx/engines/swisscows.py +++ b/searx/engines/swisscows.py @@ -25,10 +25,10 @@ base_url = 'https://swisscows.ch/' search_string = '?{query}&page={page}' # regex -regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment') -regex_json_remove_start = re.compile('^initialData:\s*') -regex_json_remove_end = re.compile(',\s*environment$') -regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=') +regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment') +regex_json_remove_start = re.compile(r'^initialData:\s*') +regex_json_remove_end = re.compile(r',\s*environment$') +regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=') # do search-request diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 17e8e2191..e2990e153 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -48,7 +48,7 @@ def response(resp): return [] # regular expression for parsing torrent size strings - size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) + size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) # processing the results, two rows at a time for i in xrange(0, len(rows), 2): diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py index c98e19443..f1bc6c583 100644 --- a/searx/engines/www500px.py +++ b/searx/engines/www500px.py @@ -41,7 +41,7 @@ def response(resp): results = [] dom = html.fromstring(resp.text) - regex = re.compile('3\.jpg.*$') + regex = re.compile(r'3\.jpg.*$') # parse results for result in dom.xpath('//div[@class="photo"]'): diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index d4cfbeda2..e91c1d34e 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -55,7 +55,7 @@ def request(query, params): def sanitize_url(url): if ".yahoo.com/" in url: - return re.sub(u"\;\_ylt\=.+$", "", url) + return re.sub(u"\\;\\_ylt\\=.+$", "", url) else: return url diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py index 8c29520d2..8a9fcd4ad 100644 --- a/searx/plugins/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -87,7 +87,7 @@ def load_single_https_ruleset(rules_path): # convert host-rule to valid regex host = ruleset.attrib.get('host')\ - .replace('.', '\.').replace('*', '.*') + .replace('.', r'\.').replace('*', '.*') # append to host list hosts.append(host) diff --git a/searx/results.py b/searx/results.py index 4bb0de0d8..d5d88af6b 100644 --- a/searx/results.py +++ b/searx/results.py @@ -5,7 +5,7 @@ from threading import RLock from urlparse import urlparse, unquote from searx.engines import engines -CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U) +CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) diff --git a/searx/utils.py b/searx/utils.py index 219135a4b..c027bff20 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -63,7 +63,7 @@ def highlight_content(content, query): regex_parts = [] for chunk in query.split(): if len(chunk) == 1: - regex_parts.append(u'\W+{0}\W+'.format(re.escape(chunk))) + regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk))) else: regex_parts.append(u'{0}'.format(re.escape(chunk))) query_regex = u'({0})'.format('|'.join(regex_parts)) diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py index 4c31ff5d5..368b3a7a5 100644 --- a/tests/unit/engines/test_dailymotion.py +++ b/tests/unit/engines/test_dailymotion.py @@ -62,7 +62,7 @@ class TestDailymotionEngine(SearxTestCase): self.assertEqual(results[0]['content'], 'Description') self.assertIn('x2fit7q', results[0]['embedded']) - json = """ + json = r""" {"toto":[ {"id":200,"name":"Artist Name", "link":"http:\/\/www.dailymotion.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_deezer.py b/tests/unit/engines/test_deezer.py index ad09d2a2c..cfef852af 100644 --- a/tests/unit/engines/test_deezer.py +++ b/tests/unit/engines/test_deezer.py @@ -27,7 +27,7 @@ class TestDeezerEngine(SearxTestCase): response = mock.Mock(text='{"data": []}') self.assertEqual(deezer.response(response), []) - json = """ + json = r""" {"data":[ {"id":100, "title":"Title of track", "link":"https:\/\/www.deezer.com\/track\/1094042","duration":232, @@ -45,7 +45,7 @@ class TestDeezerEngine(SearxTestCase): self.assertEqual(results[0]['content'], 'Artist Name • Album Title • Title of track') self.assertTrue('100' in results[0]['embedded']) - json = """ + json = r""" {"data":[ {"id":200,"name":"Artist Name", "link":"https:\/\/www.deezer.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_flickr.py b/tests/unit/engines/test_flickr.py index 8b39e922f..2d7472a92 100644 --- a/tests/unit/engines/test_flickr.py +++ b/tests/unit/engines/test_flickr.py @@ -27,7 +27,7 @@ class TestFlickrEngine(SearxTestCase): response = mock.Mock(text='{"data": []}') self.assertEqual(flickr.response(response), []) - json = """ + json = r""" { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", "photo": [ { "id": "15751017054", "owner": "66847915@N08", @@ -55,7 +55,7 @@ class TestFlickrEngine(SearxTestCase): self.assertTrue('Owner' in results[0]['content']) self.assertTrue('Description' in results[0]['content']) - json = """ + json = r""" { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", "photo": [ { "id": "15751017054", "owner": "66847915@N08", @@ -79,7 +79,7 @@ class TestFlickrEngine(SearxTestCase): self.assertTrue('Owner' in results[0]['content']) self.assertTrue('Description' in results[0]['content']) - json = """ + json = r""" { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", "photo": [ { "id": "15751017054", "owner": "66847915@N08", @@ -103,7 +103,7 @@ class TestFlickrEngine(SearxTestCase): self.assertTrue('Owner' in results[0]['content']) self.assertTrue('Description' in results[0]['content']) - json = """ + json = r""" { "photos": { "page": 1, "pages": "41001", "perpage": 100, "total": "4100032", "photo": [ { "id": "15751017054", "owner": "66847915@N08", @@ -130,7 +130,7 @@ class TestFlickrEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 0) - json = """ + json = r""" {"toto":[ {"id":200,"name":"Artist Name", "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_flickr_noapi.py b/tests/unit/engines/test_flickr_noapi.py index 3b337a2d8..42f38f90b 100644 --- a/tests/unit/engines/test_flickr_noapi.py +++ b/tests/unit/engines/test_flickr_noapi.py @@ -316,7 +316,7 @@ class TestFlickrNoapiEngine(SearxTestCase): self.assertEqual(len(results), 0) # garbage test - json = """ + json = r""" {"toto":[ {"id":200,"name":"Artist Name", "link":"http:\/\/www.flickr.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_ina.py b/tests/unit/engines/test_ina.py index 2c87a0ff8..109a9592d 100644 --- a/tests/unit/engines/test_ina.py +++ b/tests/unit/engines/test_ina.py @@ -33,23 +33,23 @@ class TestInaEngine(SearxTestCase):
\\n\ \\t\\t\\t\\t\\n\ - \\n\ + \\"Conf\\u00e9rence\\n\ - \\t\\t\\t\\t\\t<\/a>\\n\ + \\t\\t\\t\\t\\t<\\/a>\\n\ \\t\\t\\t\\t\\t
\\n\\t\\t\\t\\t\\t\\t

\\n\ \\t\\t\\t\\t\\t\\t\\t\ - Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\/a>\\n\ - <\/h3>\\n\ -
\\n27\/11\/1967<\/span>\\n\ - 29321 vues<\/span>\\n\ - 01h 33m 07s<\/span>\\n\ - <\/div>\\n\ + href=\\"\\/video\\/CAF89035682\\/conference-de-presse-du-general-de-gaulle-video.html\\">\ + Conf\\u00e9rence de presse du G\\u00e9n\\u00e9ral de Gaulle <\\/a>\\n\ + <\\/h3>\\n\ +
\\n27\\/11\\/1967<\\/span>\\n\ + 29321 vues<\\/span>\\n\ + 01h 33m 07s<\\/span>\\n\ + <\\/div>\\n\

VERSION INTEGRALE DE LA CONFERENCE DE PRESSE DU GENERAL DE GAULLE . \ - - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\/p>\\n\ - <\/div>\\n<\/div>\\n" + - PA le Pr\\u00e9sident DE GAULLE : il ouvre les bras et s'assied. DP journalis...<\\/p>\\n\ + <\\/div>\\n<\\/div>\\n" } """ response = mock.Mock(text=json) diff --git a/tests/unit/engines/test_mediawiki.py b/tests/unit/engines/test_mediawiki.py index 63f7da6b2..b86372700 100644 --- a/tests/unit/engines/test_mediawiki.py +++ b/tests/unit/engines/test_mediawiki.py @@ -118,7 +118,7 @@ class TestMediawikiEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 0) - json = """ + json = r""" {"toto":[ {"id":200,"name":"Artist Name", "link":"http:\/\/www.mediawiki.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_mixcloud.py b/tests/unit/engines/test_mixcloud.py index a2ea47cf9..9c79a478e 100644 --- a/tests/unit/engines/test_mixcloud.py +++ b/tests/unit/engines/test_mixcloud.py @@ -55,7 +55,7 @@ class TestMixcloudEngine(SearxTestCase): self.assertEqual(results[0]['content'], 'User') self.assertTrue('http://www.mixcloud.com/user/this-is-the-url/' in results[0]['embedded']) - json = """ + json = r""" {"toto":[ {"id":200,"name":"Artist Name", "link":"http:\/\/www.mixcloud.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_searchcode_code.py b/tests/unit/engines/test_searchcode_code.py index c0ac2025c..955aea111 100644 --- a/tests/unit/engines/test_searchcode_code.py +++ b/tests/unit/engines/test_searchcode_code.py @@ -63,7 +63,7 @@ class TestSearchcodeCodeEngine(SearxTestCase): self.assertEqual(results[0]['repository'], 'https://repo') self.assertEqual(results[0]['code_language'], 'cpp') - json = """ + json = r""" {"toto":[ {"id":200,"name":"Artist Name", "link":"http:\/\/www.searchcode_code.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_searchcode_doc.py b/tests/unit/engines/test_searchcode_doc.py index b9dcf380b..7228613ed 100644 --- a/tests/unit/engines/test_searchcode_doc.py +++ b/tests/unit/engines/test_searchcode_doc.py @@ -61,7 +61,7 @@ class TestSearchcodeDocEngine(SearxTestCase): self.assertIn('test', results[0]['content']) self.assertIn('Description', results[0]['content']) - json = """ + json = r""" {"toto":[ {"id":200,"name":"Artist Name", "link":"http:\/\/www.searchcode_doc.com\/artist\/1217","type":"artist"} diff --git a/tests/unit/engines/test_wolframalpha_noapi.py b/tests/unit/engines/test_wolframalpha_noapi.py index 068c1be79..a8f73470e 100644 --- a/tests/unit/engines/test_wolframalpha_noapi.py +++ b/tests/unit/engines/test_wolframalpha_noapi.py @@ -28,7 +28,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): request = Request(headers={'Referer': referer_url}) # test failure - json = ''' + json = r''' {"queryresult" : { "success" : false, "error" : false, @@ -42,7 +42,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(wolframalpha_noapi.response(response), []) # test basic case - json = ''' + json = r''' {"queryresult" : { "success" : true, "error" : false, @@ -143,7 +143,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual('Wolfram|Alpha', results[1]['title']) # test calc - json = """ + json = r""" {"queryresult" : { "success" : true, "error" : false,