From d2a636f75d24953f5094ea97ab54a8a4353a65ff Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 13 Apr 2015 00:30:12 +0200 Subject: [PATCH] [mod] https rewrite pluginification --- searx/__init__.py | 11 -------- searx/plugins/__init__.py | 9 ++++--- searx/{ => plugins}/https_rewrite.py | 26 ++++++++++++++++--- searx/{ => plugins}/https_rules/00README | 0 searx/{ => plugins}/https_rules/Bing.xml | 0 .../{ => plugins}/https_rules/Dailymotion.xml | 0 .../{ => plugins}/https_rules/Deviantart.xml | 0 .../{ => plugins}/https_rules/DuckDuckGo.xml | 0 searx/{ => plugins}/https_rules/Flickr.xml | 0 .../https_rules/Github-Pages.xml | 0 searx/{ => plugins}/https_rules/Github.xml | 0 .../https_rules/Google-mismatches.xml | 0 .../{ => plugins}/https_rules/Google.org.xml | 0 .../{ => plugins}/https_rules/GoogleAPIs.xml | 0 .../https_rules/GoogleCanada.xml | 0 .../https_rules/GoogleImages.xml | 0 .../https_rules/GoogleMainSearch.xml | 0 .../{ => plugins}/https_rules/GoogleMaps.xml | 0 .../https_rules/GoogleMelange.xml | 0 .../https_rules/GoogleSearch.xml | 0 .../https_rules/GoogleServices.xml | 0 .../https_rules/GoogleShopping.xml | 0 .../{ => plugins}/https_rules/GoogleSorry.xml | 0 .../https_rules/GoogleTranslate.xml | 0 .../https_rules/GoogleVideos.xml | 0 .../https_rules/GoogleWatchBlog.xml | 0 .../https_rules/Google_App_Engine.xml | 0 .../https_rules/Googleplex.com.xml | 0 .../https_rules/OpenStreetMap.xml | 0 .../https_rules/Rawgithub.com.xml | 0 .../{ => plugins}/https_rules/Soundcloud.xml | 0 .../https_rules/ThePirateBay.xml | 0 .../{ => plugins}/https_rules/Torproject.xml | 0 searx/{ => plugins}/https_rules/Twitter.xml | 0 searx/{ => plugins}/https_rules/Vimeo.xml | 0 searx/{ => plugins}/https_rules/WikiLeaks.xml | 0 searx/{ => plugins}/https_rules/Wikimedia.xml | 0 searx/{ => plugins}/https_rules/Yahoo.xml | 0 searx/{ => plugins}/https_rules/YouTube.xml | 0 searx/settings.yml | 1 - searx/webapp.py | 8 +----- 41 files changed, 29 insertions(+), 26 deletions(-) rename 
searx/{ => plugins}/https_rewrite.py (91%) rename searx/{ => plugins}/https_rules/00README (100%) rename searx/{ => plugins}/https_rules/Bing.xml (100%) rename searx/{ => plugins}/https_rules/Dailymotion.xml (100%) rename searx/{ => plugins}/https_rules/Deviantart.xml (100%) rename searx/{ => plugins}/https_rules/DuckDuckGo.xml (100%) rename searx/{ => plugins}/https_rules/Flickr.xml (100%) rename searx/{ => plugins}/https_rules/Github-Pages.xml (100%) rename searx/{ => plugins}/https_rules/Github.xml (100%) rename searx/{ => plugins}/https_rules/Google-mismatches.xml (100%) rename searx/{ => plugins}/https_rules/Google.org.xml (100%) rename searx/{ => plugins}/https_rules/GoogleAPIs.xml (100%) rename searx/{ => plugins}/https_rules/GoogleCanada.xml (100%) rename searx/{ => plugins}/https_rules/GoogleImages.xml (100%) rename searx/{ => plugins}/https_rules/GoogleMainSearch.xml (100%) rename searx/{ => plugins}/https_rules/GoogleMaps.xml (100%) rename searx/{ => plugins}/https_rules/GoogleMelange.xml (100%) rename searx/{ => plugins}/https_rules/GoogleSearch.xml (100%) rename searx/{ => plugins}/https_rules/GoogleServices.xml (100%) rename searx/{ => plugins}/https_rules/GoogleShopping.xml (100%) rename searx/{ => plugins}/https_rules/GoogleSorry.xml (100%) rename searx/{ => plugins}/https_rules/GoogleTranslate.xml (100%) rename searx/{ => plugins}/https_rules/GoogleVideos.xml (100%) rename searx/{ => plugins}/https_rules/GoogleWatchBlog.xml (100%) rename searx/{ => plugins}/https_rules/Google_App_Engine.xml (100%) rename searx/{ => plugins}/https_rules/Googleplex.com.xml (100%) rename searx/{ => plugins}/https_rules/OpenStreetMap.xml (100%) rename searx/{ => plugins}/https_rules/Rawgithub.com.xml (100%) rename searx/{ => plugins}/https_rules/Soundcloud.xml (100%) rename searx/{ => plugins}/https_rules/ThePirateBay.xml (100%) rename searx/{ => plugins}/https_rules/Torproject.xml (100%) rename searx/{ => plugins}/https_rules/Twitter.xml (100%) rename searx/{ => 
plugins}/https_rules/Vimeo.xml (100%) rename searx/{ => plugins}/https_rules/WikiLeaks.xml (100%) rename searx/{ => plugins}/https_rules/Wikimedia.xml (100%) rename searx/{ => plugins}/https_rules/Yahoo.xml (100%) rename searx/{ => plugins}/https_rules/YouTube.xml (100%) diff --git a/searx/__init__.py b/searx/__init__.py index 110f46af8..2d545a809 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -36,11 +36,6 @@ if 'SEARX_SETTINGS_PATH' in environ: else: settings_path = join(searx_dir, 'settings.yml') -if 'SEARX_HTTPS_REWRITE_PATH' in environ: - https_rewrite_path = environ['SEARX_HTTPS_REWRITE_PATH'] -else: - https_rewrite_path = join(searx_dir, 'https_rules') - # load settings with open(settings_path) as settings_yaml: settings = load(settings_yaml) @@ -52,10 +47,4 @@ else: logger = logging.getLogger('searx') -# load https rules only if https rewrite is enabled -if settings.get('server', {}).get('https_rewrite'): - # loade https rules - from searx.https_rewrite import load_https_rules - load_https_rules(https_rewrite_path) - logger.info('Initialisation done') diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index a8e400c93..5ac3f447c 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -14,13 +14,15 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
(C) 2015 by Adam Tauber, ''' -from searx.plugins import (self_ip, - search_on_category_select) -from searx import logger from sys import exit +from searx import logger logger = logger.getChild('plugins') +from searx.plugins import (https_rewrite, + self_ip, + search_on_category_select) + required_attrs = (('name', str), ('description', str), ('default_on', bool)) @@ -68,5 +70,6 @@ class PluginStore(): plugins = PluginStore() +plugins.register(https_rewrite) plugins.register(self_ip) plugins.register(search_on_category_select) diff --git a/searx/https_rewrite.py b/searx/plugins/https_rewrite.py similarity index 91% rename from searx/https_rewrite.py rename to searx/plugins/https_rewrite.py index 71aec1c9b..409b122e6 100644 --- a/searx/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -18,11 +18,22 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. import re from urlparse import urlparse from lxml import etree -from os import listdir +from os import listdir, environ from os.path import isfile, isdir, join -from searx import logger +from searx.plugins import logger +from flask.ext.babel import gettext +from searx import searx_dir +name = "HTTPS rewrite" +description = gettext('Rewrite HTTP links to HTTPS if possible') +default_on = True + +if 'SEARX_HTTPS_REWRITE_PATH' in environ: + rules_path = environ['SEARX_HTTPS_REWRITE_PATH'] +else: + rules_path = join(searx_dir, 'plugins/https_rules') + logger = logger.getChild("https_rewrite") # https://gitweb.torproject.org/\ @@ -33,7 +44,7 @@ https_rules = [] # load single ruleset from a xml file -def load_single_https_ruleset(filepath): +def load_single_https_ruleset(rules_path): ruleset = () # init parser @@ -41,7 +52,7 @@ def load_single_https_ruleset(filepath): # load and parse xml-file try: - tree = etree.parse(filepath, parser) + tree = etree.parse(rules_path, parser) except: # TODO, error message return () @@ -207,3 +218,10 @@ def https_url_rewrite(result): # target has matched, do not search over the 
other rules break return result + + +def on_result(request, ctx): + result = ctx['result'] + if result['parsed_url'].scheme == 'http': + https_url_rewrite(result) + return True diff --git a/searx/https_rules/00README b/searx/plugins/https_rules/00README similarity index 100% rename from searx/https_rules/00README rename to searx/plugins/https_rules/00README diff --git a/searx/https_rules/Bing.xml b/searx/plugins/https_rules/Bing.xml similarity index 100% rename from searx/https_rules/Bing.xml rename to searx/plugins/https_rules/Bing.xml diff --git a/searx/https_rules/Dailymotion.xml b/searx/plugins/https_rules/Dailymotion.xml similarity index 100% rename from searx/https_rules/Dailymotion.xml rename to searx/plugins/https_rules/Dailymotion.xml diff --git a/searx/https_rules/Deviantart.xml b/searx/plugins/https_rules/Deviantart.xml similarity index 100% rename from searx/https_rules/Deviantart.xml rename to searx/plugins/https_rules/Deviantart.xml diff --git a/searx/https_rules/DuckDuckGo.xml b/searx/plugins/https_rules/DuckDuckGo.xml similarity index 100% rename from searx/https_rules/DuckDuckGo.xml rename to searx/plugins/https_rules/DuckDuckGo.xml diff --git a/searx/https_rules/Flickr.xml b/searx/plugins/https_rules/Flickr.xml similarity index 100% rename from searx/https_rules/Flickr.xml rename to searx/plugins/https_rules/Flickr.xml diff --git a/searx/https_rules/Github-Pages.xml b/searx/plugins/https_rules/Github-Pages.xml similarity index 100% rename from searx/https_rules/Github-Pages.xml rename to searx/plugins/https_rules/Github-Pages.xml diff --git a/searx/https_rules/Github.xml b/searx/plugins/https_rules/Github.xml similarity index 100% rename from searx/https_rules/Github.xml rename to searx/plugins/https_rules/Github.xml diff --git a/searx/https_rules/Google-mismatches.xml b/searx/plugins/https_rules/Google-mismatches.xml similarity index 100% rename from searx/https_rules/Google-mismatches.xml rename to searx/plugins/https_rules/Google-mismatches.xml 
diff --git a/searx/https_rules/Google.org.xml b/searx/plugins/https_rules/Google.org.xml similarity index 100% rename from searx/https_rules/Google.org.xml rename to searx/plugins/https_rules/Google.org.xml diff --git a/searx/https_rules/GoogleAPIs.xml b/searx/plugins/https_rules/GoogleAPIs.xml similarity index 100% rename from searx/https_rules/GoogleAPIs.xml rename to searx/plugins/https_rules/GoogleAPIs.xml diff --git a/searx/https_rules/GoogleCanada.xml b/searx/plugins/https_rules/GoogleCanada.xml similarity index 100% rename from searx/https_rules/GoogleCanada.xml rename to searx/plugins/https_rules/GoogleCanada.xml diff --git a/searx/https_rules/GoogleImages.xml b/searx/plugins/https_rules/GoogleImages.xml similarity index 100% rename from searx/https_rules/GoogleImages.xml rename to searx/plugins/https_rules/GoogleImages.xml diff --git a/searx/https_rules/GoogleMainSearch.xml b/searx/plugins/https_rules/GoogleMainSearch.xml similarity index 100% rename from searx/https_rules/GoogleMainSearch.xml rename to searx/plugins/https_rules/GoogleMainSearch.xml diff --git a/searx/https_rules/GoogleMaps.xml b/searx/plugins/https_rules/GoogleMaps.xml similarity index 100% rename from searx/https_rules/GoogleMaps.xml rename to searx/plugins/https_rules/GoogleMaps.xml diff --git a/searx/https_rules/GoogleMelange.xml b/searx/plugins/https_rules/GoogleMelange.xml similarity index 100% rename from searx/https_rules/GoogleMelange.xml rename to searx/plugins/https_rules/GoogleMelange.xml diff --git a/searx/https_rules/GoogleSearch.xml b/searx/plugins/https_rules/GoogleSearch.xml similarity index 100% rename from searx/https_rules/GoogleSearch.xml rename to searx/plugins/https_rules/GoogleSearch.xml diff --git a/searx/https_rules/GoogleServices.xml b/searx/plugins/https_rules/GoogleServices.xml similarity index 100% rename from searx/https_rules/GoogleServices.xml rename to searx/plugins/https_rules/GoogleServices.xml diff --git a/searx/https_rules/GoogleShopping.xml 
b/searx/plugins/https_rules/GoogleShopping.xml similarity index 100% rename from searx/https_rules/GoogleShopping.xml rename to searx/plugins/https_rules/GoogleShopping.xml diff --git a/searx/https_rules/GoogleSorry.xml b/searx/plugins/https_rules/GoogleSorry.xml similarity index 100% rename from searx/https_rules/GoogleSorry.xml rename to searx/plugins/https_rules/GoogleSorry.xml diff --git a/searx/https_rules/GoogleTranslate.xml b/searx/plugins/https_rules/GoogleTranslate.xml similarity index 100% rename from searx/https_rules/GoogleTranslate.xml rename to searx/plugins/https_rules/GoogleTranslate.xml diff --git a/searx/https_rules/GoogleVideos.xml b/searx/plugins/https_rules/GoogleVideos.xml similarity index 100% rename from searx/https_rules/GoogleVideos.xml rename to searx/plugins/https_rules/GoogleVideos.xml diff --git a/searx/https_rules/GoogleWatchBlog.xml b/searx/plugins/https_rules/GoogleWatchBlog.xml similarity index 100% rename from searx/https_rules/GoogleWatchBlog.xml rename to searx/plugins/https_rules/GoogleWatchBlog.xml diff --git a/searx/https_rules/Google_App_Engine.xml b/searx/plugins/https_rules/Google_App_Engine.xml similarity index 100% rename from searx/https_rules/Google_App_Engine.xml rename to searx/plugins/https_rules/Google_App_Engine.xml diff --git a/searx/https_rules/Googleplex.com.xml b/searx/plugins/https_rules/Googleplex.com.xml similarity index 100% rename from searx/https_rules/Googleplex.com.xml rename to searx/plugins/https_rules/Googleplex.com.xml diff --git a/searx/https_rules/OpenStreetMap.xml b/searx/plugins/https_rules/OpenStreetMap.xml similarity index 100% rename from searx/https_rules/OpenStreetMap.xml rename to searx/plugins/https_rules/OpenStreetMap.xml diff --git a/searx/https_rules/Rawgithub.com.xml b/searx/plugins/https_rules/Rawgithub.com.xml similarity index 100% rename from searx/https_rules/Rawgithub.com.xml rename to searx/plugins/https_rules/Rawgithub.com.xml diff --git a/searx/https_rules/Soundcloud.xml 
b/searx/plugins/https_rules/Soundcloud.xml similarity index 100% rename from searx/https_rules/Soundcloud.xml rename to searx/plugins/https_rules/Soundcloud.xml diff --git a/searx/https_rules/ThePirateBay.xml b/searx/plugins/https_rules/ThePirateBay.xml similarity index 100% rename from searx/https_rules/ThePirateBay.xml rename to searx/plugins/https_rules/ThePirateBay.xml diff --git a/searx/https_rules/Torproject.xml b/searx/plugins/https_rules/Torproject.xml similarity index 100% rename from searx/https_rules/Torproject.xml rename to searx/plugins/https_rules/Torproject.xml diff --git a/searx/https_rules/Twitter.xml b/searx/plugins/https_rules/Twitter.xml similarity index 100% rename from searx/https_rules/Twitter.xml rename to searx/plugins/https_rules/Twitter.xml diff --git a/searx/https_rules/Vimeo.xml b/searx/plugins/https_rules/Vimeo.xml similarity index 100% rename from searx/https_rules/Vimeo.xml rename to searx/plugins/https_rules/Vimeo.xml diff --git a/searx/https_rules/WikiLeaks.xml b/searx/plugins/https_rules/WikiLeaks.xml similarity index 100% rename from searx/https_rules/WikiLeaks.xml rename to searx/plugins/https_rules/WikiLeaks.xml diff --git a/searx/https_rules/Wikimedia.xml b/searx/plugins/https_rules/Wikimedia.xml similarity index 100% rename from searx/https_rules/Wikimedia.xml rename to searx/plugins/https_rules/Wikimedia.xml diff --git a/searx/https_rules/Yahoo.xml b/searx/plugins/https_rules/Yahoo.xml similarity index 100% rename from searx/https_rules/Yahoo.xml rename to searx/plugins/https_rules/Yahoo.xml diff --git a/searx/https_rules/YouTube.xml b/searx/plugins/https_rules/YouTube.xml similarity index 100% rename from searx/https_rules/YouTube.xml rename to searx/plugins/https_rules/YouTube.xml diff --git a/searx/settings.yml b/searx/settings.yml index 5594c54c5..f37c56b26 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -6,7 +6,6 @@ server: base_url : False # Set custom base_url. 
Possible values: False or "https://your.custom.host/location/" themes_path : "" # Custom ui themes path - leave it blank if you didn't change default_theme : oscar # ui theme - https_rewrite : True # Force rewrite result urls. See searx/https_rewrite.py useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator image_proxy : False # Proxying image results through searx default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section diff --git a/searx/webapp.py b/searx/webapp.py index 89ab9b543..52ced1363 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -59,7 +59,6 @@ from searx.utils import ( ) from searx.version import VERSION_STRING from searx.languages import language_codes -from searx.https_rewrite import https_url_rewrite from searx.search import Search from searx.query import Query from searx.autocomplete import searx_bang, backends as autocomplete_backends @@ -359,15 +358,10 @@ def index(): for result in search.results: + plugins.call('on_result', request, locals()) if not search.paging and engines[result['engine']].paging: search.paging = True - # check if HTTPS rewrite is required - if settings['server']['https_rewrite']\ - and result['parsed_url'].scheme == 'http': - - result = https_url_rewrite(result) - if search.request_data.get('format', 'html') == 'html': if 'content' in result: result['content'] = highlight_content(result['content'],