From dca4d276a45cc85f97db326636b4b671a3263225 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sun, 30 Oct 2016 01:01:22 +0200 Subject: [PATCH] add sample config of filtron --- docs/admin/filtron.rst | 114 +++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 2 files changed, 115 insertions(+) create mode 100644 docs/admin/filtron.rst diff --git a/docs/admin/filtron.rst b/docs/admin/filtron.rst new file mode 100644 index 000000000..c422cb0a8 --- /dev/null +++ b/docs/admin/filtron.rst @@ -0,0 +1,114 @@ +How to protect an instance +========================== + +Searx depens on external search services. To avoid the abuse of these services it is advised to limit the number of requests processed by searx. + +An application firewall, ``filtron`` solves exactly this problem. Information on how to install it can be found at the `project page of filtron `__. + +Sample configuration of filtron +------------------------------- + +An example configuration can be find below. This configuration limits the access of + + * scripts or applications (roboagent limit) + + * webcrawlers (botlimit) + + * IPs which send too many requests (IP limit) + + * too many json, csv, etc. requests (rss/json limit) + + * the same UserAgent of if too many requests (useragent limit) + + +.. code:: json + + [ + { + "name": "search request", + "filters": ["Param:q", "Path=^(/|/search)$"], + "interval": , + "limit": , + "subrules": [ + { + "name": "roboagent limit", + "interval": , + "limit": , + "filters": ["Header:User-Agent=(curl|cURL|Wget|python-requests|Scrapy|FeedFetcher|Go-http-client)"], + "actions": [ + {"name": "block", + "params": {"message": "Rate limit exceeded"}} + ] + }, + { + "name": "botlimit", + "limit": 0, + "stop": true, + "filters": ["Header:User-Agent=(Googlebot|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT)"], + "actions": [ + {"name": "block", + "params": {"message": "Rate limit exceeded"}} + ] + }, + { + "name": "IP limit", + "interval": , + "limit": , + "stop": true, + "aggregations": ["Header:X-Forwarded-For"], + "actions": [ + {"name": "block", + "params": {"message": "Rate limit exceeded"}} + ] + }, + { + "name": "rss/json limit", + "interval": , + "limit": , + "stop": true, + "filters": ["Param:format=(csv|json|rss)"], + "actions": [ + {"name": "block", + "params": {"message": "Rate limit exceeded"}} + ] + }, + { + "name": "useragent limit", + "interval": , + "limit": , + "aggregations": ["Header:User-Agent"], + "actions": [ + {"name": "block", + "params": {"message": "Rate limit exceeded"}} + ] + } + ] + } + ] + + + +Route request through filtron +----------------------------- + +Filtron can be started using the following command: + +.. code:: bash + + $ filtron -rules rules.json + +It listens on 127.0.0.1:4004 and forwards filtered requests to 127.0.0.1:8888 by default. + +Use it along with ``nginx`` with the following example configuration. + +.. code:: bash + + location / { + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Scheme $scheme; + proxy_pass http://127.0.0.1:4004/; + } + +Requests are coming from port 4004 going through filtron and then forwarded to port 8888 where a searx is being run. diff --git a/docs/index.rst b/docs/index.rst index 46960c0fc..18e4b6d07 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,6 +38,7 @@ Administrator documentation dev/install/installation admin/api + admin/filtron Developer documentation -----------------------