tests for _fetch_supported_languages in engines

and refactor the method to make it testable without making requests
This commit is contained in:
marc 2016-12-15 00:34:43 -06:00
parent e0c270bd72
commit af35eee10b
27 changed files with 387 additions and 3388 deletions

Binary file not shown.

View file

@ -21,6 +21,7 @@ import sys
from flask_babel import gettext
from operator import itemgetter
from json import loads
from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
@ -79,9 +80,6 @@ def load_engine(engine_data):
if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value)
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# checking required variables
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
@ -91,6 +89,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr))
sys.exit(1)
# assign supported languages from json file
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
setattr(engine, 'fetch_supported_languages',
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
engine.stats = {
'result_count': 0,
'search_count': 0,

View file

@ -15,7 +15,6 @@
from urllib import urlencode
from lxml import html
from requests import get
from searx.engines.xpath import extract_text
# engine dependent config
@ -86,10 +85,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = html.fromstring(response.text)
dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="limit-languages"]//input')
for option in options:
code = option.xpath('./@id')[0].replace('_', '-')

View file

@ -19,7 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
from searx.engines.bing import fetch_supported_languages
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['images']

View file

@ -17,7 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
from searx.engines.bing import fetch_supported_languages
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['news']

View file

@ -80,11 +80,10 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
response_json = loads(response.text)
response_json = loads(resp.text)
for language in response_json['list']:
supported_languages[language['code']] = {}

View file

@ -119,11 +119,10 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
response = get(supported_languages_url)
def _fetch_supported_languages(resp):
# response is a js file with regions as an embedded object
response_page = response.text
response_page = resp.text
response_page = response_page[response_page.find('regions:{') + 8:]
response_page = response_page[:response_page.find('}') + 1]

View file

@ -4,7 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import fetch_supported_languages
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'

View file

@ -14,7 +14,6 @@ from json import loads
from random import randint
from time import time
from urllib import urlencode
from requests import get
from lxml.html import fromstring
# engine dependent config
@ -91,10 +90,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
links = dom.xpath('//span[@id="menu2"]/a')
for link in links:
code = link.xpath('./@href')[0][-2:]

View file

@ -12,7 +12,6 @@ import re
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html, etree
from requests import get
from searx.engines.xpath import extract_text, extract_url
from searx.search import logger
@ -364,14 +363,13 @@ def attributes_to_html(attributes):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
dom = html.fromstring(response.text)
options = dom.xpath('//select[@name="hl"]/option')
dom = html.fromstring(resp.text)
options = dom.xpath('//table//td/font/label/span')
for option in options:
code = option.xpath('./@value')[0].split('-')[0]
name = option.text[:-1].title()
code = option.xpath('./@id')[0][1:]
name = option.text.title()
supported_languages[code] = {"name": name}
return supported_languages

View file

@ -13,7 +13,7 @@
from lxml import html
from urllib import urlencode
from json import loads
from searx.engines.google import fetch_supported_languages
from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url
categories = ['news']

View file

@ -13,7 +13,6 @@
from json import loads
from urllib import urlencode, unquote
import re
from requests import get
from lxml.html import fromstring
# engine dependent config
@ -25,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'
supported_languages_url = base_url
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
@ -113,10 +114,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(base_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options:
code = option.xpath('./@data-val')[0]

View file

@ -15,7 +15,7 @@ from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
from searx.engines.wikipedia import fetch_supported_languages
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads
from lxml.html import fromstring

View file

@ -12,7 +12,6 @@
from json import loads
from urllib import urlencode, quote
from requests import get
from lxml.html import fromstring
@ -119,10 +118,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
tables = dom.xpath('//table[contains(@class,"sortable")]')
for table in tables:
# exclude header row

View file

@ -14,7 +14,6 @@
from urllib import urlencode
from urlparse import unquote
from lxml import html
from requests import get
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
@ -144,13 +143,12 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = html.fromstring(response.text)
dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="yschlang"]/span/label/input')
for option in options:
code = option.xpath('./@value')[0][5:]
code = option.xpath('./@value')[0][5:].replace('_', '-')
supported_languages.append(code)
return supported_languages

View file

@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url, fetch_supported_languages
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser