mirror of
https://github.com/searxng/searxng
synced 2024-01-01 18:24:07 +00:00
Merge pull request #678 from potato/master
[engine] dictzone + mymemory.translated engine
This commit is contained in:
commit
596c6b6c93
70
searx/engines/dictzone.py
Normal file
70
searx/engines/dictzone.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
"""
|
||||||
|
Dictzone
|
||||||
|
|
||||||
|
@website https://dictzone.com/
|
||||||
|
@provide-api no
|
||||||
|
@using-api no
|
||||||
|
@results HTML (using search portal)
|
||||||
|
@stable no (HTML can change)
|
||||||
|
@parse url, title, content
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from urlparse import urljoin
|
||||||
|
from lxml import html
|
||||||
|
from cgi import escape
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.utils import is_valid_lang
|
||||||
|
|
||||||
|
# Result categories this engine is listed under.
categories = ['general']
# Lookup URL template. HTTPS is used so that the searched word is not sent
# in clear text; the placeholders are filled in by request().
url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
# NOTE(review): presumably a ranking weight read by the engine loader - confirm.
weight = 100

# Recognizes queries ending in "<from>-<to> <word>", case-insensitively.
# Groups: source language, target language, a single space-free word.
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
# XPath selecting the rows of the table with id "r" in the result page.
results_xpath = './/table[@id="r"]/tr'
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
    """Fill in ``params['url']`` for a "<from>-<to> <word>" dictionary query.

    :param query: raw query as a UTF-8 encoded byte string.
    :param params: request parameter dict; it is modified in place.
    :returns: ``params``. It is returned untouched (no ``'url'`` set) when the
        query does not match ``parser_re`` or when either language is not
        recognized by ``is_valid_lang``.
    """
    # Local import: the module header does not import a URL-quoting helper.
    from urllib import quote

    m = parser_re.match(unicode(query, 'utf8'))
    if not m:
        return params

    from_lang, to_lang, query = m.groups()

    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    # Percent-encode the UTF-8 bytes of the word: reserved characters would
    # otherwise corrupt the path, and formatting a non-ASCII unicode value
    # into the byte-string template raises UnicodeEncodeError on Python 2.
    # Index 2 of the is_valid_lang() result is the lowercase language name.
    params['url'] = url.format(from_lang=from_lang[2],
                               to_lang=to_lang[2],
                               query=quote(query.encode('utf8'), ''))

    return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
    """Parse a dictzone result page into a list of result dicts.

    :param resp: HTTP response; ``resp.text`` is parsed as HTML and
        ``resp.url`` is used as the base of each result URL.
    :returns: list of dicts with ``'url'``, ``'title'`` (the source term) and
        ``'content'`` (the translations joined with ``'; '``).
    """
    results = []

    dom = html.fromstring(resp.text)

    # The first matched row is skipped (presumably the table header - confirm).
    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
        cells = result.xpath('./td')
        # Only rows with exactly two cells (term, translations) are usable.
        # Checking the length replaces a bare ``except:`` that hid every
        # other kind of error as well.
        if len(cells) != 2:
            continue
        from_result, to_results_raw = cells

        # Keep the text of every translation link that is not blank.
        link_texts = (link.text_content()
                      for link in to_results_raw.xpath('./p/a'))
        to_results = [text for text in link_texts if text.strip()]

        results.append({
            # The row index is appended as a query string, giving every row
            # a distinct URL.
            'url': urljoin(resp.url, '?%d' % k),
            'title': escape(from_result.text_content()),
            'content': escape('; '.join(to_results))
        })

    return results
|
69
searx/engines/translated.py
Normal file
69
searx/engines/translated.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
"""
|
||||||
|
MyMemory Translated
|
||||||
|
|
||||||
|
@website https://mymemory.translated.net/
|
||||||
|
@provide-api yes (https://mymemory.translated.net/doc/spec.php)
|
||||||
|
@using-api yes
|
||||||
|
@results JSON
|
||||||
|
@stable yes
|
||||||
|
@parse url, title, content
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from urlparse import urljoin
|
||||||
|
from lxml import html
|
||||||
|
from cgi import escape
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.utils import is_valid_lang
|
||||||
|
|
||||||
|
# Result categories this engine is listed under.
categories = ['general']
# API endpoint template. HTTPS is used so that the text to translate (and the
# optional API key) is not sent in clear text. ``{key}`` is either empty or a
# complete "&key=..." fragment built by request().
url = 'https://api.mymemory.translated.net/get?q={query}' \
      '&langpair={from_lang}|{to_lang}{key}'
# Human-facing page for the same translation, used as the result link.
web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
# NOTE(review): presumably a ranking weight read by the engine loader - confirm.
weight = 100

# Recognizes queries ending in "<from>-<to> <text>", case-insensitively.
# Groups: source language, target language, text of at least two characters.
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
# Optional MyMemory API key; the empty string means the key is omitted.
api_key = ''
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
    """Fill in the MyMemory API request for a "<from>-<to> <text>" query.

    On success ``params`` gains ``'url'`` (the API call) plus ``'query'``,
    ``'from_lang'`` and ``'to_lang'``, which ``response`` reads back through
    ``resp.search_params``.

    :param query: raw query as a UTF-8 encoded byte string.
    :param params: request parameter dict; it is modified in place.
    :returns: ``params``. It is returned untouched when the query does not
        match ``parser_re`` or when either language is not recognized by
        ``is_valid_lang``.
    """
    # Local import: the module header does not import a URL-quoting helper.
    from urllib import quote

    m = parser_re.match(unicode(query, 'utf8'))
    if not m:
        return params

    from_lang, to_lang, query = m.groups()

    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    # The key is optional; without one the parameter is left out entirely.
    key_form = ('&key=' + api_key) if api_key else ''

    # Percent-encode the UTF-8 bytes of the text: it is free-form and may
    # contain spaces, '&' or '#', which would corrupt the query string, and
    # formatting a non-ASCII unicode value into the byte-string template
    # raises UnicodeEncodeError on Python 2.
    # Index 1 of the is_valid_lang() result is the two-letter language code.
    params['url'] = url.format(from_lang=from_lang[1],
                               to_lang=to_lang[1],
                               query=quote(query.encode('utf8'), ''),
                               key=key_form)
    # Stored unencoded for response().
    params['query'] = query
    params['from_lang'] = from_lang
    params['to_lang'] = to_lang

    return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
    """Turn a MyMemory API answer into a single search result.

    :param resp: HTTP response whose ``search_params`` carry the ``'query'``,
        ``'from_lang'`` and ``'to_lang'`` entries stored by ``request`` and
        whose JSON body holds ``responseData.translatedText``.
    :returns: a one-element list with ``'url'``, ``'title'`` and ``'content'``,
        or an empty list when the answer contains no translated text.
    """
    # Local import: the module header does not import a URL-quoting helper.
    from urllib import quote

    search_params = resp.search_params
    from_lang = search_params['from_lang']
    to_lang = search_params['to_lang']
    query = search_params['query']

    # A missing or null translation yields no result instead of crashing in
    # escape(); malformed JSON still raises as before.
    response_data = resp.json().get('responseData') or {}
    translated_text = response_data.get('translatedText')
    if not translated_text:
        return []

    # The query is percent-encoded for the link; index 2 of the language
    # tuples is the lowercase language name, index 1 the two-letter code.
    link = web_url.format(from_lang=from_lang[2],
                          to_lang=to_lang[2],
                          query=quote(query.encode('utf8'), ''))
    # Unicode template: a byte-string template raises UnicodeEncodeError on
    # Python 2 when the query contains non-ASCII characters.
    title = u'[{0}-{1}] {2}'.format(from_lang[1], to_lang[1], query)

    return [{
        'url': escape(link),
        'title': escape(title),
        'content': escape(translated_text)
    }]
|
@ -495,6 +495,19 @@ engines:
|
|||||||
timeout: 6.0
|
timeout: 6.0
|
||||||
categories : science
|
categories : science
|
||||||
|
|
||||||
|
- name : dictzone
|
||||||
|
engine : dictzone
|
||||||
|
shortcut : dc
|
||||||
|
|
||||||
|
- name : mymemory translated
|
||||||
|
engine : translated
|
||||||
|
shortcut : tl
|
||||||
|
timeout : 5.0
|
||||||
|
disabled : True
|
||||||
|
# You can use without an API key, but you are limited to 1000 words/day
|
||||||
|
# See : http://mymemory.translated.net/doc/usagelimits.php
|
||||||
|
# api_key : ''
|
||||||
|
|
||||||
#The blekko technology and team have joined IBM Watson! -> https://blekko.com/
|
#The blekko technology and team have joined IBM Watson! -> https://blekko.com/
|
||||||
# - name : blekko images
|
# - name : blekko images
|
||||||
# engine : blekko_images
|
# engine : blekko_images
|
||||||
|
@ -9,6 +9,7 @@ from HTMLParser import HTMLParser
|
|||||||
from random import choice
|
from random import choice
|
||||||
|
|
||||||
from searx.version import VERSION_STRING
|
from searx.version import VERSION_STRING
|
||||||
|
from searx.languages import language_codes
|
||||||
from searx import settings
|
from searx import settings
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
|
||||||
@ -255,3 +256,17 @@ def get_torrent_size(filesize, filesize_multiplier):
|
|||||||
filesize = None
|
filesize = None
|
||||||
|
|
||||||
return filesize
|
return filesize
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_lang(lang):
    """Look up ``lang`` in ``language_codes``.

    A two-character ``lang`` is compared with the first two characters of each
    entry's code (element 0); any other length is compared with the entry's
    name (element 1). Both comparisons ignore the case of ``lang``.

    :returns: ``(True, two_letter_code, lowercase_name)`` for the first
        matching entry, or ``False`` when nothing matches.
    """
    wanted = lang.lower()
    match_by_code = len(lang) == 2

    for entry in language_codes:
        short_code = entry[0][:2]
        candidate = short_code if match_by_code else entry[1].lower()
        if candidate == wanted:
            return (True, short_code, entry[1].lower())

    return False
|
||||||
|
Loading…
Reference in New Issue
Block a user