forked from zaclys/searxng
		
	Merge pull request #678 from potato/master
[engine] dictzone + mymemory.translated engine
This commit is contained in:
		
						commit
						596c6b6c93
					
				
					 4 changed files with 167 additions and 0 deletions
				
			
		
							
								
								
									
										70
									
								
								searx/engines/dictzone.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								searx/engines/dictzone.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,70 @@ | |||
| """ | ||||
|  Dictzone | ||||
| 
 | ||||
|  @website     https://dictzone.com/ | ||||
|  @provide-api no | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from urlparse import urljoin | ||||
| from lxml import html | ||||
| from cgi import escape | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import is_valid_lang | ||||
| 
 | ||||
# NOTE(review): presumably read by the searx engine loader to decide which
# search categories this engine serves — confirm against the loader.
categories = ['general']
# URL template filled in by request(): both language placeholders receive
# the lower-cased language names, {query} receives the looked-up word.
url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
# NOTE(review): meaning of this value is not visible in this file; presumably
# a ranking weight consumed by the searx core — confirm.
weight = 100

# Matches a query that ends in "<from>-<to> <word>" (case-insensitive).
# The word itself must not contain spaces; anything before the language pair
# is skipped by the leading non-greedy ".*?".
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
# Selects the rows of the table with id "r"; response() iterates over them.
results_xpath = './/table[@id="r"]/tr'
| 
 | ||||
| 
 | ||||
def request(query, params):
    """Fill in the Dictzone lookup URL for a ``<from>-<to> <word>`` query.

    :param query: raw query as a UTF-8 encoded byte string.
    :param params: request parameter dict; ``params['url']`` is set when the
        query can be parsed and both languages are recognised.
    :returns: ``params``. It is returned unchanged when the query does not
        match ``parser_re`` or when either language is rejected by
        ``is_valid_lang``.
    """
    # This module targets Python 2 (see ``unicode`` below), hence the
    # Python 2 location of ``quote``.
    from urllib import quote

    match = parser_re.match(unicode(query, 'utf8'))
    if not match:
        return params

    from_lang, to_lang, query = match.groups()

    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    # The word goes into the URL *path*: percent-encode it so characters such
    # as '?' or '#' cannot change the URL structure.  Encoding to UTF-8 first
    # also keeps ``str.format`` from raising UnicodeEncodeError on non-ASCII
    # words, which happens when a byte-string template receives unicode text.
    params['url'] = url.format(from_lang=from_lang[2],
                               to_lang=to_lang[2],
                               query=quote(query.encode('utf8')))

    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    """Parse a Dictzone result page into a list of result dicts.

    :param resp: HTTP response object; ``resp.text`` holds the HTML page and
        ``resp.url`` the URL it was fetched from.
    :returns: list of dicts with the keys ``url``, ``title`` and ``content``,
        one per table row that has exactly two cells.
    """
    results = []

    dom = html.fromstring(resp.text)

    # The first matched row is skipped (presumably the table header — confirm
    # against the page markup).
    for k, row in enumerate(dom.xpath(results_xpath)[1:]):
        cells = row.xpath('./td')
        # Only rows shaped as (source term, translations) are usable.  This
        # replaces a bare ``except:`` around the tuple unpacking, which also
        # hid unrelated errors.
        if len(cells) != 2:
            continue
        from_result, to_results_raw = cells

        # Collect the non-blank link texts, reading each text only once.
        to_results = []
        for link in to_results_raw.xpath('./p/a'):
            text = link.text_content()
            if text.strip():
                to_results.append(text)

        results.append({
            # '?<row index>' gives every row its own URL.
            'url': urljoin(resp.url, '?%d' % k),
            'title': escape(from_result.text_content()),
            'content': escape('; '.join(to_results))
        })

    return results
							
								
								
									
										69
									
								
								searx/engines/translated.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								searx/engines/translated.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,69 @@ | |||
| """ | ||||
|  MyMemory Translated | ||||
| 
 | ||||
|  @website     https://mymemory.translated.net/ | ||||
|  @provide-api yes (https://mymemory.translated.net/doc/spec.php) | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| import re | ||||
| from urlparse import urljoin | ||||
| from lxml import html | ||||
| from cgi import escape | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import is_valid_lang | ||||
| 
 | ||||
# NOTE(review): presumably read by the searx engine loader to decide which
# search categories this engine serves — confirm against the loader.
categories = ['general']
# API URL template filled in by request(); {key} is either empty or a
# complete "&key=..." fragment.
url = 'http://api.mymemory.translated.net/get?q={query}' \
      '&langpair={from_lang}|{to_lang}{key}'
# Human-facing page for the same translation; used by response() as the
# result link.
web_url = 'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
# NOTE(review): meaning of this value is not visible in this file; presumably
# a ranking weight consumed by the searx core — confirm.
weight = 100

# Matches a query that ends in "<from>-<to> <text>" (case-insensitive); the
# text must be at least two characters long and may contain spaces.
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
# Optional API key; when empty, request() sends no key parameter.
# NOTE(review): presumably overridden from the engine settings — confirm.
api_key = ''
| 
 | ||||
| 
 | ||||
def request(query, params):
    """Fill in the MyMemory API URL for a ``<from>-<to> <text>`` query.

    :param query: raw query as a UTF-8 encoded byte string.
    :param params: request parameter dict.  On success ``url``, ``query``,
        ``from_lang`` and ``to_lang`` are set; the last three are read back
        by ``response`` through ``resp.search_params``.
    :returns: ``params``. It is returned unchanged when the query does not
        match ``parser_re`` or when either language is rejected by
        ``is_valid_lang``.
    """
    # This module targets Python 2 (see ``unicode`` below), hence the
    # Python 2 location of ``quote_plus``.
    from urllib import quote_plus

    match = parser_re.match(unicode(query, 'utf8'))
    if not match:
        return params

    from_lang, to_lang, query = match.groups()

    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    key_form = '&key=' + api_key if api_key else ''

    # The text goes into the query string: without encoding, characters such
    # as '&', '#' or '+' would truncate or alter the ``q`` parameter.
    # Encoding to UTF-8 first also keeps ``str.format`` from raising
    # UnicodeEncodeError on non-ASCII text.
    params['url'] = url.format(from_lang=from_lang[1],
                               to_lang=to_lang[1],
                               query=quote_plus(query.encode('utf8')),
                               key=key_form)
    # The unencoded text and the language tuples are kept for response().
    params['query'] = query
    params['from_lang'] = from_lang
    params['to_lang'] = to_lang

    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    """Turn a MyMemory API reply into a single search result.

    :param resp: HTTP response object.  ``resp.search_params`` must carry the
        ``query``, ``from_lang`` and ``to_lang`` entries stored by
        ``request``; ``resp.json()`` must contain
        ``responseData.translatedText``.
    :returns: a one-element list holding a dict with the keys ``url``,
        ``title`` and ``content``.
    """
    search_params = resp.search_params
    from_lang = search_params['from_lang']
    to_lang = search_params['to_lang']
    query = search_params['query']

    # ``query`` is unicode text.  Under Python 2, formatting a byte-string
    # template with non-ASCII unicode raises UnicodeEncodeError, so both
    # templates are used as unicode here.
    link = unicode(web_url).format(from_lang=from_lang[2],
                                   to_lang=to_lang[2],
                                   query=query)
    title = u'[{0}-{1}] {2}'.format(from_lang[1], to_lang[1], query)
    translation = resp.json()['responseData']['translatedText']

    return [{
        'url': escape(link),
        'title': escape(title),
        'content': escape(translation)
    }]
|  | @ -495,6 +495,19 @@ engines: | |||
|     timeout: 6.0 | ||||
|     categories : science | ||||
| 
 | ||||
|   - name : dictzone | ||||
|     engine : dictzone | ||||
|     shortcut : dc | ||||
| 
 | ||||
|   - name : mymemory translated | ||||
|     engine : translated | ||||
|     shortcut : tl | ||||
|     timeout : 5.0 | ||||
|     disabled : True | ||||
|     # You can use it without an API key, but you are then limited to 1000 words/day | ||||
|     # See : http://mymemory.translated.net/doc/usagelimits.php | ||||
|     # api_key : '' | ||||
| 
 | ||||
| #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ | ||||
| #  - name : blekko images | ||||
| #    engine : blekko_images | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ from HTMLParser import HTMLParser | |||
| from random import choice | ||||
| 
 | ||||
| from searx.version import VERSION_STRING | ||||
| from searx.languages import language_codes | ||||
| from searx import settings | ||||
| from searx import logger | ||||
| 
 | ||||
|  | @ -255,3 +256,17 @@ def get_torrent_size(filesize, filesize_multiplier): | |||
|         filesize = None | ||||
| 
 | ||||
|     return filesize | ||||
| 
 | ||||
| 
 | ||||
def is_valid_lang(lang):
    """Look up a language by two-letter code or by name.

    A two-character ``lang`` is compared against the first two characters of
    each entry's code; anything else is compared, case-insensitively, against
    each entry's name.

    :param lang: language code (e.g. ``'en'``) or language name.
    :returns: ``(True, code, name)`` for the first matching entry of
        ``language_codes``, where ``code`` is the first two characters of the
        entry's code and ``name`` is its lower-cased name; ``False`` when
        nothing matches.
    """
    is_abbr = (len(lang) == 2)
    # Loop-invariant, so computed once instead of on every iteration.
    wanted = lang.lower()

    for entry in language_codes:
        code = entry[0][:2]
        name = entry[1].lower()
        candidate = code if is_abbr else name
        if candidate == wanted:
            return (True, code, name)
    return False
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber