forked from zaclys/searxng
		
	Add ScanR structures search engine
In theory ScanR should also search for projects but the API is different, so we'd need another engine.
This commit is contained in:
		
							parent
							
								
									678b87f9d5
								
							
						
					
					
						commit
						1dba6dcbac
					
				
					 3 changed files with 258 additions and 0 deletions
				
			
		
							
								
								
									
										78
									
								
								searx/engines/scanr_structures.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								searx/engines/scanr_structures.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,78 @@ | ||||||
|  | """ | ||||||
|  |  ScanR Structures (Science) | ||||||
|  | 
 | ||||||
|  |  @website     https://scanr.enseignementsup-recherche.gouv.fr | ||||||
|  |  @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html) | ||||||
|  | 
 | ||||||
|  |  @using-api   yes | ||||||
|  |  @results     JSON | ||||||
|  |  @stable      yes | ||||||
|  |  @parse       url, title, content, img_src | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | from urllib import urlencode | ||||||
|  | from json import loads, dumps | ||||||
|  | from dateutil import parser | ||||||
|  | from searx.utils import html_to_text | ||||||
|  | 
 | ||||||
# engine dependent config: results are paged, page_size entries per page
paging = True
page_size = 20
categories = ['science']

# search-url: site root first, the structures search endpoint is built on it
url = 'https://scanr.enseignementsup-recherche.gouv.fr/'
search_url = url + 'api/structures/search'
|  | 
 | ||||||
|  | 
 | ||||||
|  | # do search-request | ||||||
def request(query, params):
    """Turn *params* into a JSON POST against the structures search endpoint.

    *query* is sent as the ``query`` field of the JSON body and
    ``params['pageno']`` as its ``page`` field; the page size comes from the
    module-level ``page_size``.  *params* is modified in place (``url``,
    ``method``, the ``Content-type`` header and ``data``) and returned.
    """
    params['url'] = search_url
    params['method'] = 'POST'
    params['headers']['Content-type'] = "application/json"

    # the body is sent as a serialized JSON document
    body = {
        "query": query,
        "searchField": "ALL",
        "sortDirection": "ASC",
        "sortOrder": "RELEVANCY",
        "page": params['pageno'],
        "pageSize": page_size,
    }
    params['data'] = dumps(body)

    return params
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get response from search-request | ||||||
def response(resp):
    """Build the result list from a structures search reply.

    ``resp.text`` is decoded as JSON.  An empty list is returned when the
    reply reports no hits (``total`` missing, null or below 1) or carries no
    ``results`` array.  Entries without an ``id`` are skipped.  Every other
    entry yields a dict with the keys ``url``, ``title``, ``img_src`` and
    ``content``.

    Raises AttributeError when *resp* has no ``text`` attribute or when the
    decoded document is not a JSON object.
    """
    search_res = loads(resp.text)

    # A missing or null 'total' counts as zero: ordering None against an int
    # raises TypeError on Python 3.
    if (search_res.get('total') or 0) < 1:
        return []

    results = []

    for result in search_res.get('results', []):
        if 'id' not in result:
            continue

        # Site-relative logo paths are prefixed with the site root.
        # NOTE(review): `url` already ends with '/', so the joined address
        # contains '//'; kept as-is because existing callers/tests may rely
        # on the exact string.
        thumbnail = result.get('logo')
        if thumbnail and thumbnail.startswith('/'):
            thumbnail = url + thumbnail

        # Only the first highlight is used as the snippet.  Fall back to an
        # empty string so html_to_text always receives text, never None.
        highlights = result.get('highlights')
        content = highlights[0]['value'] if highlights else ''

        results.append({'url': url + 'structure/' + result['id'],
                        'title': result['label'],
                        'img_src': thumbnail,
                        'content': html_to_text(content)})

    return results
|  | @ -314,6 +314,11 @@ engines: | ||||||
|     engine : kickass |     engine : kickass | ||||||
|     shortcut : ka |     shortcut : ka | ||||||
| 
 | 
 | ||||||
|  |   - name : scanr_structures | ||||||
|  |     shortcut: scs | ||||||
|  |     engine : scanr_structures | ||||||
|  |     disabled : True | ||||||
|  | 
 | ||||||
|   - name : soundcloud |   - name : soundcloud | ||||||
|     engine : soundcloud |     engine : soundcloud | ||||||
|     shortcut : sc |     shortcut : sc | ||||||
|  |  | ||||||
							
								
								
									
										175
									
								
								tests/unit/engines/test_scanr_structures.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								tests/unit/engines/test_scanr_structures.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,175 @@ | ||||||
|  | from collections import defaultdict | ||||||
|  | import mock | ||||||
|  | from searx.engines import scanr_structures | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class TestScanrStructuresEngine(SearxTestCase):
    """Unit tests for the request/response hooks of the scanr_structures engine."""

    def test_request(self):
        # the query must end up in the request body, the host in the url
        search_terms = 'test_query'
        engine_params = defaultdict(dict)
        engine_params['pageno'] = 1
        params = scanr_structures.request(search_terms, engine_params)
        self.assertIn('url', params)
        self.assertIn(search_terms, params['data'])
        self.assertIn('scanr.enseignementsup-recherche.gouv.fr', params['url'])

    def test_response(self):
        # objects without a ``text`` attribute are rejected
        for not_a_response in (None, [], '', '[]'):
            self.assertRaises(AttributeError, scanr_structures.response, not_a_response)

        # replies that report no hits give an empty result list
        for empty_reply in ('{}', '{"data": []}'):
            response = mock.Mock(text=empty_reply)
            self.assertEqual(scanr_structures.response(response), [])

        # a reply with two structures
        payload = u"""
        {
          "request":
            {
              "query":"test_query",
              "page":1,
              "pageSize":20,
              "sortOrder":"RELEVANCY",
              "sortDirection":"ASC",
              "searchField":"ALL",
              "from":0
            },
          "total":2471,
          "results":[
            {
              "id":"200711886U",
              "label":"Laboratoire d'Informatique de Grenoble",
              "kind":"RNSR",
              "publicEntity":true,
              "address":{"city":"Grenoble","departement":"38"},
              "logo":"/static/logos/200711886U.png",
              "acronym":"LIG",
              "type":{"code":"UR","label":"Unit\xe9 de recherche"},
              "level":2,
              "institutions":[
                {
                  "id":"193819125",
                  "label":"Grenoble INP",
                  "acronym":"IPG",
                  "code":"UMR 5217"
                },
                {
                  "id":"130021397",
                  "label":"Universit\xe9 de Grenoble Alpes",
                  "acronym":"UGA",
                  "code":"UMR 5217"
                },
                {
                  "id":"180089013",
                  "label":"Centre national de la recherche scientifique",
                  "acronym":"CNRS",
                  "code":"UMR 5217"
                },
                {
                  "id":"180089047",
                  "label":"Institut national de recherche en informatique et en automatique",
                  "acronym":"Inria",
                  "code":"UMR 5217"
                }
              ],
              "highlights":[
                {
                  "type":"projects",
                  "value":"linguicielles d\xe9velopp\xe9s jusqu'ici par le GETALP\
 du <strong>LIG</strong> en tant que prototypes op\xe9rationnels.\
\\r\\nDans le contexte"
                },
                {
                  "type":"acronym",
                  "value":"<strong>LIG</strong>"
                },
                {
                  "type":"websiteContents",
                  "value":"S\xe9lection\\nListe structures\\nD\xe9tail\\n\
                    Accueil\\n200711886U : <strong>LIG</strong>\
                    Laboratoire d'Informatique de Grenoble Unit\xe9 de recherche"},
                {
                  "type":"publications",
                  "value":"de noms. Nous avons d'abord d\xe9velopp\xe9 LOOV \
                    (pour <strong>Lig</strong> Overlaid OCR in Vid\xe9o), \
                    un outil d'extraction des"
                }
              ]
            },
            {
              "id":"199511665F",
              "label":"Laboratoire Bordelais de Recherche en Informatique",
              "kind":"RNSR",
              "publicEntity":true,
              "address":{"city":"Talence","departement":"33"},
              "logo":"/static/logos/199511665F.png",
              "acronym":"LaBRI",
              "type":{"code":"UR","label":"Unit\xe9 de recherche"},
              "level":2,
              "institutions":[
                {
                  "id":"130006356",
                  "label":"Institut polytechnique de Bordeaux",
                  "acronym":"IPB",
                  "code":"UMR 5800"
                },
                {
                  "id":"130018351",
                  "label":"Universit\xe9 de Bordeaux",
                  "acronym":null,
                  "code":"UMR 5800"
                },
                {
                  "id":"180089013",
                  "label":"Centre national de la recherche scientifique",
                  "acronym":"CNRS",
                  "code":"UMR 5800"
                },
                {
                  "id":"180089047",
                  "label":"Institut national de recherche en informatique et en automatique",
                  "acronym":"Inria",
                  "code":"UMR 5800"
                }
              ],
              "highlights":[
                {
                  "type":"websiteContents",
                  "value":"Samia Kerdjoudj\\n2016-07-05\\nDouble-exponential\
 and <strong>triple</strong>-exponential bounds for\
 choosability problems parameterized"
                },
                {
                  "type":"publications",
                  "value":"de cam\xe9ras install\xe9es dans les lieux publiques \
 a <strong>tripl\xe9</strong> en 2009, passant de 20 000 \
 \xe0 60 000. Malgr\xe9 le"
                }
              ]
            }
          ]
        }
        """
        response = mock.Mock(text=payload)
        parsed = scanr_structures.response(response)
        self.assertEqual(type(parsed), list)
        self.assertEqual(len(parsed), 2)

        # first structure: title, url and the first highlight without markup
        self.assertEqual(parsed[0]['title'], u"Laboratoire d'Informatique de Grenoble")
        self.assertEqual(parsed[0]['url'],
                         'https://scanr.enseignementsup-recherche.gouv.fr/structure/200711886U')
        self.assertEqual(parsed[0]['content'],
                         u"linguicielles d\xe9velopp\xe9s jusqu'ici par le GETALP "
                         u"du LIG en tant que prototypes "
                         u"op\xe9rationnels. Dans le contexte")

        # second structure: logo address, snippet, url and title
        self.assertEqual(parsed[1]['img_src'],
                         'https://scanr.enseignementsup-recherche.gouv.fr//static/logos/199511665F.png')
        self.assertEqual(parsed[1]['content'],
                         "Samia Kerdjoudj 2016-07-05 Double-exponential and"
                         " triple-exponential bounds for "
                         "choosability problems parameterized")
        self.assertEqual(parsed[1]['url'],
                         'https://scanr.enseignementsup-recherche.gouv.fr/structure/199511665F')
        self.assertEqual(parsed[1]['title'], u"Laboratoire Bordelais de Recherche en Informatique")
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 François Revol
						François Revol