forked from zaclys/searxng
		
	Merge pull request #2562 from dalf/mod-json-engine
[mod] json_engine: add content_html_to_text and title_html_to_text
This commit is contained in:
		
						commit
						d76660463b
					
				
					 2 changed files with 19 additions and 5 deletions
				
			
		|  | @ -3,13 +3,15 @@ | ||||||
| from collections.abc import Iterable | from collections.abc import Iterable | ||||||
| from json import loads | from json import loads | ||||||
| from urllib.parse import urlencode | from urllib.parse import urlencode | ||||||
| from searx.utils import to_string | from searx.utils import to_string, html_to_text | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| search_url = None | search_url = None | ||||||
| url_query = None | url_query = None | ||||||
| content_query = None | content_query = None | ||||||
| title_query = None | title_query = None | ||||||
|  | content_html_to_text = False | ||||||
|  | title_html_to_text = False | ||||||
| paging = False | paging = False | ||||||
| suggestion_query = '' | suggestion_query = '' | ||||||
| results_query = '' | results_query = '' | ||||||
|  | @ -92,9 +94,17 @@ def request(query, params): | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def identity(arg): | ||||||
|  |     return arg | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def response(resp): | def response(resp): | ||||||
|     results = [] |     results = [] | ||||||
|     json = loads(resp.text) |     json = loads(resp.text) | ||||||
|  | 
 | ||||||
|  |     title_filter = html_to_text if title_html_to_text else identity | ||||||
|  |     content_filter = html_to_text if content_html_to_text else identity | ||||||
|  | 
 | ||||||
|     if results_query: |     if results_query: | ||||||
|         rs = query(json, results_query) |         rs = query(json, results_query) | ||||||
|         if not len(rs): |         if not len(rs): | ||||||
|  | @ -111,8 +121,8 @@ def response(resp): | ||||||
|                 content = "" |                 content = "" | ||||||
|             results.append({ |             results.append({ | ||||||
|                 'url': to_string(url), |                 'url': to_string(url), | ||||||
|                 'title': to_string(title), |                 'title': title_filter(to_string(title)), | ||||||
|                 'content': to_string(content), |                 'content': content_filter(to_string(content)), | ||||||
|             }) |             }) | ||||||
|     else: |     else: | ||||||
|         for url, title, content in zip( |         for url, title, content in zip( | ||||||
|  | @ -122,8 +132,8 @@ def response(resp): | ||||||
|         ): |         ): | ||||||
|             results.append({ |             results.append({ | ||||||
|                 'url': to_string(url), |                 'url': to_string(url), | ||||||
|                 'title': to_string(title), |                 'title': title_filter(to_string(title)), | ||||||
|                 'content': to_string(content), |                 'content': content_filter(to_string(content)), | ||||||
|             }) |             }) | ||||||
| 
 | 
 | ||||||
|     if not suggestion_query: |     if not suggestion_query: | ||||||
|  |  | ||||||
|  | @ -267,7 +267,9 @@ engines: | ||||||
|     search_url : https://search.crossref.org/dois?q={query}&page={pageno} |     search_url : https://search.crossref.org/dois?q={query}&page={pageno} | ||||||
|     url_query : doi |     url_query : doi | ||||||
|     title_query : title |     title_query : title | ||||||
|  |     title_html_to_text: True | ||||||
|     content_query : fullCitation |     content_query : fullCitation | ||||||
|  |     content_html_to_text: True | ||||||
|     categories : science |     categories : science | ||||||
|     shortcut : cr |     shortcut : cr | ||||||
|     about: |     about: | ||||||
|  | @ -757,6 +759,7 @@ engines: | ||||||
|     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ |     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ | ||||||
|     title_query : metadata/oaf:entity/oaf:result/title/$ |     title_query : metadata/oaf:entity/oaf:result/title/$ | ||||||
|     content_query : metadata/oaf:entity/oaf:result/description/$ |     content_query : metadata/oaf:entity/oaf:result/description/$ | ||||||
|  |     content_html_to_text: True | ||||||
|     categories : science |     categories : science | ||||||
|     shortcut : oad |     shortcut : oad | ||||||
|     timeout: 5.0 |     timeout: 5.0 | ||||||
|  | @ -776,6 +779,7 @@ engines: | ||||||
|     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ |     url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ | ||||||
|     title_query : metadata/oaf:entity/oaf:result/title/$ |     title_query : metadata/oaf:entity/oaf:result/title/$ | ||||||
|     content_query : metadata/oaf:entity/oaf:result/description/$ |     content_query : metadata/oaf:entity/oaf:result/description/$ | ||||||
|  |     content_html_to_text: True | ||||||
|     categories : science |     categories : science | ||||||
|     shortcut : oap |     shortcut : oap | ||||||
|     timeout: 5.0 |     timeout: 5.0 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Alexandre Flament
						Alexandre Flament