forked from zaclys/searxng
		
	Merge pull request #2562 from dalf/mod-json-engine
[mod] json_engine: add content_html_to_text and title_html_to_text
This commit is contained in:
		
						commit
						d76660463b
					
				
					 2 changed files with 19 additions and 5 deletions
				
			
		| 
						 | 
				
			
			@ -3,13 +3,15 @@
 | 
			
		|||
from collections.abc import Iterable
 | 
			
		||||
from json import loads
 | 
			
		||||
from urllib.parse import urlencode
 | 
			
		||||
from searx.utils import to_string
 | 
			
		||||
from searx.utils import to_string, html_to_text
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
search_url = None
 | 
			
		||||
url_query = None
 | 
			
		||||
content_query = None
 | 
			
		||||
title_query = None
 | 
			
		||||
content_html_to_text = False
 | 
			
		||||
title_html_to_text = False
 | 
			
		||||
paging = False
 | 
			
		||||
suggestion_query = ''
 | 
			
		||||
results_query = ''
 | 
			
		||||
| 
						 | 
				
			
			@ -92,9 +94,17 @@ def request(query, params):
 | 
			
		|||
    return params
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def identity(arg):
 | 
			
		||||
    return arg
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def response(resp):
 | 
			
		||||
    results = []
 | 
			
		||||
    json = loads(resp.text)
 | 
			
		||||
 | 
			
		||||
    title_filter = html_to_text if title_html_to_text else identity
 | 
			
		||||
    content_filter = html_to_text if content_html_to_text else identity
 | 
			
		||||
 | 
			
		||||
    if results_query:
 | 
			
		||||
        rs = query(json, results_query)
 | 
			
		||||
        if not len(rs):
 | 
			
		||||
| 
						 | 
				
			
			@ -111,8 +121,8 @@ def response(resp):
 | 
			
		|||
                content = ""
 | 
			
		||||
            results.append({
 | 
			
		||||
                'url': to_string(url),
 | 
			
		||||
                'title': to_string(title),
 | 
			
		||||
                'content': to_string(content),
 | 
			
		||||
                'title': title_filter(to_string(title)),
 | 
			
		||||
                'content': content_filter(to_string(content)),
 | 
			
		||||
            })
 | 
			
		||||
    else:
 | 
			
		||||
        for url, title, content in zip(
 | 
			
		||||
| 
						 | 
				
			
			@ -122,8 +132,8 @@ def response(resp):
 | 
			
		|||
        ):
 | 
			
		||||
            results.append({
 | 
			
		||||
                'url': to_string(url),
 | 
			
		||||
                'title': to_string(title),
 | 
			
		||||
                'content': to_string(content),
 | 
			
		||||
                'title': title_filter(to_string(title)),
 | 
			
		||||
                'content': content_filter(to_string(content)),
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
    if not suggestion_query:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -267,7 +267,9 @@ engines:
 | 
			
		|||
    search_url : https://search.crossref.org/dois?q={query}&page={pageno}
 | 
			
		||||
    url_query : doi
 | 
			
		||||
    title_query : title
 | 
			
		||||
    title_html_to_text: True
 | 
			
		||||
    content_query : fullCitation
 | 
			
		||||
    content_html_to_text: True
 | 
			
		||||
    categories : science
 | 
			
		||||
    shortcut : cr
 | 
			
		||||
    about:
 | 
			
		||||
| 
						 | 
				
			
			@ -757,6 +759,7 @@ engines:
 | 
			
		|||
    url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
 | 
			
		||||
    title_query : metadata/oaf:entity/oaf:result/title/$
 | 
			
		||||
    content_query : metadata/oaf:entity/oaf:result/description/$
 | 
			
		||||
    content_html_to_text: True
 | 
			
		||||
    categories : science
 | 
			
		||||
    shortcut : oad
 | 
			
		||||
    timeout: 5.0
 | 
			
		||||
| 
						 | 
				
			
			@ -776,6 +779,7 @@ engines:
 | 
			
		|||
    url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
 | 
			
		||||
    title_query : metadata/oaf:entity/oaf:result/title/$
 | 
			
		||||
    content_query : metadata/oaf:entity/oaf:result/description/$
 | 
			
		||||
    content_html_to_text: True
 | 
			
		||||
    categories : science
 | 
			
		||||
    shortcut : oap
 | 
			
		||||
    timeout: 5.0
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue