Mirror of https://github.com/searxng/searxng
Merge remote-tracking branch 'searx/master'

commit 48720e20a8 (Alexandre Flament)
7 changed files with 182 additions and 4 deletions
							
								
								
									
searx/engines/bandcamp.py (new file, 73 additions)
"""
Bandcamp (Music)

@website     https://bandcamp.com/
@provide-api no
@results     HTML
@parse       url, title, content, publishedDate, embedded, thumbnail
"""

from urllib.parse import urlencode, urlparse, parse_qs
from dateutil.parser import parse as dateparse
from lxml import html
from searx.utils import extract_text

categories = ['music']
paging = True

base_url = "https://bandcamp.com/"
search_string = 'search?{query}&page={page}'
embedded_url = '''<iframe width="100%" height="166"
    scrolling="no" frameborder="no"
    data-src="https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/"
></iframe>'''


def request(query, params):
    '''pre-request callback
    params<dict>:
      method  : POST/GET
      headers : {}
      data    : {} # if method == POST
      url     : ''
      category: 'search category'
      pageno  : 1 # number of the requested page
    '''

    search_path = search_string.format(
        query=urlencode({'q': query}),
        page=params['pageno'])

    params['url'] = base_url + search_path

    return params


def response(resp):
    '''post-response callback
    resp: requests response object
    '''
    results = []
    tree = html.fromstring(resp.text)
    search_results = tree.xpath('//li[contains(@class, "searchresult")]')
    for result in search_results:
        link = result.xpath('.//div[@class="itemurl"]/a')[0]
        result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
        title = result.xpath('.//div[@class="heading"]/a/text()')
        content = result.xpath('.//div[@class="subhead"]/text()')
        # scope the date lookup to this result (".//"); an absolute "//" path
        # would give every result the first "released" div on the whole page
        date = None
        released = result.xpath('.//div[@class="released"]/text()')
        if released:
            date = dateparse(released[0].replace("released ", ""))
        new_result = {
            "url": extract_text(link),
            "title": extract_text(title),
            "content": extract_text(content),
            "publishedDate": date,
        }
        thumbnail = result.xpath('.//div[@class="art"]/img/@src')
        if thumbnail:
            new_result['thumbnail'] = thumbnail[0]
        # albums and tracks use different embedded-player URLs
        if "album" in result.classes:
            new_result["embedded"] = embedded_url.format(type='album', result_id=result_id)
        elif "track" in result.classes:
            new_result["embedded"] = embedded_url.format(type='track', result_id=result_id)
        results.append(new_result)
    return results
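A quick sketch of how the request() callback above composes the search URL (the artist query and params values are arbitrary examples, not part of the commit):

    params = {'method': 'GET', 'headers': {}, 'data': {},
              'url': '', 'category': 'music', 'pageno': 2}
    request('aesop rock', params)
    print(params['url'])
    # -> https://bandcamp.com/search?q=aesop+rock&page=2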
@@ -242,7 +242,7 @@ def response(resp):
     if answer:
         results.append({'answer': ' '.join(answer)})
     else:
-        logger.debug("did not found 'answer'")
+        logger.debug("did not find 'answer'")

     # results --> number_of_results
     try:
							
								
								
									
searx/engines/sjp.py (new file, 92 additions)
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Słownik Języka Polskiego (general)

"""

from lxml.html import fromstring
from searx import logger
from searx.utils import extract_text
from searx.raise_for_httperror import raise_for_httperror

logger = logger.getChild('sjp engine')

# about
about = {
    "website": 'https://sjp.pwn.pl',
    "wikidata_id": 'Q55117369',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = ['general']
paging = False

URL = 'https://sjp.pwn.pl'
SEARCH_URL = URL + '/szukaj/{query}.html'

word_xpath = '//div[@class="query"]'
# result containers of the three dictionaries hosted on sjp.pwn.pl
dict_xpath = ['//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]',
              '//div[@class="wyniki sjp-wyniki sjp-anchor"]',
              '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]']


def request(query, params):
    params['url'] = SEARCH_URL.format(query=query)
    logger.debug(f"query_url --> {params['url']}")
    return params


def response(resp):
    results = []

    raise_for_httperror(resp)
    dom = fromstring(resp.text)
    word = extract_text(dom.xpath(word_xpath))

    definitions = []

    for dict_src in dict_xpath:
        for src in dom.xpath(dict_src):
            src_text = extract_text(src.xpath('.//span[@class="entry-head-title"]/text()')).strip()

            src_defs = []
            for def_item in src.xpath('.//div[contains(@class, "ribbon-element")]'):
                if def_item.xpath('./div[@class="znacz"]'):
                    # numbered senses: strip the leading "1." style counters
                    sub_defs = []
                    for def_sub_item in def_item.xpath('./div[@class="znacz"]'):
                        def_sub_text = extract_text(def_sub_item).lstrip('0123456789. ')
                        sub_defs.append(def_sub_text)
                    src_defs.append((word, sub_defs))
                else:
                    def_text = extract_text(def_item).strip()
                    def_link = def_item.xpath('./span/a/@href')
                    # guard the index: not every definition carries a link
                    if def_link and 'doroszewski' in def_link[0]:
                        def_text = f"<a href='{def_link[0]}'>{def_text}</a>"
                    src_defs.append((def_text, ''))

            definitions.append((src_text, src_defs))

    if not definitions:
        return results

    # render the collected definitions as nested <ul>/<ol> infobox markup
    infobox = ''
    for src in definitions:
        infobox += f"<div><small>{src[0]}</small>"
        infobox += "<ul>"
        for (def_text, sub_def) in src[1]:
            infobox += f"<li>{def_text}</li>"
            if sub_def:
                infobox += "<ol>"
                for sub_def_text in sub_def:
                    infobox += f"<li>{sub_def_text}</li>"
                infobox += "</ol>"
        infobox += "</ul></div>"

    results.append({
        'infobox': word,
        'content': infobox,
    })

    return results
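For orientation, the intermediate structure that response() collects before rendering (the sample words and glosses below are hypothetical, not taken from the commit):

    # one (source, [(definition, sub_definitions)]) tuple per dictionary hit
    definitions = [
        ('Słownik języka polskiego', [
            ('zamek I «budowla obronna»', ''),               # plain definition
            ('zamek', ['urządzenie do zamykania', 'suwak']), # numbered senses
        ]),
    ]
    # the rendering loop above flattens this into:
    # <div><small>Słownik języka polskiego</small><ul>
    #   <li>zamek I «budowla obronna»</li>
    #   <li>zamek</li><ol><li>urządzenie do zamykania</li><li>suwak</li></ol>
    # </ul></div>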
searx/settings.yml

@@ -196,6 +196,11 @@ engines:
 #    engine : base
 #    shortcut : bs

+  - name: bandcamp
+    engine: bandcamp
+    shortcut: bc
+    categories: music
+
   - name : wikipedia
     engine : wikipedia
     shortcut : wp

@@ -1280,6 +1285,14 @@ engines:
     timeout: 5.0
     disabled: True

+  - name: słownik języka polskiego
+    engine: sjp
+    shortcut: sjp
+    base_url: https://sjp.pwn.pl/
+    categories: general
+    timeout: 5.0
+    disabled: True
+
 # Doku engine lets you access to any Doku wiki instance:
 # A public one or a privete/corporate one.
 #  - name : ubuntuwiki
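These entries register the engines under the bang shortcuts !bc and !sjp; the sjp engine ships with disabled: True, so it stays off until an instance enables it. A minimal sketch of what such an entry looks like once parsed (assuming PyYAML, which searx already uses for settings.yml):

    import yaml

    snippet = """
    engines:
      - name: bandcamp
        engine: bandcamp
        shortcut: bc
        categories: music
    """
    engine = yaml.safe_load(snippet)['engines'][0]
    print(engine['shortcut'])  # -> bc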
							
								
								
									
										
searx/static/themes/oscar/img/icons/bandcamp.png (new binary file, 919 B; not shown)
@@ -13,10 +13,10 @@
 </div>
 {%- endif -%}

-{%- if result.img_src -%}
+{%- if result.img_src or result.thumbnail -%}
 <div class="container-fluid">{{- "" -}}
     <div class="row">{{- "" -}}
-        <img src="{{ image_proxify(result.img_src) }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
+        <img src="{{ image_proxify(result.img_src or result.thumbnail) }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
         {%- if result.content %}<p class="result-content col-xs-8 col-sm-8 col-md-8">{{ result.content|safe }}</p>{% endif -%}
     </div>{{- "" -}}
 </div>
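The fallback matters for the new bandcamp engine, which fills thumbnail rather than img_src; with this change the oscar theme proxies whichever of the two fields a result provides.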
@@ -31,7 +31,7 @@ Example to run it from python:
 ... engine_cs = list(searx.engines.categories.keys())
 ... # load module
 ... spec = importlib.util.spec_from_file_location(
-...     'utils.standalone_searx', 'utils/standalone_searx.py')
+...     'utils.standalone_searx', 'searx_extra/standalone_searx.py')
 ... sas = importlib.util.module_from_spec(spec)
 ... spec.loader.exec_module(sas)
 ... # use function from module
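Only the docstring example changes here: the load path now points at searx_extra/standalone_searx.py, apparently reflecting the script's move out of utils/.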