forked from zaclys/searxng
		
	[fix] bandcamp: fix itemtype (album|track) and exceptions
BTW: polish implementation and show tracklist for albums
Closes: https://github.com/searxng/searxng/issues/883
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									bf2a2ed48f
								
							
						
					
					
						commit
						05c105b837
					
				
					 2 changed files with 42 additions and 16 deletions
				
			
		|  | @ -1,16 +1,23 @@ | |||
| """ | ||||
| Bandcamp (Music) | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| """Bandcamp (Music) | ||||
| 
 | ||||
| @website     https://bandcamp.com/ | ||||
| @provide-api no | ||||
| @results     HTML | ||||
| @parse       url, title, content, publishedDate, iframe_src, thumbnail | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urlparse, parse_qs | ||||
| from dateutil.parser import parse as dateparse | ||||
| from lxml import html | ||||
| from searx.utils import extract_text | ||||
| 
 | ||||
| from searx.utils import ( | ||||
|     eval_xpath_getindex, | ||||
|     eval_xpath_list, | ||||
|     extract_text, | ||||
| ) | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|  | @ -26,12 +33,13 @@ categories = ['music'] | |||
| paging = True | ||||
| 
 | ||||
| base_url = "https://bandcamp.com/" | ||||
| search_string = search_string = 'search?{query}&page={page}' | ||||
| iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/" | ||||
| search_string = 'search?{query}&page={page}' | ||||
| iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small" | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     '''pre-request callback | ||||
| 
 | ||||
|     params<dict>: | ||||
|       method  : POST/GET | ||||
|       headers : {} | ||||
|  | @ -42,37 +50,45 @@ def request(query, params): | |||
|     ''' | ||||
| 
 | ||||
|     search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) | ||||
| 
 | ||||
|     params['url'] = base_url + search_path | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| def response(resp): | ||||
|     '''post-response callback | ||||
| 
 | ||||
|     resp: requests response object | ||||
|     ''' | ||||
|     results = [] | ||||
|     tree = html.fromstring(resp.text) | ||||
|     search_results = tree.xpath('//li[contains(@class, "searchresult")]') | ||||
|     for result in search_results: | ||||
|         link = result.xpath('.//div[@class="itemurl"]/a')[0] | ||||
|         result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] | ||||
|     dom = html.fromstring(resp.text) | ||||
| 
 | ||||
|     for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'): | ||||
| 
 | ||||
|         link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None) | ||||
|         if link is None: | ||||
|             continue | ||||
| 
 | ||||
|         title = result.xpath('.//div[@class="heading"]/a/text()') | ||||
|         date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", "")) | ||||
|         content = result.xpath('.//div[@class="subhead"]/text()') | ||||
|         new_result = { | ||||
|             "url": extract_text(link), | ||||
|             "title": extract_text(title), | ||||
|             "content": extract_text(content), | ||||
|             "publishedDate": date, | ||||
|         } | ||||
| 
 | ||||
|         date = eval_xpath_getindex(result, '//div[@class="released"]/text()', 0, default=None) | ||||
|         if date: | ||||
|             new_result["publishedDate"] = dateparse(date.replace("released ", "")) | ||||
| 
 | ||||
|         thumbnail = result.xpath('.//div[@class="art"]/img/@src') | ||||
|         if thumbnail: | ||||
|             new_result['thumbnail'] = thumbnail[0] | ||||
|         if "album" in result.classes: | ||||
| 
 | ||||
|         result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] | ||||
|         itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower() | ||||
|         if "album" == itemtype: | ||||
|             new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id) | ||||
|         elif "track" in result.classes: | ||||
|         elif "track" == itemtype: | ||||
|             new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id) | ||||
| 
 | ||||
|         results.append(new_result) | ||||
|  |  | |||
|  | @ -17,3 +17,13 @@ iframe[src^="https://www.mixcloud.com"] { | |||
|   // 200px, sometimes 250px. | ||||
|   height: 250px; | ||||
| } | ||||
| 
 | ||||
| iframe[src^="https://bandcamp.com/EmbeddedPlayer"] { | ||||
|   // show playlist | ||||
|   height: 350px; | ||||
| } | ||||
| 
 | ||||
| iframe[src^="https://bandcamp.com/EmbeddedPlayer/track"] { | ||||
|   // hide playlist | ||||
|   height: 120px; | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser