Merge pull request #900 from return42/fix-883

[fix] bandcamp: fix itemtype (album|track) and exceptions
This commit is contained in:
Alexandre Flament 2022-02-19 13:42:53 +01:00 committed by GitHub
commit ace5401632
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 47 additions and 21 deletions

View File

@ -1,16 +1,23 @@
""" # SPDX-License-Identifier: AGPL-3.0-or-later
Bandcamp (Music) # lint: pylint
"""Bandcamp (Music)
@website https://bandcamp.com/ @website https://bandcamp.com/
@provide-api no @provide-api no
@results HTML @results HTML
@parse url, title, content, publishedDate, iframe_src, thumbnail @parse url, title, content, publishedDate, iframe_src, thumbnail
""" """
from urllib.parse import urlencode, urlparse, parse_qs from urllib.parse import urlencode, urlparse, parse_qs
from dateutil.parser import parse as dateparse from dateutil.parser import parse as dateparse
from lxml import html from lxml import html
from searx.utils import extract_text
from searx.utils import (
eval_xpath_getindex,
eval_xpath_list,
extract_text,
)
# about # about
about = { about = {
@ -26,12 +33,13 @@ categories = ['music']
paging = True paging = True
base_url = "https://bandcamp.com/" base_url = "https://bandcamp.com/"
search_string = search_string = 'search?{query}&page={page}' search_string = 'search?{query}&page={page}'
iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=ffffff/linkcol=0687f5/tracklist=false/artwork=small/transparent=true/" iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small"
def request(query, params): def request(query, params):
'''pre-request callback '''pre-request callback
params<dict>: params<dict>:
method : POST/GET method : POST/GET
headers : {} headers : {}
@ -42,37 +50,45 @@ def request(query, params):
''' '''
search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno'])
params['url'] = base_url + search_path params['url'] = base_url + search_path
return params return params
def response(resp): def response(resp):
'''post-response callback '''post-response callback
resp: requests response object resp: requests response object
''' '''
results = [] results = []
tree = html.fromstring(resp.text) dom = html.fromstring(resp.text)
search_results = tree.xpath('//li[contains(@class, "searchresult")]')
for result in search_results: for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'):
link = result.xpath('.//div[@class="itemurl"]/a')[0]
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0] link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None)
if link is None:
continue
title = result.xpath('.//div[@class="heading"]/a/text()') title = result.xpath('.//div[@class="heading"]/a/text()')
date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", ""))
content = result.xpath('.//div[@class="subhead"]/text()') content = result.xpath('.//div[@class="subhead"]/text()')
new_result = { new_result = {
"url": extract_text(link), "url": extract_text(link),
"title": extract_text(title), "title": extract_text(title),
"content": extract_text(content), "content": extract_text(content),
"publishedDate": date,
} }
date = eval_xpath_getindex(result, '//div[@class="released"]/text()', 0, default=None)
if date:
new_result["publishedDate"] = dateparse(date.replace("released ", ""))
thumbnail = result.xpath('.//div[@class="art"]/img/@src') thumbnail = result.xpath('.//div[@class="art"]/img/@src')
if thumbnail: if thumbnail:
new_result['thumbnail'] = thumbnail[0] new_result['thumbnail'] = thumbnail[0]
if "album" in result.classes:
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower()
if "album" == itemtype:
new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id) new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id)
elif "track" in result.classes: elif "track" == itemtype:
new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id) new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id)
results.append(new_result) results.append(new_result)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -17,3 +17,13 @@ iframe[src^="https://www.mixcloud.com"] {
// 200px, sometimes 250px. // 200px, sometimes 250px.
height: 250px; height: 250px;
} }
iframe[src^="https://bandcamp.com/EmbeddedPlayer"] {
// show playlist
height: 350px;
}
iframe[src^="https://bandcamp.com/EmbeddedPlayer/track"] {
// hide playlist
height: 120px;
}