forked from zaclys/searxng
		
	[fix] soundcloud.com: fetch guest client_id dynamically
This commit is contained in:
		
							parent
							
								
									35a2bc5650
								
							
						
					
					
						commit
						dc55d87e92
					
				
					 1 changed file with 29 additions and 3 deletions
				
			
		|  | @ -10,17 +10,19 @@ | ||||||
|  @parse       url, title, content, publishedDate, embedded |  @parse       url, title, content, publishedDate, embedded | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | import re | ||||||
|  | from StringIO import StringIO | ||||||
| from json import loads | from json import loads | ||||||
|  | from lxml import etree | ||||||
| from urllib import urlencode, quote_plus | from urllib import urlencode, quote_plus | ||||||
| from dateutil import parser | from dateutil import parser | ||||||
|  | from searx import logger | ||||||
|  | from searx.poolrequests import get as http_get | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['music'] | categories = ['music'] | ||||||
| paging = True | paging = True | ||||||
| 
 | 
 | ||||||
| # api-key |  | ||||||
| guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28' |  | ||||||
| 
 |  | ||||||
| # search-url | # search-url | ||||||
| url = 'https://api.soundcloud.com/' | url = 'https://api.soundcloud.com/' | ||||||
| search_url = url + 'search?{query}'\ | search_url = url + 'search?{query}'\ | ||||||
|  | @ -35,6 +37,30 @@ embedded_url = '<iframe width="100%" height="166" ' +\ | ||||||
|     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' |     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def get_client_id(): | ||||||
|  |     response = http_get("https://soundcloud.com") | ||||||
|  |     rx_namespace = {"re": "http://exslt.org/regular-expressions"} | ||||||
|  | 
 | ||||||
|  |     if response.ok: | ||||||
|  |         tree = etree.parse(StringIO(response.content), etree.HTMLParser()) | ||||||
|  |         script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) | ||||||
|  |         app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] | ||||||
|  | 
 | ||||||
|  |         # extracts valid app_js urls from soundcloud.com content | ||||||
|  |         for app_js_url in app_js_urls: | ||||||
|  |             # gets app_js and searches for the clientid | ||||||
|  |             response = http_get(app_js_url) | ||||||
|  |             if response.ok: | ||||||
|  |                 cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) | ||||||
|  |                 if cids is not None and len(cids.groups()): | ||||||
|  |                     return cids.groups()[0] | ||||||
|  |     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") | ||||||
|  |     return "" | ||||||
|  | 
 | ||||||
|  | # api-key | ||||||
|  | guest_client_id = get_client_id() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     offset = (params['pageno'] - 1) * 20 |     offset = (params['pageno'] - 1) * 20 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 misnyo
						misnyo