From 87baa74a863ac74ae4c86bbfcb04148ba7f70696 Mon Sep 17 00:00:00 2001 From: Venca24 Date: Thu, 25 Jul 2019 07:46:41 +0200 Subject: [PATCH] [fix] fixes google play engines and adds thumbnails to their results (#1612) fix google play apps, google play movies, google play music engines xpath engine: thumbnail_xpath can define an optional thumbnail --- searx/engines/xpath.py | 18 ++++++++++++++++-- searx/settings.yml | 36 +++++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 50f98d935..a5f30d86d 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -7,6 +7,7 @@ search_url = None url_xpath = None content_xpath = None title_xpath = None +thumbnail_xpath = False paging = False suggestion_xpath = '' results_xpath = '' @@ -40,7 +41,9 @@ def extract_text(xpath_results): return ''.join(xpath_results) else: # it's a element - text = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False) + text = html.tostring( + xpath_results, encoding='unicode', method='text', with_tail=False + ) text = text.strip().replace('\n', ' ') return ' '.join(text.split()) @@ -105,7 +108,18 @@ def response(resp): url = extract_url(result.xpath(url_xpath), search_url) title = extract_text(result.xpath(title_xpath)) content = extract_text(result.xpath(content_xpath)) - results.append({'url': url, 'title': title, 'content': content}) + tmp_result = {'url': url, 'title': title, 'content': content} + + # add thumbnail if available + thumbnail = None + if thumbnail_xpath: + thumbnail = extract_url( + result.xpath(thumbnail_xpath), search_url + ) + if thumbnail: + tmp_result['img_src'] = thumbnail + + results.append(tmp_result) else: for url, title, content in zip( (extract_url(x, search_url) for diff --git a/searx/settings.yml b/searx/settings.yml index 10a049872..53dfaae2c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -311,31 +311,37 @@ engines: shortcut 
: gos - name : google play apps - engine : xpath - search_url : https://play.google.com/store/search?q={query}&c=apps - url_xpath : //a[@class="title"]/@href - title_xpath : //a[@class="title"] - content_xpath : //a[@class="subtitle"] + engine : xpath + search_url : https://play.google.com/store/search?q={query}&c=apps + results_xpath : '//div[@class="WHE7ib mpg5gc"]' + title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a' + url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href' + content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]' + thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src' categories : files shortcut : gpa disabled : True - name : google play movies - engine : xpath - search_url : https://play.google.com/store/search?q={query}&c=movies - url_xpath : //a[@class="title"]/@href - title_xpath : //a[@class="title"]/@title - content_xpath : //a[contains(@class, "subtitle")] + engine : xpath + search_url : https://play.google.com/store/search?q={query}&c=movies + results_xpath : '//div[@class="WHE7ib mpg5gc"]' + title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a' + url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href' + content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]' + thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src' categories : videos shortcut : gpm disabled : True - name : google play music - engine : xpath - search_url : https://play.google.com/store/search?q={query}&c=music - url_xpath : //a[@class="title"]/@href - title_xpath : //a[@class="title"] - content_xpath : //a[@class="subtitle"] + engine : xpath + search_url : https://play.google.com/store/search?q={query}&c=music + results_xpath : '//div[@class="WHE7ib mpg5gc"]' + title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a' + url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href' + content_xpath : 
'.//div[@class="RZEgze"]//a[@class="mnKHRc"]' + thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src' categories : music shortcut : gps disabled : True