From 8e876ad9ff7529d2418df87ba76b7351c372b52c Mon Sep 17 00:00:00 2001 From: ta Date: Sat, 18 Jun 2022 07:14:19 +0700 Subject: [PATCH 1/5] add google shopping engine --- searx/engines/google_shopping.py | 59 ++++++++++++++++++++++++++++++++ searx/settings.yml | 4 +++ 2 files changed, 63 insertions(+) create mode 100644 searx/engines/google_shopping.py diff --git a/searx/engines/google_shopping.py b/searx/engines/google_shopping.py new file mode 100644 index 000000000..bd15c78b8 --- /dev/null +++ b/searx/engines/google_shopping.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +""" + Google Shopping +""" + +from urllib.parse import urlencode +from lxml import html +from searx.utils import extract_text + +about = { + "website": "https://shopping.google.com", + "wikidata_id": "Q1433417", + "use_official_api": False, + "require_api_key": False, + "results": "HTML", +} + +categories = ["shopping"] +paging = True + +search_url = "https://shopping.google.com/search?{query}&tbm=shop&start={pageno}" + +results_xpath = '//div[@class="op4oU"]/div[@role="listitem" and @style=""]' +title_xpath = './/h2[@class="MPhl6c pqv9ne azTb0d ulfEhd YAEPj XkyFEf"]' +url_xpath = './/a[@class="loT5Qd kneS6c"]/@href' +price_xpath = './/span[@class="aZK3gc Lhpu7d"]' +thumbnail_xpath = './/img[@class="Ws3Esf"]/@src' + + +def request(query, params): + pageno = (params["pageno"] - 1) * 60 + params["url"] = search_url.format(query=urlencode({"q": query}), pageno=pageno) + + return params + + +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + res = dom.xpath(results_xpath) + for result in res: + url = extract_text(result.xpath(url_xpath)) + title = extract_text(result.xpath(title_xpath)) + price = extract_text(result.xpath(price_xpath)) + thumbnail = extract_text(result.xpath(thumbnail_xpath)) + + results.append( + { + "url": url, + "title": title, + "price": price, + "thumbnail": thumbnail, + "template": "products.html", + } + ) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 709e43627..195407333 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -739,6 +739,10 @@ engines: require_api_key: false results: HTML + - name: google shopping + engine: google_shopping + shortcut: gsh + - name: gpodder engine: json_engine shortcut: gpod From 29d82c276f1585cb5f9e7bcbbd91d91498cf77a6 Mon Sep 17 00:00:00 2001 From: ta Date: Sat, 9 Jul 2022 14:22:42 +0700 Subject: [PATCH 2/5] add shipping, delivery, and site info to google shopping engine --- searx/engines/google_shopping.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/searx/engines/google_shopping.py b/searx/engines/google_shopping.py index bd15c78b8..810a5ec1f 100644 --- a/searx/engines/google_shopping.py +++ b/searx/engines/google_shopping.py @@ -25,6 +25,9 @@ title_xpath = './/h2[@class="MPhl6c pqv9ne azTb0d ulfEhd YAEPj XkyFEf"]' url_xpath = './/a[@class="loT5Qd kneS6c"]/@href' price_xpath = './/span[@class="aZK3gc Lhpu7d"]' thumbnail_xpath = './/img[@class="Ws3Esf"]/@src' +shipping_xpath = './/div[@class="KT7Ysc"]' +site_xpath = './/div[@class="X8HN5e FAZYFf ApBhXe"]' +condition_xpath = './/span[@class="JkJxid HFeBod"]' def request(query, params): @@ -45,6 +48,9 @@ def response(resp): title = extract_text(result.xpath(title_xpath)) price = extract_text(result.xpath(price_xpath)) thumbnail = extract_text(result.xpath(thumbnail_xpath)) + shipping = extract_text(result.xpath(shipping_xpath)) + site = extract_text(result.xpath(site_xpath)) + condition = extract_text(result.xpath(condition_xpath)) results.append( { @@ -53,6 +59,9 @@ def response(resp): "price": price, "thumbnail": thumbnail, "template": "products.html", + "shipping": shipping, + "content": condition, + "source_country": site, } ) From 610d1f73ffe5abc08d50525a0472ce3578cb1970 Mon Sep 17 00:00:00 2001 From: ta Date: Sat, 9 Jul 2022 14:23:19 +0700 Subject: [PATCH 3/5] disable google shopping engine by default --- searx/settings.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/settings.yml b/searx/settings.yml index f15fcbeaf..91f76a679 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -759,6 +759,7 @@ engines: - name: google shopping engine: google_shopping shortcut: gsh + disabled: true - name: gpodder engine: json_engine From 692855b4427a67f639a75544f28013408859445e Mon Sep 17 00:00:00 2001 From: ta Date: Sat, 9 Jul 2022 15:17:39 +0700 Subject: [PATCH 4/5] use stricter pylint profile for google shopping --- searx/engines/google_shopping.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/searx/engines/google_shopping.py b/searx/engines/google_shopping.py index 810a5ec1f..d2a91db8e 100644 --- a/searx/engines/google_shopping.py +++ b/searx/engines/google_shopping.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Google Shopping -""" +# lint: pylint +"""Google Shopping""" from urllib.parse import urlencode from lxml import html From 1dc6c86ffee3ff33c0bbb17f1fa3b77a38e19e5f Mon Sep 17 00:00:00 2001 From: ta Date: Sat, 27 Aug 2022 17:16:00 +0700 Subject: [PATCH 5/5] support language selection, detect google sorry, etc --- searx/data/engines_languages.json | 140 ++++++++++++++++++++++++++++++ searx/engines/google_shopping.py | 22 +++++ 2 files changed, 162 insertions(+) diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json index bf73c17e7..07151cb4d 100644 --- a/searx/data/engines_languages.json +++ b/searx/data/engines_languages.json @@ -1234,6 +1234,146 @@ "name": "\u4e2d\u6587 (\u7e41\u9ad4)" } }, + "google shopping": { + "af": { + "name": "Afrikaans" + }, + "ar": { + "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" + }, + "be": { + "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" + }, + "bg": { + "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" + }, + "ca": { + "name": "catal\u00e0" + }, + "cs": { + "name": "\u010de\u0161tina" + }, + "da": { + "name": "dansk" + }, + "de": { + "name": "Deutsch" + }, + "el": { + "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" + }, + "en": { + "name": "English" + }, + "eo": { + "name": "esperanto" + }, + "es": { + "name": "espa\u00f1ol" + }, + "et": { + "name": "eesti" + }, + "fa": { + "name": "\u0641\u0627\u0631\u0633\u06cc" + }, + "fi": { + "name": "suomi" + }, + "fr": { + "name": "fran\u00e7ais" + }, + "hi": { + "name": "\u0939\u093f\u0928\u094d\u0926\u0940" + }, + "hr": { + "name": "hrvatski" + }, + "hu": { + "name": "magyar" + }, + "hy": { + "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576" + }, + "id": { + "name": "Indonesia" + }, + "is": { + "name": "\u00edslenska" + }, + "it": { + "name": "italiano" + }, + "iw": { + "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" + }, + "ja": { + "name": "\u65e5\u672c\u8a9e" + }, + "ko": { + "name": "\ud55c\uad6d\uc5b4" + }, + "lt": { + "name": "lietuvi\u0173" + }, + "lv": { + "name": "latvie\u0161u" + }, + "nl": { + "name": "Nederlands" + }, + "no": { + "name": "norsk" + }, + "pl": { + "name": "polski" + }, + "pt": { + "name": "portugu\u00eas" + }, + "ro": { + "name": "rom\u00e2n\u0103" + }, + "ru": { + "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439" + }, + "sk": { + "name": "sloven\u010dina" + }, + "sl": { + "name": "sloven\u0161\u010dina" + }, + "sr": { + "name": "\u0441\u0440\u043f\u0441\u043a\u0438" + }, + "sv": { + "name": "svenska" + }, + "sw": { + "name": "Kiswahili" + }, + "th": { + "name": "\u0e44\u0e17\u0e22" + }, + "tl": { + "name": "Filipino" + }, + "tr": { + "name": "T\u00fcrk\u00e7e" + }, + "uk": { + "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" + }, + "vi": { + "name": "Ti\u1ebfng Vi\u1ec7t" + }, + "zh-CN": { + "name": "\u4e2d\u6587 (\u7b80\u4f53)" + }, + "zh-TW": { + "name": "\u4e2d\u6587 (\u7e41\u9ad4)" + } + }, "google videos": { "af": { "name": "Afrikaans" diff --git a/searx/engines/google_shopping.py b/searx/engines/google_shopping.py index d2a91db8e..9ff4a15c4 100644 --- a/searx/engines/google_shopping.py +++ b/searx/engines/google_shopping.py @@ -6,6 +6,19 @@ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text +from searx.engines.google import ( + get_lang_info, + detect_google_sorry, +) + +# pylint: disable=unused-import +from searx.engines.google import ( + supported_languages_url, + _fetch_supported_languages, +) + +# pylint: enable=unused-import + about = { "website": "https://shopping.google.com", "wikidata_id": "Q1433417", @@ -31,14 +44,23 @@ condition_xpath = './/span[@class="JkJxid HFeBod"]' def request(query, params): pageno = (params["pageno"] - 1) * 60 + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + params["url"] = search_url.format(query=urlencode({"q": query}), pageno=pageno) + params['headers'].update(lang_info['headers']) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + + params['cookies']['CONSENT'] = "YES+" + return params def response(resp): results = [] + detect_google_sorry(resp) + dom = html.fromstring(resp.text) res = dom.xpath(results_xpath)