Merge pull request #1648 from unixfox/google_mobile_ui_parameter

Switch the output format from protobuf to HTML for the Google mobile UI
This commit is contained in:
Markus Heiser 2022-08-10 20:50:45 +02:00 committed by GitHub
commit 43c545910b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 5 additions and 12 deletions

View File

@ -112,21 +112,14 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
# specific xpath variables # specific xpath variables
# ------------------------ # ------------------------
# google results are grouped into <div class="jtfYYd ..." ../> results_xpath = '//div[contains(@class, "MjjYud")]'
results_xpath = '//div[contains(@class, "jtfYYd")]' title_xpath = './/h3[1]'
href_xpath = './/a/@href'
content_xpath = './/div[@data-content-feature=1]'
# google *sections* are no usual *results*, we ignore them # google *sections* are no usual *results*, we ignore them
g_section_with_header = './g-section-with-header' g_section_with_header = './g-section-with-header'
# the title is a h3 tag relative to the result group
title_xpath = './/h3[1]'
# in the result group there is <div class="yuRUbf" ../>; its first child is an <a
# href=...>
href_xpath = './/div[@class="yuRUbf"]//a/@href'
# in the result group there is <div class="VwiC3b ..." ../> containing the *content*
content_xpath = './/div[contains(@class, "VwiC3b")]'
# Suggestions are links placed in a *card-section*, we extract only the text # Suggestions are links placed in a *card-section*, we extract only the text
# from the links not the links itself. # from the links not the links itself.
@ -261,7 +254,7 @@ def request(query, params):
if use_mobile_ui: if use_mobile_ui:
additional_parameters = { additional_parameters = {
'asearch': 'arc', 'asearch': 'arc',
'async': 'use_ac:true,_fmt:pc', 'async': 'use_ac:true,_fmt:html',
} }
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium