Merge branch 'master' into boilerplate

This commit is contained in:
Markus Heiser 2019-12-10 13:10:51 +00:00 committed by GitHub
commit 7beb49b1fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 95 deletions

View File

@ -1,4 +1,4 @@
Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament and Noémi Ványi. Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament, Noémi Ványi, @pofilo and Markus Heiser.
Major contributing authors: Major contributing authors:
@ -9,6 +9,8 @@ Major contributing authors:
- @Cqoicebordel - @Cqoicebordel
- Noémi Ványi - Noémi Ványi
- Marc Abonce Seguin @a01200356 - Marc Abonce Seguin @a01200356
- @pofilo
- Markus Heiser @return42
People who have submitted patches/translations, reported bugs, consulted on features or People who have submitted patches/translations, reported bugs, consulted on features or
generally made searx better: generally made searx better:

View File

@ -107,13 +107,12 @@ images_path = '/images'
supported_languages_url = 'https://www.google.com/preferences?#languages' supported_languages_url = 'https://www.google.com/preferences?#languages'
# specific xpath variables # specific xpath variables
results_xpath = '//div[@class="g"]' results_xpath = '//div[contains(@class, "ZINbbc")]'
url_xpath = './/h3/a/@href' url_xpath = './/div[@class="kCrYT"][1]/a/@href'
title_xpath = './/h3' title_xpath = './/div[@class="kCrYT"][1]/a/div[1]'
content_xpath = './/span[@class="st"]' content_xpath = './/div[@class="kCrYT"][2]//div[contains(@class, "BNeawe")]//div[contains(@class, "BNeawe")]'
content_misc_xpath = './/div[@class="f slp"]' suggestion_xpath = '//div[contains(@class, "ZINbbc")][last()]//div[@class="rVLSBd"]/a//div[contains(@class, "BNeawe")]'
suggestion_xpath = '//p[@class="_Bmc"]' spelling_suggestion_xpath = '//div[@id="scc"]//a'
spelling_suggestion_xpath = '//a[@class="spell"]'
# map : detail location # map : detail location
map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()' map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()'
@ -199,10 +198,6 @@ def request(query, params):
params['headers']['Accept-Language'] = language + ',' + language + '-' + country params['headers']['Accept-Language'] = language + ',' + language + '-' + country
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
# Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse
params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)"
"AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1")
params['google_hostname'] = google_hostname params['google_hostname'] = google_hostname
return params return params
@ -274,9 +269,7 @@ def response(resp):
content = extract_text_from_dom(result, content_xpath) content = extract_text_from_dom(result, content_xpath)
if content is None: if content is None:
continue continue
content_misc = extract_text_from_dom(result, content_misc_xpath)
if content_misc is not None:
content = content_misc + "<br />" + content
# append result # append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,

View File

@ -58,93 +58,50 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(google.response(response), []) self.assertEqual(google.response(response), [])
html = """ html = """
<div class="g"> <div class="ZINbbc xpd O9g5cc uUPGi">
<h3 class="r"> <div>
<a href="http://this.should.be.the.link/"> <div class="kCrYT">
<b>This</b> is <b>the</b> title <a href="/url?q=http://this.should.be.the.link/">
</a> <div class="BNeawe">
</h3> <b>This</b> is <b>the</b> title
<div class="s">
<div class="kv" style="margin-bottom:2px">
<cite>
<b>test</b>.psychologies.com/
</cite>
<div class="_nBb">
<div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
<span class="_O0">
</span>
</div> </div>
<div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1"> <div class="BNeawe">
<ul> http://website
<li class="_Ykb"> </div>
<a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent </a>
.com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/"> </div>
En cache <div class="kCrYT">
</a> <div>
</li> <div class="BNeawe">
<li class="_Ykb"> <div>
<a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/"> <div class="BNeawe">
Pages similaires This should be the content.
</a> </div>
</li> </div>
</ul>
</div> </div>
</div> </div>
</div> </div>
<span class="st"> </div>
This should be the content. </p>
</span> <div class="ZINbbc xpd O9g5cc uUPGi">
<br> <div>
<div class="osl"> <div class="kCrYT">
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/"> <span>
Test Personnalité <div class="BNeawe">
</a> - Related searches
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/"> </div>
Tests - Moi </span>
</a> - </div>
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple"> <div class="rVLSBd">
Test Couple <a>
</a> <div>
- <div class="BNeawe">
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour"> suggestion title
Test Amour </div>
</div>
</a> </a>
</div> </div>
</div> </div>
</div>
<div class="g">
<h3 class="r">
<a href="http://www.google.com/images?q=toto">
<b>This</b>
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="http://www.google.com/search?q=toto">
<b>This</b> is
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="">
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="/url?q=url">
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<p class="_Bmc" style="margin:3px 8px">
<a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
suggestion <b>title</b>
</a>
</p> </p>
""" """
response = self.mock_response(html) response = self.mock_response(html)