Mirror of https://github.com/searxng/searxng (synced 2024-01-01 18:24:07 +00:00)
[fix] update yahoo engine to match the website changes
This commit is contained in:
parent dc036ece85
commit 57996b12fc
@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/'
|
|||||||
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
results_xpath = '//div[@class="res"]'
|
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
|
||||||
url_xpath = './/h3/a/@href'
|
url_xpath = './/h3/a/@href'
|
||||||
title_xpath = './/h3/a'
|
title_xpath = './/h3/a'
|
||||||
content_xpath = './/div[@class="abstr"]'
|
content_xpath = './/div[@class="compText aAbs"]'
|
||||||
suggestion_xpath = '//div[@id="satat"]//a'
|
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
|
||||||
|
|
||||||
|
|
||||||
# remove yahoo-specific tracking-url
|
# remove yahoo-specific tracking-url
|
||||||
@ -91,11 +91,12 @@ def response(resp):
|
|||||||
'content': content})
|
'content': content})
|
||||||
|
|
||||||
# if no suggestion found, return results
|
# if no suggestion found, return results
|
||||||
if not dom.xpath(suggestion_xpath):
|
suggestions = dom.xpath(suggestion_xpath)
|
||||||
|
if not suggestions:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
# parse suggestion
|
# parse suggestion
|
||||||
for suggestion in dom.xpath(suggestion_xpath):
|
for suggestion in suggestions:
|
||||||
# append suggestion
|
# append suggestion
|
||||||
results.append({'suggestion': extract_text(suggestion)})
|
results.append({'suggestion': extract_text(suggestion)})
|
||||||
|
|
||||||
|
@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase):
|
|||||||
self.assertEqual(yahoo.response(response), [])
|
self.assertEqual(yahoo.response(response), [])
|
||||||
|
|
||||||
html = """
|
html = """
|
||||||
<div class="res">
|
<ol class="reg mb-15 searchCenterMiddle">
|
||||||
<div>
|
<li class="first">
|
||||||
<h3>
|
<div class="dd algo fst Sr">
|
||||||
<a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
|
<div class="compTitle">
|
||||||
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
|
<h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
|
||||||
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
|
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
|
||||||
<b>This</b> is the title
|
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
|
||||||
</a>
|
target="_blank" data-bid="54e712e13671c">
|
||||||
|
<b><b>This is the title</b></b></a>
|
||||||
</h3>
|
</h3>
|
||||||
</div>
|
</div>
|
||||||
<span class="url" dir="ltr">www.<b>test</b>.com</span>
|
<div class="compText aAbs">
|
||||||
<div class="abstr">
|
<p class="lh-18"><b><b>This is the </b>content</b>
|
||||||
<b>This</b> is the content
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div id="satat" data-bns="Yahoo" data-bk="124.1">
|
</li>
|
||||||
<h2>Also Try</h2>
|
<li>
|
||||||
<table>
|
<div class="dd algo lst Sr">
|
||||||
|
<div class="compTitle">
|
||||||
|
<h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA;
|
||||||
|
_ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10
|
||||||
|
/RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-"
|
||||||
|
target="_blank" data-bid="54e712e136926">
|
||||||
|
This is the second <b><b>title</b></b></a>
|
||||||
|
</h3>
|
||||||
|
</div>
|
||||||
|
<div class="compText aAbs">
|
||||||
|
<p class="lh-18">This is the second content</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</li>
|
||||||
|
</ol>
|
||||||
|
<div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04">
|
||||||
|
<div class="compTitle mb-4 h-17">
|
||||||
|
<h3 class="title">Also Try</h3> </div>
|
||||||
|
<table class="compTable m-0 ac-1st td-u fz-ms">
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a>
|
||||||
<a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
|
|
||||||
<span>
|
|
||||||
<b></b>This is <b>the suggestion</b>
|
|
||||||
</span>
|
|
||||||
</a>
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
response = mock.Mock(text=html)
|
response = mock.Mock(text=html)
|
||||||
results = yahoo.response(response)
|
results = yahoo.response(response)
|
||||||
|
print results
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 2)
|
self.assertEqual(len(results), 3)
|
||||||
self.assertEqual(results[0]['title'], 'This is the title')
|
self.assertEqual(results[0]['title'], 'This is the title')
|
||||||
self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
|
self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
|
||||||
self.assertEqual(results[0]['content'], 'This is the content')
|
self.assertEqual(results[0]['content'], 'This is the content')
|
||||||
self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
|
self.assertEqual(results[1]['title'], 'This is the second title')
|
||||||
|
self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/')
|
||||||
|
self.assertEqual(results[1]['content'], 'This is the second content')
|
||||||
|
self.assertEqual(results[2]['suggestion'], 'This is the suggestion')
|
||||||
|
|
||||||
html = """
|
html = """
|
||||||
<div class="res">
|
<ol class="reg mb-15 searchCenterMiddle">
|
||||||
<div>
|
<li class="first">
|
||||||
<h3>
|
<div class="dd algo fst Sr">
|
||||||
<a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
|
<div class="compTitle">
|
||||||
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
|
<h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
|
||||||
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
|
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
|
||||||
<b>This</b> is the title
|
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
|
||||||
</a>
|
target="_blank" data-bid="54e712e13671c">
|
||||||
|
<b><b>This is the title</b></b></a>
|
||||||
</h3>
|
</h3>
|
||||||
</div>
|
</div>
|
||||||
<span class="url" dir="ltr">www.<b>test</b>.com</span>
|
<div class="compText aAbs">
|
||||||
<div class="abstr">
|
<p class="lh-18"><b><b>This is the </b>content</b>
|
||||||
<b>This</b> is the content
|
</p>
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="res">
|
|
||||||
<div>
|
|
||||||
<h3>
|
|
||||||
<a id="link-1" class="yschttl spt">
|
|
||||||
<b>This</b> is the title
|
|
||||||
</a>
|
|
||||||
</h3>
|
|
||||||
</div>
|
|
||||||
<span class="url" dir="ltr">www.<b>test</b>.com</span>
|
|
||||||
<div class="abstr">
|
|
||||||
<b>This</b> is the content
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="res">
|
|
||||||
<div>
|
|
||||||
<h3>
|
|
||||||
</h3>
|
|
||||||
</div>
|
|
||||||
<span class="url" dir="ltr">www.<b>test</b>.com</span>
|
|
||||||
<div class="abstr">
|
|
||||||
<b>This</b> is the content
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</li>
|
||||||
|
</ol>
|
||||||
"""
|
"""
|
||||||
response = mock.Mock(text=html)
|
response = mock.Mock(text=html)
|
||||||
results = yahoo.response(response)
|
results = yahoo.response(response)
|
||||||
|
Loading…
Reference in New Issue
Block a user