Merge pull request #249 from dalf/master

[fix] update the yahoo engine to match changes to the web site
This commit is contained in:
Adam Tauber 2015-02-20 14:22:25 +01:00
commit 7f7f10bb6f
2 changed files with 65 additions and 67 deletions

View File

@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
# specific xpath variables # specific xpath variables
results_xpath = '//div[@class="res"]' results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
url_xpath = './/h3/a/@href' url_xpath = './/h3/a/@href'
title_xpath = './/h3/a' title_xpath = './/h3/a'
content_xpath = './/div[@class="abstr"]' content_xpath = './/div[@class="compText aAbs"]'
suggestion_xpath = '//div[@id="satat"]//a' suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
# remove yahoo-specific tracking-url # remove yahoo-specific tracking-url
@ -91,11 +91,12 @@ def response(resp):
'content': content}) 'content': content})
# if no suggestion found, return results # if no suggestion found, return results
if not dom.xpath(suggestion_xpath): suggestions = dom.xpath(suggestion_xpath)
if not suggestions:
return results return results
# parse suggestion # parse suggestion
for suggestion in dom.xpath(suggestion_xpath): for suggestion in suggestions:
# append suggestion # append suggestion
results.append({'suggestion': extract_text(suggestion)}) results.append({'suggestion': extract_text(suggestion)})

View File

@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase):
self.assertEqual(yahoo.response(response), []) self.assertEqual(yahoo.response(response), [])
html = """ html = """
<div class="res"> <ol class="reg mb-15 searchCenterMiddle">
<div> <li class="first">
<h3> <div class="dd algo fst Sr">
<a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; <div class="compTitle">
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
<b>This</b> is the title /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
</a> target="_blank" data-bid="54e712e13671c">
<b><b>This is the title</b></b></a>
</h3> </h3>
</div> </div>
<span class="url" dir="ltr">www.<b>test</b>.com</span> <div class="compText aAbs">
<div class="abstr"> <p class="lh-18"><b><b>This is the </b>content</b>
<b>This</b> is the content </p>
</div> </div>
</div> </div>
<div id="satat" data-bns="Yahoo" data-bk="124.1"> </li>
<h2>Also Try</h2> <li>
<table> <div class="dd algo lst Sr">
<div class="compTitle">
<h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA;
_ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10
/RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-"
target="_blank" data-bid="54e712e136926">
This is the second <b><b>title</b></b></a>
</h3>
</div>
<div class="compText aAbs">
<p class="lh-18">This is the second content</p>
</div>
</div>
</li>
</ol>
<div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04">
<div class="compTitle mb-4 h-17">
<h3 class="title">Also Try</h3> </div>
<table class="compTable m-0 ac-1st td-u fz-ms">
<tbody> <tbody>
<tr> <tr>
<td> <td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a>
<a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
<span>
<b></b>This is <b>the suggestion</b>
</span>
</a>
</td> </td>
</tr> </tr>
</tbody>
</table> </table>
</div> </div>
""" """
response = mock.Mock(text=html) response = mock.Mock(text=html)
results = yahoo.response(response) results = yahoo.response(response)
print results
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 2) self.assertEqual(len(results), 3)
self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], 'https://this.is.the.url/') self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
self.assertEqual(results[0]['content'], 'This is the content') self.assertEqual(results[0]['content'], 'This is the content')
self.assertEqual(results[1]['suggestion'], 'This is the suggestion') self.assertEqual(results[1]['title'], 'This is the second title')
self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/')
self.assertEqual(results[1]['content'], 'This is the second content')
self.assertEqual(results[2]['suggestion'], 'This is the suggestion')
html = """ html = """
<div class="res"> <ol class="reg mb-15 searchCenterMiddle">
<div> <li class="first">
<h3> <div class="dd algo fst Sr">
<a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; <div class="compTitle">
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
<b>This</b> is the title /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
</a> target="_blank" data-bid="54e712e13671c">
<b><b>This is the title</b></b></a>
</h3> </h3>
</div> </div>
<span class="url" dir="ltr">www.<b>test</b>.com</span> <div class="compText aAbs">
<div class="abstr"> <p class="lh-18"><b><b>This is the </b>content</b>
<b>This</b> is the content </p>
</div>
</div>
<div class="res">
<div>
<h3>
<a id="link-1" class="yschttl spt">
<b>This</b> is the title
</a>
</h3>
</div>
<span class="url" dir="ltr">www.<b>test</b>.com</span>
<div class="abstr">
<b>This</b> is the content
</div>
</div>
<div class="res">
<div>
<h3>
</h3>
</div>
<span class="url" dir="ltr">www.<b>test</b>.com</span>
<div class="abstr">
<b>This</b> is the content
</div> </div>
</div> </div>
</li>
</ol>
""" """
response = mock.Mock(text=html) response = mock.Mock(text=html)
results = yahoo.response(response) results = yahoo.response(response)