mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #57 from pointhi/results
improving publishDate extraction and output of it
This commit is contained in:
		
						commit
						018a14431b
					
				
					 9 changed files with 26 additions and 15 deletions
				
			
		|  | @ -3,3 +3,4 @@ flask-babel | |||
| grequests | ||||
| lxml | ||||
| pyyaml | ||||
| python-dateutil | ||||
|  |  | |||
|  | @ -2,6 +2,7 @@ | |||
| 
 | ||||
| from urllib import urlencode | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| from datetime import datetime | ||||
| 
 | ||||
| categories = ['news'] | ||||
|  | @ -32,16 +33,9 @@ def response(resp): | |||
|         return [] | ||||
| 
 | ||||
|     for result in search_res['responseData']['results']: | ||||
| # S.149 (159), library.pdf | ||||
| # datetime.strptime("Mon, 10 Mar 2014 16:26:15 -0700", | ||||
| #                   "%a, %d %b %Y %H:%M:%S %z") | ||||
| #        publishedDate = parse(result['publishedDate']) | ||||
|         publishedDate = datetime.strptime( | ||||
|             str.join(' ', result['publishedDate'].split(None)[0:5]), | ||||
|             "%a, %d %b %Y %H:%M:%S") | ||||
|         #utc_offset = timedelta(result['publishedDate'].split(None)[5]) | ||||
|         # local = utc + offset | ||||
|         #publishedDate = publishedDate + utc_offset | ||||
| 
 | ||||
| # Mon, 10 Mar 2014 16:26:15 -0700 | ||||
|         publishedDate = parser.parse(result['publishedDate']) | ||||
| 
 | ||||
|         results.append({'url': result['unescapedUrl'], | ||||
|                         'title': result['titleNoFormatting'], | ||||
|  |  | |||
|  | @ -2,6 +2,8 @@ from urllib import urlencode | |||
| from HTMLParser import HTMLParser | ||||
| from lxml import html | ||||
| from xpath import extract_text | ||||
| from datetime import datetime | ||||
| from dateutil import parser | ||||
| 
 | ||||
| base_url = 'http://vimeo.com' | ||||
| search_url = base_url + '/search?{query}' | ||||
|  | @ -10,6 +12,7 @@ content_xpath = None | |||
| title_xpath = None | ||||
| results_xpath = '' | ||||
| content_tpl = '<a href="{0}">  <img src="{2}"/> </a>' | ||||
| publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' | ||||
| 
 | ||||
| # the cookie set by vimeo contains all the following values, | ||||
| # but only __utma seems to be required | ||||
|  | @ -40,9 +43,12 @@ def response(resp): | |||
|         url = base_url + result.xpath(url_xpath)[0] | ||||
|         title = p.unescape(extract_text(result.xpath(title_xpath))) | ||||
|         thumbnail = extract_text(result.xpath(content_xpath)[0]) | ||||
|         publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0])) | ||||
| 
 | ||||
|         results.append({'url': url, | ||||
|                         'title': title, | ||||
|                         'content': content_tpl.format(url, title, thumbnail), | ||||
|                         'template': 'videos.html', | ||||
|                         'publishedDate': publishedDate, | ||||
|                         'thumbnail': thumbnail}) | ||||
|     return results | ||||
|  |  | |||
|  | @ -6,6 +6,7 @@ from searx.engines.xpath import extract_text, extract_url | |||
| from searx.engines.yahoo import parse_url | ||||
| from datetime import datetime, timedelta | ||||
| import re | ||||
| from dateutil import parser | ||||
| 
 | ||||
| categories = ['news'] | ||||
| search_url = 'http://news.search.yahoo.com/search?{query}&b={offset}' | ||||
|  | @ -52,9 +53,7 @@ def response(resp): | |||
|                     - timedelta(hours=int(timeNumbers[0]))\ | ||||
|                     - timedelta(minutes=int(timeNumbers[1])) | ||||
|             else: | ||||
|                 # TODO year in string possible? | ||||
|                 publishedDate = datetime.strptime(publishedDate, | ||||
|                                                   "%b %d %H:%M%p") | ||||
|                 publishedDate = parser.parse(publishedDate) | ||||
| 
 | ||||
|         if publishedDate.year == 1900: | ||||
|             publishedDate = publishedDate.replace(year=datetime.now().year) | ||||
|  |  | |||
|  | @ -1,5 +1,7 @@ | |||
| from json import loads | ||||
| from urllib import urlencode | ||||
| from dateutil import parser | ||||
| from datetime import datetime | ||||
| 
 | ||||
| categories = ['videos'] | ||||
| 
 | ||||
|  | @ -35,6 +37,10 @@ def response(resp): | |||
|         content = '' | ||||
|         thumbnail = '' | ||||
| 
 | ||||
| #"2013-12-31T15:22:51.000Z" | ||||
|         pubdate = result['published']['$t'] | ||||
|         publishedDate = parser.parse(pubdate) | ||||
| 
 | ||||
|         if result['media$group']['media$thumbnail']: | ||||
|             thumbnail = result['media$group']['media$thumbnail'][0]['url'] | ||||
|             content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)  # noqa | ||||
|  | @ -48,6 +54,7 @@ def response(resp): | |||
|                         'title': title, | ||||
|                         'content': content, | ||||
|                         'template': 'videos.html', | ||||
|                         'publishedDate': publishedDate, | ||||
|                         'thumbnail': thumbnail}) | ||||
| 
 | ||||
|     return results | ||||
|  |  | |||
|  | @ -16,6 +16,7 @@ | |||
|       <title>{{ r.title }}</title> | ||||
|       <link>{{ r.url }}</link> | ||||
|       <description>{{ r.content }}</description> | ||||
|       {% if r.pubdate %}<pubDate>{{ r.pubdate }}</pubDate>{% endif %} | ||||
|     </item> | ||||
|     {% endfor %} | ||||
|   </channel> | ||||
|  |  | |||
|  | @ -5,6 +5,7 @@ | |||
| 
 | ||||
|     <p> | ||||
|       <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> | ||||
|       {% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %} | ||||
|       <a href="{{ result.url }}"><img width="400px" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a> | ||||
|       <p class="url">{{ result.url }}</p> | ||||
|     </p> | ||||
|  |  | |||
|  | @ -159,8 +159,8 @@ def index(): | |||
| 
 | ||||
|         # TODO: check if the timezone is calculated correctly | ||||
|         if 'publishedDate' in result: | ||||
|             if result['publishedDate'] >= datetime.now() - timedelta(days=1): | ||||
|                 timedifference = datetime.now() - result['publishedDate'] | ||||
|             if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1): | ||||
|                 timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None) | ||||
|                 minutes = int((timedifference.seconds / 60) % 60) | ||||
|                 hours = int(timedifference.seconds / 60 / 60) | ||||
|                 if hours == 0: | ||||
|  | @ -168,6 +168,7 @@ def index(): | |||
|                 else: | ||||
|                     result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa | ||||
|             else: | ||||
|                 result['pubdate'] = result['publishedDate'].strftime('%a, %d %b %Y %H:%M:%S %z') | ||||
|                 result['publishedDate'] = format_date(result['publishedDate']) | ||||
| 
 | ||||
|     if search.request_data.get('format') == 'json': | ||||
|  |  | |||
							
								
								
									
										1
									
								
								setup.py
									
										
									
									
									
								
							
							
						
						
									
										1
									
								
								setup.py
									
										
									
									
									
								
							|  | @ -35,6 +35,7 @@ setup( | |||
|         'lxml', | ||||
|         'pyyaml', | ||||
|         'setuptools', | ||||
|         'python-dateutil', | ||||
|     ], | ||||
|     extras_require={ | ||||
|         'test': [ | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber