mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #50 from pointhi/results
Showing publish date of articles in search results
This commit is contained in:
		
						commit
						1467a2e0fc
					
				
					 8 changed files with 62 additions and 2 deletions
				
			
		|  | @ -2,6 +2,7 @@ | ||||||
| 
 | 
 | ||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from json import loads | from json import loads | ||||||
|  | from datetime import datetime, timedelta | ||||||
| 
 | 
 | ||||||
| categories = ['news'] | categories = ['news'] | ||||||
| 
 | 
 | ||||||
|  | @ -31,7 +32,15 @@ def response(resp): | ||||||
|         return [] |         return [] | ||||||
| 
 | 
 | ||||||
|     for result in search_res['responseData']['results']: |     for result in search_res['responseData']['results']: | ||||||
|  | # S.149 (159), library.pdf | ||||||
|  | # datetime.strptime("Mon, 10 Mar 2014 16:26:15 -0700", "%a, %d %b %Y %H:%M:%S %z") | ||||||
|  | #        publishedDate = parse(result['publishedDate']) | ||||||
|  |         publishedDate = datetime.strptime(str.join(' ',result['publishedDate'].split(None)[0:5]), "%a, %d %b %Y %H:%M:%S") | ||||||
|  |         #utc_offset = timedelta(result['publishedDate'].split(None)[5])  # local = utc + offset | ||||||
|  |         #publishedDate = publishedDate + utc_offset | ||||||
|  | 
 | ||||||
|         results.append({'url': result['unescapedUrl'], |         results.append({'url': result['unescapedUrl'], | ||||||
|                         'title': result['titleNoFormatting'], |                         'title': result['titleNoFormatting'], | ||||||
|  | 						'publishedDate': publishedDate, | ||||||
|                         'content': result['content']}) |                         'content': result['content']}) | ||||||
|     return results |     return results | ||||||
|  |  | ||||||
|  | @ -4,6 +4,8 @@ from urllib import urlencode | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx.engines.xpath import extract_text, extract_url | from searx.engines.xpath import extract_text, extract_url | ||||||
| from searx.engines.yahoo import parse_url | from searx.engines.yahoo import parse_url | ||||||
|  | from datetime import datetime, timedelta | ||||||
|  | import re | ||||||
| 
 | 
 | ||||||
| categories = ['news'] | categories = ['news'] | ||||||
| search_url = 'http://news.search.yahoo.com/search?{query}&b={offset}' | search_url = 'http://news.search.yahoo.com/search?{query}&b={offset}' | ||||||
|  | @ -11,6 +13,7 @@ results_xpath = '//div[@class="res"]' | ||||||
| url_xpath = './/h3/a/@href' | url_xpath = './/h3/a/@href' | ||||||
| title_xpath = './/h3/a' | title_xpath = './/h3/a' | ||||||
| content_xpath = './/div[@class="abstr"]' | content_xpath = './/div[@class="abstr"]' | ||||||
|  | publishedDate_xpath = './/span[@class="timestamp"]' | ||||||
| suggestion_xpath = '//div[@id="satat"]//a' | suggestion_xpath = '//div[@id="satat"]//a' | ||||||
| 
 | 
 | ||||||
| paging = True | paging = True | ||||||
|  | @ -37,7 +40,22 @@ def response(resp): | ||||||
|         url = parse_url(extract_url(result.xpath(url_xpath), search_url)) |         url = parse_url(extract_url(result.xpath(url_xpath), search_url)) | ||||||
|         title = extract_text(result.xpath(title_xpath)[0]) |         title = extract_text(result.xpath(title_xpath)[0]) | ||||||
|         content = extract_text(result.xpath(content_xpath)[0]) |         content = extract_text(result.xpath(content_xpath)[0]) | ||||||
|         results.append({'url': url, 'title': title, 'content': content}) |         publishedDate = extract_text(result.xpath(publishedDate_xpath)[0]) | ||||||
|  | 
 | ||||||
|  |         if re.match("^[0-9]+ minute(s|) ago$", publishedDate): | ||||||
|  |             publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group())) | ||||||
|  |         else: | ||||||
|  |             if re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate): | ||||||
|  |                 timeNumbers = re.findall(r'\d+', publishedDate) | ||||||
|  |                 publishedDate = datetime.now() - timedelta(hours=int(timeNumbers[0])) - timedelta(minutes=int(timeNumbers[1])) | ||||||
|  |             else: | ||||||
|  |                 # TODO: can the date string also contain a year? | ||||||
|  |                 publishedDate = datetime.strptime(publishedDate,"%b %d %H:%M%p") | ||||||
|  | 
 | ||||||
|  |         if publishedDate.year == 1900: | ||||||
|  |             publishedDate = publishedDate.replace(year=datetime.now().year) | ||||||
|  | 
 | ||||||
|  |         results.append({'url': url, 'title': title, 'content': content,'publishedDate':publishedDate}) | ||||||
| 
 | 
 | ||||||
|     if not suggestion_xpath: |     if not suggestion_xpath: | ||||||
|         return results |         return results | ||||||
|  |  | ||||||
|  | @ -35,6 +35,7 @@ a{text-decoration:none;color:#1a11be}a:visited{color:#8e44ad} | ||||||
| .result h3{font-size:1em;word-wrap:break-word;margin:5px 0 1px 0;padding:0} | .result h3{font-size:1em;word-wrap:break-word;margin:5px 0 1px 0;padding:0} | ||||||
| .result .content{font-size:.8em;margin:0;padding:0;max-width:54em;word-wrap:break-word;line-height:1.24} | .result .content{font-size:.8em;margin:0;padding:0;max-width:54em;word-wrap:break-word;line-height:1.24} | ||||||
| .result .url{font-size:.8em;margin:3px 0 0 0;padding:0;max-width:54em;word-wrap:break-word;color:#c0392b} | .result .url{font-size:.8em;margin:3px 0 0 0;padding:0;max-width:54em;word-wrap:break-word;color:#c0392b} | ||||||
|  | .result .published_date{font-size:.8em;color:#888;margin:5px 20px} | ||||||
| .engines{color:#888} | .engines{color:#888} | ||||||
| .small_font{font-size:.8em} | .small_font{font-size:.8em} | ||||||
| .small p{margin:2px 0} | .small p{margin:2px 0} | ||||||
|  |  | ||||||
|  | @ -64,6 +64,9 @@ | ||||||
| // Url to result | // Url to result | ||||||
| @color-result-url-font: #C0392B; | @color-result-url-font: #C0392B; | ||||||
| 
 | 
 | ||||||
|  | // Publish Date | ||||||
|  | @color-result-publishdate-font: #888; | ||||||
|  | 
 | ||||||
| // Images | // Images | ||||||
| @color-result-image-span-background-hover: rgba(0, 0, 0, 0.6); | @color-result-image-span-background-hover: rgba(0, 0, 0, 0.6); | ||||||
| @color-result-image-span-font: #FFF; | @color-result-image-span-font: #FFF; | ||||||
|  |  | ||||||
|  | @ -248,6 +248,12 @@ a { | ||||||
| 		word-wrap:break-word; | 		word-wrap:break-word; | ||||||
| 		color: @color-result-url-font; | 		color: @color-result-url-font; | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	.published_date { | ||||||
|  | 		font-size: 0.8em; | ||||||
|  | 		color: @color-result-publishdate-font; | ||||||
|  |     	margin: 5px 20px; | ||||||
|  | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| .engines { | .engines { | ||||||
|  |  | ||||||
|  | @ -6,6 +6,7 @@ | ||||||
| 
 | 
 | ||||||
|   <div> |   <div> | ||||||
|     <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> |     <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> | ||||||
|  | 	{% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %} | ||||||
|     <p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p> |     <p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p> | ||||||
|     <p class="url">{{ result.pretty_url }}</p> |     <p class="url">{{ result.pretty_url }}</p> | ||||||
|   </div> |   </div> | ||||||
|  |  | ||||||
|  | @ -37,6 +37,14 @@ msgstr "" | ||||||
| msgid "Errors" | msgid "Errors" | ||||||
| msgstr "" | msgstr "" | ||||||
| 
 | 
 | ||||||
|  | #: searx/webapp.py:167 | ||||||
|  | msgid "{minutes} minute(s) ago" | ||||||
|  | msgstr "" | ||||||
|  | 
 | ||||||
|  | #: searx/webapp.py:169 | ||||||
|  | msgid "{hours} hour(s), {minutes} minute(s) ago" | ||||||
|  | msgstr "" | ||||||
|  | 
 | ||||||
| #: searx/templates/index.html:7 | #: searx/templates/index.html:7 | ||||||
| msgid "about" | msgid "about" | ||||||
| msgstr "" | msgstr "" | ||||||
|  |  | ||||||
|  | @ -26,12 +26,13 @@ import json | ||||||
| import cStringIO | import cStringIO | ||||||
| import os | import os | ||||||
| 
 | 
 | ||||||
|  | from datetime import datetime, timedelta | ||||||
| from itertools import chain | from itertools import chain | ||||||
| from flask import ( | from flask import ( | ||||||
|     Flask, request, render_template, url_for, Response, make_response, |     Flask, request, render_template, url_for, Response, make_response, | ||||||
|     redirect, send_from_directory |     redirect, send_from_directory | ||||||
| ) | ) | ||||||
| from flask.ext.babel import Babel | from flask.ext.babel import Babel, gettext, ngettext, format_date | ||||||
| from searx import settings, searx_dir | from searx import settings, searx_dir | ||||||
| from searx.engines import ( | from searx.engines import ( | ||||||
|     search as do_search, categories, engines, get_engines_stats, |     search as do_search, categories, engines, get_engines_stats, | ||||||
|  | @ -156,6 +157,19 @@ def index(): | ||||||
|             if engine in favicons: |             if engine in favicons: | ||||||
|                 result['favicon'] = engine |                 result['favicon'] = engine | ||||||
| 
 | 
 | ||||||
|  |         # TODO: check whether the timezone is calculated correctly | ||||||
|  |         if 'publishedDate' in result: | ||||||
|  |             if result['publishedDate'] >= datetime.now() - timedelta(days=1): | ||||||
|  |                 timedifference = datetime.now() - result['publishedDate'] | ||||||
|  |                 minutes = int((timedifference.seconds/60)%60) | ||||||
|  |                 hours = int(timedifference.seconds/60/60) | ||||||
|  |                 if hours == 0: | ||||||
|  |                     result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes) | ||||||
|  |                 else: | ||||||
|  |                     result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) | ||||||
|  |             else: | ||||||
|  |                 result['publishedDate'] = format_date(result['publishedDate']) | ||||||
|  | 
 | ||||||
|     if search.request_data.get('format') == 'json': |     if search.request_data.get('format') == 'json': | ||||||
|         return Response(json.dumps({'query': search.query, |         return Response(json.dumps({'query': search.query, | ||||||
|                                     'results': search.results}), |                                     'results': search.results}), | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber