# searxngRebrandZaclys/searx/engines/reddit.py

"""
 Reddit

 @website      https://www.reddit.com/
 @provide-api  yes (https://www.reddit.com/dev/api)

 @using-api    yes
 @results      JSON
 @stable       yes
 @parse        url, title, content, thumbnail, publishedDate
"""

import json
from cgi import escape
from urllib import urlencode
from urlparse import urlparse, urljoin
from datetime import datetime

# engine dependent config
# category names this engine is listed under (presumably consumed by the
# searx engine loader -- confirm against the framework)
categories = ['general', 'images', 'news', 'social media']
# sent as the `limit` query parameter by request()
page_size = 25

# search-url
# base_url is the prefix of search_url; response() also joins it with each
# post's permalink to build the result URL
base_url = 'https://www.reddit.com/'
# {query} is filled in by request() with the urlencoded query string
search_url = base_url + 'search.json?{query}'


# do search-request
def request(query, params):
    """Store the Reddit search URL for ``query`` in ``params``.

    The search term is sent as ``q`` and the module-level ``page_size`` as
    ``limit``; both are urlencoded and substituted into ``search_url``.

    ``params`` is modified in place (its ``'url'`` key is set) and the same
    object is returned.
    """
    url_args = {'q': query, 'limit': page_size}
    params['url'] = search_url.format(query=urlencode(url_args))
    return params


# get response from search-request
def response(resp):
    """Turn a Reddit search response into a list of result dicts.

    ``resp.text`` is parsed as JSON and the posts are read from
    ``data.children``.  Every result carries ``url`` (``base_url`` joined
    with the post's permalink) and ``title``.  Depending on the post's
    ``thumbnail`` field the result is either

    * an image result (``img_src``, ``thumbnail_src`` and
      ``template = 'images.html'``), or
    * a text result (``content`` and ``publishedDate``).

    Returns all image results followed by all text results, or an empty
    list when the parsed JSON has no ``data`` key.
    """
    img_results = []
    text_results = []

    search_results = json.loads(resp.text)

    # return empty array if there are no results
    if 'data' not in search_results:
        return []

    posts = search_results['data'].get('children', [])

    # process results
    for post in posts:
        data = post['data']

        # extract post information
        params = {
            'url': urljoin(base_url, data['permalink']),
            'title': data['title']
        }

        # the thumbnail field only counts as an image URL when it parses to
        # something with both a host and a path; in that case the result
        # is rendered with the image template
        thumbnail = data['thumbnail']
        url_info = urlparse(thumbnail)
        if url_info.netloc and url_info.path:
            params['img_src'] = data['url']
            params['thumbnail_src'] = thumbnail
            params['template'] = 'images.html'
            img_results.append(params)
        else:
            # fromtimestamp() yields a naive datetime in local time
            created = datetime.fromtimestamp(data['created_utc'])
            # shorten the raw text first and escape afterwards: cutting an
            # already escaped string could split an entity such as '&amp;'
            # in half and leave broken markup in the result
            content = data['selftext']
            if len(content) > 500:
                content = content[:500] + '...'
            params['content'] = escape(content)
            params['publishedDate'] = created
            text_results.append(params)

    # show images first and text results second
    return img_results + text_results
Add Reddit search engine 2016-03-25 13:30:32 +00:00			`"""`
			`Reddit`

			`@website https://www.reddit.com/`
			`@provide-api yes (https://www.reddit.com/dev/api)`

			`@using-api yes`
			`@results JSON`
			`@stable yes`
			`@parse url, title, content, thumbnail, publishedDate`
			`"""`

			`import json`
			`from cgi import escape`
			`from urllib import urlencode`
[fix] incorrect URLs in Reddit results - closes #538 2016-04-16 10:22:31 +00:00			`from urlparse import urlparse, urljoin`
Add Reddit search engine 2016-03-25 13:30:32 +00:00			`from datetime import datetime`

			`# engine dependent config`
			`categories = ['general', 'images', 'news', 'social media']`
			`page_size = 25`

			`# search-url`
[fix] incorrect URLs in Reddit results - closes #538 2016-04-16 10:22:31 +00:00			`base_url = 'https://www.reddit.com/'`
			`search_url = base_url + 'search.json?{query}'`
Add Reddit search engine 2016-03-25 13:30:32 +00:00

			`# do search-request`
			`def request(query, params):`
			`query = urlencode({'q': query,`
			`'limit': page_size})`
			`params['url'] = search_url.format(query=query)`

			`return params`


			`# get response from search-request`
			`def response(resp):`
			`img_results = []`
			`text_results = []`

			`search_results = json.loads(resp.text)`

			`# return empty array if there are no results`
			`if 'data' not in search_results:`
			`return []`

			`posts = search_results.get('data', {}).get('children', [])`

			`# process results`
			`for post in posts:`
			`data = post['data']`

			`# extract post information`
			`params = {`
[fix] incorrect URLs in Reddit results - closes #538 2016-04-16 10:22:31 +00:00			`'url': urljoin(base_url, data['permalink']),`
Add Reddit search engine 2016-03-25 13:30:32 +00:00			`'title': data['title']`
			`}`

			`# if thumbnail field contains a valid URL, we need to change template`
			`thumbnail = data['thumbnail']`
			`url_info = urlparse(thumbnail)`
			`# netloc & path`
			`if url_info[1] != '' and url_info[2] != '':`
[fix] incorrect URLs in Reddit results - closes #538 2016-04-16 10:22:31 +00:00			`params['img_src'] = data['url']`
Add Reddit search engine 2016-03-25 13:30:32 +00:00			`params['thumbnail_src'] = thumbnail`
			`params['template'] = 'images.html'`
			`img_results.append(params)`
			`else:`
			`created = datetime.fromtimestamp(data['created_utc'])`
Shorten content field for very long Reddit search results 2016-03-26 23:09:04 +00:00			`content = escape(data['selftext'])`
			`if len(content) > 500:`
			`content = content[:500] + '...'`
			`params['content'] = content`
Add Reddit search engine 2016-03-25 13:30:32 +00:00			`params['publishedDate'] = created`
			`text_results.append(params)`

			`# show images first and text results second`
			`return img_results + text_results`