# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Hackernews (news.ycombinator.com), queried through the Algolia
"HN Search" API (https://hn.algolia.com/api).

With an empty query the engine returns the current front page
(newest first); otherwise it performs a relevance-ranked full-text
search restricted to stories.
"""

from datetime import datetime
from urllib.parse import urlencode

from dateutil.relativedelta import relativedelta
from flask_babel import gettext

# Engine metadata
about = {
    "website": "https://news.ycombinator.com/",
    "wikidata_id": "Q686797",
    "official_api_documentation": "https://hn.algolia.com/api",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}

# Engine configuration
paging = True
time_range_support = True
categories = ["it"]
results_per_page = 30

# Base URL of the Algolia HN Search API
base_url = "https://hn.algolia.com/api/v1"


def request(query, params):
    """Build the outgoing API URL into ``params['url']``.

    :param query: the user's search terms; empty string means
        "show the HN front page".
    :param params: SearXNG request params dict; reads ``pageno`` and
        ``time_range``, writes ``url``.
    :returns: the (mutated) ``params`` dict.
    """
    search_type = 'search'
    if not query:
        # if search query is empty show results from HN's front page,
        # sorted by date rather than relevance
        search_type = 'search_by_date'
        query_params = {
            "tags": "front_page",
            # Algolia pages are 0-based, SearXNG's pageno is 1-based
            "page": (params["pageno"] - 1),
            # FIX: honour results_per_page here too; previously the
            # front-page mode omitted hitsPerPage and silently fell
            # back to the API's default page size.
            "hitsPerPage": results_per_page,
        }
    else:
        query_params = {
            "query": query,
            "page": (params["pageno"] - 1),
            "hitsPerPage": results_per_page,
            "minWordSizefor1Typo": 4,
            "minWordSizefor2Typos": 8,
            "advancedSyntax": "true",
            "ignorePlurals": "false",
            "minProximity": 7,
            "numericFilters": '[]',
            # restrict hits to stories (not bare comments/polls)
            "tagFilters": '["story",[]]',
            "typoTolerance": "true",
            "queryType": "prefixLast",
            "restrictSearchableAttributes": '["title","comment_text","url","story_text","author"]',
            "getRankingInfo": "true",
        }

    if params['time_range']:
        # A time-range search is only meaningful sorted by date.
        search_type = 'search_by_date'
        # cutoff = now minus one <time_range> (e.g. "day" -> days=1)
        timestamp = (datetime.now() - relativedelta(**{f"{params['time_range']}s": 1})).timestamp()
        query_params["numericFilters"] = f"created_at_i>{timestamp}"

    params["url"] = f"{base_url}/{search_type}?{urlencode(query_params)}"
    return params


def response(resp):
    """Parse the Algolia JSON response into SearXNG result dicts.

    :param resp: HTTP response object whose body is Algolia JSON with
        a top-level ``hits`` list.
    :returns: list of result dicts (title, url, content, metadata,
        author, publishedDate).
    """
    results = []
    data = resp.json()

    for hit in data["hits"]:
        object_id = hit["objectID"]
        # points / num_comments may be null in the API response
        points = hit["points"] or 0
        num_comments = hit["num_comments"] or 0

        metadata = ""
        if points != 0 or num_comments != 0:
            metadata = f"{gettext('points')}: {points}" f" | {gettext('comments')}: {num_comments}"
        results.append(
            {
                # items without a title (e.g. comments) fall back to the author
                "title": hit["title"] or f"{gettext('author')}: {hit['author']}",
                "url": f"https://news.ycombinator.com/item?id={object_id}",
                "content": hit["url"] or hit["comment_text"] or hit["story_text"] or "",
                "metadata": metadata,
                "author": hit["author"],
                # NOTE(review): utcfromtimestamp is deprecated since
                # Python 3.12; created_at_i is a Unix epoch, so
                # datetime.fromtimestamp(..., timezone.utc) would be the
                # modern equivalent — but it returns an *aware* datetime,
                # so confirm downstream date handling before switching.
                "publishedDate": datetime.utcfromtimestamp(hit["created_at_i"]),
            }
        )

    return results