[feat] implement hackernews engine - news.ycombinator.com

2024-01-01 19:24:07 +01:00 · 2023-10-03 09:12:28 -06:00 · 2023-10-03 09:12:28 -06:00 · ff78b1a902
commit ff78b1a902
parent 213cb74378
2 changed files with 96 additions and 0 deletions
--- a/searx/engines/hackernews.py
+++ b/searx/engines/hackernews.py
@ -0,0 +1,91 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Hackernews
+"""
+
+from datetime import datetime
+from urllib.parse import urlencode
+from dateutil.relativedelta import relativedelta
+
+from flask_babel import gettext
+
+# Engine metadata
+about = {
+    "website": "https://news.ycombinator.com/",
+    "wikidata_id": "Q686797",
+    "official_api_documentation": "https://hn.algolia.com/api",
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+# Engine configuration
+paging = True
+time_range_support = True
+categories = ["it"]
+results_per_page = 30
+
+# Search URL
+base_url = "https://hn.algolia.com/api/v1"
+
+
+def request(query, params):
+    search_type = 'search'
+    if not query:
+        # if search query is empty show results from HN's front page
+        search_type = 'search_by_date'
+        query_params = {
+            "tags": "front_page",
+            "page": (params["pageno"] - 1),
+        }
+    else:
+        query_params = {
+            "query": query,
+            "page": (params["pageno"] - 1),
+            "hitsPerPage": results_per_page,
+            "minWordSizefor1Typo": 4,
+            "minWordSizefor2Typos": 8,
+            "advancedSyntax": "true",
+            "ignorePlurals": "false",
+            "minProximity": 7,
+            "numericFilters": '[]',
+            "tagFilters": '["story",[]]',
+            "typoTolerance": "true",
+            "queryType": "prefixLast",
+            "restrictSearchableAttributes": '["title","comment_text","url","story_text","author"]',
+            "getRankingInfo": "true",
+        }
+
+        if params['time_range']:
+            search_type = 'search_by_date'
+            timestamp = (datetime.now() - relativedelta(**{f"{params['time_range']}s": 1})).timestamp()
+            query_params["numericFilters"] = f"created_at_i>{timestamp}"
+
+    params["url"] = f"{base_url}/{search_type}?{urlencode(query_params)}"
+    return params
+
+
+def response(resp):
+    results = []
+    data = resp.json()
+
+    for hit in data["hits"]:
+        object_id = hit["objectID"]
+        points = hit["points"] or 0
+        num_comments = hit["num_comments"] or 0
+
+        metadata = ""
+        if points != 0 or num_comments != 0:
+            metadata = f"{gettext('points')}: {points}" f" | {gettext('comments')}: {num_comments}"
+        results.append(
+            {
+                "title": hit["title"] or f"{gettext('author')}: {hit['author']}",
+                "url": f"https://news.ycombinator.com/item?id={object_id}",
+                "content": hit["url"] or hit["comment_text"] or hit["story_text"] or "",
+                "metadata": metadata,
+                "author": hit["author"],
+                "publishedDate": datetime.utcfromtimestamp(hit["created_at_i"]),
+            }
+        )
+
+    return results
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -865,6 +865,11 @@ engines:
      require_api_key: false
      results: HTML

+  - name: hackernews
+    engine: hackernews
+    shortcut: hn
+    disabled: true
+
  - name: hoogle
    engine: xpath
    paging: true