From a174dcf83a61aca1dd1c6e96e86e4356e1e13333 Mon Sep 17 00:00:00 2001
From: Joseph Cheung
Date: Fri, 24 Feb 2023 08:25:15 +0800
Subject: [PATCH] Update google_internal_search.py

---
 searx/engines/google_internal_search.py | 107 ++----------------------
 1 file changed, 9 insertions(+), 98 deletions(-)

diff --git a/searx/engines/google_internal_search.py b/searx/engines/google_internal_search.py
index 2fa07e1ab..ed5fde8ad 100644
--- a/searx/engines/google_internal_search.py
+++ b/searx/engines/google_internal_search.py
@@ -19,8 +19,8 @@ The implementation is shared by other engines:
 
 from urllib.parse import urlencode
 from json import loads, dumps
-from datetime import datetime, timedelta
-from dateutil.tz import tzoffset
+from datetime import datetime
+from zoneinfo import ZoneInfo
 from babel.dates import format_datetime
 import babel
 from searx.utils import html_to_text
@@ -91,50 +91,25 @@ def get_query_url_images(query, lang_info, query_params):
     )
 
 
-def get_query_url_news(query, lang_info, query_params):
-    return (
-        'https://'
-        + lang_info['subdomain']
-        + '/search'
-        + "?"
-        + urlencode(
-            {
-                'q': query,
-                'tbm': "nws",
-                **query_params,
-            }
-        )
-    )
-
-
 CATEGORY_TO_GET_QUERY_URL = {
     'general': get_query_url_general,
     'images': get_query_url_images,
-    'news': get_query_url_news,
-}
-
-CATEGORY_RESULT_COUNT_PER_PAGE = {
-    'general': 10,
-    'images': 100,
-    'news': 10,
 }
 
 
 def request(query, params):
     """Google search request"""
 
-    result_count_per_page = CATEGORY_RESULT_COUNT_PER_PAGE[categories[0]]  # pylint: disable=unsubscriptable-object
-
-    offset = (params['pageno'] - 1) * result_count_per_page
+    offset = (params['pageno'] - 1) * 10
 
     lang_info = get_lang_info(params, supported_languages, language_aliases, True)
 
     query_params = {
         **lang_info['params'],
-        'ie': 'utf8',
-        'oe': 'utf8',
+        'ie': "utf8",
+        'oe': "utf8",
+        'num': 30,
         'start': offset,
-        'num': result_count_per_page,
         'filter': '0',
         'asearch': 'arc',
         'async': 'use_ac:true,_fmt:json',
@@ -207,7 +182,6 @@ class ParseResultGroupItem:
             "WEB_ANSWERS_CARD_BLOCK": self.web_answers_card_block,
             "IMAGE_RESULT_GROUP": self.image_result_group,
             "TWITTER_RESULT_GROUP": self.twitter_result_group,
-            "NEWS_WHOLEPAGE": self.news_wholepage,
             # WHOLEPAGE_PAGE_GROUP - found for keyword what is t in English language
             # EXPLORE_UNIVERSAL_BLOCK
             # TRAVEL_ANSWERS_RESULT
@@ -376,68 +350,10 @@ class ParseResultGroupItem:
             return results
 
         for item in item_to_parse["image_result_group_element"]:
+            print(item)
             results.append(parse_search_feature_proto(item["image_result"]))
         return results
 
-    def news_wholepage(self, item_to_parse):
-        """Parse a news search result"""
-
-        def iter_snippets():
-            """Iterate over all the results, yield result_index, snippet to deal with nested structured"""
-            result_index = 0
-            for item in item_to_parse["element"]:
-                if "news_singleton_result_group" in item:
-                    payload = item["news_singleton_result_group"]["result"]["payload"]["liquid_item_data"]
-                    yield result_index, payload["article"]["stream_simplified_snippet"]
-                    result_index += 1
-                    continue
-
-                if "top_coverage" in item:
-                    for element in item["top_coverage"]["element"]:
-                        yield result_index, element["result"]["payload"]["liquid_item_data"]["article"][
-                            "stream_simplified_snippet"
-                        ]
-                        result_index += 1
-                    continue
-
-                if "news_sports_hub_result_group" in item:
-                    for element in item["news_sports_hub_result_group"]["element"]:
-                        yield result_index, element["result"]["payload"]["liquid_item_data"]["article"][
-                            "stream_simplified_snippet"
-                        ]
-                        result_index += 1
-                    continue
-
-                if "news_topic_hub_refinements_result_group" in item:
-                    for ref_list in item["news_topic_hub_refinements_result_group"]["refinements"]["refinement_list"]:
-                        for result in ref_list["results"]:
-                            yield result_index, result["payload"]["liquid_item_data"]["article"][
-                                "stream_simplified_snippet"
-                            ]
-                            result_index += 1
-                    continue
-
-                print("unknow news", item)
-
-        results = []
-        for result_index, snippet in iter_snippets():
-            publishedDate = snippet["date"]["timestamp"]
-            url = snippet["url"]["result_url"]
-            title = html_to_text(snippet["title"]["text"])
-            content = html_to_text(snippet["snippet"]["snippet"])
-            img_src = snippet.get("thumbnail_info", {}).get("sffe_50k_thumbnail_url")
-            results.append(
-                {
-                    'url': url,
-                    'title': title,
-                    'content': content,
-                    'img_src': img_src,
-                    'publishedDate': datetime.fromtimestamp(publishedDate),
-                    "result_index": result_index,
-                }
-            )
-        return results
-
 
 class ParseResultItem:  # pylint: disable=too-few-public-methods
     """Parse result_search_feature_proto.search_feature_proto"""
@@ -457,13 +373,7 @@ class ParseResultItem:  # pylint: disable=too-few-public-methods
         timezones_0 = item_to_parse["payload"]["target_location"]["timezones"][0]
         iana_timezone = timezones_0["iana_timezone"]
         localized_location = timezones_0["localized_location"]
-        # parse timezone_abbrev_specific to create result_tz
-        # timezone_abbrev_specific for India is "UTC+5:30" and for New York is "UTC−4"
-        # the values for offsets are respectively ["5", "30", "0"] and ["-4": "0"]
-        timezone_abbrev_specific = timezones_0["timezone_abbrev_specific"]
-        offsets = timezone_abbrev_specific.replace("UTC", "").replace("GMT", "").replace("−", "-").split(":")
-        offsets.append("0")
-        result_tz = tzoffset(iana_timezone, timedelta(hours=int(offsets[0]), minutes=int(offsets[1])))
+        result_tz = ZoneInfo(iana_timezone)
         result_dt = datetime.fromtimestamp(seconds_utc, tz=result_tz)
         result_dt_str = format_datetime(result_dt, 'long', tzinfo=result_tz, locale=self.locale)
         answer = f"{result_dt_str} ( {localized_location} )"
@@ -480,6 +390,7 @@ def parse_web_results_list(json_data, locale):
         results_list = tier_1_search_results["result_list"]["item"]
 
     if "spell_suggestion" in tier_1_search_results:
+        print(tier_1_search_results["spell_suggestion"])
        spell_suggestion = tier_1_search_results["spell_suggestion"]
         if "spell_column" in spell_suggestion:
             for spell_suggestion in tier_1_search_results["spell_suggestion"]["spell_column"]: