From 3a456b1282f972c1b59ad8d39bfc73f1007eb9a9 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 26 Sep 2023 11:00:08 +0200 Subject: [PATCH] [fix] engine annas archive - fetch traits (modified xpath selectors) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anna’s Archive has cleaned up their languages, available file extensions and changed the HTML form. Signed-off-by: Markus Heiser --- searx/data/engine_traits.json | 70 +++++----------------------------- searx/engines/annas_archive.py | 6 +-- 2 files changed, 12 insertions(+), 64 deletions(-) diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index d0b59dc51..9cbaa210b 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -3,38 +3,24 @@ "all_locale": "", "custom": { "content": [ - "", - "journal_article", - "book_any", + "book_nonfiction", "book_fiction", "book_unknown", - "book_nonfiction", + "journal_article", "book_comic", "magazine", "standards_document" ], "ext": [ - "", "pdf", "epub", "cbr", - "fb2", "mobi", + "fb2", "cbz", - "djvu", "azw3", - "fb2.zip", - "txt", - "rar", - "zip", - "doc", - "lit", - "rtf", - "htm", - "html", - "lrf", - "mht", - "docx" + "djvu", + "fb2.zip" ], "sort": [ "", @@ -48,84 +34,46 @@ "languages": { "af": "af", "ar": "ar", - "az": "az", "be": "be", "bg": "bg", "bn": "bn", - "bo": "bo", - "bs": "bs", "ca": "ca", "cs": "cs", + "cy": "cy", "da": "da", "de": "de", "el": "el", "en": "en", - "eo": "eo", "es": "es", - "et": "et", - "eu": "eu", "fa": "fa", - "fi": "fi", - "fil": "tl", "fr": "fr", - "gl": "gl", - "gu": "gu", "he": "he", "hi": "hi", "hr": "hr", "hu": "hu", - "hy": "hy", "id": "id", - "is": "is", "it": "it", "ja": "ja", - "ka": "ka", + "jv": "jv", "kk": "kk", - "kn": "kn", "ko": "ko", - "ku": "ku", - "ky": "ky", - "lo": "lo", "lt": "lt", "lv": "lv", - "mk": "mk", - "ml": "ml", "mn": "mn", - "mr": "mr", - "ms": "ms", - "my": "my", - "nb": "nb", - "ne": "ne", "nl": "nl", "no": "no", - "pa": "pa", "pl": "pl", - "ps": "ps", "pt": "pt", "ro": "ro", "ru": "ru", - "sa": "sa", - "sd": "sd", - "si": "si", - "sk": "sk", - "sl": "sl", - "so": "so", - "sq": "sq", "sr": "sr", "sv": "sv", - "sw": "sw", "ta": "ta", - "te": "te", - "tg": "tg", "tr": "tr", - "tt": "tt", - "ug": "ug", "uk": "uk", - "ur": "ur", - "uz": "uz", "vi": "vi", - "yi": "yi", - "zh": "zh" + "zh": "zh", + "zh_Hant": "zh-Hant" }, "regions": {} }, diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py index 1bcdeeec6..a2db32ab4 100644 --- a/searx/engines/annas_archive.py +++ b/searx/engines/annas_archive.py @@ -159,7 +159,7 @@ def fetch_traits(engine_traits: EngineTraits): # supported language codes lang_map = {} - for x in eval_xpath_list(dom, "//form//select[@name='lang']//option"): + for x in eval_xpath_list(dom, "//form//input[@name='lang']"): eng_lang = x.get("value") if eng_lang in ('', '_empty', 'nl-BE', 'und'): continue @@ -177,10 +177,10 @@ def fetch_traits(engine_traits: EngineTraits): continue engine_traits.languages[sxng_lang] = eng_lang - for x in eval_xpath_list(dom, "//form//select[@name='content']//option"): + for x in eval_xpath_list(dom, "//form//input[@name='content']"): engine_traits.custom['content'].append(x.get("value")) - for x in eval_xpath_list(dom, "//form//select[@name='ext']//option"): + for x in eval_xpath_list(dom, "//form//input[@name='ext']"): engine_traits.custom['ext'].append(x.get("value")) for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):