From 0d9d68d96c85151688b6868c8a3721f1839e381f Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 14 Jan 2022 16:18:16 +0100 Subject: [PATCH] [mod] engine deviantart: calculate img_src from thumbnail_src (WIP) To get images with higher resolutions, the img_src can be calculated from the thumbnail_src. WIP: The calculated img_src is not valid for all images, some calculated URIs will end in a `Forbidden` response. I can reproduce it for example with a `!deviantart dali` query: The URI that is calculated for the image with the title `Dali - Pencil` results in a `Forbidden` response. DEBUG searx.engines.deviantart : Dali - Pencil: /v1/fit/w_300,h_592 --> /v1/fit/w_648,h_1280 https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/d5f45947-64b5-4168-bbdc-a5b763e6a10d/d1kvvlh-78d4e8a5-de18-4077-98e1-e282002805a0.jpg/v1/fit/w_648,h_1280,q_70,strp/dali___pencil_by_ckoffler_d1kvvlh-300w.jpg?token=.... Signed-off-by: Markus Heiser --- searx/engines/deviantart.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index e44ac28e5..bfd3030d0 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -4,6 +4,7 @@ Deviantart (Images) """ +import re from urllib.parse import urlencode from lxml import html @@ -31,6 +32,9 @@ time_range_dict = { # search-url base_url = 'https://www.deviantart.com' +w_h_pattern = re.compile(r'/v1/fit/w_([^,]*),h_([^,]*)') +w_h_format = '/v1/fit/w_%(w)s,h_%(h)s' +max_size = 1280 def request(query, params): @@ -69,12 +73,35 @@ def response(resp): continue img_tag = img_tag[0] + url = a_tag.attrib.get('href') + title = img_tag.attrib.get('alt') + img_src = thumbnail_src = img_tag.attrib.get('src') + + w_h = w_h_pattern.search(thumbnail_src) + if w_h: + # calc img_src with higher resolution / aspect ratio + w = int(w_h[1]) + h = int(w_h[2]) + if w > h: + fact = max_size / w + else: + fact = max_size / h + + w = int(fact * w) 
+ h = int(fact * h) + + old_uri = w_h[0] + new_uri = w_h_format % dict(w=w, h=h) + logger.debug("%s: %s --> %s", title, old_uri, new_uri) + img_src = img_src.replace(old_uri, new_uri) + results.append( { 'template': 'images.html', - 'url': a_tag.attrib.get('href'), - 'img_src': img_tag.attrib.get('src'), - 'title': img_tag.attrib.get('alt'), + 'url': url, + 'img_src': img_src, + 'thumbnail_src': thumbnail_src, + 'title': title, } )