[enh] engine - add Tineye reverse image search

Other optional parameters: `&sort=crawl_date` can be appended to search_string to sort results by date, and `&domain=example.org` can be appended to search_string to get results from just one domain. Public instances may relatively quickly get timed out for 3600s. -- Merged from @allendema's commit [1] and slightly modified / see [2]. Related-to: [1] 455b2b4460 Related-to: [2] https://github.com/searx/searx/pull/3040 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2021-10-27 03:04:52 +02:00 · 2021-10-27 03:04:52 +02:00 · 880555e263
commit 880555e263
parent 8f100d7046
2 changed files with 89 additions and 0 deletions
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@ -0,0 +1,83 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """Tineye - Reverse search images
 """
 from json import loads
 from urllib.parse import urlencode
 from datetime import datetime
 # Descriptive metadata about the upstream service: its website, Wikidata
 # entry, whether an official API / API key is involved, and the format of
 # the data this module parses (JSON).
 about = {
    "website": 'https://tineye.com',
    "wikidata_id": 'Q2382535',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
 }
 # NOTE(review): the three settings below look like engine-module options
 # read by the surrounding framework (result category, paging support,
 # safe-search support) -- confirm against the engine loader.
 categories = ['images']
 paging = True
 safesearch = False
 # Scheme and host that every request URL built in `request` starts with.
 base_url = 'https://tineye.com'
 # Path and query-string template appended to `base_url`: `{page}` is filled
 # with the page number and `{query}` with the url-encoded `url=<query>` pair.
 search_string = '/result_json/?page={page}&{query}'
 def request(query, params):
    """Prepare the outgoing TinEye request for a reverse image search.

    :param query: the search term; it is url-encoded and sent as the ``url``
        query parameter (presumably the address of the image to look up).
    :param params: request parameter dict. ``params['pageno']`` is read for
        the page number, ``params['url']`` is set to the final request URL
        and ``params['headers']`` is updated in place.
    :returns: the same ``params`` dict, modified.
    """
    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
    params['url'] = base_url + search_string.format(query=urlencode({'url': query}), page=params['pageno'])
    params['headers'].update(
        {
            'Connection': 'keep-alive',
            # 'deflate' was misspelled as 'defalte', which is not a
            # registered content coding.
            'Accept-Encoding': 'gzip, deflate, br',
            'Host': 'tineye.com',
            'DNT': '1',
            'TE': 'trailers',
        }
    )
    return params
 def response(resp):
    """Convert a TinEye JSON answer into a list of image results.

    :param resp: response object whose ``text`` attribute holds the JSON
        document returned by TinEye.
    :returns: a list with one ``images.html`` result dict per usable match,
        followed by a final ``{'number_of_results': ...}`` entry taken from
        the ``num_matches`` field of the answer.
    :raises KeyError: if an expected field is missing from the answer.
    :raises ValueError: if the answer is not valid JSON or a crawl date
        cannot be parsed.
    """
    json_data = loads(resp.text)
    results = []

    for match in json_data['matches']:
        # A match without a domain or backlink has no page to link to; skip
        # it rather than letting an IndexError discard the whole result page.
        domains = match['domains']
        if not domains or not domains[0]['backlinks']:
            continue
        # Only the first backlink of the first domain is reported.
        backlink = domains[0]['backlinks'][0]

        # The last three characters of the crawl date are dropped before
        # parsing -- presumably to trim the value to a form accepted by
        # ``datetime.fromisoformat``; TODO confirm against the API output.
        published = datetime.fromisoformat(backlink['crawl_date'][:-3])

        results.append(
            {
                'template': 'images.html',
                'url': backlink['backlink'],
                'thumbnail_src': match['image_url'],
                'source': backlink['url'],
                'title': backlink['image_name'],
                'img_src': backlink['url'],
                'format': match['format'],
                # was misspelled 'widht', so the width was never exposed
                # under the key matching its sibling 'height'
                'width': match['width'],
                'height': match['height'],
                'publishedDate': published,
            }
        )

    # Append number of results
    results.append({'number_of_results': json_data['num_matches']})
    return results
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -483,6 +483,12 @@ engines:
    timeout: 3.0
    disabled: true
  - name: tineye
    engine: tineye
    shortcut: tin
    timeout: 9.0
    disabled: true
  - name: etymonline
    engine: xpath
    paging: true