Merge pull request #475 from return42/tineye

[enh] engine - add Tineye reverse image search
This commit is contained in:
Markus Heiser 2022-01-31 08:51:35 +01:00 committed by GitHub
commit 60e7fee47a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 168 additions and 1 deletions

View File

@ -0,0 +1,9 @@
.. _tineye engine:
======
Tineye
======
.. automodule:: searx.engines.tineye
:members:

103
searx/engines/tineye.py Normal file
View File

@ -0,0 +1,103 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This engine implements *Tineye - reverse image search*
Using TinEye, you can search by image or perform what we call a reverse image
search. You can do that by uploading an image or searching by URL. You can also
simply drag and drop your images to start your search. TinEye constantly crawls
the web and adds images to its index. Today, the TinEye index is over 50.2
billion images `[tineye.com] <https://tineye.com/how>`_.
.. hint::
This SearXNG engine only supports *'searching by URL'* and it does not use
the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.
"""
from urllib.parse import urlencode
from datetime import datetime
# Engine metadata shown on the SearXNG "about" / preferences pages.
about = {
    'website': 'https://tineye.com',
    'wikidata_id': 'Q2382535',
    'official_api_documentation': 'https://api.tineye.com/python/docs/',
    # The engine talks to the JSON endpoint of the public website, not to the
    # official (paid) API, hence no API key is needed.
    'use_official_api': False,
    'require_api_key': False,
    'results': 'JSON',
}

# Selects the processor whose ``engine_type`` attribute equals this string.
engine_type = 'online_url_search'

categories = ['general']

# Result pages are requested by page number; no safesearch filter is offered.
paging = True
safesearch = False
# Base address of the TinEye website and the (unofficial) JSON result endpoint.
base_url = 'https://tineye.com'
search_string = '/result_json/?page={page}&{query}'


def request(query, params):
    """Build the TinEye request for the image URL contained in the query.

    :param query: the raw search query; used as the image URL only when
        neither a ``data:image`` URL nor an ``http(s)`` URL was extracted.
    :param params: request parameters; this function reads
        ``params['search_urls']`` (keys ``'data:image'`` and ``'http'``) and
        ``params['pageno']``, and updates ``params['url']`` and
        ``params['headers']`` in place.
    :return: the same ``params`` dict.
    """
    search_urls = params['search_urls']

    # A base64 ``data:image`` URL takes precedence over a plain http(s) URL.
    if search_urls['data:image']:
        query = search_urls['data:image']
    elif search_urls['http']:
        query = search_urls['http']

    query = urlencode({'url': query})

    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
    params['url'] = base_url + search_string.format(query=query, page=params['pageno'])

    params['headers'].update(
        {
            'Connection': 'keep-alive',
            # fixed typo: the content-coding token is "deflate", not "defalte"
            'Accept-Encoding': 'gzip, deflate, br',
            'Host': 'tineye.com',
            'DNT': '1',
            'TE': 'trailers',
        }
    )
    return params
def response(resp):
    """Turn TinEye's JSON answer into a list of SearXNG image results.

    For every entry of ``matches`` one ``images.html`` result is built from
    the match itself (format, size, thumbnail) and from the first backlink of
    its first domain (page URL, image URL, file name, crawl date).  Matches
    that carry no domain or no backlink are skipped, since no result URL can
    be derived from them.

    :param resp: HTTP response object providing a ``json()`` method.
    :return: list of result dicts, followed by one
        ``{'number_of_results': ...}`` entry taken from ``num_matches``.
    """
    results = []

    json_data = resp.json()
    number_of_results = json_data['num_matches']

    for match in json_data['matches']:

        # Only the first backlink of the first domain is used.
        domains = match['domains']
        if not domains or not domains[0]['backlinks']:
            continue
        backlink = domains[0]['backlinks'][0]

        # The last three characters of the crawl date are cut off before
        # parsing -- NOTE(review): presumably a suffix ``fromisoformat`` can
        # not digest; confirm against a live API answer.
        published_date = datetime.fromisoformat(backlink['crawl_date'][:-3])

        results.append(
            {
                'template': 'images.html',
                'url': backlink['backlink'],
                'thumbnail_src': match['image_url'],
                'source': backlink['url'],
                'title': backlink['image_name'],
                'img_src': backlink['url'],
                'format': match['format'],
                # fixed typo: this key used to be spelled 'widht'
                'width': match['width'],
                'height': match['height'],
                'publishedDate': published_date,
            }
        )

    # Append number of results
    results.append({'number_of_results': number_of_results})

    return results

View File

@ -11,6 +11,7 @@ __all__ = [
'OnlineProcessor', 'OnlineProcessor',
'OnlineDictionaryProcessor', 'OnlineDictionaryProcessor',
'OnlineCurrencyProcessor', 'OnlineCurrencyProcessor',
'OnlineUrlSearchProcessor',
'PROCESSORS', 'PROCESSORS',
] ]
@ -24,6 +25,7 @@ from .online import OnlineProcessor
from .offline import OfflineProcessor from .offline import OfflineProcessor
from .online_dictionary import OnlineDictionaryProcessor from .online_dictionary import OnlineDictionaryProcessor
from .online_currency import OnlineCurrencyProcessor from .online_currency import OnlineCurrencyProcessor
from .online_url_search import OnlineUrlSearchProcessor
from .abstract import EngineProcessor from .abstract import EngineProcessor
logger = logger.getChild('search.processors') logger = logger.getChild('search.processors')
@ -33,7 +35,13 @@ PROCESSORS: Dict[str, EngineProcessor] = {}
def get_processor_class(engine_type): def get_processor_class(engine_type):
"""Return processor class according to the ``engine_type``""" """Return processor class according to the ``engine_type``"""
for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]: for c in [
OnlineProcessor,
OfflineProcessor,
OnlineDictionaryProcessor,
OnlineCurrencyProcessor,
OnlineUrlSearchProcessor,
]:
if c.engine_type == engine_type: if c.engine_type == engine_type:
return c return c
return None return None

View File

@ -0,0 +1,42 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Processors for engine-type: ``online_url_search``
"""
import re
from .online import OnlineProcessor
# Compiled patterns, keyed by URL kind, used to pull a URL out of the query
# text.  Each pattern runs from the scheme up to the next space character.
re_search_urls = {
    kind: re.compile(pattern)
    for kind, pattern in (
        ('http', r'https?:\/\/[^ ]*'),
        ('ftp', r'ftps?:\/\/[^ ]*'),
        ('data:image', 'data:image/[^; ]*;base64,[^ ]*'),
    )
}
class OnlineUrlSearchProcessor(OnlineProcessor):
    """Processor class used by ``online_url_search`` engines."""

    engine_type = 'online_url_search'

    def get_params(self, search_query, engine_category):
        """Extend the parent's request params by the URLs found in the query.

        Every pattern of ``re_search_urls`` is searched in
        ``search_query.query``; the first hit per pattern (or ``None``) is
        stored under the pattern's key in ``params['search_urls']``.

        :return: the params dict, or ``None`` when the parent processor
            returns ``None`` or when the query contains no URL at all.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        search_urls = {}
        for kind, pattern in re_search_urls.items():
            found = pattern.search(search_query.query)
            search_urls[kind] = found[0] if found else None

        # Without any URL in the query there is nothing to send to the engine.
        if all(url is None for url in search_urls.values()):
            return None

        params['search_urls'] = search_urls
        return params

View File

@ -483,6 +483,11 @@ engines:
timeout: 3.0 timeout: 3.0
disabled: true disabled: true
- name: tineye
engine: tineye
shortcut: tin
timeout: 9.0
- name: etymonline - name: etymonline
engine: xpath engine: xpath
paging: true paging: true