Merge pull request #475 from return42/tineye

[enh] engine - add Tineye reverse image search
2024-01-01 19:24:07 +01:00 · 2022-01-31 08:51:35 +01:00 · 2022-01-31 08:51:35 +01:00 · 60e7fee47a
commit 60e7fee47a
parent 32c5acb1fb ebd3013a1a
5 changed files with 168 additions and 1 deletions
--- a/docs/src/searx.engines.tineye.rst
+++ b/docs/src/searx.engines.tineye.rst
@ -0,0 +1,9 @@
 .. _tineye engine:
 ======
 Tineye
 ======
 .. automodule:: searx.engines.tineye
  :members:
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@ -0,0 +1,103 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """This engine implements *Tineye - reverse image search*
 Using TinEye, you can search by image or perform what we call a reverse image
 search.  You can do that by uploading an image or searching by URL. You can also
 simply drag and drop your images to start your search.  TinEye constantly crawls
 the web and adds images to its index.  Today, the TinEye index is over 50.2
 billion images `[tineye.com] <https://tineye.com/how>`_.
 .. hint::
   This SearXNG engine only supports *'searching by URL'* and it does not use
   the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.
 """
 from urllib.parse import urlencode
 from datetime import datetime
 # Descriptive metadata about the upstream service this engine queries.
 about = {
     'website': 'https://tineye.com',
     'wikidata_id': 'Q2382535',
     'official_api_documentation': 'https://api.tineye.com/python/docs/',
     'use_official_api': False,
     'require_api_key': False,
     'results': 'JSON',
 }
 # Selects the processor whose ``engine_type`` is 'online_url_search'; it is
 # the one that supplies ``params['search_urls']`` to request().
 engine_type = 'online_url_search'
 categories = ['general']
 # request() forwards ``params['pageno']`` as the ``page`` query argument.
 paging = True
 safesearch = False
 # request() joins these two; ``{query}`` receives the url-encoded ``url=...``.
 base_url = 'https://tineye.com'
 search_string = '/result_json/?page={page}&{query}'
 def request(query, params):
     """Build the TinEye JSON request for a reverse image search by URL.

     The URL to look up is taken from ``params['search_urls']``: a
     ``data:image`` URL is preferred over an ``http`` URL.  When neither is
     set, the raw ``query`` is sent unchanged.

     :param query: the search term, used only as a fallback.
     :param params: request parameters; ``search_urls`` and ``pageno`` are
         read, ``url`` is written and ``headers`` is updated in place.
     :return: the same ``params`` object.
     """
     search_urls = params['search_urls']
     image_url = search_urls['data:image'] or search_urls['http'] or query
     encoded = urlencode({'url': image_url})
     # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
     params['url'] = base_url + search_string.format(query=encoded, page=params['pageno'])
     params['headers'].update(
         {
             'Connection': 'keep-alive',
             # 'deflate' is the registered content-coding name; the previous
             # spelling 'defalte' is not a valid coding.
             'Accept-Encoding': 'gzip, deflate, br',
             'Host': 'tineye.com',
             'DNT': '1',
             'TE': 'trailers',
         }
     )
     return params
 def response(resp):
     """Parse TinEye's JSON reply into image results.

     Every entry of ``matches`` yields one ``images.html`` result built from
     the first backlink of its first domain.  Matches that carry no domain or
     no backlink are skipped instead of raising ``IndexError``.  The last item
     of the returned list reports ``num_matches``.

     :param resp: response object whose ``json()`` returns the TinEye payload.
     :return: list of result dicts followed by ``{'number_of_results': ...}``.
     """
     json_data = resp.json()
     number_of_results = json_data['num_matches']
     results = []
     for match in json_data['matches']:
         domains = match['domains']
         # Without a backlink there is no page or image URL to show.
         if not domains or not domains[0]['backlinks']:
             continue
         backlink = domains[0]['backlinks'][0]
         # The last three characters of ``crawl_date`` are dropped before
         # parsing -- presumably surplus sub-second digits; TODO confirm
         # against the actual payload.
         published_date = datetime.fromisoformat(backlink['crawl_date'][:-3])
         results.append(
             {
                 'template': 'images.html',
                 'url': backlink['backlink'],
                 'thumbnail_src': match['image_url'],
                 'source': backlink['url'],
                 'title': backlink['image_name'],
                 'img_src': backlink['url'],
                 'format': match['format'],
                 # This key was previously misspelled as 'widht'.
                 'width': match['width'],
                 'height': match['height'],
                 'publishedDate': published_date,
             }
         )
     results.append({'number_of_results': number_of_results})
     return results
--- a/searx/search/processors/init.py
+++ b/searx/search/processors/init.py
@ -11,6 +11,7 @@ __all__ = [
    'OnlineProcessor',
    'OnlineDictionaryProcessor',
    'OnlineCurrencyProcessor',
    'OnlineUrlSearchProcessor',
    'PROCESSORS',
 ]
@ -24,6 +25,7 @@ from .online import OnlineProcessor
 from .offline import OfflineProcessor
 from .online_dictionary import OnlineDictionaryProcessor
 from .online_currency import OnlineCurrencyProcessor
 from .online_url_search import OnlineUrlSearchProcessor
 from .abstract import EngineProcessor
 logger = logger.getChild('search.processors')
@ -33,7 +35,13 @@ PROCESSORS: Dict[str, EngineProcessor] = {}
 def get_processor_class(engine_type):
    """Return processor class according to the ``engine_type``"""
-    for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
+    for c in [
        OnlineProcessor,
        OfflineProcessor,
        OnlineDictionaryProcessor,
        OnlineCurrencyProcessor,
        OnlineUrlSearchProcessor,
    ]:
        if c.engine_type == engine_type:
            return c
    return None
--- a/searx/search/processors/online_url_search.py
+++ b/searx/search/processors/online_url_search.py
@ -0,0 +1,42 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """Processores for engine-type: ``online_url_search``
 """
 import re
 from .online import OnlineProcessor
 # One compiled pattern per kind of URL that may appear in a query.  Each
 # pattern matches from its prefix up to (not including) the next space.
 re_search_urls = {
     kind: re.compile(pattern)
     for kind, pattern in (
         ('http', r'https?:\/\/[^ ]*'),
         ('ftp', r'ftps?:\/\/[^ ]*'),
         ('data:image', 'data:image/[^; ]*;base64,[^ ]*'),
     )
 }
 class OnlineUrlSearchProcessor(OnlineProcessor):
     """Processor for engines whose ``engine_type`` is ``online_url_search``."""

     engine_type = 'online_url_search'

     def get_params(self, search_query, engine_category):
         """Return the request params extended by ``search_urls``, or ``None``.

         ``search_urls`` maps every key of ``re_search_urls`` to the first
         match of that pattern in ``search_query.query``, or to ``None`` when
         the pattern does not match.  ``None`` is returned when the parent
         processor yields no params or when none of the patterns match.
         """
         params = super().get_params(search_query, engine_category)
         if params is None:
             return None
         search_urls = {}
         for kind, pattern in re_search_urls.items():
             found = pattern.search(search_query.query)
             search_urls[kind] = found[0] if found else None
         # No pattern matched: the query holds nothing this engine can use.
         if all(url is None for url in search_urls.values()):
             return None
         params['search_urls'] = search_urls
         return params
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -483,6 +483,11 @@ engines:
    timeout: 3.0
    disabled: true
  - name: tineye
    engine: tineye
    shortcut: tin
    timeout: 9.0
  - name: etymonline
    engine: xpath
    paging: true