[mod] modularize & document searx.results

The intention of this patch is to improve modularization & documentation of the implementations about the *result* items. This patch does not contain any functional change! Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-01-01 19:24:07 +01:00 · 2022-06-21 13:14:11 +02:00 · 2022-06-21 13:14:11 +02:00 · 3d473a773d
commit 3d473a773d
parent cee586029c
15 changed files with 926 additions and 256 deletions
--- a/docs/dev/engine_overview.rst
+++ b/docs/dev/engine_overview.rst
@ -120,6 +120,11 @@ module:
 Making a Request
 ================

+.. sidebar:: info
+
+   - Demo of :py:obj:`request(query, params)
+     <searx.engines.demo_online.request>` function.
+
 To perform a search an URL have to be specified.  In addition to specifying an
 URL, arguments can be passed to the query.

@ -205,109 +210,18 @@ following parameters can be used to specify a search request:
   raise_for_httperror bool        True by default: raise an exception if the HTTP code of response is >= 300
   =================== =========== ==========================================================================

+.. _engine response:

-.. _engine results:
-.. _engine media types:
+Making a Response
+=================

-Media Types
-===========
+.. sidebar:: info

-Each result item of an engine can be of different media-types.  Currently the
-following media-types are supported.  To set another media-type as ``default``,
-the parameter ``template`` must be set to the desired type.
+   - Demo of a :py:obj:`response(resp) <searx.engines.demo_online.response>` function.

-.. table::  Parameter of the **default** media type:
-   :width: 100%
+In the ``response`` function of the engine, the HTTP response (``resp``) is
+parsed and a list of results is returned.

-   ========================= =====================================================
-   result-parameter          information
-   ========================= =====================================================
-   url                       string, url of the result
-   title                     string, title of the result
-   content                   string, general result-text
-   publishedDate             :py:class:`datetime.datetime`, time of publish
-   ========================= =====================================================
-
-
-.. table::  Parameter of the **images** media type:
-   :width: 100%
-
-   ========================= =====================================================
-   result-parameter          information
-   ------------------------- -----------------------------------------------------
-   template                  is set to ``images.html``
-   ========================= =====================================================
-   url                       string, url to the result site
-   title                     string, title of the result *(partly implemented)*
-   content                   *(partly implemented)*
-   publishedDate             :py:class:`datetime.datetime`,
-                             time of publish *(partly implemented)*
-   img\_src                  string, url to the result image
-   thumbnail\_src            string, url to a small-preview image
-   ========================= =====================================================
-
-
-.. table::  Parameter of the **videos** media type:
-   :width: 100%
-
-   ========================= =====================================================
-   result-parameter          information
-   ------------------------- -----------------------------------------------------
-   template                  is set to ``videos.html``
-   ========================= =====================================================
-   url                       string, url of the result
-   title                     string, title of the result
-   content                   *(not implemented yet)*
-   publishedDate             :py:class:`datetime.datetime`, time of publish
-   thumbnail                 string, url to a small-preview image
-   ========================= =====================================================
-
-.. _magnetlink: https://en.wikipedia.org/wiki/Magnet_URI_scheme
-
-.. table::  Parameter of the **torrent** media type:
-   :width: 100%
-
-   ========================= =====================================================
-   result-parameter          information
-   ------------------------- -----------------------------------------------------
-   template                  is set to ``torrent.html``
-   ========================= =====================================================
-   url                       string, url of the result
-   title                     string, title of the result
-   content                   string, general result-text
-   publishedDate             :py:class:`datetime.datetime`,
-                             time of publish *(not implemented yet)*
-   seed                      int, number of seeder
-   leech                     int, number of leecher
-   filesize                  int, size of file in bytes
-   files                     int, number of files
-   magnetlink                string, magnetlink_ of the result
-   torrentfile               string, torrentfile of the result
-   ========================= =====================================================
-
-.. table::  Parameter of the **map** media type:
-   :width: 100%
-
-   ========================= =====================================================
-   result-parameter          information
-   ------------------------- -----------------------------------------------------
-   template                  is set to ``map.html``
-   ========================= =====================================================
-   url                       string, url of the result
-   title                     string, title of the result
-   content                   string, general result-text
-   publishedDate             :py:class:`datetime.datetime`, time of publish
-   latitude                  latitude of result (in decimal format)
-   longitude                 longitude of result (in decimal format)
-   boundingbox               boundingbox of result (array of 4. values
-                             ``[lat-min, lat-max, lon-min, lon-max]``)
-   geojson                   geojson of result (https://geojson.org/)
-   osm.type                  type of osm-object (if OSM-Result)
-   osm.id                    id of osm-object (if OSM-Result)
-   address.name              name of object
-   address.road              street name of object
-   address.house_number      house number of object
-   address.locality          city, place of object
-   address.postcode          postcode of object
-   address.country           country of object
-   ========================= =====================================================
+An engine can append result-items of different media-types and different
+result-types to the result list.  The list of the result items is rendered to
+HTML by templates.  For more details read section :ref:`engine results`.
--- a/docs/dev/searxng_extra/update.rst
+++ b/docs/dev/searxng_extra/update.rst
@ -61,6 +61,8 @@ Scripts to update static data in :origin:`searx/data/`
  :members:


+.. _update_osm_keys_tags.py:
+
 ``update_osm_keys_tags.py``
 ===========================

--- a/docs/src/searx.results.rst
+++ b/docs/src/searx.results.rst
@ -0,0 +1,466 @@
+.. _engine results:
+.. _searx.results:
+
+==============
+Engine Results
+==============
+
+.. automodule:: searx.results
+  :members:
+
+The result items are organized in the :py:obj:`container.ResultContainer` and
+rendered in the :ref:`result template macros` and :ref:`result template files`.
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+.. _standard result:
+
+Result items
+============
+
+A result **item** is a python dictionary with dedicated keys and values.  In the
+result list a **standard result type** is identified by the existence of the key
+``url``.  Other **result types** are:
+
+- :py:obj:`searx.results.suggestion`
+- :py:obj:`searx.results.answer`
+- :py:obj:`searx.results.correction`
+- :py:obj:`searx.results.infobox`
+
+The **standard result type**:
+
+.. code:: python
+
+   results.append({
+       'template'      : str,
+
+       # result_header
+
+       'url'           : str,
+       'title'         : str,
+       'content'       : str,
+       'img_src'       : str,
+       'thumbnail'     : str,
+
+       # result_sub_header
+
+       'publishedDate' : datetime.datetime,
+       'length'        : time.struct_time,
+       'author'        : str,
+       'metadata'      : str,
+   })
+
+template : ``str``
+  :ref:`Media type <result media types>` of the result item.  Name of the
+  :ref:`template file <result template files>` from :origin:`result_templates
+  <searx/templates/simple/result_templates>`.  If unset, ``default.html`` is
+  used.
+
+.. hint::
+
+   Each **standard result type** of an engine can be of different
+   :ref:`media-types <result media types>`.
+
+
+.. _result template macros:
+
+Result template macros
+======================
+
+.. _macro result_header:
+
+``result_header``
+-----------------
+
+Except ``images.html``, this macro is used in all :ref:`result template files`.
+Fields used in the template :origin:`macro result_header
+<searx/templates/simple/macros.html>`:
+
+url :  ``str``
+  Link URL of the result item.
+
+title :  ``str``
+  Link title of the result item.
+
+img_src, thumbnail : ``str``
+  URL of an image or thumbnail that is displayed in the result item.
+
+.. _macro result_sub_header:
+
+``result_sub_header``
+---------------------
+
+Except ``images.html``, this macro is used in all :ref:`result template files`.
+Fields used in the template :origin:`macro result_sub_header
+<searx/templates/simple/macros.html>`:
+
+publishedDate : :py:obj:`datetime.datetime`
+  The date on which the object was published.
+
+length : :py:obj:`time.struct_time`
+  Playing duration in seconds.
+
+author : ``str``
+  Author of the title.
+
+metadata : ``str``
+  Miscellaneous metadata.
+
+.. _engine_data:
+
+``engine_data_form``
+--------------------
+
+The ``engine_data_form`` macro is used in :origin:`results.html
+<searx/templates/simple/results.html>` in an HTML ``<form/>`` element.  The
+intention of this macro is to pass data of an engine from one :py:obj:`response
+<searx.engines.demo_online.response>` to the :py:obj:`searx.search.SearchQuery`
+of the next :py:obj:`request <searx.engines.demo_online.request>`.
+
+.. hint::
+
+   The engine-data values are transferred to the next request when the user
+   presses the "next page" button.  When a new search request is made, the
+   engine-data are removed from the client request.
+
+To pass data, the engine's response handler can append result items of type
+``engine_data``.  This is used, for example, to pass a token from the response
+to the next request:
+
+.. code:: python
+
+   def response(resp):
+       ...
+       results.append({
+          'engine_data': token,
+          'key': 'next_page_token',
+       })
+       ...
+       return results
+
+   def request(query, params):
+       page_token = params['engine_data'].get('next_page_token')
+
+.. _result media types:
+.. _result template files:
+
+Result template files
+=====================
+
+The **media types** of the **standard result type** are the template files in
+the :origin:`result_templates <searx/templates/simple/result_templates>`.
+
+``default.html``
+----------------
+
+Displays result fields from:
+
+- :ref:`macro result_header` and
+- :ref:`macro result_sub_header`
+
+Additional fields used in the :origin:`default.html
+<searx/templates/simple/result_templates/default.html>`:
+
+content :  ``str``
+  General text of the result item.
+
+iframe_src : ``str``
+  URL of an embedded ``<iframe>`` / the frame is collapsible.
+
+audio_src : ``str``
+  URL of an embedded ``<audio controls>``.
+
+
+``code.html``
+-------------
+
+Displays result fields from:
+
+- :ref:`macro result_header` and
+- :ref:`macro result_sub_header`
+
+Additional fields used in the :origin:`code.html
+<searx/templates/simple/result_templates/code.html>`:
+
+content :  ``str``
+  Description of the code fragment.
+
+codelines : ``[line1, line2, ...]``
+  Lines of the code fragment.
+
+code_language : ``str``
+  Name of the code language, the value is passed to
+  :py:obj:`pygments.lexers.get_lexer_by_name`.
+
+repository : ``str``
+  URL of the repository of the code fragment.
+
+
+``images.html``
+---------------
+
+Fields used in the :origin:`images.html
+<searx/templates/simple/result_templates/images.html>`:
+
+title :  ``str``
+  Title of the image.
+
+thumbnail_src : ``str``
+  URL of a preview of the image.
+
+img_src : ``str``
+  URL of the full size image.
+
+Image labels
+~~~~~~~~~~~~
+
+content :  ``str``
+  Description of the image.
+
+author :  ``str``
+  Name of the author of the image.
+
+img_format : ``str``
+  Format of the image.
+
+source : ``str``
+  Source of the image.
+
+url :  ``str``
+  URL of the page where the image comes from (source).
+
+
+``videos.html``
+---------------
+
+Displays result fields from:
+
+- :ref:`macro result_header` and
+- :ref:`macro result_sub_header`
+
+Additional fields used in the :origin:`videos.html
+<searx/templates/simple/result_templates/videos.html>`:
+
+iframe_src : ``str``
+  URL of an embedded ``<iframe>`` / the frame is collapsible.
+
+content :  ``str``
+  Description of the video.
+
+
+``map.html``
+------------
+
+.. _GeoJSON: https://en.wikipedia.org/wiki/GeoJSON
+.. _Leaflet: https://github.com/Leaflet/Leaflet
+.. _bbox: https://wiki.openstreetmap.org/wiki/Bounding_Box
+.. _HTMLElement.dataset: https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement/dataset
+.. _Nominatim: https://nominatim.org/release-docs/latest/
+.. _Lookup: https://nominatim.org/release-docs/latest/api/Lookup/
+.. _place_id is not a persistent id:
+    https://nominatim.org/release-docs/latest/api/Output/#place_id-is-not-a-persistent-id
+.. _perma_id: https://wiki.openstreetmap.org/wiki/Permanent_ID
+.. _country code: https://wiki.openstreetmap.org/wiki/Country_code
+
+Displays result fields from:
+
+- :ref:`macro result_header` and
+- :ref:`macro result_sub_header`
+
+Additional fields used in the :origin:`map.html
+<searx/templates/simple/result_templates/map.html>`:
+
+content :  ``str``
+  Description of the item.
+
+address_label : ``str``
+  Label of the address / default ``_('address')``.
+
+geojson : GeoJSON_
+  Geometries mapped to HTMLElement.dataset_ (``data-map-geojson``) and used by
+  Leaflet_.
+
+boundingbox : ``[ min-lon, min-lat, max-lon, max-lat]``
+  A bbox_ area defined by min longitude, min latitude, max longitude and max
+  latitude.  The bounding box is mapped to HTMLElement.dataset_
+  (``data-map-boundingbox``) and is used by Leaflet_.
+
+longitude, latitude : ``str``
+  Geographical coordinates, mapped to HTMLElement.dataset_ (``data-map-lon``,
+  ``data-map-lat``) and is used by Leaflet_.
+
+address : ``{...}``
+  A dictionary with the address data:
+
+  .. code:: python
+
+     address = {
+         'name'          : str,  # name of object
+         'road'          : str,  # street name of object
+         'house_number'  : str,  # house number of object
+         'postcode'      : str,  # postcode of object
+         'country'       : str,  # country of object
+         'country_code'  : str,
+         'locality'      : str,
+     }
+
+  country_code : ``str``
+    `Country code`_ of the object.
+
+  locality : ``str``
+    The name of the city, town, township, village, borough, etc. in which this
+    object is located.
+
+links : ``[link1, link2, ...]``
+  A list of links with labels:
+
+  .. code:: python
+
+     links.append({
+         'label'       : str,
+         'url'         : str,
+         'url_label'   : str,  # set by some engines but unused (oscar)
+     })
+
+data : ``[data1, data2, ...]``
+  A list of additional data, shown in two columns and containing a label and
+  value.
+
+  .. code:: python
+
+     data.append({
+        'label'   : str,
+        'value'   : str,
+        'key'     : str,  # set by some engines but unused
+     })
+
+type : ``str``  # set by some engines but unused (oscar)
+  Tag label from :ref:`OSM_KEYS_TAGS['tags'] <update_osm_keys_tags.py>`.
+
+type_icon : ``str``  # set by some engines but unused (oscar)
+  Type's icon.
+
+osm : ``{...}``
+  OSM-type and OSM-ID, can be used to Lookup_ OSM data (Nominatim_). There is
+  also a discussion about "`place_id is not a persistent id`_" and the
+  perma_id_.
+
+  .. code:: python
+
+     osm = {
+         'type': str,
+         'id':   str,
+     }
+
+  type : ``str``
+    Type of osm-object (if OSM-Result).
+
+  id : ``str``
+    ID of osm-object (if OSM-Result).
+
+  .. hint::
+
+     The ``osm`` property is set by engine ``openstreetmap.py``, but it is not
+     used in the ``map.html`` template yet.
+
+
+``products.html``
+-----------------
+
+Displays result fields from:
+
+- :ref:`macro result_header` and
+- :ref:`macro result_sub_header`
+
+Additional fields used in the :origin:`products.html
+<searx/templates/simple/result_templates/products.html>`:
+
+content :  ``str``
+  Description of the product.
+
+price : ``str``
+  The price must include the currency.
+
+shipping : ``str``
+  Shipping details.
+
+source_country : ``str``
+  Place from which the shipment is made.
+
+
+``torrent.html``
+----------------
+
+.. _magnet link: https://en.wikipedia.org/wiki/Magnet_URI_scheme
+.. _torrent file: https://en.wikipedia.org/wiki/Torrent_file
+
+Displays result fields from:
+
+- :ref:`macro result_header` and
+- :ref:`macro result_sub_header`
+
+Additional fields used in the :origin:`torrent.html
+<searx/templates/simple/result_templates/torrent.html>`:
+
+magnetlink : ``str``
+  URL of the `magnet link`_.
+
+torrentfile : ``str``
+  URL of the `torrent file`_.
+
+seed : ``int``
+  Number of seeders.
+
+leech : ``int``
+  Number of leechers.
+
+filesize : ``int``
+  Size in Bytes (rendered to human readable unit of measurement).
+
+files : ``int``
+  Number of files.
+
+
+Suggestion results
+==================
+
+.. automodule:: searx.results.suggestion
+  :members:
+
+
+Answer results
+==============
+
+.. automodule:: searx.results.answer
+  :members:
+
+
+Correction results
+==================
+
+.. automodule:: searx.results.correction
+  :members:
+
+
+Infobox results
+===============
+
+.. automodule:: searx.results.infobox
+  :members:
+
+
+Result container
+================
+
+.. automodule:: searx.results.container
+  :members:
+
+results.core
+============
+
+.. automodule:: searx.results.core
+  :members:
+
--- a/searx/results/__init__.py
+++ b/searx/results/__init__.py
@ -0,0 +1,3 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Implementation of the result container and the result types."""
--- a/searx/results/answer.py
+++ b/searx/results/answer.py
@ -0,0 +1,30 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Answer item in the result list.  The answer result item is used in
+the :origin:`results.html <searx/templates/simple/results.html>` template.
+
+An answer item is a dictionary type with dedicated keys and values.  In the
+result list an answer item is identified by the existence of the key
+``answer``.
+
+.. code:: python
+
+   results.append({
+       'answer' : str,
+       'url'    : str,
+   })
+
+answer : ``str``
+  The answer string appended by the engine.
+
+url : ``str``
+  A link that is related to the answer (e.g. the origin of the answer)
+
+"""
+
+
+class Answers(dict):
+    """Dictionary of answers in the :py:obj:`.container.ResultContainer`"""
+
+    def add(self, result):
+        self[result['answer']] = result
--- a/searx/results/container.py
+++ b/searx/results/container.py
@ -1,158 +1,95 @@
-import re
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""ResultContainer
+"""
+
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
-from typing import List, NamedTuple, Set
-from urllib.parse import urlparse, unquote
+from typing import List, Set
+from urllib.parse import urlparse

 from searx import logger
 from searx.engines import engines
 from searx.metrics import histogram_observe, counter_add, count_error

+from .core import (
+    WHITESPACE_REGEX,
+    Timing,
+    UnresponsiveEngine,
+    result_content_len,
+    result_score,
+    compare_urls,
+)

-CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
-WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
+from .infobox import Infoboxes, merge_two_infoboxes
+from .suggestion import Suggestions
+from .answer import Answers
+from .correction import Corrections


-# return the meaningful length of the content for a result
-def result_content_len(content):
-    if isinstance(content, str):
-        return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
-    else:
-        return 0
+def is_suggestion(result):
+    """Returns ``True`` if result type is :py:obj:`.suggestion`, otherwise
+    ``False``"""
+    return 'suggestion' in result


-def compare_urls(url_a, url_b):
-    """Lazy compare between two URL.
-    "www.example.com" and "example.com" are equals.
-    "www.example.com/path/" and "www.example.com/path" are equals.
-    "https://www.example.com/" and "http://www.example.com/" are equals.
-
-    Args:
-        url_a (ParseResult): first URL
-        url_b (ParseResult): second URL
-
-    Returns:
-        bool: True if url_a and url_b are equals
-    """
-    # ignore www. in comparison
-    if url_a.netloc.startswith('www.'):
-        host_a = url_a.netloc.replace('www.', '', 1)
-    else:
-        host_a = url_a.netloc
-    if url_b.netloc.startswith('www.'):
-        host_b = url_b.netloc.replace('www.', '', 1)
-    else:
-        host_b = url_b.netloc
-
-    if host_a != host_b or url_a.query != url_b.query or url_a.fragment != url_b.fragment:
-        return False
-
-    # remove / from the end of the url if required
-    path_a = url_a.path[:-1] if url_a.path.endswith('/') else url_a.path
-    path_b = url_b.path[:-1] if url_b.path.endswith('/') else url_b.path
-
-    return unquote(path_a) == unquote(path_b)
+def is_answer(result):
+    """Returns ``True`` if result type is :py:obj:`.answer`, otherwise ``False``"""
+    return 'answer' in result


-def merge_two_infoboxes(infobox1, infobox2):
-    # get engines weights
-    if hasattr(engines[infobox1['engine']], 'weight'):
-        weight1 = engines[infobox1['engine']].weight
-    else:
-        weight1 = 1
-    if hasattr(engines[infobox2['engine']], 'weight'):
-        weight2 = engines[infobox2['engine']].weight
-    else:
-        weight2 = 1
-
-    if weight2 > weight1:
-        infobox1['engine'] = infobox2['engine']
-
-    infobox1['engines'] |= infobox2['engines']
-
-    if 'urls' in infobox2:
-        urls1 = infobox1.get('urls', None)
-        if urls1 is None:
-            urls1 = []
-
-        for url2 in infobox2.get('urls', []):
-            unique_url = True
-            parsed_url2 = urlparse(url2.get('url', ''))
-            entity_url2 = url2.get('entity')
-            for url1 in urls1:
-                if (entity_url2 is not None and url1.get('entity') == entity_url2) or compare_urls(
-                    urlparse(url1.get('url', '')), parsed_url2
-                ):
-                    unique_url = False
-                    break
-            if unique_url:
-                urls1.append(url2)
-
-        infobox1['urls'] = urls1
-
-    if 'img_src' in infobox2:
-        img1 = infobox1.get('img_src', None)
-        img2 = infobox2.get('img_src')
-        if img1 is None:
-            infobox1['img_src'] = img2
-        elif weight2 > weight1:
-            infobox1['img_src'] = img2
-
-    if 'attributes' in infobox2:
-        attributes1 = infobox1.get('attributes')
-        if attributes1 is None:
-            infobox1['attributes'] = attributes1 = []
-
-        attributeSet = set()
-        for attribute in attributes1:
-            label = attribute.get('label')
-            if label not in attributeSet:
-                attributeSet.add(label)
-            entity = attribute.get('entity')
-            if entity not in attributeSet:
-                attributeSet.add(entity)
-
-        for attribute in infobox2.get('attributes', []):
-            if attribute.get('label') not in attributeSet and attribute.get('entity') not in attributeSet:
-                attributes1.append(attribute)
-
-    if 'content' in infobox2:
-        content1 = infobox1.get('content', None)
-        content2 = infobox2.get('content', '')
-        if content1 is not None:
-            if result_content_len(content2) > result_content_len(content1):
-                infobox1['content'] = content2
-        else:
-            infobox1['content'] = content2
+def is_correction(result):
+    """Returns ``True`` if result type is :py:obj:`.correction`, otherwise
+    ``False``"""
+    return 'correction' in result


-def result_score(result):
-    weight = 1.0
-
-    for result_engine in result['engines']:
-        if hasattr(engines[result_engine], 'weight'):
-            weight *= float(engines[result_engine].weight)
-
-    occurences = len(result['positions'])
-
-    return sum((occurences * weight) / position for position in result['positions'])
+def is_infobox(result):
+    """Returns ``True`` if result type is :py:obj:`.infobox`, otherwise ``False``"""
+    return 'infobox' in result


-class Timing(NamedTuple):
-    engine: str
-    total: float
-    load: float
+def is_number_of_results(result):
+    """Returns ``True`` if result type is ``number_of_results``, otherwise
+    ``False``"""
+    return 'number_of_results' in result


-class UnresponsiveEngine(NamedTuple):
-    engine: str
-    error_type: str
-    suspended: bool
+def is_engine_data(result):
+    """Returns ``True`` if result type is :ref:`engine_data`, otherwise ``False``"""
+    return 'engine_data' in result
+
+
+def is_standard_result(result):
+    """Returns ``True`` if result type is a :ref:`standard result <standard
+    result>`, otherwise ``False``"""
+    return 'url' in result


 class ResultContainer:
-    """docstring for ResultContainer"""
+    """A container to organize the result items and the various result types.  New
+    results can be added by :py:obj:`ResultContainer.extend`.
+
+    To be clear, a result-type is not a special python data-type, a result is
+    always a python dictionary.  The result-type is determined by the presence
+    of one of the following keys (in that order, first match wins)
+
+    1. suggestion: :py:obj:`.suggestion`
+    2. answer: :py:obj:`.answer`
+    3. correction: :py:obj:`.correction`
+    4. infobox: :py:obj:`.infobox`
+    5. number_of_results: Number of results the origin engine has.
+
+       .. code:: python
+
+          results.append({
+              'number_of_results' : int,
+          })
+
+    6. engine_data: used to pass :ref:`engine_data <engine_data>` to next request.
+    7. url: :ref:`standard result <standard result>`
+    """

    __slots__ = (
        '_merged_results',
@ -174,10 +111,10 @@ class ResultContainer:
    def __init__(self):
        super().__init__()
        self._merged_results = []
-        self.infoboxes = []
-        self.suggestions = set()
-        self.answers = {}
-        self.corrections = set()
+        self.infoboxes = Infoboxes()
+        self.suggestions = Suggestions()
+        self.answers = Answers()
+        self.corrections = Corrections()
        self._number_of_results = []
        self.engine_data = defaultdict(dict)
        self._closed = False
@ -188,7 +125,8 @@ class ResultContainer:
        self.on_result = lambda _: True
        self._lock = RLock()

-    def extend(self, engine_name, results):
+    def extend(self, engine_name, results):  # pylint: disable=too-many-branches
+        """Add the result items of an engine to the container."""
        if self._closed:
            return

@ -196,19 +134,19 @@ class ResultContainer:
        error_msgs = set()
        for result in list(results):
            result['engine'] = engine_name
-            if 'suggestion' in result and self.on_result(result):
+            if is_suggestion(result) and self.on_result(result):
                self.suggestions.add(result['suggestion'])
-            elif 'answer' in result and self.on_result(result):
-                self.answers[result['answer']] = result
-            elif 'correction' in result and self.on_result(result):
+            elif is_answer(result) and self.on_result(result):
+                self.answers.add(result)
+            elif is_correction(result) and self.on_result(result):
                self.corrections.add(result['correction'])
-            elif 'infobox' in result and self.on_result(result):
+            elif is_infobox(result) and self.on_result(result):
                self._merge_infobox(result)
-            elif 'number_of_results' in result and self.on_result(result):
+            elif is_number_of_results(result) and self.on_result(result):
                self._number_of_results.append(result['number_of_results'])
-            elif 'engine_data' in result and self.on_result(result):
+            elif is_engine_data(result) and self.on_result(result):
                self.engine_data[engine_name][result['key']] = result['engine_data']
-            elif 'url' in result:
+            elif is_standard_result(result):
                # standard result (url, title, content)
                if not self._is_valid_url_result(result, error_msgs):
                    continue
@ -313,7 +251,6 @@ class ResultContainer:
                if result_template != 'images.html':
                    # not an image, same template, same url : it's a duplicate
                    return merged_result
-                else:
                # it's an image
                # it's a duplicate if the parsed_url, template and img_src are differents
                if result.get('img_src', '') == merged_result.get('img_src', ''):
@ -363,6 +300,7 @@ class ResultContainer:
        categoryPositions = {}

        for res in results:
+            # pylint: disable=fixme
            # FIXME : handle more than one category per engine
            engine = engines[res['engine']]
            res['category'] = engine.categories[0] if len(engine.categories) > 0 else ''
@ -389,7 +327,7 @@ class ResultContainer:

                # update every index after the current one
                # (including the current one)
-                for k in categoryPositions:
+                for k in categoryPositions:  # pylint: disable=consider-using-dict-items
                    v = categoryPositions[k]['index']
                    if v >= index:
                        categoryPositions[k]['index'] = v + 1
--- a/searx/results/core.py
+++ b/searx/results/core.py
@ -0,0 +1,80 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+
+"""Core methods
+"""
+# pylint: disable=too-few-public-methods
+
+import re
+from urllib.parse import unquote
+from typing import NamedTuple
+
+from searx.engines import engines
+
+CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
+WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
+
+
+class Timing(NamedTuple):  # pylint: disable=missing-class-docstring
+    engine: str
+    total: float
+    load: float
+
+
+class UnresponsiveEngine(NamedTuple):  # pylint: disable=missing-class-docstring
+    engine: str
+    error_type: str
+    suspended: bool
+
+
+def result_content_len(content):
+    """Return the meaningful length of the content for a result."""
+    if isinstance(content, str):
+        return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
+    return 0
+
+
+def result_score(result):
+    weight = 1.0
+
+    for result_engine in result['engines']:
+        if hasattr(engines[result_engine], 'weight'):
+            weight *= float(engines[result_engine].weight)
+
+    occurences = len(result['positions'])
+
+    return sum((occurences * weight) / position for position in result['positions'])
+
+
+def compare_urls(url_a, url_b):
+    """Lazy comparison of two URLs.
+
+    "www.example.com" and "example.com" are equal.
+    "www.example.com/path/" and "www.example.com/path" are equal.
+    "https://www.example.com/" and "http://www.example.com/" are equal.
+
+    Args:
+        url_a (ParseResult): first URL
+        url_b (ParseResult): second URL
+
+    Returns:
+        bool: True if url_a and url_b are equal
+    """
+    # ignore www. in comparison
+    if url_a.netloc.startswith('www.'):
+        host_a = url_a.netloc.replace('www.', '', 1)
+    else:
+        host_a = url_a.netloc
+    if url_b.netloc.startswith('www.'):
+        host_b = url_b.netloc.replace('www.', '', 1)
+    else:
+        host_b = url_b.netloc
+
+    if host_a != host_b or url_a.query != url_b.query or url_a.fragment != url_b.fragment:
+        return False
+
+    # remove / from the end of the url if required
+    path_a = url_a.path[:-1] if url_a.path.endswith('/') else url_a.path
+    path_b = url_b.path[:-1] if url_b.path.endswith('/') else url_b.path
+
+    return unquote(path_a) == unquote(path_b)
--- a/searx/results/correction.py
+++ b/searx/results/correction.py
@ -0,0 +1,39 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Correction item in the result list.  The correction result item is used in
+the :origin:`results.html <searx/templates/simple/results.html>` template.
+
+A correction item is a dictionary type with dedicated keys and values.  In the
+result list a correction item is identified by the existence of the key
+``correction``.
+
+.. code:: python
+
+   results.append({
+       'correction' : str,
+   })
+
+The context ``corrections`` of the HTML template is a set of dictionaries:
+
+.. code:: python
+
+   corrections = [
+       {
+           'url'   : str,
+           'title' : str,
+       },
+       {...},
+       ...
+   ]
+
+url : ``str``
+  The search URL for the correction
+
+title : ``str``
+  The 'correction' string appended by the engine.
+
+"""
+
+
+class Corrections(set):
+    """Set of corrections in the :py:obj:`.container.ResultContainer`"""
--- a/searx/results/infobox.py
+++ b/searx/results/infobox.py
@ -0,0 +1,157 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Infobox item in the result list.  The infobox result item is used in the
+:origin:`infobox.html <searx/templates/simple/infobox.html>` template.
+
+An infobox item is a dictionary type with dedicated keys and values.  In the
+result list an infobox item is identified by the existence of the key ``infobox``.
+
+.. code:: python
+
+   results.append({
+       'infobox'       : str,
+       'id'            : str,
+       'content'       : str,
+       'img_src'       : str,
+       'urls'          : [url, ...],
+       'attributes'    : [attribute, ...],
+       'relatedTopics' : [topic, ...],
+       'engine'        : engine,
+   })
+
+infobox : ``str``
+  Name of the infobox (mandatory).
+
+id : ``str``
+  URL of the infobox.  Will be used to merge infoboxes.
+
+content : ``str``
+  Content of the infobox (the description)
+
+img_src : ``str``
+  URL of the image to show in the infobox
+
+urls : ``[url, ...]``
+  A list of dictionaries with links shown in the infobox.  A **url** item in the
+  ``infobox.urls`` list is a dictionary:
+
+  .. code:: python
+
+     url = {
+         'title'    : str,
+         'url'      : str,
+         'entity'   : str,  # set by some engines but unused
+         'official' : bool, # set by some engines but unused (oscar)
+     }
+
+attributes : ``[attribute, ...]``
+  An **attribute** item in the ``infobox.attributes`` list is a dictionary:
+
+  .. code:: python
+
+     attribute = {
+         'label'    : str,
+         'value'    : str,
+         'image'    : {
+             'src': str,
+             'alt': str,
+         },
+         'entity'   : str,  # set by some engines but unused
+     }
+
+relatedTopics : ``[topic, ...]``
+  A **topic** item in the ``infobox.relatedTopics`` list is a dictionary:
+
+  .. code:: python
+
+     topic = {
+         'suggestion'  : str,
+         'name'        : str,  # set by some engines but unused
+     }
+
+"""
+
+from urllib.parse import urlparse
+from searx.engines import engines
+from .core import (
+    result_content_len,
+    compare_urls,
+)
+
+
+class Infoboxes(list):
+    """List of infobox items in the :py:obj:`.container.ResultContainer`"""
+
+
+def merge_two_infoboxes(infobox1, infobox2):
+    # pylint: disable=too-many-branches, too-many-statements
+
+    # get engines weights
+    if hasattr(engines[infobox1['engine']], 'weight'):
+        weight1 = engines[infobox1['engine']].weight
+    else:
+        weight1 = 1
+    if hasattr(engines[infobox2['engine']], 'weight'):
+        weight2 = engines[infobox2['engine']].weight
+    else:
+        weight2 = 1
+
+    if weight2 > weight1:
+        infobox1['engine'] = infobox2['engine']
+
+    infobox1['engines'] |= infobox2['engines']
+
+    if 'urls' in infobox2:
+        urls1 = infobox1.get('urls', None)
+        if urls1 is None:
+            urls1 = []
+
+        for url2 in infobox2.get('urls', []):
+            unique_url = True
+            parsed_url2 = urlparse(url2.get('url', ''))
+            entity_url2 = url2.get('entity')
+            for url1 in urls1:
+                if (entity_url2 is not None and url1.get('entity') == entity_url2) or compare_urls(
+                    urlparse(url1.get('url', '')), parsed_url2
+                ):
+                    unique_url = False
+                    break
+            if unique_url:
+                urls1.append(url2)
+
+        infobox1['urls'] = urls1
+
+    if 'img_src' in infobox2:
+        img1 = infobox1.get('img_src', None)
+        img2 = infobox2.get('img_src')
+        if img1 is None:
+            infobox1['img_src'] = img2
+        elif weight2 > weight1:
+            infobox1['img_src'] = img2
+
+    if 'attributes' in infobox2:
+        attributes1 = infobox1.get('attributes')
+        if attributes1 is None:
+            infobox1['attributes'] = attributes1 = []
+
+        attributeSet = set()
+        for attribute in attributes1:
+            label = attribute.get('label')
+            if label not in attributeSet:
+                attributeSet.add(label)
+            entity = attribute.get('entity')
+            if entity not in attributeSet:
+                attributeSet.add(entity)
+
+        for attribute in infobox2.get('attributes', []):
+            if attribute.get('label') not in attributeSet and attribute.get('entity') not in attributeSet:
+                attributes1.append(attribute)
+
+    if 'content' in infobox2:
+        content1 = infobox1.get('content', None)
+        content2 = infobox2.get('content', '')
+        if content1 is not None:
+            if result_content_len(content2) > result_content_len(content1):
+                infobox1['content'] = content2
+        else:
+            infobox1['content'] = content2
--- a/searx/results/suggestion.py
+++ b/searx/results/suggestion.py
@ -0,0 +1,41 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Suggestion item in the result list.  The suggestion result item is used in
+the :origin:`results.html <searx/templates/simple/results.html>` template.
+
+A suggestion item is a dictionary type with dedicated keys and values.  In the
+result list a suggestion item is identified by the existence of the key
+``suggestion``.
+
+.. code:: python
+
+   results.append({
+       'suggestion' : str,
+   })
+
+The context ``suggestions`` of the HTML template is a set of dictionaries:
+
+.. code:: python
+
+   suggestions = [
+       {
+           'url'   : str,
+           'title' : str,
+       },
+       {...},
+       ...
+   ]
+
+url : ``str``
+  The search URL for the suggestion
+
+title : ``str``
+  The 'suggestion' string appended by the engine.
+
+"""
+
+from typing import Set
+
+
+class Suggestions(Set):
+    """Set of suggestions in the :py:obj:`.container.ResultContainer`"""
--- a/searx/search/init.py
+++ b/searx/search/init.py
@ -11,7 +11,7 @@ import flask
 from searx import settings
 from searx.answerers import ask
 from searx.external_bang import get_bang_url
-from searx.results import ResultContainer
+from searx.results.container import ResultContainer
 from searx import logger
 from searx.plugins import plugins
 from searx.search.models import EngineRef, SearchQuery
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@ -16,7 +16,7 @@ import httpx

 from searx import network, logger
 from searx.utils import gen_useragent
-from searx.results import ResultContainer
+from searx.results.container import ResultContainer
 from searx.search.models import SearchQuery, EngineRef
 from searx.search.processors import EngineProcessor
 from searx.metrics import counter_inc
--- a/searx/webapp.py
+++ b/searx/webapp.py
@ -58,7 +58,7 @@ from searx import (

 from searx import infopage
 from searx.data import ENGINE_DESCRIPTIONS
-from searx.results import Timing, UnresponsiveEngine
+from searx.results.core import Timing, UnresponsiveEngine
 from searx.settings_defaults import OUTPUT_FORMATS
 from searx.settings_loader import get_default_settings_path
 from searx.exceptions import SearxParameterException
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-from searx.results import ResultContainer
+from searx.results.container import ResultContainer
 from tests import SearxTestCase


--- a/tests/unit/test_webapp.py
+++ b/tests/unit/test_webapp.py
@ -3,7 +3,7 @@
 import json
 from urllib.parse import ParseResult
 from mock import Mock
-from searx.results import Timing
+from searx.results.core import Timing

 import searx.search.processors
 from searx.search import Search