forked from zaclys/searxng
build from commit 905ce2a6f6
This commit is contained in:
commit
5849f6a4b3
277 changed files with 61654 additions and 0 deletions
299
_modules/searx/engines/annas_archive.html
Normal file
299
_modules/searx/engines/annas_archive.html
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.annas_archive — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.annas_archive</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.annas_archive</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""`Anna's Archive`_ is a free non-profit online shadow library metasearch</span>
|
||||
<span class="sd">engine providing access to a variety of book resources (also via IPFS), created</span>
|
||||
<span class="sd">by a team of anonymous archivists (AnnaArchivist_).</span>
|
||||
|
||||
<span class="sd">.. _Anna's Archive: https://annas-archive.org/</span>
|
||||
<span class="sd">.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive</span>
|
||||
|
||||
<span class="sd">Configuration</span>
|
||||
<span class="sd">=============</span>
|
||||
|
||||
<span class="sd">The engine has the following additional settings:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`aa_content`</span>
|
||||
<span class="sd">- :py:obj:`aa_ext`</span>
|
||||
<span class="sd">- :py:obj:`aa_sort`</span>
|
||||
|
||||
<span class="sd">With these options a SearXNG maintainer is able to configure **additional**</span>
|
||||
<span class="sd">engines for specific searches in Anna's Archive. For example, an engine to search</span>
|
||||
<span class="sd">for *newest* articles and journals (PDF) / by shortcut ``!aaa &lt;search-term&gt;``.</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name: annas articles</span>
|
||||
<span class="sd"> engine: annas_archive</span>
|
||||
<span class="sd"> shortcut: aaa</span>
|
||||
<span class="sd"> aa_content: 'journal_article'</span>
|
||||
<span class="sd"> aa_ext: 'pdf'</span>
|
||||
<span class="sd"> aa_sort: 'newest'</span>
|
||||
|
||||
<span class="sd">Implementations</span>
|
||||
<span class="sd">===============</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Optional</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">quote</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_list</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">ENGINE_TRAITS</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s2">"https://annas-archive.org/"</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s2">"Q115288326"</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s2">"HTML"</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"files"</span><span class="p">]</span>
|
||||
<span class="n">paging</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="c1"># search-url</span>
|
||||
<span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"https://annas-archive.org"</span>
|
||||
<span class="n">aa_content</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="sd">"""Anna's search form field **Content** / possible values::</span>
|
||||
|
||||
<span class="sd"> journal_article, book_any, book_fiction, book_unknown, book_nonfiction,</span>
|
||||
<span class="sd"> book_comic, magazine, standards_document</span>
|
||||
|
||||
<span class="sd">To not filter use an empty string (default).</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="n">aa_sort</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="sd">"""Sort Anna's results, possible values::</span>
|
||||
|
||||
<span class="sd"> newest, oldest, largest, smallest</span>
|
||||
|
||||
<span class="sd">To sort by *most relevant* use an empty string (default)."""</span>
|
||||
|
||||
<span class="n">aa_ext</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="sd">"""Filter Anna's results by a file ending. Common filters for example are</span>
|
||||
<span class="sd">``pdf`` and ``epub``.</span>
|
||||
|
||||
<span class="sd">.. note::</span>
|
||||
|
||||
<span class="sd"> Anna's Archive is a beta release: Filtering results by file extension does not</span>
|
||||
<span class="sd"> really work on Anna's Archive.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="init"><a class="viewcode-back" href="../../../dev/engines/online/annas_archive.html#searx.engines.annas_archive.init">[docs]</a><span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span>
|
||||
<span class="w"> </span><span class="sd">"""Check the engine's settings."""</span>
|
||||
<span class="n">traits</span> <span class="o">=</span> <span class="n">EngineTraits</span><span class="p">(</span><span class="o">**</span><span class="n">ENGINE_TRAITS</span><span class="p">[</span><span class="s1">'annas archive'</span><span class="p">])</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">aa_content</span> <span class="ow">and</span> <span class="n">aa_content</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'content'</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'invalid setting content: </span><span class="si">{</span><span class="n">aa_content</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">aa_sort</span> <span class="ow">and</span> <span class="n">aa_sort</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'sort'</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'invalid setting sort: </span><span class="si">{</span><span class="n">aa_sort</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">aa_ext</span> <span class="ow">and</span> <span class="n">aa_ext</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ext'</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'invalid setting ext: </span><span class="si">{</span><span class="n">aa_ext</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"language"</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="sa">f</span><span class="s2">"/search?lang=</span><span class="si">{</span><span class="n">lang</span><span class="w"> </span><span class="ow">or</span><span class="w"> </span><span class="s1">''</span><span class="si">}</span><span class="s2">&content=</span><span class="si">{</span><span class="n">aa_content</span><span class="si">}</span><span class="s2">&ext=</span><span class="si">{</span><span class="n">aa_ext</span><span class="si">}</span><span class="s2">&sort=</span><span class="si">{</span><span class="n">aa_sort</span><span class="si">}</span><span class="s2">&q=</span><span class="si">{</span><span class="n">q</span><span class="si">}</span><span class="s2">"</span>
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]:</span>
|
||||
<span class="n">results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//main//div[contains(@class, "h-[125]")]/a'</span><span class="p">):</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_get_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># The rendering of the WEB page is very strange; except the first position</span>
|
||||
<span class="c1"># all other positions of Anna's result page are enclosed in SGML comments.</span>
|
||||
<span class="c1"># These comments are *uncommented* by some JS code, see query of class</span>
|
||||
<span class="c1"># '.js-scroll-hidden' in Anna's HTML template:</span>
|
||||
<span class="c1"># https://annas-software.org/AnnaArchivist/annas-archive/-/blob/main/allthethings/templates/macros/md5_list.html</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//main//div[contains(@class, "js-scroll-hidden")]'</span><span class="p">):</span>
|
||||
<span class="n">item</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">item</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./comment()'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_get_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_result</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="p">{</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'paper.html'</span><span class="p">,</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">item</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./@href'</span><span class="p">)[</span><span class="mi">0</span><span class="p">],</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//h3/text()[1]'</span><span class="p">)),</span>
|
||||
<span class="s1">'publisher'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "text-sm")]'</span><span class="p">)),</span>
|
||||
<span class="s1">'authors'</span><span class="p">:</span> <span class="p">[</span><span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "italic")]'</span><span class="p">))],</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "text-xs")]'</span><span class="p">)),</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">item</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//img/@src'</span><span class="p">)[</span><span class="mi">0</span><span class="p">],</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/annas_archive.html#searx.engines.annas_archive.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages and other search arguments from Anna's search form."""</span>
|
||||
<span class="c1"># pylint: disable=import-outside-toplevel</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'content'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ext'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'sort'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/search'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from Anna's search page is not OK."</span><span class="p">)</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="c1"># supported language codes</span>
|
||||
|
||||
<span class="n">lang_map</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//select[@name='lang']//option"</span><span class="p">):</span>
|
||||
<span class="n">eng_lang</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">''</span><span class="p">,</span> <span class="s1">'_empty'</span><span class="p">,</span> <span class="s1">'nl-BE'</span><span class="p">,</span> <span class="s1">'und'</span><span class="p">):</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="c1"># silently ignore unknown languages</span>
|
||||
<span class="c1"># print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span>
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_lang</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//select[@name='content']//option"</span><span class="p">):</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'content'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//select[@name='ext']//option"</span><span class="p">):</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ext'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form//select[@name='sort']//option"</span><span class="p">):</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'sort'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
264
_modules/searx/engines/archlinux.html
Normal file
264
_modules/searx/engines/archlinux.html
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.archlinux — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.archlinux</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.archlinux</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd">Arch Linux Wiki</span>
|
||||
<span class="sd">~~~~~~~~~~~~~~~</span>
|
||||
|
||||
<span class="sd">This implementation does not use an official API: MediaWiki provides an API, but</span>
|
||||
<span class="sd">Arch Wiki blocks access to it.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">urljoin</span><span class="p">,</span> <span class="n">urlparse</span>
|
||||
<span class="kn">import</span> <span class="nn">lxml</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://wiki.archlinux.org/'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q101445877'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'it'</span><span class="p">,</span> <span class="s1">'software wikis'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">main_wiki</span> <span class="o">=</span> <span class="s1">'wiki.archlinux.org'</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
|
||||
<span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">netloc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">main_wiki</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">title</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="s1">'Special:Search'</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">netloc</span> <span class="o">+</span> <span class="s1">'/index.php?'</span>
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">20</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">netloc</span> <span class="o">==</span> <span class="n">main_wiki</span><span class="p">:</span>
|
||||
<span class="n">eng_lang</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="s1">'English'</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">query</span> <span class="o">+=</span> <span class="s1">' ('</span> <span class="o">+</span> <span class="n">eng_lang</span> <span class="o">+</span> <span class="s1">')'</span>
|
||||
<span class="k">elif</span> <span class="n">netloc</span> <span class="o">==</span> <span class="s1">'wiki.archlinuxcn.org'</span><span class="p">:</span>
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">netloc</span> <span class="o">+</span> <span class="s1">'/wzh/index.php?'</span>
|
||||
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'limit'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span>
|
||||
<span class="s1">'offset'</span><span class="p">:</span> <span class="n">offset</span><span class="p">,</span>
|
||||
<span class="s1">'profile'</span><span class="p">:</span> <span class="s1">'default'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="c1"># get the base URL for the language in which request was made</span>
|
||||
<span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">netloc</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">main_wiki</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">netloc</span> <span class="o">+</span> <span class="s1">'/index.php?'</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//ul[@class="mw-search-results"]/li'</span><span class="p">):</span>
|
||||
<span class="n">link</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="mw-search-result-heading"]/a'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="searchresult"]'</span><span class="p">))</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">urljoin</span><span class="p">(</span><span class="n">base_url</span><span class="p">,</span> <span class="n">link</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">)),</span> <span class="c1"># type: ignore</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">link</span><span class="p">),</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/archlinux.html#searx.engines.archlinux.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages from the Arch Linux Wiki. The location of the Wiki address of a</span>
|
||||
<span class="sd"> language is mapped in a :py:obj:`custom field</span>
|
||||
<span class="sd"> &lt;searx.enginelib.traits.EngineTraits.custom&gt;` (``wiki_netloc``). Depending</span>
|
||||
<span class="sd"> on the location, the ``title`` argument in the request is translated.</span>
|
||||
|
||||
<span class="sd"> .. code:: python</span>
|
||||
|
||||
<span class="sd"> "custom": {</span>
|
||||
<span class="sd"> "wiki_netloc": {</span>
|
||||
<span class="sd"> "de": "wiki.archlinux.de",</span>
|
||||
<span class="sd"> # ...</span>
|
||||
<span class="sd"> "zh": "wiki.archlinuxcn.org"</span>
|
||||
<span class="sd"> },</span>
|
||||
<span class="sd"> "title": {</span>
|
||||
<span class="sd"> "de": "Spezial:Suche",</span>
|
||||
<span class="sd"> # ...</span>
|
||||
<span class="sd"> "zh": "Special:\u641c\u7d22"</span>
|
||||
<span class="sd"> },</span>
|
||||
<span class="sd"> },</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># pylint: disable=import-outside-toplevel</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
<span class="n">title_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'de'</span><span class="p">:</span> <span class="s1">'Spezial:Suche'</span><span class="p">,</span>
|
||||
<span class="s1">'fa'</span><span class="p">:</span> <span class="s1">'ویژه:جستجو'</span><span class="p">,</span>
|
||||
<span class="s1">'ja'</span><span class="p">:</span> <span class="s1">'特別:検索'</span><span class="p">,</span>
|
||||
<span class="s1">'zh'</span><span class="p">:</span> <span class="s1">'Special:搜索'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://wiki.archlinux.org/'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from wiki.archlinix.org is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//a[@class='interlanguage-link-target']"</span><span class="p">):</span>
|
||||
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">a</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'lang'</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">))</span>
|
||||
<span class="c1"># zh_Hans --> zh</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">sxng_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="n">netloc</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">a</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">))</span><span class="o">.</span><span class="n">netloc</span>
|
||||
<span class="k">if</span> <span class="n">netloc</span> <span class="o">!=</span> <span class="s1">'wiki.archlinux.org'</span><span class="p">:</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">title_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">title</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: title tag from </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) is unknown"</span> <span class="o">%</span> <span class="p">(</span><span class="n">netloc</span><span class="p">,</span> <span class="n">sxng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">netloc</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'title'</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">title</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="s2">".//span"</span><span class="p">))</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'en'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'English'</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
449
_modules/searx/engines/bing.html
Normal file
449
_modules/searx/engines/bing.html
Normal file
|
|
@ -0,0 +1,449 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.bing — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.bing</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.bing</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This is the implementation of the Bing-WEB engine. Some of this</span>
|
||||
<span class="sd">implementation is shared by other engines:</span>
|
||||
|
||||
<span class="sd">- :ref:`bing images engine`</span>
|
||||
<span class="sd">- :ref:`bing news engine`</span>
|
||||
<span class="sd">- :ref:`bing videos engine`</span>
|
||||
|
||||
<span class="sd">On the `preference page`_ Bing offers a lot of languages and regions (see section</span>
|
||||
<span class="sd">'Search results languages' and 'Country/region'). However, the abundant choice</span>
|
||||
<span class="sd">does not correspond to reality, where Bing has a full-text indexer only for a</span>
|
||||
<span class="sd">limited number of languages. By example: you can select a language like Māori</span>
|
||||
<span class="sd">but you never get a result in this language.</span>
|
||||
|
||||
<span class="sd">What comes a bit closer to the truth are the `search-APIs`_ but they don't seem</span>
|
||||
<span class="sd">to be completely correct either (if you take a closer look you will find some</span>
|
||||
<span class="sd">inaccuracies there too):</span>
|
||||
|
||||
<span class="sd">- :py:obj:`searx.engines.bing.bing_traits_url`</span>
|
||||
<span class="sd">- :py:obj:`searx.engines.bing_videos.bing_traits_url`</span>
|
||||
<span class="sd">- :py:obj:`searx.engines.bing_images.bing_traits_url`</span>
|
||||
<span class="sd">- :py:obj:`searx.engines.bing_news.bing_traits_url`</span>
|
||||
|
||||
<span class="sd">.. _preference page: https://www.bing.com/account/general</span>
|
||||
<span class="sd">.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
<span class="c1"># pylint: disable=too-many-branches, invalid-name</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">import</span> <span class="nn">datetime</span>
|
||||
<span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="kn">import</span> <span class="nn">uuid</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
<span class="kn">import</span> <span class="nn">babel.languages</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span><span class="p">,</span> <span class="n">region_tag</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.bing.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q182496'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="sd">"""Bing tries to guess user's language and territory from the HTTP</span>
|
||||
<span class="sd">Accept-Language. Optionally the user can select a search-language (can be</span>
|
||||
<span class="sd">different to the UI language) and a region (market code)."""</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch_types</span> <span class="o">=</span> <span class="p">{</span><span class="mi">2</span><span class="p">:</span> <span class="s1">'STRICT'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'DEMOTE'</span><span class="p">,</span> <span class="mi">0</span><span class="p">:</span> <span class="s1">'OFF'</span><span class="p">}</span> <span class="c1"># cookie: ADLT=STRICT</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/search'</span>
|
||||
<span class="sd">"""Bing (Web) search URL"""</span>
|
||||
|
||||
<span class="n">bing_traits_url</span> <span class="o">=</span> <span class="s1">'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'</span>
|
||||
<span class="sd">"""Bing (Web) search API description"""</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_offset_from_pageno</span><span class="p">(</span><span class="n">pageno</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">pageno</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span> <span class="o">+</span> <span class="mi">1</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">,</span> <span class="n">SID</span><span class="p">):</span>
|
||||
|
||||
<span class="c1"># set cookies</span>
|
||||
<span class="c1"># -----------</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'_EDGE_V'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span>
|
||||
|
||||
<span class="c1"># _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw</span>
|
||||
<span class="n">_EDGE_S</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="s1">'F=1'</span><span class="p">,</span>
|
||||
<span class="s1">'SID=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">SID</span><span class="p">,</span>
|
||||
<span class="s1">'mkt=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">engine_region</span><span class="o">.</span><span class="n">lower</span><span class="p">(),</span>
|
||||
<span class="s1">'ui=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">engine_language</span><span class="o">.</span><span class="n">lower</span><span class="p">(),</span>
|
||||
<span class="p">]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'_EDGE_S'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'&'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">_EDGE_S</span><span class="p">)</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"cookie _EDGE_S=</span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'_EDGE_S'</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># "_EDGE_CD": "m=zh-tw",</span>
|
||||
|
||||
<span class="n">_EDGE_CD</span> <span class="o">=</span> <span class="p">[</span> <span class="c1"># pylint: disable=invalid-name</span>
|
||||
<span class="s1">'m=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">engine_region</span><span class="o">.</span><span class="n">lower</span><span class="p">(),</span> <span class="c1"># search region: zh-cn</span>
|
||||
<span class="s1">'u=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">engine_language</span><span class="o">.</span><span class="n">lower</span><span class="p">(),</span> <span class="c1"># UI: en-us</span>
|
||||
<span class="p">]</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'_EDGE_CD'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'&'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">_EDGE_CD</span><span class="p">)</span> <span class="o">+</span> <span class="s1">';'</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"cookie _EDGE_CD=</span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'_EDGE_CD'</span><span class="p">])</span>
|
||||
|
||||
<span class="n">SRCHHPGUSR</span> <span class="o">=</span> <span class="p">[</span> <span class="c1"># pylint: disable=invalid-name</span>
|
||||
<span class="s1">'SRCHLANG=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">engine_language</span><span class="p">,</span>
|
||||
<span class="c1"># Trying to set ADLT cookie here seems not to have any effect, I assume</span>
|
||||
<span class="c1"># there is some age verification by a cookie (and/or session ID) needed,</span>
|
||||
<span class="c1"># to disable the SafeSearch.</span>
|
||||
<span class="s1">'ADLT=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">safesearch_types</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">],</span> <span class="s1">'DEMOTE'</span><span class="p">),</span>
|
||||
<span class="p">]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'SRCHHPGUSR'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'&'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">SRCHHPGUSR</span><span class="p">)</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"cookie SRCHHPGUSR=</span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'SRCHHPGUSR'</span><span class="p">])</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble a Bing-Web request."""</span>
|
||||
|
||||
<span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en-US'</span><span class="p">)</span>
|
||||
<span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">SID</span> <span class="o">=</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid1</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
||||
<span class="n">CVID</span> <span class="o">=</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid1</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
||||
|
||||
<span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">,</span> <span class="n">SID</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># build URL query</span>
|
||||
<span class="c1"># ---------------</span>
|
||||
|
||||
<span class="c1"># query term</span>
|
||||
<span class="n">page</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
|
||||
<span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># fmt: off</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'pq'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'cvid'</span><span class="p">:</span> <span class="n">CVID</span><span class="p">,</span>
|
||||
<span class="s1">'qs'</span><span class="p">:</span> <span class="s1">'n'</span><span class="p">,</span>
|
||||
<span class="s1">'sp'</span><span class="p">:</span> <span class="s1">'-1'</span>
|
||||
<span class="c1"># fmt: on</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># page</span>
|
||||
<span class="k">if</span> <span class="n">page</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">referer</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Referer'</span><span class="p">]</span> <span class="o">=</span> <span class="n">referer</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"headers.Referer --> </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">referer</span><span class="p">)</span>
|
||||
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'first'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_get_offset_from_pageno</span><span class="p">(</span><span class="n">page</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">page</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'FORM'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'PERE'</span>
|
||||
<span class="k">elif</span> <span class="n">page</span> <span class="o">></span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'FORM'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'PERE</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">page</span> <span class="o">-</span> <span class="mi">2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">filters</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]:</span>
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'filt'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'custom'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'day'</span><span class="p">:</span>
|
||||
<span class="n">filters</span> <span class="o">=</span> <span class="s1">'ex1:"ez1"'</span>
|
||||
<span class="k">elif</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'week'</span><span class="p">:</span>
|
||||
<span class="n">filters</span> <span class="o">=</span> <span class="s1">'ex1:"ez2"'</span>
|
||||
<span class="k">elif</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'month'</span><span class="p">:</span>
|
||||
<span class="n">filters</span> <span class="o">=</span> <span class="s1">'ex1:"ez3"'</span>
|
||||
<span class="k">elif</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'year'</span><span class="p">:</span>
|
||||
<span class="n">epoch_1970</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">date</span><span class="p">(</span><span class="mi">1970</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">today_no</span> <span class="o">=</span> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">date</span><span class="o">.</span><span class="n">today</span><span class="p">()</span> <span class="o">-</span> <span class="n">epoch_1970</span><span class="p">)</span><span class="o">.</span><span class="n">days</span>
|
||||
<span class="n">filters</span> <span class="o">=</span> <span class="s1">'ex1:"ez5_</span><span class="si">%s</span><span class="s1">_</span><span class="si">%s</span><span class="s1">"'</span> <span class="o">%</span> <span class="p">(</span><span class="n">today_no</span> <span class="o">-</span> <span class="mi">365</span><span class="p">,</span> <span class="n">today_no</span><span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">filters</span><span class="p">:</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+</span> <span class="s1">'&filters='</span> <span class="o">+</span> <span class="n">filters</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="c1"># pylint: disable=too-many-locals,import-outside-toplevel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">Request</span><span class="p">,</span> <span class="n">multi_requests</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">result_len</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># parse results again if nothing is found yet</span>
|
||||
|
||||
<span class="n">url_to_resolve</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">url_to_resolve_index</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">i</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//ol[@id="b_results"]/li[contains(@class, "b_algo")]'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">link</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h2/a'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">link</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">link</span><span class="o">.</span><span class="n">attrib</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">)</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">link</span><span class="p">)</span>
|
||||
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'(.//p)[1]'</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">content</span><span class="p">:</span>
|
||||
<span class="c1"># Make sure that the element is free of &lt;a href&gt; links</span>
|
||||
<span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">p</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//a'</span><span class="p">):</span>
|
||||
<span class="n">e</span><span class="o">.</span><span class="n">getparent</span><span class="p">()</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># get the real URL either using the URL shown to user or following the Bing URL</span>
|
||||
<span class="k">if</span> <span class="n">url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'https://www.bing.com/ck/a?'</span><span class="p">):</span>
|
||||
<span class="n">url_cite</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="b_attribution"]/cite'</span><span class="p">))</span>
|
||||
<span class="c1"># Bing can shorten the URL either at the end or in the middle of the string</span>
|
||||
<span class="k">if</span> <span class="p">(</span>
|
||||
<span class="n">url_cite</span>
|
||||
<span class="ow">and</span> <span class="n">url_cite</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'https://'</span><span class="p">)</span>
|
||||
<span class="ow">and</span> <span class="s1">'…'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">url_cite</span>
|
||||
<span class="ow">and</span> <span class="s1">'...'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">url_cite</span>
|
||||
<span class="ow">and</span> <span class="s1">'›'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">url_cite</span>
|
||||
<span class="p">):</span>
|
||||
<span class="c1"># no need for an additional HTTP request</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">url_cite</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># resolve the URL with an additional HTTP request</span>
|
||||
<span class="n">url_to_resolve</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'&ntb=1'</span><span class="p">,</span> <span class="s1">'&ntb=F'</span><span class="p">))</span>
|
||||
<span class="n">url_to_resolve_index</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="kc">None</span> <span class="c1"># remove the result if the HTTP Bing redirect raises an exception</span>
|
||||
|
||||
<span class="c1"># append result</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">})</span>
|
||||
<span class="c1"># increment result pointer for the next iteration in this loop</span>
|
||||
<span class="n">i</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
|
||||
<span class="c1"># resolve all Bing redirections in parallel</span>
|
||||
<span class="n">request_list</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="n">Request</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">u</span><span class="p">,</span> <span class="n">allow_redirects</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span> <span class="k">for</span> <span class="n">u</span> <span class="ow">in</span> <span class="n">url_to_resolve</span>
|
||||
<span class="p">]</span>
|
||||
<span class="n">response_list</span> <span class="o">=</span> <span class="n">multi_requests</span><span class="p">(</span><span class="n">request_list</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">redirect_response</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">response_list</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">redirect_response</span><span class="p">,</span> <span class="ne">Exception</span><span class="p">):</span>
|
||||
<span class="n">results</span><span class="p">[</span><span class="n">url_to_resolve_index</span><span class="p">[</span><span class="n">i</span><span class="p">]][</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">redirect_response</span><span class="o">.</span><span class="n">headers</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># get number_of_results</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">result_len_container</span> <span class="o">=</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//span[@class="sb_count"]//text()'</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="s2">"-"</span> <span class="ow">in</span> <span class="n">result_len_container</span><span class="p">:</span>
|
||||
|
||||
<span class="c1"># Remove the part "from-to" for paginated request ...</span>
|
||||
<span class="n">result_len_container</span> <span class="o">=</span> <span class="n">result_len_container</span><span class="p">[</span><span class="n">result_len_container</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s2">"-"</span><span class="p">)</span> <span class="o">*</span> <span class="mi">2</span> <span class="o">+</span> <span class="mi">2</span> <span class="p">:]</span>
|
||||
|
||||
<span class="n">result_len_container</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s1">'[^0-9]'</span><span class="p">,</span> <span class="s1">''</span><span class="p">,</span> <span class="n">result_len_container</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">result_len_container</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">result_len</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">result_len_container</span><span class="p">)</span>
|
||||
|
||||
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'result error :</span><span class="se">\n</span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">result_len</span> <span class="ow">and</span> <span class="n">_get_offset_from_pageno</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"pageno"</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span> <span class="o">></span> <span class="n">result_len</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'number_of_results'</span><span class="p">:</span> <span class="n">result_len</span><span class="p">})</span>
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages and regions from Bing-Web."""</span>
|
||||
|
||||
<span class="n">xpath_market_codes</span> <span class="o">=</span> <span class="s1">'//table[1]/tbody/tr/td[3]'</span>
|
||||
<span class="c1"># xpath_country_codes = '//table[2]/tbody/tr/td[2]'</span>
|
||||
<span class="n">xpath_language_codes</span> <span class="o">=</span> <span class="s1">'//table[3]/tbody/tr/td[2]'</span>
|
||||
|
||||
<span class="n">_fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">,</span> <span class="n">bing_traits_url</span><span class="p">,</span> <span class="n">xpath_language_codes</span><span class="p">,</span> <span class="n">xpath_market_codes</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">,</span> <span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">xpath_language_codes</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">xpath_market_codes</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="c1"># pylint: disable=too-many-locals,import-outside-toplevel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
|
||||
<span class="c1"># insert alias to map from a language (zh) to a language + script (zh_Hans)</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'zh-hans'</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from bing is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="n">map_lang</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'jp'</span><span class="p">:</span> <span class="s1">'ja'</span><span class="p">}</span>
|
||||
<span class="k">for</span> <span class="n">td</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">xpath_language_codes</span><span class="p">):</span>
|
||||
<span class="n">eng_lang</span> <span class="o">=</span> <span class="n">td</span><span class="o">.</span><span class="n">text</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'en-gb'</span><span class="p">,</span> <span class="s1">'pt-br'</span><span class="p">):</span>
|
||||
<span class="c1"># language 'en' is already in the list and a language 'en-gb' can't</span>
|
||||
<span class="c1"># be handled in SearXNG, same with pt-br which is covered by pt-pt.</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">babel_lang</span> <span class="o">=</span> <span class="n">map_lang</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'_'</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_lang</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: language (</span><span class="si">%s</span><span class="s2">) is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span>
|
||||
|
||||
<span class="n">map_region</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'en-ID'</span><span class="p">:</span> <span class="s1">'id_ID'</span><span class="p">,</span>
|
||||
<span class="s1">'no-NO'</span><span class="p">:</span> <span class="s1">'nb_NO'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">td</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">xpath_market_codes</span><span class="p">):</span>
|
||||
<span class="n">eng_region</span> <span class="o">=</span> <span class="n">td</span><span class="o">.</span><span class="n">text</span>
|
||||
<span class="n">babel_region</span> <span class="o">=</span> <span class="n">map_region</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_region</span><span class="p">,</span> <span class="n">eng_region</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'_'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eng_region</span> <span class="o">==</span> <span class="s1">'en-WW'</span><span class="p">:</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="n">eng_region</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_region</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: region (</span><span class="si">%s</span><span class="s2">) is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">eng_region</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_region</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_region</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
244
_modules/searx/engines/bing_images.html
Normal file
244
_modules/searx/engines/bing_images.html
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.bing_images — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.bing_images</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.bing_images</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Bing-Images: for a description see :py:obj:`searx.engines.bing`.</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="c1"># pylint: disable=invalid-name</span>
|
||||
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">import</span> <span class="nn">uuid</span>
|
||||
<span class="kn">import</span> <span class="nn">json</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">set_bing_cookies</span><span class="p">,</span>
|
||||
<span class="n">_fetch_traits</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">send_accept_language_header</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.bing.com/images'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q182496'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'images'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/images/async'</span>
|
||||
<span class="sd">"""Bing (Images) search URL"""</span>
|
||||
|
||||
<span class="n">bing_traits_url</span> <span class="o">=</span> <span class="s1">'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes'</span>
|
||||
<span class="sd">"""Bing (Images) search API description"""</span>
|
||||
|
||||
<span class="n">time_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># fmt: off</span>
|
||||
<span class="s1">'day'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span><span class="p">,</span>
|
||||
<span class="s1">'week'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">7</span><span class="p">,</span>
|
||||
<span class="s1">'month'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">31</span><span class="p">,</span>
|
||||
<span class="s1">'year'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">365</span><span class="p">,</span>
|
||||
<span class="c1"># fmt: on</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_images.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble a Bing-Image request."""</span>
|
||||
|
||||
<span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en-US'</span><span class="p">)</span>
|
||||
<span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">SID</span> <span class="o">=</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid1</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
||||
<span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">,</span> <span class="n">SID</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># build URL query</span>
|
||||
<span class="c1"># - example: https://www.bing.com/images/async?q=foo&first=155&count=35</span>
|
||||
|
||||
<span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># fmt: off</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'async'</span> <span class="p">:</span> <span class="s1">'content'</span><span class="p">,</span>
|
||||
<span class="c1"># to simplify the page count let's use the default of 35 images per page</span>
|
||||
<span class="s1">'first'</span> <span class="p">:</span> <span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">35</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
|
||||
<span class="s1">'count'</span> <span class="p">:</span> <span class="mi">35</span><span class="p">,</span>
|
||||
<span class="c1"># fmt: on</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># time range</span>
|
||||
<span class="c1"># - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]:</span>
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'qft'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'filterui:age-lt</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">time_map</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_images.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get response from Bing-Images"""</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//ul[contains(@class, "dgControl_list")]/li'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">metadata</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//a[@class="iusc"]/@m'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">metadata</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">metadata</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//a[@class="iusc"]/@m'</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="infnmpt"]//a/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="n">img_format</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="imgpt"]/div/span/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="n">source</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'purl'</span><span class="p">],</span>
|
||||
<span class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'turl'</span><span class="p">],</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'murl'</span><span class="p">],</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'desc'</span><span class="p">],</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'source'</span><span class="p">:</span> <span class="n">source</span><span class="p">,</span>
|
||||
<span class="s1">'img_format'</span><span class="p">:</span> <span class="n">img_format</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_images.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages and regions from Bing-Images."""</span>
|
||||
|
||||
<span class="n">xpath_market_codes</span> <span class="o">=</span> <span class="s1">'//table[1]/tbody/tr/td[3]'</span>
|
||||
<span class="c1"># xpath_country_codes = '//table[2]/tbody/tr/td[2]'</span>
|
||||
<span class="n">xpath_language_codes</span> <span class="o">=</span> <span class="s1">'//table[3]/tbody/tr/td[2]'</span>
|
||||
|
||||
<span class="n">_fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">,</span> <span class="n">bing_traits_url</span><span class="p">,</span> <span class="n">xpath_language_codes</span><span class="p">,</span> <span class="n">xpath_market_codes</span><span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
262
_modules/searx/engines/bing_news.html
Normal file
262
_modules/searx/engines/bing_news.html
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.bing_news — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.bing_news</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.bing_news</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Bing-News: description see :py:obj:`searx.engines.bing`.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="c1"># pylint: disable=invalid-name</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">import</span> <span class="nn">uuid</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">set_bing_cookies</span><span class="p">,</span>
|
||||
<span class="n">_fetch_traits</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">send_accept_language_header</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.bing.com/news'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2878637'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'RSS'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'news'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'day'</span><span class="p">:</span> <span class="s1">'4'</span><span class="p">,</span>
|
||||
<span class="s1">'week'</span><span class="p">:</span> <span class="s1">'8'</span><span class="p">,</span>
|
||||
<span class="s1">'month'</span><span class="p">:</span> <span class="s1">'9'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="sd">"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the</span>
|
||||
<span class="sd">difference of *last day* and *last week* in the result list is only marginal.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/news/infinitescrollajax'</span>
|
||||
<span class="sd">"""Bing (News) search URL"""</span>
|
||||
|
||||
<span class="n">bing_traits_url</span> <span class="o">=</span> <span class="s1">'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes'</span>
|
||||
<span class="sd">"""Bing (News) search API description"""</span>
|
||||
|
||||
<span class="n">mkt_alias</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'zh'</span><span class="p">:</span> <span class="s1">'en-WW'</span><span class="p">,</span>
|
||||
<span class="s1">'zh-CN'</span><span class="p">:</span> <span class="s1">'en-WW'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="sd">"""Bing News has an official market code 'zh-CN' but we won't get a result with</span>
|
||||
<span class="sd">this market code. For 'zh' and 'zh-CN' we better use the *Worldwide aggregate*</span>
|
||||
<span class="sd">market code (en-WW).</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_news.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble a Bing-News request."""</span>
|
||||
|
||||
<span class="n">sxng_locale</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span>
|
||||
<span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">mkt_alias</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">),</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span>
|
||||
<span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">SID</span> <span class="o">=</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid1</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
||||
<span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">,</span> <span class="n">SID</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># build URL query</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># example: https://www.bing.com/news/infinitescrollajax?q=london&first=1</span>
|
||||
|
||||
<span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># fmt: off</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'InfiniteScroll'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
|
||||
<span class="c1"># to simplify the page count let's use the default of 10 news items per page</span>
|
||||
<span class="s1">'first'</span> <span class="p">:</span> <span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
|
||||
<span class="c1"># fmt: on</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]:</span>
|
||||
<span class="c1"># qft=interval:"7"</span>
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'qft'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'qft=interval="</span><span class="si">%s</span><span class="s1">"'</span> <span class="o">%</span> <span class="n">time_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">],</span> <span class="s1">'9'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_news.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get response from Bing-News"""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">newsitem</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//div[contains(@class, "newsitem")]'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">newsitem</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./@url'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">newsitem</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="caption"]//a[@class="title"]/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">newsitem</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="snippet"]/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="n">thumbnail</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">author</span> <span class="o">=</span> <span class="n">newsitem</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./@data-author'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">metadata</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">newsitem</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="source"]/span/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">newsitem</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//a[@class="imagelink"]//img/@src'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">img_src</span><span class="p">:</span>
|
||||
<span class="n">thumbnail</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/'</span> <span class="o">+</span> <span class="n">img_src</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span>
|
||||
<span class="s1">'author'</span><span class="p">:</span> <span class="n">author</span><span class="p">,</span>
|
||||
<span class="s1">'metadata'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_news.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages and regions from Bing-News.</span>
|
||||
|
||||
<span class="sd"> The :py:obj:`description <searx.engines.bing_news.bing_traits_url>` of the</span>
|
||||
<span class="sd"> first table says *"query parameter when calling the Video Search API."*</span>
|
||||
<span class="sd"> .. that's why I use the 4th table "News Category API markets" for the</span>
|
||||
<span class="sd"> ``xpath_market_codes``.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">xpath_market_codes</span> <span class="o">=</span> <span class="s1">'//table[4]/tbody/tr/td[3]'</span>
|
||||
<span class="c1"># xpath_country_codes = '//table[2]/tbody/tr/td[2]'</span>
|
||||
<span class="n">xpath_language_codes</span> <span class="o">=</span> <span class="s1">'//table[3]/tbody/tr/td[2]'</span>
|
||||
|
||||
<span class="n">_fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">,</span> <span class="n">bing_traits_url</span><span class="p">,</span> <span class="n">xpath_language_codes</span><span class="p">,</span> <span class="n">xpath_market_codes</span><span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
240
_modules/searx/engines/bing_videos.html
Normal file
240
_modules/searx/engines/bing_videos.html
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.bing_videos — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.bing_videos</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.bing_videos</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Bing-Videos: description see :py:obj:`searx.engines.bing`.</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="c1"># pylint: disable=invalid-name</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">import</span> <span class="nn">uuid</span>
|
||||
<span class="kn">import</span> <span class="nn">json</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">set_bing_cookies</span><span class="p">,</span>
|
||||
<span class="n">_fetch_traits</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.bing</span> <span class="kn">import</span> <span class="n">send_accept_language_header</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.bing.com/videos'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q4914152'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.bing.com/videos/asyncv2'</span>
|
||||
<span class="sd">"""Bing (Videos) async search URL."""</span>
|
||||
|
||||
<span class="n">bing_traits_url</span> <span class="o">=</span> <span class="s1">'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes'</span>
|
||||
<span class="sd">"""Bing (Video) search API description"""</span>
|
||||
|
||||
<span class="n">time_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># fmt: off</span>
|
||||
<span class="s1">'day'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span><span class="p">,</span>
|
||||
<span class="s1">'week'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">7</span><span class="p">,</span>
|
||||
<span class="s1">'month'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">31</span><span class="p">,</span>
|
||||
<span class="s1">'year'</span><span class="p">:</span> <span class="mi">60</span> <span class="o">*</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">365</span><span class="p">,</span>
|
||||
<span class="c1"># fmt: on</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_videos.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble a Bing-Video request."""</span>
|
||||
|
||||
<span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en-US'</span><span class="p">)</span>
|
||||
<span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">SID</span> <span class="o">=</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid1</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
||||
<span class="n">set_bing_cookies</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">engine_language</span><span class="p">,</span> <span class="n">engine_region</span><span class="p">,</span> <span class="n">SID</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># build URL query</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35</span>
|
||||
|
||||
<span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># fmt: off</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'async'</span> <span class="p">:</span> <span class="s1">'content'</span><span class="p">,</span>
|
||||
<span class="c1"># to simplify the page count let's use the default of 35 videos per page</span>
|
||||
<span class="s1">'first'</span> <span class="p">:</span> <span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">35</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
|
||||
<span class="s1">'count'</span> <span class="p">:</span> <span class="mi">35</span><span class="p">,</span>
|
||||
<span class="c1"># fmt: on</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># time range</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]:</span>
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'form'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'VRFLTR'</span>
|
||||
<span class="n">query_params</span><span class="p">[</span><span class="s1">'qft'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">' filterui:videoage-lt</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">time_map</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">query_params</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_videos.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get response from Bing-Video"""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'</span><span class="p">):</span>
|
||||
<span class="n">metadata</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="vrhdata"]/@vrhm'</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">info</span> <span class="o">=</span> <span class="s1">' - '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[@class="mc_vtvc_meta_block"]//span/text()'</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{0}</span><span class="s1"> - </span><span class="si">{1}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s1">'du'</span><span class="p">],</span> <span class="n">info</span><span class="p">)</span>
|
||||
<span class="n">thumbnail</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//div[contains(@class, "mc_vtvc_th")]//img/@src'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">'murl'</span><span class="p">],</span>
|
||||
<span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'vt'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/bing.html#searx.engines.bing_videos.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages and regions from Bing-Videos."""</span>
|
||||
|
||||
<span class="n">xpath_market_codes</span> <span class="o">=</span> <span class="s1">'//table[1]/tbody/tr/td[3]'</span>
|
||||
<span class="c1"># xpath_country_codes = '//table[2]/tbody/tr/td[2]'</span>
|
||||
<span class="n">xpath_language_codes</span> <span class="o">=</span> <span class="s1">'//table[3]/tbody/tr/td[2]'</span>
|
||||
|
||||
<span class="n">_fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">,</span> <span class="n">bing_traits_url</span><span class="p">,</span> <span class="n">xpath_language_codes</span><span class="p">,</span> <span class="n">xpath_market_codes</span><span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
531
_modules/searx/engines/brave.html
Normal file
531
_modules/searx/engines/brave.html
Normal file
|
|
@ -0,0 +1,531 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.brave — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.brave</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.brave</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Brave supports the categories listed in :py:obj:`brave_category` (General,</span>
|
||||
<span class="sd">news, videos, images). The support of :py:obj:`paging` and :py:obj:`time range</span>
|
||||
<span class="sd"><time_range_support>` is limited (see remarks).</span>
|
||||
|
||||
<span class="sd">Configured ``brave`` engines:</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name: brave</span>
|
||||
<span class="sd"> engine: brave</span>
|
||||
<span class="sd"> ...</span>
|
||||
<span class="sd"> brave_category: search</span>
|
||||
<span class="sd"> time_range_support: true</span>
|
||||
<span class="sd"> paging: true</span>
|
||||
|
||||
<span class="sd"> - name: brave.images</span>
|
||||
<span class="sd"> engine: brave</span>
|
||||
<span class="sd"> ...</span>
|
||||
<span class="sd"> brave_category: images</span>
|
||||
|
||||
<span class="sd"> - name: brave.videos</span>
|
||||
<span class="sd"> engine: brave</span>
|
||||
<span class="sd"> ...</span>
|
||||
<span class="sd"> brave_category: videos</span>
|
||||
|
||||
<span class="sd"> - name: brave.news</span>
|
||||
<span class="sd"> engine: brave</span>
|
||||
<span class="sd"> ...</span>
|
||||
<span class="sd"> brave_category: news</span>
|
||||
|
||||
|
||||
<span class="sd">.. _brave regions:</span>
|
||||
|
||||
<span class="sd">Brave regions</span>
|
||||
<span class="sd">=============</span>
|
||||
|
||||
<span class="sd">Brave uses two-letter tags for the regions like ``ca`` while SearXNG deals with</span>
|
||||
<span class="sd">locales.  To get a mapping, all *official de-facto* languages of the Brave</span>
|
||||
<span class="sd">region are mapped to regions in SearXNG (see :py:obj:`babel</span>
|
||||
<span class="sd"><babel.languages.get_official_languages>`):</span>
|
||||
|
||||
<span class="sd">.. code:: python</span>
|
||||
|
||||
<span class="sd"> "regions": {</span>
|
||||
<span class="sd"> ..</span>
|
||||
<span class="sd"> "en-CA": "ca",</span>
|
||||
<span class="sd"> "fr-CA": "ca",</span>
|
||||
<span class="sd"> ..</span>
|
||||
<span class="sd"> }</span>
|
||||
|
||||
|
||||
<span class="sd">.. note::</span>
|
||||
|
||||
<span class="sd"> The language (aka region) support of Brave's index is limited to very basic</span>
|
||||
<span class="sd"> languages. The search results for languages like Chinese or Arabic are of</span>
|
||||
<span class="sd"> low quality.</span>
|
||||
|
||||
|
||||
<span class="sd">.. _brave languages:</span>
|
||||
|
||||
<span class="sd">Brave languages</span>
|
||||
<span class="sd">===============</span>
|
||||
|
||||
<span class="sd">Brave&#39;s language support is limited to the UI (menus, area local notations,</span>
|
||||
<span class="sd">etc). Brave's index only seems to support a locale, but it does not seem to</span>
|
||||
<span class="sd">support any languages in its index. The choice of available languages is very</span>
|
||||
<span class="sd">small (and it is not clear to me where the difference in UI is when switching</span>
|
||||
<span class="sd">from en-us to en-ca or en-gb).</span>
|
||||
|
||||
<span class="sd">In the :py:obj:`EngineTraits object <searx.enginelib.traits.EngineTraits>` the</span>
|
||||
<span class="sd">UI languages are stored in a custom field named ``ui_lang``:</span>
|
||||
|
||||
<span class="sd">.. code:: python</span>
|
||||
|
||||
<span class="sd"> "custom": {</span>
|
||||
<span class="sd"> "ui_lang": {</span>
|
||||
<span class="sd"> "ca": "ca",</span>
|
||||
<span class="sd"> "de-DE": "de-de",</span>
|
||||
<span class="sd"> "en-CA": "en-ca",</span>
|
||||
<span class="sd"> "en-GB": "en-gb",</span>
|
||||
<span class="sd"> "en-US": "en-us",</span>
|
||||
<span class="sd"> "es": "es",</span>
|
||||
<span class="sd"> "fr-CA": "fr-ca",</span>
|
||||
<span class="sd"> "fr-FR": "fr-fr",</span>
|
||||
<span class="sd"> "ja-JP": "ja-jp",</span>
|
||||
<span class="sd"> "pt-BR": "pt-br",</span>
|
||||
<span class="sd"> "sq-AL": "sq-al"</span>
|
||||
<span class="sd"> }</span>
|
||||
<span class="sd"> },</span>
|
||||
|
||||
<span class="sd">Implementations</span>
|
||||
<span class="sd">===============</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">urlencode</span><span class="p">,</span>
|
||||
<span class="n">urlparse</span><span class="p">,</span>
|
||||
<span class="n">parse_qs</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">chompjs</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">locales</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">extract_text</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_list</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_getindex</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://search.brave.com/'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q22906900'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://search.brave.com/"</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">brave_category</span> <span class="o">=</span> <span class="s1">'search'</span>
|
||||
<span class="sd">&quot;&quot;&quot;Brave supports common web-search, video search, image and news search.</span>
|
||||
|
||||
<span class="sd">- ``search``: Common WEB search</span>
|
||||
<span class="sd">- ``videos``: search for videos</span>
|
||||
<span class="sd">- ``images``: search for images</span>
|
||||
<span class="sd">- ``news``: search for news</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">brave_spellcheck</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="sd">"""Brave supports some kind of spell checking. When activated, Brave tries to</span>
|
||||
<span class="sd">fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. In</span>
|
||||
<span class="sd">the UI of Brave the user gets warned about this; since we cannot warn the user</span>
|
||||
<span class="sd">in SearXNG, the spellchecking is disabled by default.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="sd">"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI</span>
|
||||
<span class="sd">category All)."""</span>
|
||||
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch_map</span> <span class="o">=</span> <span class="p">{</span><span class="mi">2</span><span class="p">:</span> <span class="s1">'strict'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'moderate'</span><span class="p">,</span> <span class="mi">0</span><span class="p">:</span> <span class="s1">'off'</span><span class="p">}</span> <span class="c1"># cookie: safesearch=off</span>
|
||||
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="sd">"""Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI</span>
|
||||
<span class="sd">category All)."""</span>
|
||||
|
||||
<span class="n">time_range_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'day'</span><span class="p">:</span> <span class="s1">'pd'</span><span class="p">,</span>
|
||||
<span class="s1">'week'</span><span class="p">:</span> <span class="s1">'pw'</span><span class="p">,</span>
|
||||
<span class="s1">'month'</span><span class="p">:</span> <span class="s1">'pm'</span><span class="p">,</span>
|
||||
<span class="s1">'year'</span><span class="p">:</span> <span class="s1">'py'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
|
||||
<span class="c1"># Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Accept-Encoding'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'gzip, deflate'</span>
|
||||
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="k">if</span> <span class="n">brave_spellcheck</span><span class="p">:</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'spellcheck'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'search'</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'offset'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'pageno'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span>
|
||||
<span class="k">if</span> <span class="n">time_range_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]):</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'tf'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">])</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">base_url</span><span class="si">}{</span><span class="n">brave_category</span><span class="si">}</span><span class="s2">?</span><span class="si">{</span><span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span>
|
||||
|
||||
<span class="c1"># set properties in the cookies</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'safesearch'</span><span class="p">]</span> <span class="o">=</span> <span class="n">safesearch_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">],</span> <span class="s1">'off'</span><span class="p">)</span>
|
||||
<span class="c1"># the useLocation is IP based, we use cookie 'country' for the region</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'useLocation'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'0'</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'summarizer'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'0'</span>
|
||||
|
||||
<span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'all'</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'country'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_region</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="n">ui_lang</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">get_engine_locale</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">],</span> <span class="s1">'en-us'</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'ui_lang'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ui_lang</span>
|
||||
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"cookies </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">])</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'search'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">_parse_search</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="n">datastr</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="s2">"const data = "</span> <span class="ow">in</span> <span class="n">line</span><span class="p">:</span>
|
||||
<span class="n">datastr</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"const data = "</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">break</span>
|
||||
|
||||
<span class="n">json_data</span> <span class="o">=</span> <span class="n">chompjs</span><span class="o">.</span><span class="n">parse_js_object</span><span class="p">(</span><span class="n">datastr</span><span class="p">)</span>
|
||||
<span class="n">json_resp</span> <span class="o">=</span> <span class="n">json_data</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'body'</span><span class="p">][</span><span class="s1">'response'</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'news'</span><span class="p">:</span>
|
||||
<span class="n">json_resp</span> <span class="o">=</span> <span class="n">json_resp</span><span class="p">[</span><span class="s1">'news'</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="n">_parse_news</span><span class="p">(</span><span class="n">json_resp</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'images'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">_parse_images</span><span class="p">(</span><span class="n">json_resp</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">brave_category</span> <span class="o">==</span> <span class="s1">'videos'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">_parse_videos</span><span class="p">(</span><span class="n">json_resp</span><span class="p">)</span>
|
||||
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Unsupported brave category: </span><span class="si">{</span><span class="n">brave_category</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_parse_search</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
|
||||
<span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="n">answer_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@class="answer"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">answer_tag</span><span class="p">:</span>
|
||||
<span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'answer'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">answer_tag</span><span class="p">)})</span>
|
||||
|
||||
<span class="c1"># xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'</span>
|
||||
<span class="n">xpath_results</span> <span class="o">=</span> <span class="s1">'//div[contains(@class, "snippet")]'</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">xpath_results</span><span class="p">):</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a[@class="result-header"]/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="n">title_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//span[@class="snippet-title"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">url</span> <span class="ow">and</span> <span class="n">title_tag</span><span class="p">):</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">content_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//p[@class="snippet-description"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span>
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//img[@class="thumb"]/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span>
|
||||
|
||||
<span class="n">item</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">title_tag</span><span class="p">),</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content_tag</span><span class="p">),</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">img_src</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">video_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span>
|
||||
<span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "video-snippet") and @data-macro="video"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">video_tag</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
|
||||
<span class="c1"># In my tests a video tag in the WEB search was most often not a</span>
|
||||
<span class="c1"># video, except the ones from youtube ..</span>
|
||||
|
||||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="n">_get_iframe_src</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">iframe_src</span><span class="p">:</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'iframe_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iframe_src</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'template'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'videos.html'</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">video_tag</span><span class="p">,</span> <span class="s1">'.//img/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'img_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">video_tag</span><span class="p">,</span> <span class="s1">'.//img/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span>
|
||||
|
||||
<span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">result_list</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_iframe_src</span><span class="p">(</span><span class="n">url</span><span class="p">):</span>
|
||||
<span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span> <span class="o">==</span> <span class="s1">'/watch'</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">query</span><span class="p">:</span>
|
||||
<span class="n">video_id</span> <span class="o">=</span> <span class="n">parse_qs</span><span class="p">(</span><span class="n">parsed_url</span><span class="o">.</span><span class="n">query</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'v'</span><span class="p">,</span> <span class="p">[])</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">if</span> <span class="n">video_id</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="s1">'https://www.youtube-nocookie.com/embed/'</span> <span class="o">+</span> <span class="n">video_id</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_parse_news</span><span class="p">(</span><span class="n">json_resp</span><span class="p">):</span>
|
||||
<span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_resp</span><span class="p">[</span><span class="s2">"results"</span><span class="p">]:</span>
|
||||
<span class="n">item</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'title'</span><span class="p">],</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'description'</span><span class="p">],</span>
|
||||
<span class="p">}</span>
|
||||
<span class="k">if</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">!=</span> <span class="s2">"null"</span><span class="p">:</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'img_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">][</span><span class="s1">'src'</span><span class="p">]</span>
|
||||
<span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">result_list</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_parse_images</span><span class="p">(</span><span class="n">json_resp</span><span class="p">):</span>
|
||||
<span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_resp</span><span class="p">[</span><span class="s2">"results"</span><span class="p">]:</span>
|
||||
<span class="n">item</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'title'</span><span class="p">],</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'description'</span><span class="p">],</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span>
|
||||
<span class="s1">'img_format'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">][</span><span class="s1">'format'</span><span class="p">],</span>
|
||||
<span class="s1">'source'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'source'</span><span class="p">],</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">][</span><span class="s1">'url'</span><span class="p">],</span>
|
||||
<span class="p">}</span>
|
||||
<span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">result_list</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_parse_videos</span><span class="p">(</span><span class="n">json_resp</span><span class="p">):</span>
|
||||
<span class="n">result_list</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_resp</span><span class="p">[</span><span class="s2">"results"</span><span class="p">]:</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span>
|
||||
<span class="n">item</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'title'</span><span class="p">],</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'description'</span><span class="p">],</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span>
|
||||
<span class="s1">'length'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'video'</span><span class="p">][</span><span class="s1">'duration'</span><span class="p">],</span>
|
||||
<span class="s1">'duration'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'video'</span><span class="p">][</span><span class="s1">'duration'</span><span class="p">],</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">!=</span> <span class="s2">"null"</span><span class="p">:</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'thumbnail'</span><span class="p">][</span><span class="s1">'src'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="n">_get_iframe_src</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">iframe_src</span><span class="p">:</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'iframe_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iframe_src</span>
|
||||
|
||||
<span class="n">result_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">result_list</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/brave.html#searx.engines.brave.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch :ref:`languages <brave languages>` and :ref:`regions <brave</span>
|
||||
<span class="sd"> regions>` from Brave."""</span>
|
||||
|
||||
<span class="c1"># pylint: disable=import-outside-toplevel</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">babel.languages</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">region_tag</span><span class="p">,</span> <span class="n">language_tag</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'Accept-Encoding'</span><span class="p">:</span> <span class="s1">'gzip, deflate'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="n">lang_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'no'</span><span class="p">:</span> <span class="s1">'nb'</span><span class="p">}</span> <span class="c1"># norway</span>
|
||||
|
||||
<span class="c1"># languages (UI)</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://search.brave.com/settings'</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from Brave is not OK."</span><span class="p">)</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//div[@id="language-select"]//option'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">ui_lang</span> <span class="o">=</span> <span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="s1">'-'</span> <span class="ow">in</span> <span class="n">ui_lang</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">ui_lang</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">ui_lang</span><span class="p">))</span>
|
||||
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine babel locale of Brave's (UI) language </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">ui_lang</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">ui_lang</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">ui_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ui_lang"</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">ui_lang</span>
|
||||
|
||||
<span class="c1"># search regions of brave</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'all'</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">country</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//div[@id="sidebar"]//ul/li/div[contains(@class, "country")]'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">flag</span> <span class="o">=</span> <span class="n">country</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./span[contains(@class, "flag")]'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="c1"># country_name = extract_text(flag.xpath('./following-sibling::*')[0])</span>
|
||||
<span class="n">country_tag</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="sa">r</span><span class="s1">'flag-([^\s]*)\s'</span><span class="p">,</span> <span class="n">flag</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./@class'</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="c1"># add official languages of the country ..</span>
|
||||
<span class="k">for</span> <span class="n">lang_tag</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get_official_languages</span><span class="p">(</span><span class="n">country_tag</span><span class="p">,</span> <span class="n">de_facto</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="n">lang_tag</span> <span class="o">=</span> <span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">lang_tag</span><span class="p">,</span> <span class="n">lang_tag</span><span class="p">)</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="s1">'</span><span class="si">%s</span><span class="s1">_</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">lang_tag</span><span class="p">,</span> <span class="n">country_tag</span><span class="o">.</span><span class="n">upper</span><span class="p">())))</span>
|
||||
<span class="c1"># print("%-20s: %s <-- %s" % (country_name, country_tag, sxng_tag))</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">country_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">country_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">country_tag</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
355
_modules/searx/engines/command.html
Normal file
355
_modules/searx/engines/command.html
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.command — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.command</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.command</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="sd">"""With *command engines* administrators can run engines to integrate arbitrary</span>
|
||||
<span class="sd">shell commands.</span>
|
||||
|
||||
<span class="sd">.. attention::</span>
|
||||
|
||||
<span class="sd"> When creating and enabling a ``command`` engine on a public instance, you</span>
|
||||
<span class="sd"> must be careful to avoid leaking private data.</span>
|
||||
|
||||
<span class="sd">The easiest solution is to limit the access by setting ``tokens`` as described</span>
|
||||
<span class="sd">in section :ref:`private engines`. The engine base is flexible. Only your</span>
|
||||
<span class="sd">imagination can limit the power of this engine (and maybe security concerns).</span>
|
||||
|
||||
<span class="sd">Configuration</span>
|
||||
<span class="sd">=============</span>
|
||||
|
||||
<span class="sd">The following options are available:</span>
|
||||
|
||||
<span class="sd">``command``:</span>
|
||||
<span class="sd"> A comma-separated list of the elements of the command. A special token</span>
|
||||
<span class="sd"> ``{{QUERY}}`` tells where to put the search terms of the user. Example:</span>
|
||||
|
||||
<span class="sd"> .. code:: yaml</span>
|
||||
|
||||
<span class="sd"> ['ls', '-l', '-h', '{{QUERY}}']</span>
|
||||
|
||||
<span class="sd">``delimiter``:</span>
|
||||
<span class="sd"> A mapping containing a delimiter ``chars`` and the *titles* of each element in</span>
|
||||
<span class="sd"> ``keys``.</span>
|
||||
|
||||
<span class="sd">``parse_regex``:</span>
|
||||
<span class="sd"> A dict containing the regular expressions for each result key.</span>
|
||||
|
||||
<span class="sd">``query_type``:</span>
|
||||
|
||||
<span class="sd"> The expected type of user search terms. Possible values: ``path`` and</span>
|
||||
<span class="sd"> ``enum``.</span>
|
||||
|
||||
<span class="sd"> ``path``:</span>
|
||||
<span class="sd"> Checks if the user provided path is inside the working directory. If not,</span>
|
||||
<span class="sd"> the query is not executed.</span>
|
||||
|
||||
<span class="sd"> ``enum``:</span>
|
||||
<span class="sd"> Is a list of allowed search terms. If the user submits something which is</span>
|
||||
<span class="sd"> not included in the list, the query returns an error.</span>
|
||||
|
||||
<span class="sd">``query_enum``:</span>
|
||||
<span class="sd"> A list containing allowed search terms if ``query_type`` is set to ``enum``.</span>
|
||||
|
||||
<span class="sd">``working_dir``:</span>
|
||||
<span class="sd"> The directory where the command has to be executed. Default: ``./``.</span>
|
||||
|
||||
<span class="sd">``result_separator``:</span>
|
||||
<span class="sd"> The character that separates results. Default: ``\\n``.</span>
|
||||
|
||||
<span class="sd">Example</span>
|
||||
<span class="sd">=======</span>
|
||||
|
||||
<span class="sd">The example engine below can be used to find files with a specific name in the</span>
|
||||
<span class="sd">configured working directory:</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name: find</span>
|
||||
<span class="sd"> engine: command</span>
|
||||
<span class="sd"> command: ['find', '.', '-name', '{{QUERY}}']</span>
|
||||
<span class="sd"> query_type: path</span>
|
||||
<span class="sd"> shortcut: fnd</span>
|
||||
<span class="sd"> delimiter:</span>
|
||||
<span class="sd"> chars: ' '</span>
|
||||
<span class="sd"> keys: ['line']</span>
|
||||
|
||||
<span class="sd">Implementations</span>
|
||||
<span class="sd">===============</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">expanduser</span><span class="p">,</span> <span class="n">isabs</span><span class="p">,</span> <span class="n">realpath</span><span class="p">,</span> <span class="n">commonprefix</span>
|
||||
<span class="kn">from</span> <span class="nn">shlex</span> <span class="kn">import</span> <span class="n">split</span> <span class="k">as</span> <span class="n">shlex_split</span>
|
||||
<span class="kn">from</span> <span class="nn">subprocess</span> <span class="kn">import</span> <span class="n">Popen</span><span class="p">,</span> <span class="n">PIPE</span>
|
||||
<span class="kn">from</span> <span class="nn">threading</span> <span class="kn">import</span> <span class="n">Thread</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">logger</span>
|
||||
|
||||
|
||||
<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'offline'</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">command</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">delimiter</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">parse_regex</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">query_type</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">query_enum</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">environment_variables</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">working_dir</span> <span class="o">=</span> <span class="n">realpath</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span>
|
||||
<span class="n">result_separator</span> <span class="o">=</span> <span class="s1">'</span><span class="se">\n</span><span class="s1">'</span>
|
||||
<span class="n">result_template</span> <span class="o">=</span> <span class="s1">'key-value.html'</span>
|
||||
<span class="n">timeout</span> <span class="o">=</span> <span class="mf">4.0</span>
|
||||
|
||||
<span class="n">_command_logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'command'</span><span class="p">)</span>
|
||||
<span class="n">_compiled_parse_regex</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span>
|
||||
<span class="n">check_parsing_options</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'command'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'engine command : missing configuration key: command'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">global</span> <span class="n">command</span><span class="p">,</span> <span class="n">working_dir</span><span class="p">,</span> <span class="n">delimiter</span><span class="p">,</span> <span class="n">parse_regex</span><span class="p">,</span> <span class="n">environment_variables</span>
|
||||
|
||||
<span class="n">command</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'command'</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'working_dir'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="n">working_dir</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'working_dir'</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">isabs</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">[</span><span class="s1">'working_dir'</span><span class="p">]):</span>
|
||||
<span class="n">working_dir</span> <span class="o">=</span> <span class="n">realpath</span><span class="p">(</span><span class="n">working_dir</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'parse_regex'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="n">parse_regex</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'parse_regex'</span><span class="p">]</span>
|
||||
<span class="k">for</span> <span class="n">result_key</span><span class="p">,</span> <span class="n">regex</span> <span class="ow">in</span> <span class="n">parse_regex</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="n">_compiled_parse_regex</span><span class="p">[</span><span class="n">result_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="n">regex</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="n">re</span><span class="o">.</span><span class="n">MULTILINE</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="n">delimiter</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'delimiter'</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'environment_variables'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="n">environment_variables</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'environment_variables'</span><span class="p">]</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="n">cmd</span> <span class="o">=</span> <span class="n">_get_command_to_run</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">cmd</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">reader_thread</span> <span class="o">=</span> <span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">_get_results_from_process</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">(</span><span class="n">results</span><span class="p">,</span> <span class="n">cmd</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]))</span>
|
||||
<span class="n">reader_thread</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
|
||||
<span class="n">reader_thread</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_command_to_run</span><span class="p">(</span><span class="n">query</span><span class="p">):</span>
|
||||
<span class="n">params</span> <span class="o">=</span> <span class="n">shlex_split</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||||
<span class="n">__check_query_params</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
||||
|
||||
<span class="n">cmd</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">command</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">c</span> <span class="o">==</span> <span class="s1">'{{QUERY}}'</span><span class="p">:</span>
|
||||
<span class="n">cmd</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">cmd</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">c</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">cmd</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_results_from_process</span><span class="p">(</span><span class="n">results</span><span class="p">,</span> <span class="n">cmd</span><span class="p">,</span> <span class="n">pageno</span><span class="p">):</span>
|
||||
<span class="n">leftover</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="n">start</span><span class="p">,</span> <span class="n">end</span> <span class="o">=</span> <span class="n">__get_results_limits</span><span class="p">(</span><span class="n">pageno</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">Popen</span><span class="p">(</span><span class="n">cmd</span><span class="p">,</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">env</span><span class="o">=</span><span class="n">environment_variables</span><span class="p">)</span> <span class="k">as</span> <span class="n">process</span><span class="p">:</span>
|
||||
<span class="n">line</span> <span class="o">=</span> <span class="n">process</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">readline</span><span class="p">()</span>
|
||||
<span class="k">while</span> <span class="n">line</span><span class="p">:</span>
|
||||
<span class="n">buf</span> <span class="o">=</span> <span class="n">leftover</span> <span class="o">+</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span>
|
||||
<span class="n">raw_results</span> <span class="o">=</span> <span class="n">buf</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">result_separator</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">raw_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span>
|
||||
<span class="n">leftover</span> <span class="o">=</span> <span class="n">raw_results</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">raw_results</span> <span class="o">=</span> <span class="n">raw_results</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">raw_result</span> <span class="ow">in</span> <span class="n">raw_results</span><span class="p">:</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="n">__parse_single_result</span><span class="p">(</span><span class="n">raw_result</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">_command_logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'skipped result:'</span><span class="p">,</span> <span class="n">raw_result</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">start</span> <span class="o"><=</span> <span class="n">count</span> <span class="ow">and</span> <span class="n">count</span> <span class="o"><=</span> <span class="n">end</span><span class="p">:</span>
|
||||
<span class="n">result</span><span class="p">[</span><span class="s1">'template'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result_template</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
|
||||
|
||||
<span class="n">count</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="k">if</span> <span class="n">end</span> <span class="o"><</span> <span class="n">count</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
<span class="n">line</span> <span class="o">=</span> <span class="n">process</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">readline</span><span class="p">()</span>
|
||||
|
||||
<span class="n">return_code</span> <span class="o">=</span> <span class="n">process</span><span class="o">.</span><span class="n">wait</span><span class="p">(</span><span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">return_code</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s1">'non-zero return code when running command'</span><span class="p">,</span> <span class="n">cmd</span><span class="p">,</span> <span class="n">return_code</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">__get_results_limits</span><span class="p">(</span><span class="n">pageno</span><span class="p">):</span>
|
||||
<span class="n">start</span> <span class="o">=</span> <span class="p">(</span><span class="n">pageno</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span>
|
||||
<span class="n">end</span> <span class="o">=</span> <span class="n">start</span> <span class="o">+</span> <span class="mi">9</span>
|
||||
<span class="k">return</span> <span class="n">start</span><span class="p">,</span> <span class="n">end</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">__check_query_params</span><span class="p">(</span><span class="n">params</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">query_type</span><span class="p">:</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">query_type</span> <span class="o">==</span> <span class="s1">'path'</span><span class="p">:</span>
|
||||
<span class="n">query_path</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">query_path</span> <span class="o">=</span> <span class="n">expanduser</span><span class="p">(</span><span class="n">query_path</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">commonprefix</span><span class="p">([</span><span class="n">realpath</span><span class="p">(</span><span class="n">query_path</span><span class="p">),</span> <span class="n">working_dir</span><span class="p">])</span> <span class="o">!=</span> <span class="n">working_dir</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'requested path is outside of configured working directory'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">query_type</span> <span class="o">==</span> <span class="s1">'enum'</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">query_enum</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="n">params</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">param</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">query_enum</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'submitted query params is not allowed'</span><span class="p">,</span> <span class="n">param</span><span class="p">,</span> <span class="s1">'allowed params:'</span><span class="p">,</span> <span class="n">query_enum</span><span class="p">)</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="check_parsing_options"><a class="viewcode-back" href="../../../dev/engines/offline/command-line-engines.html#searx.engines.command.check_parsing_options">[docs]</a><span class="k">def</span> <span class="nf">check_parsing_options</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Checks if delimiter based parsing or regex parsing is configured correctly"""</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span> <span class="ow">and</span> <span class="s1">'parse_regex'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'failed to init settings for parsing lines: missing delimiter or parse_regex'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">in</span> <span class="n">engine_settings</span> <span class="ow">and</span> <span class="s1">'parse_regex'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'failed to init settings for parsing lines: too many settings'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'delimiter'</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="s1">'chars'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'delimiter'</span><span class="p">]</span> <span class="ow">or</span> <span class="s1">'keys'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'delimiter'</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">__parse_single_result</span><span class="p">(</span><span class="n">raw_result</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Parses command line output based on configuration"""</span>
|
||||
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">delimiter</span><span class="p">:</span>
|
||||
<span class="n">elements</span> <span class="o">=</span> <span class="n">raw_result</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">delimiter</span><span class="p">[</span><span class="s1">'chars'</span><span class="p">],</span> <span class="n">maxsplit</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">delimiter</span><span class="p">[</span><span class="s1">'keys'</span><span class="p">])</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">elements</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">delimiter</span><span class="p">[</span><span class="s1">'keys'</span><span class="p">]):</span>
|
||||
<span class="k">return</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">elements</span><span class="p">)):</span>
|
||||
<span class="n">result</span><span class="p">[</span><span class="n">delimiter</span><span class="p">[</span><span class="s1">'keys'</span><span class="p">][</span><span class="n">i</span><span class="p">]]</span> <span class="o">=</span> <span class="n">elements</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">parse_regex</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">result_key</span><span class="p">,</span> <span class="n">regex</span> <span class="ow">in</span> <span class="n">_compiled_parse_regex</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="n">found</span> <span class="o">=</span> <span class="n">regex</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">raw_result</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">found</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">{}</span>
|
||||
<span class="n">result</span><span class="p">[</span><span class="n">result_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">raw_result</span><span class="p">[</span><span class="n">found</span><span class="o">.</span><span class="n">start</span><span class="p">()</span> <span class="p">:</span> <span class="n">found</span><span class="o">.</span><span class="n">end</span><span class="p">()]</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">result</span>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
364
_modules/searx/engines/dailymotion.html
Normal file
364
_modules/searx/engines/dailymotion.html
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.dailymotion — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.dailymotion</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.dailymotion</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd">Dailymotion (Videos)</span>
|
||||
<span class="sd">~~~~~~~~~~~~~~~~~~~~</span>
|
||||
|
||||
<span class="sd">.. _REST GET: https://developers.dailymotion.com/tools/</span>
|
||||
<span class="sd">.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters</span>
|
||||
<span class="sd">.. _Video filters API: https://developers.dailymotion.com/api/#video-filters</span>
|
||||
<span class="sd">.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">import</span> <span class="nn">time</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span><span class="p">,</span> <span class="n">raise_for_httperror</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">html_to_text</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineAPIException</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">region_tag</span><span class="p">,</span> <span class="n">language_tag</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.dailymotion.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q769222'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://www.dailymotion.com/developer'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">number_of_results</span> <span class="o">=</span> <span class="mi">10</span>
|
||||
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_delta_dict</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"day"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
|
||||
<span class="s2">"week"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">7</span><span class="p">),</span>
|
||||
<span class="s2">"month"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">31</span><span class="p">),</span>
|
||||
<span class="s2">"year"</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">365</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch_params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="mi">2</span><span class="p">:</span> <span class="p">{</span><span class="s1">'is_created_for_kids'</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">},</span>
|
||||
<span class="mi">1</span><span class="p">:</span> <span class="p">{</span><span class="s1">'is_created_for_kids'</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">},</span>
|
||||
<span class="mi">0</span><span class="p">:</span> <span class="p">{},</span>
|
||||
<span class="p">}</span>
|
||||
<span class="sd">"""True if this video is "Created for Kids" / intends to target an audience</span>
|
||||
<span class="sd">under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">family_filter_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="mi">2</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">,</span>
|
||||
<span class="mi">1</span><span class="p">:</span> <span class="s1">'true'</span><span class="p">,</span>
|
||||
<span class="mi">0</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="sd">"""By default, the family filter is turned on. Setting this parameter to</span>
|
||||
<span class="sd">``false`` will stop filtering-out explicit content from searches and global</span>
|
||||
<span class="sd">contexts (``family_filter`` in `Global API Parameters`_ ).</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">result_fields</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="s1">'allow_embed'</span><span class="p">,</span>
|
||||
<span class="s1">'description'</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">,</span>
|
||||
<span class="s1">'created_time'</span><span class="p">,</span>
|
||||
<span class="s1">'duration'</span><span class="p">,</span>
|
||||
<span class="s1">'url'</span><span class="p">,</span>
|
||||
<span class="s1">'thumbnail_360_url'</span><span class="p">,</span>
|
||||
<span class="s1">'id'</span><span class="p">,</span>
|
||||
<span class="p">]</span>
|
||||
<span class="sd">"""`Fields selection`_, by default, a few fields are returned. To request more</span>
|
||||
<span class="sd">specific fields, the ``fields`` parameter is used with the list of fields</span>
|
||||
<span class="sd">SearXNG needs in the response to build a video result list.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">search_url</span> <span class="o">=</span> <span class="s1">'https://api.dailymotion.com/videos?'</span>
|
||||
<span class="sd">"""URL to retrieve a list of videos.</span>
|
||||
|
||||
<span class="sd">- `REST GET`_</span>
|
||||
<span class="sd">- `Global API Parameters`_</span>
|
||||
<span class="sd">- `Video filters API`_</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="s2">"https://www.dailymotion.com/embed/video/</span><span class="si">{video_id}</span><span class="s2">"</span>
|
||||
<span class="sd">"""URL template to embed video in SearXNG's result list."""</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">query</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
|
||||
<span class="n">eng_region</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en_US'</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">eng_lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'family_filter'</span><span class="p">:</span> <span class="n">family_filter_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">],</span> <span class="s1">'false'</span><span class="p">),</span>
|
||||
<span class="s1">'thumbnail_ratio'</span><span class="p">:</span> <span class="s1">'original'</span><span class="p">,</span> <span class="c1"># original|widescreen|square</span>
|
||||
<span class="c1"># https://developers.dailymotion.com/api/#video-filters</span>
|
||||
<span class="s1">'languages'</span><span class="p">:</span> <span class="n">eng_lang</span><span class="p">,</span>
|
||||
<span class="s1">'page'</span><span class="p">:</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">],</span>
|
||||
<span class="s1">'password_protected'</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span>
|
||||
<span class="s1">'private'</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span>
|
||||
<span class="s1">'sort'</span><span class="p">:</span> <span class="s1">'relevance'</span><span class="p">,</span>
|
||||
<span class="s1">'limit'</span><span class="p">:</span> <span class="n">number_of_results</span><span class="p">,</span>
|
||||
<span class="s1">'fields'</span><span class="p">:</span> <span class="s1">','</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result_fields</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">args</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">safesearch_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">],</span> <span class="p">{}))</span>
|
||||
|
||||
<span class="c1"># Don't add localization and country arguments if the user only selects a</span>
|
||||
<span class="c1"># language (:de, :en, ..)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">))</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="c1"># https://developers.dailymotion.com/api/#global-parameters</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'localization'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'country'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="c1"># Insufficient rights for the `ams_country' parameter of route `GET /videos'</span>
|
||||
<span class="c1"># 'ams_country': eng_region.split('_')[1],</span>
|
||||
|
||||
<span class="n">time_delta</span> <span class="o">=</span> <span class="n">time_delta_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"time_range"</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="n">time_delta</span><span class="p">:</span>
|
||||
<span class="n">created_after</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">time_delta</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'created_after'</span><span class="p">]</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timestamp</span><span class="p">(</span><span class="n">created_after</span><span class="p">)</span>
|
||||
|
||||
<span class="n">query_str</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span> <span class="o">+</span> <span class="n">query_str</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="c1"># get response from search-request</span>
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">search_res</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># check for an API error</span>
|
||||
<span class="k">if</span> <span class="s1">'error'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="n">SearxEngineAPIException</span><span class="p">(</span><span class="n">search_res</span><span class="p">[</span><span class="s1">'error'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'message'</span><span class="p">))</span>
|
||||
|
||||
<span class="n">raise_for_httperror</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># parse results</span>
|
||||
<span class="k">for</span> <span class="n">res</span> <span class="ow">in</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'list'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">res</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">res</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">res</span><span class="p">[</span><span class="s1">'description'</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">></span> <span class="mi">300</span><span class="p">:</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[:</span><span class="mi">300</span><span class="p">]</span> <span class="o">+</span> <span class="s1">'...'</span>
|
||||
|
||||
<span class="n">publishedDate</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">res</span><span class="p">[</span><span class="s1">'created_time'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
<span class="n">length</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">gmtime</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'duration'</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">length</span><span class="o">.</span><span class="n">tm_hour</span><span class="p">:</span>
|
||||
<span class="n">length</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">"%H:%M:%S"</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">length</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">"%M:%S"</span><span class="p">,</span> <span class="n">length</span><span class="p">)</span>
|
||||
|
||||
<span class="n">thumbnail</span> <span class="o">=</span> <span class="n">res</span><span class="p">[</span><span class="s1">'thumbnail_360_url'</span><span class="p">]</span>
|
||||
<span class="n">thumbnail</span> <span class="o">=</span> <span class="n">thumbnail</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"http://"</span><span class="p">,</span> <span class="s2">"https://"</span><span class="p">)</span>
|
||||
|
||||
<span class="n">item</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">publishedDate</span><span class="p">,</span>
|
||||
<span class="s1">'length'</span><span class="p">:</span> <span class="n">length</span><span class="p">,</span>
|
||||
<span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">thumbnail</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># HINT: no matter what the value is, without API token videos can't be shown</span>
|
||||
<span class="c1"># embedded</span>
|
||||
<span class="k">if</span> <span class="n">res</span><span class="p">[</span><span class="s1">'allow_embed'</span><span class="p">]:</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'iframe_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">iframe_src</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">video_id</span><span class="o">=</span><span class="n">res</span><span class="p">[</span><span class="s1">'id'</span><span class="p">])</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># return results</span>
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/dailymotion.html#searx.engines.dailymotion.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch locales & languages from dailymotion.</span>
|
||||
|
||||
<span class="sd"> Locales fetched from `api/locales &lt;https://api.dailymotion.com/locales&gt;`_.</span>
|
||||
<span class="sd"> There are duplications in the locale codes returned from Dailymotion which</span>
|
||||
<span class="sd"> can be ignored::</span>
|
||||
|
||||
<span class="sd"> en_EN --> en_GB, en_US</span>
|
||||
<span class="sd"> ar_AA --> ar_EG, ar_AE, ar_SA</span>
|
||||
|
||||
<span class="sd"> The language list `api/languages &lt;https://api.dailymotion.com/languages&gt;`_</span>
|
||||
<span class="sd"> contains over 7000 *languages* codes (see PR1071_). We use only those</span>
|
||||
<span class="sd"> language codes that are used in the locales.</span>
|
||||
|
||||
<span class="sd"> .. _PR1071: https://github.com/searxng/searxng/pull/1071</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://api.dailymotion.com/locales'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from dailymotion/locales is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()[</span><span class="s1">'list'</span><span class="p">]:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'locale'</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'en_EN'</span><span class="p">,</span> <span class="s1">'ar_AA'</span><span class="p">):</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: item unknown --> </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">item</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
|
||||
<span class="n">locale_lang_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">values</span><span class="p">()]</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://api.dailymotion.com/languages'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from dailymotion/languages is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()[</span><span class="s1">'list'</span><span class="p">]:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s1">'code'</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">locale_lang_list</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
185
_modules/searx/engines/demo_offline.html
Normal file
185
_modules/searx/engines/demo_offline.html
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.demo_offline — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.demo_offline</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.demo_offline</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Within this module we implement a *demo offline engine*. Do not look too</span>
|
||||
<span class="sd">close to the implementation, it's just a simple example. To make use of this</span>
|
||||
<span class="sd">*demo* engine add the following entry to your engines list in ``settings.yml``:</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name: my offline engine</span>
|
||||
<span class="sd"> engine: demo_offline</span>
|
||||
<span class="sd"> shortcut: demo</span>
|
||||
<span class="sd"> disabled: false</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">json</span>
|
||||
|
||||
<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'offline'</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">]</span>
|
||||
<span class="n">disabled</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">timeout</span> <span class="o">=</span> <span class="mf">2.0</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># if there is a need for globals, use a leading underline</span>
|
||||
<span class="n">_my_offline_engine</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="init"><a class="viewcode-back" href="../../../dev/engines/demo/demo_offline.html#searx.engines.demo_offline.init">[docs]</a><span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Initialization of the (offline) engine. The origin of this demo engine is a</span>
|
||||
<span class="sd"> simple json string which is loaded in this example while the engine is</span>
|
||||
<span class="sd"> initialized.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">global</span> <span class="n">_my_offline_engine</span> <span class="c1"># pylint: disable=global-statement</span>
|
||||
|
||||
<span class="n">_my_offline_engine</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="s1">'[ {"value": "</span><span class="si">%s</span><span class="s1">"}'</span>
|
||||
<span class="s1">', {"value":"first item"}'</span>
|
||||
<span class="s1">', {"value":"second item"}'</span>
|
||||
<span class="s1">', {"value":"third item"}'</span>
|
||||
<span class="s1">']'</span> <span class="o">%</span> <span class="n">engine_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span>
|
||||
<span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="search"><a class="viewcode-back" href="../../../dev/engines/demo/demo_offline.html#searx.engines.demo_offline.search">[docs]</a><span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">request_params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Query (offline) engine and return results. Assemble the list of results from</span>
|
||||
<span class="sd"> your local engine. In this demo engine we ignore the 'query' term, usually</span>
|
||||
<span class="sd"> you would pass the 'query' term to your local engine to filter out the</span>
|
||||
<span class="sd"> results.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">ret_val</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">result_list</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">_my_offline_engine</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">result_list</span><span class="p">:</span>
|
||||
<span class="n">entry</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'language'</span><span class="p">:</span> <span class="n">request_params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span>
|
||||
<span class="s1">'value'</span><span class="p">:</span> <span class="n">row</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">),</span>
|
||||
<span class="c1"># choose a result template or comment out to use the *default*</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'key-value.html'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="n">ret_val</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">entry</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">ret_val</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
212
_modules/searx/engines/demo_online.html
Normal file
212
_modules/searx/engines/demo_online.html
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.demo_online — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.demo_online</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.demo_online</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Within this module we implement a *demo online engine*. Do not look too</span>
|
||||
<span class="sd">close to the implementation, it's just a simple example which queries `The Art</span>
|
||||
<span class="sd">Institute of Chicago <https://www.artic.edu>`_</span>
|
||||
|
||||
<span class="sd">To make use of this *demo* engine add the following entry to your engines</span>
|
||||
<span class="sd">list in ``settings.yml``:</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name: my online engine</span>
|
||||
<span class="sd"> engine: demo_online</span>
|
||||
<span class="sd"> shortcut: demo</span>
|
||||
<span class="sd"> disabled: false</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
|
||||
<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online'</span>
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">]</span>
|
||||
<span class="n">disabled</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">timeout</span> <span class="o">=</span> <span class="mf">2.0</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'images'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">page_size</span> <span class="o">=</span> <span class="mi">20</span>
|
||||
|
||||
<span class="n">search_api</span> <span class="o">=</span> <span class="s1">'https://api.artic.edu/api/v1/artworks/search?'</span>
|
||||
<span class="n">image_api</span> <span class="o">=</span> <span class="s1">'https://www.artic.edu/iiif/2/'</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.artic.edu'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q239303'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'http://api.artic.edu/docs/'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<span class="c1"># if there is a need for globals, use a leading underline</span>
|
||||
<span class="n">_my_online_engine</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="init"><a class="viewcode-back" href="../../../dev/engines/demo/demo_online.html#searx.engines.demo_online.init">[docs]</a><span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Initialization of the (online) engine. If no initialization is needed, drop</span>
|
||||
<span class="sd"> this init function.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">global</span> <span class="n">_my_online_engine</span> <span class="c1"># pylint: disable=global-statement</span>
|
||||
<span class="n">_my_online_engine</span> <span class="o">=</span> <span class="n">engine_settings</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/demo/demo_online.html#searx.engines.demo_online.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Build up the ``params`` for the online request. In this example we build a</span>
|
||||
<span class="sd"> URL to fetch images from `artic.edu <https://artic.edu>`__</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'page'</span><span class="p">:</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">],</span>
|
||||
<span class="s1">'fields'</span><span class="p">:</span> <span class="s1">'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles'</span><span class="p">,</span>
|
||||
<span class="s1">'limit'</span><span class="p">:</span> <span class="n">page_size</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_api</span> <span class="o">+</span> <span class="n">args</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/demo/demo_online.html#searx.engines.demo_online.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Parse out the result items from the response. In this example we parse the</span>
|
||||
<span class="sd"> response from `api.artic.edu <https://artic.edu>`__ and filter out all</span>
|
||||
<span class="sd"> images.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">json_data</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]:</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">result</span><span class="p">[</span><span class="s1">'image_id'</span><span class="p">]:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="s1">'https://artic.edu/artworks/</span><span class="si">%(id)s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">result</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span> <span class="o">+</span> <span class="s2">" (</span><span class="si">%(date_display)s</span><span class="s2">) // </span><span class="si">%(artist_display)s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">result</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'medium_display'</span><span class="p">],</span>
|
||||
<span class="s1">'author'</span><span class="p">:</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'artist_titles'</span><span class="p">]),</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">image_api</span> <span class="o">+</span> <span class="s1">'/</span><span class="si">%(image_id)s</span><span class="s1">/full/843,/0/default.jpg'</span> <span class="o">%</span> <span class="n">result</span><span class="p">,</span>
|
||||
<span class="s1">'img_format'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'dimensions'</span><span class="p">],</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
549
_modules/searx/engines/duckduckgo.html
Normal file
549
_modules/searx/engines/duckduckgo.html
Normal file
|
|
@ -0,0 +1,549 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.duckduckgo — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.duckduckgo</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.duckduckgo</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd">DuckDuckGo Lite</span>
|
||||
<span class="sd">~~~~~~~~~~~~~~~</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">import</span> <span class="nn">json</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
<span class="kn">import</span> <span class="nn">lxml.html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">locales</span><span class="p">,</span>
|
||||
<span class="n">redislib</span><span class="p">,</span>
|
||||
<span class="n">external_bang</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">eval_xpath</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_getindex</span><span class="p">,</span>
|
||||
<span class="n">extract_text</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">redisdb</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineAPIException</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://lite.duckduckgo.com/lite/'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q12805'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="sd">"""DuckDuckGo-Lite tries to guess user's preferred language from the HTTP</span>
|
||||
<span class="sd">``Accept-Language``. Optionally the user can select a region filter (but not a</span>
|
||||
<span class="sd">language).</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span> <span class="c1"># user can't select but the results are filtered</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="s1">'https://lite.duckduckgo.com/lite/'</span>
|
||||
<span class="c1"># url_ping = 'https://duckduckgo.com/t/sl_l'</span>
|
||||
|
||||
<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'day'</span><span class="p">:</span> <span class="s1">'d'</span><span class="p">,</span> <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'w'</span><span class="p">,</span> <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'m'</span><span class="p">,</span> <span class="s1">'year'</span><span class="p">:</span> <span class="s1">'y'</span><span class="p">}</span>
|
||||
<span class="n">form_data</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'v'</span><span class="p">:</span> <span class="s1">'l'</span><span class="p">,</span> <span class="s1">'api'</span><span class="p">:</span> <span class="s1">'d.js'</span><span class="p">,</span> <span class="s1">'o'</span><span class="p">:</span> <span class="s1">'json'</span><span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="cache_vqd"><a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.cache_vqd">[docs]</a><span class="k">def</span> <span class="nf">cache_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Caches a ``vqd`` value from a query.</span>
|
||||
|
||||
<span class="sd"> The vqd value depends on the query string and is needed for the follow up</span>
|
||||
<span class="sd"> pages or the images loaded by a XMLHttpRequest:</span>
|
||||
|
||||
<span class="sd"> - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`</span>
|
||||
<span class="sd"> - DuckDuckGo Images: `https://duckduckgo.com/i.js?q=...&vqd=...`</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">c</span> <span class="o">=</span> <span class="n">redisdb</span><span class="o">.</span><span class="n">client</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">c</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"cache vqd value: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
|
||||
<span class="n">key</span> <span class="o">=</span> <span class="s1">'SearXNG_ddg_vqd'</span> <span class="o">+</span> <span class="n">redislib</span><span class="o">.</span><span class="n">secret_hash</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||||
<span class="n">c</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">ex</span><span class="o">=</span><span class="mi">600</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_vqd"><a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.get_vqd">[docs]</a><span class="k">def</span> <span class="nf">get_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">headers</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached</span>
|
||||
<span class="sd"> (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the</span>
|
||||
<span class="sd"> response.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">c</span> <span class="o">=</span> <span class="n">redisdb</span><span class="o">.</span><span class="n">client</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">c</span><span class="p">:</span>
|
||||
<span class="n">key</span> <span class="o">=</span> <span class="s1">'SearXNG_ddg_vqd'</span> <span class="o">+</span> <span class="n">redislib</span><span class="o">.</span><span class="n">secret_hash</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">c</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">value</span><span class="p">:</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"re-use cached vqd value: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">value</span>
|
||||
|
||||
<span class="n">query_url</span> <span class="o">=</span> <span class="s1">'https://duckduckgo.com/?q=</span><span class="si">{query}</span><span class="s1">&atb=v290-5'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">}))</span>
|
||||
<span class="n">res</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">query_url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">res</span><span class="o">.</span><span class="n">text</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">if</span> <span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'vqd=</span><span class="se">\"</span><span class="s1">'</span><span class="p">)</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="n">SearxEngineAPIException</span><span class="p">(</span><span class="s1">'Request failed'</span><span class="p">)</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'vqd=</span><span class="se">\"</span><span class="s1">'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">5</span> <span class="p">:]</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="p">[:</span> <span class="n">value</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'</span><span class="se">\'</span><span class="s1">'</span><span class="p">)]</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"new vqd value: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
|
||||
<span class="n">cache_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">value</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_ddg_lang"><a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.get_ddg_lang">[docs]</a><span class="k">def</span> <span class="nf">get_ddg_lang</span><span class="p">(</span><span class="n">eng_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">'en_US'</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get DuckDuckGo's language identifier from SearXNG's locale.</span>
|
||||
|
||||
<span class="sd"> DuckDuckGo defines its languages by region codes (see</span>
|
||||
<span class="sd"> :py:obj:`fetch_traits`).</span>
|
||||
|
||||
<span class="sd"> To get region and language of a DDG service use:</span>
|
||||
|
||||
<span class="sd"> .. code:: python</span>
|
||||
|
||||
<span class="sd"> eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)</span>
|
||||
<span class="sd"> eng_lang = get_ddg_lang(traits, params['searxng_locale'])</span>
|
||||
|
||||
<span class="sd"> It might be confusing, but the ``l`` value of the cookie is what SearXNG calls</span>
|
||||
<span class="sd"> the *region*:</span>
|
||||
|
||||
<span class="sd"> .. code:: python</span>
|
||||
|
||||
<span class="sd"> # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}</span>
|
||||
<span class="sd"> params['cookies']['ad'] = eng_lang</span>
|
||||
<span class="sd"> params['cookies']['ah'] = eng_region</span>
|
||||
<span class="sd"> params['cookies']['l'] = eng_region</span>
|
||||
|
||||
<span class="sd"> .. hint::</span>
|
||||
|
||||
<span class="sd"> `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language</span>
|
||||
<span class="sd"> selection to the user, only a region can be selected by the user</span>
|
||||
<span class="sd"> (``eng_region`` from the example above). DDG-lite stores the selected</span>
|
||||
<span class="sd"> region in a cookie::</span>
|
||||
|
||||
<span class="sd"> params['cookies']['kl'] = eng_region # 'ar-es'</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'lang_region'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">default</span><span class="p">)</span>
|
||||
<span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="n">ddg_reg_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'tw-tzh'</span><span class="p">:</span> <span class="s1">'zh_TW'</span><span class="p">,</span>
|
||||
<span class="s1">'hk-tzh'</span><span class="p">:</span> <span class="s1">'zh_HK'</span><span class="p">,</span>
|
||||
<span class="s1">'ct-ca'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span> <span class="c1"># ct-ca and es-ca both map to ca_ES</span>
|
||||
<span class="s1">'es-ca'</span><span class="p">:</span> <span class="s1">'ca_ES'</span><span class="p">,</span>
|
||||
<span class="s1">'id-en'</span><span class="p">:</span> <span class="s1">'id_ID'</span><span class="p">,</span>
|
||||
<span class="s1">'no-no'</span><span class="p">:</span> <span class="s1">'nb_NO'</span><span class="p">,</span>
|
||||
<span class="s1">'jp-jp'</span><span class="p">:</span> <span class="s1">'ja_JP'</span><span class="p">,</span>
|
||||
<span class="s1">'kr-kr'</span><span class="p">:</span> <span class="s1">'ko_KR'</span><span class="p">,</span>
|
||||
<span class="s1">'xa-ar'</span><span class="p">:</span> <span class="s1">'ar_SA'</span><span class="p">,</span>
|
||||
<span class="s1">'sl-sl'</span><span class="p">:</span> <span class="s1">'sl_SI'</span><span class="p">,</span>
|
||||
<span class="s1">'th-en'</span><span class="p">:</span> <span class="s1">'th_TH'</span><span class="p">,</span>
|
||||
<span class="s1">'vn-en'</span><span class="p">:</span> <span class="s1">'vi_VN'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">ddg_lang_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># use ar --> ar_EG (Egypt's Arabic)</span>
|
||||
<span class="s2">"ar_DZ"</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s2">"ar_JO"</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s2">"ar_SA"</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># use bn --> bn_BD</span>
|
||||
<span class="s1">'bn_IN'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># use de --> de_DE</span>
|
||||
<span class="s1">'de_CH'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># use en --> en_US,</span>
|
||||
<span class="s1">'en_AU'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'en_CA'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'en_GB'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># Esperanto</span>
|
||||
<span class="s1">'eo_XX'</span><span class="p">:</span> <span class="s1">'eo'</span><span class="p">,</span>
|
||||
<span class="c1"># use es --> es_ES,</span>
|
||||
<span class="s1">'es_AR'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_CL'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_CO'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_CR'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_EC'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_MX'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_PE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_UY'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'es_VE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># use fr --> fr_FR</span>
|
||||
<span class="s1">'fr_CA'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'fr_CH'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="s1">'fr_BE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># use nl --> nl_NL</span>
|
||||
<span class="s1">'nl_BE'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># use pt --> pt_PT</span>
|
||||
<span class="s1">'pt_BR'</span><span class="p">:</span> <span class="s1">'lang_region'</span><span class="p">,</span>
|
||||
<span class="c1"># skip these languages</span>
|
||||
<span class="s1">'od_IN'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span>
|
||||
<span class="s1">'io_XX'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span>
|
||||
<span class="s1">'tokipona_XX'</span><span class="p">:</span> <span class="s1">'skip'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
|
||||
<span class="c1"># quote ddg bangs</span>
|
||||
<span class="n">query_parts</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="c1"># for val in re.split(r'(\s+)', query):</span>
|
||||
<span class="k">for</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">r</span><span class="s1">'(\s+)'</span><span class="p">,</span> <span class="n">query</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">val</span><span class="o">.</span><span class="n">strip</span><span class="p">():</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="k">if</span> <span class="n">val</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'!'</span><span class="p">)</span> <span class="ow">and</span> <span class="n">external_bang</span><span class="o">.</span><span class="n">get_node</span><span class="p">(</span><span class="n">external_bang</span><span class="o">.</span><span class="n">EXTERNAL_BANGS</span><span class="p">,</span> <span class="n">val</span><span class="p">[</span><span class="mi">1</span><span class="p">:]):</span>
|
||||
<span class="n">val</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"'</span><span class="si">{</span><span class="n">val</span><span class="si">}</span><span class="s2">'"</span>
|
||||
<span class="n">query_parts</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">query_parts</span><span class="p">)</span>
|
||||
|
||||
<span class="n">eng_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span>
|
||||
<span class="c1"># eng_lang = get_ddg_lang(traits, params['searxng_locale'])</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">url</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'POST'</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'q'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query</span>
|
||||
|
||||
<span class="c1"># The API is not documented, so we do some reverse engineering and emulate</span>
|
||||
<span class="c1"># what https://lite.duckduckgo.com/lite/ does when you press "next Page"</span>
|
||||
<span class="c1"># link again and again ..</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Content-Type'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'application/x-www-form-urlencoded'</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Referer'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'https://google.com/'</span>
|
||||
|
||||
<span class="c1"># initial page does not have an offset</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="c1"># second page does have an offset of 30</span>
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">30</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'s'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'dc'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span> <span class="o">+</span> <span class="mi">1</span>
|
||||
|
||||
<span class="k">elif</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">></span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="c1"># third and following pages do have an offset of 30 + n*50</span>
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="mi">30</span> <span class="o">+</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">2</span><span class="p">)</span> <span class="o">*</span> <span class="mi">50</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'s'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'dc'</span><span class="p">]</span> <span class="o">=</span> <span class="n">offset</span> <span class="o">+</span> <span class="mi">1</span>
|
||||
|
||||
<span class="c1"># request needs a vqd argument</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'vqd'</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s2">"headers"</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># initial page does not have additional data in the input form</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'o'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'o'</span><span class="p">,</span> <span class="s1">'json'</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'api'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'api'</span><span class="p">,</span> <span class="s1">'d.js'</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'nextParams'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'nextParams'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'v'</span><span class="p">]</span> <span class="o">=</span> <span class="n">form_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'v'</span><span class="p">,</span> <span class="s1">'l'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'kl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'kl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_region</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'df'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'df'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'df'</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span>
|
||||
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"param data: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">])</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"param cookies: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">303</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">doc</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="n">result_table</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">doc</span><span class="p">,</span> <span class="s1">'//html/body/form/div[@class="filters"]/table'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">result_table</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="c1"># some locales (at least China) do not have a "next page" button and</span>
|
||||
<span class="c1"># the layout of the HTML tables is different.</span>
|
||||
<span class="n">result_table</span> <span class="o">=</span> <span class="n">result_table</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">result_table</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">3</span><span class="p">:</span>
|
||||
<span class="c1"># no more results</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">result_table</span> <span class="o">=</span> <span class="n">result_table</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
|
||||
<span class="c1"># update form data from response</span>
|
||||
<span class="n">form</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">doc</span><span class="p">,</span> <span class="s1">'//html/body/form/div[@class="filters"]/table//input/..'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">form</span><span class="p">):</span>
|
||||
|
||||
<span class="n">form</span> <span class="o">=</span> <span class="n">form</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">form_data</span><span class="p">[</span><span class="s1">'v'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">form</span><span class="p">,</span> <span class="s1">'//input[@name="v"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">form_data</span><span class="p">[</span><span class="s1">'api'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">form</span><span class="p">,</span> <span class="s1">'//input[@name="api"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">form_data</span><span class="p">[</span><span class="s1">'o'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">form</span><span class="p">,</span> <span class="s1">'//input[@name="o"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'form_data: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">form_data</span><span class="p">)</span>
|
||||
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">form</span><span class="p">,</span> <span class="s1">'//input[@name="vqd"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'q'</span><span class="p">]</span>
|
||||
<span class="n">cache_vqd</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tr_rows</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result_table</span><span class="p">,</span> <span class="s1">'.//tr'</span><span class="p">)</span>
|
||||
<span class="c1"># The last &lt;tr&gt; contains the form of the 'previous/next page' links</span>
|
||||
<span class="n">tr_rows</span> <span class="o">=</span> <span class="n">tr_rows</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
|
||||
<span class="n">len_tr_rows</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">tr_rows</span><span class="p">)</span>
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
|
||||
<span class="k">while</span> <span class="n">len_tr_rows</span> <span class="o">>=</span> <span class="n">offset</span> <span class="o">+</span> <span class="mi">4</span><span class="p">:</span>
|
||||
|
||||
<span class="c1"># assemble table rows we need to scrape</span>
|
||||
<span class="n">tr_title</span> <span class="o">=</span> <span class="n">tr_rows</span><span class="p">[</span><span class="n">offset</span><span class="p">]</span>
|
||||
<span class="n">tr_content</span> <span class="o">=</span> <span class="n">tr_rows</span><span class="p">[</span><span class="n">offset</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">offset</span> <span class="o">+=</span> <span class="mi">4</span>
|
||||
|
||||
<span class="c1"># ignore sponsored ads &lt;tr class="result-sponsored"&gt;</span>
|
||||
<span class="k">if</span> <span class="n">tr_content</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'class'</span><span class="p">)</span> <span class="o">==</span> <span class="s1">'result-sponsored'</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">a_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">tr_title</span><span class="p">,</span> <span class="s1">'.//td//a[@class="result-link"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">a_tag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">td_content</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">tr_content</span><span class="p">,</span> <span class="s1">'.//td[@class="result-snippet"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">td_content</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">a_tag</span><span class="o">.</span><span class="n">text_content</span><span class="p">(),</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">td_content</span><span class="p">),</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">a_tag</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages & regions from DuckDuckGo.</span>
|
||||
|
||||
<span class="sd"> SearXNG's ``all`` locale maps to DuckDuckGo's "All regions" (``wt-wt``).</span>
|
||||
<span class="sd"> DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no</span>
|
||||
<span class="sd"> sense in a SearXNG request since SearXNG's ``all`` will not add a</span>
|
||||
<span class="sd"> ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale``</span>
|
||||
<span class="sd"> is ``wt-wt`` (the region).</span>
|
||||
|
||||
<span class="sd"> Besides regions, DuckDuckGo also defines its languages by region codes. For</span>
|
||||
<span class="sd"> example, these are the English languages in DuckDuckGo:</span>
|
||||
|
||||
<span class="sd"> - en_US</span>
|
||||
<span class="sd"> - en_AU</span>
|
||||
<span class="sd"> - en_CA</span>
|
||||
<span class="sd"> - en_GB</span>
|
||||
|
||||
<span class="sd"> The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from</span>
|
||||
<span class="sd"> SearXNG's locale.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># pylint: disable=too-many-branches, too-many-statements</span>
|
||||
<span class="c1"># fetch regions</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'wt-wt'</span>
|
||||
|
||||
<span class="c1"># updated from u588 to u661 / should be updated automatically?</span>
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://duckduckgo.com/util/u661.js'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from DuckDuckGo is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="n">pos</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'regions:{'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">8</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">js_code</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">[</span><span class="n">pos</span><span class="p">:]</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">pos</span> <span class="o">=</span> <span class="n">js_code</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'}'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span>
|
||||
<span class="n">regions</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">js_code</span><span class="p">[:</span><span class="n">pos</span><span class="p">])</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">regions</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="o">==</span> <span class="s1">'wt-wt'</span><span class="p">:</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'wt-wt'</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">region</span> <span class="o">=</span> <span class="n">ddg_reg_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">region</span> <span class="o">==</span> <span class="s1">'skip'</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">region</span><span class="p">:</span>
|
||||
<span class="n">eng_territory</span><span class="p">,</span> <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">eng_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="n">region</span> <span class="o">=</span> <span class="n">eng_lang</span> <span class="o">+</span> <span class="s1">'_'</span> <span class="o">+</span> <span class="n">eng_territory</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">region</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) -> </span><span class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">region</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
|
||||
<span class="c1"># fetch languages</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'lang_region'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
<span class="n">pos</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'languages:{'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">10</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">js_code</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">[</span><span class="n">pos</span><span class="p">:]</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">pos</span> <span class="o">=</span> <span class="n">js_code</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'}'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span>
|
||||
<span class="n">js_code</span> <span class="o">=</span> <span class="s1">'{"'</span> <span class="o">+</span> <span class="n">js_code</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">pos</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">':'</span><span class="p">,</span> <span class="s1">'":'</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">',"'</span><span class="p">)</span>
|
||||
<span class="n">languages</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">js_code</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">eng_lang</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">languages</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eng_lang</span> <span class="o">==</span> <span class="s1">'wt_WT'</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">babel_tag</span> <span class="o">=</span> <span class="n">ddg_lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">babel_tag</span> <span class="o">==</span> <span class="s1">'skip'</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">babel_tag</span> <span class="o">==</span> <span class="s1">'lang_region'</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'lang_region'</span><span class="p">][</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_tag</span><span class="p">))</span>
|
||||
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: language </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
367
_modules/searx/engines/duckduckgo_definitions.html
Normal file
367
_modules/searx/engines/duckduckgo_definitions.html
Normal file
|
|
@ -0,0 +1,367 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.duckduckgo_definitions — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.duckduckgo_definitions</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.duckduckgo_definitions</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd">DuckDuckGo Instant Answer API</span>
|
||||
<span class="sd">~~~~~~~~~~~~~~~~~~~~~~~~~~~~~</span>
|
||||
|
||||
<span class="sd">The `DDG-API &lt;https://duckduckgo.com/api&gt;`__ is no longer documented but from</span>
|
||||
<span class="sd">reverse engineering we can see that some services (e.g. instant answers) are still</span>
|
||||
<span class="sd">in use from the DDG search engine.</span>
|
||||
|
||||
<span class="sd">As far as we can tell, the *instant answers* API does not support languages, or at</span>
|
||||
<span class="sd">least we could not find out how language support should work. It seems that</span>
|
||||
<span class="sd">most of the features are based on English terms.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">urlparse</span><span class="p">,</span> <span class="n">urljoin</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">WIKIDATA_UNITS</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">html_to_text</span><span class="p">,</span> <span class="n">get_string_replaces_function</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.external_urls</span> <span class="kn">import</span> <span class="n">get_external_url</span><span class="p">,</span> <span class="n">get_earth_coordinates_url</span><span class="p">,</span> <span class="n">area_to_osm_zoom</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://duckduckgo.com/'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q12805'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://duckduckgo.com/api'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">URL</span> <span class="o">=</span> <span class="s1">'https://api.duckduckgo.com/'</span> <span class="o">+</span> <span class="s1">'?</span><span class="si">{query}</span><span class="s1">&format=json&pretty=0&no_redirect=1&d=1'</span>
|
||||
|
||||
<span class="n">WIKIDATA_PREFIX</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'http://www.wikidata.org/entity/'</span><span class="p">,</span> <span class="s1">'https://www.wikidata.org/entity/'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">replace_http_by_https</span> <span class="o">=</span> <span class="n">get_string_replaces_function</span><span class="p">({</span><span class="s1">'http:'</span><span class="p">:</span> <span class="s1">'https:'</span><span class="p">})</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="is_broken_text"><a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo_definitions.is_broken_text">[docs]</a><span class="k">def</span> <span class="nf">is_broken_text</span><span class="p">(</span><span class="n">text</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""duckduckgo may return something like ``&lt;a href="xxxx"&gt;http://somewhere Related website&lt;a/&gt;``</span>
|
||||
|
||||
<span class="sd"> The href URL is broken, the "Related website" may contain some HTML.</span>
|
||||
|
||||
<span class="sd"> The best solution seems to be to ignore these results.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">text</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">)</span> <span class="ow">and</span> <span class="s1">' '</span> <span class="ow">in</span> <span class="n">text</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">result_to_text</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">htmlResult</span><span class="p">):</span>
|
||||
<span class="c1"># TODO : remove result ending with "Meaning" or "Category" # pylint: disable=fixme</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">htmlResult</span><span class="p">)</span>
|
||||
<span class="n">a</span> <span class="o">=</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//a'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">a</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="n">text</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_broken_text</span><span class="p">(</span><span class="n">result</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">result</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">URL</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">}))</span>
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="c1"># pylint: disable=too-many-locals, too-many-branches, too-many-statements</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">search_res</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># search_res.get('Entity') possible values (not exhaustive) :</span>
|
||||
<span class="c1"># * continent / country / department / location / waterfall</span>
|
||||
<span class="c1"># * actor / musician / artist</span>
|
||||
<span class="c1"># * book / performing art / film / television / media franchise / concert tour / playwright</span>
|
||||
<span class="c1"># * prepared food</span>
|
||||
<span class="c1"># * website / software / os / programming language / file format / software engineer</span>
|
||||
<span class="c1"># * company</span>
|
||||
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">heading</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Heading'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="n">attributes</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">urls</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">infobox_id</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">relatedTopics</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="c1"># add answer if there is one</span>
|
||||
<span class="n">answer</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Answer'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">answer</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'AnswerType="</span><span class="si">%s</span><span class="s1">" Answer="</span><span class="si">%s</span><span class="s1">"'</span><span class="p">,</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AnswerType'</span><span class="p">),</span> <span class="n">answer</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AnswerType'</span><span class="p">)</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'calc'</span><span class="p">,</span> <span class="s1">'ip'</span><span class="p">]:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'answer'</span><span class="p">:</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">answer</span><span class="p">)})</span>
|
||||
|
||||
<span class="c1"># add infobox</span>
|
||||
<span class="k">if</span> <span class="s1">'Definition'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">content</span> <span class="o">+</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Definition'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'Abstract'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">content</span> <span class="o">+</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Abstract'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># image</span>
|
||||
<span class="n">image</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Image'</span><span class="p">)</span>
|
||||
<span class="n">image</span> <span class="o">=</span> <span class="kc">None</span> <span class="k">if</span> <span class="n">image</span> <span class="o">==</span> <span class="s1">''</span> <span class="k">else</span> <span class="n">image</span>
|
||||
<span class="k">if</span> <span class="n">image</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">image</span><span class="p">)</span><span class="o">.</span><span class="n">netloc</span> <span class="o">==</span> <span class="s1">''</span><span class="p">:</span>
|
||||
<span class="n">image</span> <span class="o">=</span> <span class="n">urljoin</span><span class="p">(</span><span class="s1">'https://duckduckgo.com'</span><span class="p">,</span> <span class="n">image</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># urls</span>
|
||||
<span class="c1"># Official website, Wikipedia page</span>
|
||||
<span class="k">for</span> <span class="n">ddg_result</span> <span class="ow">in</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Results'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="n">firstURL</span> <span class="o">=</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'FirstURL'</span><span class="p">)</span>
|
||||
<span class="n">text</span> <span class="o">=</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Text'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">firstURL</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">text</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">text</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">firstURL</span><span class="p">})</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">heading</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">firstURL</span><span class="p">})</span>
|
||||
|
||||
<span class="c1"># related topics</span>
|
||||
<span class="k">for</span> <span class="n">ddg_result</span> <span class="ow">in</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'RelatedTopics'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="k">if</span> <span class="s1">'FirstURL'</span> <span class="ow">in</span> <span class="n">ddg_result</span><span class="p">:</span>
|
||||
<span class="n">firstURL</span> <span class="o">=</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'FirstURL'</span><span class="p">)</span>
|
||||
<span class="n">text</span> <span class="o">=</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Text'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_broken_text</span><span class="p">(</span><span class="n">text</span><span class="p">):</span>
|
||||
<span class="n">suggestion</span> <span class="o">=</span> <span class="n">result_to_text</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Result'</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">suggestion</span> <span class="o">!=</span> <span class="n">heading</span> <span class="ow">and</span> <span class="n">suggestion</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">suggestion</span><span class="p">})</span>
|
||||
<span class="k">elif</span> <span class="s1">'Topics'</span> <span class="ow">in</span> <span class="n">ddg_result</span><span class="p">:</span>
|
||||
<span class="n">suggestions</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">relatedTopics</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'name'</span><span class="p">:</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Name'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span> <span class="s1">'suggestions'</span><span class="p">:</span> <span class="n">suggestions</span><span class="p">})</span>
|
||||
<span class="k">for</span> <span class="n">topic_result</span> <span class="ow">in</span> <span class="n">ddg_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Topics'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="n">suggestion</span> <span class="o">=</span> <span class="n">result_to_text</span><span class="p">(</span><span class="n">topic_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Text'</span><span class="p">),</span> <span class="n">topic_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Result'</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">suggestion</span> <span class="o">!=</span> <span class="n">heading</span> <span class="ow">and</span> <span class="n">suggestion</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">suggestions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># abstract</span>
|
||||
<span class="n">abstractURL</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AbstractURL'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">abstractURL</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span>
|
||||
<span class="c1"># add as result ? problem always in english</span>
|
||||
<span class="n">infobox_id</span> <span class="o">=</span> <span class="n">abstractURL</span>
|
||||
<span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'AbstractSource'</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">abstractURL</span><span class="p">,</span> <span class="s1">'official'</span><span class="p">:</span> <span class="kc">True</span><span class="p">})</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">abstractURL</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">heading</span><span class="p">})</span>
|
||||
|
||||
<span class="c1"># definition</span>
|
||||
<span class="n">definitionURL</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'DefinitionURL'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">definitionURL</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span>
|
||||
<span class="c1"># add as result ? as answer ? problem always in english</span>
|
||||
<span class="n">infobox_id</span> <span class="o">=</span> <span class="n">definitionURL</span>
|
||||
<span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'DefinitionSource'</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">definitionURL</span><span class="p">})</span>
|
||||
|
||||
<span class="c1"># to merge with wikidata's infobox</span>
|
||||
<span class="k">if</span> <span class="n">infobox_id</span><span class="p">:</span>
|
||||
<span class="n">infobox_id</span> <span class="o">=</span> <span class="n">replace_http_by_https</span><span class="p">(</span><span class="n">infobox_id</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># attributes</span>
|
||||
<span class="c1"># some will be converted to urls</span>
|
||||
<span class="k">if</span> <span class="s1">'Infobox'</span> <span class="ow">in</span> <span class="n">search_res</span><span class="p">:</span>
|
||||
<span class="n">infobox</span> <span class="o">=</span> <span class="n">search_res</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'Infobox'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="s1">'content'</span> <span class="ow">in</span> <span class="n">infobox</span><span class="p">:</span>
|
||||
<span class="n">osm_zoom</span> <span class="o">=</span> <span class="mi">17</span>
|
||||
<span class="n">coordinates</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">for</span> <span class="n">info</span> <span class="ow">in</span> <span class="n">infobox</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'content'</span><span class="p">):</span>
|
||||
<span class="n">data_type</span> <span class="o">=</span> <span class="n">info</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data_type'</span><span class="p">)</span>
|
||||
<span class="n">data_label</span> <span class="o">=</span> <span class="n">info</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'label'</span><span class="p">)</span>
|
||||
<span class="n">data_value</span> <span class="o">=</span> <span class="n">info</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Workaround: ddg may return a double quote</span>
|
||||
<span class="k">if</span> <span class="n">data_value</span> <span class="o">==</span> <span class="s1">'""'</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="c1"># Is it an external URL ?</span>
|
||||
<span class="c1"># * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile</span>
|
||||
<span class="c1"># * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id</span>
|
||||
<span class="c1"># * netflix_id</span>
|
||||
<span class="n">external_url</span> <span class="o">=</span> <span class="n">get_external_url</span><span class="p">(</span><span class="n">data_type</span><span class="p">,</span> <span class="n">data_value</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">external_url</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">external_url</span><span class="p">})</span>
|
||||
<span class="k">elif</span> <span class="n">data_type</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'instance'</span><span class="p">,</span> <span class="s1">'wiki_maps_trigger'</span><span class="p">,</span> <span class="s1">'google_play_artist_id'</span><span class="p">]:</span>
|
||||
<span class="c1"># ignore instance: Wikidata value from "Instance Of" (Qxxxx)</span>
|
||||
<span class="c1"># ignore wiki_maps_trigger: reference to a javascript</span>
|
||||
<span class="c1"># ignore google_play_artist_id: service shutdown</span>
|
||||
<span class="k">pass</span>
|
||||
<span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'string'</span> <span class="ow">and</span> <span class="n">data_label</span> <span class="o">==</span> <span class="s1">'Website'</span><span class="p">:</span>
|
||||
<span class="c1"># There is already an URL for the website</span>
|
||||
<span class="k">pass</span>
|
||||
<span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'area'</span><span class="p">:</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">area_to_str</span><span class="p">(</span><span class="n">data_value</span><span class="p">),</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="s1">'P2046'</span><span class="p">})</span>
|
||||
<span class="n">osm_zoom</span> <span class="o">=</span> <span class="n">area_to_osm_zoom</span><span class="p">(</span><span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'amount'</span><span class="p">))</span>
|
||||
<span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'coordinates'</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'globe'</span><span class="p">)</span> <span class="o">==</span> <span class="s1">'http://www.wikidata.org/entity/Q2'</span><span class="p">:</span>
|
||||
<span class="c1"># coordinate on Earth</span>
|
||||
<span class="c1"># get the zoom information from the area</span>
|
||||
<span class="n">coordinates</span> <span class="o">=</span> <span class="n">info</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># coordinate NOT on Earth</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">data_value</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="s1">'P625'</span><span class="p">})</span>
|
||||
<span class="k">elif</span> <span class="n">data_type</span> <span class="o">==</span> <span class="s1">'string'</span><span class="p">:</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">data_label</span><span class="p">,</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">data_value</span><span class="p">})</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">coordinates</span><span class="p">:</span>
|
||||
<span class="n">data_label</span> <span class="o">=</span> <span class="n">coordinates</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'label'</span><span class="p">)</span>
|
||||
<span class="n">data_value</span> <span class="o">=</span> <span class="n">coordinates</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span>
|
||||
<span class="n">latitude</span> <span class="o">=</span> <span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'latitude'</span><span class="p">)</span>
|
||||
<span class="n">longitude</span> <span class="o">=</span> <span class="n">data_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'longitude'</span><span class="p">)</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">get_earth_coordinates_url</span><span class="p">(</span><span class="n">latitude</span><span class="p">,</span> <span class="n">longitude</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="p">)</span>
|
||||
<span class="n">urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="s1">'OpenStreetMap'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="s1">'P625'</span><span class="p">})</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">heading</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="c1"># TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme</span>
|
||||
<span class="k">if</span> <span class="n">image</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">attributes</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">urls</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">relatedTopics</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">urls</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'url'</span><span class="p">],</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">heading</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">})</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'infobox'</span><span class="p">:</span> <span class="n">heading</span><span class="p">,</span>
|
||||
<span class="s1">'id'</span><span class="p">:</span> <span class="n">infobox_id</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">image</span><span class="p">,</span>
|
||||
<span class="s1">'attributes'</span><span class="p">:</span> <span class="n">attributes</span><span class="p">,</span>
|
||||
<span class="s1">'urls'</span><span class="p">:</span> <span class="n">urls</span><span class="p">,</span>
|
||||
<span class="s1">'relatedTopics'</span><span class="p">:</span> <span class="n">relatedTopics</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">unit_to_str</span><span class="p">(</span><span class="n">unit</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="n">prefix</span> <span class="ow">in</span> <span class="n">WIKIDATA_PREFIX</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">unit</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="n">prefix</span><span class="p">):</span>
|
||||
<span class="n">wikidata_entity</span> <span class="o">=</span> <span class="n">unit</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">prefix</span><span class="p">)</span> <span class="p">:]</span>
|
||||
<span class="k">return</span> <span class="n">WIKIDATA_UNITS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">wikidata_entity</span><span class="p">,</span> <span class="n">unit</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">unit</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="area_to_str"><a class="viewcode-back" href="../../../dev/engines/online/duckduckgo.html#searx.engines.duckduckgo_definitions.area_to_str">[docs]</a><span class="k">def</span> <span class="nf">area_to_str</span><span class="p">(</span><span class="n">area</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``"""</span>
|
||||
<span class="n">unit</span> <span class="o">=</span> <span class="n">unit_to_str</span><span class="p">(</span><span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'unit'</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">unit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">amount</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'amount'</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="s1">'</span><span class="si">{}</span><span class="s1"> </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">amount</span><span class="p">,</span> <span class="n">unit</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||||
<span class="k">pass</span>
|
||||
<span class="k">return</span> <span class="s1">'</span><span class="si">{}</span><span class="s1"> </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'amount'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span> <span class="n">area</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'unit'</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
605
_modules/searx/engines/google.html
Normal file
605
_modules/searx/engines/google.html
Normal file
|
|
@ -0,0 +1,605 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.google — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.google</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.google</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This is the implementation of the Google WEB engine. Some of these</span>
|
||||
<span class="sd">implementations (mainly the :py:obj:`get_google_info`) are shared by other</span>
|
||||
<span class="sd">engines:</span>
|
||||
|
||||
<span class="sd">- :ref:`google images engine`</span>
|
||||
<span class="sd">- :ref:`google news engine`</span>
|
||||
<span class="sd">- :ref:`google videos engine`</span>
|
||||
<span class="sd">- :ref:`google scholar engine`</span>
|
||||
<span class="sd">- :ref:`google autocomplete`</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
<span class="kn">import</span> <span class="nn">babel.core</span>
|
||||
<span class="kn">import</span> <span class="nn">babel.languages</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">,</span> <span class="n">eval_xpath_getindex</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span><span class="p">,</span> <span class="n">region_tag</span><span class="p">,</span> <span class="n">get_offical_locales</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineCaptchaException</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.google.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q9366'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search/'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'day'</span><span class="p">:</span> <span class="s1">'d'</span><span class="p">,</span> <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'w'</span><span class="p">,</span> <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'m'</span><span class="p">,</span> <span class="s1">'year'</span><span class="p">:</span> <span class="s1">'y'</span><span class="p">}</span>
|
||||
|
||||
<span class="c1"># Filter results. 0: None, 1: Moderate, 2: Strict</span>
|
||||
<span class="n">filter_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'off'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'medium'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'high'</span><span class="p">}</span>
|
||||
|
||||
<span class="c1"># specific xpath variables</span>
|
||||
<span class="c1"># ------------------------</span>
|
||||
|
||||
<span class="n">results_xpath</span> <span class="o">=</span> <span class="s1">'.//div[contains(@jscontroller, "SC7lYd")]'</span>
|
||||
<span class="n">title_xpath</span> <span class="o">=</span> <span class="s1">'.//a/h3[1]'</span>
|
||||
<span class="n">href_xpath</span> <span class="o">=</span> <span class="s1">'.//a[h3]/@href'</span>
|
||||
<span class="n">content_xpath</span> <span class="o">=</span> <span class="s1">'.//div[@data-sncf]'</span>
|
||||
|
||||
<span class="c1"># Suggestions are links placed in a *card-section*, we extract only the text</span>
|
||||
<span class="c1"># from the links, not the links themselves.</span>
|
||||
<span class="n">suggestion_xpath</span> <span class="o">=</span> <span class="s1">'//div[contains(@class, "EIaa9b")]//a'</span>
|
||||
|
||||
<span class="c1"># UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for</span>
|
||||
<span class="c1"># # celebrities like '!google natasha allegri'</span>
|
||||
<span class="c1"># # or '!google chris evans'</span>
|
||||
<span class="n">UI_ASYNC</span> <span class="o">=</span> <span class="s1">'use_ac:true,_fmt:prog'</span>
|
||||
<span class="sd">"""Format of the response from UI's async request."""</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_google_info"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.get_google_info">[docs]</a><span class="k">def</span> <span class="nf">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">eng_traits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Composing various (language) properties for the google engines (:ref:`google</span>
|
||||
<span class="sd"> API`).</span>
|
||||
|
||||
<span class="sd"> This function is called by the various google engines (:ref:`google web</span>
|
||||
<span class="sd"> engine`, :ref:`google images engine`, :ref:`google news engine` and</span>
|
||||
<span class="sd"> :ref:`google videos engine`).</span>
|
||||
|
||||
<span class="sd"> :param dict params: Request parameters of the engine. At least</span>
|
||||
<span class="sd"> a ``searxng_locale`` key should be in the dictionary.</span>
|
||||
|
||||
<span class="sd"> :param eng_traits: Engine's traits fetched from google preferences</span>
|
||||
<span class="sd"> (:py:obj:`searx.enginelib.traits.EngineTraits`)</span>
|
||||
|
||||
<span class="sd"> :rtype: dict</span>
|
||||
<span class="sd"> :returns:</span>
|
||||
<span class="sd"> Py-Dictionary with the key/value pairs:</span>
|
||||
|
||||
<span class="sd"> language:</span>
|
||||
<span class="sd"> The language code that is used by google (e.g. ``lang_en`` or</span>
|
||||
<span class="sd"> ``lang_zh-TW``)</span>
|
||||
|
||||
<span class="sd"> country:</span>
|
||||
<span class="sd"> The country code that is used by google (e.g. ``US`` or ``TW``)</span>
|
||||
|
||||
<span class="sd"> locale:</span>
|
||||
<span class="sd"> An instance of :py:obj:`babel.core.Locale` built from the</span>
|
||||
<span class="sd"> ``searxng_locale`` value.</span>
|
||||
|
||||
<span class="sd"> subdomain:</span>
|
||||
<span class="sd"> Google subdomain :py:obj:`google_domains` that fits to the country</span>
|
||||
<span class="sd"> code.</span>
|
||||
|
||||
<span class="sd"> params:</span>
|
||||
<span class="sd"> Py-Dictionary with additional request arguments (can be passed to</span>
|
||||
<span class="sd"> :py:func:`urllib.parse.urlencode`).</span>
|
||||
|
||||
<span class="sd"> - ``hl`` parameter: specifies the interface language of user interface.</span>
|
||||
<span class="sd"> - ``lr`` parameter: restricts search results to documents written in</span>
|
||||
<span class="sd"> a particular language.</span>
|
||||
<span class="sd"> - ``cr`` parameter: restricts search results to documents</span>
|
||||
<span class="sd"> originating in a particular country.</span>
|
||||
<span class="sd"> - ``ie`` parameter: sets the character encoding scheme that should</span>
|
||||
<span class="sd"> be used to interpret the query string ('utf8').</span>
|
||||
<span class="sd"> - ``oe`` parameter: sets the character encoding scheme that should</span>
|
||||
<span class="sd"> be used to decode the XML result ('utf8').</span>
|
||||
|
||||
<span class="sd"> headers:</span>
|
||||
<span class="sd"> Py-Dictionary with additional HTTP headers (can be passed to</span>
|
||||
<span class="sd"> request's headers)</span>
|
||||
|
||||
<span class="sd"> - ``Accept: '*/*'``</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">ret_val</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'language'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s1">'country'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s1">'subdomain'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s1">'params'</span><span class="p">:</span> <span class="p">{},</span>
|
||||
<span class="s1">'headers'</span><span class="p">:</span> <span class="p">{},</span>
|
||||
<span class="s1">'cookies'</span><span class="p">:</span> <span class="p">{},</span>
|
||||
<span class="s1">'locale'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">sxng_locale</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'searxng_locale'</span><span class="p">,</span> <span class="s1">'all'</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="n">locale</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="n">eng_lang</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'lang_en'</span><span class="p">)</span>
|
||||
<span class="n">lang_code</span> <span class="o">=</span> <span class="n">eng_lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="c1"># lang_zh-TW --> zh-TW / lang_en --> en</span>
|
||||
<span class="n">country</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Test zh_hans & zh_hant --> in the topmost links in the result list of list</span>
|
||||
<span class="c1"># TW and HK you should find a wiktionary.org zh_hant link. In the result</span>
|
||||
<span class="c1"># list of zh-CN there should be no hant link; instead you should find</span>
|
||||
<span class="c1"># zh.m.wikipedia.org/zh somewhere in the top.</span>
|
||||
|
||||
<span class="c1"># '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5</span>
|
||||
<span class="c1"># '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5</span>
|
||||
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span>
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'country'</span><span class="p">]</span> <span class="o">=</span> <span class="n">country</span>
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'locale'</span><span class="p">]</span> <span class="o">=</span> <span class="n">locale</span>
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">country</span><span class="o">.</span><span class="n">upper</span><span class="p">(),</span> <span class="s1">'www.google.com'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># hl parameter:</span>
|
||||
<span class="c1"># The hl parameter specifies the interface language (host language) of</span>
|
||||
<span class="c1"># your user interface. To improve the performance and the quality of your</span>
|
||||
<span class="c1"># search results, you are strongly encouraged to set this parameter</span>
|
||||
<span class="c1"># explicitly.</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results#hlsp</span>
|
||||
<span class="c1"># The Interface Language:</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages</span>
|
||||
|
||||
<span class="c1"># https://github.com/searxng/searxng/issues/2515#issuecomment-1607150817</span>
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">lang_code</span><span class="si">}</span><span class="s1">-</span><span class="si">{</span><span class="n">country</span><span class="si">}</span><span class="s1">'</span>
|
||||
|
||||
<span class="c1"># lr parameter:</span>
|
||||
<span class="c1"># The lr (language restrict) parameter restricts search results to</span>
|
||||
<span class="c1"># documents written in a particular language.</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results#lrsp</span>
|
||||
<span class="c1"># Language Collection Values:</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># To select 'all' languages an empty 'lr' value is used.</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># Different to other google services, Google Scholar supports selecting more</span>
|
||||
<span class="c1"># than one language. The languages are separated by a pipe '|' (logical OR).</span>
|
||||
<span class="c1"># By example: &lr=lang_zh-TW%7Clang_de selects articles written in</span>
|
||||
<span class="c1"># traditional chinese OR german language.</span>
|
||||
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'lr'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span>
|
||||
<span class="k">if</span> <span class="n">sxng_locale</span> <span class="o">==</span> <span class="s1">'all'</span><span class="p">:</span>
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'lr'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
|
||||
<span class="c1"># cr parameter:</span>
|
||||
<span class="c1"># The cr parameter restricts search results to documents originating in a</span>
|
||||
<span class="c1"># particular country.</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results#crsp</span>
|
||||
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'cr'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'country'</span> <span class="o">+</span> <span class="n">country</span>
|
||||
<span class="k">if</span> <span class="n">sxng_locale</span> <span class="o">==</span> <span class="s1">'all'</span><span class="p">:</span>
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'cr'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
|
||||
<span class="c1"># gl parameter: (mandatory for Google News)</span>
|
||||
<span class="c1"># The gl parameter value is a two-letter country code. For WebSearch</span>
|
||||
<span class="c1"># results, the gl parameter boosts search results whose country of origin</span>
|
||||
<span class="c1"># matches the parameter value. See the Country Codes section for a list of</span>
|
||||
<span class="c1"># valid values.</span>
|
||||
<span class="c1"># Specifying a gl parameter value in WebSearch requests should improve the</span>
|
||||
<span class="c1"># relevance of results. This is particularly true for international</span>
|
||||
<span class="c1"># customers and, even more specifically, for customers in English-speaking</span>
|
||||
<span class="c1"># countries other than the United States.</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results#glsp</span>
|
||||
|
||||
<span class="c1"># https://github.com/searxng/searxng/issues/2515#issuecomment-1606294635</span>
|
||||
<span class="c1"># ret_val['params']['gl'] = country</span>
|
||||
|
||||
<span class="c1"># ie parameter:</span>
|
||||
<span class="c1"># The ie parameter sets the character encoding scheme that should be used</span>
|
||||
<span class="c1"># to interpret the query string. The default ie value is latin1.</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results#iesp</span>
|
||||
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'ie'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'utf8'</span>
|
||||
|
||||
<span class="c1"># oe parameter:</span>
|
||||
<span class="c1"># The oe parameter sets the character encoding scheme that should be used</span>
|
||||
<span class="c1"># to decode the XML result. The default oe value is latin1.</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results#oesp</span>
|
||||
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'oe'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'utf8'</span>
|
||||
|
||||
<span class="c1"># num parameter:</span>
|
||||
<span class="c1"># The num parameter identifies the number of search results to return.</span>
|
||||
<span class="c1"># The default num value is 10, and the maximum value is 20. If you request</span>
|
||||
<span class="c1"># more than 20 results, only 20 results will be returned.</span>
|
||||
<span class="c1"># https://developers.google.com/custom-search/docs/xml_results#numsp</span>
|
||||
|
||||
<span class="c1"># HINT: seems to have no effect (tested in google WEB & Images)</span>
|
||||
<span class="c1"># ret_val['params']['num'] = 20</span>
|
||||
|
||||
<span class="c1"># HTTP headers</span>
|
||||
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Accept'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'*/*'</span>
|
||||
|
||||
<span class="c1"># Cookies</span>
|
||||
|
||||
<span class="c1"># - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746</span>
|
||||
<span class="c1"># - https://github.com/searxng/searxng/issues/1555</span>
|
||||
<span class="n">ret_val</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'CONSENT'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"YES+"</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">ret_val</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="o">.</span><span class="n">host</span> <span class="o">==</span> <span class="s1">'sorry.google.com'</span> <span class="ow">or</span> <span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/sorry'</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">()</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Google search request"""</span>
|
||||
<span class="c1"># pylint: disable=line-too-long</span>
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span>
|
||||
<span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium</span>
|
||||
<span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="s1">'https://'</span>
|
||||
<span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span>
|
||||
<span class="o">+</span> <span class="s1">'/search'</span>
|
||||
<span class="o">+</span> <span class="s2">"?"</span>
|
||||
<span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span>
|
||||
<span class="s1">'filter'</span><span class="p">:</span> <span class="s1">'0'</span><span class="p">,</span>
|
||||
<span class="s1">'start'</span><span class="p">:</span> <span class="n">offset</span><span class="p">,</span>
|
||||
<span class="c1"># 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i',</span>
|
||||
<span class="c1"># 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG',</span>
|
||||
<span class="c1"># 'cs' : 1,</span>
|
||||
<span class="c1"># 'sa': 'N',</span>
|
||||
<span class="c1"># 'yv': 3,</span>
|
||||
<span class="c1"># 'prmd': 'vin',</span>
|
||||
<span class="c1"># 'ei': 'GASaY6TxOcy_xc8PtYeY6AE',</span>
|
||||
<span class="c1"># 'sa': 'N',</span>
|
||||
<span class="c1"># 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg'</span>
|
||||
<span class="c1"># formerly known as use_mobile_ui</span>
|
||||
<span class="s1">'asearch'</span><span class="p">:</span> <span class="s1">'arc'</span><span class="p">,</span>
|
||||
<span class="s1">'async'</span><span class="p">:</span> <span class="n">UI_ASYNC</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span>
|
||||
<span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'tbs'</span><span class="p">:</span> <span class="s1">'qdr:'</span> <span class="o">+</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]})</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]:</span>
|
||||
<span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'safe'</span><span class="p">:</span> <span class="n">filter_mapping</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]})</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<span class="c1"># =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA</span>
|
||||
<span class="c1"># ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;</span>
|
||||
<span class="n">RE_DATA_IMAGE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);'</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_parse_data_images</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span>
|
||||
<span class="n">data_image_map</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">img_id</span><span class="p">,</span> <span class="n">data_image</span> <span class="ow">in</span> <span class="n">RE_DATA_IMAGE</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">dom</span><span class="o">.</span><span class="n">text_content</span><span class="p">()):</span>
|
||||
<span class="n">end_pos</span> <span class="o">=</span> <span class="n">data_image</span><span class="o">.</span><span class="n">rfind</span><span class="p">(</span><span class="s1">'='</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">end_pos</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">data_image</span> <span class="o">=</span> <span class="n">data_image</span><span class="p">[:</span> <span class="n">end_pos</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">data_image_map</span><span class="p">[</span><span class="n">img_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">data_image</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'data:image objects --> </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">list</span><span class="p">(</span><span class="n">data_image_map</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span>
|
||||
<span class="k">return</span> <span class="n">data_image_map</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span>
|
||||
<span class="c1"># pylint: disable=too-many-branches, too-many-statements</span>
|
||||
<span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="c1"># convert the text to dom</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
<span class="n">data_image_map</span> <span class="o">=</span> <span class="n">_parse_data_images</span><span class="p">(</span><span class="n">dom</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># results --> answer</span>
|
||||
<span class="n">answer_list</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "LGOjhe")]'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">answer_list</span><span class="p">:</span>
|
||||
<span class="n">answer_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">_</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s2">"normalize-space()"</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">answer_list</span><span class="p">]</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'answer'</span><span class="p">:</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">answer_list</span><span class="p">)})</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"did not find 'answer'"</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># parse results</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">results_xpath</span><span class="p">):</span> <span class="c1"># pylint: disable=too-many-nested-blocks</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">title_tag</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">title_xpath</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">title_tag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
                <span class="c1"># this is not one of the common google results *section*</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'ignoring item from the result_xpath list: missing title'</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">title_tag</span><span class="p">)</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">href_xpath</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'ignoring item from the result_xpath list: missing url of title "</span><span class="si">%s</span><span class="s1">"'</span><span class="p">,</span> <span class="n">title</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">content_nodes</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">)</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content_nodes</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">content</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'ignoring item from the result_xpath list: missing content of title "</span><span class="si">%s</span><span class="s1">"'</span><span class="p">,</span> <span class="n">title</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">content_nodes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//img/@src'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">img_src</span><span class="p">:</span>
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">img_src</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">img_src</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'data:image'</span><span class="p">):</span>
|
||||
<span class="n">img_id</span> <span class="o">=</span> <span class="n">content_nodes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'.//img/@id'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">img_id</span><span class="p">:</span>
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">data_image_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">img_id</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> <span class="s1">'img_src'</span><span class="p">:</span> <span class="n">img_src</span><span class="p">})</span>
|
||||
|
||||
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="n">exc_info</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="c1"># parse suggestion</span>
|
||||
<span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">suggestion_xpath</span><span class="p">):</span>
|
||||
<span class="c1"># append suggestion</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span>
|
||||
|
||||
<span class="c1"># return results</span>
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<span class="c1"># get supported languages from their site</span>
|
||||
|
||||
|
||||
<span class="n">skip_countries</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="c1"># official language of google-country not in google-languages</span>
|
||||
    <span class="s1">'AL'</span><span class="p">,</span>  <span class="c1"># Albania (sq)</span>
|
||||
    <span class="s1">'AZ'</span><span class="p">,</span>  <span class="c1"># Azerbaijan (az)</span>
|
||||
    <span class="s1">'BD'</span><span class="p">,</span>  <span class="c1"># Bangladesh (bn)</span>
|
||||
    <span class="s1">'BN'</span><span class="p">,</span>  <span class="c1"># Brunei Darussalam (ms)</span>
|
||||
    <span class="s1">'BT'</span><span class="p">,</span>  <span class="c1"># Bhutan (dz)</span>
|
||||
    <span class="s1">'ET'</span><span class="p">,</span>  <span class="c1"># Ethiopia (am)</span>
|
||||
    <span class="s1">'GE'</span><span class="p">,</span>  <span class="c1"># Georgia (ka, os)</span>
|
||||
    <span class="s1">'GL'</span><span class="p">,</span>  <span class="c1"># Greenland (kl)</span>
|
||||
    <span class="s1">'KH'</span><span class="p">,</span>  <span class="c1"># Cambodia (km)</span>
|
||||
    <span class="s1">'LA'</span><span class="p">,</span>  <span class="c1"># Laos (lo)</span>
|
||||
    <span class="s1">'LK'</span><span class="p">,</span>  <span class="c1"># Sri Lanka (si, ta)</span>
|
||||
    <span class="s1">'ME'</span><span class="p">,</span>  <span class="c1"># Montenegro (sr)</span>
|
||||
    <span class="s1">'MK'</span><span class="p">,</span>  <span class="c1"># North Macedonia (mk, sq)</span>
|
||||
    <span class="s1">'MM'</span><span class="p">,</span>  <span class="c1"># Myanmar (my)</span>
|
||||
    <span class="s1">'MN'</span><span class="p">,</span>  <span class="c1"># Mongolia (mn)</span>
|
||||
    <span class="s1">'MV'</span><span class="p">,</span>  <span class="c1"># Maldives (dv) // dv_MV is unknown by babel</span>
|
||||
    <span class="s1">'MY'</span><span class="p">,</span>  <span class="c1"># Malaysia (ms)</span>
|
||||
    <span class="s1">'NP'</span><span class="p">,</span>  <span class="c1"># Nepal (ne)</span>
|
||||
    <span class="s1">'TJ'</span><span class="p">,</span>  <span class="c1"># Tajikistan (tg)</span>
|
||||
    <span class="s1">'TM'</span><span class="p">,</span>  <span class="c1"># Turkmenistan (tk)</span>
|
||||
    <span class="s1">'UZ'</span><span class="p">,</span>  <span class="c1"># Uzbekistan (uz)</span>
|
||||
<span class="p">]</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">,</span> <span class="n">add_domains</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages from Google."""</span>
|
||||
<span class="c1"># pylint: disable=import-outside-toplevel, too-many-branches</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://www.google.com/preferences'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from Google's preferences is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="c1"># supported language codes</span>
|
||||
|
||||
<span class="n">lang_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'no'</span><span class="p">:</span> <span class="s1">'nb'</span><span class="p">}</span>
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//*[@id="langSec"]//input[@name="lr"]'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">eng_lang</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_lang</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> -> </span><span class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"data-name"</span><span class="p">),</span> <span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_lang</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'lang_'</span> <span class="o">+</span> <span class="n">eng_lang</span>
|
||||
|
||||
<span class="c1"># alias languages</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'lang_zh-CN'</span>
|
||||
|
||||
<span class="c1"># supported region codes</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//*[@name="region"]/..//input[@name="region"]'</span><span class="p">):</span>
|
||||
<span class="n">eng_country</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eng_country</span> <span class="ow">in</span> <span class="n">skip_countries</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="k">if</span> <span class="n">eng_country</span> <span class="o">==</span> <span class="s1">'ZZ'</span><span class="p">:</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'ZZ'</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">sxng_locales</span> <span class="o">=</span> <span class="n">get_offical_locales</span><span class="p">(</span><span class="n">eng_country</span><span class="p">,</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">keys</span><span class="p">(),</span> <span class="n">regional</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">sxng_locales</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't map from google country </span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">) to a babel region."</span> <span class="o">%</span> <span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data-name'</span><span class="p">),</span> <span class="n">eng_country</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">sxng_locale</span> <span class="ow">in</span> <span class="n">sxng_locales</span><span class="p">:</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">region_tag</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">)]</span> <span class="o">=</span> <span class="n">eng_country</span>
|
||||
|
||||
<span class="c1"># alias regions</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="s1">'zh-CN'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'HK'</span>
|
||||
|
||||
<span class="c1"># supported domains</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">add_domains</span><span class="p">:</span>
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://www.google.com/supported_domains'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from https://www.google.com/supported_domains is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">domain</span> <span class="ow">in</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">():</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">domain</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">domain</span> <span class="ow">or</span> <span class="n">domain</span> <span class="ow">in</span> <span class="p">[</span>
|
||||
<span class="s1">'.google.com'</span><span class="p">,</span>
|
||||
<span class="p">]:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">region</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">][</span><span class="n">region</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'www'</span> <span class="o">+</span> <span class="n">domain</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">if</span> <span class="n">region</span> <span class="o">==</span> <span class="s1">'HK'</span><span class="p">:</span>
|
||||
<span class="c1"># There is no google.cn, we use .com.hk for zh-CN</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'supported_domains'</span><span class="p">][</span><span class="s1">'CN'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'www'</span> <span class="o">+</span> <span class="n">domain</span> <span class="c1"># type: ignore</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
241
_modules/searx/engines/google_images.html
Normal file
241
_modules/searx/engines/google_images.html
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.google_images — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.google_images</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.google_images</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This is the implementation of the Google Images engine using the internal</span>
|
||||
<span class="sd">Google API used by the Google Go Android app.</span>
|
||||
|
||||
<span class="sd">This internal API offers results in</span>
|
||||
|
||||
<span class="sd">- JSON (``_fmt:json``)</span>
|
||||
<span class="sd">- Protobuf_ (``_fmt:pb``)</span>
|
||||
<span class="sd">- Protobuf_ compressed? (``_fmt:pc``)</span>
|
||||
<span class="sd">- HTML (``_fmt:html``)</span>
|
||||
<span class="sd">- Protobuf_ encoded in JSON (``_fmt:jspb``).</span>
|
||||
|
||||
<span class="sd">.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">get_google_info</span><span class="p">,</span>
|
||||
<span class="n">time_range_dict</span><span class="p">,</span>
|
||||
<span class="n">detect_google_sorry</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://images.google.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q521550'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'images'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">filter_mapping</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'images'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'active'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'active'</span><span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_images.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Google-Image search request"""</span>
|
||||
|
||||
<span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span>
|
||||
|
||||
<span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="s1">'https://'</span>
|
||||
<span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span>
|
||||
<span class="o">+</span> <span class="s1">'/search'</span>
|
||||
<span class="o">+</span> <span class="s2">"?"</span>
|
||||
<span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'tbm'</span><span class="p">:</span> <span class="s2">"isch"</span><span class="p">,</span>
|
||||
<span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span>
|
||||
<span class="s1">'asearch'</span><span class="p">:</span> <span class="s1">'isch'</span><span class="p">,</span>
|
||||
<span class="s1">'async'</span><span class="p">:</span> <span class="s1">'_fmt:json,p:1,ijn:'</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]),</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span>
|
||||
<span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'tbs'</span><span class="p">:</span> <span class="s1">'qdr:'</span> <span class="o">+</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]})</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]:</span>
|
||||
<span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'safe'</span><span class="p">:</span> <span class="n">filter_mapping</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]})</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_images.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="n">json_start</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'{"ischj":'</span><span class="p">)</span>
|
||||
<span class="n">json_data</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">[</span><span class="n">json_start</span><span class="p">:])</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s2">"ischj"</span><span class="p">][</span><span class="s2">"metadata"</span><span class="p">]:</span>
|
||||
|
||||
<span class="n">result_item</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">][</span><span class="s2">"referrer_url"</span><span class="p">],</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">][</span><span class="s2">"page_title"</span><span class="p">],</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"text_in_grid"</span><span class="p">][</span><span class="s2">"snippet"</span><span class="p">],</span>
|
||||
<span class="s1">'source'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">][</span><span class="s2">"site_title"</span><span class="p">],</span>
|
||||
<span class="s1">'img_format'</span><span class="p">:</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">item</span><span class="p">[</span><span class="s2">"original_image"</span><span class="p">][</span><span class="s2">"width"</span><span class="p">]</span><span class="si">}</span><span class="s1"> x </span><span class="si">{</span><span class="n">item</span><span class="p">[</span><span class="s2">"original_image"</span><span class="p">][</span><span class="s2">"height"</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"original_image"</span><span class="p">][</span><span class="s2">"url"</span><span class="p">],</span>
|
||||
<span class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">item</span><span class="p">[</span><span class="s2">"thumbnail"</span><span class="p">][</span><span class="s2">"url"</span><span class="p">],</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">author</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'iptc'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'creator'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">author</span><span class="p">:</span>
|
||||
<span class="n">result_item</span><span class="p">[</span><span class="s1">'author'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">author</span><span class="p">)</span>
|
||||
|
||||
<span class="n">copyright_notice</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'iptc'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'copyright_notice'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">copyright_notice</span><span class="p">:</span>
|
||||
<span class="n">result_item</span><span class="p">[</span><span class="s1">'source'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">' | '</span> <span class="o">+</span> <span class="n">copyright_notice</span>
|
||||
|
||||
<span class="n">freshness_date</span> <span class="o">=</span> <span class="n">item</span><span class="p">[</span><span class="s2">"result"</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"freshness_date"</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">freshness_date</span><span class="p">:</span>
|
||||
<span class="n">result_item</span><span class="p">[</span><span class="s1">'source'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">' | '</span> <span class="o">+</span> <span class="n">freshness_date</span>
|
||||
|
||||
<span class="n">file_size</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'gsa'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'file_size'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">file_size</span><span class="p">:</span>
|
||||
<span class="n">result_item</span><span class="p">[</span><span class="s1">'source'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">' (</span><span class="si">%s</span><span class="s1">)'</span> <span class="o">%</span> <span class="n">file_size</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">result_item</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
417
_modules/searx/engines/google_news.html
Normal file
417
_modules/searx/engines/google_news.html
Normal file
|
|
@ -0,0 +1,417 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.google_news — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.google_news</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.google_news</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This is the implementation of the Google News engine.</span>
|
||||
|
||||
<span class="sd">Google News has a different region handling compared to Google WEB.</span>
|
||||
|
||||
<span class="sd">- the ``ceid`` argument has to be set (:py:obj:`ceid_list`)</span>
|
||||
<span class="sd">- the hl_ argument has to be set correctly (and different to Google WEB)</span>
|
||||
<span class="sd">- the gl_ argument is mandatory</span>
|
||||
|
||||
<span class="sd">If one of these arguments is not set correctly, the request is redirected to</span>
|
||||
<span class="sd">CONSENT dialog::</span>
|
||||
|
||||
<span class="sd"> https://consent.google.com/m?continue=</span>
|
||||
|
||||
<span class="sd">The google news API ignores some parameters from the common :ref:`google API`:</span>
|
||||
|
||||
<span class="sd">- num_ : the number of search results is ignored / there is no paging; all</span>
|
||||
<span class="sd"> results for a query term are in the first response.</span>
|
||||
<span class="sd">- save_ : is ignored / Google-News results are always *SafeSearch*</span>
|
||||
|
||||
<span class="sd">.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp</span>
|
||||
<span class="sd">.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp</span>
|
||||
<span class="sd">.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp</span>
|
||||
<span class="sd">.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">import</span> <span class="nn">base64</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">locales</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">eval_xpath</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_list</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_getindex</span><span class="p">,</span>
|
||||
<span class="n">extract_text</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="k">as</span> <span class="n">_fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">get_google_info</span><span class="p">,</span>
|
||||
<span class="n">detect_google_sorry</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://news.google.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q12020'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'news'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="c1"># Google-News results are always *SafeSearch*. Option 'safesearch' is set to</span>
|
||||
<span class="c1"># False here, otherwise checker will report safesearch-errors::</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># safesearch : results are identitical for safesearch=0 and safesearch=2</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="c1"># send_accept_language_header = True</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_news.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Google-News search request"""</span>
|
||||
|
||||
<span class="n">sxng_locale</span> <span class="o">=</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'searxng_locale'</span><span class="p">,</span> <span class="s1">'en-US'</span><span class="p">)</span>
|
||||
<span class="n">ceid</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">get_engine_locale</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ceid'</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="s1">'US:en'</span><span class="p">)</span>
|
||||
<span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span>
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'news.google.com'</span> <span class="c1"># google news has only one domain</span>
|
||||
|
||||
<span class="n">ceid_region</span><span class="p">,</span> <span class="n">ceid_lang</span> <span class="o">=</span> <span class="n">ceid</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">':'</span><span class="p">)</span>
|
||||
<span class="n">ceid_lang</span><span class="p">,</span> <span class="n">ceid_suffix</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="n">ceid_lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="o">+</span> <span class="p">[</span>
|
||||
<span class="kc">None</span><span class="p">,</span>
|
||||
<span class="p">]</span>
|
||||
<span class="p">)[:</span><span class="mi">2</span><span class="p">]</span>
|
||||
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">ceid_suffix</span> <span class="ow">and</span> <span class="n">ceid_suffix</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'Hans'</span><span class="p">,</span> <span class="s1">'Hant'</span><span class="p">]:</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">ceid_region</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="n">ceid_lang</span><span class="p">:</span>
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">ceid_region</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">ceid_suffix</span>
|
||||
|
||||
<span class="k">elif</span> <span class="n">ceid_region</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">!=</span> <span class="n">ceid_lang</span><span class="p">:</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">ceid_region</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'AT'</span><span class="p">,</span> <span class="s1">'BE'</span><span class="p">,</span> <span class="s1">'CH'</span><span class="p">,</span> <span class="s1">'IL'</span><span class="p">,</span> <span class="s1">'SA'</span><span class="p">,</span> <span class="s1">'IN'</span><span class="p">,</span> <span class="s1">'BD'</span><span class="p">,</span> <span class="s1">'PT'</span><span class="p">]:</span>
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'hl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">ceid_region</span>
|
||||
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'lr'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'lang_'</span> <span class="o">+</span> <span class="n">ceid_lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">][</span><span class="s1">'gl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ceid_region</span>
|
||||
|
||||
<span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="s1">'https://'</span>
|
||||
<span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span>
|
||||
<span class="o">+</span> <span class="s2">"/search?"</span>
|
||||
<span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="c1"># ceid includes a ':' character which must not be urlencoded</span>
|
||||
<span class="o">+</span> <span class="p">(</span><span class="s1">'&ceid=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">ceid</span><span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_news.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># convert the text to dom</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@class="xrnccd"]'</span><span class="p">):</span>
|
||||
|
||||
<span class="c1"># The first &lt;a&gt; tag in the &lt;article&gt; contains the link to the article</span>
|
||||
<span class="c1"># The href attribute of the &lt;a&gt; tag is a google internal link, we have</span>
|
||||
<span class="c1"># to decode</span>
|
||||
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'./article/a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'?'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="n">base64</span><span class="o">.</span><span class="n">urlsafe_b64decode</span><span class="p">(</span><span class="n">href</span> <span class="o">+</span> <span class="s1">'===='</span><span class="p">)</span>
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="p">[</span><span class="n">href</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="sa">b</span><span class="s1">'http'</span><span class="p">)</span> <span class="p">:]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">b</span><span class="s1">'</span><span class="se">\xd2</span><span class="s1">'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="n">href</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span>
|
||||
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'./article/h3[1]'</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># The pub_date is mostly a string like 'yesterday', not a real</span>
|
||||
<span class="c1"># timezone date or time. Therefore we can't use publishedDate.</span>
|
||||
<span class="n">pub_date</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'./article//time'</span><span class="p">))</span>
|
||||
<span class="n">pub_origin</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'./article//a[@data-n-tid]'</span><span class="p">))</span>
|
||||
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="s1">' / '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span><span class="n">pub_origin</span><span class="p">,</span> <span class="n">pub_date</span><span class="p">]</span> <span class="k">if</span> <span class="n">x</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># The image URL is located in a preceding sibling &lt;img&gt; tag, e.g.:</span>
|
||||
<span class="c1"># "https://lh3.googleusercontent.com/DjhQh7DMszk.....z=-p-h100-w100"</span>
|
||||
<span class="c1"># These URLs are long but not personalized (double checked via tor).</span>
|
||||
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'preceding-sibling::a/figure/img/@src'</span><span class="p">))</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">href</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">img_src</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="c1"># return results</span>
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<span class="n">ceid_list</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="s1">'AE:ar'</span><span class="p">,</span>
|
||||
<span class="s1">'AR:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'AT:de'</span><span class="p">,</span>
|
||||
<span class="s1">'AU:en'</span><span class="p">,</span>
|
||||
<span class="s1">'BD:bn'</span><span class="p">,</span>
|
||||
<span class="s1">'BE:fr'</span><span class="p">,</span>
|
||||
<span class="s1">'BE:nl'</span><span class="p">,</span>
|
||||
<span class="s1">'BG:bg'</span><span class="p">,</span>
|
||||
<span class="s1">'BR:pt-419'</span><span class="p">,</span>
|
||||
<span class="s1">'BW:en'</span><span class="p">,</span>
|
||||
<span class="s1">'CA:en'</span><span class="p">,</span>
|
||||
<span class="s1">'CA:fr'</span><span class="p">,</span>
|
||||
<span class="s1">'CH:de'</span><span class="p">,</span>
|
||||
<span class="s1">'CH:fr'</span><span class="p">,</span>
|
||||
<span class="s1">'CL:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'CN:zh-Hans'</span><span class="p">,</span>
|
||||
<span class="s1">'CO:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'CU:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'CZ:cs'</span><span class="p">,</span>
|
||||
<span class="s1">'DE:de'</span><span class="p">,</span>
|
||||
<span class="s1">'EG:ar'</span><span class="p">,</span>
|
||||
<span class="s1">'ES:es'</span><span class="p">,</span>
|
||||
<span class="s1">'ET:en'</span><span class="p">,</span>
|
||||
<span class="s1">'FR:fr'</span><span class="p">,</span>
|
||||
<span class="s1">'GB:en'</span><span class="p">,</span>
|
||||
<span class="s1">'GH:en'</span><span class="p">,</span>
|
||||
<span class="s1">'GR:el'</span><span class="p">,</span>
|
||||
<span class="s1">'HK:zh-Hant'</span><span class="p">,</span>
|
||||
<span class="s1">'HU:hu'</span><span class="p">,</span>
|
||||
<span class="s1">'ID:en'</span><span class="p">,</span>
|
||||
<span class="s1">'ID:id'</span><span class="p">,</span>
|
||||
<span class="s1">'IE:en'</span><span class="p">,</span>
|
||||
<span class="s1">'IL:en'</span><span class="p">,</span>
|
||||
<span class="s1">'IL:he'</span><span class="p">,</span>
|
||||
<span class="s1">'IN:bn'</span><span class="p">,</span>
|
||||
<span class="s1">'IN:en'</span><span class="p">,</span>
|
||||
<span class="s1">'IN:hi'</span><span class="p">,</span>
|
||||
<span class="s1">'IN:ml'</span><span class="p">,</span>
|
||||
<span class="s1">'IN:mr'</span><span class="p">,</span>
|
||||
<span class="s1">'IN:ta'</span><span class="p">,</span>
|
||||
<span class="s1">'IN:te'</span><span class="p">,</span>
|
||||
<span class="s1">'IT:it'</span><span class="p">,</span>
|
||||
<span class="s1">'JP:ja'</span><span class="p">,</span>
|
||||
<span class="s1">'KE:en'</span><span class="p">,</span>
|
||||
<span class="s1">'KR:ko'</span><span class="p">,</span>
|
||||
<span class="s1">'LB:ar'</span><span class="p">,</span>
|
||||
<span class="s1">'LT:lt'</span><span class="p">,</span>
|
||||
<span class="s1">'LV:en'</span><span class="p">,</span>
|
||||
<span class="s1">'LV:lv'</span><span class="p">,</span>
|
||||
<span class="s1">'MA:fr'</span><span class="p">,</span>
|
||||
<span class="s1">'MX:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'MY:en'</span><span class="p">,</span>
|
||||
<span class="s1">'NA:en'</span><span class="p">,</span>
|
||||
<span class="s1">'NG:en'</span><span class="p">,</span>
|
||||
<span class="s1">'NL:nl'</span><span class="p">,</span>
|
||||
<span class="s1">'NO:no'</span><span class="p">,</span>
|
||||
<span class="s1">'NZ:en'</span><span class="p">,</span>
|
||||
<span class="s1">'PE:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'PH:en'</span><span class="p">,</span>
|
||||
<span class="s1">'PK:en'</span><span class="p">,</span>
|
||||
<span class="s1">'PL:pl'</span><span class="p">,</span>
|
||||
<span class="s1">'PT:pt-150'</span><span class="p">,</span>
|
||||
<span class="s1">'RO:ro'</span><span class="p">,</span>
|
||||
<span class="s1">'RS:sr'</span><span class="p">,</span>
|
||||
<span class="s1">'RU:ru'</span><span class="p">,</span>
|
||||
<span class="s1">'SA:ar'</span><span class="p">,</span>
|
||||
<span class="s1">'SE:sv'</span><span class="p">,</span>
|
||||
<span class="s1">'SG:en'</span><span class="p">,</span>
|
||||
<span class="s1">'SI:sl'</span><span class="p">,</span>
|
||||
<span class="s1">'SK:sk'</span><span class="p">,</span>
|
||||
<span class="s1">'SN:fr'</span><span class="p">,</span>
|
||||
<span class="s1">'TH:th'</span><span class="p">,</span>
|
||||
<span class="s1">'TR:tr'</span><span class="p">,</span>
|
||||
<span class="s1">'TW:zh-Hant'</span><span class="p">,</span>
|
||||
<span class="s1">'TZ:en'</span><span class="p">,</span>
|
||||
<span class="s1">'UA:ru'</span><span class="p">,</span>
|
||||
<span class="s1">'UA:uk'</span><span class="p">,</span>
|
||||
<span class="s1">'UG:en'</span><span class="p">,</span>
|
||||
<span class="s1">'US:en'</span><span class="p">,</span>
|
||||
<span class="s1">'US:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'VE:es-419'</span><span class="p">,</span>
|
||||
<span class="s1">'VN:vi'</span><span class="p">,</span>
|
||||
<span class="s1">'ZA:en'</span><span class="p">,</span>
|
||||
<span class="s1">'ZW:en'</span><span class="p">,</span>
|
||||
<span class="p">]</span>
|
||||
<span class="sd">"""List of region/language combinations supported by Google News. Values of the</span>
|
||||
<span class="sd">``ceid`` argument of the Google News REST API."""</span>
|
||||
|
||||
|
||||
<span class="n">_skip_values</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="s1">'ET:en'</span><span class="p">,</span> <span class="c1"># english (ethiopia)</span>
|
||||
<span class="s1">'ID:en'</span><span class="p">,</span> <span class="c1"># english (indonesia)</span>
|
||||
<span class="s1">'LV:en'</span><span class="p">,</span> <span class="c1"># english (latvia)</span>
|
||||
<span class="p">]</span>
|
||||
|
||||
<span class="n">_ceid_locale_map</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'NO:no'</span><span class="p">:</span> <span class="s1">'nb-NO'</span><span class="p">}</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="n">_fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">,</span> <span class="n">add_domains</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ceid'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">ceid</span> <span class="ow">in</span> <span class="n">ceid_list</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">ceid</span> <span class="ow">in</span> <span class="n">_skip_values</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">region</span><span class="p">,</span> <span class="n">lang</span> <span class="o">=</span> <span class="n">ceid</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">':'</span><span class="p">)</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">lang</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'Hant'</span><span class="p">,</span> <span class="s1">'Hans'</span><span class="p">]:</span>
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="n">sxng_locale</span> <span class="o">=</span> <span class="n">_ceid_locale_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ceid</span><span class="p">,</span> <span class="n">lang</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">region</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> -> </span><span class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="p">(</span><span class="n">ceid</span><span class="p">,</span> <span class="n">sxng_locale</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'ceid'</span><span class="p">][</span><span class="n">locales</span><span class="o">.</span><span class="n">region_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)]</span> <span class="o">=</span> <span class="n">ceid</span>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
329
_modules/searx/engines/google_scholar.html
Normal file
329
_modules/searx/engines/google_scholar.html
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.google_scholar — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.google_scholar</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.google_scholar</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This is the implementation of the Google Scholar engine.</span>
|
||||
|
||||
<span class="sd">Compared to other Google services the Scholar engine has a simple GET REST-API</span>
|
||||
<span class="sd">and there does not exist an `async` API. Even though the API is slightly vintage we</span>
|
||||
<span class="sd">can make use of the :ref:`google API` to assemble the arguments of the GET</span>
|
||||
<span class="sd">request.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Optional</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">eval_xpath</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_getindex</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_list</span><span class="p">,</span>
|
||||
<span class="n">extract_text</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineCaptchaException</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">get_google_info</span><span class="p">,</span>
|
||||
<span class="n">time_range_dict</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://scholar.google.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q494817'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'science'</span><span class="p">,</span> <span class="s1">'scientific publications'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">language_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
|
||||
<!-- Highlighted source of searx.engines.google_scholar.time_range_args; the [docs] link returns to its API entry. Whitespace inside this block is significant (it sits in a pre element). --><div class="viewcode-block" id="time_range_args"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.time_range_args">[docs]</a><span class="k">def</span> <span class="nf">time_range_args</span><span class="p">(</span><span class="n">params</span><span class="p">):</span>
<span class="w">    </span><span class="sd">"""Returns a dictionary with a time range arguments based on</span>
<span class="sd">    ``params['time_range']``.</span>

<span class="sd">    Google Scholar supports a detailed search by year. Searching by *last</span>
<span class="sd">    month* or *last week* (as offered by SearXNG) is uncommon for scientific</span>
<span class="sd">    publications and is not supported by Google Scholar.</span>

<span class="sd">    To limit the result list when the users selects a range, all the SearXNG</span>
<span class="sd">    ranges (*day*, *week*, *month*, *year*) are mapped to *year*. If no range</span>
<span class="sd">    is set an empty dictionary of arguments is returned. Example; when</span>
<span class="sd">    user selects a time range (current year minus one in 2022):</span>

<span class="sd">    .. code:: python</span>

<span class="sd">        { 'as_ylo' : 2021 }</span>

<span class="sd">    """</span>
    <span class="n">ret_val</span> <span class="o">=</span> <span class="p">{}</span>
    <span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span>
        <span class="n">ret_val</span><span class="p">[</span><span class="s1">'as_ylo'</span><span class="p">]</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">year</span> <span class="o">-</span> <span class="mi">1</span>
    <span class="k">return</span> <span class="n">ret_val</span></div>
|
||||
|
||||
|
||||
<!-- Highlighted source of searx.engines.google_scholar.detect_google_captcha; the [docs] link returns to its API entry. Whitespace inside this block is significant (it sits in a pre element). --><div class="viewcode-block" id="detect_google_captcha"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.detect_google_captcha">[docs]</a><span class="k">def</span> <span class="nf">detect_google_captcha</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span>
<span class="w">    </span><span class="sd">"""In case of CAPTCHA Google Scholar open its own *not a Robot* dialog and is</span>
<span class="sd">    not redirected to ``sorry.google.com``.</span>
<span class="sd">    """</span>
    <span class="k">if</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//form[@id='gs_captcha_f']"</span><span class="p">):</span>
        <span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<!-- Highlighted source of searx.engines.google_scholar.request; the [docs] link returns to its API entry. Whitespace inside this block is significant (it sits in a pre element). --><div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<span class="w">    </span><span class="sd">"""Google-Scholar search request"""</span>

    <span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span>
    <span class="c1"># subdomain is: scholar.google.xy</span>
    <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"www."</span><span class="p">,</span> <span class="s2">"scholar."</span><span class="p">)</span>

    <span class="n">args</span> <span class="o">=</span> <span class="p">{</span>
        <span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
        <span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span>
        <span class="s1">'start'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span><span class="p">,</span>
        <span class="s1">'as_sdt'</span><span class="p">:</span> <span class="s1">'2007'</span><span class="p">,</span> <span class="c1"># include patents / to disable set '0,5'</span>
        <span class="s1">'as_vis'</span><span class="p">:</span> <span class="s1">'0'</span><span class="p">,</span> <span class="c1"># include citations / to disable set '1'</span>
    <span class="p">}</span>
    <span class="n">args</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">time_range_args</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>

    <span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'https://'</span> <span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span> <span class="o">+</span> <span class="s1">'/scholar?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
    <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span>
    <span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span>
    <span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<!-- Highlighted source of searx.engines.google_scholar.parse_gs_a; the [docs] link returns to its API entry. Whitespace inside this block is significant (it sits in a pre element). --><div class="viewcode-block" id="parse_gs_a"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.parse_gs_a">[docs]</a><span class="k">def</span> <span class="nf">parse_gs_a</span><span class="p">(</span><span class="n">text</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]):</span>
<span class="w">    </span><span class="sd">"""Parse the text written in green.</span>

<span class="sd">    Possible formats:</span>
<span class="sd">    * "{authors} - {journal}, {year} - {publisher}"</span>
<span class="sd">    * "{authors} - {year} - {publisher}"</span>
<span class="sd">    * "{authors} - {publisher}"</span>
<span class="sd">    """</span>
    <span class="k">if</span> <span class="n">text</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">text</span> <span class="o">==</span> <span class="s2">""</span><span class="p">:</span>
        <span class="k">return</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>

    <span class="n">s_text</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">' - '</span><span class="p">)</span>
    <span class="n">authors</span> <span class="o">=</span> <span class="n">s_text</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">', '</span><span class="p">)</span>
    <span class="n">publisher</span> <span class="o">=</span> <span class="n">s_text</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
    <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">s_text</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">3</span><span class="p">:</span>
        <span class="k">return</span> <span class="n">authors</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">publisher</span><span class="p">,</span> <span class="kc">None</span>

    <span class="c1"># the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}"</span>
    <span class="c1"># get journal and year</span>
    <span class="n">journal_year</span> <span class="o">=</span> <span class="n">s_text</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">', '</span><span class="p">)</span>
    <span class="c1"># journal is optional and may contains some coma</span>
    <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">journal_year</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
        <span class="n">journal</span> <span class="o">=</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">journal_year</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
        <span class="k">if</span> <span class="n">journal</span> <span class="o">==</span> <span class="s1">'…'</span><span class="p">:</span>
            <span class="n">journal</span> <span class="o">=</span> <span class="kc">None</span>
    <span class="k">else</span><span class="p">:</span>
        <span class="n">journal</span> <span class="o">=</span> <span class="kc">None</span>
    <span class="c1"># year</span>
    <span class="n">year</span> <span class="o">=</span> <span class="n">journal_year</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
    <span class="k">try</span><span class="p">:</span>
        <span class="n">publishedDate</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">year</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span> <span class="s1">'%Y'</span><span class="p">)</span>
    <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
        <span class="n">publishedDate</span> <span class="o">=</span> <span class="kc">None</span>
    <span class="k">return</span> <span class="n">authors</span><span class="p">,</span> <span class="n">journal</span><span class="p">,</span> <span class="n">publisher</span><span class="p">,</span> <span class="n">publishedDate</span></div>
|
||||
|
||||
|
||||
<!-- Highlighted source of searx.engines.google_scholar.response; the [docs] link returns to its API entry. Whitespace inside this block is significant (it sits in a pre element). --><div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_scholar.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> <span class="c1"># pylint: disable=too-many-locals</span>
<span class="w">    </span><span class="sd">"""Parse response from Google Scholar"""</span>
    <span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>

    <span class="c1"># convert the text to dom</span>
    <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
    <span class="n">detect_google_captcha</span><span class="p">(</span><span class="n">dom</span><span class="p">)</span>

    <span class="c1"># parse results</span>
    <span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@data-rp]'</span><span class="p">):</span>

        <span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3[1]//a'</span><span class="p">))</span>

        <span class="k">if</span> <span class="ow">not</span> <span class="n">title</span><span class="p">:</span>
            <span class="c1"># this is a [ZITATION] block</span>
            <span class="k">continue</span>

        <span class="n">pub_type</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//span[@class="gs_ctg2"]'</span><span class="p">))</span>
        <span class="k">if</span> <span class="n">pub_type</span><span class="p">:</span>
            <span class="n">pub_type</span> <span class="o">=</span> <span class="n">pub_type</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>

        <span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3[1]//a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
        <span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_rs"]'</span><span class="p">))</span>
        <span class="n">authors</span><span class="p">,</span> <span class="n">journal</span><span class="p">,</span> <span class="n">publisher</span><span class="p">,</span> <span class="n">publishedDate</span> <span class="o">=</span> <span class="n">parse_gs_a</span><span class="p">(</span>
            <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_a"]'</span><span class="p">))</span>
        <span class="p">)</span>
        <span class="k">if</span> <span class="n">publisher</span> <span class="ow">in</span> <span class="n">url</span><span class="p">:</span>
            <span class="n">publisher</span> <span class="o">=</span> <span class="kc">None</span>

        <span class="c1"># cited by</span>
        <span class="n">comments</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'</span><span class="p">))</span>

        <span class="c1"># link to the html or pdf document</span>
        <span class="n">html_url</span> <span class="o">=</span> <span class="kc">None</span>
        <span class="n">pdf_url</span> <span class="o">=</span> <span class="kc">None</span>
        <span class="n">doc_url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="gs_or_ggsm"]/a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
        <span class="n">doc_type</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//span[@class="gs_ctg2"]'</span><span class="p">))</span>
        <span class="k">if</span> <span class="n">doc_type</span> <span class="o">==</span> <span class="s2">"[PDF]"</span><span class="p">:</span>
            <span class="n">pdf_url</span> <span class="o">=</span> <span class="n">doc_url</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">html_url</span> <span class="o">=</span> <span class="n">doc_url</span>

        <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
            <span class="p">{</span>
                <span class="s1">'template'</span><span class="p">:</span> <span class="s1">'paper.html'</span><span class="p">,</span>
                <span class="s1">'type'</span><span class="p">:</span> <span class="n">pub_type</span><span class="p">,</span>
                <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
                <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
                <span class="s1">'authors'</span><span class="p">:</span> <span class="n">authors</span><span class="p">,</span>
                <span class="s1">'publisher'</span><span class="p">:</span> <span class="n">publisher</span><span class="p">,</span>
                <span class="s1">'journal'</span><span class="p">:</span> <span class="n">journal</span><span class="p">,</span>
                <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">publishedDate</span><span class="p">,</span>
                <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
                <span class="s1">'comments'</span><span class="p">:</span> <span class="n">comments</span><span class="p">,</span>
                <span class="s1">'html_url'</span><span class="p">:</span> <span class="n">html_url</span><span class="p">,</span>
                <span class="s1">'pdf_url'</span><span class="p">:</span> <span class="n">pdf_url</span><span class="p">,</span>
            <span class="p">}</span>
        <span class="p">)</span>

    <span class="c1"># parse suggestion</span>
    <span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "gs_qsuggest_wrap")]//li//a'</span><span class="p">):</span>
        <span class="c1"># append suggestion</span>
        <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span>

    <span class="k">for</span> <span class="n">correction</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[@class="gs_r gs_pda"]/a'</span><span class="p">):</span>
        <span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'correction'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">correction</span><span class="p">)})</span>

    <span class="k">return</span> <span class="n">results</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<!-- Sidebar: logo, table of contents, project links, breadcrumb-style navigation and the quick search form. -->
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
  <div class="sphinxsidebarwrapper">

    <p class="logo"><a href="../../../index.html">
      <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
    </a></p>

    <h3><a href="../../../index.html">Table of Contents</a></h3>
    <ul>
      <li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
    </ul>

    <h3>Project Links</h3>
    <ul>
      <li><a href="https://github.com/searxng/searxng/tree/master">Source</a></li>
      <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a></li>
      <li><a href="https://searx.space">Public instances</a></li>
      <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a></li>
    </ul>

    <h3>Navigation</h3>
    <!-- One nested list per ancestor page; every ul/li opened here is closed exactly once. -->
    <ul>
      <li><a href="../../../index.html">Overview</a>
        <ul>
          <li><a href="../../index.html">Module code</a>
            <ul>
              <li><a href="../engines.html">searx.engines</a></li>
            </ul>
          </li>
        </ul>
      </li>
    </ul>

    <!-- The search box starts hidden and is revealed by the inline script below, so it is only shown when JavaScript runs. -->
    <div id="searchbox" style="display: none" role="search">
      <h3 id="searchlabel">Quick search</h3>
      <div class="searchformwrapper">
        <form class="search" action="../../../search.html" method="get">
          <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
          <input type="submit" value="Go" />
        </form>
      </div>
    </div>
    <script>document.getElementById('searchbox').style.display = "block"</script>
  </div>
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<!-- Page footer carrying the copyright notice. -->
<div class="footer" role="contentinfo">
  &#169; Copyright SearXNG team.
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
251
_modules/searx/engines/google_videos.html
Normal file
251
_modules/searx/engines/google_videos.html
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
  <!-- Document metadata for the searx.engines.google_videos source page. Only metadata elements may appear here: stray text would end the head early. -->
  <meta charset="utf-8" />
  <!-- A single viewport declaration; the page previously declared it twice. -->
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>searx.engines.google_videos &#8212; SearXNG Documentation (2023.8.11+905ce2a6f)</title>
  <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
  <link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
  <link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
  <!-- Script order is kept as generated: documentation_options.js is loaded before the scripts that follow it. -->
  <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
  <script src="../../../_static/doctools.js?v=888ff710"></script>
  <script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
  <script src="../../../_static/tabs.js?v=3030b3cb"></script>
  <link rel="index" title="Index" href="../../../genindex.html" />
  <link rel="search" title="Search" href="../../../search.html" />
</head><body>
|
||||
<!-- Top "related" bar: index and module-index shortcuts followed by the breadcrumb trail to this page. -->
<div class="related" role="navigation" aria-label="related navigation">
  <h3>Navigation</h3>
  <ul>
    <li class="right" style="margin-right: 10px">
      <a href="../../../genindex.html" title="General Index" accesskey="I">index</a></li>
    <li class="right">
      <a href="../../../py-modindex.html" title="Python Module Index">modules</a> |</li>
    <li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> &#187;</li>
    <li class="nav-item nav-item-1"><a href="../../index.html">Module code</a> &#187;</li>
    <li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> &#187;</li>
    <!-- The empty href points at the current document; aria-current marks it as the current page for assistive technology. -->
    <li class="nav-item nav-item-this"><a href="" aria-current="page">searx.engines.google_videos</a></li>
  </ul>
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.google_videos</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This is the implementation of the Google Videos engine.</span>
|
||||
|
||||
<span class="sd">.. admonition:: Content-Security-Policy (CSP)</span>
|
||||
|
||||
<span class="sd"> This engine needs to allow images from the `data URLs`_ (prefixed with the</span>
|
||||
<span class="sd"> ``data:`` scheme)::</span>
|
||||
|
||||
<span class="sd"> Header set Content-Security-Policy "img-src 'self' data: ;"</span>
|
||||
|
||||
<span class="sd">.. _data URLs:</span>
|
||||
<span class="sd"> https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">eval_xpath</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_list</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_getindex</span><span class="p">,</span>
|
||||
<span class="n">extract_text</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.google</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">get_google_info</span><span class="p">,</span>
|
||||
<span class="n">time_range_dict</span><span class="p">,</span>
|
||||
<span class="n">filter_mapping</span><span class="p">,</span>
|
||||
<span class="n">suggestion_xpath</span><span class="p">,</span>
|
||||
<span class="n">detect_google_sorry</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.google.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q219885'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developers.google.com/custom-search'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">language_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_videos.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Google-Video search request"""</span>
|
||||
|
||||
<span class="n">google_info</span> <span class="o">=</span> <span class="n">get_google_info</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">traits</span><span class="p">)</span>
|
||||
|
||||
<span class="n">query_url</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="s1">'https://'</span>
|
||||
<span class="o">+</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'subdomain'</span><span class="p">]</span>
|
||||
<span class="o">+</span> <span class="s1">'/search'</span>
|
||||
<span class="o">+</span> <span class="s2">"?"</span>
|
||||
<span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'tbm'</span><span class="p">:</span> <span class="s2">"vid"</span><span class="p">,</span>
|
||||
<span class="s1">'start'</span><span class="p">:</span> <span class="mi">10</span> <span class="o">*</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">],</span>
|
||||
<span class="o">**</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'params'</span><span class="p">],</span>
|
||||
<span class="s1">'asearch'</span><span class="p">:</span> <span class="s1">'arc'</span><span class="p">,</span>
|
||||
<span class="s1">'async'</span><span class="p">:</span> <span class="s1">'use_ac:true,_fmt:html'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_dict</span><span class="p">:</span>
|
||||
<span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&amp;'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'tbs'</span><span class="p">:</span> <span class="s1">'qdr:'</span> <span class="o">+</span> <span class="n">time_range_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]})</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]:</span>
|
||||
<span class="n">query_url</span> <span class="o">+=</span> <span class="s1">'&amp;'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'safe'</span><span class="p">:</span> <span class="n">filter_mapping</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]})</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">query_url</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span> <span class="o">=</span> <span class="n">google_info</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">google_info</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/google.html#searx.engines.google_videos.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get response from google's search request"""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">detect_google_sorry</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># convert the text to dom</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># parse results</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "g ")]'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//img/@src'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">img_src</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a/h3[1]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//a/h3[1]/../@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="n">c_node</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="Uroaid"]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">c_node</span><span class="p">)</span>
|
||||
<span class="n">pub_info</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="P7xzyf"]'</span><span class="p">))</span>
|
||||
<span class="n">length</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[@class="J1mWY"]'</span><span class="p">))</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="s1">'author'</span><span class="p">:</span> <span class="n">pub_info</span><span class="p">,</span>
|
||||
<span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">img_src</span><span class="p">,</span>
|
||||
<span class="s1">'length'</span><span class="p">:</span> <span class="n">length</span><span class="p">,</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="c1"># parse suggestion</span>
|
||||
<span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">suggestion_xpath</span><span class="p">):</span>
|
||||
<span class="c1"># append suggestion</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
298
_modules/searx/engines/peertube.html
Normal file
298
_modules/searx/engines/peertube.html
Normal file
|
|
@ -0,0 +1,298 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.peertube — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.peertube</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.peertube</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Peertube and :py:obj:`SepiaSearch &lt;searx.engines.sepiasearch&gt;` do share</span>
|
||||
<span class="sd">(more or less) the same REST API and the schema of the JSON result is identical.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
|
||||
<span class="kn">from</span> <span class="nn">dateutil.parser</span> <span class="kn">import</span> <span class="n">parse</span>
|
||||
<span class="kn">from</span> <span class="nn">dateutil.relativedelta</span> <span class="kn">import</span> <span class="n">relativedelta</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">html_to_text</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># pylint: disable=line-too-long</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://joinpeertube.org'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q50938515'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"videos"</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://peer.tube"</span>
|
||||
<span class="sd">"""Base URL of the Peertube instance. A list of instances is available at:</span>
|
||||
|
||||
<span class="sd">- https://instances.joinpeertube.org/instances</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_table</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'day'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(),</span>
|
||||
<span class="s1">'week'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">weeks</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span>
|
||||
<span class="s1">'month'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">months</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span>
|
||||
<span class="s1">'year'</span><span class="p">:</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">years</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch_table</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'both'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'false'</span><span class="p">}</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">minute_to_hm</span><span class="p">(</span><span class="n">minute</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">minute</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"</span><span class="si">%d</span><span class="s2">:</span><span class="si">%02d</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="nb">divmod</span><span class="p">(</span><span class="n">minute</span><span class="p">,</span> <span class="mi">60</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.peertube.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble request for the Peertube API"""</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">query</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
|
||||
<span class="c1"># eng_region = traits.get_region(params['searxng_locale'], 'en_US')</span>
|
||||
<span class="n">eng_lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="n">base_url</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">"/"</span><span class="p">)</span>
|
||||
<span class="o">+</span> <span class="s2">"/api/v1/search/videos?"</span>
|
||||
<span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'searchTarget'</span><span class="p">:</span> <span class="s1">'search-index'</span><span class="p">,</span> <span class="c1"># Vidiversum</span>
|
||||
<span class="s1">'resultType'</span><span class="p">:</span> <span class="s1">'videos'</span><span class="p">,</span>
|
||||
<span class="s1">'start'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span><span class="p">,</span>
|
||||
<span class="s1">'count'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span>
|
||||
<span class="c1"># -createdAt: sort by date ascending / createdAt: date descending</span>
|
||||
<span class="s1">'sort'</span><span class="p">:</span> <span class="s1">'-match'</span><span class="p">,</span> <span class="c1"># sort by *match descending*</span>
|
||||
<span class="s1">'nsfw'</span><span class="p">:</span> <span class="n">safesearch_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]],</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&amp;languageOneOf[]='</span> <span class="o">+</span> <span class="n">eng_lang</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&amp;boostLanguages[]='</span> <span class="o">+</span> <span class="n">eng_lang</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_table</span><span class="p">:</span>
|
||||
<span class="n">time</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">date</span><span class="p">()</span> <span class="o">+</span> <span class="n">time_range_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&amp;startDate='</span> <span class="o">+</span> <span class="n">time</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">video_response</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="video_response"><a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.peertube.video_response">[docs]</a><span class="k">def</span> <span class="nf">video_response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Parse video response from SepiaSearch and Peertube instances."""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">json_data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'data'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]:</span>
|
||||
<span class="n">metadata</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="n">x</span>
|
||||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span>
|
||||
<span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'channel'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'displayName'</span><span class="p">),</span>
|
||||
<span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'channel'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'name'</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'@'</span> <span class="o">+</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'channel'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'host'</span><span class="p">),</span>
|
||||
<span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'tags'</span><span class="p">,</span> <span class="p">[])),</span>
|
||||
<span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">x</span>
|
||||
<span class="p">]</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">],</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'description'</span><span class="p">)</span> <span class="ow">or</span> <span class="s1">''</span><span class="p">),</span>
|
||||
<span class="s1">'author'</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'account'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'displayName'</span><span class="p">),</span>
|
||||
<span class="s1">'length'</span><span class="p">:</span> <span class="n">minute_to_hm</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'duration'</span><span class="p">)),</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'videos.html'</span><span class="p">,</span>
|
||||
<span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">parse</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s1">'publishedAt'</span><span class="p">]),</span>
|
||||
<span class="s1">'iframe_src'</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'embedUrl'</span><span class="p">),</span>
|
||||
<span class="s1">'thumbnail'</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'thumbnailUrl'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'previewUrl'</span><span class="p">),</span>
|
||||
<span class="s1">'metadata'</span><span class="p">:</span> <span class="s1">' | '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">metadata</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.peertube.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages from peertube's search-index source code.</span>
|
||||
|
||||
<span class="sd"> See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_</span>
|
||||
|
||||
<span class="sd"> .. _8ed5c729 - Refactor and redesign client:</span>
|
||||
<span class="sd"> https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729</span>
|
||||
<span class="sd"> .. _videoLanguages:</span>
|
||||
<span class="sd"> https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span>
|
||||
<span class="s1">'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue'</span><span class="p">,</span>
|
||||
<span class="c1"># the response from search-index repository is very slow</span>
|
||||
<span class="n">timeout</span><span class="o">=</span><span class="mi">60</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from peertube is not OK."</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="n">js_lang</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="sa">r</span><span class="s2">"videoLanguages \(\)[^\n]+(.*?)\]"</span><span class="p">,</span> <span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">DOTALL</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">js_lang</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine languages from peertube"</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">lang</span> <span class="ow">in</span> <span class="n">re</span><span class="o">.</span><span class="n">finditer</span><span class="p">(</span><span class="sa">r</span><span class="s2">"\{ id: '([a-z]+)', label:"</span><span class="p">,</span> <span class="n">js_lang</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)):</span>
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">lang</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="o">==</span> <span class="s1">'oc'</span><span class="p">:</span>
|
||||
<span class="c1"># Occitan is not known by babel, its closest relative is Catalan</span>
|
||||
<span class="c1"># but 'ca' is already in the list of engine_traits.languages --></span>
|
||||
<span class="c1"># 'oc' will be ignored.</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: </span><span class="si">%s</span><span class="s2"> is unknown by babel"</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh_Hans'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'zh'</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="s1">'zh_Hant'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'zh'</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
198
_modules/searx/engines/sepiasearch.html
Normal file
198
_modules/searx/engines/sepiasearch.html
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.sepiasearch — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.sepiasearch</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.sepiasearch</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""SepiaSearch uses the same languages as :py:obj:`Peertube</span>
|
||||
<span class="sd"><searx.engines.peertube>` and the response is identical to the response from the</span>
|
||||
<span class="sd">peertube engines.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.peertube</span> <span class="kn">import</span> <span class="n">fetch_traits</span> <span class="c1"># pylint: disable=unused-import</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.peertube</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="c1"># pylint: disable=unused-import</span>
|
||||
<span class="n">video_response</span><span class="p">,</span>
|
||||
<span class="n">safesearch_table</span><span class="p">,</span>
|
||||
<span class="n">time_range_table</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># pylint: disable=line-too-long</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://sepiasearch.org'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'videos'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://sepiasearch.org'</span>
|
||||
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/peertube.html#searx.engines.sepiasearch.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble request for the SepiaSearch API"""</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">query</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
|
||||
<span class="c1"># eng_region = traits.get_region(params['searxng_locale'], 'en_US')</span>
|
||||
<span class="n">eng_lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="n">base_url</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">"/"</span><span class="p">)</span>
|
||||
<span class="o">+</span> <span class="s2">"/api/v1/search/videos?"</span>
|
||||
<span class="o">+</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'search'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'start'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">10</span><span class="p">,</span>
|
||||
<span class="s1">'count'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span>
|
||||
<span class="c1"># -createdAt: sort by date descending / createdAt: date ascending</span>
|
||||
<span class="s1">'sort'</span><span class="p">:</span> <span class="s1">'-match'</span><span class="p">,</span> <span class="c1"># sort by *match descending*</span>
|
||||
<span class="s1">'nsfw'</span><span class="p">:</span> <span class="n">safesearch_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]],</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&languageOneOf[]='</span> <span class="o">+</span> <span class="n">eng_lang</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&boostLanguages[]='</span> <span class="o">+</span> <span class="n">eng_lang</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]</span> <span class="ow">in</span> <span class="n">time_range_table</span><span class="p">:</span>
|
||||
<span class="n">time</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">date</span><span class="p">()</span> <span class="o">+</span> <span class="n">time_range_table</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">]]</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">+=</span> <span class="s1">'&startDate='</span> <span class="o">+</span> <span class="n">time</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">video_response</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
213
_modules/searx/engines/sqlite.html
Normal file
213
_modules/searx/engines/sqlite.html
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.sqlite — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.sqlite</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.sqlite</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""SQLite is a small, fast and reliable SQL database engine. It does not require</span>
|
||||
<span class="sd">any extra dependency.</span>
|
||||
|
||||
<span class="sd">Example</span>
|
||||
<span class="sd">=======</span>
|
||||
|
||||
<span class="sd">.. _MediathekView: https://mediathekview.de/</span>
|
||||
|
||||
<span class="sd">To demonstrate the power of database engines, here is a more complex example</span>
|
||||
<span class="sd">which reads from a MediathekView_ (DE) movie database. For this example of the</span>
|
||||
<span class="sd">SQLite engine, download the database:</span>
|
||||
|
||||
<span class="sd">- https://liste.mediathekview.de/filmliste-v2.db.bz2</span>
|
||||
|
||||
<span class="sd">and unpack into ``searx/data/filmliste-v2.db``. To search the database, use e.g.</span>
|
||||
<span class="sd">the test query ``!mediathekview concert``</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name: mediathekview</span>
|
||||
<span class="sd"> engine: sqlite</span>
|
||||
<span class="sd"> disabled: False</span>
|
||||
<span class="sd"> categories: general</span>
|
||||
<span class="sd"> result_template: default.html</span>
|
||||
<span class="sd"> database: searx/data/filmliste-v2.db</span>
|
||||
<span class="sd"> query_str: >-</span>
|
||||
<span class="sd"> SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title,</span>
|
||||
<span class="sd"> COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url,</span>
|
||||
<span class="sd"> description AS content</span>
|
||||
<span class="sd"> FROM film</span>
|
||||
<span class="sd"> WHERE title LIKE :wildcard OR description LIKE :wildcard</span>
|
||||
<span class="sd"> ORDER BY duration DESC</span>
|
||||
|
||||
<span class="sd">Implementations</span>
|
||||
<span class="sd">===============</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">sqlite3</span>
|
||||
<span class="kn">import</span> <span class="nn">contextlib</span>
|
||||
|
||||
<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'offline'</span>
|
||||
<span class="n">database</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="n">query_str</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="n">limit</span> <span class="o">=</span> <span class="mi">10</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">result_template</span> <span class="o">=</span> <span class="s1">'key-value.html'</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="s1">'query_str'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine_settings</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'query_str cannot be empty'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">engine_settings</span><span class="p">[</span><span class="s1">'query_str'</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'select '</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'only SELECT query is supported'</span><span class="p">)</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="sqlite_cursor"><a class="viewcode-back" href="../../../dev/engines/offline/sql-engines.html#searx.engines.sqlite.sqlite_cursor">[docs]</a><span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span>
|
||||
<span class="k">def</span> <span class="nf">sqlite_cursor</span><span class="p">():</span>
|
||||
<span class="w"> </span><span class="sd">"""Implements a :py:obj:`Context Manager <contextlib.contextmanager>` for a</span>
|
||||
<span class="sd"> :py:obj:`sqlite3.Cursor`.</span>
|
||||
|
||||
<span class="sd"> Open database in read-only mode: if the database doesn&#39;t exist, the default</span>
|
||||
<span class="sd"> mode creates an empty file on the file system. See:</span>
|
||||
|
||||
<span class="sd"> * https://docs.python.org/3/library/sqlite3.html#sqlite3.connect</span>
|
||||
<span class="sd"> * https://www.sqlite.org/uri.html</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">uri</span> <span class="o">=</span> <span class="s1">'file:'</span> <span class="o">+</span> <span class="n">database</span> <span class="o">+</span> <span class="s1">'?mode=ro'</span>
|
||||
<span class="k">with</span> <span class="n">contextlib</span><span class="o">.</span><span class="n">closing</span><span class="p">(</span><span class="n">sqlite3</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">uri</span><span class="p">,</span> <span class="n">uri</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span> <span class="k">as</span> <span class="n">connect</span><span class="p">:</span>
|
||||
<span class="n">connect</span><span class="o">.</span><span class="n">row_factory</span> <span class="o">=</span> <span class="n">sqlite3</span><span class="o">.</span><span class="n">Row</span>
|
||||
<span class="k">with</span> <span class="n">contextlib</span><span class="o">.</span><span class="n">closing</span><span class="p">(</span><span class="n">connect</span><span class="o">.</span><span class="n">cursor</span><span class="p">())</span> <span class="k">as</span> <span class="n">cursor</span><span class="p">:</span>
|
||||
<span class="k">yield</span> <span class="n">cursor</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">query_params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'wildcard'</span><span class="p">:</span> <span class="sa">r</span><span class="s1">'%'</span> <span class="o">+</span> <span class="n">query</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">' '</span><span class="p">,</span> <span class="sa">r</span><span class="s1">'%'</span><span class="p">)</span> <span class="o">+</span> <span class="sa">r</span><span class="s1">'%'</span><span class="p">,</span>
|
||||
<span class="s1">'limit'</span><span class="p">:</span> <span class="n">limit</span><span class="p">,</span>
|
||||
<span class="s1">'offset'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">limit</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="n">query_to_run</span> <span class="o">=</span> <span class="n">query_str</span> <span class="o">+</span> <span class="s1">' LIMIT :limit OFFSET :offset'</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">sqlite_cursor</span><span class="p">()</span> <span class="k">as</span> <span class="n">cur</span><span class="p">:</span>
|
||||
|
||||
<span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">query_to_run</span><span class="p">,</span> <span class="n">query_params</span><span class="p">)</span>
|
||||
<span class="n">col_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">cn</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">cn</span> <span class="ow">in</span> <span class="n">cur</span><span class="o">.</span><span class="n">description</span><span class="p">]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">cur</span><span class="o">.</span><span class="n">fetchall</span><span class="p">():</span>
|
||||
<span class="n">item</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">col_names</span><span class="p">,</span> <span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">row</span><span class="p">)))</span>
|
||||
<span class="n">item</span><span class="p">[</span><span class="s1">'template'</span><span class="p">]</span> <span class="o">=</span> <span class="n">result_template</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"append result --> </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
606
_modules/searx/engines/startpage.html
Normal file
606
_modules/searx/engines/startpage.html
Normal file
|
|
@ -0,0 +1,606 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.startpage — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.startpage</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.startpage</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Startpage's language & region selectors are a mess ..</span>
|
||||
|
||||
<span class="sd">.. _startpage regions:</span>
|
||||
|
||||
<span class="sd">Startpage regions</span>
|
||||
<span class="sd">=================</span>
|
||||
|
||||
<span class="sd">In the list of regions there are tags we need to map to common region tags::</span>
|
||||
|
||||
<span class="sd"> pt-BR_BR --> pt_BR</span>
|
||||
<span class="sd"> zh-CN_CN --> zh_Hans_CN</span>
|
||||
<span class="sd"> zh-TW_TW --> zh_Hant_TW</span>
|
||||
<span class="sd"> zh-TW_HK --> zh_Hant_HK</span>
|
||||
<span class="sd"> en-GB_GB --> en_GB</span>
|
||||
|
||||
<span class="sd">and there is at least one tag with a three letter language tag (ISO 639-2)::</span>
|
||||
|
||||
<span class="sd"> fil_PH --> fil_PH</span>
|
||||
|
||||
<span class="sd">The locale code ``no_NO`` from Startpage does not exist and is mapped to</span>
|
||||
<span class="sd">``nb-NO``::</span>
|
||||
|
||||
<span class="sd"> babel.core.UnknownLocaleError: unknown locale 'no_NO'</span>
|
||||
|
||||
<span class="sd">For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and</span>
|
||||
<span class="sd">W3C recommends subtag over macrolanguage [2]_.</span>
|
||||
|
||||
<span class="sd">.. [1] `iana: language-subtag-registry</span>
|
||||
<span class="sd"> <https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry>`_ ::</span>
|
||||
|
||||
<span class="sd"> type: language</span>
|
||||
<span class="sd"> Subtag: nb</span>
|
||||
<span class="sd"> Description: Norwegian Bokmål</span>
|
||||
<span class="sd"> Added: 2005-10-16</span>
|
||||
<span class="sd"> Suppress-Script: Latn</span>
|
||||
<span class="sd"> Macrolanguage: no</span>
|
||||
|
||||
<span class="sd">.. [2]</span>
|
||||
<span class="sd"> Use macrolanguages with care. Some language subtags have a Scope field set to</span>
|
||||
<span class="sd"> macrolanguage, i.e. this primary language subtag encompasses a number of more</span>
|
||||
<span class="sd"> specific primary language subtags in the registry. ... As we recommended for</span>
|
||||
<span class="sd"> the collection subtags mentioned above, in most cases you should try to use</span>
|
||||
<span class="sd"> the more specific subtags ... `W3: The primary language subtag</span>
|
||||
<span class="sd"> <https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag>`_</span>
|
||||
|
||||
<span class="sd">.. _startpage languages:</span>
|
||||
|
||||
<span class="sd">Startpage languages</span>
|
||||
<span class="sd">===================</span>
|
||||
|
||||
<span class="sd">:py:obj:`send_accept_language_header`:</span>
|
||||
<span class="sd"> The displayed name in Startpage&#39;s settings page depends on the location of the</span>
|
||||
<span class="sd"> IP when ``Accept-Language`` HTTP header is unset. In :py:obj:`fetch_traits`</span>
|
||||
<span class="sd"> we use::</span>
|
||||
|
||||
<span class="sd"> 'Accept-Language': "en-US,en;q=0.5",</span>
|
||||
<span class="sd"> ..</span>
|
||||
|
||||
<span class="sd"> to get uniform names independent from the IP.</span>
|
||||
|
||||
<span class="sd">.. _startpage categories:</span>
|
||||
|
||||
<span class="sd">Startpage categories</span>
|
||||
<span class="sd">====================</span>
|
||||
|
||||
<span class="sd">Startpage's category (for Web-search, News, Videos, ..) is set by</span>
|
||||
<span class="sd">:py:obj:`startpage_categ` in settings.yml::</span>
|
||||
|
||||
<span class="sd"> - name: startpage</span>
|
||||
<span class="sd"> engine: startpage</span>
|
||||
<span class="sd"> startpage_categ: web</span>
|
||||
<span class="sd"> ...</span>
|
||||
|
||||
<span class="sd">.. hint::</span>
|
||||
|
||||
<span class="sd"> The default category is ``web`` .. and other categories than ``web`` are not</span>
|
||||
<span class="sd"> yet implemented.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">OrderedDict</span>
|
||||
<span class="kn">import</span> <span class="nn">re</span>
|
||||
<span class="kn">from</span> <span class="nn">unicodedata</span> <span class="kn">import</span> <span class="n">normalize</span><span class="p">,</span> <span class="n">combining</span>
|
||||
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">dateutil.parser</span>
|
||||
<span class="kn">import</span> <span class="nn">lxml.html</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">gen_useragent</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineCaptchaException</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">region_tag</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://startpage.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2333295'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">startpage_categ</span> <span class="o">=</span> <span class="s1">'web'</span>
|
||||
<span class="sd">"""Startpage's category, visit :ref:`startpage categories`.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="sd">"""Startpage tries to guess user's language and territory from the HTTP</span>
|
||||
<span class="sd">``Accept-Language``. Optionally the user can select a search-language (can be</span>
|
||||
<span class="sd">different to the UI language) and a region filter.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'day'</span><span class="p">:</span> <span class="s1">'d'</span><span class="p">,</span> <span class="s1">'week'</span><span class="p">:</span> <span class="s1">'w'</span><span class="p">,</span> <span class="s1">'month'</span><span class="p">:</span> <span class="s1">'m'</span><span class="p">,</span> <span class="s1">'year'</span><span class="p">:</span> <span class="s1">'y'</span><span class="p">}</span>
|
||||
<span class="n">safesearch_dict</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'0'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">}</span>
|
||||
|
||||
<span class="c1"># search-url</span>
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.startpage.com'</span>
|
||||
<span class="n">search_url</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/sp/search'</span>
|
||||
|
||||
<span class="c1"># specific xpath variables</span>
|
||||
<span class="c1"># ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]</span>
|
||||
<span class="c1"># not ads: div[@class=&quot;result&quot;] are the direct children of div[@id=&quot;results&quot;]</span>
|
||||
<span class="n">results_xpath</span> <span class="o">=</span> <span class="s1">'//div[@class="w-gl__result__main"]'</span>
|
||||
<span class="n">link_xpath</span> <span class="o">=</span> <span class="s1">'.//a[@class="w-gl__result-title result-link"]'</span>
|
||||
<span class="n">content_xpath</span> <span class="o">=</span> <span class="s1">'.//p[@class="w-gl__description"]'</span>
|
||||
<span class="n">search_form_xpath</span> <span class="o">=</span> <span class="s1">'//form[@id="search"]'</span>
|
||||
<span class="sd">"""XPath of Startpage's origin search form</span>
|
||||
|
||||
<span class="sd">.. code:: html</span>
|
||||
|
||||
<span class="sd"> <form action="/sp/search" method="post"></span>
|
||||
<span class="sd"> <input type="text" name="query" value="" ..></span>
|
||||
<span class="sd"> <input type="hidden" name="t" value="device"></span>
|
||||
<span class="sd"> <input type="hidden" name="lui" value="english"></span>
|
||||
<span class="sd"> <input type="hidden" name="sc" value="Q7Mt5TRqowKB00"></span>
|
||||
<span class="sd"> <input type="hidden" name="cat" value="web"></span>
|
||||
<span class="sd"> <input type="hidden" class="abp" id="abp-input" name="abp" value="1"></span>
|
||||
<span class="sd"> </form></span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="c1"># timestamp of the last fetch of 'sc' code</span>
|
||||
<span class="n">sc_code_ts</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="n">sc_code</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">sc_code_cache_sec</span> <span class="o">=</span> <span class="mi">30</span>
|
||||
<span class="sd">"""Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`."""</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_sc_code"><a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.get_sc_code">[docs]</a><span class="k">def</span> <span class="nf">get_sc_code</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Get a current ``sc`` argument from Startpage&#39;s search form (HTML page).</span>
|
||||
|
||||
<span class="sd"> Startpage puts a ``sc`` argument on every HTML :py:obj:`search form</span>
|
||||
<span class="sd"> <search_form_xpath>`. Without this argument Startpage considers the request</span>
|
||||
<span class="sd"> is from a bot. We do not know what is encoded in the value of the ``sc``</span>
|
||||
<span class="sd"> argument, but it seems to be a kind of a *time-stamp*.</span>
|
||||
|
||||
<span class="sd"> Startpage's search form generates a new sc-code on each request. This</span>
|
||||
<span class="sd"> function scrapes a new sc-code from Startpage&#39;s home page every</span>
|
||||
<span class="sd"> :py:obj:`sc_code_cache_sec` seconds.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">global</span> <span class="n">sc_code_ts</span><span class="p">,</span> <span class="n">sc_code</span> <span class="c1"># pylint: disable=global-statement</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">sc_code</span> <span class="ow">and</span> <span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o"><</span> <span class="p">(</span><span class="n">sc_code_ts</span> <span class="o">+</span> <span class="n">sc_code_cache_sec</span><span class="p">)):</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: reuse '</span><span class="si">%s</span><span class="s2">'"</span><span class="p">,</span> <span class="n">sc_code</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">sc_code</span>
|
||||
|
||||
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]}</span>
|
||||
<span class="n">headers</span><span class="p">[</span><span class="s1">'Origin'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span>
|
||||
<span class="n">headers</span><span class="p">[</span><span class="s1">'Referer'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/'</span>
|
||||
<span class="c1"># headers['Connection'] = 'keep-alive'</span>
|
||||
<span class="c1"># headers['Accept-Encoding'] = 'gzip, deflate, br'</span>
|
||||
<span class="c1"># headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'</span>
|
||||
<span class="c1"># headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0'</span>
|
||||
|
||||
<span class="c1"># add Accept-Language header</span>
|
||||
<span class="k">if</span> <span class="n">searxng_locale</span> <span class="o">==</span> <span class="s1">'all'</span><span class="p">:</span>
|
||||
<span class="n">searxng_locale</span> <span class="o">=</span> <span class="s1">'en-US'</span>
|
||||
<span class="n">locale</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">searxng_locale</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">send_accept_language_header</span><span class="p">:</span>
|
||||
<span class="n">ac_lang</span> <span class="o">=</span> <span class="n">locale</span><span class="o">.</span><span class="n">language</span>
|
||||
<span class="k">if</span> <span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">:</span>
|
||||
<span class="n">ac_lang</span> <span class="o">=</span> <span class="s2">"</span><span class="si">%s</span><span class="s2">-</span><span class="si">%s</span><span class="s2">,</span><span class="si">%s</span><span class="s2">;q=0.9,*;q=0.5"</span> <span class="o">%</span> <span class="p">(</span>
|
||||
<span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="p">,</span>
|
||||
<span class="n">locale</span><span class="o">.</span><span class="n">territory</span><span class="p">,</span>
|
||||
<span class="n">locale</span><span class="o">.</span><span class="n">language</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">headers</span><span class="p">[</span><span class="s1">'Accept-Language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">ac_lang</span>
|
||||
|
||||
<span class="n">get_sc_url</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/?sc=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">sc_code</span><span class="p">)</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"query new sc time-stamp ... </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">get_sc_url</span><span class="p">)</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"headers: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">headers</span><span class="p">)</span>
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">get_sc_url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)</span>
|
||||
<span class="c1"># ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg</span>
|
||||
<span class="c1"># ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">str</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'https://www.startpage.com/sp/captcha'</span><span class="p">):</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">(</span>
|
||||
<span class="n">message</span><span class="o">=</span><span class="s2">"get_sc_code: got redirected to https://www.startpage.com/sp/captcha"</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sc_code</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">search_form_xpath</span> <span class="o">+</span> <span class="s1">'//input[@name="sc"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">except</span> <span class="ne">IndexError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"suspend startpage API --> https://github.com/searxng/searxng/pull/695"</span><span class="p">)</span>
|
||||
<span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">(</span>
|
||||
<span class="n">message</span><span class="o">=</span><span class="s2">"get_sc_code: [PR-695] query new sc time-stamp failed! (</span><span class="si">%s</span><span class="s2">)"</span> <span class="o">%</span> <span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="c1"># type: ignore</span>
|
||||
<span class="p">)</span> <span class="kn">from</span> <span class="nn">exc</span>
|
||||
|
||||
<span class="n">sc_code_ts</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: new value is: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">sc_code</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">sc_code</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble a Startpage request.</span>
|
||||
|
||||
<span class="sd"> To avoid CAPTCHA we need to send a well formed HTTP POST request with a</span>
|
||||
<span class="sd"> cookie. We need to form a request that is identical to the request built by</span>
|
||||
<span class="sd"> Startpage's search form:</span>
|
||||
|
||||
<span class="sd"> - in the cookie the **region** is selected</span>
|
||||
<span class="sd"> - in the HTTP POST data the **language** is selected</span>
|
||||
|
||||
<span class="sd"> Additionally the arguments from Startpage&#39;s search form need to be set in</span>
|
||||
<span class="sd"> HTTP POST data / compare ``&lt;input&gt;`` elements: :py:obj:`search_form_xpath`.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">startpage_categ</span> <span class="o">==</span> <span class="s1">'web'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">_request_cat_web</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
|
||||
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Startpages's category '%' is not yet implemented."</span><span class="p">,</span> <span class="n">startpage_categ</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_request_cat_web</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
|
||||
<span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en-US'</span><span class="p">)</span>
|
||||
<span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="s1">'en'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># build arguments</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'cat'</span><span class="p">:</span> <span class="s1">'web'</span><span class="p">,</span>
|
||||
<span class="s1">'t'</span><span class="p">:</span> <span class="s1">'device'</span><span class="p">,</span>
|
||||
<span class="s1">'sc'</span><span class="p">:</span> <span class="n">get_sc_code</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">params</span><span class="p">),</span> <span class="c1"># hint: this func needs HTTP headers,</span>
|
||||
<span class="s1">'with_date'</span><span class="p">:</span> <span class="n">time_range_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">],</span> <span class="s1">''</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">engine_language</span><span class="p">:</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'lui'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||||
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'abp'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">args</span><span class="p">[</span><span class="s1">'page'</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># build cookie</span>
|
||||
<span class="n">lang_homepage</span> <span class="o">=</span> <span class="s1">'en'</span>
|
||||
<span class="n">cookie</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'date_time'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'world'</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'disable_family_filter'</span><span class="p">]</span> <span class="o">=</span> <span class="n">safesearch_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'disable_open_in_new_window'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'0'</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'enable_post_method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span> <span class="c1"># hint: POST</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'enable_proxy_safety_suggest'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'enable_stay_control'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'instant_answers'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'lang_homepage'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'s/device/</span><span class="si">%s</span><span class="s1">/'</span> <span class="o">%</span> <span class="n">lang_homepage</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'num_of_results'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'10'</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'suggestions'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'1'</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'wt_unit'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'celsius'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">engine_language</span><span class="p">:</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'language_ui'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">engine_region</span><span class="p">:</span>
|
||||
<span class="n">cookie</span><span class="p">[</span><span class="s1">'search_results_region'</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_region</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'preferences'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'N1N'</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s2">"</span><span class="si">%s</span><span class="s2">EEE</span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cookie</span><span class="o">.</span><span class="n">items</span><span class="p">()])</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'cookie preferences: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">][</span><span class="s1">'preferences'</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># POST request</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"data: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'POST'</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Origin'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Referer'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/'</span>
|
||||
<span class="c1"># is the Accept header needed?</span>
|
||||
<span class="c1"># params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="c1"># get response from search-request</span>
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">startpage_categ</span> <span class="o">==</span> <span class="s1">'web'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">_response_cat_web</span><span class="p">(</span><span class="n">dom</span><span class="p">)</span>
|
||||
|
||||
    <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Startpage's category '</span><span class="si">%s</span><span class="s2">' is not yet implemented."</span><span class="p">,</span> <span class="n">startpage_categ</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_response_cat_web</span><span class="p">(</span><span class="n">dom</span><span class="p">):</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="c1"># parse results</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">results_xpath</span><span class="p">):</span>
|
||||
<span class="n">links</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">link_xpath</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">links</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">link</span> <span class="o">=</span> <span class="n">links</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">link</span><span class="o">.</span><span class="n">attrib</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'href'</span><span class="p">)</span>
|
||||
|
||||
        <span class="c1"># block google-ad URLs</span>
|
||||
<span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^http(s|)://(www\.)?google\.[a-z]+/aclk.*$"</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
        <span class="c1"># block startpage search URLs</span>
|
||||
<span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^http(s|)://(www\.)?startpage\.com/do/search\?.*$"</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">link</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">):</span>
|
||||
<span class="n">content</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">))</span> <span class="c1"># type: ignore</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
|
||||
<span class="n">published_date</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="c1"># check if search result starts with something like: "2 Sep 2014 ... "</span>
|
||||
<span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]</span><span class="si">{2}</span><span class="s2"> [0-9]</span><span class="si">{4}</span><span class="s2"> \.\.\. "</span><span class="p">,</span> <span class="n">content</span><span class="p">):</span>
|
||||
<span class="n">date_pos</span> <span class="o">=</span> <span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'...'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">4</span>
|
||||
<span class="n">date_string</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="mi">0</span> <span class="p">:</span> <span class="n">date_pos</span> <span class="o">-</span> <span class="mi">5</span><span class="p">]</span>
|
||||
<span class="c1"># fix content string</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">date_pos</span><span class="p">:]</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">published_date</span> <span class="o">=</span> <span class="n">dateutil</span><span class="o">.</span><span class="n">parser</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">date_string</span><span class="p">,</span> <span class="n">dayfirst</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||||
<span class="k">pass</span>
|
||||
|
||||
<span class="c1"># check if search result starts with something like: "5 days ago ... "</span>
|
||||
<span class="k">elif</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^[0-9]+ days? ago \.\.\. "</span><span class="p">,</span> <span class="n">content</span><span class="p">):</span>
|
||||
<span class="n">date_pos</span> <span class="o">=</span> <span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'...'</span><span class="p">)</span> <span class="o">+</span> <span class="mi">4</span>
|
||||
<span class="n">date_string</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="mi">0</span> <span class="p">:</span> <span class="n">date_pos</span> <span class="o">-</span> <span class="mi">5</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># calculate datetime</span>
|
||||
<span class="n">published_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\d+'</span><span class="p">,</span> <span class="n">date_string</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">()))</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="c1"># fix content string</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">date_pos</span><span class="p">:]</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">published_date</span><span class="p">:</span>
|
||||
<span class="c1"># append result</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> <span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">published_date</span><span class="p">})</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># append result</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">})</span>
|
||||
|
||||
<span class="c1"># return results</span>
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch :ref:`languages <startpage languages>` and :ref:`regions <startpage</span>
|
||||
<span class="sd"> regions>` from Startpage."""</span>
|
||||
<span class="c1"># pylint: disable=too-many-branches</span>
|
||||
|
||||
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'User-Agent'</span><span class="p">:</span> <span class="n">gen_useragent</span><span class="p">(),</span>
|
||||
        <span class="s1">'Accept-Language'</span><span class="p">:</span> <span class="s2">"en-US,en;q=0.5"</span><span class="p">,</span>  <span class="c1"># request the English version of Startpage's settings page</span>
|
||||
<span class="p">}</span>
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="s1">'https://www.startpage.com/do/settings'</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from Startpage is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="c1"># regions</span>
|
||||
|
||||
<span class="n">sp_region_names</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//form[@name="settings"]//select[@name="search_results_region"]/option'</span><span class="p">):</span>
|
||||
<span class="n">sp_region_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">))</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">sp_region_names</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="o">==</span> <span class="s1">'all'</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">babel_region_tag</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'no_NO'</span><span class="p">:</span> <span class="s1">'nb_NO'</span><span class="p">}</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">)</span> <span class="c1"># norway</span>
|
||||
|
||||
<span class="k">if</span> <span class="s1">'-'</span> <span class="ow">in</span> <span class="n">babel_region_tag</span><span class="p">:</span>
|
||||
<span class="n">l</span><span class="p">,</span> <span class="n">r</span> <span class="o">=</span> <span class="n">babel_region_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="n">r</span> <span class="o">=</span> <span class="n">r</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">l</span> <span class="o">+</span> <span class="s1">'_'</span> <span class="o">+</span> <span class="n">r</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'_'</span><span class="p">))</span>
|
||||
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_region_tag</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'_'</span><span class="p">))</span>
|
||||
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: can't determine babel locale of startpage's locale </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
|
||||
<span class="c1"># languages</span>
|
||||
|
||||
<span class="n">catalog_engine2code</span> <span class="o">=</span> <span class="p">{</span><span class="n">name</span><span class="o">.</span><span class="n">lower</span><span class="p">():</span> <span class="n">lang_code</span> <span class="k">for</span> <span class="n">lang_code</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">(</span><span class="s1">'en'</span><span class="p">)</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||||
|
||||
<span class="c1"># get the native name of every language known by babel</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">lang_code</span> <span class="ow">in</span> <span class="nb">filter</span><span class="p">(</span>
|
||||
<span class="k">lambda</span> <span class="n">lang_code</span><span class="p">:</span> <span class="n">lang_code</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'_'</span><span class="p">)</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">babel</span><span class="o">.</span><span class="n">localedata</span><span class="o">.</span><span class="n">locale_identifiers</span><span class="p">()</span> <span class="c1"># type: ignore</span>
|
||||
<span class="p">):</span>
|
||||
<span class="n">native_name</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">(</span><span class="n">lang_code</span><span class="p">)</span><span class="o">.</span><span class="n">get_language_name</span><span class="p">()</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="c1"># type: ignore</span>
|
||||
<span class="c1"># add native name exactly as it is</span>
|
||||
<span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">native_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang_code</span>
|
||||
|
||||
<span class="c1"># add "normalized" language name (i.e. français becomes francais and español becomes espanol)</span>
|
||||
<span class="n">unaccented_name</span> <span class="o">=</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">c</span><span class="p">:</span> <span class="ow">not</span> <span class="n">combining</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">normalize</span><span class="p">(</span><span class="s1">'NFKD'</span><span class="p">,</span> <span class="n">native_name</span><span class="p">)))</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">unaccented_name</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">unaccented_name</span><span class="o">.</span><span class="n">encode</span><span class="p">()):</span>
|
||||
<span class="c1"># add only if result is ascii (otherwise "normalization" didn't work)</span>
|
||||
<span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">unaccented_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang_code</span>
|
||||
|
||||
<span class="c1"># values that can't be determined by babel's languages names</span>
|
||||
|
||||
<span class="n">catalog_engine2code</span><span class="o">.</span><span class="n">update</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="c1"># traditional chinese used in ..</span>
|
||||
<span class="s1">'fantizhengwen'</span><span class="p">:</span> <span class="s1">'zh_Hant'</span><span class="p">,</span>
|
||||
<span class="c1"># Korean alphabet</span>
|
||||
<span class="s1">'hangul'</span><span class="p">:</span> <span class="s1">'ko'</span><span class="p">,</span>
|
||||
<span class="c1"># Malayalam is one of 22 scheduled languages of India.</span>
|
||||
<span class="s1">'malayam'</span><span class="p">:</span> <span class="s1">'ml'</span><span class="p">,</span>
|
||||
<span class="s1">'norsk'</span><span class="p">:</span> <span class="s1">'nb'</span><span class="p">,</span>
|
||||
<span class="s1">'sinhalese'</span><span class="p">:</span> <span class="s1">'si'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">skip_eng_tags</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'english_uk'</span><span class="p">,</span> <span class="c1"># SearXNG lang 'en' already maps to 'english'</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//form[@name="settings"]//select[@name="language"]/option'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'value'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">skip_eng_tags</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">name</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">option</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">catalog_engine2code</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">sxng_tag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">name</span><span class="p">]</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
337
_modules/searx/engines/tineye.html
Normal file
337
_modules/searx/engines/tineye.html
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.tineye — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.tineye</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.tineye</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This engine implements *Tineye - reverse image search*</span>
|
||||
|
||||
<span class="sd">Using TinEye, you can search by image or perform what we call a reverse image</span>
|
||||
<span class="sd">search. You can do that by uploading an image or searching by URL. You can also</span>
|
||||
<span class="sd">simply drag and drop your images to start your search. TinEye constantly crawls</span>
|
||||
<span class="sd">the web and adds images to its index. Today, the TinEye index is over 50.2</span>
|
||||
<span class="sd">billion images `[tineye.com] <https://tineye.com/how>`_.</span>
|
||||
|
||||
<span class="sd">.. hint::</span>
|
||||
|
||||
<span class="sd"> This SearXNG engine only supports *'searching by URL'* and it does not use</span>
|
||||
<span class="sd"> the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
|
||||
<span class="kn">from</span> <span class="nn">flask_babel</span> <span class="kn">import</span> <span class="n">gettext</span>
|
||||
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://tineye.com'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2382535'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://api.tineye.com/python/docs/'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">engine_type</span> <span class="o">=</span> <span class="s1">'online_url_search'</span>
|
||||
<span class="sd">""":py:obj:`searx.search.processors.online_url_search`"""</span>
|
||||
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://tineye.com'</span>
|
||||
<span class="n">search_string</span> <span class="o">=</span> <span class="s1">'/result_json/?page=</span><span class="si">{page}</span><span class="s1">&</span><span class="si">{query}</span><span class="s1">'</span>
|
||||
|
||||
<span class="n">FORMAT_NOT_SUPPORTED</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span>
|
||||
<span class="s2">"Could not read that image url. This may be due to an unsupported file"</span>
|
||||
<span class="s2">" format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."</span>
|
||||
<span class="p">)</span>
|
||||
<span class="sd">"""TinEye error message"""</span>
|
||||
|
||||
<span class="n">NO_SIGNATURE_ERROR</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span>
|
||||
<span class="s2">"The image is too simple to find matches. TinEye requires a basic level of"</span>
|
||||
<span class="s2">" visual detail to successfully identify matches."</span>
|
||||
<span class="p">)</span>
|
||||
<span class="sd">"""TinEye error message"""</span>
|
||||
|
||||
<span class="n">DOWNLOAD_ERROR</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"The image could not be downloaded."</span><span class="p">)</span>
|
||||
<span class="sd">"""TinEye error message"""</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online_url_search/tineye.html#searx.engines.tineye.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`."""</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'data:image'</span><span class="p">]:</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'data:image'</span><span class="p">]</span>
|
||||
<span class="k">elif</span> <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'http'</span><span class="p">]:</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'search_urls'</span><span class="p">][</span><span class="s1">'http'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"query URL: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">query</span><span class="p">)</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">query</span><span class="p">})</span>
|
||||
|
||||
<span class="c1"># see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">search_string</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">query</span><span class="p">,</span> <span class="n">page</span><span class="o">=</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">])</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'Connection'</span><span class="p">:</span> <span class="s1">'keep-alive'</span><span class="p">,</span>
|
||||
<span class="s1">'Accept-Encoding'</span><span class="p">:</span> <span class="s1">'gzip, defalte, br'</span><span class="p">,</span>
|
||||
<span class="s1">'Host'</span><span class="p">:</span> <span class="s1">'tineye.com'</span><span class="p">,</span>
|
||||
<span class="s1">'DNT'</span><span class="p">:</span> <span class="s1">'1'</span><span class="p">,</span>
|
||||
<span class="s1">'TE'</span><span class="p">:</span> <span class="s1">'trailers'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="parse_tineye_match"><a class="viewcode-back" href="../../../dev/engines/online_url_search/tineye.html#searx.engines.tineye.parse_tineye_match">[docs]</a><span class="k">def</span> <span class="nf">parse_tineye_match</span><span class="p">(</span><span class="n">match_json</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Takes parsed JSON from the API server and turns it into a :py:obj:`dict`</span>
|
||||
<span class="sd"> object.</span>
|
||||
|
||||
<span class="sd"> Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__</span>
|
||||
|
||||
<span class="sd"> - `image_url`, link to the result image.</span>
|
||||
<span class="sd"> - `domain`, domain this result was found on.</span>
|
||||
<span class="sd"> - `score`, a number (0 to 100) that indicates how closely the images match.</span>
|
||||
<span class="sd"> - `width`, image width in pixels.</span>
|
||||
<span class="sd"> - `height`, image height in pixels.</span>
|
||||
<span class="sd"> - `size`, image area in pixels.</span>
|
||||
<span class="sd"> - `format`, image format.</span>
|
||||
<span class="sd"> - `filesize`, image size in bytes.</span>
|
||||
<span class="sd"> - `overlay`, overlay URL.</span>
|
||||
<span class="sd"> - `tags`, whether this match belongs to a collection or stock domain.</span>
|
||||
|
||||
<span class="sd"> - `backlinks`, a list of Backlink objects pointing to the original websites</span>
|
||||
<span class="sd"> and image URLs. List items are instances of :py:obj:`dict`, (`Backlink</span>
|
||||
<span class="sd"> <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):</span>
|
||||
|
||||
<span class="sd"> - `url`, the image URL to the image.</span>
|
||||
<span class="sd"> - `backlink`, the original website URL.</span>
|
||||
<span class="sd"> - `crawl_date`, the date the image was crawled.</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="c1"># HINT: there exists an alternative backlink dict in the domains list / e.g.::</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># match_json['domains'][0]['backlinks']</span>
|
||||
|
||||
<span class="n">backlinks</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">if</span> <span class="s2">"backlinks"</span> <span class="ow">in</span> <span class="n">match_json</span><span class="p">:</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">backlink_json</span> <span class="ow">in</span> <span class="n">match_json</span><span class="p">[</span><span class="s2">"backlinks"</span><span class="p">]:</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">backlink_json</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">crawl_date</span> <span class="o">=</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"crawl_date"</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">crawl_date</span><span class="p">:</span>
|
||||
<span class="n">crawl_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromisoformat</span><span class="p">(</span><span class="n">crawl_date</span><span class="p">[:</span><span class="o">-</span><span class="mi">3</span><span class="p">])</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">crawl_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">min</span>
|
||||
|
||||
<span class="n">backlinks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"url"</span><span class="p">),</span>
|
||||
<span class="s1">'backlink'</span><span class="p">:</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"backlink"</span><span class="p">),</span>
|
||||
<span class="s1">'crawl_date'</span><span class="p">:</span> <span class="n">crawl_date</span><span class="p">,</span>
|
||||
<span class="s1">'image_name'</span><span class="p">:</span> <span class="n">backlink_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"image_name"</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="p">{</span>
|
||||
<span class="s1">'image_url'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"image_url"</span><span class="p">),</span>
|
||||
<span class="s1">'domain'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"domain"</span><span class="p">),</span>
|
||||
<span class="s1">'score'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"score"</span><span class="p">),</span>
|
||||
<span class="s1">'width'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"width"</span><span class="p">),</span>
|
||||
<span class="s1">'height'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"height"</span><span class="p">),</span>
|
||||
<span class="s1">'size'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"size"</span><span class="p">),</span>
|
||||
<span class="s1">'image_format'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"format"</span><span class="p">),</span>
|
||||
<span class="s1">'filesize'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"filesize"</span><span class="p">),</span>
|
||||
<span class="s1">'overlay'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"overlay"</span><span class="p">),</span>
|
||||
<span class="s1">'tags'</span><span class="p">:</span> <span class="n">match_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"tags"</span><span class="p">),</span>
|
||||
<span class="s1">'backlinks'</span><span class="p">:</span> <span class="n">backlinks</span><span class="p">,</span>
|
||||
<span class="p">}</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online_url_search/tineye.html#searx.engines.tineye.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Parse HTTP response from TinEye."""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">json_data</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
||||
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
|
||||
<span class="n">msg</span> <span class="o">=</span> <span class="s2">"can't parse JSON response // </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">exc</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
|
||||
<span class="n">json_data</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'error'</span><span class="p">:</span> <span class="n">msg</span><span class="p">}</span>
|
||||
|
||||
<span class="c1"># handle error codes from Tineye</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">is_error</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">400</span><span class="p">,</span> <span class="mi">422</span><span class="p">):</span>
|
||||
|
||||
<span class="n">message</span> <span class="o">=</span> <span class="s1">'HTTP status: </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span>
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">json_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'error'</span><span class="p">)</span>
|
||||
<span class="n">s_key</span> <span class="o">=</span> <span class="n">json_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'suggestions'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'key'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">error</span> <span class="ow">and</span> <span class="n">s_key</span><span class="p">:</span>
|
||||
<span class="n">message</span> <span class="o">=</span> <span class="s2">"</span><span class="si">%s</span><span class="s2"> (</span><span class="si">%s</span><span class="s2">)"</span> <span class="o">%</span> <span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="n">s_key</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">error</span><span class="p">:</span>
|
||||
<span class="n">message</span> <span class="o">=</span> <span class="n">error</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">s_key</span> <span class="o">==</span> <span class="s2">"Invalid image URL"</span><span class="p">:</span>
|
||||
<span class="c1"># test https://docs.searxng.org/_static/searxng-wordmark.svg</span>
|
||||
<span class="n">message</span> <span class="o">=</span> <span class="n">FORMAT_NOT_SUPPORTED</span>
|
||||
<span class="k">elif</span> <span class="n">s_key</span> <span class="o">==</span> <span class="s1">'NO_SIGNATURE_ERROR'</span><span class="p">:</span>
|
||||
<span class="c1"># test https://pngimg.com/uploads/dot/dot_PNG4.png</span>
|
||||
<span class="n">message</span> <span class="o">=</span> <span class="n">NO_SIGNATURE_ERROR</span>
|
||||
<span class="k">elif</span> <span class="n">s_key</span> <span class="o">==</span> <span class="s1">'Download Error'</span><span class="p">:</span>
|
||||
<span class="c1"># test https://notexists</span>
|
||||
<span class="n">message</span> <span class="o">=</span> <span class="n">DOWNLOAD_ERROR</span>
|
||||
|
||||
<span class="c1"># see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023</span>
|
||||
<span class="c1"># results.append({'answer': message})</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">message</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
<span class="n">resp</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># append results from matches</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">match_json</span> <span class="ow">in</span> <span class="n">json_data</span><span class="p">[</span><span class="s1">'matches'</span><span class="p">]:</span>
|
||||
|
||||
<span class="n">tineye_match</span> <span class="o">=</span> <span class="n">parse_tineye_match</span><span class="p">(</span><span class="n">match_json</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'backlinks'</span><span class="p">]:</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">backlink</span> <span class="o">=</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'backlinks'</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'images.html'</span><span class="p">,</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'backlink'</span><span class="p">],</span>
|
||||
<span class="s1">'thumbnail_src'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'image_url'</span><span class="p">],</span>
|
||||
<span class="s1">'source'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'image_name'</span><span class="p">],</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'url'</span><span class="p">],</span>
|
||||
<span class="s1">'format'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'image_format'</span><span class="p">],</span>
|
||||
<span class="s1">'widht'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'width'</span><span class="p">],</span>
|
||||
<span class="s1">'height'</span><span class="p">:</span> <span class="n">tineye_match</span><span class="p">[</span><span class="s1">'height'</span><span class="p">],</span>
|
||||
<span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">backlink</span><span class="p">[</span><span class="s1">'crawl_date'</span><span class="p">],</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="c1"># append number of results</span>
|
||||
|
||||
<span class="n">number_of_results</span> <span class="o">=</span> <span class="n">json_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'num_matches'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">number_of_results</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'number_of_results'</span><span class="p">:</span> <span class="n">number_of_results</span><span class="p">})</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
355
_modules/searx/engines/torznab.html
Normal file
355
_modules/searx/engines/torznab.html
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.torznab — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.torznab</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.torznab</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Torznab_ is an API specification that provides a standardized way to query</span>
|
||||
<span class="sd">torrent sites for content. It is used by a number of torrent applications,</span>
|
||||
<span class="sd">including Prowlarr_ and Jackett_.</span>
|
||||
|
||||
<span class="sd">Using this engine together with Prowlarr_ or Jackett_ allows you to search</span>
|
||||
<span class="sd">a huge number of torrent sites which are not directly supported.</span>
|
||||
|
||||
<span class="sd">Configuration</span>
|
||||
<span class="sd">=============</span>
|
||||
|
||||
<span class="sd">The engine has the following settings:</span>
|
||||
|
||||
<span class="sd">``base_url``:</span>
|
||||
<span class="sd"> Torznab endpoint URL.</span>
|
||||
|
||||
<span class="sd">``api_key``:</span>
|
||||
<span class="sd"> The API key to use for authentication.</span>
|
||||
|
||||
<span class="sd">``torznab_categories``:</span>
|
||||
<span class="sd"> The categories to use for searching. This is a list of category IDs. See</span>
|
||||
<span class="sd"> Prowlarr-categories_ or Jackett-categories_ for more information.</span>
|
||||
|
||||
<span class="sd">``show_torrent_files``:</span>
|
||||
<span class="sd"> Whether to show the torrent file in the search results. Be careful as using</span>
|
||||
<span class="sd"> this with Prowlarr_ or Jackett_ leaks the API key. This should be used only</span>
|
||||
<span class="sd"> if you are querying a Torznab endpoint without authentication or if the</span>
|
||||
<span class="sd"> instance is private. Be aware that private trackers may ban you if you share</span>
|
||||
<span class="sd"> the torrent file. Defaults to ``false``.</span>
|
||||
|
||||
<span class="sd">``show_magnet_links``:</span>
|
||||
<span class="sd"> Whether to show the magnet link in the search results. Be aware that private</span>
|
||||
<span class="sd"> trackers may ban you if you share the magnet link. Defaults to ``true``.</span>
|
||||
|
||||
<span class="sd">.. _Torznab:</span>
|
||||
<span class="sd"> https://torznab.github.io/spec-1.3-draft/index.html</span>
|
||||
<span class="sd">.. _Prowlarr:</span>
|
||||
<span class="sd"> https://github.com/Prowlarr/Prowlarr</span>
|
||||
<span class="sd">.. _Jackett:</span>
|
||||
<span class="sd"> https://github.com/Jackett/Jackett</span>
|
||||
<span class="sd">.. _Prowlarr-categories:</span>
|
||||
<span class="sd"> https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories</span>
|
||||
<span class="sd">.. _Jackett-categories:</span>
|
||||
<span class="sd"> https://github.com/Jackett/Jackett/wiki/Jackett-Categories</span>
|
||||
|
||||
<span class="sd">Implementations</span>
|
||||
<span class="sd">===============</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Any</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">quote</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">etree</span> <span class="c1"># type: ignore</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.exceptions</span> <span class="kn">import</span> <span class="n">SearxEngineAPIException</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">httpx</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="c1"># engine settings</span>
|
||||
<span class="n">about</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s2">"https://torznab.github.io/spec-1.3-draft"</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'XML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="n">categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'files'</span><span class="p">]</span>
|
||||
<span class="n">paging</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">time_range_support</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="c1"># defined in settings.yml</span>
|
||||
<span class="c1"># example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"</span>
|
||||
<span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">api_key</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="c1"># https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories</span>
|
||||
<span class="n">torznab_categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">show_torrent_files</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">show_magnet_links</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="init"><a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.init">[docs]</a><span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span>
|
||||
<span class="w"> </span><span class="sd">"""Initialize the engine."""</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">base_url</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'missing torznab base_url'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
|
||||
<span class="w"> </span><span class="sd">"""Build the request params."""</span>
|
||||
<span class="n">search_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s1">'?t=search&q=</span><span class="si">{search_query}</span><span class="s1">'</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">api_key</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">search_url</span> <span class="o">+=</span> <span class="s1">'&apikey=</span><span class="si">{api_key}</span><span class="s1">'</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">torznab_categories</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">search_url</span> <span class="o">+=</span> <span class="s1">'&cat=</span><span class="si">{torznab_categories}</span><span class="s1">'</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
|
||||
<span class="n">search_query</span><span class="o">=</span><span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">),</span> <span class="n">api_key</span><span class="o">=</span><span class="n">api_key</span><span class="p">,</span> <span class="n">torznab_categories</span><span class="o">=</span><span class="s2">","</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">torznab_categories</span><span class="p">])</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
|
||||
<span class="w"> </span><span class="sd">"""Parse the XML response and return a list of results."""</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">search_results</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XML</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># handle errors: https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes</span>
|
||||
<span class="k">if</span> <span class="n">search_results</span><span class="o">.</span><span class="n">tag</span> <span class="o">==</span> <span class="s2">"error"</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="n">SearxEngineAPIException</span><span class="p">(</span><span class="n">search_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"description"</span><span class="p">))</span>
|
||||
|
||||
<span class="n">channel</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">=</span> <span class="n">search_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span>
|
||||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">channel</span><span class="o">.</span><span class="n">iterfind</span><span class="p">(</span><span class="s1">'item'</span><span class="p">):</span>
|
||||
<span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="n">build_result</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="build_result"><a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.build_result">[docs]</a><span class="k">def</span> <span class="nf">build_result</span><span class="p">(</span><span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
|
||||
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Build a result from an XML item.&quot;&quot;&quot;</span>
|
||||
|
||||
<span class="c1"># extract attributes from XML</span>
|
||||
<span class="c1"># see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes</span>
|
||||
<span class="n">enclosure</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'enclosure'</span><span class="p">)</span>
|
||||
<span class="n">enclosure_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="n">enclosure</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">enclosure_url</span> <span class="o">=</span> <span class="n">enclosure</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'url'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">size</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'size'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">size</span> <span class="ow">and</span> <span class="n">enclosure</span><span class="p">:</span>
|
||||
<span class="n">size</span> <span class="o">=</span> <span class="n">enclosure</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'length'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">size</span><span class="p">:</span>
|
||||
<span class="n">size</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">size</span><span class="p">)</span>
|
||||
|
||||
<span class="n">guid</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'guid'</span><span class="p">)</span>
|
||||
<span class="n">comments</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'comments'</span><span class="p">)</span>
|
||||
<span class="n">pubDate</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'pubDate'</span><span class="p">)</span>
|
||||
<span class="n">seeders</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'seeders'</span><span class="p">)</span>
|
||||
<span class="n">leechers</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'leechers'</span><span class="p">)</span>
|
||||
<span class="n">peers</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'peers'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># map attributes to searx result</span>
|
||||
<span class="n">result</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'template'</span><span class="p">:</span> <span class="s1">'torrent.html'</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">),</span>
|
||||
<span class="s1">'filesize'</span><span class="p">:</span> <span class="n">size</span><span class="p">,</span>
|
||||
<span class="s1">'files'</span><span class="p">:</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'files'</span><span class="p">),</span>
|
||||
<span class="s1">'seed'</span><span class="p">:</span> <span class="n">seeders</span><span class="p">,</span>
|
||||
<span class="s1">'leech'</span><span class="p">:</span> <span class="n">_map_leechers</span><span class="p">(</span><span class="n">leechers</span><span class="p">,</span> <span class="n">seeders</span><span class="p">,</span> <span class="n">peers</span><span class="p">),</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">_map_result_url</span><span class="p">(</span><span class="n">guid</span><span class="p">,</span> <span class="n">comments</span><span class="p">),</span>
|
||||
<span class="s1">'publishedDate'</span><span class="p">:</span> <span class="n">_map_published_date</span><span class="p">(</span><span class="n">pubDate</span><span class="p">),</span>
|
||||
<span class="s1">'torrentfile'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s1">'magnetlink'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">link</span> <span class="o">=</span> <span class="n">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'link'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">show_torrent_files</span><span class="p">:</span>
|
||||
<span class="n">result</span><span class="p">[</span><span class="s1">'torrentfile'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_map_torrent_file</span><span class="p">(</span><span class="n">link</span><span class="p">,</span> <span class="n">enclosure_url</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">show_magnet_links</span><span class="p">:</span>
|
||||
<span class="n">magneturl</span> <span class="o">=</span> <span class="n">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'magneturl'</span><span class="p">)</span>
|
||||
<span class="n">result</span><span class="p">[</span><span class="s1">'magnetlink'</span><span class="p">]</span> <span class="o">=</span> <span class="n">_map_magnet_link</span><span class="p">(</span><span class="n">magneturl</span><span class="p">,</span> <span class="n">guid</span><span class="p">,</span> <span class="n">enclosure_url</span><span class="p">,</span> <span class="n">link</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">result</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_map_result_url</span><span class="p">(</span><span class="n">guid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">comments</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">guid</span> <span class="ow">and</span> <span class="n">guid</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">guid</span>
|
||||
<span class="k">if</span> <span class="n">comments</span> <span class="ow">and</span> <span class="n">comments</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">comments</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_map_leechers</span><span class="p">(</span><span class="n">leechers</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">seeders</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">peers</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">leechers</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">leechers</span>
|
||||
<span class="k">if</span> <span class="n">seeders</span> <span class="ow">and</span> <span class="n">peers</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">peers</span><span class="p">)</span> <span class="o">-</span> <span class="nb">int</span><span class="p">(</span><span class="n">seeders</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_map_published_date</span><span class="p">(</span><span class="n">pubDate</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">datetime</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">pubDate</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">pubDate</span><span class="p">,</span> <span class="s1">'</span><span class="si">%a</span><span class="s1">, </span><span class="si">%d</span><span class="s1"> %b %Y %H:%M:%S %z'</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="p">(</span><span class="ne">ValueError</span><span class="p">,</span> <span class="ne">TypeError</span><span class="p">)</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"ignore exception (publishedDate): </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_map_torrent_file</span><span class="p">(</span><span class="n">link</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">enclosure_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">link</span> <span class="ow">and</span> <span class="n">link</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">link</span>
|
||||
<span class="k">if</span> <span class="n">enclosure_url</span> <span class="ow">and</span> <span class="n">enclosure_url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'http'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">enclosure_url</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_map_magnet_link</span><span class="p">(</span>
|
||||
<span class="n">magneturl</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">guid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">enclosure_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">link</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">magneturl</span> <span class="ow">and</span> <span class="n">magneturl</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">magneturl</span>
|
||||
<span class="k">if</span> <span class="n">guid</span> <span class="ow">and</span> <span class="n">guid</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">guid</span>
|
||||
<span class="k">if</span> <span class="n">enclosure_url</span> <span class="ow">and</span> <span class="n">enclosure_url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">enclosure_url</span>
|
||||
<span class="k">if</span> <span class="n">link</span> <span class="ow">and</span> <span class="n">link</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'magnet'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">link</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_attribute"><a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.get_attribute">[docs]</a><span class="k">def</span> <span class="nf">get_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span><span class="p">,</span> <span class="n">property_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""Get attribute from item."""</span>
|
||||
<span class="n">property_element</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="n">property_name</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">property_element</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">property_element</span><span class="o">.</span><span class="n">text</span>
|
||||
<span class="k">return</span> <span class="kc">None</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_torznab_attribute"><a class="viewcode-back" href="../../../dev/engines/online/torznab.html#searx.engines.torznab.get_torznab_attribute">[docs]</a><span class="k">def</span> <span class="nf">get_torznab_attribute</span><span class="p">(</span><span class="n">item</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span><span class="p">,</span> <span class="n">attribute_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""Get torznab special attribute from item."""</span>
|
||||
<span class="n">element</span><span class="p">:</span> <span class="n">etree</span><span class="o">.</span><span class="n">Element</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span>
|
||||
<span class="s1">'.//torznab:attr[@name="</span><span class="si">{attribute_name}</span><span class="s1">"]'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">attribute_name</span><span class="o">=</span><span class="n">attribute_name</span><span class="p">),</span>
|
||||
<span class="p">{</span><span class="s1">'torznab'</span><span class="p">:</span> <span class="s1">'http://torznab.com/schemas/2015/feed'</span><span class="p">},</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">element</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">element</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="kc">None</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a></li>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a></li>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a></li>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a></li>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a></li>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
895
_modules/searx/engines/wikidata.html
Normal file
895
_modules/searx/engines/wikidata.html
Normal file
|
|
@ -0,0 +1,895 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.wikidata — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.wikidata</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.wikidata</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This module implements the Wikidata engine. Some implementations are shared</span>
|
||||
<span class="sd">from :ref:`wikipedia engine`.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
<span class="c1"># pylint: disable=missing-class-docstring</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">from</span> <span class="nn">hashlib</span> <span class="kn">import</span> <span class="n">md5</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span><span class="p">,</span> <span class="n">unquote</span>
|
||||
<span class="kn">from</span> <span class="nn">json</span> <span class="kn">import</span> <span class="n">loads</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">dateutil.parser</span> <span class="kn">import</span> <span class="n">isoparse</span>
|
||||
<span class="kn">from</span> <span class="nn">babel.dates</span> <span class="kn">import</span> <span class="n">format_datetime</span><span class="p">,</span> <span class="n">format_date</span><span class="p">,</span> <span class="n">format_time</span><span class="p">,</span> <span class="n">get_datetime_format</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">WIKIDATA_UNITS</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">post</span><span class="p">,</span> <span class="n">get</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">searx_useragent</span><span class="p">,</span> <span class="n">get_string_replaces_function</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.external_urls</span> <span class="kn">import</span> <span class="n">get_external_url</span><span class="p">,</span> <span class="n">get_earth_coordinates_url</span><span class="p">,</span> <span class="n">area_to_osm_zoom</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.engines.wikipedia</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">fetch_wikimedia_traits</span><span class="p">,</span>
|
||||
<span class="n">get_wiki_params</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://wikidata.org/'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q2013'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://query.wikidata.org/'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># SPARQL</span>
|
||||
<span class="n">SPARQL_ENDPOINT_URL</span> <span class="o">=</span> <span class="s1">'https://query.wikidata.org/sparql'</span>
|
||||
<span class="n">SPARQL_EXPLAIN_URL</span> <span class="o">=</span> <span class="s1">'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'</span>
|
||||
<span class="n">WIKIDATA_PROPERTIES</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'P434'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span>
|
||||
<span class="s1">'P435'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span>
|
||||
<span class="s1">'P436'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span>
|
||||
<span class="s1">'P966'</span><span class="p">:</span> <span class="s1">'MusicBrainz'</span><span class="p">,</span>
|
||||
<span class="s1">'P345'</span><span class="p">:</span> <span class="s1">'IMDb'</span><span class="p">,</span>
|
||||
<span class="s1">'P2397'</span><span class="p">:</span> <span class="s1">'YouTube'</span><span class="p">,</span>
|
||||
<span class="s1">'P1651'</span><span class="p">:</span> <span class="s1">'YouTube'</span><span class="p">,</span>
|
||||
<span class="s1">'P2002'</span><span class="p">:</span> <span class="s1">'Twitter'</span><span class="p">,</span>
|
||||
<span class="s1">'P2013'</span><span class="p">:</span> <span class="s1">'Facebook'</span><span class="p">,</span>
|
||||
<span class="s1">'P2003'</span><span class="p">:</span> <span class="s1">'Instagram'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># SERVICE wikibase:mwapi : https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI</span>
|
||||
<span class="c1"># SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE</span>
|
||||
<span class="c1"># https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates</span>
|
||||
<span class="c1"># https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model</span>
|
||||
<span class="c1"># optimization:</span>
|
||||
<span class="c1"># * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization</span>
|
||||
<span class="c1"># * https://github.com/blazegraph/database/wiki/QueryHints</span>
|
||||
<span class="n">QUERY_TEMPLATE</span> <span class="o">=</span> <span class="s2">"""</span>
|
||||
<span class="s2">SELECT ?item ?itemLabel ?itemDescription ?lat ?long %SELECT%</span>
|
||||
<span class="s2">WHERE</span>
|
||||
<span class="s2">{</span>
|
||||
<span class="s2"> SERVICE wikibase:mwapi {</span>
|
||||
<span class="s2"> bd:serviceParam wikibase:endpoint "www.wikidata.org";</span>
|
||||
<span class="s2"> wikibase:api "EntitySearch";</span>
|
||||
<span class="s2"> wikibase:limit 1;</span>
|
||||
<span class="s2"> mwapi:search "%QUERY%";</span>
|
||||
<span class="s2"> mwapi:language "%LANGUAGE%".</span>
|
||||
<span class="s2"> ?item wikibase:apiOutputItem mwapi:item.</span>
|
||||
<span class="s2"> }</span>
|
||||
<span class="s2"> hint:Prior hint:runFirst "true".</span>
|
||||
|
||||
<span class="s2"> %WHERE%</span>
|
||||
|
||||
<span class="s2"> SERVICE wikibase:label {</span>
|
||||
<span class="s2"> bd:serviceParam wikibase:language "%LANGUAGE%,en".</span>
|
||||
<span class="s2"> ?item rdfs:label ?itemLabel .</span>
|
||||
<span class="s2"> ?item schema:description ?itemDescription .</span>
|
||||
<span class="s2"> %WIKIBASE_LABELS%</span>
|
||||
<span class="s2"> }</span>
|
||||
|
||||
<span class="s2">}</span>
|
||||
<span class="s2">GROUP BY ?item ?itemLabel ?itemDescription ?lat ?long %GROUP_BY%</span>
|
||||
<span class="s2">"""</span>
|
||||
|
||||
<span class="c1"># Get the calendar names and the property names</span>
|
||||
<span class="n">QUERY_PROPERTY_NAMES</span> <span class="o">=</span> <span class="s2">"""</span>
|
||||
<span class="s2">SELECT ?item ?name</span>
|
||||
<span class="s2">WHERE {</span>
|
||||
<span class="s2"> {</span>
|
||||
<span class="s2"> SELECT ?item</span>
|
||||
<span class="s2"> WHERE { ?item wdt:P279* wd:Q12132 }</span>
|
||||
<span class="s2"> } UNION {</span>
|
||||
<span class="s2"> VALUES ?item { %ATTRIBUTES% }</span>
|
||||
<span class="s2"> }</span>
|
||||
<span class="s2"> OPTIONAL { ?item rdfs:label ?name. }</span>
|
||||
<span class="s2">}</span>
|
||||
<span class="s2">"""</span>
|
||||
|
||||
<span class="c1"># see the property "dummy value" of https://www.wikidata.org/wiki/Q2013 (Wikidata)</span>
|
||||
<span class="c1"># hard coded here to avoid an additional SPARQL request when the server starts</span>
|
||||
<span class="n">DUMMY_ENTITY_URLS</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span>
|
||||
<span class="s2">"http://www.wikidata.org/entity/"</span> <span class="o">+</span> <span class="n">wid</span> <span class="k">for</span> <span class="n">wid</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">"Q4115189"</span><span class="p">,</span> <span class="s2">"Q13406268"</span><span class="p">,</span> <span class="s2">"Q15397819"</span><span class="p">,</span> <span class="s2">"Q17339402"</span><span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
|
||||
<span class="c1"># https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1</span>
|
||||
<span class="c1"># https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html</span>
|
||||
<span class="n">sparql_string_escape</span> <span class="o">=</span> <span class="n">get_string_replaces_function</span><span class="p">(</span>
|
||||
<span class="c1"># fmt: off</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'</span><span class="se">\t</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\t</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\n</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'</span><span class="se">\r</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\r</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'</span><span class="se">\b</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\b</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'</span><span class="se">\f</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\f</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'</span><span class="se">\"</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\"</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'</span><span class="se">\'</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\'</span><span class="s1">'</span><span class="p">,</span>
|
||||
<span class="s1">'</span><span class="se">\\</span><span class="s1">'</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\\\</span><span class="s1">'</span>
|
||||
<span class="p">}</span>
|
||||
<span class="c1"># fmt: on</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">replace_http_by_https</span> <span class="o">=</span> <span class="n">get_string_replaces_function</span><span class="p">({</span><span class="s1">'http:'</span><span class="p">:</span> <span class="s1">'https:'</span><span class="p">})</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_headers</span><span class="p">():</span>
|
||||
<span class="c1"># user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits</span>
|
||||
<span class="k">return</span> <span class="p">{</span><span class="s1">'Accept'</span><span class="p">:</span> <span class="s1">'application/sparql-results+json'</span><span class="p">,</span> <span class="s1">'User-Agent'</span><span class="p">:</span> <span class="n">searx_useragent</span><span class="p">()}</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_label_for_entity</span><span class="p">(</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">entity_id</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">((</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">language</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">((</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">language</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]))</span>
|
||||
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">name</span> <span class="o">=</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">get</span><span class="p">((</span><span class="n">entity_id</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">name</span> <span class="o">=</span> <span class="n">entity_id</span>
|
||||
<span class="k">return</span> <span class="n">name</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">send_wikidata_query</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'GET'</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'GET'</span><span class="p">:</span>
|
||||
<span class="c1"># query will be cached by wikidata</span>
|
||||
<span class="n">http_response</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">SPARQL_ENDPOINT_URL</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">}),</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># query won't be cached by wikidata</span>
|
||||
<span class="n">http_response</span> <span class="o">=</span> <span class="n">post</span><span class="p">(</span><span class="n">SPARQL_ENDPOINT_URL</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">},</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span>
|
||||
<span class="k">if</span> <span class="n">http_response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">!=</span> <span class="mi">200</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'SPARQL endpoint error </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">http_response</span><span class="o">.</span><span class="n">content</span><span class="o">.</span><span class="n">decode</span><span class="p">())</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'request time </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">http_response</span><span class="o">.</span><span class="n">elapsed</span><span class="p">))</span>
|
||||
<span class="n">http_response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">loads</span><span class="p">(</span><span class="n">http_response</span><span class="o">.</span><span class="n">content</span><span class="o">.</span><span class="n">decode</span><span class="p">())</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
|
||||
<span class="n">eng_tag</span><span class="p">,</span> <span class="n">_wiki_netloc</span> <span class="o">=</span> <span class="n">get_wiki_params</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="p">)</span>
|
||||
<span class="n">query</span><span class="p">,</span> <span class="n">attributes</span> <span class="o">=</span> <span class="n">get_query</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">)</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"request --> language </span><span class="si">%s</span><span class="s2"> // len(attributes): </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">attributes</span><span class="p">))</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'method'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'POST'</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">SPARQL_ENDPOINT_URL</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'data'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">}</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_headers</span><span class="p">()</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'attributes'</span><span class="p">]</span> <span class="o">=</span> <span class="n">attributes</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">jsonresponse</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">content</span><span class="o">.</span><span class="n">decode</span><span class="p">())</span>
|
||||
|
||||
<span class="n">language</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span>
|
||||
<span class="n">attributes</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">search_params</span><span class="p">[</span><span class="s1">'attributes'</span><span class="p">]</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"request --> language </span><span class="si">%s</span><span class="s2"> // len(attributes): </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">language</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">attributes</span><span class="p">))</span>
|
||||
|
||||
<span class="n">seen_entities</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">jsonresponse</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'results'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'bindings'</span><span class="p">,</span> <span class="p">[]):</span>
|
||||
<span class="n">attribute_result</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">value</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">result</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||||
<span class="n">entity_url</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">entity_url</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">seen_entities</span> <span class="ow">and</span> <span class="n">entity_url</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">DUMMY_ENTITY_URLS</span><span class="p">:</span>
|
||||
<span class="n">seen_entities</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">entity_url</span><span class="p">)</span>
|
||||
<span class="n">results</span> <span class="o">+=</span> <span class="n">get_results</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">attributes</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'The SPARQL request returns duplicate entities: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">))</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span> <span class="o">=</span> <span class="s2">"https://commons.wikimedia.org/wiki/Special:FilePath/"</span>
|
||||
<span class="n">_IMG_SRC_NEW_URL_PREFIX</span> <span class="o">=</span> <span class="s2">"https://upload.wikimedia.org/wikipedia/commons/thumb/"</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_thumbnail"><a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikidata.get_thumbnail">[docs]</a><span class="k">def</span> <span class="nf">get_thumbnail</span><span class="p">(</span><span class="n">img_src</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get Thumbnail image from wikimedia commons</span>
|
||||
|
||||
<span class="sd"> Images from commons.wikimedia.org are (HTTP) redirected to</span>
|
||||
<span class="sd"> upload.wikimedia.org. The redirected URL can be calculated by this</span>
|
||||
<span class="sd"> function.</span>
|
||||
|
||||
<span class="sd"> - https://stackoverflow.com/a/33691240</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'get_thumbnail(): </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">img_src</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">img_src</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span> <span class="ow">in</span> <span class="n">img_src</span><span class="o">.</span><span class="n">split</span><span class="p">()[</span><span class="mi">0</span><span class="p">]:</span>
|
||||
<span class="n">img_src_name</span> <span class="o">=</span> <span class="n">unquote</span><span class="p">(</span><span class="n">img_src</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"?"</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"%20"</span><span class="p">,</span> <span class="s2">"_"</span><span class="p">))</span>
|
||||
<span class="n">img_src_name_first</span> <span class="o">=</span> <span class="n">img_src_name</span>
|
||||
<span class="n">img_src_name_second</span> <span class="o">=</span> <span class="n">img_src_name</span>
|
||||
|
||||
<span class="k">if</span> <span class="s2">".svg"</span> <span class="ow">in</span> <span class="n">img_src_name</span><span class="o">.</span><span class="n">split</span><span class="p">()[</span><span class="mi">0</span><span class="p">]:</span>
|
||||
<span class="n">img_src_name_second</span> <span class="o">=</span> <span class="n">img_src_name</span> <span class="o">+</span> <span class="s2">".png"</span>
|
||||
|
||||
<span class="n">img_src_size</span> <span class="o">=</span> <span class="n">img_src</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">_IMG_SRC_DEFAULT_URL_PREFIX</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"?"</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">img_src_size</span> <span class="o">=</span> <span class="n">img_src_size</span><span class="p">[</span><span class="n">img_src_size</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s2">"="</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span> <span class="p">:</span> <span class="n">img_src_size</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s2">"&"</span><span class="p">)]</span>
|
||||
<span class="n">img_src_name_md5</span> <span class="o">=</span> <span class="n">md5</span><span class="p">(</span><span class="n">img_src_name</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">"utf-8"</span><span class="p">))</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span>
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="n">_IMG_SRC_NEW_URL_PREFIX</span>
|
||||
<span class="o">+</span> <span class="n">img_src_name_md5</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="o">+</span> <span class="s2">"/"</span>
|
||||
<span class="o">+</span> <span class="n">img_src_name_md5</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">2</span><span class="p">]</span>
|
||||
<span class="o">+</span> <span class="s2">"/"</span>
|
||||
<span class="o">+</span> <span class="n">img_src_name_first</span>
|
||||
<span class="o">+</span> <span class="s2">"/"</span>
|
||||
<span class="o">+</span> <span class="n">img_src_size</span>
|
||||
<span class="o">+</span> <span class="s2">"px-"</span>
|
||||
<span class="o">+</span> <span class="n">img_src_name_second</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'get_thumbnail() redirected: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">img_src</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">img_src</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_results</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">attributes</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="c1"># pylint: disable=too-many-branches</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">infobox_title</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'itemLabel'</span><span class="p">)</span>
|
||||
<span class="n">infobox_id</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">]</span>
|
||||
<span class="n">infobox_id_lang</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">infobox_urls</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">infobox_attributes</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">infobox_content</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'itemDescription'</span><span class="p">,</span> <span class="p">[])</span>
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">img_src_priority</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">attribute</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">:</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_str</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">value</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span>
|
||||
<span class="n">attribute_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">attribute</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">attribute_type</span> <span class="ow">in</span> <span class="p">(</span><span class="n">WDURLAttribute</span><span class="p">,</span> <span class="n">WDArticle</span><span class="p">):</span>
|
||||
<span class="c1"># get_select() method : there is group_concat(distinct ...;separator=", ")</span>
|
||||
<span class="c1"># split the value here</span>
|
||||
<span class="k">for</span> <span class="n">url</span> <span class="ow">in</span> <span class="n">value</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">', '</span><span class="p">):</span>
|
||||
<span class="n">infobox_urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_label</span><span class="p">(</span><span class="n">language</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="o">**</span><span class="n">attribute</span><span class="o">.</span><span class="n">kwargs</span><span class="p">})</span>
|
||||
<span class="c1"># "normal" results (not infobox) include official website and Wikipedia links.</span>
|
||||
<span class="k">if</span> <span class="n">attribute</span><span class="o">.</span><span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'official'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDArticle</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">infobox_title</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s2">"content"</span><span class="p">:</span> <span class="n">infobox_content</span><span class="p">})</span>
|
||||
<span class="c1"># update the infobox_id with the wikipedia URL</span>
|
||||
<span class="c1"># first the local wikipedia URL, and as fallback the english wikipedia URL</span>
|
||||
<span class="k">if</span> <span class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDArticle</span> <span class="ow">and</span> <span class="p">(</span>
|
||||
<span class="p">(</span><span class="n">attribute</span><span class="o">.</span><span class="n">language</span> <span class="o">==</span> <span class="s1">'en'</span> <span class="ow">and</span> <span class="n">infobox_id_lang</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="n">attribute</span><span class="o">.</span><span class="n">language</span> <span class="o">!=</span> <span class="s1">'en'</span>
|
||||
<span class="p">):</span>
|
||||
<span class="n">infobox_id_lang</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">language</span>
|
||||
<span class="n">infobox_id</span> <span class="o">=</span> <span class="n">url</span>
|
||||
<span class="k">elif</span> <span class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDImageAttribute</span><span class="p">:</span>
|
||||
<span class="c1"># this attribute is an image.</span>
|
||||
                <span class="c1"># replace the current image only if its priority is lower</span>
|
||||
                <span class="c1"># (the infobox contains only one image).</span>
|
||||
<span class="k">if</span> <span class="n">attribute</span><span class="o">.</span><span class="n">priority</span> <span class="o">></span> <span class="n">img_src_priority</span><span class="p">:</span>
|
||||
<span class="n">img_src</span> <span class="o">=</span> <span class="n">get_thumbnail</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||||
<span class="n">img_src_priority</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">priority</span>
|
||||
<span class="k">elif</span> <span class="n">attribute_type</span> <span class="o">==</span> <span class="n">WDGeoAttribute</span><span class="p">:</span>
|
||||
<span class="c1"># geocoordinate link</span>
|
||||
<span class="c1"># use the area to get the OSM zoom</span>
|
||||
                <span class="c1"># Note: ignore the unit (must be km² otherwise the calculation is wrong)</span>
|
||||
<span class="c1"># Should use normalized value p:P2046/psn:P2046/wikibase:quantityAmount</span>
|
||||
<span class="n">area</span> <span class="o">=</span> <span class="n">attribute_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'P2046'</span><span class="p">)</span>
|
||||
<span class="n">osm_zoom</span> <span class="o">=</span> <span class="n">area_to_osm_zoom</span><span class="p">(</span><span class="n">area</span><span class="p">)</span> <span class="k">if</span> <span class="n">area</span> <span class="k">else</span> <span class="mi">19</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_geo_url</span><span class="p">(</span><span class="n">attribute_result</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="o">=</span><span class="n">osm_zoom</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">url</span><span class="p">:</span>
|
||||
<span class="n">infobox_urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_label</span><span class="p">(</span><span class="n">language</span><span class="p">),</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span><span class="p">})</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">infobox_attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span><span class="s1">'label'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">get_label</span><span class="p">(</span><span class="n">language</span><span class="p">),</span> <span class="s1">'value'</span><span class="p">:</span> <span class="n">value</span><span class="p">,</span> <span class="s1">'entity'</span><span class="p">:</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span><span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">infobox_id</span><span class="p">:</span>
|
||||
<span class="n">infobox_id</span> <span class="o">=</span> <span class="n">replace_http_by_https</span><span class="p">(</span><span class="n">infobox_id</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># add the wikidata URL at the end</span>
|
||||
<span class="n">infobox_urls</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'title'</span><span class="p">:</span> <span class="s1">'Wikidata'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">attribute_result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">]})</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">img_src</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">infobox_attributes</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">infobox_urls</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">infobox_content</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">infobox_urls</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'url'</span><span class="p">],</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">infobox_title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">infobox_content</span><span class="p">})</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'infobox'</span><span class="p">:</span> <span class="n">infobox_title</span><span class="p">,</span>
|
||||
<span class="s1">'id'</span><span class="p">:</span> <span class="n">infobox_id</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">infobox_content</span><span class="p">,</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">img_src</span><span class="p">,</span>
|
||||
<span class="s1">'urls'</span><span class="p">:</span> <span class="n">infobox_urls</span><span class="p">,</span>
|
||||
<span class="s1">'attributes'</span><span class="p">:</span> <span class="n">infobox_attributes</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_query</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="n">attributes</span> <span class="o">=</span> <span class="n">get_attributes</span><span class="p">(</span><span class="n">language</span><span class="p">)</span>
|
||||
<span class="n">select</span> <span class="o">=</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]</span>
|
||||
<span class="n">where</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">s</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_where</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]))</span>
|
||||
<span class="n">wikibase_label</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">s</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_wikibase_label</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]))</span>
|
||||
<span class="n">group_by</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">s</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="p">[</span><span class="n">a</span><span class="o">.</span><span class="n">get_group_by</span><span class="p">()</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">attributes</span><span class="p">]))</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="n">QUERY_TEMPLATE</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%QUERY%'</span><span class="p">,</span> <span class="n">sparql_string_escape</span><span class="p">(</span><span class="n">query</span><span class="p">))</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%SELECT%'</span><span class="p">,</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">select</span><span class="p">))</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%WHERE%'</span><span class="p">,</span> <span class="s1">'</span><span class="se">\n</span><span class="s1"> '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">where</span><span class="p">))</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%WIKIBASE_LABELS%'</span><span class="p">,</span> <span class="s1">'</span><span class="se">\n</span><span class="s1"> '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">wikibase_label</span><span class="p">))</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">%G</span><span class="s1">ROUP_BY%'</span><span class="p">,</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">group_by</span><span class="p">))</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%LANGUAGE%'</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">query</span><span class="p">,</span> <span class="n">attributes</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_attributes</span><span class="p">(</span><span class="n">language</span><span class="p">):</span>
|
||||
<span class="c1"># pylint: disable=too-many-statements</span>
|
||||
<span class="n">attributes</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">add_value</span><span class="p">(</span><span class="n">name</span><span class="p">):</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">add_amount</span><span class="p">(</span><span class="n">name</span><span class="p">):</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDAmountAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">add_label</span><span class="p">(</span><span class="n">name</span><span class="p">):</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDLabelAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">add_url</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">add_image</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDImageAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="p">,</span> <span class="n">priority</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">add_date</span><span class="p">(</span><span class="n">name</span><span class="p">):</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDDateAttribute</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># Dates</span>
|
||||
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">[</span>
|
||||
<span class="s1">'P571'</span><span class="p">,</span> <span class="c1"># inception date</span>
|
||||
<span class="s1">'P576'</span><span class="p">,</span> <span class="c1"># dissolution date</span>
|
||||
<span class="s1">'P580'</span><span class="p">,</span> <span class="c1"># start date</span>
|
||||
<span class="s1">'P582'</span><span class="p">,</span> <span class="c1"># end date</span>
|
||||
<span class="s1">'P569'</span><span class="p">,</span> <span class="c1"># date of birth</span>
|
||||
<span class="s1">'P570'</span><span class="p">,</span> <span class="c1"># date of death</span>
|
||||
<span class="s1">'P619'</span><span class="p">,</span> <span class="c1"># date of spacecraft launch</span>
|
||||
<span class="s1">'P620'</span><span class="p">,</span>
|
||||
<span class="p">]:</span> <span class="c1"># date of spacecraft landing</span>
|
||||
<span class="n">add_date</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">[</span>
|
||||
<span class="s1">'P27'</span><span class="p">,</span> <span class="c1"># country of citizenship</span>
|
||||
<span class="s1">'P495'</span><span class="p">,</span> <span class="c1"># country of origin</span>
|
||||
<span class="s1">'P17'</span><span class="p">,</span> <span class="c1"># country</span>
|
||||
<span class="s1">'P159'</span><span class="p">,</span>
|
||||
<span class="p">]:</span> <span class="c1"># headquarters location</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Places</span>
|
||||
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">[</span>
|
||||
<span class="s1">'P36'</span><span class="p">,</span> <span class="c1"># capital</span>
|
||||
<span class="s1">'P35'</span><span class="p">,</span> <span class="c1"># head of state</span>
|
||||
<span class="s1">'P6'</span><span class="p">,</span> <span class="c1"># head of government</span>
|
||||
<span class="s1">'P122'</span><span class="p">,</span> <span class="c1"># basic form of government</span>
|
||||
<span class="s1">'P37'</span><span class="p">,</span>
|
||||
<span class="p">]:</span> <span class="c1"># official language</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P1082'</span><span class="p">)</span> <span class="c1"># population</span>
|
||||
<span class="n">add_amount</span><span class="p">(</span><span class="s1">'P2046'</span><span class="p">)</span> <span class="c1"># area</span>
|
||||
<span class="n">add_amount</span><span class="p">(</span><span class="s1">'P281'</span><span class="p">)</span> <span class="c1"># postal code</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P38'</span><span class="p">)</span> <span class="c1"># currency</span>
|
||||
<span class="n">add_amount</span><span class="p">(</span><span class="s1">'P2048'</span><span class="p">)</span> <span class="c1"># height (building)</span>
|
||||
|
||||
<span class="c1"># Media</span>
|
||||
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">[</span>
|
||||
<span class="s1">'P400'</span><span class="p">,</span> <span class="c1"># platform (videogames, computing)</span>
|
||||
<span class="s1">'P50'</span><span class="p">,</span> <span class="c1"># author</span>
|
||||
<span class="s1">'P170'</span><span class="p">,</span> <span class="c1"># creator</span>
|
||||
<span class="s1">'P57'</span><span class="p">,</span> <span class="c1"># director</span>
|
||||
<span class="s1">'P175'</span><span class="p">,</span> <span class="c1"># performer</span>
|
||||
<span class="s1">'P178'</span><span class="p">,</span> <span class="c1"># developer</span>
|
||||
<span class="s1">'P162'</span><span class="p">,</span> <span class="c1"># producer</span>
|
||||
<span class="s1">'P176'</span><span class="p">,</span> <span class="c1"># manufacturer</span>
|
||||
<span class="s1">'P58'</span><span class="p">,</span> <span class="c1"># screenwriter</span>
|
||||
<span class="s1">'P272'</span><span class="p">,</span> <span class="c1"># production company</span>
|
||||
<span class="s1">'P264'</span><span class="p">,</span> <span class="c1"># record label</span>
|
||||
<span class="s1">'P123'</span><span class="p">,</span> <span class="c1"># publisher</span>
|
||||
<span class="s1">'P449'</span><span class="p">,</span> <span class="c1"># original network</span>
|
||||
<span class="s1">'P750'</span><span class="p">,</span> <span class="c1"># distributed by</span>
|
||||
<span class="s1">'P86'</span><span class="p">,</span> <span class="c1"># composer</span>
|
||||
<span class="p">]:</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="n">add_date</span><span class="p">(</span><span class="s1">'P577'</span><span class="p">)</span> <span class="c1"># publication date</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P136'</span><span class="p">)</span> <span class="c1"># genre (music, film, artistic...)</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P364'</span><span class="p">)</span> <span class="c1"># original language</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P212'</span><span class="p">)</span> <span class="c1"># ISBN-13</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P957'</span><span class="p">)</span> <span class="c1"># ISBN-10</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P275'</span><span class="p">)</span> <span class="c1"># copyright license</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P277'</span><span class="p">)</span> <span class="c1"># programming language</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P348'</span><span class="p">)</span> <span class="c1"># version</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P840'</span><span class="p">)</span> <span class="c1"># narrative location</span>
|
||||
|
||||
<span class="c1"># Languages</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P1098'</span><span class="p">)</span> <span class="c1"># number of speakers</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P282'</span><span class="p">)</span> <span class="c1"># writing system</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P1018'</span><span class="p">)</span> <span class="c1"># language regulatory body</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P218'</span><span class="p">)</span> <span class="c1"># language code (ISO 639-1)</span>
|
||||
|
||||
<span class="c1"># Other</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P169'</span><span class="p">)</span> <span class="c1"># ceo</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P112'</span><span class="p">)</span> <span class="c1"># founded by</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P1454'</span><span class="p">)</span> <span class="c1"># legal form (company, organization)</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P137'</span><span class="p">)</span> <span class="c1"># operator (service, facility, ...)</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P1029'</span><span class="p">)</span> <span class="c1"># crew members (tripulation)</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P225'</span><span class="p">)</span> <span class="c1"># taxon name</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P274'</span><span class="p">)</span> <span class="c1"># chemical formula</span>
|
||||
<span class="n">add_label</span><span class="p">(</span><span class="s1">'P1346'</span><span class="p">)</span> <span class="c1"># winner (sports, contests, ...)</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P1120'</span><span class="p">)</span> <span class="c1"># number of deaths</span>
|
||||
<span class="n">add_value</span><span class="p">(</span><span class="s1">'P498'</span><span class="p">)</span> <span class="c1"># currency code (ISO 4217)</span>
|
||||
|
||||
<span class="c1"># URL</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P856'</span><span class="p">,</span> <span class="n">official</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="c1"># official website</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDArticle</span><span class="p">(</span><span class="n">language</span><span class="p">))</span> <span class="c1"># wikipedia (user language)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">language</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'en'</span><span class="p">):</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDArticle</span><span class="p">(</span><span class="s1">'en'</span><span class="p">))</span> <span class="c1"># wikipedia (english)</span>
|
||||
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P1324'</span><span class="p">)</span> <span class="c1"># source code repository</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P1581'</span><span class="p">)</span> <span class="c1"># blog</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P434'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_artist'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P435'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_work'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P436'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_release_group'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P966'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'musicbrainz_label'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P345'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'imdb_id'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P2397'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'youtube_channel'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P1651'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'youtube_video'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P2002'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'twitter_profile'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P2013'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'facebook_profile'</span><span class="p">)</span>
|
||||
<span class="n">add_url</span><span class="p">(</span><span class="s1">'P2003'</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'instagram_profile'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Map</span>
|
||||
<span class="n">attributes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">WDGeoAttribute</span><span class="p">(</span><span class="s1">'P625'</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># Image</span>
|
||||
<span class="n">add_image</span><span class="p">(</span><span class="s1">'P15'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># route map</span>
|
||||
<span class="n">add_image</span><span class="p">(</span><span class="s1">'P242'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># locator map</span>
|
||||
<span class="n">add_image</span><span class="p">(</span><span class="s1">'P154'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># logo</span>
|
||||
<span class="n">add_image</span><span class="p">(</span><span class="s1">'P18'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># image</span>
|
||||
<span class="n">add_image</span><span class="p">(</span><span class="s1">'P41'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># flag</span>
|
||||
<span class="n">add_image</span><span class="p">(</span><span class="s1">'P2716'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">6</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># collage</span>
|
||||
<span class="n">add_image</span><span class="p">(</span><span class="s1">'P2910'</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">7</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="s1">'wikimedia_image'</span><span class="p">)</span> <span class="c1"># icon</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">attributes</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDAttribute</span><span class="p">:</span>
|
||||
<span class="vm">__slots__</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'name'</span><span class="p">,)</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s1">'(group_concat(distinct ?</span><span class="si">{name}</span><span class="s1">;separator=", ") as ?</span><span class="si">{name}</span><span class="s1">s)'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_label</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">get_label_for_entity</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"OPTIONAL { ?item wdt:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2"> . }"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_wikibase_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">""</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">""</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span>
|
||||
<span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'s'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s1">'<'</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span> <span class="o">+</span> <span class="s1">':'</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'>'</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDAmountAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s1">'?</span><span class="si">{name}</span><span class="s1"> ?</span><span class="si">{name}</span><span class="s1">Unit'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">""" OPTIONAL { ?item p:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2">Node .</span>
|
||||
<span class="s2"> ?</span><span class="si">{name}</span><span class="s2">Node rdf:type wikibase:BestRank ; ps:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2"> .</span>
|
||||
<span class="s2"> OPTIONAL { ?</span><span class="si">{name}</span><span class="s2">Node psv:</span><span class="si">{name}</span><span class="s2">/wikibase:quantityUnit ?</span><span class="si">{name}</span><span class="s2">Unit. } }"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span>
|
||||
<span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
<span class="n">unit</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s2">"Unit"</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">unit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">unit</span> <span class="o">=</span> <span class="n">unit</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'http://www.wikidata.org/entity/'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">value</span> <span class="o">+</span> <span class="s2">" "</span> <span class="o">+</span> <span class="n">get_label_for_entity</span><span class="p">(</span><span class="n">unit</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">value</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDArticle</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span>
|
||||
|
||||
<span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'language'</span><span class="p">,</span> <span class="s1">'kwargs'</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">,</span> <span class="n">kwargs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="s1">'wikipedia'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">language</span> <span class="o">=</span> <span class="n">language</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">kwargs</span> <span class="o">=</span> <span class="n">kwargs</span> <span class="ow">or</span> <span class="p">{}</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_label</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="c1"># language parameter is ignored</span>
|
||||
<span class="k">return</span> <span class="s2">"Wikipedia (</span><span class="si">{language}</span><span class="s2">)"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">language</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"?article</span><span class="si">{language}</span><span class="s2"> ?articleName</span><span class="si">{language}</span><span class="s2">"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">language</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"""OPTIONAL { ?article</span><span class="si">{language}</span><span class="s2"> schema:about ?item ;</span>
|
||||
<span class="s2"> schema:inLanguage "</span><span class="si">{language}</span><span class="s2">" ;</span>
|
||||
<span class="s2"> schema:isPartOf <https://</span><span class="si">{language}</span><span class="s2">.wikipedia.org/> ;</span>
|
||||
<span class="s2"> schema:name ?articleName</span><span class="si">{language}</span><span class="s2"> . }"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span>
|
||||
<span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">language</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="n">key</span> <span class="o">=</span> <span class="s1">'article</span><span class="si">{language}</span><span class="s1">'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{language}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">language</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDLabelAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s1">'(group_concat(distinct ?</span><span class="si">{name}</span><span class="s1">Label;separator=", ") as ?</span><span class="si">{name}</span><span class="s1">Labels)'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"OPTIONAL { ?item wdt:</span><span class="si">{name}</span><span class="s2"> ?</span><span class="si">{name}</span><span class="s2"> . }"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_wikibase_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"?</span><span class="si">{name}</span><span class="s2"> rdfs:label ?</span><span class="si">{name}</span><span class="s2">Label ."</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Labels'</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDURLAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span>
|
||||
|
||||
<span class="n">HTTP_WIKIMEDIA_IMAGE</span> <span class="o">=</span> <span class="s1">'http://commons.wikimedia.org/wiki/Special:FilePath/'</span>
|
||||
|
||||
<span class="vm">__slots__</span> <span class="o">=</span> <span class="s1">'url_id'</span><span class="p">,</span> <span class="s1">'kwargs'</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">kwargs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">url_id</span> <span class="o">=</span> <span class="n">url_id</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">kwargs</span> <span class="o">=</span> <span class="n">kwargs</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'s'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">url_id</span> <span class="ow">and</span> <span class="n">value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">value</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">:</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">','</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">url_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">url_id</span>
|
||||
<span class="k">if</span> <span class="n">value</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="o">.</span><span class="n">HTTP_WIKIMEDIA_IMAGE</span><span class="p">):</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="o">.</span><span class="n">HTTP_WIKIMEDIA_IMAGE</span><span class="p">)</span> <span class="p">:]</span>
|
||||
<span class="n">url_id</span> <span class="o">=</span> <span class="s1">'wikimedia_image'</span>
|
||||
<span class="k">return</span> <span class="n">get_external_url</span><span class="p">(</span><span class="n">url_id</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">value</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDGeoAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">get_label</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"OpenStreetMap"</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"?</span><span class="si">{name}</span><span class="s2">Lat ?</span><span class="si">{name}</span><span class="s2">Long"</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"""OPTIONAL { ?item p:</span><span class="si">{name}</span><span class="s2">/psv:</span><span class="si">{name}</span><span class="s2"> [</span>
|
||||
<span class="s2"> wikibase:geoLatitude ?</span><span class="si">{name}</span><span class="s2">Lat ;</span>
|
||||
<span class="s2"> wikibase:geoLongitude ?</span><span class="si">{name}</span><span class="s2">Long ] }"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span>
|
||||
<span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="n">latitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Lat'</span><span class="p">)</span>
|
||||
<span class="n">longitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Long'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">latitude</span> <span class="ow">and</span> <span class="n">longitude</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">latitude</span> <span class="o">+</span> <span class="s1">' '</span> <span class="o">+</span> <span class="n">longitude</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_geo_url</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="o">=</span><span class="mi">19</span><span class="p">):</span>
|
||||
<span class="n">latitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Lat'</span><span class="p">)</span>
|
||||
<span class="n">longitude</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'Long'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">latitude</span> <span class="ow">and</span> <span class="n">longitude</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">get_earth_coordinates_url</span><span class="p">(</span><span class="n">latitude</span><span class="p">,</span> <span class="n">longitude</span><span class="p">,</span> <span class="n">osm_zoom</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDImageAttribute</span><span class="p">(</span><span class="n">WDURLAttribute</span><span class="p">):</span>
|
||||
|
||||
<span class="vm">__slots__</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'priority'</span><span class="p">,)</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">priority</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">url_id</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">priority</span> <span class="o">=</span> <span class="n">priority</span>
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">WDDateAttribute</span><span class="p">(</span><span class="n">WDAttribute</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">get_select</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s1">'?</span><span class="si">{name}</span><span class="s1"> ?</span><span class="si">{name}</span><span class="s1">timePrecision ?</span><span class="si">{name}</span><span class="s1">timeZone ?</span><span class="si">{name}</span><span class="s1">timeCalendar'</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_where</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="c1"># To remove duplicate, add</span>
|
||||
<span class="c1"># FILTER NOT EXISTS { ?item p:{name}/psv:{name}/wikibase:timeValue ?{name}bis FILTER (?{name}bis < ?{name}) }</span>
|
||||
<span class="c1"># this filter is too slow, so the response function ignore duplicate results</span>
|
||||
<span class="c1"># (see the seen_entities variable)</span>
|
||||
<span class="k">return</span> <span class="s2">"""OPTIONAL { ?item p:</span><span class="si">{name}</span><span class="s2">/psv:</span><span class="si">{name}</span><span class="s2"> [</span>
|
||||
<span class="s2"> wikibase:timeValue ?</span><span class="si">{name}</span><span class="s2"> ;</span>
|
||||
<span class="s2"> wikibase:timePrecision ?</span><span class="si">{name}</span><span class="s2">timePrecision ;</span>
|
||||
<span class="s2"> wikibase:timeTimezone ?</span><span class="si">{name}</span><span class="s2">timeZone ;</span>
|
||||
<span class="s2"> wikibase:timeCalendarModel ?</span><span class="si">{name}</span><span class="s2">timeCalendar ] . }</span>
|
||||
<span class="s2"> hint:Prior hint:rangeSafe true;"""</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span>
|
||||
<span class="s1">'</span><span class="si">{name}</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_group_by</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_select</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">format_8</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span>
|
||||
<span class="c1"># precision: less than a year</span>
|
||||
<span class="k">return</span> <span class="n">value</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">format_9</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span>
|
||||
<span class="n">year</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||||
<span class="c1"># precision: year</span>
|
||||
<span class="k">if</span> <span class="n">year</span> <span class="o"><</span> <span class="mi">1584</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">year</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">year</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">year</span><span class="p">)</span>
|
||||
<span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'yyyy'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">format_10</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span>
|
||||
<span class="c1"># precision: month</span>
|
||||
<span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'MMMM y'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">format_11</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span>
|
||||
<span class="c1"># precision: day</span>
|
||||
<span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'full'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">format_13</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span>
|
||||
<span class="n">timestamp</span> <span class="o">=</span> <span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
||||
<span class="c1"># precision: minute</span>
|
||||
<span class="k">return</span> <span class="p">(</span>
|
||||
<span class="n">get_datetime_format</span><span class="p">(</span><span class="nb">format</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"'"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{0}</span><span class="s1">'</span><span class="p">,</span> <span class="n">format_time</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="s1">'full'</span><span class="p">,</span> <span class="n">tzinfo</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">))</span>
|
||||
<span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{1}</span><span class="s1">'</span><span class="p">,</span> <span class="n">format_date</span><span class="p">(</span><span class="n">timestamp</span><span class="p">,</span> <span class="s1">'short'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">))</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">format_14</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">locale</span><span class="p">):</span>
|
||||
<span class="c1"># precision: second.</span>
|
||||
<span class="k">return</span> <span class="n">format_datetime</span><span class="p">(</span><span class="n">isoparse</span><span class="p">(</span><span class="n">value</span><span class="p">),</span> <span class="nb">format</span><span class="o">=</span><span class="s1">'full'</span><span class="p">,</span> <span class="n">locale</span><span class="o">=</span><span class="n">locale</span><span class="p">)</span>
|
||||
|
||||
<span class="n">DATE_FORMAT</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'0'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">1000000000</span><span class="p">),</span>
|
||||
<span class="s1">'1'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">100000000</span><span class="p">),</span>
|
||||
<span class="s1">'2'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">10000000</span><span class="p">),</span>
|
||||
<span class="s1">'3'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">),</span>
|
||||
<span class="s1">'4'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">100000</span><span class="p">),</span>
|
||||
<span class="s1">'5'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">10000</span><span class="p">),</span>
|
||||
<span class="s1">'6'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">1000</span><span class="p">),</span>
|
||||
<span class="s1">'7'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">100</span><span class="p">),</span>
|
||||
<span class="s1">'8'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_8'</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span>
|
||||
<span class="s1">'9'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_9'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="c1"># year</span>
|
||||
<span class="s1">'10'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_10'</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="c1"># month</span>
|
||||
<span class="s1">'11'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_11'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="c1"># day</span>
|
||||
<span class="s1">'12'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_13'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="c1"># hour (not supported by babel, display minute)</span>
|
||||
<span class="s1">'13'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_13'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="c1"># minute</span>
|
||||
<span class="s1">'14'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'format_14'</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="c1"># second</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">get_str</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">result</span><span class="p">,</span> <span class="n">language</span><span class="p">):</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">value</span> <span class="o">==</span> <span class="s1">''</span> <span class="ow">or</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
<span class="n">precision</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">+</span> <span class="s1">'timePrecision'</span><span class="p">)</span>
|
||||
<span class="n">date_format</span> <span class="o">=</span> <span class="n">WDDateAttribute</span><span class="o">.</span><span class="n">DATE_FORMAT</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">precision</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">date_format</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">format_method</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">date_format</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">precision</span> <span class="o">=</span> <span class="n">date_format</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">precision</span> <span class="o">>=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">t</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">value</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'-'</span><span class="p">):</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="n">t</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">value</span> <span class="o">=</span> <span class="n">t</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="n">format_method</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">language</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
|
||||
<span class="k">return</span> <span class="n">value</span>
|
||||
<span class="k">return</span> <span class="n">value</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">debug_explain_wikidata_query</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'GET'</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'GET'</span><span class="p">:</span>
|
||||
<span class="n">http_response</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">SPARQL_EXPLAIN_URL</span> <span class="o">+</span> <span class="s1">'&'</span> <span class="o">+</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">}),</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">http_response</span> <span class="o">=</span> <span class="n">post</span><span class="p">(</span><span class="n">SPARQL_EXPLAIN_URL</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">'query'</span><span class="p">:</span> <span class="n">query</span><span class="p">},</span> <span class="n">headers</span><span class="o">=</span><span class="n">get_headers</span><span class="p">())</span>
|
||||
<span class="n">http_response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">http_response</span><span class="o">.</span><span class="n">content</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> <span class="c1"># pylint: disable=unused-argument</span>
|
||||
<span class="c1"># WIKIDATA_PROPERTIES : add unit symbols</span>
|
||||
<span class="n">WIKIDATA_PROPERTIES</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">WIKIDATA_UNITS</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># WIKIDATA_PROPERTIES : add property labels</span>
|
||||
<span class="n">wikidata_property_names</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">attribute</span> <span class="ow">in</span> <span class="n">get_attributes</span><span class="p">(</span><span class="s1">'en'</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">attribute</span><span class="p">)</span> <span class="ow">in</span> <span class="p">(</span><span class="n">WDAttribute</span><span class="p">,</span> <span class="n">WDAmountAttribute</span><span class="p">,</span> <span class="n">WDURLAttribute</span><span class="p">,</span> <span class="n">WDDateAttribute</span><span class="p">,</span> <span class="n">WDLabelAttribute</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">WIKIDATA_PROPERTIES</span><span class="p">:</span>
|
||||
<span class="n">wikidata_property_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">"wd:"</span> <span class="o">+</span> <span class="n">attribute</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="n">QUERY_PROPERTY_NAMES</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'%ATTRIBUTES%'</span><span class="p">,</span> <span class="s2">" "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">wikidata_property_names</span><span class="p">))</span>
|
||||
<span class="n">jsonresponse</span> <span class="o">=</span> <span class="n">send_wikidata_query</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">jsonresponse</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'results'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'bindings'</span><span class="p">,</span> <span class="p">{}):</span>
|
||||
<span class="n">name</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">][</span><span class="s1">'value'</span><span class="p">]</span>
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'name'</span><span class="p">][</span><span class="s1">'xml:lang'</span><span class="p">]</span>
|
||||
<span class="n">entity_id</span> <span class="o">=</span> <span class="n">result</span><span class="p">[</span><span class="s1">'item'</span><span class="p">][</span><span class="s1">'value'</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'http://www.wikidata.org/entity/'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="n">WIKIDATA_PROPERTIES</span><span class="p">[(</span><span class="n">entity_id</span><span class="p">,</span> <span class="n">lang</span><span class="p">)]</span> <span class="o">=</span> <span class="n">name</span><span class="o">.</span><span class="n">capitalize</span><span class="p">()</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikidata.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Uses languages evaluated from :py:obj:`wikipedia.fetch_wikimedia_traits</span>
|
||||
<span class="sd"> <searx.engines.wikipedia.fetch_wikimedia_traits>` and removes</span>
|
||||
|
||||
<span class="sd"> - ``traits.custom['wiki_netloc']``: wikidata does not have net-locations for</span>
|
||||
<span class="sd"> the languages and the list of all</span>
|
||||
|
||||
<span class="sd"> - ``traits.custom['WIKIPEDIA_LANGUAGES']``: not used in the wikipedia engine</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">fetch_wikimedia_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">)</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
429
_modules/searx/engines/wikipedia.html
Normal file
429
_modules/searx/engines/wikipedia.html
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.wikipedia — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.wikipedia</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.wikipedia</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""This module implements the Wikipedia engine. Some of these implementations</span>
|
||||
<span class="sd">are shared by other engines:</span>
|
||||
|
||||
<span class="sd">- :ref:`wikidata engine`</span>
|
||||
|
||||
<span class="sd">The list of supported languages is :py:obj:`fetched &lt;fetch_wikimedia_traits&gt;` from</span>
|
||||
<span class="sd">the article linked by :py:obj:`list_of_wikipedias`.</span>
|
||||
|
||||
<span class="sd">Unlike traditional search engines, wikipedia does not support one Wikipedia for</span>
|
||||
<span class="sd">all languages, but there is one Wikipedia for each supported language. Some of</span>
|
||||
<span class="sd">these Wikipedias have a LanguageConverter_ enabled</span>
|
||||
<span class="sd">(:py:obj:`rest_v1_summary_url`).</span>
|
||||
|
||||
<span class="sd">A LanguageConverter_ (LC) is a system based on language variants that</span>
|
||||
<span class="sd">automatically converts the content of a page into a different variant. A variant</span>
|
||||
<span class="sd">is mostly the same language in a different script.</span>
|
||||
|
||||
<span class="sd">- `Wikipedias in multiple writing systems`_</span>
|
||||
<span class="sd">- `Automatic conversion between traditional and simplified Chinese characters`_</span>
|
||||
|
||||
<span class="sd">PR-2554_:</span>
|
||||
<span class="sd"> The Wikipedia link returned by the API is still the same in all cases</span>
|
||||
<span class="sd"> (`https://zh.wikipedia.org/wiki/出租車`_) but if your browser's</span>
|
||||
<span class="sd"> ``Accept-Language`` is set to any of ``zh``, ``zh-CN``, ``zh-TW``, ``zh-HK``</span>
|
||||
<span class="sd"> or .. Wikipedia's LC automatically returns the desired script in their</span>
|
||||
<span class="sd"> web-page.</span>
|
||||
|
||||
<span class="sd"> - You can test the API here: https://reqbin.com/gesg2kvx</span>
|
||||
|
||||
<span class="sd">.. _https://zh.wikipedia.org/wiki/出租車:</span>
|
||||
<span class="sd"> https://zh.wikipedia.org/wiki/%E5%87%BA%E7%A7%9F%E8%BB%8A</span>
|
||||
|
||||
<span class="sd">To support Wikipedia's LanguageConverter_, a SearXNG request to Wikipedia uses</span>
|
||||
<span class="sd">:py:obj:`get_wiki_params` and :py:obj:`wiki_lc_locale_variants` in the</span>
|
||||
<span class="sd">:py:obj:`fetch_wikimedia_traits` function.</span>
|
||||
|
||||
<span class="sd">To test in SearXNG, query for ``!wp 出租車`` with each of the available Chinese</span>
|
||||
<span class="sd">options:</span>
|
||||
|
||||
<span class="sd">- ``!wp 出租車 :zh`` should show 出租車</span>
|
||||
<span class="sd">- ``!wp 出租車 :zh-CN`` should show 出租车</span>
|
||||
<span class="sd">- ``!wp 出租車 :zh-TW`` should show 計程車</span>
|
||||
<span class="sd">- ``!wp 出租車 :zh-HK`` should show 的士</span>
|
||||
<span class="sd">- ``!wp 出租車 :zh-SG`` should show 德士</span>
|
||||
|
||||
<span class="sd">.. _LanguageConverter:</span>
|
||||
<span class="sd"> https://www.mediawiki.org/wiki/Writing_systems#LanguageConverter</span>
|
||||
<span class="sd">.. _Wikipedias in multiple writing systems:</span>
|
||||
<span class="sd"> https://meta.wikimedia.org/wiki/Wikipedias_in_multiple_writing_systems</span>
|
||||
<span class="sd">.. _Automatic conversion between traditional and simplified Chinese characters:</span>
|
||||
<span class="sd"> https://en.wikipedia.org/wiki/Chinese_Wikipedia#Automatic_conversion_between_traditional_and_simplified_Chinese_characters</span>
|
||||
<span class="sd">.. _PR-2554: https://github.com/searx/searx/pull/2554</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">urllib.parse</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">utils</span>
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">network</span> <span class="k">as</span> <span class="n">_network</span>
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">locales</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://www.wikipedia.org/'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s1">'Q52'</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://en.wikipedia.org/api/'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'JSON'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">send_accept_language_header</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="sd">"""The HTTP ``Accept-Language`` header is needed for wikis where</span>
|
||||
<span class="sd">LanguageConverter_ is enabled."""</span>
|
||||
|
||||
<span class="n">list_of_wikipedias</span> <span class="o">=</span> <span class="s1">'https://meta.wikimedia.org/wiki/List_of_Wikipedias'</span>
|
||||
<span class="sd">"""`List of all wikipedias &lt;https://meta.wikimedia.org/wiki/List_of_Wikipedias&gt;`_</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">wikipedia_article_depth</span> <span class="o">=</span> <span class="s1">'https://meta.wikimedia.org/wiki/Wikipedia_article_depth'</span>
|
||||
<span class="sd">"""The *editing depth* of Wikipedia is one of several possible rough indicators</span>
|
||||
<span class="sd">of the encyclopedia's collaborative quality, showing how frequently its articles</span>
|
||||
<span class="sd">are updated. The measurement of depth was introduced after some limitations of</span>
|
||||
<span class="sd">the classic measurement of article count were realized.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">rest_v1_summary_url</span> <span class="o">=</span> <span class="s1">'https://</span><span class="si">{wiki_netloc}</span><span class="s1">/api/rest_v1/page/summary/</span><span class="si">{title}</span><span class="s1">'</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd">`wikipedia rest_v1 summary API`_:</span>
|
||||
<span class="sd"> The summary response includes an extract of the first paragraph of the page in</span>
|
||||
<span class="sd"> plain text and HTML as well as the type of page. This is useful for page</span>
|
||||
<span class="sd"> previews (fka. Hovercards, aka. Popups) on the web and link previews in the</span>
|
||||
<span class="sd"> apps.</span>
|
||||
|
||||
<span class="sd">HTTP ``Accept-Language`` header (:py:obj:`send_accept_language_header`):</span>
|
||||
<span class="sd"> The desired language variant code for wikis where LanguageConverter_ is</span>
|
||||
<span class="sd"> enabled.</span>
|
||||
|
||||
<span class="sd">.. _wikipedia rest_v1 summary API:</span>
|
||||
<span class="sd"> https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">wiki_lc_locale_variants</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"zh"</span><span class="p">:</span> <span class="p">(</span>
|
||||
<span class="s2">"zh-CN"</span><span class="p">,</span>
|
||||
<span class="s2">"zh-HK"</span><span class="p">,</span>
|
||||
<span class="s2">"zh-MO"</span><span class="p">,</span>
|
||||
<span class="s2">"zh-MY"</span><span class="p">,</span>
|
||||
<span class="s2">"zh-SG"</span><span class="p">,</span>
|
||||
<span class="s2">"zh-TW"</span><span class="p">,</span>
|
||||
<span class="p">),</span>
|
||||
<span class="s2">"zh-classical"</span><span class="p">:</span> <span class="p">(</span><span class="s2">"zh-classical"</span><span class="p">,),</span>
|
||||
<span class="p">}</span>
|
||||
<span class="sd">"""Mapping rule of the LanguageConverter_ to map a language and its variants to</span>
|
||||
<span class="sd">a Locale (used in the HTTP ``Accept-Language`` header). For example see `LC</span>
|
||||
<span class="sd">Chinese`_.</span>
|
||||
|
||||
<span class="sd">.. _LC Chinese:</span>
|
||||
<span class="sd"> https://meta.wikimedia.org/wiki/Wikipedias_in_multiple_writing_systems#Chinese</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">wikipedia_script_variants</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"zh"</span><span class="p">:</span> <span class="p">(</span>
|
||||
<span class="s2">"zh_Hant"</span><span class="p">,</span>
|
||||
<span class="s2">"zh_Hans"</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_wiki_params"><a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikipedia.get_wiki_params">[docs]</a><span class="k">def</span> <span class="nf">get_wiki_params</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Returns the Wikipedia language tag and the netloc that fits to the</span>
|
||||
<span class="sd"> ``sxng_locale``. To support LanguageConverter_ this function rates a locale</span>
|
||||
<span class="sd"> (region) higher than a language (compare :py:obj:`wiki_lc_locale_variants`).</span>
|
||||
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">sxng_locale</span><span class="p">,</span> <span class="s1">'en'</span><span class="p">))</span>
|
||||
<span class="n">wiki_netloc</span> <span class="o">=</span> <span class="n">eng_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="s1">'en.wikipedia.org'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">wiki_netloc</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikipedia.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Assemble a request (`wikipedia rest_v1 summary API`_)."""</span>
|
||||
<span class="k">if</span> <span class="n">query</span><span class="o">.</span><span class="n">islower</span><span class="p">():</span>
|
||||
<span class="n">query</span> <span class="o">=</span> <span class="n">query</span><span class="o">.</span><span class="n">title</span><span class="p">()</span>
|
||||
|
||||
<span class="n">_eng_tag</span><span class="p">,</span> <span class="n">wiki_netloc</span> <span class="o">=</span> <span class="n">get_wiki_params</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'searxng_locale'</span><span class="p">],</span> <span class="n">traits</span><span class="p">)</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">rest_v1_summary_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">wiki_netloc</span><span class="o">=</span><span class="n">wiki_netloc</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'soft_max_redirects'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">2</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<span class="c1"># get response from search-request</span>
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">404</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
<span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">400</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">api_result</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
||||
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span> <span class="c1"># pylint: disable=broad-except</span>
|
||||
<span class="k">pass</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="p">(</span>
|
||||
<span class="n">api_result</span><span class="p">[</span><span class="s1">'type'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request'</span>
|
||||
<span class="ow">and</span> <span class="n">api_result</span><span class="p">[</span><span class="s1">'detail'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'title-invalid-characters'</span>
|
||||
<span class="p">):</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">_network</span><span class="o">.</span><span class="n">raise_for_httperror</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="n">api_result</span> <span class="o">=</span> <span class="n">resp</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">html_to_text</span><span class="p">(</span><span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'titles'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'display'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">))</span>
|
||||
<span class="n">wikipedia_link</span> <span class="o">=</span> <span class="n">api_result</span><span class="p">[</span><span class="s1">'content_urls'</span><span class="p">][</span><span class="s1">'desktop'</span><span class="p">][</span><span class="s1">'page'</span><span class="p">]</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">wikipedia_link</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'description'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)})</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'type'</span><span class="p">)</span> <span class="o">==</span> <span class="s1">'standard'</span><span class="p">:</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'infobox'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'id'</span><span class="p">:</span> <span class="n">wikipedia_link</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'extract'</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span>
|
||||
<span class="s1">'img_src'</span><span class="p">:</span> <span class="n">api_result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'thumbnail'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'source'</span><span class="p">),</span>
|
||||
<span class="s1">'urls'</span><span class="p">:</span> <span class="p">[{</span><span class="s1">'title'</span><span class="p">:</span> <span class="s1">'Wikipedia'</span><span class="p">,</span> <span class="s1">'url'</span><span class="p">:</span> <span class="n">wikipedia_link</span><span class="p">}],</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<span class="c1"># Nonstandard language codes</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># These Wikipedias use language codes that do not conform to the ISO 639</span>
|
||||
<span class="c1"># standard (which is how wiki subdomains are chosen nowadays).</span>
|
||||
|
||||
<span class="n">lang_map</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">LOCALE_BEST_MATCH</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||||
<span class="n">lang_map</span><span class="o">.</span><span class="n">update</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'be-tarask'</span><span class="p">:</span> <span class="s1">'bel'</span><span class="p">,</span>
|
||||
<span class="s1">'ak'</span><span class="p">:</span> <span class="s1">'aka'</span><span class="p">,</span>
|
||||
<span class="s1">'als'</span><span class="p">:</span> <span class="s1">'gsw'</span><span class="p">,</span>
|
||||
<span class="s1">'bat-smg'</span><span class="p">:</span> <span class="s1">'sgs'</span><span class="p">,</span>
|
||||
<span class="s1">'cbk-zam'</span><span class="p">:</span> <span class="s1">'cbk'</span><span class="p">,</span>
|
||||
<span class="s1">'fiu-vro'</span><span class="p">:</span> <span class="s1">'vro'</span><span class="p">,</span>
|
||||
<span class="s1">'map-bms'</span><span class="p">:</span> <span class="s1">'map'</span><span class="p">,</span>
|
||||
<span class="s1">'no'</span><span class="p">:</span> <span class="s1">'nb-NO'</span><span class="p">,</span>
|
||||
<span class="s1">'nrm'</span><span class="p">:</span> <span class="s1">'nrf'</span><span class="p">,</span>
|
||||
<span class="s1">'roa-rup'</span><span class="p">:</span> <span class="s1">'rup'</span><span class="p">,</span>
|
||||
<span class="s1">'nds-nl'</span><span class="p">:</span> <span class="s1">'nds'</span><span class="p">,</span>
|
||||
<span class="c1">#'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple)</span>
|
||||
<span class="s1">'zh-min-nan'</span><span class="p">:</span> <span class="s1">'nan'</span><span class="p">,</span>
|
||||
<span class="s1">'zh-yue'</span><span class="p">:</span> <span class="s1">'yue'</span><span class="p">,</span>
|
||||
<span class="s1">'an'</span><span class="p">:</span> <span class="s1">'arg'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="n">fetch_wikimedia_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"WIKIPEDIA_LANGUAGES: </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]))</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_wikimedia_traits"><a class="viewcode-back" href="../../../dev/engines/online/wikipedia.html#searx.engines.wikipedia.fetch_wikimedia_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_wikimedia_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages from Wikipedia. Not all languages from the</span>
|
||||
<span class="sd"> :py:obj:`list_of_wikipedias` are supported by SearXNG locales, only those</span>
|
||||
<span class="sd"> known from :py:obj:`searx.locales.LOCALE_NAMES` or those with a minimal</span>
|
||||
<span class="sd"> :py:obj:`editing depth &lt;wikipedia_article_depth&gt;`.</span>
|
||||
|
||||
<span class="sd"> The location of the Wikipedia address of a language is mapped in a</span>
|
||||
<span class="sd"> :py:obj:`custom field &lt;searx.enginelib.traits.EngineTraits.custom&gt;`</span>
|
||||
<span class="sd"> (``wiki_netloc``). Here is a reduced example:</span>
|
||||
|
||||
<span class="sd"> .. code:: python</span>
|
||||
|
||||
<span class="sd"> traits.custom['wiki_netloc'] = {</span>
|
||||
<span class="sd"> "en": "en.wikipedia.org",</span>
|
||||
<span class="sd"> ..</span>
|
||||
<span class="sd"> "gsw": "als.wikipedia.org",</span>
|
||||
<span class="sd"> ..</span>
|
||||
<span class="sd"> "zh": "zh.wikipedia.org",</span>
|
||||
<span class="sd"> "zh-classical": "zh-classical.wikipedia.org"</span>
|
||||
<span class="sd"> }</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># pylint: disable=too-many-branches</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="c1"># insert alias to map from a script or region to a wikipedia variant</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">sxng_tag_list</span> <span class="ow">in</span> <span class="n">wikipedia_script_variants</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="k">for</span> <span class="n">sxng_tag</span> <span class="ow">in</span> <span class="n">sxng_tag_list</span><span class="p">:</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
<span class="k">for</span> <span class="n">eng_tag</span><span class="p">,</span> <span class="n">sxng_tag_list</span> <span class="ow">in</span> <span class="n">wiki_lc_locale_variants</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="k">for</span> <span class="n">sxng_tag</span> <span class="ow">in</span> <span class="n">sxng_tag_list</span><span class="p">:</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">_network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">list_of_wikipedias</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from Wikipedia is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//table[contains(@class,"sortable")]//tbody/tr'</span><span class="p">):</span>
|
||||
|
||||
<span class="n">cols</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./td'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">cols</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">cols</span> <span class="o">=</span> <span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">text_content</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">]</span>
|
||||
|
||||
<span class="n">depth</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">cols</span><span class="p">[</span><span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'0'</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span>
|
||||
<span class="n">articles</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">cols</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span>
|
||||
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">cols</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span>
|
||||
<span class="n">wiki_url</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'./td[4]/a/@href'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">wiki_url</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">urlparse</span><span class="p">(</span><span class="n">wiki_url</span><span class="p">)</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">locales</span><span class="o">.</span><span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">lang_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">),</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'-'</span><span class="p">))</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="c1"># print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="k">finally</span><span class="p">:</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">sxng_tag</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">locales</span><span class="o">.</span><span class="n">LOCALE_NAMES</span><span class="p">:</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">articles</span> <span class="o"><</span> <span class="mi">10000</span><span class="p">:</span>
|
||||
<span class="c1"># exclude languages with too few articles</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">int</span><span class="p">(</span><span class="n">depth</span><span class="p">)</span> <span class="o"><</span> <span class="mi">20</span><span class="p">:</span>
|
||||
<span class="c1"># Rough indicator of a Wikipedia’s quality, showing how</span>
|
||||
<span class="c1"># frequently its articles are updated.</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'wiki_netloc'</span><span class="p">][</span><span class="n">eng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">wiki_url</span><span class="o">.</span><span class="n">netloc</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s1">'WIKIPEDIA_LANGUAGES'</span><span class="p">]</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
423
_modules/searx/engines/xpath.html
Normal file
423
_modules/searx/engines/xpath.html
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.xpath — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.xpath</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.xpath</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""The XPath engine is a *generic* engine with which it is possible to configure</span>
|
||||
<span class="sd">engines in the settings.</span>
|
||||
|
||||
<span class="sd">.. _XPath selector: https://quickref.me/xpath.html#xpath-selectors</span>
|
||||
|
||||
<span class="sd">Configuration</span>
|
||||
<span class="sd">=============</span>
|
||||
|
||||
<span class="sd">Request:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`search_url`</span>
|
||||
<span class="sd">- :py:obj:`lang_all`</span>
|
||||
<span class="sd">- :py:obj:`soft_max_redirects`</span>
|
||||
<span class="sd">- :py:obj:`cookies`</span>
|
||||
<span class="sd">- :py:obj:`headers`</span>
|
||||
|
||||
<span class="sd">Paging:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`paging`</span>
|
||||
<span class="sd">- :py:obj:`page_size`</span>
|
||||
<span class="sd">- :py:obj:`first_page_num`</span>
|
||||
|
||||
<span class="sd">Time Range:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`time_range_support`</span>
|
||||
<span class="sd">- :py:obj:`time_range_url`</span>
|
||||
<span class="sd">- :py:obj:`time_range_map`</span>
|
||||
|
||||
<span class="sd">Safe-Search:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`safe_search_support`</span>
|
||||
<span class="sd">- :py:obj:`safe_search_map`</span>
|
||||
|
||||
<span class="sd">Response:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`no_result_for_http_status`</span>
|
||||
|
||||
<span class="sd">`XPath selector`_:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`results_xpath`</span>
|
||||
<span class="sd">- :py:obj:`url_xpath`</span>
|
||||
<span class="sd">- :py:obj:`title_xpath`</span>
|
||||
<span class="sd">- :py:obj:`content_xpath`</span>
|
||||
<span class="sd">- :py:obj:`thumbnail_xpath`</span>
|
||||
<span class="sd">- :py:obj:`suggestion_xpath`</span>
|
||||
|
||||
|
||||
<span class="sd">Example</span>
|
||||
<span class="sd">=======</span>
|
||||
|
||||
<span class="sd">Here is a simple example of an XPath engine configured in the :ref:`settings</span>
|
||||
<span class="sd">engine` section, further read :ref:`engines-dev`.</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name : bitbucket</span>
|
||||
<span class="sd"> engine : xpath</span>
|
||||
<span class="sd"> paging : True</span>
|
||||
<span class="sd"> search_url : https://bitbucket.org/repo/all/{pageno}?name={query}</span>
|
||||
<span class="sd"> url_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]/@href</span>
|
||||
<span class="sd"> title_xpath : //article[@class="repo-summary"]//a[@class="repo-link"]</span>
|
||||
<span class="sd"> content_xpath : //article[@class="repo-summary"]/p</span>
|
||||
|
||||
<span class="sd">Implementations</span>
|
||||
<span class="sd">===============</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlencode</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">extract_url</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_list</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">raise_for_httperror</span>
|
||||
|
||||
<span class="n">search_url</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="sd">"""</span>
|
||||
<span class="sd">Search URL of the engine. Example::</span>
|
||||
|
||||
<span class="sd"> https://example.org/?search={query}&page={pageno}{time_range}{safe_search}</span>
|
||||
|
||||
<span class="sd">Replacements are:</span>
|
||||
|
||||
<span class="sd">``{query}``:</span>
|
||||
<span class="sd"> Search terms from user.</span>
|
||||
|
||||
<span class="sd">``{pageno}``:</span>
|
||||
<span class="sd"> Page number if engine supports paging :py:obj:`paging`</span>
|
||||
|
||||
<span class="sd">``{lang}``:</span>
|
||||
<span class="sd"> ISO 639-1 language code (en, de, fr ..)</span>
|
||||
|
||||
<span class="sd">``{time_range}``:</span>
|
||||
<span class="sd"> :py:obj:`URL parameter <time_range_url>` if engine :py:obj:`supports time</span>
|
||||
<span class="sd"> range <time_range_support>`. The value for the parameter is taken from</span>
|
||||
<span class="sd"> :py:obj:`time_range_map`.</span>
|
||||
|
||||
<span class="sd">``{safe_search}``:</span>
|
||||
<span class="sd"> Safe-search :py:obj:`URL parameter <safe_search_map>` if engine</span>
|
||||
<span class="sd"> :py:obj:`supports safe-search <safe_search_support>`. The ``{safe_search}``</span>
|
||||
<span class="sd"> replacement is taken from the :py:obj:`safe_search_map`. Filter results::</span>
|
||||
|
||||
<span class="sd"> 0: none, 1: moderate, 2:strict</span>
|
||||
|
||||
<span class="sd"> If not supported, the URL parameter is an empty string.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">lang_all</span> <span class="o">=</span> <span class="s1">'en'</span>
|
||||
<span class="sd">'''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is</span>
|
||||
<span class="sd">selected.</span>
|
||||
<span class="sd">'''</span>
|
||||
|
||||
<span class="n">no_result_for_http_status</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="sd">'''Return empty result for these HTTP status codes instead of throwing an error.</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> no_result_for_http_status: []</span>
|
||||
<span class="sd">'''</span>
|
||||
|
||||
<span class="n">soft_max_redirects</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="sd">'''Maximum redirects, soft limit. Record an error but don't stop the engine'''</span>
|
||||
|
||||
<span class="n">results_xpath</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="sd">'''`XPath selector`_ for the list of result items'''</span>
|
||||
|
||||
<span class="n">url_xpath</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="sd">'''`XPath selector`_ of result's ``url``.'''</span>
|
||||
|
||||
<span class="n">content_xpath</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="sd">'''`XPath selector`_ of result's ``content``.'''</span>
|
||||
|
||||
<span class="n">title_xpath</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="sd">'''`XPath selector`_ of result's ``title``.'''</span>
|
||||
|
||||
<span class="n">thumbnail_xpath</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="sd">'''`XPath selector`_ of result's ``img_src``.'''</span>
|
||||
|
||||
<span class="n">suggestion_xpath</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="sd">'''`XPath selector`_ of result's ``suggestion``.'''</span>
|
||||
|
||||
<span class="n">cached_xpath</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="n">cached_url</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
|
||||
<span class="n">cookies</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="sd">'''Some engines might offer different result based on cookies.</span>
|
||||
<span class="sd">Possible use-case: To set safesearch cookie.'''</span>
|
||||
|
||||
<span class="n">headers</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="sd">'''Some engines might offer different result based on headers. Possible use-case:</span>
|
||||
<span class="sd">To set header to moderate.'''</span>
|
||||
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="sd">'''Engine supports paging [True or False].'''</span>
|
||||
|
||||
<span class="n">page_size</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="sd">'''Number of results on each page. Only needed if the site requires not a page</span>
|
||||
<span class="sd">number, but an offset.'''</span>
|
||||
|
||||
<span class="n">first_page_num</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="sd">'''Number of the first page (usually 0 or 1).'''</span>
|
||||
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="sd">'''Engine supports search time range.'''</span>
|
||||
|
||||
<span class="n">time_range_url</span> <span class="o">=</span> <span class="s1">'&hours=</span><span class="si">{time_range_val}</span><span class="s1">'</span>
|
||||
<span class="sd">'''Time range URL parameter in the :py:obj:`search_url`. If no time range is</span>
|
||||
<span class="sd">requested by the user, the URL parameter is an empty string. The</span>
|
||||
<span class="sd">``{time_range_val}`` replacement is taken from the :py:obj:`time_range_map`.</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> time_range_url : '&days={time_range_val}'</span>
|
||||
<span class="sd">'''</span>
|
||||
|
||||
<span class="n">time_range_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'day'</span><span class="p">:</span> <span class="mi">24</span><span class="p">,</span>
|
||||
<span class="s1">'week'</span><span class="p">:</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">7</span><span class="p">,</span>
|
||||
<span class="s1">'month'</span><span class="p">:</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">30</span><span class="p">,</span>
|
||||
<span class="s1">'year'</span><span class="p">:</span> <span class="mi">24</span> <span class="o">*</span> <span class="mi">365</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="sd">'''Maps time range value from user to ``{time_range_val}`` in</span>
|
||||
<span class="sd">:py:obj:`time_range_url`.</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> time_range_map:</span>
|
||||
<span class="sd"> day: 1</span>
|
||||
<span class="sd"> week: 7</span>
|
||||
<span class="sd"> month: 30</span>
|
||||
<span class="sd"> year: 365</span>
|
||||
<span class="sd">'''</span>
|
||||
|
||||
<span class="n">safe_search_support</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="sd">'''Engine supports safe-search.'''</span>
|
||||
|
||||
<span class="n">safe_search_map</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">'&filter=none'</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">'&filter=moderate'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">'&filter=strict'</span><span class="p">}</span>
|
||||
<span class="sd">'''Maps safe-search value to ``{safe_search}`` in :py:obj:`search_url`.</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> safesearch: true</span>
|
||||
<span class="sd"> safe_search_map:</span>
|
||||
<span class="sd"> 0: '&filter=none'</span>
|
||||
<span class="sd"> 1: '&filter=moderate'</span>
|
||||
<span class="sd"> 2: '&filter=strict'</span>
|
||||
|
||||
<span class="sd">'''</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/xpath.html#searx.engines.xpath.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">'''Build request parameters (see :ref:`engine request`).'''</span>
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">lang_all</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'all'</span><span class="p">:</span>
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">][:</span><span class="mi">2</span><span class="p">]</span>
|
||||
|
||||
<span class="n">time_range</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">):</span>
|
||||
<span class="n">time_range_val</span> <span class="o">=</span> <span class="n">time_range_map</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'time_range'</span><span class="p">))</span>
|
||||
<span class="n">time_range</span> <span class="o">=</span> <span class="n">time_range_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">time_range_val</span><span class="o">=</span><span class="n">time_range_val</span><span class="p">)</span>
|
||||
|
||||
<span class="n">safe_search</span> <span class="o">=</span> <span class="s1">''</span>
|
||||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]:</span>
|
||||
<span class="n">safe_search</span> <span class="o">=</span> <span class="n">safe_search_map</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s1">'safesearch'</span><span class="p">]]</span>
|
||||
|
||||
<span class="n">fargs</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'query'</span><span class="p">:</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'q'</span><span class="p">:</span> <span class="n">query</span><span class="p">})[</span><span class="mi">2</span><span class="p">:],</span>
|
||||
<span class="s1">'lang'</span><span class="p">:</span> <span class="n">lang</span><span class="p">,</span>
|
||||
<span class="s1">'pageno'</span><span class="p">:</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">page_size</span> <span class="o">+</span> <span class="n">first_page_num</span><span class="p">,</span>
|
||||
<span class="s1">'time_range'</span><span class="p">:</span> <span class="n">time_range</span><span class="p">,</span>
|
||||
<span class="s1">'safe_search'</span><span class="p">:</span> <span class="n">safe_search</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'cookies'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">cookies</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">headers</span><span class="p">)</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="n">fargs</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'soft_max_redirects'</span><span class="p">]</span> <span class="o">=</span> <span class="n">soft_max_redirects</span>
|
||||
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'raise_for_httperror'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/xpath.html#searx.engines.xpath.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span> <span class="c1"># pylint: disable=too-many-branches</span>
|
||||
<span class="w"> </span><span class="sd">'''Scrape *results* from the response (see :ref:`engine results`).'''</span>
|
||||
<span class="k">if</span> <span class="n">no_result_for_http_status</span> <span class="ow">and</span> <span class="n">resp</span><span class="o">.</span><span class="n">status_code</span> <span class="ow">in</span> <span class="n">no_result_for_http_status</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">raise_for_httperror</span><span class="p">(</span><span class="n">resp</span><span class="p">)</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
<span class="n">is_onion</span> <span class="o">=</span> <span class="s1">'onions'</span> <span class="ow">in</span> <span class="n">categories</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">results_xpath</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">results_xpath</span><span class="p">):</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">extract_url</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">url_xpath</span><span class="p">,</span> <span class="n">min_len</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span> <span class="n">search_url</span><span class="p">)</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">title_xpath</span><span class="p">,</span> <span class="n">min_len</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">))</span>
|
||||
<span class="n">tmp_result</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">}</span>
|
||||
|
||||
<span class="c1"># add thumbnail if available</span>
|
||||
<span class="k">if</span> <span class="n">thumbnail_xpath</span><span class="p">:</span>
|
||||
<span class="n">thumbnail_xpath_result</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">thumbnail_xpath</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">thumbnail_xpath_result</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">tmp_result</span><span class="p">[</span><span class="s1">'img_src'</span><span class="p">]</span> <span class="o">=</span> <span class="n">extract_url</span><span class="p">(</span><span class="n">thumbnail_xpath_result</span><span class="p">,</span> <span class="n">search_url</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># add alternative cached url if available</span>
|
||||
<span class="k">if</span> <span class="n">cached_xpath</span><span class="p">:</span>
|
||||
<span class="n">tmp_result</span><span class="p">[</span><span class="s1">'cached_url'</span><span class="p">]</span> <span class="o">=</span> <span class="n">cached_url</span> <span class="o">+</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">cached_xpath</span><span class="p">,</span> <span class="n">min_len</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">is_onion</span><span class="p">:</span>
|
||||
<span class="n">tmp_result</span><span class="p">[</span><span class="s1">'is_onion'</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tmp_result</span><span class="p">)</span>
|
||||
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">cached_xpath</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">url</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="n">content</span><span class="p">,</span> <span class="n">cached</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
|
||||
<span class="p">(</span><span class="n">extract_url</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">search_url</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">url_xpath</span><span class="p">)),</span>
|
||||
<span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">title_xpath</span><span class="p">)),</span>
|
||||
<span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">)),</span>
|
||||
<span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">cached_xpath</span><span class="p">)),</span>
|
||||
<span class="p">):</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
|
||||
<span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||||
<span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||||
<span class="s1">'cached_url'</span><span class="p">:</span> <span class="n">cached_url</span> <span class="o">+</span> <span class="n">cached</span><span class="p">,</span>
|
||||
<span class="s1">'is_onion'</span><span class="p">:</span> <span class="n">is_onion</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">url</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="n">content</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
|
||||
<span class="p">(</span><span class="n">extract_url</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">search_url</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">url_xpath</span><span class="p">)),</span>
|
||||
<span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">title_xpath</span><span class="p">)),</span>
|
||||
<span class="nb">map</span><span class="p">(</span><span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">content_xpath</span><span class="p">)),</span>
|
||||
<span class="p">):</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span> <span class="s1">'is_onion'</span><span class="p">:</span> <span class="n">is_onion</span><span class="p">})</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">suggestion_xpath</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">suggestion_xpath</span><span class="p">):</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span>
|
||||
|
||||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"found </span><span class="si">%s</span><span class="s2"> results"</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
300
_modules/searx/engines/yahoo.html
Normal file
300
_modules/searx/engines/yahoo.html
Normal file
|
|
@ -0,0 +1,300 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.yahoo — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.yahoo</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.yahoo</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""Yahoo Search (Web)</span>
|
||||
|
||||
<span class="sd">Languages are supported by mapping the language to a domain. If domain is not</span>
|
||||
<span class="sd">found in :py:obj:`lang2domain` URL ``&lt;lang&gt;.search.yahoo.com`` is used.</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">unquote</span><span class="p">,</span>
|
||||
<span class="n">urlencode</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="p">(</span>
|
||||
<span class="n">eval_xpath_getindex</span><span class="p">,</span>
|
||||
<span class="n">eval_xpath_list</span><span class="p">,</span>
|
||||
<span class="n">extract_text</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s1">'https://search.yahoo.com/'</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="s1">'https://developer.yahoo.com/api/'</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s1">'HTML'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># engine dependent config</span>
|
||||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'general'</span><span class="p">,</span> <span class="s1">'web'</span><span class="p">]</span>
|
||||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="c1"># send_accept_language_header = True</span>
|
||||
|
||||
<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'day'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'1d'</span><span class="p">,</span> <span class="s1">'d'</span><span class="p">),</span>
|
||||
<span class="s1">'week'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'1w'</span><span class="p">,</span> <span class="s1">'w'</span><span class="p">),</span>
|
||||
<span class="s1">'month'</span><span class="p">:</span> <span class="p">(</span><span class="s1">'1m'</span><span class="p">,</span> <span class="s1">'m'</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">lang2domain</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'zh_chs'</span><span class="p">:</span> <span class="s1">'hk.search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'zh_cht'</span><span class="p">:</span> <span class="s1">'tw.search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'any'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'en'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'bg'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'cs'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'da'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'el'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'et'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'he'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'hr'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'ja'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'ko'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'sk'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="s1">'sl'</span><span class="p">:</span> <span class="s1">'search.yahoo.com'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="sd">"""Map language to domain"""</span>
|
||||
|
||||
<span class="n">locale_aliases</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'zh'</span><span class="p">:</span> <span class="s1">'zh_Hans'</span><span class="p">,</span>
|
||||
<span class="s1">'zh-HK'</span><span class="p">:</span> <span class="s1">'zh_Hans'</span><span class="p">,</span>
|
||||
<span class="s1">'zh-CN'</span><span class="p">:</span> <span class="s1">'zh_Hans'</span><span class="p">,</span> <span class="c1"># dead since 2015 / routed to hk.search.yahoo.com</span>
|
||||
<span class="s1">'zh-TW'</span><span class="p">:</span> <span class="s1">'zh_Hant'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="request"><a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.request">[docs]</a><span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""build request"""</span>
|
||||
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">locale_aliases</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">],</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">lang</span><span class="p">:</span>
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">'language'</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'-'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">lang</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">lang</span><span class="p">,</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span>
|
||||
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'pageno'</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="mi">7</span> <span class="o">+</span> <span class="mi">1</span>
|
||||
<span class="n">age</span><span class="p">,</span> <span class="n">btf</span> <span class="o">=</span> <span class="n">time_range_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s1">'time_range'</span><span class="p">],</span> <span class="p">(</span><span class="s1">''</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span>
|
||||
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">(</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'p'</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||||
<span class="s1">'ei'</span><span class="p">:</span> <span class="s1">'UTF-8'</span><span class="p">,</span>
|
||||
<span class="s1">'fl'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
|
||||
<span class="s1">'vl'</span><span class="p">:</span> <span class="s1">'lang_'</span> <span class="o">+</span> <span class="n">lang</span><span class="p">,</span>
|
||||
<span class="s1">'btf'</span><span class="p">:</span> <span class="n">btf</span><span class="p">,</span>
|
||||
<span class="s1">'fr2'</span><span class="p">:</span> <span class="s1">'time'</span><span class="p">,</span>
|
||||
<span class="s1">'age'</span><span class="p">:</span> <span class="n">age</span><span class="p">,</span>
|
||||
<span class="s1">'b'</span><span class="p">:</span> <span class="n">offset</span><span class="p">,</span>
|
||||
<span class="s1">'xargs'</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">domain</span> <span class="o">=</span> <span class="n">lang2domain</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">lang</span><span class="p">,</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">.search.yahoo.com'</span> <span class="o">%</span> <span class="n">lang</span><span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s1">'url'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'https://</span><span class="si">%s</span><span class="s1">/search?</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="parse_url"><a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.parse_url">[docs]</a><span class="k">def</span> <span class="nf">parse_url</span><span class="p">(</span><span class="n">url_string</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""remove yahoo-specific tracking-url"""</span>
|
||||
|
||||
<span class="n">endings</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'/RS'</span><span class="p">,</span> <span class="s1">'/RK'</span><span class="p">]</span>
|
||||
<span class="n">endpositions</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">start</span> <span class="o">=</span> <span class="n">url_string</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'http'</span><span class="p">,</span> <span class="n">url_string</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'/RU='</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">ending</span> <span class="ow">in</span> <span class="n">endings</span><span class="p">:</span>
|
||||
<span class="n">endpos</span> <span class="o">=</span> <span class="n">url_string</span><span class="o">.</span><span class="n">rfind</span><span class="p">(</span><span class="n">ending</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">endpos</span> <span class="o">&gt;</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">endpositions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">endpos</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">start</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">endpositions</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">url_string</span>
|
||||
|
||||
<span class="n">end</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">endpositions</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">unquote</span><span class="p">(</span><span class="n">url_string</span><span class="p">[</span><span class="n">start</span><span class="p">:</span><span class="n">end</span><span class="p">])</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="response"><a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.response">[docs]</a><span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""parse response"""</span>
|
||||
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># parse results</span>
|
||||
<span class="k">for</span> <span class="n">result</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class,"algo-sr")]'</span><span class="p">):</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3/a/@href'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="n">parse_url</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||||
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//h3/a'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">title</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">extract_text</span><span class="p">(</span><span class="n">title</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'span'</span><span class="p">)))</span>
|
||||
<span class="n">title</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">title</span><span class="p">)[</span><span class="n">offset</span><span class="p">:]</span>
|
||||
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">eval_xpath_getindex</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "compText")]'</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span>
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="n">allow_none</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># append result</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'url'</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span> <span class="s1">'title'</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span> <span class="s1">'content'</span><span class="p">:</span> <span class="n">content</span><span class="p">})</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">suggestion</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "AlsoTry")]//table//a'</span><span class="p">):</span>
|
||||
<span class="c1"># append suggestion</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">'suggestion'</span><span class="p">:</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">suggestion</span><span class="p">)})</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/yahoo.html#searx.engines.yahoo.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Fetch languages from yahoo"""</span>
|
||||
|
||||
<span class="c1"># pylint: disable=import-outside-toplevel</span>
|
||||
<span class="kn">import</span> <span class="nn">babel</span>
|
||||
<span class="kn">from</span> <span class="nn">searx</span> <span class="kn">import</span> <span class="n">network</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span>
|
||||
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s1">'any'</span>
|
||||
|
||||
<span class="n">resp</span> <span class="o">=</span> <span class="n">network</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'https://search.yahoo.com/preferences/languages'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"ERROR: response from yahoo is not OK."</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
<span class="n">offset</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="s1">'lang_'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">eng2sxng</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'zh_chs'</span><span class="p">:</span> <span class="s1">'zh_Hans'</span><span class="p">,</span> <span class="s1">'zh_cht'</span><span class="p">:</span> <span class="s1">'zh_Hant'</span><span class="p">}</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s1">'//div[contains(@class, "lang-item")]/input/@value'</span><span class="p">):</span>
|
||||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">val</span><span class="p">[</span><span class="n">offset</span><span class="p">:]</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">eng2sxng</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">)))</span>
|
||||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'ERROR: unknown language --> </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||||
<span class="k">continue</span>
|
||||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
|
||||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||
<div class="sphinxsidebarwrapper">
|
||||
|
||||
|
||||
<p class="logo"><a href="../../../index.html">
|
||||
<img class="logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||||
</a></p>
|
||||
|
||||
|
||||
<h3><a href="../../../index.html">Table of Contents</a></h3>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
|
||||
</ul>
|
||||
|
||||
<h3>Project Links</h3>
|
||||
<ul>
|
||||
<li><a href="https://github.com/searxng/searxng/tree/master">Source</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/wiki">Wiki</a>
|
||||
|
||||
<li><a href="https://searx.space">Public instances</a>
|
||||
|
||||
<li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a>
|
||||
</ul><h3>Navigation</h3>
|
||||
<ul>
|
||||
<li><a href="../../../index.html">Overview</a>
|
||||
<ul>
|
||||
<li><a href="../../index.html">Module code</a>
|
||||
<ul>
|
||||
<li><a href="../engines.html">searx.engines</a>
|
||||
|
||||
|
||||
</ul>
|
||||
</li></ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div id="searchbox" style="display: none" role="search">
|
||||
<h3 id="searchlabel">Quick search</h3>
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||
<input type="submit" value="Go" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<div class="footer" role="contentinfo">
|
||||
© Copyright SearXNG team.
|
||||
</div>
|
||||
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
333
_modules/searx/engines/zlibrary.html
Normal file
333
_modules/searx/engines/zlibrary.html
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>searx.engines.zlibrary — SearXNG Documentation (2023.8.11+905ce2a6f)</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=4f649999" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/searxng.css?v=52e4ff28" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/tabs.css?v=a5c4661c" />
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=3c88bde0"></script>
|
||||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||||
<script src="../../../_static/tabs.js?v=3030b3cb"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head><body>
|
||||
<div class="related" role="navigation" aria-label="related navigation">
|
||||
<h3>Navigation</h3>
|
||||
<ul>
|
||||
<li class="right" style="margin-right: 10px">
|
||||
<a href="../../../genindex.html" title="General Index"
|
||||
accesskey="I">index</a></li>
|
||||
<li class="right" >
|
||||
<a href="../../../py-modindex.html" title="Python Module Index"
|
||||
>modules</a> |</li>
|
||||
<li class="nav-item nav-item-0"><a href="../../../index.html">SearXNG Documentation (2023.8.11+905ce2a6f)</a> »</li>
|
||||
<li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> »</li>
|
||||
<li class="nav-item nav-item-2"><a href="../engines.html" accesskey="U">searx.engines</a> »</li>
|
||||
<li class="nav-item nav-item-this"><a href="">searx.engines.zlibrary</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="document">
|
||||
<div class="documentwrapper">
|
||||
<div class="bodywrapper">
|
||||
<div class="body" role="main">
|
||||
|
||||
<h1>Source code for searx.engines.zlibrary</h1><div class="highlight"><pre>
|
||||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||||
<span class="c1"># lint: pylint</span>
|
||||
<span class="sd">"""`Z-Library`_ (abbreviated as z-lib, formerly BookFinder) is a shadow library</span>
|
||||
<span class="sd">project for file-sharing access to scholarly journal articles, academic texts</span>
|
||||
<span class="sd">and general-interest books. It began as a mirror of Library Genesis, from which</span>
|
||||
<span class="sd">most of its books originate.</span>
|
||||
|
||||
<span class="sd">.. _Z-Library: https://zlibrary-global.se/</span>
|
||||
|
||||
<span class="sd">Configuration</span>
|
||||
<span class="sd">=============</span>
|
||||
|
||||
<span class="sd">The engine has the following additional settings:</span>
|
||||
|
||||
<span class="sd">- :py:obj:`zlib_year_from`</span>
|
||||
<span class="sd">- :py:obj:`zlib_year_to`</span>
|
||||
<span class="sd">- :py:obj:`zlib_ext`</span>
|
||||
|
||||
<span class="sd">With these options a SearXNG maintainer is able to configure **additional**</span>
|
||||
<span class="sd">engines for specific searches in Z-Library. For example, an engine to search</span>
|
||||
<span class="sd">only for EPUB from 2010 to 2020.</span>
|
||||
|
||||
<span class="sd">.. code:: yaml</span>
|
||||
|
||||
<span class="sd"> - name: z-library 2010s epub</span>
|
||||
<span class="sd"> engine: zlibrary</span>
|
||||
<span class="sd"> shortcut: zlib2010s</span>
|
||||
<span class="sd"> zlib_year_from: '2010'</span>
|
||||
<span class="sd"> zlib_year_to: '2020'</span>
|
||||
<span class="sd"> zlib_ext: 'EPUB'</span>
|
||||
|
||||
<span class="sd">Implementations</span>
|
||||
<span class="sd">===============</span>
|
||||
|
||||
<span class="sd">"""</span>
|
||||
<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">annotations</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">TYPE_CHECKING</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Optional</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
|
||||
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">quote</span>
|
||||
<span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">html</span>
|
||||
<span class="kn">from</span> <span class="nn">flask_babel</span> <span class="kn">import</span> <span class="n">gettext</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">searx.utils</span> <span class="kn">import</span> <span class="n">extract_text</span><span class="p">,</span> <span class="n">eval_xpath</span><span class="p">,</span> <span class="n">eval_xpath_list</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.enginelib.traits</span> <span class="kn">import</span> <span class="n">EngineTraits</span>
|
||||
<span class="kn">from</span> <span class="nn">searx.data</span> <span class="kn">import</span> <span class="n">ENGINE_TRAITS</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">httpx</span>
|
||||
<span class="kn">import</span> <span class="nn">logging</span>
|
||||
|
||||
<span class="n">logger</span><span class="p">:</span> <span class="n">logging</span><span class="o">.</span><span class="n">Logger</span>
|
||||
|
||||
<span class="c1"># about</span>
|
||||
<span class="n">about</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"website"</span><span class="p">:</span> <span class="s2">"https://zlibrary-global.se"</span><span class="p">,</span>
|
||||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s2">"Q104863992"</span><span class="p">,</span>
|
||||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||||
<span class="s2">"results"</span><span class="p">:</span> <span class="s2">"HTML"</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">categories</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"files"</span><span class="p">]</span>
|
||||
<span class="n">paging</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"https://zlibrary-global.se"</span>
|
||||
|
||||
<span class="n">zlib_year_from</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="sd">"""Filter z-library's results by year from. E.g. '2010'.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">zlib_year_to</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="sd">"""Filter z-library's results by year to. E.g. '2010'.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
<span class="n">zlib_ext</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="sd">"""Filter z-library's results by a file ending. Common filters for example are</span>
|
||||
<span class="sd">``PDF`` and ``EPUB``.</span>
|
||||
<span class="sd">"""</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="init"><a class="viewcode-back" href="../../../dev/engines/online/zlibrary.html#searx.engines.zlibrary.init">[docs]</a><span class="k">def</span> <span class="nf">init</span><span class="p">(</span><span class="n">engine_settings</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span> <span class="c1"># pylint: disable=unused-argument</span>
|
||||
<span class="w"> </span><span class="sd">"""Check of engine's settings."""</span>
|
||||
<span class="n">traits</span><span class="p">:</span> <span class="n">EngineTraits</span> <span class="o">=</span> <span class="n">EngineTraits</span><span class="p">(</span><span class="o">**</span><span class="n">ENGINE_TRAITS</span><span class="p">[</span><span class="s2">"z-library"</span><span class="p">])</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">zlib_ext</span> <span class="ow">and</span> <span class="n">zlib_ext</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ext"</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"invalid setting ext: </span><span class="si">{</span><span class="n">zlib_ext</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">zlib_year_from</span> <span class="ow">and</span> <span class="n">zlib_year_from</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_from"</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"invalid setting year_from: </span><span class="si">{</span><span class="n">zlib_year_from</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">zlib_year_to</span> <span class="ow">and</span> <span class="n">zlib_year_to</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_to"</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"invalid setting year_to: </span><span class="si">{</span><span class="n">zlib_year_to</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
|
||||
<span class="n">lang</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"language"</span><span class="p">],</span> <span class="n">traits</span><span class="o">.</span><span class="n">all_locale</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||||
<span class="n">search_url</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="n">base_url</span>
|
||||
<span class="o">+</span> <span class="s2">"/s/</span><span class="si">{search_query}</span><span class="s2">/?page=</span><span class="si">{pageno}</span><span class="s2">"</span>
|
||||
<span class="o">+</span> <span class="s2">"&yearFrom=</span><span class="si">{zlib_year_from}</span><span class="s2">"</span>
|
||||
<span class="o">+</span> <span class="s2">"&yearTo=</span><span class="si">{zlib_year_to}</span><span class="s2">"</span>
|
||||
<span class="o">+</span> <span class="s2">"&languages[]=</span><span class="si">{lang}</span><span class="s2">"</span>
|
||||
<span class="o">+</span> <span class="s2">"&extensions[]=</span><span class="si">{zlib_ext}</span><span class="s2">"</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
|
||||
<span class="n">search_query</span><span class="o">=</span><span class="n">quote</span><span class="p">(</span><span class="n">query</span><span class="p">),</span>
|
||||
<span class="n">pageno</span><span class="o">=</span><span class="n">params</span><span class="p">[</span><span class="s2">"pageno"</span><span class="p">],</span>
|
||||
<span class="n">lang</span><span class="o">=</span><span class="n">lang</span><span class="p">,</span>
|
||||
<span class="n">zlib_year_from</span><span class="o">=</span><span class="n">zlib_year_from</span><span class="p">,</span>
|
||||
<span class="n">zlib_year_to</span><span class="o">=</span><span class="n">zlib_year_to</span><span class="p">,</span>
|
||||
<span class="n">zlib_ext</span><span class="o">=</span><span class="n">zlib_ext</span><span class="p">,</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">params</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">:</span> <span class="n">httpx</span><span class="o">.</span><span class="n">Response</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
|
||||
<span class="n">results</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'</span><span class="p">):</span>
|
||||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_parse_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">results</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">selector</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">eval_xpath</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">selector</span><span class="p">))</span>
|
||||
|
||||
|
||||
<span class="n">i18n_language</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"Language"</span><span class="p">)</span>
|
||||
<span class="n">i18n_book_rating</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"Book rating"</span><span class="p">)</span>
|
||||
<span class="n">i18n_file_quality</span> <span class="o">=</span> <span class="n">gettext</span><span class="p">(</span><span class="s2">"File quality"</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_parse_result</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
|
||||
|
||||
<span class="n">author_elements</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[@class="authors"]//a[@itemprop="author"]'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">result</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"template"</span><span class="p">:</span> <span class="s2">"paper.html"</span><span class="p">,</span>
|
||||
<span class="s2">"url"</span><span class="p">:</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">item</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'(.//a[starts-with(@href, "/book/")])[1]/@href'</span><span class="p">)[</span><span class="mi">0</span><span class="p">],</span>
|
||||
<span class="s2">"title"</span><span class="p">:</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//*[@itemprop="name"]'</span><span class="p">),</span>
|
||||
<span class="s2">"authors"</span><span class="p">:</span> <span class="p">[</span><span class="n">extract_text</span><span class="p">(</span><span class="n">author</span><span class="p">)</span> <span class="k">for</span> <span class="n">author</span> <span class="ow">in</span> <span class="n">author_elements</span><span class="p">],</span>
|
||||
<span class="s2">"publisher"</span><span class="p">:</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//a[@title="Publisher"]'</span><span class="p">),</span>
|
||||
<span class="s2">"type"</span><span class="p">:</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'</span><span class="p">),</span>
|
||||
<span class="s2">"img_src"</span><span class="p">:</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//img[contains(@class, "cover")]/@data-src'</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">year</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "property_year")]//div[contains(@class, "property_value")]'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">year</span><span class="p">:</span>
|
||||
<span class="n">result</span><span class="p">[</span><span class="s2">"publishedDate"</span><span class="p">]</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="s1">'%Y'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">content</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">language</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//div[contains(@class, "property_language")]//div[contains(@class, "property_value")]'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">language</span><span class="p">:</span>
|
||||
<span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">i18n_language</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">language</span><span class="o">.</span><span class="n">capitalize</span><span class="p">()</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
<span class="n">book_rating</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//span[contains(@class, "book-rating-interest-score")]'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">book_rating</span> <span class="ow">and</span> <span class="nb">float</span><span class="p">(</span><span class="n">book_rating</span><span class="p">):</span>
|
||||
<span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">i18n_book_rating</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">book_rating</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
<span class="n">file_quality</span> <span class="o">=</span> <span class="n">_text</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="s1">'.//span[contains(@class, "book-rating-quality-score")]'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">file_quality</span> <span class="ow">and</span> <span class="nb">float</span><span class="p">(</span><span class="n">file_quality</span><span class="p">):</span>
|
||||
<span class="n">content</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">i18n_file_quality</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">file_quality</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
<span class="n">result</span><span class="p">[</span><span class="s2">"content"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">" | "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">result</span>
|
||||
|
||||
|
||||
<!-- Highlighted listing of searx.engines.zlibrary.fetch_traits; the [docs] anchor links back to its API documentation entry. This block sits inside a <pre>, so every space and newline below is rendered verbatim. --><div class="viewcode-block" id="fetch_traits"><a class="viewcode-back" href="../../../dev/engines/online/zlibrary.html#searx.engines.zlibrary.fetch_traits">[docs]</a><span class="k">def</span> <span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w">    </span><span class="sd">"""Fetch languages and other search arguments from zlibrary's search form."""</span>
    <span class="c1"># pylint: disable=import-outside-toplevel</span>

    <span class="kn">import</span> <span class="nn">babel</span>
    <span class="kn">from</span> <span class="nn">searx.network</span> <span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
    <span class="kn">from</span> <span class="nn">searx.locales</span> <span class="kn">import</span> <span class="n">language_tag</span>

    <span class="n">engine_traits</span><span class="o">.</span><span class="n">all_locale</span> <span class="o">=</span> <span class="s2">""</span>
    <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ext"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_from"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_to"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>

    <span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">base_url</span><span class="p">)</span>
    <span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span> <span class="c1"># type: ignore</span>
        <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from zlibrary's search page is not OK."</span><span class="p">)</span>
    <span class="n">dom</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="c1"># type: ignore</span>

    <span class="k">for</span> <span class="n">year</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"</span><span class="p">):</span>
        <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_from"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">year</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span>

    <span class="k">for</span> <span class="n">year</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_yearTo']/option"</span><span class="p">):</span>
        <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"year_to"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">year</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span>

    <span class="k">for</span> <span class="n">ext</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"</span><span class="p">):</span>
        <span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">ext</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
            <span class="n">value</span> <span class="o">=</span> <span class="s2">""</span>
        <span class="n">engine_traits</span><span class="o">.</span><span class="n">custom</span><span class="p">[</span><span class="s2">"ext"</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>

    <span class="c1"># Handle languages</span>
    <span class="c1"># Z-library uses English names for languages, so we need to map them to their respective locales</span>
    <span class="n">language_name_locale_map</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
    <span class="k">for</span> <span class="n">locale</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">localedata</span><span class="o">.</span><span class="n">locale_identifiers</span><span class="p">():</span> <span class="c1"># type: ignore</span>
        <span class="c1"># Create a Locale object for the current locale</span>
        <span class="n">loc</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span>
        <span class="n">language_name_locale_map</span><span class="p">[</span><span class="n">loc</span><span class="o">.</span><span class="n">english_name</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span> <span class="o">=</span> <span class="n">loc</span> <span class="c1"># type: ignore</span>

    <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="s2">"//div[@id='advSearch-noJS']//select[@id='sf_languages']/option"</span><span class="p">):</span>
        <span class="n">eng_lang</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">eng_lang</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
            <span class="k">continue</span>
        <span class="k">try</span><span class="p">:</span>
            <span class="n">locale</span> <span class="o">=</span> <span class="n">language_name_locale_map</span><span class="p">[</span><span class="n">eng_lang</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span>
        <span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
            <span class="c1"># silently ignore unknown languages</span>
            <span class="c1"># print("ERROR: %s is unknown by babel" % (eng_lang))</span>
            <span class="k">continue</span>
        <span class="n">sxng_lang</span> <span class="o">=</span> <span class="n">language_tag</span><span class="p">(</span><span class="n">locale</span><span class="p">)</span>
        <span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_lang</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
            <span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_lang</span><span class="p">:</span>
                <span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --&gt; </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_lang</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_lang</span><span class="p">))</span>
            <span class="k">continue</span>
        <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_lang</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_lang</span></div>
|
||||
</pre></div>
|
||||
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<span id="sidebar-top"></span>
<!-- Sidebar: logo link, table of contents, project links, breadcrumb-style
     navigation for the current page, and the quick search form. -->
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
  <div class="sphinxsidebarwrapper">
    <p class="logo"><a href="../../../index.html">
      <img class="logo" src="../../../_static/searxng-wordmark.svg" alt="SearXNG Documentation">
    </a></p>

    <h3><a href="../../../index.html">Table of Contents</a></h3>
    <ul>
      <li class="toctree-l1"><a class="reference internal" href="../../../user/index.html">User information</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a></li>
      <li class="toctree-l1"><a class="reference internal" href="../../../src/index.html">Source-Code</a></li>
    </ul>

    <h3>Project Links</h3>
    <ul>
      <li><a href="https://github.com/searxng/searxng/tree/master">Source</a></li>
      <li><a href="https://github.com/searxng/searxng/wiki">Wiki</a></li>
      <li><a href="https://searx.space">Public instances</a></li>
      <li><a href="https://github.com/searxng/searxng/issues">Issue Tracker</a></li>
    </ul>

    <h3>Navigation</h3>
    <!-- One nested list per ancestor page; every <ul> and <li> is closed exactly once. -->
    <ul>
      <li><a href="../../../index.html">Overview</a>
        <ul>
          <li><a href="../../index.html">Module code</a>
            <ul>
              <li><a href="../engines.html">searx.engines</a></li>
            </ul>
          </li>
        </ul>
      </li>
    </ul>

    <!-- Hidden by default; the inline script after this block reveals it when JavaScript runs. -->
    <div id="searchbox" style="display: none" role="search">
      <h3 id="searchlabel">Quick search</h3>
      <div class="searchformwrapper">
        <form class="search" action="../../../search.html" method="get">
          <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false">
          <input type="submit" value="Go">
        </form>
      </div>
    </div>
    <script>document.getElementById('searchbox').style.display = "block"</script>
  </div>
</div>
|
||||
<div class="clearer"></div>
|
||||
</div>
|
||||
|
||||
<!-- Page footer with the copyright notice, followed by the last script loaded before </body>. -->
<div class="footer" role="contentinfo">
  © Copyright SearXNG team.
</div>
<script src="../../../_static/version_warning_offset.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Add table
Add a link
Reference in a new issue