mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge branch 'master' into devel_google_videos
This commit is contained in:
		
						commit
						1ea56576dc
					
				
					 9 changed files with 27261 additions and 167 deletions
				
			
		|  | @ -32,6 +32,7 @@ RUN echo "@commuedge http://nl.alpinelinux.org/alpine/edge/community" >> /etc/ap | |||
|     openssl-dev \ | ||||
|     ca-certificates \ | ||||
|     tini@commuedge \ | ||||
|  && pip install --upgrade pip \ | ||||
|  && pip install --no-cache -r requirements.txt \ | ||||
|  && apk del \ | ||||
|     build-base \ | ||||
|  |  | |||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							|  | @ -88,9 +88,7 @@ def response(resp): | |||
| 
 | ||||
|         url = json_data.get('purl') | ||||
|         img_src = json_data.get('murl') | ||||
| 
 | ||||
|         thumb_json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('mad'))) | ||||
|         thumbnail = thumb_json_data.get('turl') | ||||
|         thumbnail = json_data.get('turl') | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'template': 'images.html', | ||||
|  |  | |||
|  | @ -1,115 +0,0 @@ | |||
| """ | ||||
| FindX (General, Images, Videos) | ||||
| 
 | ||||
| @website     https://www.findx.com | ||||
| @provide-api no | ||||
| @using-api   no | ||||
| @results     HTML | ||||
| @stable      no | ||||
| @parse       url, title, content, embedded, img_src, thumbnail_src | ||||
| """ | ||||
| 
 | ||||
| from dateutil import parser | ||||
| from json import loads | ||||
| import re | ||||
| 
 | ||||
| from lxml import html | ||||
| 
 | ||||
| from searx import logger | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.engines.youtube_noapi import base_youtube_url, embedded_url | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| 
 | ||||
| paging = True | ||||
| results_xpath = '//script[@id="initial-state"]' | ||||
| search_url = 'https://www.findx.com/{category}?{q}' | ||||
| type_map = { | ||||
|     'none': 'web', | ||||
|     'general': 'web', | ||||
|     'images': 'images', | ||||
|     'videos': 'videos', | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     params['url'] = search_url.format( | ||||
|         category=type_map[params['category']], | ||||
|         q=urlencode({ | ||||
|             'q': query, | ||||
|             'page': params['pageno'] | ||||
|         }) | ||||
|     ) | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| def response(resp): | ||||
|     dom = html.fromstring(resp.text) | ||||
|     results_raw_json = dom.xpath(results_xpath) | ||||
|     results_json = loads(extract_text(results_raw_json)) | ||||
| 
 | ||||
|     if len(results_json['web']['results']) > 0: | ||||
|         return _general_results(results_json['web']['results']['webSearch']['results']) | ||||
| 
 | ||||
|     if len(results_json['images']['results']) > 0: | ||||
|         return _images_results(results_json['images']['results']) | ||||
| 
 | ||||
|     if len(results_json['video']['results']) > 0: | ||||
|         return _videos_results(results_json['video']['results']) | ||||
| 
 | ||||
|     return [] | ||||
| 
 | ||||
| 
 | ||||
| def _general_results(general_results): | ||||
|     results = [] | ||||
|     for result in general_results: | ||||
|         results.append({ | ||||
|             'url': result['url'], | ||||
|             'title': result['title'], | ||||
|             'content': result['sum'], | ||||
|         }) | ||||
|     return results | ||||
| 
 | ||||
| 
 | ||||
| def _images_results(image_results): | ||||
|     results = [] | ||||
|     for result in image_results: | ||||
|         results.append({ | ||||
|             'url': result['sourceURL'], | ||||
|             'title': result['title'], | ||||
|             'content': result['source'], | ||||
|             'thumbnail_src': _extract_url(result['assets']['thumb']['url']), | ||||
|             'img_src': _extract_url(result['assets']['file']['url']), | ||||
|             'template': 'images.html', | ||||
|         }) | ||||
|     return results | ||||
| 
 | ||||
| 
 | ||||
| def _videos_results(video_results): | ||||
|     results = [] | ||||
|     for result in video_results: | ||||
|         if not result['kind'].startswith('youtube'): | ||||
|             logger.warn('Unknown video kind in findx: {}'.format(result['kind'])) | ||||
|             continue | ||||
| 
 | ||||
|         description = result['snippet']['description'] | ||||
|         if len(description) > 300: | ||||
|             description = description[:300] + '...' | ||||
| 
 | ||||
|         results.append({ | ||||
|             'url': base_youtube_url + result['id'], | ||||
|             'title': result['snippet']['title'], | ||||
|             'content': description, | ||||
|             'thumbnail': _extract_url(result['snippet']['thumbnails']['default']['url']), | ||||
|             'publishedDate': parser.parse(result['snippet']['publishedAt']), | ||||
|             'embedded': embedded_url.format(videoid=result['id']), | ||||
|             'template': 'videos.html', | ||||
|         }) | ||||
|     return results | ||||
| 
 | ||||
| 
 | ||||
| def _extract_url(url): | ||||
|     matching = re.search('(/https?://[^)]+)', url) | ||||
|     if matching: | ||||
|         return matching.group(0)[1:] | ||||
|     return '' | ||||
|  | @ -32,8 +32,9 @@ search_url = base_url + 'do/search' | |||
| # specific xpath variables | ||||
| # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] | ||||
| # not ads: div[@class="result"] are the direct childs of div[@id="results"] | ||||
| results_xpath = '//div[@class="result"]' | ||||
| results_xpath = '//li[contains(@class, "search-result") and contains(@class, "search-item")]' | ||||
| link_xpath = './/h3/a' | ||||
| content_xpath = './p[@class="search-item__body"]' | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
|  | @ -73,14 +74,10 @@ def response(resp): | |||
|         if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): | ||||
|             continue | ||||
| 
 | ||||
|         # block ixquick search url's | ||||
|         if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url): | ||||
|             continue | ||||
| 
 | ||||
|         title = extract_text(link) | ||||
| 
 | ||||
|         if result.xpath('./p[@class="desc clk"]'): | ||||
|             content = extract_text(result.xpath('./p[@class="desc clk"]')) | ||||
|         if result.xpath(content_xpath): | ||||
|             content = extract_text(result.xpath(content_xpath)) | ||||
|         else: | ||||
|             content = '' | ||||
| 
 | ||||
|  |  | |||
|  | @ -218,24 +218,6 @@ engines: | |||
|     shortcut : fd | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : findx | ||||
|     engine : findx | ||||
|     shortcut : fx | ||||
|     categories : general | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : findx images | ||||
|     engine : findx | ||||
|     shortcut : fxi | ||||
|     categories : images | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : findx videos | ||||
|     engine : findx | ||||
|     shortcut : fxv | ||||
|     categories : videos | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : flickr | ||||
|     categories : images | ||||
|     shortcut : fl | ||||
|  | @ -597,14 +579,6 @@ engines: | |||
|     timeout : 6.0 | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : ixquick | ||||
|     engine : startpage | ||||
|     base_url : 'https://www.ixquick.eu/' | ||||
|     search_url : 'https://www.ixquick.eu/do/search' | ||||
|     shortcut : iq | ||||
|     timeout : 6.0 | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : swisscows | ||||
|     engine : swisscows | ||||
|     shortcut : sw | ||||
|  | @ -723,6 +697,19 @@ engines: | |||
|     shortcut : du | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : seznam | ||||
|     shortcut: szn | ||||
|     engine: xpath | ||||
|     paging : True | ||||
|     search_url : https://search.seznam.cz/?q={query}&count=10&from={pageno} | ||||
|     results_xpath: //div[@class="Page-content"]//div[@class="Result "] | ||||
|     url_xpath : ./h3/a/@href | ||||
|     title_xpath : ./h3 | ||||
|     content_xpath : .//p[@class="Result-description"] | ||||
|     first_page_num : 0 | ||||
|     page_size : 10 | ||||
|     disabled : True | ||||
| 
 | ||||
| #  - name : yacy | ||||
| #    engine : yacy | ||||
| #    shortcut : ya | ||||
|  |  | |||
|  | @ -52,7 +52,7 @@ class TestBingImagesEngine(SearxTestCase): | |||
|                 <li> | ||||
|                     <div> | ||||
|                         <div class="imgpt"> | ||||
|                             <a m='{"purl":"page_url","murl":"img_url"}' mad='{"turl":"thumb_url"}'> | ||||
|                             <a m='{"purl":"page_url","murl":"img_url","turl":"thumb_url"}'> | ||||
|                                 <img src="" alt="alt text" /> | ||||
|                             </a> | ||||
|                         </div> | ||||
|  | @ -60,7 +60,7 @@ class TestBingImagesEngine(SearxTestCase): | |||
|                     </div> | ||||
|                     <div> | ||||
|                         <div class="imgpt"> | ||||
|                             <a m='{"purl":"page_url2","murl":"img_url2"}' mad='{"turl":"thumb_url2"}'> | ||||
|                             <a m='{"purl":"page_url2","murl":"img_url2","turl":"thumb_url2"}'> | ||||
|                                 <img src="" alt="alt text 2" /> | ||||
|                             </a> | ||||
|                         </div> | ||||
|  | @ -71,7 +71,7 @@ class TestBingImagesEngine(SearxTestCase): | |||
|                 <li> | ||||
|                     <div> | ||||
|                         <div class="imgpt"> | ||||
|                             <a m='{"purl":"page_url3","murl":"img_url3"}' mad='{"turl":"thumb_url3"}'> | ||||
|                             <a m='{"purl":"page_url3","murl":"img_url3","turl":"thumb_url3"}'> | ||||
|                                 <img src="" alt="alt text 3" /> | ||||
|                             </a> | ||||
|                         </div> | ||||
|  |  | |||
|  | @ -31,14 +31,14 @@ class TestStartpageEngine(SearxTestCase): | |||
|         self.assertEqual(startpage.response(response), []) | ||||
| 
 | ||||
|         html = """ | ||||
|         <div class='result' style=' *width : auto; *margin-right : 10%;'> | ||||
|         <li class="search-result search-item"> | ||||
|             <h3> | ||||
|                 <a href='http://this.should.be.the.link/' id='title_2' name='title_2' > | ||||
|                     This should be the title | ||||
|                 </a> | ||||
|                 <span id='title_stars_2' name='title_stars_2'>  </span> | ||||
|             </h3> | ||||
|             <p class='desc clk'> | ||||
|             <p class="search-item__body"> | ||||
|                 This should be the content. | ||||
|             </p> | ||||
|             <p> | ||||
|  | @ -56,7 +56,7 @@ class TestStartpageEngine(SearxTestCase): | |||
|                     Mis en surbrillance | ||||
|                 </A> | ||||
|             </p> | ||||
|         </div> | ||||
|         </li> | ||||
|         """ | ||||
|         response = mock.Mock(text=html.encode('utf-8')) | ||||
|         results = startpage.response(response) | ||||
|  | @ -67,14 +67,14 @@ class TestStartpageEngine(SearxTestCase): | |||
|         self.assertEqual(results[0]['content'], 'This should be the content.') | ||||
| 
 | ||||
|         html = """ | ||||
|         <div class='result' style=' *width : auto; *margin-right : 10%;'> | ||||
|         <li class="search-result search-item"> | ||||
|             <h3> | ||||
|                 <a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' > | ||||
|                     This should be the title | ||||
|                 </a> | ||||
|                 <span id='title_stars_2' name='title_stars_2'>  </span> | ||||
|             </h3> | ||||
|             <p class='desc clk'> | ||||
|             <p class="search-item__body"> | ||||
|                 This should be the content. | ||||
|             </p> | ||||
|             <p> | ||||
|  | @ -92,20 +92,20 @@ class TestStartpageEngine(SearxTestCase): | |||
|                     Mis en surbrillance | ||||
|                 </A> | ||||
|             </p> | ||||
|         </div> | ||||
|         <div class='result' style=' *width : auto; *margin-right : 10%;'> | ||||
|         </li> | ||||
|         <li class="search-result search-item"> | ||||
|             <h3> | ||||
|                 <span id='title_stars_2' name='title_stars_2'>  </span> | ||||
|             </h3> | ||||
|             <p class='desc clk'> | ||||
|             <p class="search-item__body"> | ||||
|                 This should be the content. | ||||
|             </p> | ||||
|             <p> | ||||
|                 <span class='url'>www.speed<b>test</b>.net/fr/ | ||||
|                 </span> | ||||
|             </p> | ||||
|         </div> | ||||
|         <div class='result' style=' *width : auto; *margin-right : 10%;'> | ||||
|         </li> | ||||
|         <li class="search-result search-item"> | ||||
|             <h3> | ||||
|                 <a href='http://this.should.be.the.link/' id='title_2' name='title_2' > | ||||
|                     This should be the title | ||||
|  | @ -127,7 +127,7 @@ class TestStartpageEngine(SearxTestCase): | |||
|                     Mis en surbrillance | ||||
|                 </A> | ||||
|             </p> | ||||
|         </div> | ||||
|         </li> | ||||
|         """ | ||||
|         response = mock.Mock(text=html.encode('utf-8')) | ||||
|         results = startpage.response(response) | ||||
|  |  | |||
|  | @ -27,12 +27,14 @@ def fetch_supported_languages(): | |||
|         if hasattr(engines[engine_name], 'fetch_supported_languages'): | ||||
|             try: | ||||
|                 engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() | ||||
|                 if type(engines_languages[engine_name]) == list: | ||||
|                     engines_languages[engine_name] = sorted(engines_languages[engine_name]) | ||||
|             except Exception as e: | ||||
|                 print(e) | ||||
| 
 | ||||
|     # write json file | ||||
|     with io.open(engines_languages_file, "w", encoding="utf-8") as f: | ||||
|         dump(engines_languages, f, ensure_ascii=False) | ||||
|         dump(engines_languages, f, ensure_ascii=False, indent=4, separators=(',', ': ')) | ||||
| 
 | ||||
|     return engines_languages | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Noémi Ványi
						Noémi Ványi