[enh] general-file.com engine added

2024-01-01 19:24:07 +01:00 · 2014-06-27 17:25:16 +02:00 · 2014-06-27 17:25:16 +02:00 · 8b0cb686d5
commit 8b0cb686d5
parent 96c8b20a04
1 changed files with 35 additions and 0 deletions
--- a/searx/engines/generalfile.py
+++ b/searx/engines/generalfile.py
@@ -0,0 +1,35 @@
+from lxml import html
+
+
+# Root of the site; response() prefixes it to the site-relative result links.
+base_url = 'http://www.general-file.com'
+# URL template filled in by request(): {letter} receives the first character
+# of the query, {query} the query itself and {pageno} the requested page.
+search_url = base_url + '/files-{letter}/{query}/{pageno}'
+
+# XPath selecting each result block in the page; the three expressions below
+# are evaluated relative to one such block in response().
+result_xpath = '//table[@class="block-file"]'
+title_xpath = './/h2/a//text()'
+url_xpath = './/h2/a/@href'
+content_xpath = './/p//text()'
+
+# request() reads params['pageno']; presumably this flag tells the engine
+# loader that pagination is supported -- TODO confirm against the loader.
+paging = True
+
+
+def request(query, params):
+    params['url'] = search_url.format(query=query,
+                                      letter=query[0],
+                                      pageno=params['pageno'])
+    return params
+
+
+def response(resp):
+
+    results = []
+    dom = html.fromstring(resp.text)
+    for result in dom.xpath(result_xpath):
+        url = result.xpath(url_xpath)[0]
+        # skip fast download links
+        if not url.startswith('/'):
+            continue
+        results.append({'url': base_url + url,
+                        'title': ''.join(result.xpath(title_xpath)),
+                        'content': ''.join(result.xpath(content_xpath))})
+
+    return results