mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
o
This commit is contained in:
parent
8f16390022
commit
a81e99ecf4
2 changed files with 14675 additions and 3 deletions
14600
searx/keywords
Normal file
14600
searx/keywords
Normal file
File diff suppressed because it is too large
Load diff
|
@ -18,6 +18,7 @@ import markdown
|
||||||
import re
|
import re
|
||||||
import datetime
|
import datetime
|
||||||
from textrank4zh import TextRank4Keyword, TextRank4Sentence
|
from textrank4zh import TextRank4Keyword, TextRank4Sentence
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
from timeit import default_timer
|
from timeit import default_timer
|
||||||
from html import escape
|
from html import escape
|
||||||
|
@ -701,6 +702,8 @@ def search():
|
||||||
try:
|
try:
|
||||||
search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
|
search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
|
||||||
# search = Search(search_query) # without plugins
|
# search = Search(search_query) # without plugins
|
||||||
|
if request.environ['HTTP_CF_IPCOUNTRY'] == 'CN' and gfw.filter(search_query.query):
|
||||||
|
return index_error(output_format, 'No query'), 400
|
||||||
try:
|
try:
|
||||||
original_search_query = search_query.query
|
original_search_query = search_query.query
|
||||||
if "模仿" in search_query.query or "扮演" in search_query.query or "你能" in search_query.query or "请推荐" in search_query.query or "帮我" in search_query.query or "写一段" in search_query.query or "写一个" in search_query.query or "请问" in search_query.query or "请给" in search_query.query or "请你" in search_query.query or "请推荐" in search_query.query or "是谁" in search_query.query or "能帮忙" in search_query.query or "介绍一下" in search_query.query or "为什么" in search_query.query or "什么是" in search_query.query or "有什么" in search_query.query or "怎样" in search_query.query or "给我" in search_query.query or "如何" in search_query.query or "谁是" in search_query.query or "查询" in search_query.query or "告诉我" in search_query.query or "查一下" in search_query.query or "找一个" in search_query.query or "什么样" in search_query.query or "哪个" in search_query.query or "哪些" in search_query.query or "哪一个" in search_query.query or "哪一些" in search_query.query or "啥是" in search_query.query or "为啥" in search_query.query or "怎么" in search_query.query:
|
if "模仿" in search_query.query or "扮演" in search_query.query or "你能" in search_query.query or "请推荐" in search_query.query or "帮我" in search_query.query or "写一段" in search_query.query or "写一个" in search_query.query or "请问" in search_query.query or "请给" in search_query.query or "请你" in search_query.query or "请推荐" in search_query.query or "是谁" in search_query.query or "能帮忙" in search_query.query or "介绍一下" in search_query.query or "为什么" in search_query.query or "什么是" in search_query.query or "有什么" in search_query.query or "怎样" in search_query.query or "给我" in search_query.query or "如何" in search_query.query or "谁是" in search_query.query or "查询" in search_query.query or "告诉我" in search_query.query or "查一下" in search_query.query or "找一个" in search_query.query or "什么样" in search_query.query or "哪个" in search_query.query or "哪些" in search_query.query or "哪一个" in search_query.query or "哪一些" in search_query.query or "啥是" in search_query.query or "为啥" in search_query.query or "怎么" in search_query.query:
|
||||||
|
@ -713,7 +716,7 @@ def search():
|
||||||
search_type = '任务'
|
search_type = '任务'
|
||||||
net_search = False
|
net_search = False
|
||||||
net_search_str = 'false'
|
net_search_str = 'false'
|
||||||
elif len(original_query)>10:
|
elif len(original_search_query)>10:
|
||||||
prompt = "任务:写诗 写故事 写代码 写论文摘要 模仿推特用户 生成搜索广告 回答问题 聊天话题 搜索网页 搜索视频 搜索地图 搜索新闻 查看食谱 搜索商品 写歌词 写论文 模仿名人 翻译语言 摘要文章 讲笑话 做数学题 搜索图片 播放音乐 查看天气\n1.判断是以上任务的哪一个2.判断是否需要联网回答3.给出搜索关键词\n"
|
prompt = "任务:写诗 写故事 写代码 写论文摘要 模仿推特用户 生成搜索广告 回答问题 聊天话题 搜索网页 搜索视频 搜索地图 搜索新闻 查看食谱 搜索商品 写歌词 写论文 模仿名人 翻译语言 摘要文章 讲笑话 做数学题 搜索图片 播放音乐 查看天气\n1.判断是以上任务的哪一个2.判断是否需要联网回答3.给出搜索关键词\n"
|
||||||
prompt = prompt + "提问:" + search_query.query + '答案用json数组例如["写诗","否","详细关键词"]来表述\n答案:'
|
prompt = prompt + "提问:" + search_query.query + '答案用json数组例如["写诗","否","详细关键词"]来表述\n答案:'
|
||||||
acts = ['写诗', '写故事', '写代码', '写论文摘要', '模仿推特用户', '生成搜索广告', '回答问题', '聊天话题', '搜索网页', '搜索视频', '搜索地图', '搜索新闻', '查看食谱', '搜索商品', '写歌词', '写论文', '模仿名人', '翻译语言', '摘要文章', '讲笑话', '做数学题', '搜索图片', '播放音乐', '查看天气']
|
acts = ['写诗', '写故事', '写代码', '写论文摘要', '模仿推特用户', '生成搜索广告', '回答问题', '聊天话题', '搜索网页', '搜索视频', '搜索地图', '搜索新闻', '查看食谱', '搜索商品', '写歌词', '写论文', '模仿名人', '翻译语言', '摘要文章', '讲笑话', '做数学题', '搜索图片', '播放音乐', '查看天气']
|
||||||
|
@ -739,6 +742,7 @@ def search():
|
||||||
"logprobs": 0,
|
"logprobs": 0,
|
||||||
"stream": False
|
"stream": False
|
||||||
}
|
}
|
||||||
|
gpt_json={}
|
||||||
if prompt and prompt !='' :
|
if prompt and prompt !='' :
|
||||||
gpt_response = requests.post(gpt_url, headers=gpt_headers, data=json.dumps(gpt_data))
|
gpt_response = requests.post(gpt_url, headers=gpt_headers, data=json.dumps(gpt_data))
|
||||||
gpt_json = gpt_response.json()
|
gpt_json = gpt_response.json()
|
||||||
|
@ -795,9 +799,14 @@ def search():
|
||||||
url_proxy = {}
|
url_proxy = {}
|
||||||
prompt = ""
|
prompt = ""
|
||||||
for res in results:
|
for res in results:
|
||||||
|
results.remove(res)
|
||||||
if 'url' not in res: continue
|
if 'url' not in res: continue
|
||||||
if 'content' not in res: continue
|
|
||||||
if 'title' not in res: continue
|
if 'title' not in res: continue
|
||||||
|
if request.environ['HTTP_CF_IPCOUNTRY'] == 'CN' and gfw.filter(res['title']):
|
||||||
|
if 'content' not in res: continue
|
||||||
|
if request.environ['HTTP_CF_IPCOUNTRY'] == 'CN' and gfw.filter(res['content']):
|
||||||
|
return index_error(output_format, 'No query'), 400
|
||||||
|
|
||||||
if res['content'] == '': continue
|
if res['content'] == '': continue
|
||||||
new_url = 'https://url'+str(len(url_pair))
|
new_url = 'https://url'+str(len(url_pair))
|
||||||
url_pair.append(res['url'])
|
url_pair.append(res['url'])
|
||||||
|
@ -2978,8 +2987,71 @@ if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_
|
||||||
search_initialize(enable_checker=True, check_network=True, enable_metrics=settings['general']['enable_metrics'])
|
search_initialize(enable_checker=True, check_network=True, enable_metrics=settings['general']['enable_metrics'])
|
||||||
|
|
||||||
|
|
||||||
|
class DFAFilter:
    """Keyword detector backed by a character trie (DFA).

    Keywords are stored lower-cased in ``keyword_chains``, a nested dict
    keyed by single characters.  A node that completes a keyword carries
    the extra key ``delimit`` (``'\\x00'``).  Matching is therefore
    case-insensitive for any text that ``str.lower`` normalises.
    """

    def __init__(self):
        # Root of the trie: {char: {char: {... {delimit: 0}}}}
        self.keyword_chains = {}
        # Sentinel key marking "a keyword ends at this node".
        self.delimit = '\x00'

    def add(self, keyword):
        """Insert ``keyword`` into the trie.

        ``keyword`` may be ``str`` or UTF-8 encoded ``bytes``.  It is
        lower-cased and stripped of surrounding whitespace; a keyword that
        is empty after stripping is ignored.
        """
        # ``unicode`` does not exist on Python 3; only bytes need decoding.
        if isinstance(keyword, bytes):
            keyword = keyword.decode('utf-8')
        chars = keyword.lower().strip()
        if not chars:
            return
        node = self.keyword_chains
        for char in chars:
            # Reuse the existing branch or create it on the fly.
            node = node.setdefault(char, {})
        node[self.delimit] = 0

    def parse(self, path):
        """Load keywords from the text file at ``path``, one per line.

        The file is read as UTF-8 so non-ASCII keywords do not depend on
        the process locale.  Blank lines are skipped by :meth:`add`.
        """
        with open(path, encoding='utf-8') as keyword_file:
            for line in keyword_file:
                self.add(line.strip())

    def filter(self, message, repl="*"):
        """Return ``True`` if ``message`` contains any stored keyword.

        ``message`` may be ``str`` or UTF-8 encoded ``bytes``; an empty or
        ``None`` message never matches.  ``repl`` is accepted only for
        backward compatibility with callers of the original masking API;
        this method reports a match and does not rewrite the text.
        """
        if not message:
            return False
        if isinstance(message, bytes):
            message = message.decode('utf-8')
        message = message.lower()

        root = self.keyword_chains
        delimit = self.delimit
        length = len(message)
        for start in range(length):
            node = root
            # Walk by index instead of slicing ``message[start:]`` so no
            # substring copy is made for every start offset.
            for index in range(start, length):
                node = node.get(message[index])
                if node is None:
                    break  # no keyword continues along this path
                if delimit in node:
                    return True  # a complete keyword ends here
        return False
|
||||||
|
gfw = DFAFilter()
|
||||||
def run():
|
def run():
|
||||||
logger.debug('starting webserver on %s:%s', settings['server']['bind_address'], settings['server']['port'])
|
logger.debug('starting webserver on %s:%s', settings['server']['bind_address'], settings['server']['port'])
|
||||||
|
|
||||||
|
gfw.parse("keywords")
|
||||||
app.run(
|
app.run(
|
||||||
debug=searx_debug,
|
debug=searx_debug,
|
||||||
use_debugger=searx_debug,
|
use_debugger=searx_debug,
|
||||||
|
|
Loading…
Add table
Reference in a new issue