mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
o
This commit is contained in:
parent
8f16390022
commit
a81e99ecf4
2 changed files with 14675 additions and 3 deletions
14600
searx/keywords
Normal file
14600
searx/keywords
Normal file
File diff suppressed because it is too large
Load diff
|
@ -18,6 +18,7 @@ import markdown
|
|||
import re
|
||||
import datetime
|
||||
from textrank4zh import TextRank4Keyword, TextRank4Sentence
|
||||
from collections import defaultdict
|
||||
|
||||
from timeit import default_timer
|
||||
from html import escape
|
||||
|
@ -701,6 +702,8 @@ def search():
|
|||
try:
|
||||
search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
|
||||
# search = Search(search_query) # without plugins
|
||||
if request.environ['HTTP_CF_IPCOUNTRY'] == 'CN' and gfw.filter(search_query.query):
|
||||
return index_error(output_format, 'No query'), 400
|
||||
try:
|
||||
original_search_query = search_query.query
|
||||
if "模仿" in search_query.query or "扮演" in search_query.query or "你能" in search_query.query or "请推荐" in search_query.query or "帮我" in search_query.query or "写一段" in search_query.query or "写一个" in search_query.query or "请问" in search_query.query or "请给" in search_query.query or "请你" in search_query.query or "请推荐" in search_query.query or "是谁" in search_query.query or "能帮忙" in search_query.query or "介绍一下" in search_query.query or "为什么" in search_query.query or "什么是" in search_query.query or "有什么" in search_query.query or "怎样" in search_query.query or "给我" in search_query.query or "如何" in search_query.query or "谁是" in search_query.query or "查询" in search_query.query or "告诉我" in search_query.query or "查一下" in search_query.query or "找一个" in search_query.query or "什么样" in search_query.query or "哪个" in search_query.query or "哪些" in search_query.query or "哪一个" in search_query.query or "哪一些" in search_query.query or "啥是" in search_query.query or "为啥" in search_query.query or "怎么" in search_query.query:
|
||||
|
@ -713,7 +716,7 @@ def search():
|
|||
search_type = '任务'
|
||||
net_search = False
|
||||
net_search_str = 'false'
|
||||
elif len(original_query)>10:
|
||||
elif len(original_search_query)>10:
|
||||
prompt = "任务:写诗 写故事 写代码 写论文摘要 模仿推特用户 生成搜索广告 回答问题 聊天话题 搜索网页 搜索视频 搜索地图 搜索新闻 查看食谱 搜索商品 写歌词 写论文 模仿名人 翻译语言 摘要文章 讲笑话 做数学题 搜索图片 播放音乐 查看天气\n1.判断是以上任务的哪一个2.判断是否需要联网回答3.给出搜索关键词\n"
|
||||
prompt = prompt + "提问:" + search_query.query + '答案用json数组例如["写诗","否","详细关键词"]来表述\n答案:'
|
||||
acts = ['写诗', '写故事', '写代码', '写论文摘要', '模仿推特用户', '生成搜索广告', '回答问题', '聊天话题', '搜索网页', '搜索视频', '搜索地图', '搜索新闻', '查看食谱', '搜索商品', '写歌词', '写论文', '模仿名人', '翻译语言', '摘要文章', '讲笑话', '做数学题', '搜索图片', '播放音乐', '查看天气']
|
||||
|
@ -739,6 +742,7 @@ def search():
|
|||
"logprobs": 0,
|
||||
"stream": False
|
||||
}
|
||||
gpt_json={}
|
||||
if prompt and prompt !='' :
|
||||
gpt_response = requests.post(gpt_url, headers=gpt_headers, data=json.dumps(gpt_data))
|
||||
gpt_json = gpt_response.json()
|
||||
|
@ -795,9 +799,14 @@ def search():
|
|||
url_proxy = {}
|
||||
prompt = ""
|
||||
for res in results:
|
||||
results.remove(res)
|
||||
if 'url' not in res: continue
|
||||
if 'content' not in res: continue
|
||||
if 'title' not in res: continue
|
||||
if request.environ['HTTP_CF_IPCOUNTRY'] == 'CN' and gfw.filter(res['title']):
|
||||
if 'content' not in res: continue
|
||||
if request.environ['HTTP_CF_IPCOUNTRY'] == 'CN' and gfw.filter(res['content']):
|
||||
return index_error(output_format, 'No query'), 400
|
||||
|
||||
if res['content'] == '': continue
|
||||
new_url = 'https://url'+str(len(url_pair))
|
||||
url_pair.append(res['url'])
|
||||
|
@ -2978,8 +2987,71 @@ if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_
|
|||
search_initialize(enable_checker=True, check_network=True, enable_metrics=settings['general']['enable_metrics'])
|
||||
|
||||
|
||||
class DFAFilter():
    """Blacklist keyword matcher backed by a DFA (character trie).

    Keywords are stored in nested dicts keyed by character; a terminal
    node is marked with the ``self.delimit`` sentinel key.  ``filter``
    reports whether a message contains any registered keyword.

    Fixes vs. the original: the Python 2 ``unicode`` checks (a
    ``NameError`` on Python 3) are replaced with ``bytes`` handling, and
    the dead ``ret``/``step_ins`` accumulation in ``filter`` (built but
    never returned) is removed.
    """

    def __init__(self):
        # Character trie: {char: {char: ... {'\x00': 0}}}
        self.keyword_chains = {}
        # Sentinel key marking the end of a complete keyword in the trie.
        self.delimit = '\x00'

    def add(self, keyword):
        """Register *keyword* (str or UTF-8 bytes); matching is case-insensitive."""
        if isinstance(keyword, bytes):
            # Py2 legacy decoded non-unicode input; on Py3 only bytes need it.
            keyword = keyword.decode('utf-8')
        chars = keyword.lower().strip()
        if not chars:
            return
        level = self.keyword_chains
        for i in range(len(chars)):
            if chars[i] in level:
                level = level[chars[i]]
            else:
                if not isinstance(level, dict):
                    # An existing shorter keyword terminates here; nothing to add.
                    break
                # Extend the trie with the remaining characters, then mark
                # the final node as a terminal.
                for j in range(i, len(chars)):
                    level[chars[j]] = {}
                    last_level, last_char = level, chars[j]
                    level = level[chars[j]]
                last_level[last_char] = {self.delimit: 0}
                break
            if i == len(chars) - 1:
                # The whole keyword already existed as a prefix of a longer
                # one; mark this node terminal too.
                level[self.delimit] = 0

    def parse(self, path):
        """Load one keyword per line from the file at *path* (UTF-8)."""
        with open(path, encoding='utf-8') as f:
            for keyword in f:
                self.add(keyword.strip())

    def filter(self, message, repl="*"):
        """Return True if *message* contains any registered keyword, else False.

        *repl* is kept for interface compatibility; this variant only
        detects matches, it does not produce a censored string.
        """
        if isinstance(message, bytes):
            message = message.decode('utf-8')
        message = message.lower()
        start = 0
        while start < len(message):
            level = self.keyword_chains
            for char in message[start:]:
                if char in level:
                    if self.delimit in level[char]:
                        return True  # complete keyword matched
                    level = level[char]
                else:
                    break
            start += 1
        return False
# Module-level blacklist filter singleton; the keyword list is loaded
# later via gfw.parse("keywords") before the server starts.
gfw = DFAFilter()
def run():
|
||||
logger.debug('starting webserver on %s:%s', settings['server']['bind_address'], settings['server']['port'])
|
||||
|
||||
gfw.parse("keywords")
|
||||
app.run(
|
||||
debug=searx_debug,
|
||||
use_debugger=searx_debug,
|
||||
|
|
Loading…
Add table
Reference in a new issue