From 3a423887c94f68af2d596f78fe5341fa52c2a566 Mon Sep 17 00:00:00 2001 From: Joseph Cheung Date: Mon, 20 Feb 2023 21:16:40 +0800 Subject: [PATCH] Update webapp.py --- searx/webapp.py | 199 ++++++++++++++++++++++++------------------------ 1 file changed, 101 insertions(+), 98 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index 0d7a62b1d..47e76d2a1 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -746,107 +746,110 @@ def search(): number_of_results = 0 # OPENAI GPT - url_pair = {} - prompt = "" - for res in results: - if 'url' not in res: continue - if 'content' not in res: continue - if 'title' not in res: continue - if res['content'] == '': continue - new_url = 'https://url'+str(len(url_pair)+1) - url_pair[new_url] = res['url'] - res['title'] = res['title'].replace("التغريدات مع الردود بواسطة","") - res['content'] = res['content'].replace("Translate Tweet. ","") - res['content'] = res['content'].replace("Learn more ","") - res['content'] = res['content'].replace("Translate Tweet.","") - res['content'] = res['content'].replace("Learn more.","") - tmp_prompt = res['title'] +'\n'+ res['content'] + '\n' + new_url +'\n' - if len(prompt)+len(tmp_prompt)<3000: - prompt += tmp_prompt +'\n' - if prompt != "": - gpt = "" - gpt_url = "https://api.openai.com/v1/engines/text-davinci-003/completions" - gpt_headers = { - "Authorization": "Bearer "+os.environ['GPTKEY'], - "Content-Type": "application/json", - "OpenAI-Organization": os.environ['GPTORG'] - } - if original_search_query != search_query.query: - gpt_data = { - "prompt": prompt+"\n以上是问题 " + original_search_query + " 的搜索结果,用简体中文分条总结简报,在文中用markdown脚注指对应内容来源链接:", - "max_tokens": 1000, - "temperature": 0.7, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "best_of": 1, - "echo": False, - "logprobs": 0, - "stream": False + try: + url_pair = {} + prompt = "" + for res in results: + if 'url' not in res: continue + if 'content' not in res: continue + if 'title' not in res: continue + if res['content'] == '': continue + new_url = 'https://url'+str(len(url_pair)+1) + url_pair[new_url] = res['url'] + res['title'] = res['title'].replace("التغريدات مع الردود بواسطة","") + res['content'] = res['content'].replace("Translate Tweet. ","") + res['content'] = res['content'].replace("Learn more ","") + res['content'] = res['content'].replace("Translate Tweet.","") + res['content'] = res['content'].replace("Learn more.","") + tmp_prompt = res['title'] +'\n'+ res['content'] + '\n' + new_url +'\n' + if len(prompt)+len(tmp_prompt)<3000: + prompt += tmp_prompt +'\n' + if prompt != "": + gpt = "" + gpt_url = "https://api.openai.com/v1/engines/text-davinci-003/completions" + gpt_headers = { + "Authorization": "Bearer "+os.environ['GPTKEY'], + "Content-Type": "application/json", + "OpenAI-Organization": os.environ['GPTORG'] } - else: - gpt_data = { - "prompt": prompt+"\n以上是关键词 " + search_query.query + " 的搜索结果,用简体中文分条总结简报,在文中用markdown脚注指对应内容来源链接:", - "max_tokens": 1000, - "temperature": 0.7, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "best_of": 1, - "echo": False, - "logprobs": 0, - "stream": False - } - gpt_response = requests.post(gpt_url, headers=gpt_headers, data=json.dumps(gpt_data)) - gpt_json = gpt_response.json() - if 'choices' in gpt_json: - gpt = gpt_json['choices'][0]['text'] - gpt = gpt.replace("简报:","").replace("简报:","") - for urls in url_pair.keys(): - gpt = gpt.replace(urls,url_pair[urls]) - rgpt = gpt - - if gpt and gpt!="": if original_search_query != search_query.query: - gpt = "Search 为您搜索:" + search_query.query + "\n\n" + gpt - for i in range(1,16): - gpt = gpt.replace("["+str(i)+"] http","[^"+str(i)+"]: http").replace("["+str(i)+"]http","[^"+str(i)+"]: http").replace("["+str(i)+"]","[^"+str(i)+"]") - rgpt = gpt - gpt = markdown.markdown( gpt , extensions=['footnotes']) - + gpt_data = { + "prompt": prompt+"\n以上是问题 " + original_search_query + " 的搜索结果,用简体中文分条总结简报,在文中用markdown脚注指对应内容来源链接:", + "max_tokens": 1000, + "temperature": 0.7, + "top_p": 1, + "frequency_penalty": 0, + "presence_penalty": 0, + "best_of": 1, + "echo": False, + "logprobs": 0, + "stream": False + } + else: + gpt_data = { + "prompt": prompt+"\n以上是关键词 " + search_query.query + " 的搜索结果,用简体中文分条总结简报,在文中用markdown脚注指对应内容来源链接:", + "max_tokens": 1000, + "temperature": 0.7, + "top_p": 1, + "frequency_penalty": 0, + "presence_penalty": 0, + "best_of": 1, + "echo": False, + "logprobs": 0, + "stream": False + } + gpt_response = requests.post(gpt_url, headers=gpt_headers, data=json.dumps(gpt_data)) + gpt_json = gpt_response.json() + if 'choices' in gpt_json: + gpt = gpt_json['choices'][0]['text'] + gpt = gpt.replace("简报:","").replace("简报:","") for urls in url_pair.keys(): - gpt = gpt.replace("#fn:"+urls.replace("https://url",""),url_pair[urls]) - gpt = gpt.replace("#fn:url"+urls.replace("https://url",""),url_pair[urls]) - gpt = re.sub(r'
(.*?)
', '', gpt, flags=re.DOTALL) - gpt = gpt + ''' - ''' - for i in range(1, 16): - rgpt = rgpt.replace(f"[{i}]", "") - rgpt = rgpt.replace(f"[^{i}]", "") - gptbox = { - 'infobox': 'GPT3', - 'id': 'gpt'+str(len(prompt)), - 'content': gpt, - } - result_container.infoboxes.append(gptbox) + gpt = gpt.replace(urls,url_pair[urls]) + rgpt = gpt + + if gpt and gpt!="": + if original_search_query != search_query.query: + gpt = "Search 为您搜索:" + search_query.query + "\n\n" + gpt + for i in range(1,16): + gpt = gpt.replace("["+str(i)+"] http","[^"+str(i)+"]: http").replace("["+str(i)+"]http","[^"+str(i)+"]: http").replace("["+str(i)+"]","[^"+str(i)+"]") + rgpt = gpt + gpt = markdown.markdown( gpt , extensions=['footnotes']) + + for urls in url_pair.keys(): + gpt = gpt.replace("#fn:"+urls.replace("https://url",""),url_pair[urls]) + gpt = gpt.replace("#fn:url"+urls.replace("https://url",""),url_pair[urls]) + gpt = re.sub(r'
(.*?)
', '', gpt, flags=re.DOTALL) + gpt = gpt + ''' + ''' + for i in range(1, 16): + rgpt = rgpt.replace(f"[{i}]", "") + rgpt = rgpt.replace(f"[^{i}]", "") + gptbox = { + 'infobox': 'GPT3', + 'id': 'gpt'+str(len(prompt)), + 'content': gpt, + } + result_container.infoboxes.append(gptbox) + except Exception as ee: + logger.exception(ee, exc_info=True) # checkin for a external bang