diff --git a/requirements-dev.txt b/requirements-dev.txt index c3df15686..7bcbd8251 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -22,3 +22,4 @@ wlc==1.13 coloredlogs==15.0.1 requests markdown +tiktoken \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 0cff50be3..016484508 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,5 @@ markdown-it-py==2.1.0 typing_extensions==4.5.0 fasttext-predict==0.9.2.1 requests -markdown \ No newline at end of file +markdown +tiktoken \ No newline at end of file diff --git a/searx/webapp.py b/searx/webapp.py index c13931102..337847d2f 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,6 +17,7 @@ import requests import markdown import re import datetime +import tiktoken from timeit import default_timer from html import escape @@ -767,7 +768,7 @@ def search(): res['content'] = res['content'].replace("This Tweet was deleted by the Tweet author.","Deleted Tweet.") tmp_prompt = res['title'] +'\n'+ res['content'] + '\n' + new_url +'\n' - if len(prompt)+len(tmp_prompt)<2300: + if len( tiktoken.get_encoding("gpt2").encode(prompt + tmp_prompt +'\n' + "\n以上是问题 " + original_search_query + " 的搜索结果,删除与问题相关度低的内容,用简体中文分条总结简报,在文中用(链接)标注对应内容来源链接,不要把链接都放在最后。结果:") )<2990: prompt += tmp_prompt +'\n' if prompt != "": gpt = ""