This commit is contained in:
Joseph Cheung 2023-03-08 00:14:33 +08:00
parent ae674288b5
commit e7ac2c0bbf
2 changed files with 61 additions and 1 deletions

View file

@ -19,6 +19,7 @@ import re
import datetime import datetime
from textrank4zh import TextRank4Keyword, TextRank4Sentence from textrank4zh import TextRank4Keyword, TextRank4Sentence
import pycorrector import pycorrector
import threading
from timeit import default_timer from timeit import default_timer
from html import escape from html import escape
@ -663,6 +664,26 @@ def keytext():
res.append(item.sentence) res.append(item.sentence)
return Response(json.dumps(res), mimetype='application/json') return Response(json.dumps(res), mimetype='application/json')
# Compiled once at import time instead of on every call.  Accepts an optional
# http(s) scheme followed by a bare lowercase hostname with an optional
# trailing slash; URLs carrying a path, query string or port do not match.
# NOTE(review): the pattern requires at least three dot-separated labels, so
# "example.com" is rejected while "www.example.com" passes -- confirm that
# this is intended.
_SITE_OWNER_URL_PATTERN = re.compile(r'^(https?://)?([a-z0-9-]+\.)+[a-z0-9-]+\.[a-z]+/?$')


def process_result(result):
    """Append the site owner reported by the lookup API to ``result['title']``.

    The result is modified in place and only when all of the following hold:
    its ``url`` is a string matching ``_SITE_OWNER_URL_PATTERN``, the lookup
    request succeeds with a non-error HTTP status, and the response body is
    neither empty nor the literal ``null``.  In every other case the result
    is left untouched.

    Args:
        result: A dict-like search result; ``url`` and ``title`` are the only
            keys read, and ``title`` is the only key written.

    Returns:
        None.  Request failures are reported on stdout and never raised.
    """
    url = result.get('url')
    # A missing or non-string URL used to raise inside the worker thread;
    # treat it the same as a URL that does not qualify for a lookup.
    if not isinstance(url, str) or not _SITE_OWNER_URL_PATTERN.match(url):
        return

    # The API expects the bare host, so drop the (optional) leading scheme.
    query_url = re.sub(r'^https?://', '', url)
    try:
        response = requests.get(
            f'https://noisy-dust-b504.marduk.workers.dev/siteOwner?{query_url}',
            timeout=5,
        )
        # Without this, the body of a 4xx/5xx error page would be appended
        # to the title as if it were the owner's name.
        response.raise_for_status()
    except requests.exceptions.Timeout:
        print(f'Request timeout for {url}')
        return
    except requests.exceptions.RequestException as e:
        print(f'Request error for {url}: {e}')
        return

    # The API answers 'null' when no owner is known; also ignore empty or
    # whitespace-only bodies so the title never gains a dangling label.
    owner = response.text.strip()
    if owner and owner != 'null':
        result['title'] = (result.get('title') or '') + ' 此网站主人是:' + owner
@app.route('/search', methods=['GET', 'POST']) @app.route('/search', methods=['GET', 'POST'])
def search(): def search():
"""Search query in q and return results. """Search query in q and return results.
@ -810,6 +831,15 @@ def search():
# return index_error(output_format, gettext('No item found')), 500 # return index_error(output_format, gettext('No item found')), 500
results.remove(res) results.remove(res)
except:pass except:pass
threads = []
for result in results:
t = threading.Thread(target=process_result, args=(result,))
t.start()
threads.append(t)
# 等待所有线程执行完毕
for t in threads:
t.join()
for res in results: for res in results:
if 'engine' in res and res['engine'] == 'twitter': if 'engine' in res and res['engine'] == 'twitter':
try: try:

View file

@ -19,6 +19,7 @@ import re
import datetime import datetime
from textrank4zh import TextRank4Keyword, TextRank4Sentence from textrank4zh import TextRank4Keyword, TextRank4Sentence
import pycorrector import pycorrector
import threading
from timeit import default_timer from timeit import default_timer
from html import escape from html import escape
@ -663,6 +664,26 @@ def keytext():
res.append(item.sentence) res.append(item.sentence)
return Response(json.dumps(res), mimetype='application/json') return Response(json.dumps(res), mimetype='application/json')
# Compiled once at import time instead of on every call.  Accepts an optional
# http(s) scheme followed by a bare lowercase hostname with an optional
# trailing slash; URLs carrying a path, query string or port do not match.
# NOTE(review): the pattern requires at least three dot-separated labels, so
# "example.com" is rejected while "www.example.com" passes -- confirm that
# this is intended.
_SITE_OWNER_URL_PATTERN = re.compile(r'^(https?://)?([a-z0-9-]+\.)+[a-z0-9-]+\.[a-z]+/?$')


def process_result(result):
    """Append the site owner reported by the lookup API to ``result['title']``.

    The result is modified in place and only when all of the following hold:
    its ``url`` is a string matching ``_SITE_OWNER_URL_PATTERN``, the lookup
    request succeeds with a non-error HTTP status, and the response body is
    neither empty nor the literal ``null``.  In every other case the result
    is left untouched.

    Args:
        result: A dict-like search result; ``url`` and ``title`` are the only
            keys read, and ``title`` is the only key written.

    Returns:
        None.  Request failures are reported on stdout and never raised.
    """
    url = result.get('url')
    # A missing or non-string URL used to raise inside the worker thread;
    # treat it the same as a URL that does not qualify for a lookup.
    if not isinstance(url, str) or not _SITE_OWNER_URL_PATTERN.match(url):
        return

    # The API expects the bare host, so drop the (optional) leading scheme.
    query_url = re.sub(r'^https?://', '', url)
    try:
        response = requests.get(
            f'https://noisy-dust-b504.marduk.workers.dev/siteOwner?{query_url}',
            timeout=5,
        )
        # Without this, the body of a 4xx/5xx error page would be appended
        # to the title as if it were the owner's name.
        response.raise_for_status()
    except requests.exceptions.Timeout:
        print(f'Request timeout for {url}')
        return
    except requests.exceptions.RequestException as e:
        print(f'Request error for {url}: {e}')
        return

    # The API answers 'null' when no owner is known; also ignore empty or
    # whitespace-only bodies so the title never gains a dangling label.
    owner = response.text.strip()
    if owner and owner != 'null':
        result['title'] = (result.get('title') or '') + ' 此网站主人是:' + owner
@app.route('/search', methods=['GET', 'POST']) @app.route('/search', methods=['GET', 'POST'])
def search(): def search():
"""Search query in q and return results. """Search query in q and return results.
@ -810,6 +831,15 @@ def search():
# return index_error(output_format, gettext('No item found')), 500 # return index_error(output_format, gettext('No item found')), 500
results.remove(res) results.remove(res)
except:pass except:pass
threads = []
for result in results:
t = threading.Thread(target=process_result, args=(result,))
t.start()
threads.append(t)
# 等待所有线程执行完毕
for t in threads:
t.join()
for res in results: for res in results:
if 'engine' in res and res['engine'] == 'twitter': if 'engine' in res and res['engine'] == 'twitter':
try: try: