This commit is contained in:
Joseph Cheung 2023-03-01 19:22:58 +08:00
parent e35bd91b88
commit f331ab8ba3
4 changed files with 498 additions and 2 deletions

View file

@ -0,0 +1,390 @@
// Export table of the instantiated WebAssembly module; assigned by `init`.
let wasm;

// Shared decoder for strings read out of wasm memory. `fatal` makes invalid
// UTF-8 throw; `ignoreBOM` is passed through to TextDecoder unchanged.
let cachedTextDecoder = new TextDecoder('utf-8', {
    ignoreBOM: true,
    fatal: true,
});

// Called once with no input at load time (presumably a warm-up; the result is unused).
cachedTextDecoder.decode();
// Cached byte view over wasm memory; rebuilt whenever the backing buffer changes.
let cachegetUint8Memory0 = null;

/**
 * Returns a Uint8Array over the current `wasm.memory.buffer`.
 *
 * The cached view is reused while it still points at the current buffer and is
 * recreated when no view exists yet or the buffer identity has changed.
 *
 * @returns {Uint8Array}
 */
function getUint8Memory0() {
    const current = wasm.memory.buffer;
    const stale = cachegetUint8Memory0 === null || cachegetUint8Memory0.buffer !== current;
    if (stale) {
        cachegetUint8Memory0 = new Uint8Array(current);
    }
    return cachegetUint8Memory0;
}
/**
 * Decodes `len` bytes of wasm memory starting at byte offset `ptr` as UTF-8.
 *
 * @param {number} ptr - byte offset of the first byte
 * @param {number} len - number of bytes to decode
 * @returns {string}
 */
function getStringFromWasm0(ptr, len) {
    const end = ptr + len;
    const bytes = getUint8Memory0().subarray(ptr, end);
    return cachedTextDecoder.decode(bytes);
}
// Table of JS values addressed by integer index. It starts with 32 `undefined`
// entries followed by the fixed values undefined, null, true and false at
// indices 32..35.
const heap = [
    ...new Array(32).fill(undefined),
    undefined,
    null,
    true,
    false,
];

// Index of the next free slot; initially one past the pre-filled entries.
let heap_next = heap.length;
/**
 * Stores `obj` in the next free heap slot and returns that slot's index.
 *
 * Free slots form a linked list: each free slot holds the index of the next
 * free one. When the list is exhausted the table grows by one entry.
 *
 * @param {any} obj - value to store
 * @returns {number} index at which `obj` was stored
 */
function addHeapObject(obj) {
    const slot = heap_next;
    if (slot === heap.length) {
        // No free slot left: append one whose "next free" link points past the end.
        heap.push(heap.length + 1);
    }
    heap_next = heap[slot];
    heap[slot] = obj;
    return slot;
}
/**
 * Looks up the value stored at heap index `idx` without releasing the slot.
 *
 * @param {number} idx - heap index
 * @returns {any}
 */
function getObject(idx) {
    const value = heap[idx];
    return value;
}
/**
 * Releases heap slot `idx` by pushing it onto the free list.
 *
 * Indices below 36 are never released; the call is a no-op for them.
 *
 * @param {number} idx - heap index to free
 */
function dropObject(idx) {
    const isReserved = idx < 36;
    if (!isReserved) {
        // The freed slot stores the previous free-list head and becomes the new head.
        heap[idx] = heap_next;
        heap_next = idx;
    }
}
/**
 * Reads the value at heap index `idx`, then releases the slot.
 *
 * @param {number} idx - heap index
 * @returns {any} the value that was stored at `idx`
 */
function takeObject(idx) {
    const taken = getObject(idx);
    dropObject(idx);
    return taken;
}
/**
 * Builds a short, human-readable description of an arbitrary JS value.
 *
 * - numbers, booleans, null and undefined: their string form
 * - strings: wrapped in double quotes
 * - symbols: `Symbol` or `Symbol(description)`
 * - functions: `Function` or `Function(name)`
 * - arrays: `[a, b, ...]` with each element described recursively
 * - plain objects: `Object(<JSON>)`, or `Object` if JSON serialisation throws
 * - errors: `name: message` followed by the stack
 * - anything else: the class name taken from `Object.prototype.toString`
 *
 * @param {any} val - value to describe
 * @returns {string}
 */
function debugString(val) {
    // primitive types
    const type = typeof val;
    if (type === 'number' || type === 'boolean' || val == null) {
        return `${val}`;
    }
    if (type === 'string') {
        return `"${val}"`;
    }
    if (type === 'symbol') {
        const description = val.description;
        return description == null ? 'Symbol' : `Symbol(${description})`;
    }
    if (type === 'function') {
        const name = val.name;
        return typeof name === 'string' && name.length > 0 ? `Function(${name})` : 'Function';
    }
    // objects
    if (Array.isArray(val)) {
        // Array.from visits holes as `undefined`, matching an index-based loop.
        const parts = Array.from(val, (item) => debugString(item));
        return `[${parts.join(', ')}]`;
    }
    // Test for built-in
    const tagged = Object.prototype.toString.call(val);
    const builtInMatches = /\[object ([^\]]+)\]/.exec(tagged);
    if (builtInMatches === null) {
        // Failed to match the standard '[object ClassName]' (e.g. an empty
        // Symbol.toStringTag yields '[object ]'). `exec` returns null in that
        // case, so it must be checked before touching the match array.
        return tagged;
    }
    const className = builtInMatches[1];
    if (className === 'Object') {
        // A user-defined class or plain Object. JSON.stringify throws on
        // cycles, which is caught below, and is much simpler than walking
        // the own properties of `val` by hand.
        try {
            return 'Object(' + JSON.stringify(val) + ')';
        } catch (_) {
            return 'Object';
        }
    }
    // errors
    if (val instanceof Error) {
        return `${val.name}: ${val.message}\n${val.stack}`;
    }
    // TODO we could test for more things here, like `Set`s and `Map`s.
    return className;
}
// Byte length of the string most recently written into wasm memory.
let WASM_VECTOR_LEN = 0;

// Shared UTF-8 encoder for strings passed into wasm.
let cachedTextEncoder = new TextEncoder('utf-8');

// Preferred path: encode straight into the destination view.
function encodeStringInto(arg, view) {
    return cachedTextEncoder.encodeInto(arg, view);
}

// Fallback for encoders without `encodeInto`: encode to a temporary buffer,
// copy it into the view and report the same `{ read, written }` shape.
function encodeStringByCopy(arg, view) {
    const encoded = cachedTextEncoder.encode(arg);
    view.set(encoded);
    return {
        read: arg.length,
        written: encoded.length
    };
}

/**
 * Encodes `arg` as UTF-8 into `view` and returns `{ read, written }`.
 * The implementation is chosen once, based on whether `encodeInto` exists.
 */
const encodeString = typeof cachedTextEncoder.encodeInto === 'function'
    ? encodeStringInto
    : encodeStringByCopy;
/**
 * Copies the JS string `arg` into wasm memory as UTF-8 and returns its pointer.
 * The number of bytes written is published through `WASM_VECTOR_LEN`.
 *
 * @param {string} arg - string to copy
 * @param {Function} malloc - called with a byte count, returns a pointer
 * @param {Function} [realloc] - called as (ptr, oldSize, newSize), returns a pointer;
 *   when omitted the string is fully encoded first and allocated at its exact size
 * @returns {number} pointer to the encoded bytes in wasm memory
 */
function passStringToWasm0(arg, malloc, realloc) {
// No realloc available: encode up front so the allocation can be exact.
if (realloc === undefined) {
const buf = cachedTextEncoder.encode(arg);
const ptr = malloc(buf.length);
getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf);
WASM_VECTOR_LEN = buf.length;
return ptr;
}
// Optimistic path: allocate one byte per UTF-16 code unit and copy code units
// directly for as long as they are ASCII (<= 0x7F).
let len = arg.length;
let ptr = malloc(len);
const mem = getUint8Memory0();
let offset = 0;
for (; offset < len; offset++) {
const code = arg.charCodeAt(offset);
if (code > 0x7F) break;
mem[ptr + offset] = code;
}
// A non-ASCII code unit was hit: encode the remainder with the text encoder.
if (offset !== len) {
if (offset !== 0) {
arg = arg.slice(offset);
}
// Arguments are evaluated left to right, so the second argument is the OLD
// `len` and the third both computes and stores the NEW `len`
// (bytes already written + 3 bytes per remaining code unit).
ptr = realloc(ptr, len, len = offset + arg.length * 3);
// The memory view is fetched again rather than reusing `mem`
// (presumably because realloc may have replaced the backing buffer).
const view = getUint8Memory0().subarray(ptr + offset, ptr + len);
const ret = encodeString(arg, view);
offset += ret.written;
}
WASM_VECTOR_LEN = offset;
return ptr;
}
// Cached signed 32-bit view over wasm memory; rebuilt whenever the backing buffer changes.
let cachegetInt32Memory0 = null;

/**
 * Returns an Int32Array over the current `wasm.memory.buffer`.
 *
 * The cached view is reused while it still points at the current buffer and is
 * recreated when no view exists yet or the buffer identity has changed.
 *
 * @returns {Int32Array}
 */
function getInt32Memory0() {
    const current = wasm.memory.buffer;
    const stale = cachegetInt32Memory0 === null || cachegetInt32Memory0.buffer !== current;
    if (stale) {
        cachegetInt32Memory0 = new Int32Array(current);
    }
    return cachegetInt32Memory0;
}
// Cached unsigned 32-bit view over wasm memory; rebuilt whenever the backing buffer changes.
let cachegetUint32Memory0 = null;

/**
 * Returns a Uint32Array over the current `wasm.memory.buffer`.
 *
 * The cached view is reused while it still points at the current buffer and is
 * recreated when no view exists yet or the buffer identity has changed.
 *
 * @returns {Uint32Array}
 */
function getUint32Memory0() {
    const current = wasm.memory.buffer;
    const stale = cachegetUint32Memory0 === null || cachegetUint32Memory0.buffer !== current;
    if (stale) {
        cachegetUint32Memory0 = new Uint32Array(current);
    }
    return cachegetUint32Memory0;
}
/**
 * Reads `len` 32-bit heap indices from wasm memory starting at byte offset
 * `ptr` and takes the corresponding JS value for each one, in order.
 *
 * Every index is passed to `takeObject`, so the referenced heap slots are
 * released as a side effect.
 *
 * @param {number} ptr - byte offset of the first index
 * @param {number} len - number of indices to read
 * @returns {any[]}
 */
function getArrayJsValueFromWasm0(ptr, len) {
    const start = ptr / 4;
    const handles = getUint32Memory0().subarray(start, start + len);
    return Array.from(handles, (handle) => takeObject(handle));
}
/**
 * Calls the wasm export `cut` on `text` and returns the resulting JS values.
 * (Presumably jieba word segmentation; the semantics live in the wasm module.)
 *
 * Reserves 16 bytes on the wasm stack for the (pointer, length) return pair
 * and always gives them back, even when the call throws.
 *
 * @param {string} text
 * @param {boolean} hmm
 * @returns {any[]}
 */
export function cut(text, hmm) {
    const RET_AREA_BYTES = 16;
    try {
        const retptr = wasm.__wbindgen_add_to_stack_pointer(-RET_AREA_BYTES);
        const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const textLen = WASM_VECTOR_LEN;
        wasm.cut(retptr, textPtr, textLen, hmm);
        // The export writes the result vector's pointer and element count at retptr.
        const retSlot = retptr / 4;
        const vecPtr = getInt32Memory0()[retSlot];
        const vecLen = getInt32Memory0()[retSlot + 1];
        const words = getArrayJsValueFromWasm0(vecPtr, vecLen).slice();
        // Each element is a 4-byte heap index.
        wasm.__wbindgen_free(vecPtr, vecLen * 4);
        return words;
    } finally {
        wasm.__wbindgen_add_to_stack_pointer(RET_AREA_BYTES);
    }
}
/**
 * Calls the wasm export `cut_all` on `text` and returns the resulting JS values.
 * (Presumably jieba full-mode segmentation; the semantics live in the wasm module.)
 *
 * Reserves 16 bytes on the wasm stack for the (pointer, length) return pair
 * and always gives them back, even when the call throws.
 *
 * @param {string} text
 * @returns {any[]}
 */
export function cut_all(text) {
    const RET_AREA_BYTES = 16;
    try {
        const retptr = wasm.__wbindgen_add_to_stack_pointer(-RET_AREA_BYTES);
        const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const textLen = WASM_VECTOR_LEN;
        wasm.cut_all(retptr, textPtr, textLen);
        // The export writes the result vector's pointer and element count at retptr.
        const retSlot = retptr / 4;
        const vecPtr = getInt32Memory0()[retSlot];
        const vecLen = getInt32Memory0()[retSlot + 1];
        const words = getArrayJsValueFromWasm0(vecPtr, vecLen).slice();
        // Each element is a 4-byte heap index.
        wasm.__wbindgen_free(vecPtr, vecLen * 4);
        return words;
    } finally {
        wasm.__wbindgen_add_to_stack_pointer(RET_AREA_BYTES);
    }
}
/**
 * Calls the wasm export `cut_for_search` on `text` and returns the resulting JS values.
 * (Presumably jieba search-mode segmentation; the semantics live in the wasm module.)
 *
 * Reserves 16 bytes on the wasm stack for the (pointer, length) return pair
 * and always gives them back, even when the call throws.
 *
 * @param {string} text
 * @param {boolean} hmm
 * @returns {any[]}
 */
export function cut_for_search(text, hmm) {
    const RET_AREA_BYTES = 16;
    try {
        const retptr = wasm.__wbindgen_add_to_stack_pointer(-RET_AREA_BYTES);
        const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const textLen = WASM_VECTOR_LEN;
        wasm.cut_for_search(retptr, textPtr, textLen, hmm);
        // The export writes the result vector's pointer and element count at retptr.
        const retSlot = retptr / 4;
        const vecPtr = getInt32Memory0()[retSlot];
        const vecLen = getInt32Memory0()[retSlot + 1];
        const words = getArrayJsValueFromWasm0(vecPtr, vecLen).slice();
        // Each element is a 4-byte heap index.
        wasm.__wbindgen_free(vecPtr, vecLen * 4);
        return words;
    } finally {
        wasm.__wbindgen_add_to_stack_pointer(RET_AREA_BYTES);
    }
}
/**
 * Calls the wasm export `tokenize` with `text` and `mode` and returns the
 * resulting JS values. (The accepted `mode` strings and the shape of each
 * returned token are defined by the wasm module, not by this wrapper.)
 *
 * Reserves 16 bytes on the wasm stack for the (pointer, length) return pair
 * and always gives them back, even when the call throws.
 *
 * @param {string} text
 * @param {string} mode
 * @param {boolean} hmm
 * @returns {any[]}
 */
export function tokenize(text, mode, hmm) {
    const RET_AREA_BYTES = 16;
    try {
        const retptr = wasm.__wbindgen_add_to_stack_pointer(-RET_AREA_BYTES);
        // WASM_VECTOR_LEN is overwritten by every string pass, so each length
        // must be captured right after its own call.
        const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const textLen = WASM_VECTOR_LEN;
        const modePtr = passStringToWasm0(mode, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const modeLen = WASM_VECTOR_LEN;
        wasm.tokenize(retptr, textPtr, textLen, modePtr, modeLen, hmm);
        // The export writes the result vector's pointer and element count at retptr.
        const retSlot = retptr / 4;
        const vecPtr = getInt32Memory0()[retSlot];
        const vecLen = getInt32Memory0()[retSlot + 1];
        const tokens = getArrayJsValueFromWasm0(vecPtr, vecLen).slice();
        // Each element is a 4-byte heap index.
        wasm.__wbindgen_free(vecPtr, vecLen * 4);
        return tokens;
    } finally {
        wasm.__wbindgen_add_to_stack_pointer(RET_AREA_BYTES);
    }
}
/**
 * Tells whether `x` is exactly `null` or `undefined`.
 *
 * @param {any} x
 * @returns {boolean}
 */
function isLikeNone(x) {
    if (x === null) {
        return true;
    }
    return x === undefined;
}
/**
 * Calls the wasm export `add_word` and returns its result as an unsigned
 * 32-bit integer.
 *
 * Optional arguments are passed in a flattened form: `freq` becomes a
 * (present flag, value-or-0) pair and a missing `tag` is passed as pointer 0.
 *
 * @param {string} word
 * @param {number | undefined} freq
 * @param {string | undefined} tag
 * @returns {number}
 */
export function add_word(word, freq, tag) {
    const wordPtr = passStringToWasm0(word, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const wordLen = WASM_VECTOR_LEN;

    let tagPtr = 0;
    if (!isLikeNone(tag)) {
        tagPtr = passStringToWasm0(tag, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    }
    // When `tag` is absent this is simply whatever length was recorded last.
    const tagLen = WASM_VECTOR_LEN;

    const hasFreq = !isLikeNone(freq);
    const freqValue = hasFreq ? freq : 0;

    const ret = wasm.add_word(wordPtr, wordLen, hasFreq, freqValue, tagPtr, tagLen);
    // Reinterpret the returned i32 as unsigned.
    return ret >>> 0;
}
/**
 * Instantiates a WebAssembly module from `module` and resolves to
 * `{ instance, module }`.
 *
 * - A `Response` is streamed with `WebAssembly.instantiateStreaming` when that
 *   API exists. If streaming fails and the response was not served as
 *   `application/wasm`, a warning is logged and the bytes are instantiated
 *   from an ArrayBuffer instead; any other streaming failure is rethrown.
 * - Anything else is handed to `WebAssembly.instantiate` directly. When that
 *   yields a bare `Instance`, it is paired with the given `module`.
 *
 * @param {Response | BufferSource | WebAssembly.Module} module
 * @param {object} imports - import object passed to the instantiate call
 * @returns {Promise<{instance: WebAssembly.Instance, module: WebAssembly.Module}>}
 */
async function load(module, imports) {
    const isResponse = typeof Response === 'function' && module instanceof Response;

    if (!isResponse) {
        const instance = await WebAssembly.instantiate(module, imports);
        return instance instanceof WebAssembly.Instance ? { instance, module } : instance;
    }

    if (typeof WebAssembly.instantiateStreaming === 'function') {
        try {
            return await WebAssembly.instantiateStreaming(module, imports);
        } catch (e) {
            // Only a wrong MIME type is recoverable; everything else is a real failure.
            if (module.headers.get('Content-Type') == 'application/wasm') {
                throw e;
            }
            console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
        }
    }

    const bytes = await module.arrayBuffer();
    return await WebAssembly.instantiate(bytes, imports);
}
/**
 * Instantiates the wasm module and wires it to this glue code.
 *
 * `input` may be omitted (the wasm file next to this module is used), a URL
 * string / `URL` / `Request` (it is fetched), or anything `load` accepts,
 * possibly wrapped in a promise. After instantiation the module's exports are
 * stored in `wasm`, the compiled module is exposed as
 * `init.__wbindgen_wasm_module`, and the exports are returned.
 *
 * @param {any} [input]
 * @returns {Promise<object>} the instantiated module's exports
 */
async function init(input) {
    if (typeof input === 'undefined') {
        input = new URL('jieba_rs_wasm_bg.wasm', import.meta.url);
    }

    // Functions the wasm module imports under the `wbg` namespace.
    const wbg = {
        __wbindgen_string_new(arg0, arg1) {
            return addHeapObject(getStringFromWasm0(arg0, arg1));
        },
        __wbindgen_object_drop_ref(arg0) {
            takeObject(arg0);
        },
        __wbg_new_68adb0d58759a4ed() {
            return addHeapObject({});
        },
        __wbindgen_number_new(arg0) {
            return addHeapObject(arg0);
        },
        __wbg_set_2e79e744454afade(arg0, arg1, arg2) {
            // Target is only borrowed; key and value slots are consumed.
            getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
        },
        __wbindgen_object_clone_ref(arg0) {
            return addHeapObject(getObject(arg0));
        },
        __wbg_new_7031805939a80203(arg0, arg1) {
            return addHeapObject(new Error(getStringFromWasm0(arg0, arg1)));
        },
        __wbindgen_debug_string(arg0, arg1) {
            const text = debugString(getObject(arg1));
            const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const textLen = WASM_VECTOR_LEN;
            // Write (pointer, length) into the two i32 slots at arg0.
            getInt32Memory0()[arg0 / 4 + 1] = textLen;
            getInt32Memory0()[arg0 / 4 + 0] = textPtr;
        },
        __wbindgen_throw(arg0, arg1) {
            throw new Error(getStringFromWasm0(arg0, arg1));
        },
        __wbindgen_rethrow(arg0) {
            throw takeObject(arg0);
        },
    };
    const imports = { wbg };

    const isFetchable = typeof input === 'string'
        || (typeof Request === 'function' && input instanceof Request)
        || (typeof URL === 'function' && input instanceof URL);
    if (isFetchable) {
        input = fetch(input);
    }

    const { instance, module } = await load(await input, imports);
    wasm = instance.exports;
    init.__wbindgen_wasm_module = module;
    return wasm;
}
export default init;

Binary file not shown.

View file

@ -1174,6 +1174,7 @@ button.btn_more {
<script src="/static/themes/magi/Readability-readerable.js"></script> <script src="/static/themes/magi/Readability-readerable.js"></script>
<script src="/static/themes/magi/Readability.js"></script> <script src="/static/themes/magi/Readability.js"></script>
<script src="/static/themes/magi/markdown.js"></script> <script src="/static/themes/magi/markdown.js"></script>
<script> <script>
const original_search_query = "''' + original_search_query.replace('"',"") + r'''" const original_search_query = "''' + original_search_query.replace('"',"") + r'''"
const search_queryquery = "''' + search_query.query.replace('"',"") + r'''" const search_queryquery = "''' + search_query.query.replace('"',"") + r'''"
@ -1196,6 +1197,57 @@ function proxify()
} }
}
/**
 * Loads the jieba WebAssembly tokenizer and publishes its `cut` function as
 * `window.cut`. Does nothing when `window.cut` is already defined.
 *
 * @returns {Promise<any>} the value resolved by the module's `init()`, or
 *   `undefined` when the tokenizer was already loaded
 */
const _load_wasm_jieba = async () => {
    if (window.cut !== undefined) return;
    /* load jieba */
    const {
        default: init,
        cut
    } = await import("/static/themes/magi/jieba_rs_wasm.js");
    const inited = await init();
    // Publish only after init() has finished so callers never see an unusable `cut`.
    window.cut = cut;
    return inited;
};
// Started eagerly at page load. Failures are reported here; otherwise a failed
// import or instantiation would surface only as an unhandled promise rejection.
_load_wasm_jieba().catch((err) => {
    console.error("Failed to load jieba wasm tokenizer:", err);
});
/**
 * Cosine similarity between the token-frequency vectors of two texts.
 *
 * Both texts are lower-cased and tokenized with the global `cut` (published
 * asynchronously by `_load_wasm_jieba`, so it must have finished loading).
 *
 * @param {string} keyword
 * @param {string} sentence
 * @returns {number} similarity in [0, 1]; 0 when either text yields no tokens
 */
function cosineSimilarity(keyword, sentence) {
    // Count token occurrences in a Map so tokens such as "__proto__" or
    // "constructor" cannot collide with Object.prototype members.
    const countTokens = (text) => {
        const counts = new Map();
        for (const token of cut(text.toLowerCase(), true)) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
        return counts;
    };
    const sumOfSquares = (counts) => {
        let total = 0;
        for (const count of counts.values()) {
            total += count ** 2;
        }
        return total;
    };

    // Turn the keyword and the sentence into token-frequency tables.
    const keywordFreq = countTokens(keyword);
    const sentenceFreq = countTokens(sentence);

    // Only tokens present on both sides contribute to the dot product.
    let dotProduct = 0;
    for (const [token, count] of keywordFreq) {
        dotProduct += count * (sentenceFreq.get(token) ?? 0);
    }

    const keywordMagnitude = Math.sqrt(sumOfSquares(keywordFreq));
    const sentenceMagnitude = Math.sqrt(sumOfSquares(sentenceFreq));
    const denominator = keywordMagnitude * sentenceMagnitude;
    // An empty side has zero magnitude; report "no similarity" instead of NaN (0/0).
    if (denominator === 0) {
        return 0;
    }
    return dotProduct / denominator;
}
function modal_open(url, num) function modal_open(url, num)
{ {
@ -1229,7 +1281,7 @@ function modal_open(url, num)
let modalele = eleparse(iframe.contentDocument); let modalele = eleparse(iframe.contentDocument);
let article = new Readability(iframe.contentDocument.cloneNode(true)).parse(); let article = new Readability(iframe.contentDocument.cloneNode(true)).parse();
let fulltext = article.textContent; let fulltext = article.textContent;
fulltext.replaceAll("\n\n","\n").replaceAll("\n\n","\n"); fulltext = fulltext.replaceAll("\n\n","\n").replaceAll("\n\n","\n");
const delimiter = /[?!;\?\n]/g const delimiter = /[?!;\?\n]/g
fulltext = fulltext.split(delimiter); fulltext = fulltext.split(delimiter);
optkeytext = { optkeytext = {
@ -1237,6 +1289,7 @@ function modal_open(url, num)
headers: headers, headers: headers,
body: JSON.stringify({'text':fulltext.join("\n")}) body: JSON.stringify({'text':fulltext.join("\n")})
}; };
console.log(fulltext)
fetchRetry('https://search.kg/keytext',3,optkeytext) fetchRetry('https://search.kg/keytext',3,optkeytext)
.then(response => response.json()) .then(response => response.json())
.then(data => { .then(data => {

View file

@ -1174,6 +1174,7 @@ button.btn_more {
<script src="/static/themes/magi/Readability-readerable.js"></script> <script src="/static/themes/magi/Readability-readerable.js"></script>
<script src="/static/themes/magi/Readability.js"></script> <script src="/static/themes/magi/Readability.js"></script>
<script src="/static/themes/magi/markdown.js"></script> <script src="/static/themes/magi/markdown.js"></script>
<script> <script>
const original_search_query = "''' + original_search_query.replace('"',"") + r'''" const original_search_query = "''' + original_search_query.replace('"',"") + r'''"
const search_queryquery = "''' + search_query.query.replace('"',"") + r'''" const search_queryquery = "''' + search_query.query.replace('"',"") + r'''"
@ -1196,6 +1197,57 @@ function proxify()
} }
}
/**
 * Loads the jieba WebAssembly tokenizer and publishes its `cut` function as
 * `window.cut`. Does nothing when `window.cut` is already defined.
 *
 * @returns {Promise<any>} the value resolved by the module's `init()`, or
 *   `undefined` when the tokenizer was already loaded
 */
const _load_wasm_jieba = async () => {
    if (window.cut !== undefined) return;
    /* load jieba */
    const {
        default: init,
        cut
    } = await import("/static/themes/magi/jieba_rs_wasm.js");
    const inited = await init();
    // Publish only after init() has finished so callers never see an unusable `cut`.
    window.cut = cut;
    return inited;
};
// Started eagerly at page load. Failures are reported here; otherwise a failed
// import or instantiation would surface only as an unhandled promise rejection.
_load_wasm_jieba().catch((err) => {
    console.error("Failed to load jieba wasm tokenizer:", err);
});
/**
 * Cosine similarity between the token-frequency vectors of two texts.
 *
 * Both texts are lower-cased and tokenized with the global `cut` (published
 * asynchronously by `_load_wasm_jieba`, so it must have finished loading).
 *
 * @param {string} keyword
 * @param {string} sentence
 * @returns {number} similarity in [0, 1]; 0 when either text yields no tokens
 */
function cosineSimilarity(keyword, sentence) {
    // Count token occurrences in a Map so tokens such as "__proto__" or
    // "constructor" cannot collide with Object.prototype members.
    const countTokens = (text) => {
        const counts = new Map();
        for (const token of cut(text.toLowerCase(), true)) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
        return counts;
    };
    const sumOfSquares = (counts) => {
        let total = 0;
        for (const count of counts.values()) {
            total += count ** 2;
        }
        return total;
    };

    // Turn the keyword and the sentence into token-frequency tables.
    const keywordFreq = countTokens(keyword);
    const sentenceFreq = countTokens(sentence);

    // Only tokens present on both sides contribute to the dot product.
    let dotProduct = 0;
    for (const [token, count] of keywordFreq) {
        dotProduct += count * (sentenceFreq.get(token) ?? 0);
    }

    const keywordMagnitude = Math.sqrt(sumOfSquares(keywordFreq));
    const sentenceMagnitude = Math.sqrt(sumOfSquares(sentenceFreq));
    const denominator = keywordMagnitude * sentenceMagnitude;
    // An empty side has zero magnitude; report "no similarity" instead of NaN (0/0).
    if (denominator === 0) {
        return 0;
    }
    return dotProduct / denominator;
}
function modal_open(url, num) function modal_open(url, num)
{ {
@ -1229,7 +1281,7 @@ function modal_open(url, num)
let modalele = eleparse(iframe.contentDocument); let modalele = eleparse(iframe.contentDocument);
let article = new Readability(iframe.contentDocument.cloneNode(true)).parse(); let article = new Readability(iframe.contentDocument.cloneNode(true)).parse();
let fulltext = article.textContent; let fulltext = article.textContent;
fulltext.replaceAll("\n\n","\n").replaceAll("\n\n","\n"); fulltext = fulltext.replaceAll("\n\n","\n").replaceAll("\n\n","\n");
const delimiter = /[?!;\?\n]/g const delimiter = /[?!;\?\n]/g
fulltext = fulltext.split(delimiter); fulltext = fulltext.split(delimiter);
optkeytext = { optkeytext = {
@ -1237,6 +1289,7 @@ function modal_open(url, num)
headers: headers, headers: headers,
body: JSON.stringify({'text':fulltext.join("\n")}) body: JSON.stringify({'text':fulltext.join("\n")})
}; };
console.log(fulltext)
fetchRetry('https://search.kg/keytext',3,optkeytext) fetchRetry('https://search.kg/keytext',3,optkeytext)
.then(response => response.json()) .then(response => response.json())
.then(data => { .then(data => {