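Remove OPFS and use the Cache API instead; map the model index onto [A-Z] for the storage path. Build twice (once per Wrapper.js) instead of patching the output, since the post-build hack no longer works :(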

This commit is contained in:
msqr1
2024-10-07 11:59:37 -07:00
parent 0d1acd0ebd
commit 182124e275
13 changed files with 232 additions and 109 deletions

View File

@@ -8,28 +8,33 @@ CommonModel::CommonModel(int index, bool normalMdl, int tarStart, int tarSize) :
});
}
void CommonModel::extractAndLoad(unsigned char* tar, int tarSize) {
std::string storepath{'/' + std::to_string(index)};
int res{untar(tar, tarSize, storepath.c_str())};
// Map index onto [A-Z]
const char storepath[3]{'/', static_cast<char>(index % 26 + 'A')};
int res{untar(tar, tarSize, storepath)};
free(tar);
const char* untarErr{};
switch(res) {
case IncorrectFormat:
fireEv(index, Event::status, "Untar: Incorrect tar format, must be USTAR");
return;
untarErr = "Untar: Incorrect tar format, must be USTAR";
break;
case IncorrectFiletype:
fireEv(index, Event::status, "Untar: Not a directory or regular file");
return;
untarErr = "Untar: Not a directory or regular file";
break;
case FailedOpen:
fireEv(index, Event::status, "Untar: Unable to open file for write");
return;
untarErr = "Untar: Unable to open file for write";
break;
case FailedWrite:
fireEv(index, Event::status, "Untar: Unable to write file");
return;
untarErr = "Untar: Unable to write file";
break;
case FailedClose:
fireEv(index, Event::status, "Untar: Unable to close file after write");
return;
untarErr = "Untar: Unable to close file after write";
};
if(normalMdl) mdl = vosk_model_new(storepath.c_str());
else mdl = vosk_spk_model_new(storepath.c_str());
if(untarErr != nullptr) {
fireEv(index, Event::status, untarErr);
return;
}
if(normalMdl) mdl = vosk_model_new(storepath);
else mdl = vosk_spk_model_new(storepath);
if(normalMdl ? std::get<VoskModel*>(mdl) != nullptr : std::get<VoskSpkModel*>(mdl) != nullptr) fireEv(index, status);
else fireEv(index, status, "Unable to load model for recognition");
fs::remove_all(storepath);

View File

@@ -5,48 +5,9 @@
if(ENVIRONMENT_IS_WEB) {
// "var" to expose this outside the if
// 'var' to expose this outside the if
var objs = [];
var events = ['status', 'partialResult', 'result'];
let storageWorkerURL = URL.createObjectURL(new Blob(['(', (async () => {
let txtDecoder = new TextDecoder();
let txtEncoder = new TextEncoder();
let OPFSRoot = await navigator.storage.getDirectory();
onmessage = async msg => {
msg = msg.data;
let components = msg.storepath.split('/');
let prevDir = OPFSRoot;
for(let component of components) prevDir = await prevDir.getDirectoryHandle(component, { create: true });
let idHandle = await prevDir.getFileHandle('id', { create: true });
let mdlHandle = await prevDir.getFileHandle('model.tgz', { create: true });
let idFile = await idHandle.createSyncAccessHandle();
let mdlFile = await mdlHandle.createSyncAccessHandle();
let oldIdBuf = new ArrayBuffer(idFile.getSize());
idFile.read(oldIdBuf);
let tar, tgz;
if(txtDecoder.decode(oldIdBuf) == msg.id) {
tgz = new ArrayBuffer(mdlFile.getSize());
mdlFile.read(tgz);
tar = await new Response(new Response(tgz).body.pipeThrough(new DecompressionStream('gzip'))).arrayBuffer();
}
else {
let res = await fetch(msg.url);
if(!res.ok) throw 'Unable to download model'
let teed = res.body.tee();
tgz = await new Response(teed[0].pipeThrough(new CompressionStream('gzip'))).arrayBuffer();
mdlFile.write(tgz, { at: 0 });
mdlFile.truncate(tgz.byteLength);
let newId = txtEncoder.encode(msg.id);
idFile.write(newId, { at: 0 });
idFile.truncate(newId.length);
tar = await new Response(teed[1]).arrayBuffer();
}
idFile.close();
mdlFile.close();
self.postMessage(tar, [tar]);
}
}).toString(), ')()'], { type: 'text/javascript' }))
let storageWorker = new Worker(storageWorkerURL);
let processorURL = URL.createObjectURL(new Blob(['(', (() => {
registerProcessor('VoskletTransferer', class extends AudioWorkletProcessor {
constructor(opts) {
@@ -88,17 +49,23 @@ class CommonModel extends EventTarget {
else reject(ev.detail)
}, { once: true })
});
storageWorker.addEventListener('message', tar => {
tar = tar.data;
let tarStart = _malloc(tar.byteLength);
HEAPU8.set(new Uint8Array(tar), tarStart);
mdl.obj = new Module['CommonModel'](objs.length - 1, normalMdl, tarStart, tar.byteLength);
}, { once: true });
storageWorker.postMessage({
url: url,
storepath: storepath,
id: id
});
let cache = await caches.open('Vosklet');
let res = await cache.match(storepath);
let tar;
if(typeof res == 'undefined' || res.headers.get('id') != id) {
// Bypass the browser's HTTP cache: the model is cached explicitly via the Cache API below
res = await fetch(url, { cache: 'no-store' });
if (!res.ok) throw 'Unable to fetch model, status: ' + res.status;
await cache.put(storepath, new Response(
res.clone().body.pipeThrough(new CompressionStream('gzip')),
{ headers: { 'id': id } }
));
}
tar = await new Response(res.body.pipeThrough(new DecompressionStream('gzip'))).arrayBuffer();
let tarStart = _malloc(tar.byteLength);
HEAPU8.set(new Uint8Array(tar), tarStart);
mdl.obj = new Module['CommonModel'](objs.length - 1, normalMdl, tarStart, tar.byteLength);
return result;
}
}
@@ -155,8 +122,6 @@ Module = {
'cleanUp': async () => {
for(let obj of objs) await obj.delete();
URL.revokeObjectURL(processorURL);
URL.revokeObjectURL(storageWorkerURL);
storageWorker.terminate();
},
'createTransferer': async (ctx, bufSize) => {

View File

@@ -72,15 +72,18 @@ if [ ! -d "$VOSK" ]; then
fi
cd "$SRC" &&
em++ Util.cc CommonModel.cc Recognizer.cc Bindings.cc -O3 -std=c++23 -fno-rtti -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -DMAX_WORKERS="$MAX_THREADS" -sWASMFS -sWASM_BIGINT -sMODULARIZE -sWASM_EXNREF -sTEXTDECODER=2 -sWASM_WORKERS=2 -sEVAL_CTORS=2 -sINITIAL_MEMORY="$INITIAL_MEMORY" -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sINCOMING_MODULE_JS_API=wasmMemory,instantiateWasm,wasm -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sENVIRONMENT=web,worker -I. -I"$VOSK"/src -L"$KALDI"/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L"$OPENFST"/lib -l:libfst.a -l:libfstngram.a -L"$CLAPACK_WASM" -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L"$VOSK"/src -l:vosk.a -lembind -flto -msimd128 -matomics -mbulk-memory -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals --pre-js Wrapper.js --closure 1 -o ../Vosklet.js
FILES="Util.o CommonModel.o Recognizer.o Bindings.o"
COMMON_FLAGS="-O3 -flto -fno-rtti -msimd128 -matomics -mbulk-memory -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals"
COMMON_LD_FLAGS="-sWASMFS -sWASM_BIGINT -sMODULARIZE -sTEXTDECODER=2 -sWASM_WORKERS=2 -sEVAL_CTORS=2 -sINITIAL_MEMORY="$INITIAL_MEMORY" -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sINCOMING_MODULE_JS_API=wasmMemory,instantiateWasm,wasm -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sENVIRONMENT=web,worker -L"$KALDI"/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L"$OPENFST"/lib -l:libfst.a -l:libfstngram.a -L"$CLAPACK_WASM" -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L"$VOSK"/src -l:vosk.a -lembind --closure 1 --pre-js"
em++ ${FILES//.o/.cc} $COMMON_FLAGS -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -DMAX_WORKERS="$MAX_THREADS" -std=c++23 -c -I. -I"$VOSK"/src &&
em++ $FILES $COMMON_FLAGS $COMMON_LD_FLAGS Wrapper.js -o ../Vosklet.js
em++ $FILES $COMMON_FLAGS $COMMON_LD_FLAGS ../Examples/Wrapper.js -o ../Examples/Vosklet.js
rm -f $FILES
cd .. &&
tr -d '\n' < Vosklet.js | tr -s ' ' > /tmp/hehe && mv /tmp/hehe Vosklet.js &&
tr -d '\n' < Examples/Vosklet.js | tr -s ' ' > /tmp/hahe && mv /tmp/hahe Examples/Vosklet.js &&
rm -rf /tmp/openfst
cp Vosklet.js Examples/Vosklet.js &&
cp Vosklet.wasm Examples/Vosklet.wasm &&
# raw.githubusercontent.com can't serve files with a Content-Encoding: gzip header, so the browser won't decompress them automatically. Decompress manually instead.
sed -i 's/.pipeThrough(new CompressionStream("gzip"))//;s/\[1\])/[1].pipeThrough(new DecompressionStream("gzip")))/' Examples/Vosklet.js &&
rm -rf /tmp/openfst