Great progress

This commit is contained in:
msqr1
2024-03-10 22:52:01 -07:00
parent 9a40cdaa7f
commit 2091845b04
12 changed files with 8977 additions and 9264 deletions

View File

@@ -9,7 +9,7 @@
- Download multiple models
- Model storage path management (for multiple models)
- Model ID management (for model updates)
- Massively smaller JS size (>5.7MB vs 1.7MB)
- Smaller JS size (>3.1MB vs 1.7MB)
- All related files (worker.js, worklet processors,...) are bundled
- Shorter from-scratch build time

1
devel/Vosklet.d.ts vendored
View File

@@ -15,7 +15,6 @@ interface WasmModule {
export interface genericModel {
check(): void;
afterFetch(): void;
load(): void;
delete(): void;
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@ LIBARCHIVE=$(realpath libarchive) &&
CLAPACK_WASM=$(realpath clapack-wasm) &&
MAX_MEMORY=${MAX_MEMORY:-300mb} &&
MAX_THREADS=${MAX_THREADS:-2} &&
MAX_THREADS=${MAX_THREADS:-5} &&
EMSDK=${EMSDK:-$(realpath emsdk)} &&
if [ ! -d $EMSDK ]; then
@@ -28,12 +28,12 @@ fi
. $EMSDK/emsdk_env.sh &&
cd $SRC &&
MODE=2 && # 0: Ultra debug info, 1: Extremely optimized release, else custom
MODE=0 && # 0: Ultra debug info, 1: Extremely optimized release, else custom
echo "Mode = $MODE" &&
if [ $MODE = 0 ]; then
em++ -O0 global.cc genericModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sGZIP_EMBEDDINGS -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sRUNTIME_DEBUG -sSTACK_OVERFLOW_CHECK=2 -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sASSERTIONS=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sDISABLE_EXCEPTION_CATCHING=0 -sINVOKE_RUN=0 -sPOLYFILL=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --emit-symbol-map --embind-emit-tsd Vosklet.d.ts -fsanitize=undefined -fsanitize=address -fsanitize=leak -g3 --pre-js pre.js -o ../devel/Vosklet.js
em++ -O0 global.cc genericModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sGZIP_EMBEDDINGS -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sRUNTIME_DEBUG -sSTACK_OVERFLOW_CHECK=2 -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sASSERTIONS=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sDISABLE_EXCEPTION_CATCHING=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sPOLYFILL=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --embind-emit-tsd Vosklet.d.ts -fsanitize=undefined -fsanitize=address -fsanitize=leak -g3 --pre-js pre.js -o ../devel/Vosklet.js
elif [ $MODE = 1 ]; then
em++ -O3 global.cc genericModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sGZIP_EMBEDDINGS -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sPOLYFILL=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --pre-js pre.js -o ../devel/Vosklet.js
em++ -O3 global.cc genericModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sGZIP_EMBEDDINGS -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --pre-js pre.js -o ../devel/Vosklet.js
else
em++ -O0 global.cc genericModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sGZIP_EMBEDDINGS -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sRUNTIME_DEBUG -sSTACK_OVERFLOW_CHECK=2 -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sASSERTIONS=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sDISABLE_EXCEPTION_CATCHING=0 -sEXIT_RUNTIME=0 -sPOLYFILL=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --emit-symbol-map --embind-emit-tsd Vosklet.d.ts -fsanitize=undefined -fsanitize=address -fsanitize=leak -g3 --pre-js pre.js -o ../devel/Vosklet.js
em++ -O0 global.cc genericModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sGZIP_EMBEDDINGS -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sRUNTIME_DEBUG -sSTACK_OVERFLOW_CHECK=2 -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sASSERTIONS=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sDISABLE_EXCEPTION_CATCHING=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sPOLYFILL=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --embind-emit-tsd Vosklet.d.ts -fsanitize=undefined -fsanitize=address -fsanitize=leak -g3 --pre-js pre.js -o ../devel/Vosklet.js
fi

View File

@@ -9,8 +9,7 @@ EMSCRIPTEN_BINDINGS() {
class_<genericModel>("genericModel")
.constructor<std::string, std::string, int, bool>(allow_raw_pointers())
.function("check", &genericModel::check, allow_raw_pointers())
.function("afterFetch", &genericModel::afterFetch, allow_raw_pointers())
.function("load", &genericModel::load, allow_raw_pointers());
.function("afterFetch", &genericModel::afterFetch, allow_raw_pointers());
class_<recognizer>("recognizer")
.constructor<genericModel*, float, int>(allow_raw_pointers())

View File

@@ -52,7 +52,7 @@ void genericModel::check() {
thrd.addTask([this](){
if(OPFSTried && !OPFSOk) {
emscripten_console_log("OPFS isn't available");
fireEv("_checkMdl", "OPFS isn't available", index);
fireEv("_continue", "OPFS isn't available", index);
return;
}
if(!OPFSTried){
@@ -63,7 +63,7 @@ void genericModel::check() {
emscripten_console_log("Initializing OPFS");
if(!OPFSOk) {
emscripten_console_log("OPFS initialization failed");
fireEv("_checkMdl", "OPFS initialization failed", index);
fireEv("_continue", "OPFS initialization failed", index);
return;
}
fs::current_path("/opfs", tank);
@@ -84,41 +84,24 @@ void genericModel::check() {
}
if(!checkFiles() && !fs::exists("id", tank)) {
emscripten_console_log("Model is not available, fetching...");
fireEv("_checkMdl", "fetch", index);
fireEv("_continue", "1", index);
return;
}
emscripten_console_log("Model is available, verifying ID");
FILE* idFile {fopen("id", "r")};
if(idFile == nullptr) {
emscripten_console_log("Couldn't open id file");
fireEv("_checkMdl", "Couldn't open id file", index);
return;
}
if(fseek(idFile, 0, SEEK_END) != 0) {
emscripten_console_log("Id file end seeking fail");
fireEv("_checkMdl", "Id file end seeking fail", index);
fclose(idFile);
return;
};
long long oldsize{ftell(idFile)};
char* oldid {new char[oldsize]};
if(fseek(idFile, 0L, SEEK_SET) != 0) {
emscripten_console_log("Id file start seeking fail");
fireEv("_checkMdl", "Id file start seeking fail", index);
fclose(idFile);
return;
};
fread(oldid, 1, oldsize, idFile);
fclose(idFile);
if(strcmp(oldid, id.c_str()) != 0) {
std::ifstream idFile("id");
idFile.seekg(0, std::ios::end);
size_t oldSize = idFile.tellg();
std::string oldID(oldSize, ' ');
idFile.seekg(0);
idFile.read(&oldID[0], oldSize);
if(id.compare(oldID.c_str()) != 0) {
emscripten_console_log("ID doesn't match, fetching...");
fireEv("_checkMdl", "fetch", index);
fireEv("_continue", "1", index);
}
else {
emscripten_console_log("ID match, returning instance");
fireEv("_checkMdl", nullptr, index);
emscripten_console_log("ID matches, loading...");
//load();
}
delete[] oldid;
emscripten_console_log("Success! Model is ready!");
});
}
@@ -126,9 +109,9 @@ void genericModel::afterFetch() {
thrd.addTask([this](){
emscripten_console_log("Trying to extract...");
if(!extract()) {
fs::remove("/opfs/m0dEl.tar",tank);
//fs::remove("/opfs/m0dEl.tar",tank);
fs::current_path("/opfs", tank);
fs::remove_all(storepath, tank);
//fs::remove_all(storepath, tank);
emscripten_console_log("Unable to extract model");
fireEv("_continue", "Unable to extract model", index);
return;
@@ -167,13 +150,25 @@ void genericModel::afterFetch() {
bool genericModel::extract() {
static fs::path path{};
static int fd{};
static archive_entry* entry{archive_entry_new()};
archive_entry* entry{archive_entry_new()};
archive* src {archive_read_new()};
auto cleanup {[&](){
archive_entry_free(entry);
archive_read_free(src);
}};
archive_read_support_format_tar(src);
archive_read_open_filename(src, "/opfs/m0dEl.tar", 10240);
if(archive_errno(src) != 0) return false;
if(archive_errno(src) != 0) {
cleanup();
emscripten_console_log(archive_error_string(src));
return false;
}
while(archive_read_next_header2(src, entry) == ARCHIVE_OK) {
if(archive_errno(src) != 0) return false;
if(archive_errno(src) != 0) {
cleanup();
emscripten_console_log(archive_error_string(src));
return false;
}
path = archive_entry_pathname(entry);
// Strip 1st component, keep relative path
path = "." + path.generic_string().substr(path.generic_string().find("/"));
@@ -183,16 +178,20 @@ bool genericModel::extract() {
continue;
}
fd = creat(path.c_str(),0777);
if(fd == -1) return false;
if(fd == -1) {
cleanup();
return false;
}
archive_read_data_into_fd(src, fd);
close(fd);
if(archive_errno(src) != 0) {
cleanup();
emscripten_console_log(archive_error_string(src));
return false;
}
}
fs::remove("README",tank);
fs::remove("/opfs/m0dEl.tar",tank);
archive_read_free(src);
cleanup();
return true;
}

View File

@@ -3,7 +3,7 @@
#include <string>
#include <filesystem>
#include <cstring>
#include <fstream>
#include <fcntl.h>
#include <variant>
@@ -13,14 +13,14 @@
namespace fs = std::filesystem;
struct genericModel {
bool normalMdl{};
bool recognizerUsedThrd{};
int index{};
bool normalMdl;
bool recognizerUsedThrd;
int index;
std::string storepath;
std::string id;
std::variant<VoskModel*, VoskSpkModel*> mdl{};
reusableThrd thrd{};
static bool extract();
std::variant<VoskModel*, VoskSpkModel*> mdl;
reusableThrd thrd;
bool extract();
void load();
void check();
bool checkFiles();

View File

@@ -20,7 +20,6 @@ reusableThrd::reusableThrd() {
blocker.wait(done.test(std::memory_order_relaxed) || queue.empty(), std::memory_order_relaxed);
blocker.clear(std::memory_order_relaxed);
while(!queue.empty()) {
emscripten_console_log("Executing task...");
queue.front()();
queue.pop();
}

View File

@@ -1,5 +1,4 @@
let objs = []
let dStream = new DecompressionStream("gzip")
Module.cleanUp = () => {
objs.forEach(obj => obj.delete())
URL.revokeObjectURL(pthreadUrl)
@@ -9,33 +8,31 @@ Module.locateFile = (path, scriptDir) => {
if(path === "Vosklet.js") return pthreadUrl
return scriptDir+path
}
class genericModel extends EventTarget {
constructor() {
constructor(url, storepath, id, normalMdl) {
super()
objs.push(this)
this.url = url
this.storepath = storepath
this.id = id
this.normalMdl = normalMdl
}
static async _init(url, storepath, id, normalMdl) {
let mdl = new genericModel()
return new Promise((resolve, reject) => {
mdl.addEventListener("_continue", (ev) => {
if(ev.detail === "0") {
return resolve(mdl)
}
mdl.delete()
reject(ev.detail)
}, {once : true})
mdl.addEventListener("_checkMdl", async (ev) => {
let mdl = new genericModel(url, storepath, id, normalMdl)
mdl.addEventListener("_continue", async function listener(ev) {
switch(ev.detail) {
case "0":
mdl.load(true);
break;
case "fetch":
mdl.removeEventListener("_continue", listener)
return resolve(mdl)
case "1":
let res = await fetch(url)
if(!res.ok) {
return reject("Unable to download model")
}
let wStream = await (await (await navigator.storage.getDirectory()).getFileHandle("m0dEl.tar", {create : true})).createWritable()
let tarReader = res.body.pipeThrough(dStream).getReader()
let tarReader = res.body.pipeThrough(new DecompressionStream("gzip")).getReader()
while(true) {
let readRes = await tarReader.read()
if(!readRes.done) await wStream.write(readRes.value)
@@ -46,9 +43,11 @@ class genericModel extends EventTarget {
mdl.obj.afterFetch()
break;
default:
mdl.delete()
mdl.removeEventListener("_continue", listener)
reject(ev.detail)
}
}, {once : true})
})
mdl.obj = new Module.genericModel(storepath, id, objs.length-1, normalMdl)
mdl.obj.check()
})
@@ -58,9 +57,17 @@ class genericModel extends EventTarget {
}
}
Module.makeModel = async (url, storepath, id) => {
return genericModel._init(url, storepath, id, true)
for (obj in objs) {
if (typeof obj.normalMdl !== "undefined" && obj.normalMdl && obj.url === url && obj.storepath === storepath && obj.id === id) return obj;
}
return genericModel._init(url, storepath, id, true);
}
/**
 * Create a speaker model, reusing an existing one when an identical
 * speaker model has already been registered in `objs`.
 *
 * @param {string} url - Location the compressed model is fetched from.
 * @param {string} storepath - Path the model is stored under.
 * @param {string} id - Model ID used to detect model updates.
 * @returns {Promise<genericModel>} The matching existing speaker model, or
 *   the result of `genericModel._init(url, storepath, id, false)`.
 */
Module.makeSpkModel = async (url, storepath, id) => {
  // `for...of` walks the stored objects themselves. The previous `for...in`
  // walked the array's index strings (and leaked an implicit global `obj`),
  // so `obj.normalMdl` was always undefined and no cached model ever matched.
  for (const obj of objs) {
    // Entries that never set `normalMdl` (e.g. recognizers) are skipped by
    // the typeof check; `!obj.normalMdl` then selects speaker models only.
    if (typeof obj.normalMdl !== "undefined" && !obj.normalMdl && obj.url === url && obj.storepath === storepath && obj.id === id) {
      return obj;
    }
  }
  return genericModel._init(url, storepath, id, false)
}
class Recognizer extends EventTarget {
@@ -68,48 +75,29 @@ class Recognizer extends EventTarget {
super()
objs.push(this)
}
static async _init1(model, sampleRate) {
let rec = new Recognizer()
static async _init(model, sampleRate, mode, grammar, spkModel) {
return new Promise((resolve, reject) => {
let rec = new Recognizer()
rec.addEventListener("_continue", (ev) => {
if(ev.detail == "0") {
if(ev.detail === "0") {
rec.ptr = Module._malloc(512)
return resolve(rec)
}
rec.delete()
reject(ev.detail)
}, {once : true})
rec.obj = new Module.recognizer(model, sampleRate, objs.length-1)
switch(mode) {
case 1:
rec.obj = new Module.recognizer(model, sampleRate, objs.length-1)
break
case 2:
rec.obj = new Module.recognizer(model, spkModel, sampleRate, objs.length-1)
break
default:
rec.obj = new Module.recognizer(model, grammar, sampleRate, objs.length-1, 0)
}
})
}
static async _init2(model, spkModel, sampleRate) {
let rec = new Recognizer()
return new Promise((resolve, reject) => {
rec.addEventListener("_continue", (ev) => {
if(ev.detail == "0") {
rec.ptr = Module._malloc(512)
return resolve(rec)
}
rec.delete()
reject(ev.detail)
}, {once : true})
rec.obj = new Module.recognizer(model, spkModel, sampleRate, objs.length-1)
})
}
static async _init3(model, grammar, sampleRate) {
let rec = new Recognizer()
return new Promise((resolve, reject) => {
rec.addEventListener("_continue", (ev) => {
if(ev.detail == "0") {
rec.ptr = Module._malloc(512)
return resolve(rec)
}
rec.delete()
reject(ev.detail)
}, {once : true})
rec.obj = new Module.recognizer(model, grammar, sampleRate, objs.length-1, 0)
})
}
}
async getNode(ctx, channelIndex = 0) {
if(typeof this.node === "undefined") {
let msgChannel = new MessageChannel()
@@ -149,13 +137,13 @@ class Recognizer extends EventTarget {
}
}
Module.makeRecognizer = (model, sampleRate) => {
return Recognizer._init(model.obj, sampleRate)
return Recognizer._init(model.obj, sampleRate, 1)
}
Module.makeRecognizerWithSpkModel = (model, spkModel, sampleRate) => {
return Recognizer._init2(model.obj, spkModel.obj, sampleRate)
Module.makeRecognizerWithSpkModel = (model, sampleRate, spkModel) => {
return Recognizer._init(model.obj, sampleRate, 2, null, spkModel)
}
Module.makeRecognizerWithGrm = (model, grammar, sampleRate) => {
return Recognizer._init3(model.obj, grammar, sampleRate)
Module.makeRecognizerWithGrm = (model, sampleRate, grammar) => {
return Recognizer._init(model.obj, sampleRate, 3, grammar, null)
}
let processorUrl = URL.createObjectURL(new Blob(['(',
(() => {
@@ -177,168 +165,10 @@ let processorUrl = URL.createObjectURL(new Blob(['(',
})
}).toString()
, ')()'], {type : "text/javascript"}))
/*
let pthreadUrl = URL.createObjectURL(new Blob(['(',
(() => {
/**
* @license
* Copyright 2015 The Emscripten Authors
* SPDX-License-Identifier: MIT
*/
// Pthread Web Worker startup routine:
// This is the entry point file that is loaded first by each Web Worker
// that executes pthreads on the Emscripten application.
'use strict';
var Module = {};
// Thread-local guard variable for one-time init of the JS state
var initializedJS = false;
function assert(condition, text) {
if (!condition) abort('Assertion failed: ' + text);
}
function threadPrintErr(...args) {
var text = args.join(' ');
console.error(text);
}
function threadAlert(...args) {
var text = args.join(' ');
postMessage({cmd: 'alert', text, threadId: Module['_pthread_self']()});
}
// We don't need out() for now, but may need to add it if we want to use it
// here. Or, if this code all moves into the main JS, that problem will go
// away. (For now, adding it here increases code size for no benefit.)
var out = () => { throw 'out() is not defined in worker.js.'; }
var err = threadPrintErr;
self.alert = threadAlert;
var dbg = threadPrintErr;
Module['instantiateWasm'] = (info, receiveInstance) => {
// Instantiate from the module posted from the main thread.
// We can just use sync instantiation in the worker.
var module = Module['wasmModule'];
// We don't need the module anymore; new threads will be spawned from the main thread.
Module['wasmModule'] = null;
var instance = new WebAssembly.Instance(module, info);
// TODO: Due to Closure regression https://github.com/google/closure-compiler/issues/3193,
// the above line no longer optimizes out down to the following line.
// When the regression is fixed, we can remove this if/else.
return receiveInstance(instance);
}
// Turn unhandled rejected promises into errors so that the main thread will be
// notified about them.
self.onunhandledrejection = (e) => {
throw e.reason || e;
};
function handleMessage(e) {
try {
if (e.data.cmd === 'load') { // Preload command that is called once per worker to parse and load the Emscripten code.
// Until we initialize the runtime, queue up any further incoming messages.
let messageQueue = [];
self.onmessage = (e) => messageQueue.push(e);
// And add a callback for when the runtime is initialized.
self.startWorker = (instance) => {
Module = instance;
// Notify the main thread that this thread has loaded.
postMessage({ 'cmd': 'loaded' });
// Process any messages that were queued before the thread was ready.
for (let msg of messageQueue) {
handleMessage(msg);
}
// Restore the real message handler.
self.onmessage = handleMessage;
};
// Module and memory were sent from main thread
Module['wasmModule'] = e.data.wasmModule;
// Use `const` here to ensure that the variable is scoped only to
// that iteration, allowing safe reference from a closure.
for (const handler of e.data.handlers) {
Module[handler] = (...args) => {
postMessage({ cmd: 'callHandler', handler, args: args });
}
}
Module['wasmMemory'] = e.data.wasmMemory;
Module['buffer'] = Module['wasmMemory'].buffer;
Module['workerID'] = e.data.workerID;
Module['ENVIRONMENT_IS_PTHREAD'] = true;
if (typeof e.data.urlOrBlob == 'string') {
importScripts(e.data.urlOrBlob);
} else {
var objectUrl = URL.createObjectURL(e.data.urlOrBlob);
importScripts(objectUrl);
URL.revokeObjectURL(objectUrl);
}
loadBR(Module);
} else if (e.data.cmd === 'run') {
// Pass the thread address to wasm to store it for fast access.
Module['__emscripten_thread_init'](e.data.pthread_ptr, /*is_main=*/0, /*is_runtime=*/0, /*can_block=*/1);
// Await mailbox notifications with `Atomics.waitAsync` so we can start
// using the fast `Atomics.notify` notification path.
Module['__emscripten_thread_mailbox_await'](e.data.pthread_ptr);
assert(e.data.pthread_ptr);
// Also call inside JS module to set up the stack frame for this pthread in JS module scope
Module['establishStackSpace']();
Module['PThread'].receiveObjectTransfer(e.data);
Module['PThread'].threadInitTLS();
if (!initializedJS) {
// Embind must initialize itself on all threads, as it generates support JS.
// We only do this once per worker since they get reused
Module['__embind_initialize_bindings']();
initializedJS = true;
}
try {
Module['invokeEntryPoint'](e.data.start_routine, e.data.arg);
} catch(ex) {
if (ex != 'unwind') {
// The pthread "crashed". Do not call `_emscripten_thread_exit` (which
// would make this thread joinable). Instead, re-throw the exception
// and let the top level handler propagate it back to the main thread.
throw ex;
}
}
} else if (e.data.cmd === 'cancel') { // Main thread is asking for a pthread_cancel() on this thread.
if (Module['_pthread_self']()) {
Module['__emscripten_thread_exit'](-1);
}
} else if (e.data.target === 'setimmediate') {
// no-op
} else if (e.data.cmd === 'checkMailbox') {
if (initializedJS) {
Module['checkMailbox']();
}
} else if (e.data.cmd) {
// The received message looks like something that should be handled by this message
// handler, (since there is a e.data.cmd field present), but is not one of the
// recognized commands:
err(`worker.js received unknown command ${e.data.cmd}`);
err(e.data);
}
} catch(ex) {
err(`worker.js onmessage() captured an uncaught exception: ${ex}`);
if (ex?.stack) err(ex.stack);
Module['__emscripten_thread_crashed']?.();
throw ex;
}
};
self.onmessage = handleMessage;
}).toString()
, ')()'], {type : "text/javascript"}))
, ')()'], {type : "text/javascript"}))
*/

View File

@@ -5,9 +5,9 @@
struct recognizer {
std::atomic_flag done{};
std::atomic_flag controller{};
float* dataPtr{};
int index{};
VoskRecognizer* rec{};
float* dataPtr;
int index;
VoskRecognizer* rec;
recognizer(genericModel* model, float sampleRate, int index);
recognizer(genericModel* model, genericModel* spkModel, float sampleRate, int index);
recognizer(genericModel* model, const std::string& grm, float sampleRate, int index, int dummy);

View File

@@ -12,7 +12,7 @@
## ```Module``` object
| Function signature | Description |
|---|---|
| ```Promise<Model> makeModel(path: string, url: string, id: string)```<br><br>```Promise<SpkModel> makeSpkModel(path: string, url: string, id: string)``` | Make a ```Model``` or ```SpkModel```, model files must be directly under the model root, and compressed model must be in .tgz format. If:<br>- **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. |
| ```Promise<Model> makeModel(url: string, path: string, id: string)```<br><br>```Promise<SpkModel> makeSpkModel(url: string, path: string, id: string)``` | Make a ```Model``` or ```SpkModel```. Model files must be directly under the model root, and the compressed model must be in .tgz format. If:<br>- **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**.<br><br>Creating models with the same parameters will return a reference to the first one created. |
| ```Promise<Recognizer> makeRecognizer(model: Model, sampleRate: float)```<br><br>```Promise<Recognizer> makeRecognizerWithSpkModel(model: Model, sampleRate: float, spkModel: SpkModel)```<br><br>```Promise<Recognizer> makeRecognizerWithGrm(model: Model, sampleRate: float, grammar: string)``` | Make a ```Recognizer```. It will use **model**'s thread if it's the first user of **model**; otherwise it will use a new thread. |
| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: ```0```: Info) <br>```-2```: Error<br>```-1```: Warning<br>```1```: Verbose<br>```2```: More verbose<br>```3```: Debug |
| ```cleanUp()``` | A convenience function that calls ```delete()``` on all objects and revokes all URLs. **Put this at the end of your code!** |
@@ -43,9 +43,9 @@ Vosklet require SharedArrayBuffer to share thread's data, so these response head
If you can't set them, you may use a HACKY workaround at *src/addCOI.js*.
## Origin Private Filesystem (OPFS)
Vosklet needs the Emscripten WASMFS' OPFS to store its model, IDBFS was considered, but dropped because there is no direct way to read from IDBFS to C++ without copying to MEMFS (basically RAM). For safety with this, always:
- Try catch ```window.loadBR()``` to to check for OPFS availability.
- Check if there is enough space via ```navigator.storage.estimate()``` for TWICE THE MODEL SIZE before calling Module.makeModel
Vosklet needs the Emscripten WASMFS' OPFS to store its model. IDBFS was considered but dropped, because there is no direct way to read from IDBFS to C++ without copying to MEMFS (RAM). For safety with this, always:
- Wrap ```window.loadVosklet()``` in a try/catch to check for OPFS availability.
- Check if there is enough space via ```navigator.storage.estimate()``` for TWICE THE MODEL SIZE before calling ```Module.makeModel```
# Compilation
Changing any option to non-default values requires recompilation
@@ -58,5 +58,5 @@ cd Vosklet/src &&
|---|---|---|
| MAX_MEMORY | Set max memory, valid suffixes: kb, mb, gb, tb or none (bytes) | ```300mb```, as [recommended](https://alphacephei.com/vosk/models) |
| MAX_THREADS | Set the max number of threads (minimum 2) | ```2``` (1 OPFS thread + 1 model/recognizer thread) |
| COMPILE_JOBS | Set the number of jobs (threads) when compiling | ```$(nproc)``` |
| COMPILE_JOBS | Set the number of jobs (threads) when building | ```$(nproc)``` |
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |