From 48b54295f37eb2fa46f4e836a4dd586a35904b57 Mon Sep 17 00:00:00 2001 From: msqr1 Date: Wed, 24 Jan 2024 23:00:29 -0800 Subject: [PATCH] Non-working draft --- compile.sh | 2 +- src/preAfter.js | 3 +++ src/{pre.js => preBefore.js} | 25 ++++++++++++++----------- src/preMiddle.js | 23 +++++++++++++++++++++++ src/processor.js | 23 ----------------------- src/recognizer.cc | 28 ++++++++++++++++------------ src/recognizer.h | 11 ++++------- 7 files changed, 61 insertions(+), 54 deletions(-) create mode 100644 src/preAfter.js rename src/{pre.js => preBefore.js} (62%) create mode 100644 src/preMiddle.js delete mode 100644 src/processor.js diff --git a/compile.sh b/compile.sh index 9e43861..ace4e73 100755 --- a/compile.sh +++ b/compile.sh @@ -81,4 +81,4 @@ em++ -pthread -O3 -flto -Wno-deprecated -I. -I$KALDI/src -I$OPENFST/include $VOS emar -rcs vosk.a ${VOSK_FILES//.cc/.o} && cd $SRC && -em++ -O3 genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MAX_MEMORY=$MAX_MEMORY -sASYNCIFY -sPTHREAD_POOL_SIZE=$MAX_THREAD -sPTHREAD_POOL_SIZE_STRICT -sPTHREAD_POOL_DELAY_LOAD -sASYNCIFY_ONLY=['emscripten_wget'] -sALLOW_BLOCKING_ON_MAIN_THREAD=0 -sPOLYFILL=0 --pre-js pre.js -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -o BrowserRecognizer.js +em++ -O3 genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MAX_MEMORY=$MAX_MEMORY -sASYNCIFY -sPTHREAD_POOL_SIZE=$MAX_THREAD -sPTHREAD_POOL_SIZE_STRICT -sPTHREAD_POOL_DELAY_LOAD -sASYNCIFY_ONLY=['emscripten_wget'] -sALLOW_BLOCKING_ON_MAIN_THREAD=0 -sPOLYFILL=0 --pre-js preBefore.js --pre-js preMiddle.js --pre-js preAfter.js -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -o BrowserRecognizer.js diff --git a/src/preAfter.js b/src/preAfter.js new file mode 100644 index 0000000..ac57dfa --- /dev/null +++ b/src/preAfter.js @@ -0,0 +1,3 @@ + `]))) + return new recognizer(rec,ctx) +} \ No newline at end of file diff --git a/src/pre.js b/src/preBefore.js similarity index 62% rename from src/pre.js rename to src/preBefore.js index b890136..abdaca8 100644 --- a/src/pre.js +++ b/src/preBefore.js @@ -1,17 +1,21 @@ let objs = [] class recognizer extends EventTarget { - constructor(rec) { + constructor(rec,ctx) { super() this.obj = rec + this.ptr = Module._malloc(512) + let channel = new MessageChannel() + this.copier = new AudioWorkletNode(ctx, 'BRCopier', { channelCount: 1, numberOfInputs: 1, numberOfOutputs: 0 }) + this.copier.port.postMessage({cmd : "init", ptr: this.ptr},[channel.port1]) + channel.port1.onmessage = (ev) => { + this.obj.acceptWaveForm(this.ptr, 512) + } objs.push(this) } - processAudio(ctx) { - let ptr = Module._malloc(512); - - this.obj.acceptWaveForm(ptr) - } delete() { this.obj.delete() + this.copier.port.postMessage({cmd : "deinit"}) + Module.free(this.ptr) } setWords(words) { this.obj.setWords(words) @@ -59,15 +63,14 @@ Module.makeSpkModel = async (url, path, id) => { } objs.push(mdl) return mdl -} -Module.makeRecognizer = async (model, sampleRate, ctx) => { +}, ctx.AudioWorklet +Module.makeRecognizer = async (model, ctx) => { let rec try { - rec = new Module.recognizer(model,sampleRate, objs.length) + rec = new Module.recognizer(model, ctx.sampleRate, objs.length) } catch(e) { rec.delete() return Promise.reject(e) } - return new recognizer(rec) -} + await ctx.AudioWorklet.addModule(URL.createObjectURL(new Blob([` diff --git a/src/preMiddle.js b/src/preMiddle.js new file mode 100644 index 0000000..554c308 --- /dev/null +++ b/src/preMiddle.js @@ -0,0 +1,23 @@ +registerProcessor("BRCopier", class extends AudioWorkletProcessor { + constructor(options) { + super(options) + this.ret = true + this.port.onmessage = (ev) => { + switch(ev.cmd) { + case "init": + this.recognizerPort = ev.ports[0] + this.wasmMem = new Float32Array(WebAssembly.Memory.buffer).subarray(ev.ptr, ev.ptr+512) + break + case "deinit": + this.ret = false + break + } + } + } + process(inputs, outputs, params) { + if(!this.ret) return false; + inputs[0].copyFromChannel(this.wasmMem, 0) + this.recognizerPort.postMessage("done") + return true + } +}) \ No newline at end of file diff --git a/src/processor.js b/src/processor.js deleted file mode 100644 index 9d1d65a..0000000 --- a/src/processor.js +++ /dev/null @@ -1,23 +0,0 @@ -registerProcessor("wasmMemCpy", class extends AudioWorkletProcessor { - constructor(options) { - super(options) - this.retval = true - this.port.onmessage = (ev) => { - switch(ev.cmd) { - case "init": - this.ptr = ev.ptr - this.wasmMem = ev.wasmMem - this.recognizerPort = ev.ports[0] - break - case "deinit": - this.retval = false - break - } - } - } - process(input, output, param) { - const data = input[0][0] - this.recognizerPort.postMessage("done") - return this.retval - } -}) \ No newline at end of file diff --git a/src/recognizer.cc b/src/recognizer.cc index e0c7ce4..c9315b1 100644 --- a/src/recognizer.cc +++ b/src/recognizer.cc @@ -1,27 +1,30 @@ #include "recognizer.h" -audioData::audioData(int addr, int len) : addr(reinterpret_cast(addr)), len(len) {} recognizer::recognizer(model* mdl, float sampleRate, int index) : index(index) { rec = vosk_recognizer_new(mdl->mdl,sampleRate); if(rec == nullptr) { throwJS("Unable to initialize recognizer"); return; } + controller.lock(); std::thread t{[this](){ - while(!queue.empty()) { - audioData data {queue.front()}; - queue.pop(); - switch(vosk_recognizer_accept_waveform_f(rec, data.addr, data.len)) { - case 0: - fireEv("result", vosk_recognizer_result(rec)); - break; - case 1: - fireEv("partialResult", vosk_recognizer_partial_result(rec)); + while(!done.test()) { + controller.lock(); + if(!done.test()) { + switch(vosk_recognizer_accept_waveform_f(rec, dataPtr, 512)) { + case 0: + fireEv("result", vosk_recognizer_result(rec)); + break; + case 1: + fireEv("partialResult", vosk_recognizer_partial_result(rec)); + } } } }}; t.detach(); } recognizer::~recognizer() { + done.test_and_set(std::memory_order_relaxed); + controller.unlock(); vosk_recognizer_free(rec); } void recognizer::fireEv(const char *type, const char *content) { @@ -29,8 +32,9 @@ void recognizer::fireEv(const char *type, const char *content) { recognizers[$0].dispatchEvent(new CustomEvent(UTF8ToString($1), {"details" : UTF8ToString($2)})); },this->index, type, content); } -void recognizer::acceptWaveForm(int addr, int len) { - queue.emplace(addr, len); +void recognizer::acceptWaveForm() { + controller.unlock(); + controller.lock(); } void recognizer::setGrm(const std::string& grm) { vosk_recognizer_set_grm(rec, grm.c_str()); diff --git a/src/recognizer.h b/src/recognizer.h index 9de5a80..98f04e3 100644 --- a/src/recognizer.h +++ b/src/recognizer.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -16,17 +15,15 @@ extern void throwJS(const char* msg, bool err = false); namespace fs = std::filesystem; -struct audioData { - float* addr{}; - int len{}; - audioData(int addr, int len); -}; struct recognizer { + std::atomic_flag done{}; + std::mutex controller{}; + float* dataPtr{}; int index{}; VoskRecognizer* rec{}; recognizer(model* model, float sampleRate, int index); ~recognizer(); - void acceptWaveForm(int addr, int len); + void acceptWaveForm(); void fireEv(const char* type, const char* content); void setSpkModel(spkModel* model); void setGrm(const std::string& grm);