From 6da9a662a12531f3ecb701e93c648b4b750e652c Mon Sep 17 00:00:00 2001 From: msqr1 Date: Sat, 20 Jan 2024 17:37:47 -0800 Subject: [PATCH] Fix licence and restructure, prepare to add AudioWorklet --- LICENSE | 25 ------------------- NOTICE | 15 ++++++++++++ README.md | 58 ++++++++++++++++++++++----------------------- install.sh | 4 ++-- src/bindings.cc | 16 ++++++++++--- src/genericModel.cc | 8 +++---- src/genericModel.h | 6 ++--- src/genericObj.h | 12 ---------- src/model.cc | 5 ++-- src/model.h | 2 +- src/pre.js | 24 ++++++++++++------- src/recognizer.cc | 13 +++++----- src/recognizer.h | 7 +++--- src/spkModel.cc | 3 +-- 14 files changed, 94 insertions(+), 104 deletions(-) create mode 100644 NOTICE delete mode 100644 src/genericObj.h diff --git a/LICENSE b/LICENSE index 261eeb9..d9a10c0 100644 --- a/LICENSE +++ b/LICENSE @@ -174,28 +174,3 @@ of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..c1f0d9c --- /dev/null +++ b/NOTICE @@ -0,0 +1,15 @@ +Browser Recognizer +Copyright 2024 Rylex Phan + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + diff --git a/README.md b/README.md index 0f39cb2..eff29d9 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ -# Browser-recognizer +# Browser-recognizer- - A speech recognizer built on Vosk that can be run on the browser, inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser), but built from scratch and no code taken! - Browser-recognizer can run both in the browser main thread and web workers. ## Global and all objects' common interface | Function signature (global) | Description | |---|---| -| ```Promise makeModel(url, path, id)```
```Promise makeSpkModel(url, storepath, id)``` | - If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.
- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. | -| ```setLogLevel(level)``` | Set Vosk's log level (default: -1)
- 2: Error
- 1: Warning
- 0: Info
- 1: Verbose
- 2: More verbose
- 3: Debug | -| ```deleteAll()``` | Call ```delete()``` on all objects, it is recommended to put this at the end of the program to automatically clean up. See [here](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).| +| ```Promise makeModel(path: string, url: string, id: string)```

```Promise makeSpkModel(path: string, url: string, id: string)``` | - If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.
- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. | +| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: -1)
- -2: Error
- -1: Warning
- 0: Info
- 1: Verbose
- 2: More verbose
- 3: Debug | +| ```deleteAll()``` | Call ```delete()``` on all objects, it is recommended to put this at the end of the program to automatically clean up. See [why](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).| | Function signature (all objects) | Description |---|---| @@ -14,18 +14,18 @@ ## ```Recognizer``` object | Function signature | Description | |---|---| -| ```setPartialWords(partialWords)``` | Return words' information in a partialResult event (default: false) | -| ```setWords(words)``` | Return words' information in a result event (default: false) | -| ```setNLSML(nlsml)``` | Return result and partialResult in NLSML form (default: false) | -| ```setMaxAlternatives(alts)``` | Set the max number of alternatives for result event (default: false) | -| ```setGrm(grm)``` | Add grammar to the recognizer (default: none) | -| ```setSpkModel(spkmodel)``` | Set the speaker model of the recognizer (default: none) | +| ```processAudio(audio: AudioBuffer)``` | Recognize an audio chunk | +| ```setPartialWords(partialWords: bool)``` | Return words' information in a partialResult event (default: false) | +| ```setWords(words: bool)``` | Return words' information in a result event (default: false) | +| ```setNLSML(nlsml: bool)``` | Return result and partialResult in NLSML form (default: false) | +| ```setMaxAlternatives(alts: int)``` | Set the max number of alternatives for result event (default: false) | +| ```setGrm(grm: string)``` | Add grammar to the recognizer (default: none) | +| ```setSpkModel(mdl: spkmodel)``` | Set the speaker model of the recognizer (default: none) | | Event | Description | |---|---| | ```partialResult``` | There is a partial recognition result, check the event's "details" property | | ```result``` | There is a full recognition result, check the event's "details" property | -| ```error``` | An recognition occurred, check the event's "details" property | ## Other key points - If an 
error occurs, no changes was made. - Fixed memory size at 300MB, changing it require recompilation (because the use of pthread will lead) @@ -45,29 +45,29 @@ diff --git a/install.sh b/install.sh index 4d04a42..ba0fd10 100755 --- a/install.sh +++ b/install.sh @@ -62,5 +62,5 @@ VOSK_FILES="recognizer.cc language_model.cc model.cc spk_model.cc vosk_api.cc" & em++ -pthread -O3 -flto -I. -I$KALDI/src -I$OPENFST/include $VOSK_FILES -c && emar -rcs vosk.a ${VOSK_FILES//.cc/.o} && -cd $SRC -em++ -O3 genericObj.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSUPPORT_BIG_ENDIAN -sSINGLE_FILE -sMODULARIZE -sEXPORT_ES6 -sASYNCIFY -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MEMORY=300mb -sPTHREAD_POOL_SIZE=2 --pre-js pre.js --extern-post-js post.js -pthread -flto -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -lopenal -o ../BrowserRecognizer.js +cd $SRC && +em++ -O3 genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSUPPORT_BIG_ENDIAN -sSINGLE_FILE -sEMBIND_AOT -sWASM_WORKER -sAUDIO_WORKLET -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sEXPORT_ES6 -sENVIRONMENT=web,worker -sINITIAL_MEMORY=300mb -sASYNCIFY -sPTHREAD_POOL_SIZE=2 --pre-js pre.js --extern-post-js post.js -pthread -flto -I. 
-I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -lopenal -o ../BrowserRecognizer.js diff --git a/src/bindings.cc b/src/bindings.cc index cb2f182..fcf37b1 100644 --- a/src/bindings.cc +++ b/src/bindings.cc @@ -1,7 +1,17 @@ #include "spkModel.h" #include "model.h" #include "recognizer.h" +#include using namespace emscripten; +void throwJS(const char* msg, bool err = false) { + EM_ASM({ + if($1) { + throw Error(UTF8ToString) + return + } + throw UTF8ToString($0) + },msg, err); +} int main() { //vosk_set_log_level(-1); std::thread t{[](){ @@ -12,13 +22,13 @@ int main() { EMSCRIPTEN_BINDINGS() { function("setLogLevel", &vosk_set_log_level, allow_raw_pointers()); class_("model") - .constructor(allow_raw_pointers()); + .constructor(allow_raw_pointers()); class_("spkModel") - .constructor(allow_raw_pointers()); + .constructor(allow_raw_pointers()); class_("recognizer") - .constructor(allow_raw_pointers()) + .constructor(allow_raw_pointers()) .function("setWords", &recognizer::setWords, allow_raw_pointers()) .function("setPartialWords", &recognizer::setPartialWords, allow_raw_pointers()) .function("setGrm", &recognizer::setGrm, allow_raw_pointers()) diff --git a/src/genericModel.cc b/src/genericModel.cc index f1535a4..74824e7 100644 --- a/src/genericModel.cc +++ b/src/genericModel.cc @@ -21,23 +21,23 @@ bool 
genericModel::loadModel(const std::string& storepath) { char filename[] {"/opfs/XXXXXX.tzst"}; close(mkostemps(filename, 5, O_PATH)); if(emscripten_wget(url.c_str(),filename) == 1) { - throwErr("Unable to fetch model"); + throwJS("Unable to fetch model"); return false; } if(!extractModel(filename)) { - throwErr("Unable to extract model"); + throwJS("Unable to extract model"); return false; } fs::remove(filename); if(!checkModel()) { - throwErr("Model URL contains invalid model files"); + throwJS("Model URL contains invalid model files"); fs::current_path("/opfs"); fs::remove_all(storepath); return false; } std::ofstream idFile("id"); if(!idFile.is_open()) { - throwErr("Unable to write new id"); + throwJS("Unable to write new id"); fs::remove_all(storepath); return false; } diff --git a/src/genericModel.h b/src/genericModel.h index 8e7e4a3..ab81ddc 100644 --- a/src/genericModel.h +++ b/src/genericModel.h @@ -1,6 +1,4 @@ #pragma once -#include "genericObj.h" - #include #include #include @@ -11,8 +9,8 @@ #include #include #include -#include - +#include +extern void throwJS(const char* msg, bool err = false); namespace fs = std::filesystem; struct genericModel { diff --git a/src/genericObj.h b/src/genericObj.h deleted file mode 100644 index 103f14c..0000000 --- a/src/genericObj.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include -#include -void throwErr(const char* msg) { - EM_ASM({ - throw Error(UTF8ToString($0)) - },msg); -} - - - diff --git a/src/model.cc b/src/model.cc index b0af11b..3b5a4bf 100644 --- a/src/model.cc +++ b/src/model.cc @@ -1,11 +1,10 @@ #include "model.h" -model::model(const std::string &url, const std::string& storepath, const std::string& id, int index) : genericModel(url, id, storepath) { +model::model(const std::string &url, const std::string& storepath, const std::string& id) : genericModel(url, storepath, id) { if(!loadModel(storepath)) return; mdl = vosk_model_new("."); if(mdl == nullptr) { - throwErr("Unable to initialize model"); - 
return; + throwJS("Unable to initialize model"); } }; model::~model() { diff --git a/src/model.h b/src/model.h index 6450347..c0728be 100644 --- a/src/model.h +++ b/src/model.h @@ -4,7 +4,7 @@ struct model : genericModel { bool checkModel(); VoskModel* mdl{}; - model(const std::string &url, const std::string& storepath, const std::string& id, int index); + model(const std::string &url, const std::string& storepath, const std::string& id); ~model(); }; diff --git a/src/pre.js b/src/pre.js index 88c28df..6e5ecb9 100644 --- a/src/pre.js +++ b/src/pre.js @@ -5,6 +5,11 @@ class recognizer extends EventTarget { this.obj = rec objs.push(this) } + processAudio(buffer) { + if(buffer.numberOfChannels < 1) throw Error("Buffer has ",buffer.numberOfChannels, " channel") + let data = buffer.getChannelData(0); + if(!(data instanceof Float32Array)) throw Error("Channel data isn't a Float32Array"); + } delete() { this.obj.delete() } @@ -32,31 +37,34 @@ Module.makeModel = async (url, path, id) => { let mdl try { mdl = new Module.model(url, path, id) - objs.push(mdl) } catch(e) { - return Promise.reject(e.message) + mdl.delete() + return Promise.reject(e) } + objs.push(mdl) return mdl } Module.makeSpkModel = async (url, path, id) => { let mdl try { mdl = new Module.spkModel(url, path, id) - objs.push(mdl) } catch(e) { - return Promise.reject(e.message) + mdl.delete() + return Promise.reject(e) } + objs.push(mdl) return mdl } -Module.makeRecognizer = async (model, sampleRate) => { +Module.makeRecognizer = async (model, sampleRate, ctx) => { let rec try { - rec = recognizer(new Module.recognizer(model,sampleRate, objs.length)) + rec = new Module.recognizer(model,sampleRate, objs.length) } catch(e) { - return Promise.reject(e.message) + rec.delete() + return Promise.reject(e) } - return rec + return new recognizer(rec) } diff --git a/src/recognizer.cc b/src/recognizer.cc index 0d3c63e..41952fe 100644 --- a/src/recognizer.cc +++ b/src/recognizer.cc @@ -1,19 +1,18 @@ -#include 
"./recognizer.h" +#include "recognizer.h" recognizer::recognizer(model* mdl, float sampleRate, int index) : index(index) { rec = vosk_recognizer_new(mdl->mdl,sampleRate); if(rec == nullptr) { - throwErr("Unable to initialize recognizer"); - return; + throwJS("Unable to initialize recognizer"); } } +recognizer::~recognizer() { + vosk_recognizer_free(rec); +} void recognizer::fireEv(const char *type, const char *content) { EM_ASM({ recognizers[$0].dispatchEvent(new CustomEvent(UTF8ToString($1), {"details" : UTF8ToString($2)})); },this->index, type, content); } -recognizer::~recognizer() { - vosk_recognizer_free(rec); -} void recognizer::acceptWaveForm(float* data, int len) { switch(vosk_recognizer_accept_waveform_f(rec, data, len)) { case 0: @@ -23,7 +22,7 @@ void recognizer::acceptWaveForm(float* data, int len) { fireEv("partialResult", vosk_recognizer_partial_result(rec)); break; default: - fireEv("_error", "Recognition error, unable to recognize"); + throwJS("acceptWaveForm error (from C++)", true); } } void recognizer::setGrm(const std::string& grm) { diff --git a/src/recognizer.h b/src/recognizer.h index 5b409e4..7486f42 100644 --- a/src/recognizer.h +++ b/src/recognizer.h @@ -1,27 +1,26 @@ #pragma once #include "model.h" #include "spkModel.h" -#include "genericObj.h" #include #include #include -#include #include -#include +#include #include #include #include #include +extern void throwJS(const char* msg, bool err = false); namespace fs = std::filesystem; struct recognizer { int index{}; VoskRecognizer* rec{}; - void acceptWaveForm(float* data, int len); recognizer(model* model, float sampleRate, int index); ~recognizer(); + void acceptWaveForm(float* data, int len); void fireEv(const char* type, const char* content); void setSpkModel(spkModel* model); void setGrm(const std::string& grm); diff --git a/src/spkModel.cc b/src/spkModel.cc index 966a110..12facd5 100644 --- a/src/spkModel.cc +++ b/src/spkModel.cc @@ -3,8 +3,7 @@ spkModel::spkModel(const std::string 
&url, const std::string& storepath, const s if(!loadModel(storepath)) return; mdl = vosk_spk_model_new("."); if(mdl == nullptr) { - throwErr("Unable to initialize speaker model"); - return; + throwJS("Unable to initialize speaker model"); } }; spkModel::~spkModel() {