diff --git a/LICENSE b/LICENSE
index 261eeb9..d9a10c0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -174,28 +174,3 @@
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 0000000..c1f0d9c
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,15 @@
+Browser Recognizer
+Copyright 2024 Rylex Phan
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
diff --git a/README.md b/README.md
index 0f39cb2..eff29d9 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
-# Browser-recognizer
+# Browser-recognizer-
- A speech recognizer built on Vosk that can be run on the browser, inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser), but built from scratch and no code taken!
- Browser-recognizer can run both in the browser main thread and web workers.
## Global and all objects' common interface
| Function signature (global) | Description |
|---|---|
-| ```Promise makeModel(url, path, id)```
```Promise makeSpkModel(url, storepath, id)``` | - If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.
- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. |
-| ```setLogLevel(level)``` | Set Vosk's log level (default: -1)
- 2: Error
- 1: Warning
- 0: Info
- 1: Verbose
- 2: More verbose
- 3: Debug |
-| ```deleteAll()``` | Call ```delete()``` on all objects, it is recommended to put this at the end of the program to automatically clean up. See [here](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).|
+| ```Promise makeModel(url: string, path: string, id: string)```
```Promise makeSpkModel(url: string, path: string, id: string)``` | - If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.
- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. |
+| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: -1)
- -2: Error
- -1: Warning
- 0: Info
- 1: Verbose
- 2: More verbose
- 3: Debug |
+| ```deleteAll()``` | Calls ```delete()``` on all objects. It is recommended to call this at the end of the program so that everything is cleaned up automatically. See [why](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).|
| Function signature (all objects) | Description
|---|---|
@@ -14,18 +14,18 @@
## ```Recognizer``` object
| Function signature | Description |
|---|---|
-| ```setPartialWords(partialWords)``` | Return words' information in a partialResult event (default: false) |
-| ```setWords(words)``` | Return words' information in a result event (default: false) |
-| ```setNLSML(nlsml)``` | Return result and partialResult in NLSML form (default: false) |
-| ```setMaxAlternatives(alts)``` | Set the max number of alternatives for result event (default: false) |
-| ```setGrm(grm)``` | Add grammar to the recognizer (default: none) |
-| ```setSpkModel(spkmodel)``` | Set the speaker model of the recognizer (default: none) |
+| ```processAudio(audio: AudioBuffer)``` | Recognize an audio chunk |
+| ```setPartialWords(partialWords: bool)``` | Return words' information in a partialResult event (default: false) |
+| ```setWords(words: bool)``` | Return words' information in a result event (default: false) |
+| ```setNLSML(nlsml: bool)``` | Return result and partialResult in NLSML form (default: false) |
+| ```setMaxAlternatives(alts: int)``` | Set the max number of alternatives for result event (default: 0) |
+| ```setGrm(grm: string)``` | Add grammar to the recognizer (default: none) |
+| ```setSpkModel(mdl: spkModel)``` | Set the speaker model of the recognizer (default: none) |
| Event | Description |
|---|---|
| ```partialResult``` | There is a partial recognition result, check the event's "details" property |
| ```result``` | There is a full recognition result, check the event's "details" property |
-| ```error``` | An recognition occurred, check the event's "details" property |
## Other key points
- If an error occurs, no changes are made.
- Memory size is fixed at 300MB; changing it requires recompilation (because of the use of pthreads).
@@ -45,29 +45,29 @@
diff --git a/install.sh b/install.sh
index 4d04a42..ba0fd10 100755
--- a/install.sh
+++ b/install.sh
@@ -62,5 +62,5 @@ VOSK_FILES="recognizer.cc language_model.cc model.cc spk_model.cc vosk_api.cc" &
em++ -pthread -O3 -flto -I. -I$KALDI/src -I$OPENFST/include $VOSK_FILES -c &&
emar -rcs vosk.a ${VOSK_FILES//.cc/.o} &&
-cd $SRC
-em++ -O3 genericObj.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSUPPORT_BIG_ENDIAN -sSINGLE_FILE -sMODULARIZE -sEXPORT_ES6 -sASYNCIFY -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MEMORY=300mb -sPTHREAD_POOL_SIZE=2 --pre-js pre.js --extern-post-js post.js -pthread -flto -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -lopenal -o ../BrowserRecognizer.js
+cd $SRC &&
+em++ -O3 genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSUPPORT_BIG_ENDIAN -sSINGLE_FILE -sEMBIND_AOT -sWASM_WORKER -sAUDIO_WORKLET -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sEXPORT_ES6 -sENVIRONMENT=web,worker -sINITIAL_MEMORY=300mb -sASYNCIFY -sPTHREAD_POOL_SIZE=2 --pre-js pre.js --extern-post-js post.js -pthread -flto -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -lopenal -o ../BrowserRecognizer.js
diff --git a/src/bindings.cc b/src/bindings.cc
index cb2f182..fcf37b1 100644
--- a/src/bindings.cc
+++ b/src/bindings.cc
@@ -1,7 +1,17 @@
#include "spkModel.h"
#include "model.h"
#include "recognizer.h"
+#include
using namespace emscripten;
+// Throws a JavaScript exception carrying `msg` from C++.
+// When `err` is true the message is wrapped in a JS Error object; otherwise
+// the bare string is thrown.
+// The default value for `err` is supplied by the declaration in
+// genericModel.h; repeating it on the definition would be an ill-formed
+// redefinition of a default argument.
+void throwJS(const char* msg, bool err) {
+    EM_ASM({
+        if($1) {
+            throw Error(UTF8ToString($0))
+        }
+        throw UTF8ToString($0)
+    },msg, err);
+}
int main() {
//vosk_set_log_level(-1);
std::thread t{[](){
@@ -12,13 +22,13 @@ int main() {
EMSCRIPTEN_BINDINGS() {
function("setLogLevel", &vosk_set_log_level, allow_raw_pointers());
class_("model")
- .constructor(allow_raw_pointers());
+ .constructor(allow_raw_pointers());
class_("spkModel")
- .constructor(allow_raw_pointers());
+ .constructor(allow_raw_pointers());
class_("recognizer")
- .constructor(allow_raw_pointers())
+ .constructor(allow_raw_pointers())
.function("setWords", &recognizer::setWords, allow_raw_pointers())
.function("setPartialWords", &recognizer::setPartialWords, allow_raw_pointers())
.function("setGrm", &recognizer::setGrm, allow_raw_pointers())
diff --git a/src/genericModel.cc b/src/genericModel.cc
index f1535a4..74824e7 100644
--- a/src/genericModel.cc
+++ b/src/genericModel.cc
@@ -21,23 +21,23 @@ bool genericModel::loadModel(const std::string& storepath) {
char filename[] {"/opfs/XXXXXX.tzst"};
close(mkostemps(filename, 5, O_PATH));
if(emscripten_wget(url.c_str(),filename) == 1) {
- throwErr("Unable to fetch model");
+ throwJS("Unable to fetch model");
return false;
}
if(!extractModel(filename)) {
- throwErr("Unable to extract model");
+ throwJS("Unable to extract model");
return false;
}
fs::remove(filename);
if(!checkModel()) {
- throwErr("Model URL contains invalid model files");
+ throwJS("Model URL contains invalid model files");
fs::current_path("/opfs");
fs::remove_all(storepath);
return false;
}
std::ofstream idFile("id");
if(!idFile.is_open()) {
- throwErr("Unable to write new id");
+ throwJS("Unable to write new id");
fs::remove_all(storepath);
return false;
}
diff --git a/src/genericModel.h b/src/genericModel.h
index 8e7e4a3..ab81ddc 100644
--- a/src/genericModel.h
+++ b/src/genericModel.h
@@ -1,6 +1,4 @@
#pragma once
-#include "genericObj.h"
-
#include
#include
#include
@@ -11,8 +9,8 @@
#include
#include
#include
-#include
-
+#include
+extern void throwJS(const char* msg, bool err = false);
namespace fs = std::filesystem;
struct genericModel {
diff --git a/src/genericObj.h b/src/genericObj.h
deleted file mode 100644
index 103f14c..0000000
--- a/src/genericObj.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-#include
-#include
-void throwErr(const char* msg) {
- EM_ASM({
- throw Error(UTF8ToString($0))
- },msg);
-}
-
-
-
diff --git a/src/model.cc b/src/model.cc
index b0af11b..3b5a4bf 100644
--- a/src/model.cc
+++ b/src/model.cc
@@ -1,11 +1,10 @@
#include "model.h"
-model::model(const std::string &url, const std::string& storepath, const std::string& id, int index) : genericModel(url, id, storepath) {
+model::model(const std::string &url, const std::string& storepath, const std::string& id) : genericModel(url, storepath, id) {
if(!loadModel(storepath)) return;
mdl = vosk_model_new(".");
if(mdl == nullptr) {
- throwErr("Unable to initialize model");
- return;
+ throwJS("Unable to initialize model");
}
};
model::~model() {
diff --git a/src/model.h b/src/model.h
index 6450347..c0728be 100644
--- a/src/model.h
+++ b/src/model.h
@@ -4,7 +4,7 @@
struct model : genericModel {
bool checkModel();
VoskModel* mdl{};
- model(const std::string &url, const std::string& storepath, const std::string& id, int index);
+ model(const std::string &url, const std::string& storepath, const std::string& id);
~model();
};
diff --git a/src/pre.js b/src/pre.js
index 88c28df..6e5ecb9 100644
--- a/src/pre.js
+++ b/src/pre.js
@@ -5,6 +5,11 @@ class recognizer extends EventTarget {
this.obj = rec
objs.push(this)
}
+ processAudio(buffer) {
+ if(buffer.numberOfChannels < 1) throw Error("Buffer has ",buffer.numberOfChannels, " channel")
+ let data = buffer.getChannelData(0);
+ if(!(data instanceof Float32Array)) throw Error("Channel data isn't a Float32Array");
+ }
delete() {
this.obj.delete()
}
@@ -32,31 +37,34 @@ Module.makeModel = async (url, path, id) => {
let mdl
try {
mdl = new Module.model(url, path, id)
- objs.push(mdl)
}
catch(e) {
- return Promise.reject(e.message)
+ mdl.delete()
+ return Promise.reject(e)
}
+ objs.push(mdl)
return mdl
}
Module.makeSpkModel = async (url, path, id) => {
let mdl
try {
mdl = new Module.spkModel(url, path, id)
- objs.push(mdl)
}
catch(e) {
- return Promise.reject(e.message)
+ mdl.delete()
+ return Promise.reject(e)
}
+ objs.push(mdl)
return mdl
}
-Module.makeRecognizer = async (model, sampleRate) => {
+Module.makeRecognizer = async (model, sampleRate, ctx) => {
let rec
try {
- rec = recognizer(new Module.recognizer(model,sampleRate, objs.length))
+ rec = new Module.recognizer(model,sampleRate, objs.length)
}
catch(e) {
- return Promise.reject(e.message)
+ rec.delete()
+ return Promise.reject(e)
}
- return rec
+ return new recognizer(rec)
}
diff --git a/src/recognizer.cc b/src/recognizer.cc
index 0d3c63e..41952fe 100644
--- a/src/recognizer.cc
+++ b/src/recognizer.cc
@@ -1,19 +1,18 @@
-#include "./recognizer.h"
+#include "recognizer.h"
recognizer::recognizer(model* mdl, float sampleRate, int index) : index(index) {
rec = vosk_recognizer_new(mdl->mdl,sampleRate);
if(rec == nullptr) {
- throwErr("Unable to initialize recognizer");
- return;
+ throwJS("Unable to initialize recognizer");
}
}
+recognizer::~recognizer() {
+ vosk_recognizer_free(rec);
+}
void recognizer::fireEv(const char *type, const char *content) {
EM_ASM({
recognizers[$0].dispatchEvent(new CustomEvent(UTF8ToString($1), {"details" : UTF8ToString($2)}));
},this->index, type, content);
}
-recognizer::~recognizer() {
- vosk_recognizer_free(rec);
-}
void recognizer::acceptWaveForm(float* data, int len) {
switch(vosk_recognizer_accept_waveform_f(rec, data, len)) {
case 0:
@@ -23,7 +22,7 @@ void recognizer::acceptWaveForm(float* data, int len) {
fireEv("partialResult", vosk_recognizer_partial_result(rec));
break;
default:
- fireEv("_error", "Recognition error, unable to recognize");
+ throwJS("acceptWaveForm error (from C++)", true);
}
}
void recognizer::setGrm(const std::string& grm) {
diff --git a/src/recognizer.h b/src/recognizer.h
index 5b409e4..7486f42 100644
--- a/src/recognizer.h
+++ b/src/recognizer.h
@@ -1,27 +1,26 @@
#pragma once
#include "model.h"
#include "spkModel.h"
-#include "genericObj.h"
#include
#include
#include
-#include
#include
-#include
+#include
#include
#include
#include
#include
+// Redeclared without the default for `err`: genericModel.h (presumably reached
+// through model.h) already supplies `err = false`, and C++ does not allow a
+// default argument to be repeated by a later declaration.
+extern void throwJS(const char* msg, bool err);
namespace fs = std::filesystem;
struct recognizer {
int index{};
VoskRecognizer* rec{};
- void acceptWaveForm(float* data, int len);
recognizer(model* model, float sampleRate, int index);
~recognizer();
+ void acceptWaveForm(float* data, int len);
void fireEv(const char* type, const char* content);
void setSpkModel(spkModel* model);
void setGrm(const std::string& grm);
diff --git a/src/spkModel.cc b/src/spkModel.cc
index 966a110..12facd5 100644
--- a/src/spkModel.cc
+++ b/src/spkModel.cc
@@ -3,8 +3,7 @@ spkModel::spkModel(const std::string &url, const std::string& storepath, const s
if(!loadModel(storepath)) return;
mdl = vosk_spk_model_new(".");
if(mdl == nullptr) {
- throwErr("Unable to initialize speaker model");
- return;
+ throwJS("Unable to initialize speaker model");
}
};
spkModel::~spkModel() {