Fix licence and restructure, prepare to add AudioWorklet
This commit is contained in:
25
LICENSE
25
LICENSE
@@ -174,28 +174,3 @@
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
15
NOTICE
Normal file
15
NOTICE
Normal file
@@ -0,0 +1,15 @@
|
||||
Browser Recognizer
|
||||
Copyright 2024 Rylex Phan
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
58
README.md
58
README.md
@@ -1,12 +1,12 @@
|
||||
# Browser-recognizer
|
||||
# Browser-recognizer
|
||||
- A speech recognizer built on Vosk that runs in the browser, inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser), but built from scratch with no code taken from it!
|
||||
- Browser-recognizer can run both in the browser main thread and web workers.
|
||||
## Global and all objects' common interface
|
||||
| Function signature (global) | Description |
|
||||
|---|---|
|
||||
| ```Promise makeModel(url, path, id)```<br>```Promise makeSpkModel(url, storepath, id)``` | - If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. |
|
||||
| ```setLogLevel(level)``` | Set Vosk's log level (default: -1) <br>- 2: Error<br>- 1: Warning<br>- 0: Info <br>- 1: Verbose<br>- 2: More verbose<br>- 3: Debug |
|
||||
| ```deleteAll()``` | Call ```delete()``` on all objects, it is recommended to put this at the end of the program to automatically clean up. See [here](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).|
|
||||
| ```Promise makeModel(url: string, path: string, id: string)```<br><br>```Promise makeSpkModel(url: string, path: string, id: string)``` | - If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. |
|
||||
| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: -1) <br>- `-2`: Error<br>- `-1`: Warning<br>- `0`: Info<br>- `1`: Verbose<br>- `2`: More verbose<br>- `3`: Debug |
|
||||
| ```deleteAll()``` | Call ```delete()``` on all objects, it is recommended to put this at the end of the program to automatically clean up. See [why](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).|
|
||||
|
||||
| Function signature (all objects) | Description |
|
||||
|---|---|
|
||||
@@ -14,18 +14,18 @@
|
||||
## ```Recognizer``` object
|
||||
| Function signature | Description |
|
||||
|---|---|
|
||||
| ```setPartialWords(partialWords)``` | Return words' information in a partialResult event (default: false) |
|
||||
| ```setWords(words)``` | Return words' information in a result event (default: false) |
|
||||
| ```setNLSML(nlsml)``` | Return result and partialResult in NLSML form (default: false) |
|
||||
| ```setMaxAlternatives(alts)``` | Set the max number of alternatives for result event (default: false) |
|
||||
| ```setGrm(grm)``` | Add grammar to the recognizer (default: none) |
|
||||
| ```setSpkModel(spkmodel)``` | Set the speaker model of the recognizer (default: none) |
|
||||
| ```processAudio(audio: AudioBuffer)``` | Recognize an audio chunk,
|
||||
| ```setPartialWords(partialWords: bool)``` | Return words' information in a partialResult event (default: false) |
|
||||
| ```setWords(words: bool)``` | Return words' information in a result event (default: false) |
|
||||
| ```setNLSML(nlsml: bool)``` | Return result and partialResult in NLSML form (default: false) |
|
||||
| ```setMaxAlternatives(alts: int)``` | Set the max number of alternatives for result event (default: 0) |
|
||||
| ```setGrm(grm: string)``` | Add grammar to the recognizer (default: none) |
|
||||
| ```setSpkModel(mdl: spkmodel)``` | Set the speaker model of the recognizer (default: none) |
|
||||
|
||||
| Event | Description |
|
||||
|---|---|
|
||||
| ```partialResult``` | There is a partial recognition result, check the event's "details" property |
|
||||
| ```result``` | There is a full recognition result, check the event's "details" property |
|
||||
| ```error``` | A recognition error occurred, check the event's "details" property |
|
||||
## Other key points
|
||||
- If an error occurs, no changes are made.
|
||||
- Memory size is fixed at 300MB; changing it requires recompilation (because of the use of pthreads).
|
||||
@@ -45,29 +45,29 @@
|
||||
<script src="BrowserRecognizer.js" type="module"></script>
|
||||
<!-->
|
||||
<script>
|
||||
// Select name
|
||||
const BrRec = await loadBR()
|
||||
// Select name
|
||||
const BrRec = await loadBR()
|
||||
|
||||
// Prepare
|
||||
const model = await BrRec.makeModel(")
|
||||
const recognizer = await BrRec.makeRecognizer(model)
|
||||
recognizer.addEventListener("result", e => {
|
||||
console.log("Result: ",e.details)
|
||||
}
|
||||
recognizer.addEventListener("partialResult", e => {
|
||||
console.log("Partial result: ",e.details)
|
||||
}
|
||||
// Prepare
|
||||
const model = await BrRec.makeModel("https://example.com/model.tzst", "model", "model-id")
|
||||
const recognizer = await BrRec.makeRecognizer(model)
|
||||
recognizer.addEventListener("result", e => {
|
||||
console.log("Result: ",e.details)
|
||||
})
|
||||
recognizer.addEventListener("partialResult", e => {
|
||||
console.log("Partial result: ",e.details)
|
||||
})
|
||||
|
||||
// Recognize
|
||||
media = await navigator.mediaDevices.getUserMedia({
|
||||
// Recognize
|
||||
media = await navigator.mediaDevices.getUserMedia({
|
||||
video: false,
|
||||
audio: {
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
channelCount: 1,
|
||||
sampleRate: 16000
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
channelCount: 1,
|
||||
sampleRate: 16000
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
</script>
|
||||
|
||||
@@ -62,5 +62,5 @@ VOSK_FILES="recognizer.cc language_model.cc model.cc spk_model.cc vosk_api.cc" &
|
||||
em++ -pthread -O3 -flto -I. -I$KALDI/src -I$OPENFST/include $VOSK_FILES -c &&
|
||||
emar -rcs vosk.a ${VOSK_FILES//.cc/.o} &&
|
||||
|
||||
cd $SRC
|
||||
em++ -O3 genericObj.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSUPPORT_BIG_ENDIAN -sSINGLE_FILE -sMODULARIZE -sEXPORT_ES6 -sASYNCIFY -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MEMORY=300mb -sPTHREAD_POOL_SIZE=2 --pre-js pre.js --extern-post-js post.js -pthread -flto -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -lopenal -o ../BrowserRecognizer.js
|
||||
cd $SRC &&
|
||||
em++ -O3 genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSUPPORT_BIG_ENDIAN -sSINGLE_FILE -sEMBIND_AOT -sWASM_WORKER -sAUDIO_WORKLET -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sEXPORT_ES6 -sENVIRONMENT=web,worker -sINITIAL_MEMORY=300mb -sASYNCIFY -sPTHREAD_POOL_SIZE=2 --pre-js pre.js --extern-post-js post.js -pthread -flto -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -lopenal -o ../BrowserRecognizer.js
|
||||
|
||||
@@ -1,7 +1,17 @@
|
||||
#include "spkModel.h"
|
||||
#include "model.h"
|
||||
#include "recognizer.h"
|
||||
#include <emscripten/bind.h>
|
||||
using namespace emscripten;
|
||||
/// Raises a JavaScript exception carrying `msg` from C++ code.
///
/// @param msg  NUL-terminated message; decoded on the JS side with UTF8ToString.
/// @param err  When true, the decoded message is wrapped in a JS `Error`
///             object; when false (the default), the bare string is thrown.
///
/// Inside the EM_ASM body, $0 is the message pointer and $1 is the `err` flag.
void throwJS(const char* msg, bool err = false) {
    EM_ASM({
        if ($1) {
            throw Error(UTF8ToString($0));
        }
        throw UTF8ToString($0);
    }, msg, err);
}
|
||||
int main() {
|
||||
//vosk_set_log_level(-1);
|
||||
std::thread t{[](){
|
||||
@@ -12,13 +22,13 @@ int main() {
|
||||
EMSCRIPTEN_BINDINGS() {
|
||||
function("setLogLevel", &vosk_set_log_level, allow_raw_pointers());
|
||||
class_<model>("model")
|
||||
.constructor<std::string, std::string, std::string, int>(allow_raw_pointers());
|
||||
.constructor<std::string, std::string, std::string>(allow_raw_pointers());
|
||||
|
||||
class_<spkModel>("spkModel")
|
||||
.constructor<std::string, std::string, std::string, int>(allow_raw_pointers());
|
||||
.constructor<std::string, std::string, std::string>(allow_raw_pointers());
|
||||
|
||||
class_<recognizer>("recognizer")
|
||||
.constructor<model*, int, int>(allow_raw_pointers())
|
||||
.constructor<model*, float, int>(allow_raw_pointers())
|
||||
.function("setWords", &recognizer::setWords, allow_raw_pointers())
|
||||
.function("setPartialWords", &recognizer::setPartialWords, allow_raw_pointers())
|
||||
.function("setGrm", &recognizer::setGrm, allow_raw_pointers())
|
||||
|
||||
@@ -21,23 +21,23 @@ bool genericModel::loadModel(const std::string& storepath) {
|
||||
char filename[] {"/opfs/XXXXXX.tzst"};
|
||||
close(mkostemps(filename, 5, O_PATH));
|
||||
if(emscripten_wget(url.c_str(),filename) == 1) {
|
||||
throwErr("Unable to fetch model");
|
||||
throwJS("Unable to fetch model");
|
||||
return false;
|
||||
}
|
||||
if(!extractModel(filename)) {
|
||||
throwErr("Unable to extract model");
|
||||
throwJS("Unable to extract model");
|
||||
return false;
|
||||
}
|
||||
fs::remove(filename);
|
||||
if(!checkModel()) {
|
||||
throwErr("Model URL contains invalid model files");
|
||||
throwJS("Model URL contains invalid model files");
|
||||
fs::current_path("/opfs");
|
||||
fs::remove_all(storepath);
|
||||
return false;
|
||||
}
|
||||
std::ofstream idFile("id");
|
||||
if(!idFile.is_open()) {
|
||||
throwErr("Unable to write new id");
|
||||
throwJS("Unable to write new id");
|
||||
fs::remove_all(storepath);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
#pragma once
|
||||
#include "genericObj.h"
|
||||
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
@@ -11,8 +9,8 @@
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
#include <emscripten/wasmfs.h>
|
||||
#include <emscripten/bind.h>
|
||||
|
||||
#include <emscripten.h>
|
||||
extern void throwJS(const char* msg, bool err = false);
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
struct genericModel {
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <emscripten.h>
|
||||
#include <emscripten/console.h>
|
||||
void throwErr(const char* msg) {
|
||||
EM_ASM({
|
||||
throw Error(UTF8ToString($0))
|
||||
},msg);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
#include "model.h"
|
||||
|
||||
model::model(const std::string &url, const std::string& storepath, const std::string& id, int index) : genericModel(url, id, storepath) {
|
||||
model::model(const std::string &url, const std::string& storepath, const std::string& id) : genericModel(url, storepath, id) {
|
||||
if(!loadModel(storepath)) return;
|
||||
mdl = vosk_model_new(".");
|
||||
if(mdl == nullptr) {
|
||||
throwErr("Unable to initialize model");
|
||||
return;
|
||||
throwJS("Unable to initialize model");
|
||||
}
|
||||
};
|
||||
model::~model() {
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
struct model : genericModel {
|
||||
bool checkModel();
|
||||
VoskModel* mdl{};
|
||||
model(const std::string &url, const std::string& storepath, const std::string& id, int index);
|
||||
model(const std::string &url, const std::string& storepath, const std::string& id);
|
||||
~model();
|
||||
};
|
||||
|
||||
|
||||
24
src/pre.js
24
src/pre.js
@@ -5,6 +5,11 @@ class recognizer extends EventTarget {
|
||||
this.obj = rec
|
||||
objs.push(this)
|
||||
}
|
||||
processAudio(buffer) {
|
||||
if(buffer.numberOfChannels < 1) throw Error("Buffer has ",buffer.numberOfChannels, " channel")
|
||||
let data = buffer.getChannelData(0);
|
||||
if(!(data instanceof Float32Array)) throw Error("Channel data isn't a Float32Array");
|
||||
}
|
||||
delete() {
|
||||
this.obj.delete()
|
||||
}
|
||||
@@ -32,31 +37,34 @@ Module.makeModel = async (url, path, id) => {
|
||||
let mdl
|
||||
try {
|
||||
mdl = new Module.model(url, path, id)
|
||||
objs.push(mdl)
|
||||
}
|
||||
catch(e) {
|
||||
return Promise.reject(e.message)
|
||||
mdl.delete()
|
||||
return Promise.reject(e)
|
||||
}
|
||||
objs.push(mdl)
|
||||
return mdl
|
||||
}
|
||||
Module.makeSpkModel = async (url, path, id) => {
|
||||
let mdl
|
||||
try {
|
||||
mdl = new Module.spkModel(url, path, id)
|
||||
objs.push(mdl)
|
||||
}
|
||||
catch(e) {
|
||||
return Promise.reject(e.message)
|
||||
mdl.delete()
|
||||
return Promise.reject(e)
|
||||
}
|
||||
objs.push(mdl)
|
||||
return mdl
|
||||
}
|
||||
Module.makeRecognizer = async (model, sampleRate) => {
|
||||
Module.makeRecognizer = async (model, sampleRate, ctx) => {
|
||||
let rec
|
||||
try {
|
||||
rec = recognizer(new Module.recognizer(model,sampleRate, objs.length))
|
||||
rec = new Module.recognizer(model,sampleRate, objs.length)
|
||||
}
|
||||
catch(e) {
|
||||
return Promise.reject(e.message)
|
||||
rec.delete()
|
||||
return Promise.reject(e)
|
||||
}
|
||||
return rec
|
||||
return new recognizer(rec)
|
||||
}
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
#include "./recognizer.h"
|
||||
#include "recognizer.h"
|
||||
recognizer::recognizer(model* mdl, float sampleRate, int index) : index(index) {
|
||||
rec = vosk_recognizer_new(mdl->mdl,sampleRate);
|
||||
if(rec == nullptr) {
|
||||
throwErr("Unable to initialize recognizer");
|
||||
return;
|
||||
throwJS("Unable to initialize recognizer");
|
||||
}
|
||||
}
|
||||
recognizer::~recognizer() {
|
||||
vosk_recognizer_free(rec);
|
||||
}
|
||||
void recognizer::fireEv(const char *type, const char *content) {
|
||||
EM_ASM({
|
||||
recognizers[$0].dispatchEvent(new CustomEvent(UTF8ToString($1), {"details" : UTF8ToString($2)}));
|
||||
},this->index, type, content);
|
||||
}
|
||||
recognizer::~recognizer() {
|
||||
vosk_recognizer_free(rec);
|
||||
}
|
||||
void recognizer::acceptWaveForm(float* data, int len) {
|
||||
switch(vosk_recognizer_accept_waveform_f(rec, data, len)) {
|
||||
case 0:
|
||||
@@ -23,7 +22,7 @@ void recognizer::acceptWaveForm(float* data, int len) {
|
||||
fireEv("partialResult", vosk_recognizer_partial_result(rec));
|
||||
break;
|
||||
default:
|
||||
fireEv("_error", "Recognition error, unable to recognize");
|
||||
throwJS("acceptWaveForm error (from C++)", true);
|
||||
}
|
||||
}
|
||||
void recognizer::setGrm(const std::string& grm) {
|
||||
|
||||
@@ -1,27 +1,26 @@
|
||||
#pragma once
|
||||
#include "model.h"
|
||||
#include "spkModel.h"
|
||||
#include "genericObj.h"
|
||||
|
||||
#include <filesystem>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include <emscripten/bind.h>
|
||||
#include <emscripten/wasmfs.h>
|
||||
#include <emscripten/console.h>
|
||||
#include <emscripten/webaudio.h>
|
||||
#include <AL/al.h>
|
||||
#include <AL/alc.h>
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
extern void throwJS(const char* msg, bool err = false);
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
struct recognizer {
|
||||
int index{};
|
||||
VoskRecognizer* rec{};
|
||||
void acceptWaveForm(float* data, int len);
|
||||
recognizer(model* model, float sampleRate, int index);
|
||||
~recognizer();
|
||||
void acceptWaveForm(float* data, int len);
|
||||
void fireEv(const char* type, const char* content);
|
||||
void setSpkModel(spkModel* model);
|
||||
void setGrm(const std::string& grm);
|
||||
|
||||
@@ -3,8 +3,7 @@ spkModel::spkModel(const std::string &url, const std::string& storepath, const s
|
||||
if(!loadModel(storepath)) return;
|
||||
mdl = vosk_spk_model_new(".");
|
||||
if(mdl == nullptr) {
|
||||
throwErr("Unable to initialize speaker model");
|
||||
return;
|
||||
throwJS("Unable to initialize speaker model");
|
||||
}
|
||||
};
|
||||
spkModel::~spkModel() {
|
||||
|
||||
Reference in New Issue
Block a user