C and JS interface, untested

This commit is contained in:
msqr1
2024-01-15 23:36:20 -08:00
parent db2acc30be
commit ab8d869dd9
22 changed files with 7862 additions and 1 deletions

9
.gitignore vendored Normal file
View File

@@ -0,0 +1,9 @@
.vscode
test.sh
index.html
model.tzst
clapack-wasm
kaldi
libarchive
vosk-api
zstd

7039
BrowserRecognizer.js Normal file

File diff suppressed because one or more lines are too long

192
BrowserRecognizer.worker.js Normal file
View File

@@ -0,0 +1,192 @@
/**
* @license
* Copyright 2015 The Emscripten Authors
* SPDX-License-Identifier: MIT
*/
// Pthread Web Worker startup routine:
// This is the entry point file that is loaded first by each Web Worker
// that executes pthreads on the Emscripten application.
'use strict';
// Per-worker Module object; it is filled in by the 'load' message handler
// further down before the main Emscripten script is imported.
var Module = {};
// Node.js support
// Detected by probing `process.versions.node`, which only exists under Node.
var ENVIRONMENT_IS_NODE = typeof process == 'object' && typeof process.versions == 'object' && typeof process.versions.node == 'string';
if (ENVIRONMENT_IS_NODE) {
// Create as web-worker-like an environment as we can.
var nodeWorkerThreads = require('worker_threads');
var parentPort = nodeWorkerThreads.parentPort;
// Wrap the payload as `{ data }` so the handler sees the same shape as a
// browser MessageEvent.
parentPort.on('message', (data) => onmessage({ data: data }));
var fs = require('fs');
var vm = require('vm');
// Install the worker-style globals that the rest of this file relies on.
Object.assign(global, {
self: global,
require,
Module,
location: {
href: __filename
},
Worker: nodeWorkerThreads.Worker,
// importScripts is emulated by reading the file and evaluating it in this context.
importScripts: (f) => vm.runInThisContext(fs.readFileSync(f, 'utf8'), {filename: f}),
postMessage: (msg) => parentPort.postMessage(msg),
// Fall back to Date.now when no global `performance` object exists.
performance: global.performance || { now: Date.now },
});
}
// Thread-local guard variable for one-time init of the JS state
var initializedJS = false;
/**
 * Aborts the runtime when `condition` is falsy.
 *
 * @param {*} condition - Value expected to be truthy.
 * @param {string} text - Description appended to the abort message.
 */
function assert(condition, text) {
  if (condition) {
    return;
  }
  abort('Assertion failed: ' + text);
}
/**
 * Prints all arguments, joined by single spaces, to the error output.
 * Under Node.js the text is written synchronously to file descriptor 2;
 * elsewhere it goes through `console.error`.
 */
function threadPrintErr() {
  const message = Array.from(arguments).join(' ');
  if (!ENVIRONMENT_IS_NODE) {
    console.error(message);
    return;
  }
  // See https://github.com/emscripten-core/emscripten/issues/14804
  fs.writeSync(2, message + '\n');
}
/**
 * Worker replacement for `alert()`: joins the arguments with spaces and posts
 * them to the parent as an 'alert' command tagged with this thread's id.
 */
function threadAlert() {
  const text = Array.from(arguments).join(' ');
  const message = {cmd: 'alert', text, threadId: Module['_pthread_self']()};
  postMessage(message);
}
// We don't need out() for now, but may need to add it if we want to use it
// here. Or, if this code all moves into the main JS, that problem will go
// away. (For now, adding it here increases code size for no benefit.)
// `out` is a stub that throws when called; see the note above for why no real
// implementation is provided in the worker.
var out = () => { throw 'out() is not defined in worker.js.'; }
// err() and dbg() both print through threadPrintErr.
var err = threadPrintErr;
// alert() inside the worker is replaced by threadAlert, which posts an
// 'alert' message instead of showing a dialog.
self.alert = threadAlert;
var dbg = threadPrintErr;
/**
 * Instantiation hook: builds the WebAssembly instance synchronously from the
 * compiled module stored in `Module['wasmModule']` and hands it to
 * `receiveInstance`.
 */
Module['instantiateWasm'] = (info, receiveInstance) => {
  // The compiled module was posted from the main thread.
  const compiledModule = Module['wasmModule'];
  // Drop our reference; new threads will be spawned from the main thread.
  Module['wasmModule'] = null;
  // Synchronous instantiation is fine inside a worker.
  // TODO: Due to Closure regression https://github.com/google/closure-compiler/issues/3193,
  // this no longer optimizes down to a single expression.
  const wasmInstance = new WebAssembly.Instance(compiledModule, info);
  return receiveInstance(wasmInstance);
}
// Turn unhandled rejected promises into errors so that the main thread will be
// notified about them.
// Re-throw the rejection reason (or the event itself when there is no reason)
// so an unhandled promise rejection surfaces as an error.
self.onunhandledrejection = (e) => {
  const failure = e.reason || e;
  throw failure;
};
/**
 * Dispatches a message received by this pthread worker.
 *
 * Recognized messages, selected by `e.data.cmd` (or `e.data.target`):
 *  - 'load': store the wasm module/memory on Module, install proxy handlers,
 *    and import the main script; further messages are queued until
 *    `self.startWorker` is invoked.
 *  - 'run': initialize the thread and invoke the requested entry point.
 *  - 'cancel': exit the current thread if one is running.
 *  - target 'setimmediate': ignored.
 *  - 'checkMailbox': forwarded to Module['checkMailbox'] once JS state is initialized.
 * Any other non-empty `cmd` is reported through err().
 *
 * Exceptions are logged, reported via Module['__emscripten_thread_crashed']
 * when that export exists, and then re-thrown.
 *
 * @param {{data: Object}} e - Message event (or `{ data }` wrapper under Node.js).
 */
function handleMessage(e) {
try {
if (e.data.cmd === 'load') { // Preload command that is called once per worker to parse and load the Emscripten code.
// Until we initialize the runtime, queue up any further incoming messages.
let messageQueue = [];
self.onmessage = (e) => messageQueue.push(e);
// And add a callback for when the runtime is initialized.
self.startWorker = (instance) => {
// Notify the main thread that this thread has loaded.
postMessage({ 'cmd': 'loaded' });
// Process any messages that were queued before the thread was ready.
for (let msg of messageQueue) {
handleMessage(msg);
}
// Restore the real message handler.
self.onmessage = handleMessage;
};
// Module and memory were sent from main thread
Module['wasmModule'] = e.data.wasmModule;
// Use `const` here to ensure that the variable is scoped only to
// that iteration, allowing safe reference from a closure.
for (const handler of e.data.handlers) {
Module[handler] = (...args) => {
postMessage({ cmd: 'callHandler', handler, args: args });
}
}
Module['wasmMemory'] = e.data.wasmMemory;
Module['buffer'] = Module['wasmMemory'].buffer;
Module['workerID'] = e.data.workerID;
Module['ENVIRONMENT_IS_PTHREAD'] = true;
// The main script arrives either as a URL string or as a Blob that needs a
// temporary object URL.
if (typeof e.data.urlOrBlob == 'string') {
importScripts(e.data.urlOrBlob);
} else {
var objectUrl = URL.createObjectURL(e.data.urlOrBlob);
importScripts(objectUrl);
URL.revokeObjectURL(objectUrl);
}
} else if (e.data.cmd === 'run') {
// Pass the thread address to wasm to store it for fast access.
Module['__emscripten_thread_init'](e.data.pthread_ptr, /*is_main=*/0, /*is_runtime=*/0, /*can_block=*/1);
// Await mailbox notifications with `Atomics.waitAsync` so we can start
// using the fast `Atomics.notify` notification path.
Module['__emscripten_thread_mailbox_await'](e.data.pthread_ptr);
assert(e.data.pthread_ptr);
// Also call inside JS module to set up the stack frame for this pthread in JS module scope
Module['establishStackSpace']();
Module['PThread'].receiveObjectTransfer(e.data);
Module['PThread'].threadInitTLS();
if (!initializedJS) {
// Embind must initialize itself on all threads, as it generates support JS.
// We only do this once per worker since they get reused
Module['__embind_initialize_bindings']();
initializedJS = true;
}
try {
Module['invokeEntryPoint'](e.data.start_routine, e.data.arg);
} catch(ex) {
// The value 'unwind' is swallowed here; anything else is treated as a crash.
if (ex != 'unwind') {
// The pthread "crashed". Do not call `_emscripten_thread_exit` (which
// would make this thread joinable). Instead, re-throw the exception
// and let the top level handler propagate it back to the main thread.
throw ex;
}
}
} else if (e.data.cmd === 'cancel') { // Main thread is asking for a pthread_cancel() on this thread.
if (Module['_pthread_self']()) {
Module['__emscripten_thread_exit'](-1);
}
} else if (e.data.target === 'setimmediate') {
// no-op
} else if (e.data.cmd === 'checkMailbox') {
if (initializedJS) {
Module['checkMailbox']();
}
} else if (e.data.cmd) {
// The received message looks like something that should be handled by this message
// handler, (since there is a e.data.cmd field present), but is not one of the
// recognized commands:
err(`worker.js received unknown command ${e.data.cmd}`);
err(e.data);
}
} catch(ex) {
err(`worker.js onmessage() captured an uncaught exception: ${ex}`);
if (ex?.stack) err(ex.stack);
Module['__emscripten_thread_crashed']?.();
throw ex;
}
};
// Install the dispatcher as the worker's message handler.
self.onmessage = handleMessage;

View File

@@ -1,2 +1,71 @@
# Browser-recognizer # Browser-recognizer
A speech recognizer built on Vosk that can be run on the browser, inspired by [https://github.com/ccoreilly/vosk-browser](vosk-browser), but built from scratch and no code taken! A from-microphone speech recognizer built on Vosk that can be run on the browser, inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser), but built from scratch and no code taken!
## Interface
- setLogLevel: set Kaldi's log level (default: -1)
- -2: Error
- -1: Warning
- 0: Info
- 1: Verbose
- 2: More verbose
- 3: Debug
### Model and SpkModel
```
new Model(url, storepath, uid)
new SpkModel(url, storepath, uid)
```
#### Functions
- ***constructor***: Construct a model from a URL, a storage path, and a UID.
- If **storepath** contains valid model files and **uid** is the same, there will not be a fetch from **url**
- If **storepath** doesn't contain valid model files, or if it contains valid model files, but **uid** is different, there will be a fetch from **url**, and the model is stored with **uid**
- ***delete***: Delete self and free resources
#### Events
- ***ready***: The model is ready to be passed to a recognizer: a Model through the Recognizer constructor, a SpkModel through setSpkModel()
- ***error***: An error occurred, check the event's **details** property for more information
### Recognizer
```
new Recognizer(model)
```
#### Functions
- ***constructor***: Construct a recognizer from a model object
- ***start***: Start recognizing
- ***stop***: Stop recognizing
- ***setWords***: Return words' information in a result event (default: false)
- ***setPartialWords***: Return words' information in a partialResult event (default: false)
- ***setNLSML***: Return result and partialResult in NLSML form (default: false)
- ***setMaxAlternatives***: Set the max number of alternatives for result event (default: 0)
- ***setGrm***: Add grammar to the recognizer (default: none)
- ***setSpkModel***: Set the speaker model of the recognizer (default: none)
#### Events
- ***partialResult***: There is a partial recognition result, check the event's **details** property
- ***result***: There is a full recognition result, check the event's **details** property
- ***error***: An error occurred, check the event's **details** property for more information
- ***delete*** (function, not an event): Delete self and free resources
## Other key points
### IMPORTANT
You MUST call delete() on every object once you are done with it. Or put:
```
__GenericObj__.objects.forEach(obj => obj.delete())
```
at the end of your program to automatically do that. We have to do this because Emscripten doesn't call destructors. See [here](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).
### Guarantees
If an error occurs (the error event is fired), no changes were made, and no other dependent events will fire.
For example, if an error occurs while loading the model, the "ready" event won't fire, in order to prevent executing code on a nonexistent model.
### Limitations compared to vosk-browser:
- Only works on main thread
- Microphone only
- Fixed memory size of 300MB; changing it requires recompilation
### Additions to vosk-browser:
- Multiple models support
- Speaker model (SpkModel) support
- Storage path management (when many models are required)
- Model ID management (when model updates are required)
### This requires SharedArrayBuffer, so set the response headers:
- ***Cross-Origin-Embedder-Policy*** ---> ***require-corp***
- ***Cross-Origin-Opener-Policy*** ---> ***same-origin***
### If you can't set these headers, you can use a VERY HACKY workaround at *src/addCOI.js*.
## Usage
```
<!--Load this from a script tag-->
<script src="BrowserRecognizer.js"></script>
```

64
install.sh Executable file
View File

@@ -0,0 +1,64 @@
#### #### #### ### #### # # #### # #### #### #### #### ####
# # # # # # ## # # # # # # # # # # # # #
#### # # #### # # #### # # #### # # # #### #### #### ####
# # # # # # ## # # # # # # # # # # # #
# #### # # ### #### # #### ##### #### # #### # # ####
# Total build time is around 45 minutes, mostly from building Kaldi
# Builds zstd, libarchive, CLAPACK, OpenFST, Kaldi and Vosk with Emscripten and
# links them with the sources in src/ into BrowserRecognizer.js.
# Every step is chained with && so the first failure stops the whole build.
# Expects the emsdk checkout at ../../emsdk relative to the current directory.
sudo apt install shtool libtool autogen autotools-dev pkg-config make &&
SRC=$(realpath src) &&
KALDI=$(realpath kaldi) &&
VOSK=$(realpath vosk-api) &&
OPENFST=$KALDI/tools/openfst &&
LIBARCHIVE=$(realpath libarchive) &&
ZSTD=$(realpath zstd) &&
CLAPACK_WASM=$(realpath clapack-wasm) &&
source ../../emsdk/emsdk_env.sh &&
# No leading ':' here: an empty PATH entry would put the current directory on the search path.
export PATH=$PATH:$(realpath ../../emsdk/upstream/bin) &&
rm -rf /tmp/zstd &&
rm -rf /tmp/libarchive &&
rm -rf /tmp/openfst &&
git clone -b v1.5.5 --depth=1 https://github.com/facebook/zstd /tmp/zstd &&
git clone -b v3.7.2 --depth=1 https://github.com/libarchive/libarchive /tmp/libarchive &&
git clone --depth=1 https://gitlab.inria.fr/multispeech/kaldi.web/clapack-wasm.git &&
git clone --depth=1 https://github.com/alphacep/openfst /tmp/openfst &&
git clone -b vosk --depth=1 https://github.com/alphacep/kaldi &&
git clone -b go/v0.3.46 --depth=1 https://github.com/alphacep/vosk-api &&
cd /tmp/zstd &&
# Install into $ZSTD: that is where the libarchive configure step and the final link look for it.
HAVE_THREAD=0 ZSTD_LEGACY_SUPPORT=0 HAVE_ZLIB=0 HAVE_LZMA=0 HAVE_LZ4=0 ZSTD_NOBENCH=1 ZSTD_NODICT=1 ZSTD_NOCOMPRESS=1 BACKTRACE=0 PREFIX=$ZSTD CPPFLAGS="-O3 -flto" LDFLAGS="-O3 -flto" emmake make install &&
rm -rf /tmp/zstd &&
cd /tmp/libarchive &&
build/autogen.sh &&
# Install into $LIBARCHIVE: the final link uses -I$LIBARCHIVE/include and -L$LIBARCHIVE/lib.
CPPFLAGS="-I$ZSTD/include -flto" LDFLAGS="-L$ZSTD/lib -flto" emconfigure ./configure --prefix=$LIBARCHIVE --without-lz4 --without-lzma --without-zlib --without-bz2lib --without-xml2 --without-expat --without-cng --without-openssl --without-libb2 --disable-bsdunzip --disable-xattr --disable-acl --disable-bsdcpio --disable-bsdcat --disable-rpath --disable-maintainer-mode --disable-dependency-tracking --enable-static --disable-shared &&
emmake make install &&
rm -rf /tmp/libarchive &&
cd $CLAPACK_WASM &&
bash ./install_repo.sh emcc &&
cd /tmp/openfst &&
autoreconf -i &&
CXXFLAGS="-pthread -r -O3 -flto" LDFLAGS="-O3 -pthread -flto" emconfigure ./configure --prefix=$OPENFST --enable-static --disable-shared --enable-ngram-fsts --enable-lookahead-fsts --disable-bin --with-pic &&
emmake make install &&
echo "PACKAGE_VERSION = 1.8.0" >> $OPENFST/Makefile &&
cd $KALDI/src &&
git apply $SRC/kaldi.patch &&
CXXFLAGS="-O3 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -msimd128 -UHAVE_EXECINFO_H -pthread -flto" LDFLAGS="-O3 -sERROR_ON_UNDEFINED_SYMBOLS=0 -lembind -pthread -flto" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --double-precision=yes --debug-level=0 --clapack-root=$CLAPACK_WASM --host=WASM &&
emmake make online2 lm rnnlm &&
cd $VOSK/src &&
VOSK_FILES="recognizer.cc language_model.cc model.cc spk_model.cc vosk_api.cc" &&
em++ -pthread -O3 -flto -I. -I$KALDI/src -I$OPENFST/include $VOSK_FILES -c &&
emar -rcs vosk.a ${VOSK_FILES//.cc/.o} &&
# Keep the chain intact so the final link never runs after a failed step.
cd $SRC &&
em++ -O3 genericObj.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSUPPORT_BIG_ENDIAN -sSINGLE_FILE -sINITIAL_MEMORY=300mb -sASYNCIFY -sPTHREAD_POOL_SIZE=2 -pthread --no-entry -flto --post-js genericObj.js --post-js model.js --post-js spkModel.js --post-js recognizer.js -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -lopenal -o BrowserRecognizer.js

56
src/addCOI.js Normal file
View File

@@ -0,0 +1,56 @@
// Add cross-origin isolation (COI) into the page when the user visits it for the first time via a service worker and refreshing to apply the headers.
// Taken, and modified from https://github.com/orgs/community/discussions/13309#discussioncomment-3844940
// This one file plays two roles, told apart by whether `window` exists:
//  - without `window` it runs as the service worker and re-serves every
//    response with the COOP/COEP headers added;
//  - with `window` it runs in the page, registers itself as that service
//    worker and reloads so the headers take effect.
if(typeof window === 'undefined') {
    self.addEventListener("install", () => self.skipWaiting());
    self.addEventListener("activate", e => e.waitUntil(self.clients.claim()));

    /**
     * Fetches `request` and returns the response with the cross-origin
     * isolation headers added.
     *
     * @param {Request} request - The intercepted request.
     * @returns {Promise<Response>} The re-headered response, the untouched
     *   response when its status is 0, or a network-error response when the
     *   fetch itself fails.
     */
    async function handleFetch(request) {
        if(request.mode === "no-cors") {
            // Re-issue no-cors requests without credentials.
            request = new Request(request.url, {
                cache: request.cache,
                credentials: "omit",
                headers: request.headers,
                integrity: request.integrity,
                destination: request.destination,
                keepalive: request.keepalive,
                method: request.method,
                mode: request.mode,
                redirect: request.redirect,
                referrer: request.referrer,
                referrerPolicy: request.referrerPolicy,
                signal: request.signal,
            });
        }
        let r;
        try {
            r = await fetch(request);
        } catch(e) {
            console.error(e);
            // Report the failure as a network error instead of crashing on
            // `r.status` with an undefined response.
            return Response.error();
        }
        if(r.status === 0) {
            // Status-0 responses are passed through untouched.
            return r;
        }
        const headers = new Headers(r.headers);
        headers.set("Cross-Origin-Embedder-Policy", "require-corp");
        headers.set("Cross-Origin-Opener-Policy", "same-origin");
        return new Response(r.body, { status: r.status, statusText: r.statusText, headers });
    }

    self.addEventListener("fetch", function(e) {
        // Leave these requests to the browser: calling respondWith() with a
        // promise that resolves to undefined would turn them into network errors.
        if(e.request.cache === "only-if-cached" && e.request.mode !== "same-origin") {
            return;
        }
        e.respondWith(handleFetch(e.request));
    });
} else {
    (async function() {
        // Nothing to do when the page is already isolated or the property is unsupported.
        if(window.crossOriginIsolated !== false) return;
        if(!("serviceWorker" in navigator)) {
            console.error("COOP/COEP Service Worker cannot be registered: service workers are unavailable in this context");
            return;
        }
        // currentScript is only valid during the synchronous part of this
        // function, so it is read before the first await.
        const scriptUrl = window.document.currentScript.src;
        const registration = await navigator.serviceWorker.register(scriptUrl).catch(e => console.error("COOP/COEP Service Worker failed to register:", e));
        if(registration) {
            registration.addEventListener("updatefound", () => {
                window.location.reload();
            });
            // A worker is active but not yet controlling this page: reload so
            // the page is served through it.
            if(registration.active && !navigator.serviceWorker.controller) {
                window.location.reload();
            }
        }
    })();
}

26
src/bindings.cc Normal file
View File

@@ -0,0 +1,26 @@
#include "spkModel.h"
#include "model.h"
#include "recognizer.h"
using namespace emscripten;
// Embind declarations that expose the native classes to JavaScript.
// The double-underscore names are the raw bindings; the Model, SpkModel and
// Recognizer JavaScript classes wrap them and supply the trailing `index`
// constructor argument.
EMSCRIPTEN_BINDINGS(BrowserRecognizer) {
// Free function: forwards directly to vosk_set_log_level.
function("setLogLevel", &vosk_set_log_level, allow_raw_pointers());
// Constructor arguments: url, storepath, id, index.
class_<Model>("__Model__")
.constructor<std::string, std::string, std::string, int>(allow_raw_pointers());
// Constructor arguments: url, storepath, id, index.
class_<SpkModel>("__SpkModel__")
.constructor<std::string, std::string, std::string, const int>(allow_raw_pointers());
// Constructor arguments: model, sampleRate, index.
class_<Recognizer>("__Recognizer__")
.constructor<Model*, int, int>(allow_raw_pointers())
.function("start", &Recognizer::start, allow_raw_pointers())
.function("stop", &Recognizer::stop, allow_raw_pointers())
.function("deinit", &Recognizer::deinit, allow_raw_pointers())
.function("setWords", &Recognizer::setWords, allow_raw_pointers())
.function("setPartialWords", &Recognizer::setPartialWords, allow_raw_pointers())
.function("setGrm", &Recognizer::setGrm, allow_raw_pointers())
.function("setNLSML", &Recognizer::setNLSML, allow_raw_pointers())
.function("setSpkModel", &Recognizer::setSpkModel, allow_raw_pointers())
.function("setMaxAlternatives", &Recognizer::setMaxAlternatives, allow_raw_pointers());
};

69
src/genericModel.cc Normal file
View File

@@ -0,0 +1,69 @@
#include "genericModel.h"
bool GenericModel::first = true;
// Stores the model's URL, id and storage path (always placed under "opfs/").
// The first successful construction also sets the Vosk log level to -1 and
// creates the "opfs" directory on the OPFS backend.
//
// url:       location the model archive is fetched from by loadModel()
// storepath: directory, relative to "opfs/", that holds the extracted model
// id:        identifier compared against the stored "id" file by loadModel()
// index:     event-dispatch index passed to GenericObj
//
// Fires "error" when the OPFS directory cannot be created; `first` is then
// left set, so the next construction retries the setup.
// Initializers are listed in actual initialization order: base class first,
// then members in declaration order.
GenericModel::GenericModel(const std::string &url, const std::string& storepath, const std::string &id, int index) : GenericObj(index), url(url), id(id), storepath("opfs/" + storepath) {
    if(!first) return;
    vosk_set_log_level(-1);
    int res{};
    // The directory is created on a separate thread, which is joined right away.
    std::thread t{[&res](){
        res = wasmfs_create_directory("opfs", 0777, wasmfs_create_opfs_backend());
    }};
    t.join();
    // wasmfs_create_directory returns 0 on success and a negative value on
    // error, so comparing against 1 would never detect a failure.
    if(res != 0) {
        fireEv("error", "Unable to create OPFS directory");
        return;
    }
    first = false;
}
// Returns true when the file "<path>/id" exists and its entire content equals
// `id`. A missing or unreadable file counts as a mismatch.
bool GenericModel::checkId(const std::string& path, const std::string& id) {
    std::ifstream file {(path + "/id"), std::ifstream::binary};
    if(!file.is_open()) {
        return false;
    }
    long long size {file.seekg(0, std::ios::end).tellg()};
    // tellg() reports -1 on failure; using that as a length would request an
    // enormous string. A length mismatch already decides the comparison.
    if(size < 0 || static_cast<unsigned long long>(size) != id.size()) {
        return false;
    }
    std::string oldid(static_cast<std::string::size_type>(size), ' ');
    file.seekg(0);
    if(!file.read(&oldid[0], size)) {
        return false;
    }
    return id == oldid;
}
bool GenericModel::loadModel() {
if(!checkModel(storepath) || !checkId(storepath, id)) {
if(emscripten_wget(url.c_str(), "opfs/model.tzst") == 1) {
fireEv("error", "Unable to fetch model");
return false;
}
if(!extractModel("opfs/model.tzst", storepath)) {
fireEv("error", "Unable to extract model");
}
fs::remove("opfs/model.tzst");
if(!checkModel(storepath)) {
fireEv("error", "Model URL contains invalid model files");
}
std::ofstream idFile((storepath + "/id"));
if(!idFile.is_open()) {
fireEv("error", "Unable to write new id");
fs::remove_all(storepath);
return false;
}
idFile << id;
}
return true;
}
bool GenericModel::extractModel(const char* target, const std::string& dest) {
std::string path{};
archive* src {archive_read_new()};
archive_entry* entry {};
archive_read_support_filter_all(src);
archive_read_support_format_all(src);
archive_read_open_filename(src, target,22480);
if(archive_errno(src) != 0) return false;
while (archive_read_next_header(src, &entry) == ARCHIVE_OK) {
path = archive_entry_pathname(entry);
archive_entry_set_pathname(entry, (dest + path.substr(path.find("/"))).c_str());
if(archive_errno(src) != 0) return false;
archive_read_extract(src, entry, ARCHIVE_EXTRACT_UNLINK);
}
archive_read_free(src);
return true;
}

28
src/genericModel.h Normal file
View File

@@ -0,0 +1,28 @@
#pragma once
#include "genericObj.h"
#include <string>
#include <thread>
#include <filesystem>
#include <fstream>
#include <vosk_api.h>
#include <archive.h>
#include <archive_entry.h>
#include <emscripten/wasmfs.h>
#include <emscripten/bind.h>
namespace fs = std::filesystem;
// Shared base for Model and SpkModel: remembers where a model comes from and
// where it is stored, and implements the fetch/extract/validate logic in
// loadModel(). Derived classes supply checkModel().
class GenericModel : public GenericObj {
// True until the one-time setup in the constructor has succeeded.
static bool first;
// Location the model archive is fetched from.
const std::string url{};
// Identifier compared with, and written to, the "id" file inside storepath.
const std::string id{};
// Extracts the archive `target` into `dest`; returns false on failure.
static bool extractModel(const char* target, const std::string& dest);
// Returns true when "<path>/id" exists and its content equals `id`.
static bool checkId(const std::string& path, const std::string& id);
public:
// Directory holding the extracted model; the constructor prefixes it with "opfs/".
const std::string storepath{};
// Returns true when `path` contains every file the concrete model type requires.
virtual bool checkModel(const std::string& path) = 0;
// Ensures a valid model with the expected id exists at storepath, fetching
// and extracting it when needed. Returns false after firing an "error" event.
bool loadModel();
GenericModel(const std::string& url, const std::string& storepath, const std::string& id, int index);
};

13
src/genericObj.cc Normal file
View File

@@ -0,0 +1,13 @@
#include "genericObj.h"
// Dispatches an event on the JavaScript object registered at
// __GenericObj__.objects[index]; the dispatch runs on the main thread.
//
// type:    event name
// content: optional payload. When null a plain Event is dispatched; otherwise
//          a CustomEvent carrying the text is dispatched.
//
// The payload is exposed both as the standard `detail` property and as
// `details`, the name used in the README.
void GenericObj::fireEv(const char *type, const char *content) {
    if(content == nullptr) {
        MAIN_THREAD_EM_ASM({
            __GenericObj__.objects[$0].dispatchEvent(new Event(UTF8ToString($1)));
        }, this->index, type);
        return;
    }
    // Placeholders follow argument order: $0 = index, $1 = type, $2 = content.
    // Commas are kept inside parentheses so the preprocessor does not split
    // the code block into several macro arguments.
    MAIN_THREAD_EM_ASM({
        const payload = UTF8ToString($2);
        const ev = new CustomEvent(UTF8ToString($1), {"detail" : payload});
        ev.details = payload;
        __GenericObj__.objects[$0].dispatchEvent(ev);
    }, this->index, type, content);
}

11
src/genericObj.h Normal file
View File

@@ -0,0 +1,11 @@
#pragma once
#include <emscripten.h>
// Base class for native objects that report events to a JavaScript wrapper.
class GenericObj {
// Position of the matching JavaScript object in __GenericObj__.objects;
// fireEv() uses it to find the dispatch target.
const int index{};
public:
GenericObj(int index) : index(index) {};
// Dispatches an event named `type` on the JavaScript wrapper. With a null
// `content` a plain Event is used, otherwise a CustomEvent carrying the text.
void fireEv(const char *type, const char *content = nullptr);
};

1
src/genericObj.js Normal file
View File

@@ -0,0 +1 @@
/**
 * Registry of every JavaScript wrapper object. The native side addresses a
 * wrapper by its position in `objects` when it dispatches events.
 */
class __GenericObj__ {
    static objects = [];
}

52
src/kaldi.patch Normal file
View File

@@ -0,0 +1,52 @@
diff --git a/src/matrix/Makefile b/src/matrix/Makefile
index 398179a35..c903fbfd4 100644
--- a/src/matrix/Makefile
+++ b/src/matrix/Makefile
@@ -10,7 +10,6 @@ include ../kaldi.mk
# you can uncomment matrix-lib-speed-test if you want to do the speed tests.
-TESTFILES = matrix-lib-test sparse-matrix-test numpy-array-test #matrix-lib-speed-test
OBJFILES = kaldi-matrix.o kaldi-vector.o packed-matrix.o sp-matrix.o tp-matrix.o \
matrix-functions.o qr.o srfft.o compressed-matrix.o \
diff --git a/src/util/kaldi-thread.cc b/src/util/kaldi-thread.cc
index 4573e24f1..4af4e73ea 100644
--- a/src/util/kaldi-thread.cc
+++ b/src/util/kaldi-thread.cc
@@ -22,7 +22,7 @@
#include "util/kaldi-thread.h"
namespace kaldi {
-int32 g_num_threads = 4; // Initialize this global variable.
+int32 g_num_threads = 1; // Initialize this global variable.
MultiThreadable::~MultiThreadable() {
// default implementation does nothing
diff --git a/src/base/kaldi-types.h b/src/base/kaldi-types.h
index 7ebf4f853..2f5979e42 100644
--- a/src/base/kaldi-types.h
+++ b/src/base/kaldi-types.h
@@ -20,6 +20,7 @@
#ifndef KALDI_BASE_KALDI_TYPES_H_
#define KALDI_BASE_KALDI_TYPES_H_ 1
+#define KALDI_DOUBLEPRECISION 1
namespace kaldi {
// TYPEDEFS ..................................................................
diff --git a/src/ivector/ivector-extractor.cc b/src/ivector/ivector-extractor.cc
index c3a122281..71d37256d 100644
--- a/src/ivector/ivector-extractor.cc
+++ b/src/ivector/ivector-extractor.cc
@@ -195,7 +195,7 @@ void IvectorExtractor::ComputeDerivedVars() {
// could because some tasks finish before others.
{
TaskSequencerConfig sequencer_opts;
- sequencer_opts.num_threads = g_num_threads;
+ sequencer_opts.num_threads = 0;
TaskSequencer<IvectorExtractorComputeDerivedVarsClass> sequencer(
sequencer_opts);
for (int32 i = 0; i < NumGauss(); i++)

27
src/model.cc Normal file
View File

@@ -0,0 +1,27 @@
#include "model.h"
// Loads (fetching first if necessary) the model files and creates the Vosk
// model from them. Fires "ready" on success, or "error" when the Vosk model
// cannot be created. Does nothing further when loadModel() fails.
Model::Model(const std::string &url, const std::string& storepath, const std::string& id, int index) : GenericModel(url, storepath, id, index) {
    if(loadModel()) {
        model = vosk_model_new(this->storepath.c_str());
        if(model != nullptr) {
            fireEv("ready");
        } else {
            fireEv("error", "Unable to initialize model");
        }
    }
};
// Returns true only when every file required by a recognition model exists
// under `path`. Files are probed in the listed order and the scan stops at
// the first missing one.
bool Model::checkModel(const std::string& path) {
    static const char* const requiredFiles[] = {
        "/am/final.mdl",
        "/conf/mfcc.conf",
        "/conf/model.conf",
        "/graph/phones/word_boundary.int",
        "/graph/Gr.fst",
        "/graph/HCLr.fst",
        "/graph/disambig_tid.int",
        "/ivector/final.dubm",
        "/ivector/final.ie",
        "/ivector/final.mat",
        "/ivector/global_cmvn.stats",
        "/ivector/online_cmvn.conf",
        "/ivector/splice.conf",
    };
    for(const char* relative : requiredFiles) {
        if(!fs::exists(path + relative)) {
            return false;
        }
    }
    return true;
}

12
src/model.h Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include "genericModel.h"
// Recognition model: a GenericModel whose files are loaded into a VoskModel.
class Model : public GenericModel {
// Returns true when `path` holds all files a recognition model needs.
bool checkModel(const std::string& path);
public:
// Handle returned by vosk_model_new(); stays null when loading or creation failed.
VoskModel* model{};
// Loads the model and fires "ready" or "error" on the JavaScript wrapper at `index`.
Model(const std::string &url, const std::string& storepath, const std::string& id, int index);
};

10
src/model.js Normal file
View File

@@ -0,0 +1,10 @@
/**
 * JavaScript wrapper around the native `__Model__` binding.
 * Events fired by the native side are dispatched on this object.
 */
class Model extends EventTarget {
    /**
     * @param {string} url - Where the model archive is fetched from.
     * @param {string} storepath - Storage path of the extracted model.
     * @param {string} id - Identifier used to decide whether a re-fetch is needed.
     */
    constructor(url, storepath, id) {
        super();
        // Register before constructing the native object: the native
        // constructor may fire events, and it looks this wrapper up by index.
        const index = __GenericObj__.objects.length;
        __GenericObj__.objects.push(this);
        this.obj = new Module.__Model__(url, storepath, id, index);
    }

    /**
     * Deletes the native object. Safe to call more than once, so a manual
     * delete() followed by a bulk cleanup over `__GenericObj__.objects`
     * does not throw.
     */
    delete() {
        if (this.obj) {
            this.obj.delete();
            this.obj = null;
        }
    }
}

74
src/recognizer.cc Normal file
View File

@@ -0,0 +1,74 @@
#include "./recognizer.h"
// Starts recognition: sets the `controller` flag and wakes any thread waiting
// on it (main() waits on this flag before it starts capturing).
void Recognizer::start() {
controller.test_and_set(std::memory_order_relaxed);
controller.notify_all();
}
// Stops recognition: clears the `controller` flag, which ends the capture
// loop in main(), and notifies any waiters.
void Recognizer::stop() {
controller.clear(std::memory_order_relaxed);
controller.notify_all();
}
// Asks the worker thread to shut down.
//
// `done` is set first so the worker sees it as soon as it wakes. `controller`
// is then *set* rather than cleared: a wait() on it only returns once its
// value differs from false, so clearing it could never release a worker parked
// between start()/stop() cycles. main() checks `done` before capturing.
//
// NOTE(review): this does not wait for the worker to finish, and the thread
// is detached; destroying the object right after deinit() can race with the
// worker's final cleanup — confirm the intended lifetime.
void Recognizer::deinit() {
    done.test_and_set(std::memory_order_release);
    done.notify_all();
    controller.test_and_set(std::memory_order_release);
    controller.notify_all();
}

// Opens the capture device and starts a detached worker thread that creates
// the Vosk recognizer and then runs main().
//
// model:      model to recognize with; must already be loaded
// sampleRate: capture and recognizer sample rate in Hz
// index:      event-dispatch index passed to GenericObj
//
// Fires "error" when the model is unusable, the microphone cannot be opened,
// or the recognizer cannot be created.
Recognizer::Recognizer(Model* model, int sampleRate, int index) : GenericObj(index) {
    // A model that failed to load has a null handle; handing that to Vosk
    // would dereference a null pointer on the worker thread.
    if(model == nullptr || model->model == nullptr) {
        fireEv("error", "Unable to construct recognizer");
        return;
    }
    mic = alcCaptureOpenDevice("Emscripten OpenAL capture", sampleRate, AL_FORMAT_MONO16, 22480);
    if(mic == nullptr || alcGetError(mic) != 0) {
        fireEv("error", "Unable to initialize microphone");
        return;
    }
    std::thread t{[this](Model* model, int sampleRate) {
        recognizer = vosk_recognizer_new(model->model, static_cast<float>(sampleRate));
        if(recognizer == nullptr) {
            fireEv("error", "Unable to construct recognizer");
            // main() will never run, so release the device here.
            alcCaptureCloseDevice(mic);
            mic = nullptr;
            return;
        }
        main();
    }, model, sampleRate};
    t.detach();
}

// Worker-thread loop: waits until start() is called, then feeds captured
// audio to Vosk and fires "partialResult" / "result" events until stop() or
// deinit(). Frees the recognizer and closes the device on exit.
void Recognizer::main() {
    constexpr int bufferBytes = 22480;
    constexpr int bytesPerSample = 2; // AL_FORMAT_MONO16: one 16-bit sample per frame
    constexpr int maxSamples = bufferBytes / bytesPerSample;
    char buffer[bufferBytes];
    int sample{};
    fireEv("ready");
    while(!done.test()) {
        // Sleep until start() or deinit() sets the flag.
        controller.wait(false, std::memory_order_acquire);
        if(done.test()) break;
        alcCaptureStart(mic);
        while(controller.test() && !done.test()) {
            sample = 0;
            // The third argument is the number of values to write, not a byte size.
            alcGetIntegerv(mic, ALC_CAPTURE_SAMPLES, 1, &sample);
            if(sample <= 0) {
                // Nothing captured yet; let other threads run instead of
                // feeding stale buffer contents to the recognizer.
                std::this_thread::yield();
                continue;
            }
            // Never read more samples than the local buffer can hold.
            const int count = sample < maxSamples ? sample : maxSamples;
            alcCaptureSamples(mic, buffer, count);
            // Pass only the bytes actually captured. accept_waveform returns
            // 1 when an utterance is complete, 0 while decoding continues,
            // and a negative value on error.
            switch(vosk_recognizer_accept_waveform(recognizer, buffer, count * bytesPerSample)) {
            case 0:
                fireEv("partialResult", vosk_recognizer_partial_result(recognizer));
                break;
            case 1:
                fireEv("result", vosk_recognizer_result(recognizer));
                break;
            default:
                fireEv("error", "Recognition result error");
            }
        }
        alcCaptureStop(mic);
    }
    vosk_recognizer_free(recognizer);
    alcCaptureCloseDevice(mic);
}
// Option setters. Each forwards its argument to the matching
// vosk_recognizer_set_* function for this object's recognizer.
// NOTE(review): `recognizer` is assigned on the thread started by the
// constructor, so it may still be null if a setter is called before that
// thread has run — confirm callers wait for the "ready" event.

// Sets the recognizer grammar from `grm`.
void Recognizer::setGrm(const std::string& grm) {
vosk_recognizer_set_grm(recognizer, grm.c_str());
}
// Attaches the Vosk speaker model held by `model`.
void Recognizer::setSpkModel(SpkModel* model) {
vosk_recognizer_set_spk_model(recognizer,model->model);
}
// Forwards the `words` flag to Vosk.
void Recognizer::setWords(bool words) {
vosk_recognizer_set_words(recognizer,words);
}
// Forwards the `partialWords` flag to Vosk.
void Recognizer::setPartialWords(bool partialWords) {
vosk_recognizer_set_partial_words(recognizer, partialWords);
}
// Forwards the `nlsml` flag to Vosk.
void Recognizer::setNLSML(bool nlsml) {
vosk_recognizer_set_nlsml(recognizer, nlsml);
}
// Forwards the maximum number of alternatives to Vosk.
void Recognizer::setMaxAlternatives(int alts) {
vosk_recognizer_set_max_alternatives(recognizer, alts);
}

36
src/recognizer.h Normal file
View File

@@ -0,0 +1,36 @@
#pragma once
#include "model.h"
#include "spkModel.h"
#include "genericObj.h"
#include <filesystem>
#include <atomic>
#include <thread>
#include <emscripten/bind.h>
#include <emscripten/wasmfs.h>
#include <emscripten/console.h>
#include <AL/al.h>
#include <AL/alc.h>
#include <archive.h>
#include <archive_entry.h>
namespace fs = std::filesystem;
// Microphone speech recognizer: captures audio through OpenAL on a worker
// thread, feeds it to a Vosk recognizer and reports results as events.
class Recognizer : public GenericObj {
// Vosk recognizer handle; created on the worker thread started by the constructor.
VoskRecognizer* recognizer{};
// OpenAL capture device opened by the constructor.
ALCdevice* mic{};
// Set by deinit() to make the worker loop in main() exit.
std::atomic_flag done {false};
// Set by start() and cleared by stop(); main() captures audio while it is set.
std::atomic_flag controller{false};
// Worker-thread loop: waits for start(), then captures and recognizes audio.
void main();
public:
// Opens the microphone at `sampleRate` and starts the worker thread.
Recognizer(Model* model, int sampleRate, int index);
// Begins capturing and recognizing.
void start();
// Pauses capturing and recognizing.
void stop();
// Requests the worker thread to shut down.
void deinit();
// Option setters; each forwards to the matching vosk_recognizer_set_* call.
void setSpkModel(SpkModel* model);
void setGrm(const std::string& grm);
void setWords(bool words);
void setPartialWords(bool partialWords);
void setNLSML(bool nlsml);
void setMaxAlternatives(int alts);
};

36
src/recognizer.js Normal file
View File

@@ -0,0 +1,36 @@
/**
 * JavaScript wrapper around the native `__Recognizer__` binding.
 * Events fired by the native side are dispatched on this object.
 */
class Recognizer extends EventTarget {
    /**
     * @param {Model} model - A model wrapper whose native object is loaded.
     * @throws {Error} When no AudioContext implementation is available.
     */
    constructor(model) {
        // Required before `this` may be touched in a derived class.
        super();
        const AudioContextClass = globalThis.AudioContext ?? globalThis.webkitAudioContext;
        if (!AudioContextClass) {
            throw new Error("Web Audio API is not available in this environment");
        }
        // The context is only needed to read the device sample rate.
        const ctx = new AudioContextClass();
        // Register before constructing the native object: the native
        // constructor may fire events, and it looks this wrapper up by index.
        const index = __GenericObj__.objects.length;
        __GenericObj__.objects.push(this);
        try {
            this.obj = new Module.__Recognizer__(model.obj, ctx.sampleRate, index);
        } finally {
            ctx.close();
        }
    }

    /** Start recognizing. */
    start() {
        this.obj.start();
    }

    /** Stop recognizing. */
    stop() {
        this.obj.stop();
    }

    /**
     * Shuts the recognizer down and deletes the native object.
     * Safe to call more than once.
     */
    delete() {
        if (this.obj) {
            this.obj.deinit();
            this.obj.delete();
            this.obj = null;
        }
    }

    /** @param {boolean} words - Include word information in result events. */
    setWords(words) {
        this.obj.setWords(words);
    }

    /** @param {boolean} partialWords - Include word information in partialResult events. */
    setPartialWords(partialWords) {
        this.obj.setPartialWords(partialWords);
    }

    /** @param {string} grm - Grammar to add to the recognizer. */
    setGrm(grm) {
        this.obj.setGrm(grm);
    }

    /** @param {SpkModel} model - Speaker model wrapper to attach. */
    setSpkModel(model) {
        this.obj.setSpkModel(model.obj);
    }

    /** @param {boolean} nlsml - Return results in NLSML form. */
    setNLSML(nlsml) {
        this.obj.setNLSML(nlsml);
    }

    /** @param {number} alts - Maximum number of alternatives in result events. */
    setMaxAlternatives(alts) {
        this.obj.setMaxAlternatives(alts);
    }
}

15
src/spkModel.cc Normal file
View File

@@ -0,0 +1,15 @@
#include "spkModel.h"
// Loads (fetching first if necessary) the speaker model files and creates the
// Vosk speaker model from them. Fires "ready" on success, or "error" when the
// speaker model cannot be created. Does nothing further when loadModel() fails.
SpkModel::SpkModel(const std::string &url, const std::string& storepath, const std::string& id, int index) : GenericModel(url, storepath, id, index) {
    if(!loadModel()) return;
    model = vosk_spk_model_new(this->storepath.c_str());
    if(model == nullptr) {
        fireEv("error", "Unable to initialize speaker model");
        // Stop here so "ready" is never fired for a model that failed to load.
        return;
    }
    fireEv("ready");
}
// Returns true only when every file required by a speaker model exists under
// `path`. Files are probed in the listed order and the scan stops at the
// first missing one.
bool SpkModel::checkModel(const std::string& path) {
    static const char* const requiredFiles[] = {
        "/mfcc.conf",
        "/final.ext.raw",
        "/mean.vec",
        "/transform.mat",
    };
    for(const char* relative : requiredFiles) {
        if(!fs::exists(path + relative)) {
            return false;
        }
    }
    return true;
}

12
src/spkModel.h Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include "genericModel.h"
// Speaker model: a GenericModel whose files are loaded into a VoskSpkModel.
class SpkModel : public GenericModel {
// Returns true when `path` holds all files a speaker model needs.
bool checkModel(const std::string& path);
public:
// Loads the model and fires "ready" or "error" on the JavaScript wrapper at `index`.
SpkModel(const std::string &url, const std::string& storepath, const std::string& id, const int index);
// Handle returned by vosk_spk_model_new(); stays null when loading or creation failed.
VoskSpkModel* model{};
};

10
src/spkModel.js Normal file
View File

@@ -0,0 +1,10 @@
/**
 * JavaScript wrapper around the native `__SpkModel__` binding.
 * Events fired by the native side are dispatched on this object.
 */
class SpkModel extends EventTarget {
    /**
     * @param {string} url - Where the speaker model archive is fetched from.
     * @param {string} storepath - Storage path of the extracted model.
     * @param {string} id - Identifier used to decide whether a re-fetch is needed.
     */
    constructor(url, storepath, id) {
        super();
        // Register before constructing the native object: the native
        // constructor may fire events, and it looks this wrapper up by index.
        const index = __GenericObj__.objects.length;
        __GenericObj__.objects.push(this);
        this.obj = new Module.__SpkModel__(url, storepath, id, index);
    }

    /**
     * Deletes the native object. Safe to call more than once, so a manual
     * delete() followed by a bulk cleanup over `__GenericObj__.objects`
     * does not throw.
     */
    delete() {
        if (this.obj) {
            this.obj.delete();
            this.obj = null;
        }
    }
}