Nothing works yet, but it is on the right track: ASYNCIFY removed in favor of manual waiting, plus some proxying logic
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -1,9 +1,7 @@
|
||||
zstd
|
||||
vosk-api
|
||||
kaldi
|
||||
test
|
||||
test.sh
|
||||
minitest
|
||||
libarchive
|
||||
.vscode
|
||||
clapack-wasm
|
||||
index.html
|
||||
clapack-wasm
|
||||
7102
BrowserRecognizer.js
7102
BrowserRecognizer.js
File diff suppressed because one or more lines are too long
@@ -1,161 +1 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2015 The Emscripten Authors
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
// Pthread Web Worker startup routine:
|
||||
// This is the entry point file that is loaded first by each Web Worker
|
||||
// that executes pthreads on the Emscripten application.
|
||||
|
||||
'use strict';
|
||||
|
||||
var Module = {};
|
||||
|
||||
// Thread-local guard variable for one-time init of the JS state
|
||||
var initializedJS = false;
|
||||
|
||||
function assert(condition, text) {
|
||||
if (!condition) abort('Assertion failed: ' + text);
|
||||
}
|
||||
|
||||
function threadPrintErr() {
|
||||
var text = Array.prototype.slice.call(arguments).join(' ');
|
||||
console.error(text);
|
||||
}
|
||||
function threadAlert() {
|
||||
var text = Array.prototype.slice.call(arguments).join(' ');
|
||||
postMessage({cmd: 'alert', text, threadId: Module['_pthread_self']()});
|
||||
}
|
||||
// We don't need out() for now, but may need to add it if we want to use it
|
||||
// here. Or, if this code all moves into the main JS, that problem will go
|
||||
// away. (For now, adding it here increases code size for no benefit.)
|
||||
var out = () => { throw 'out() is not defined in worker.js.'; }
|
||||
var err = threadPrintErr;
|
||||
self.alert = threadAlert;
|
||||
var dbg = threadPrintErr;
|
||||
|
||||
Module['instantiateWasm'] = (info, receiveInstance) => {
|
||||
// Instantiate from the module posted from the main thread.
|
||||
// We can just use sync instantiation in the worker.
|
||||
var module = Module['wasmModule'];
|
||||
// We don't need the module anymore; new threads will be spawned from the main thread.
|
||||
Module['wasmModule'] = null;
|
||||
var instance = new WebAssembly.Instance(module, info);
|
||||
// TODO: Due to Closure regression https://github.com/google/closure-compiler/issues/3193,
|
||||
// the above line no longer optimizes out down to the following line.
|
||||
// When the regression is fixed, we can remove this if/else.
|
||||
return receiveInstance(instance);
|
||||
}
|
||||
|
||||
// Turn unhandled rejected promises into errors so that the main thread will be
|
||||
// notified about them.
|
||||
self.onunhandledrejection = (e) => {
|
||||
throw e.reason || e;
|
||||
};
|
||||
|
||||
function handleMessage(e) {
|
||||
try {
|
||||
if (e.data.cmd === 'load') { // Preload command that is called once per worker to parse and load the Emscripten code.
|
||||
|
||||
// Until we initialize the runtime, queue up any further incoming messages.
|
||||
let messageQueue = [];
|
||||
self.onmessage = (e) => messageQueue.push(e);
|
||||
|
||||
// And add a callback for when the runtime is initialized.
|
||||
self.startWorker = (instance) => {
|
||||
Module = instance;
|
||||
// Notify the main thread that this thread has loaded.
|
||||
postMessage({ 'cmd': 'loaded' });
|
||||
// Process any messages that were queued before the thread was ready.
|
||||
for (let msg of messageQueue) {
|
||||
handleMessage(msg);
|
||||
}
|
||||
// Restore the real message handler.
|
||||
self.onmessage = handleMessage;
|
||||
};
|
||||
|
||||
// Module and memory were sent from main thread
|
||||
Module['wasmModule'] = e.data.wasmModule;
|
||||
|
||||
// Use `const` here to ensure that the variable is scoped only to
|
||||
// that iteration, allowing safe reference from a closure.
|
||||
for (const handler of e.data.handlers) {
|
||||
Module[handler] = (...args) => {
|
||||
postMessage({ cmd: 'callHandler', handler, args: args });
|
||||
}
|
||||
}
|
||||
|
||||
Module['wasmMemory'] = e.data.wasmMemory;
|
||||
|
||||
Module['buffer'] = Module['wasmMemory'].buffer;
|
||||
|
||||
Module['workerID'] = e.data.workerID;
|
||||
|
||||
Module['ENVIRONMENT_IS_PTHREAD'] = true;
|
||||
|
||||
if (typeof e.data.urlOrBlob == 'string') {
|
||||
importScripts(e.data.urlOrBlob);
|
||||
} else {
|
||||
var objectUrl = URL.createObjectURL(e.data.urlOrBlob);
|
||||
importScripts(objectUrl);
|
||||
URL.revokeObjectURL(objectUrl);
|
||||
}
|
||||
loadBR(Module);
|
||||
} else if (e.data.cmd === 'run') {
|
||||
// Pass the thread address to wasm to store it for fast access.
|
||||
Module['__emscripten_thread_init'](e.data.pthread_ptr, /*is_main=*/0, /*is_runtime=*/0, /*can_block=*/1);
|
||||
|
||||
// Await mailbox notifications with `Atomics.waitAsync` so we can start
|
||||
// using the fast `Atomics.notify` notification path.
|
||||
Module['__emscripten_thread_mailbox_await'](e.data.pthread_ptr);
|
||||
|
||||
assert(e.data.pthread_ptr);
|
||||
// Also call inside JS module to set up the stack frame for this pthread in JS module scope
|
||||
Module['establishStackSpace']();
|
||||
Module['PThread'].receiveObjectTransfer(e.data);
|
||||
Module['PThread'].threadInitTLS();
|
||||
|
||||
if (!initializedJS) {
|
||||
// Embind must initialize itself on all threads, as it generates support JS.
|
||||
// We only do this once per worker since they get reused
|
||||
Module['__embind_initialize_bindings']();
|
||||
initializedJS = true;
|
||||
}
|
||||
|
||||
try {
|
||||
Module['invokeEntryPoint'](e.data.start_routine, e.data.arg);
|
||||
} catch(ex) {
|
||||
if (ex != 'unwind') {
|
||||
// The pthread "crashed". Do not call `_emscripten_thread_exit` (which
|
||||
// would make this thread joinable). Instead, re-throw the exception
|
||||
// and let the top level handler propagate it back to the main thread.
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
} else if (e.data.cmd === 'cancel') { // Main thread is asking for a pthread_cancel() on this thread.
|
||||
if (Module['_pthread_self']()) {
|
||||
Module['__emscripten_thread_exit'](-1);
|
||||
}
|
||||
} else if (e.data.target === 'setimmediate') {
|
||||
// no-op
|
||||
} else if (e.data.cmd === 'checkMailbox') {
|
||||
if (initializedJS) {
|
||||
Module['checkMailbox']();
|
||||
}
|
||||
} else if (e.data.cmd) {
|
||||
// The received message looks like something that should be handled by this message
|
||||
// handler, (since there is a e.data.cmd field present), but is not one of the
|
||||
// recognized commands:
|
||||
err(`worker.js received unknown command ${e.data.cmd}`);
|
||||
err(e.data);
|
||||
}
|
||||
} catch(ex) {
|
||||
err(`worker.js onmessage() captured an uncaught exception: ${ex}`);
|
||||
if (ex?.stack) err(ex.stack);
|
||||
Module['__emscripten_thread_crashed']?.();
|
||||
throw ex;
|
||||
}
|
||||
};
|
||||
|
||||
self.onmessage = handleMessage;
|
||||
"use strict";var Module={};var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}loadBR(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else 
if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else if(e.data.cmd){err(`worker.js received unknown command ${e.data.cmd}`);err(e.data)}}catch(ex){Module["__emscripten_thread_crashed"]?.();throw ex}}self.onmessage=handleMessage;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
| Function signature (global) | Description |
|
||||
|---|---|
|
||||
| ```Promise<Model> makeModel(path: string, url: string, id: string)```<br><br>```Promise<SpkModel> makeSpkModel(path: string, url: string, id: string)``` | Make a ```Model``` or ```SpkModel```<br>- If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. |
|
||||
| ```Promise<Recognizer> makeRecognizer(model: Model)``` | Make a ```Recognizer```, it will use a separate thread for recognition
|
||||
| ```Promise<Recognizer> makeRecognizer(model: Model, sampleRate: float)``` | Make a ```Recognizer```; it will use a separate thread for recognition |
|
||||
| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: -1) <br>-2: Error<br>-1: Warning<br>0: Info<br>1: Verbose<br>2: More verbose<br>3: Debug |
|
||||
| ```deleteAll()``` | Call ```delete()``` on all objects, it is recommended to run this at the API usage end to automatically clean up everything. See [why](https://emscripten.org/docs/getting_started/FAQ.html#what-does-exiting-the-runtime-mean-why-don-t-atexit-s-run).|
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
## ```Recognizer``` object
|
||||
| Function signature | Description |
|
||||
|---|---|
|
||||
| ```Promise<AudioWorkletNode> getNode(ctx: AudioContext)``` | Get a pass-through node that recognize audio and is connectable to a processing graph |
|
||||
| ```recognize(buf: AudioBuffer)``` | Recognize an AudioBuffer, usually from something like ```BaseAudioContext.decodeAudioData()```
|
||||
| ```Promise<AudioWorkletNode> getNode(ctx: AudioContext, channelIndex = 0: int)``` | Get a pass-through node that recognizes audio and is connectable to a processing graph. It has 1 input and 1 output, **channelIndex** must point to a 16-bit mono channel of the input |
|
||||
| ```recognize(buf: AudioBuffer, channelIndex = 0: int)``` | Recognize an AudioBuffer, usually from something like ```BaseAudioContext.decodeAudioData()```, **channelIndex** must point to a 16-bit mono channel of **buf** |
|
||||
| ```setPartialWords(partialWords: bool)``` | Return words' information in a partialResult event (default: false) |
|
||||
| ```setWords(words: bool)``` | Return words' information in a result event (default: false) |
|
||||
| ```setNLSML(nlsml: bool)``` | Return result and partialResult in NLSML form (default: false) |
|
||||
|
||||
@@ -9,7 +9,7 @@ sudo apt install shtool libtool autogen autotools-dev pkg-config make &&
|
||||
|
||||
MAX_MEMORY=${MAX_MEMORY:-300mb} &&
|
||||
MAX_THREADS=${MAX_THREADS:-2} &&
|
||||
EMSDK=${EMSDK:-$(realpath .)} &&
|
||||
EMSDK=${EMSDK:-$(realpath emsdk)} &&
|
||||
COMPILE_JOBS=${COMPILE_JOBS:-$(nproc)} &&
|
||||
|
||||
SRC=$(realpath src) &&
|
||||
@@ -25,9 +25,11 @@ if [ ! -d $EMSDK_PATH ]; then
|
||||
exit 1
|
||||
fi
|
||||
if [ $MAX_THREAD -lt 2 ]; then
|
||||
echo "MAX_THREAD be greater or equal to 2" &&
|
||||
echo "MAX_THREADS must be greater than or equal to 2" &&
|
||||
exit 1
|
||||
fi
|
||||
if [ $COMPILE_JOBS -lt 1 ]; then
|
||||
echo "COMPILE_JOBS must be greater than or equal to 1" &&
|
||||
if ! [[ $MAX_MEMORY =~ ^[0-9]+([kmgt]b)?$ ]]; then
|
||||
echo "MAX_MEMORY valid suffixes are kb, mb, gb, tb, none (bytes)" &&
|
||||
exit 1
|
||||
@@ -86,4 +88,4 @@ em++ -pthread -O3 -flto -Wno-deprecated -I. -I$KALDI/src -I$OPENFST/include $VOS
|
||||
emar -rcs vosk.a ${VOSK_FILES//.cc/.o} &&
|
||||
|
||||
cd $SRC &&
|
||||
em++ -O3 genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MEMORY=32pf -sASYNCIFY -sPTHREAD_POOL_SIZE=$MAX_THREAD -sPTHREAD_POOL_SIZE_STRICT -sPTHREAD_POOL_DELAY_LOAD -sASYNCIFY_ONLY=['emscripten_wget'] -sALLOW_BLOCKING_ON_MAIN_THREAD=0 -sPOLYFILL=0 --pre-js pre1.js --pre-js pre2.js --pre-js pre3.js -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -o BrowserRecognizer.js
|
||||
em++ -O3 genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MEMORY=32pf -sASYNCIFY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sPTHREAD_POOL_SIZE_STRICT -sPTHREAD_POOL_DELAY_LOAD -sASYNCIFY_ONLY=['emscripten_wget'] -sALLOW_BLOCKING_ON_MAIN_THREAD=0 -sPOLYFILL=0 --pre-js pre.js -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto -o BrowserRecognizer.js
|
||||
|
||||
28
index.html
Normal file
28
index.html
Normal file
@@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html>
<html>
<head>
<script src="BrowserRecognizer.js"></script>
</head>
<body>
<!--
  Demo page: loads the recognizer module, builds a model and a recognizer,
  logs recognition events and requests microphone access.
  type="module" is required because the script uses top-level await, which
  is a SyntaxError in a classic script.
-->
<script type="module">
const BrRec = await loadBR()
const model = await BrRec.makeModel("test/model.tzst")
const recognizer = await BrRec.makeRecognizer(model)
// CustomEvent exposes its payload on the standard `detail` property.
recognizer.addEventListener("result", e => {
    console.log("Result: ", e.detail)
})
recognizer.addEventListener("partialResult", e => {
    console.log("Partial result: ", e.detail)
})
const ctx = new AudioContext()
// Declared with const: module scripts are strict, so an undeclared
// assignment would throw a ReferenceError.
const media = await navigator.mediaDevices.getUserMedia({
    video: false,
    audio: {
        echoCancellation: true,
        noiseSuppression: true,
        channelCount: 1,
        sampleRate: 16000
    },
})
</script>
</body>
</html>
|
||||
@@ -3,22 +3,6 @@
|
||||
#include "recognizer.h"
|
||||
#include <emscripten/bind.h>
|
||||
using namespace emscripten;
|
||||
void throwJS(const char* msg, bool err = false) {
|
||||
EM_ASM({
|
||||
if($1) {
|
||||
throw Error(UTF8ToString)
|
||||
return
|
||||
}
|
||||
throw UTF8ToString($0)
|
||||
},msg, err);
|
||||
}
|
||||
int main() {
|
||||
//vosk_set_log_level(-1);
|
||||
std::thread t{[](){
|
||||
wasmfs_create_directory("/opfs",0777,wasmfs_create_opfs_backend());
|
||||
}};
|
||||
t.detach();
|
||||
}
|
||||
EMSCRIPTEN_BINDINGS() {
|
||||
function("setLogLevel", &vosk_set_log_level, allow_raw_pointers());
|
||||
class_<model>("Model")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include "genericModel.h"
|
||||
|
||||
fetchData::fetchData(const std::string& storepath, bool* successful, std::atomic_flag* blocker, genericModel* self) : storepath(storepath), successful(successful), blocker(blocker), self(self) {};
|
||||
genericModel::genericModel(const std::string &url, const std::string& storepath, const std::string &id) : url(url), id(id) {
|
||||
fs::current_path("/opfs");
|
||||
fs::create_directories(storepath);
|
||||
@@ -7,9 +7,7 @@ genericModel::genericModel(const std::string &url, const std::string& storepath,
|
||||
}
|
||||
bool genericModel::checkId(const std::string& id) {
|
||||
std::ifstream file {"id", std::ifstream::binary};
|
||||
if(!file.is_open()) {
|
||||
return false;
|
||||
};
|
||||
if(!file.is_open()) return false;
|
||||
long long size {file.seekg(0, std::ios::end).tellg()};
|
||||
std::string oldid(size, ' ');
|
||||
file.seekg(0);
|
||||
@@ -17,41 +15,47 @@ bool genericModel::checkId(const std::string& id) {
|
||||
return id.compare(oldid) == 0 ? true : false;
|
||||
}
|
||||
bool genericModel::loadModel(const std::string& storepath) {
|
||||
if(!checkModel() || !checkId(id)) {
|
||||
char filename[] {"/opfs/XXXXXX.tzst"};
|
||||
close(mkostemps(filename, 5, O_PATH));
|
||||
if(emscripten_wget(url.c_str(),filename) == 1) {
|
||||
throwJS("Unable to fetch model");
|
||||
return false;
|
||||
}
|
||||
if(!extractModel(filename)) {
|
||||
if(checkModel() && checkId(id)) return true;
|
||||
std::atomic_flag blocker{};
|
||||
bool successful{};
|
||||
fetchData data{storepath, &successful, &blocker, this};
|
||||
emscripten_async_wget2(url.c_str(), "A_fIlEnAmE_tHaT_dOeS_nOt_CoNfLiCt.tzst", "GET", nullptr, (void*)&data, [](unsigned handle, void* arg, const char* fname){
|
||||
fetchData* data = (fetchData*)arg;
|
||||
if(!extractModel()) {
|
||||
throwJS("Unable to extract model");
|
||||
return false;
|
||||
return;
|
||||
}
|
||||
fs::remove(filename);
|
||||
if(!checkModel()) {
|
||||
fs::remove(fname);
|
||||
if(!data->self->checkModel()) {
|
||||
throwJS("Model URL contains invalid model files");
|
||||
fs::current_path("/opfs");
|
||||
fs::remove_all(storepath);
|
||||
return false;
|
||||
fs::remove_all(data->storepath);
|
||||
return;
|
||||
}
|
||||
std::ofstream idFile("id");
|
||||
if(!idFile.is_open()) {
|
||||
throwJS("Unable to write new id");
|
||||
fs::remove_all(storepath);
|
||||
return false;
|
||||
fs::current_path("/opfs");
|
||||
fs::remove_all(data->storepath);
|
||||
return;
|
||||
}
|
||||
idFile << id;
|
||||
}
|
||||
return true;
|
||||
idFile << data->self->id;
|
||||
*data->successful = true;
|
||||
data->blocker->notify_one();
|
||||
}, [](unsigned handle, void* arg, int status) {
|
||||
throwJS("Unable to fetch model");
|
||||
((fetchData*)arg)->blocker->notify_one();
|
||||
}, nullptr);
|
||||
blocker.wait(false, std::memory_order_relaxed);
|
||||
return successful;
|
||||
}
|
||||
bool genericModel::extractModel(char* name) {
|
||||
bool genericModel::extractModel() {
|
||||
std::string path{};
|
||||
archive* src {archive_read_new()};
|
||||
archive_entry* entry {};
|
||||
archive_read_support_filter_all(src);
|
||||
archive_read_support_format_all(src);
|
||||
archive_read_open_filename(src, name,10240);
|
||||
archive_read_open_filename(src, "A_fIlEnAmE_tHaT_dOeS_nOt_CoNfLiCt.tzst",10240);
|
||||
if(archive_errno(src) != 0) return false;
|
||||
while (archive_read_next_header(src, &entry) == ARCHIVE_OK) {
|
||||
path = archive_entry_pathname(entry);
|
||||
|
||||
@@ -1,24 +1,30 @@
|
||||
#pragma once
|
||||
#include "global.h"
|
||||
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <vosk_api.h>
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
#include <emscripten/wasmfs.h>
|
||||
#include <emscripten.h>
|
||||
extern void throwJS(const char* msg, bool err = false);
|
||||
extern void throwJS(const char* msg, bool err);
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
struct genericModel {
|
||||
const std::string url{};
|
||||
const std::string id{};
|
||||
static bool extractModel(char *name);
|
||||
static bool extractModel();
|
||||
static bool checkId(const std::string& id);
|
||||
virtual bool checkModel() = 0;
|
||||
bool loadModel(const std::string& storepath);
|
||||
genericModel(const std::string &url, const std::string &storepath, const std::string &id);
|
||||
};
|
||||
struct fetchData {
|
||||
const std::string storepath{};
|
||||
std::atomic_flag* blocker{};
|
||||
bool* successful{};
|
||||
genericModel* self{};
|
||||
fetchData(const std::string& storepath, bool* successful, std::atomic_flag* blocker, genericModel* self);
|
||||
};
|
||||
|
||||
17
src/global.cc
Normal file
17
src/global.cc
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "global.h"
|
||||
void throwJS(const char* msg, bool err) {
|
||||
EM_ASM({
|
||||
if($1) {
|
||||
throw Error(UTF8ToString($0));
|
||||
return;
|
||||
}
|
||||
throw UTF8ToString($0);
|
||||
},msg, err);
|
||||
}
|
||||
int main() {
|
||||
//vosk_set_log_level(-1);
|
||||
std::thread t{[](){
|
||||
wasmfs_create_directory("/opfs",0777,wasmfs_create_opfs_backend());
|
||||
}};
|
||||
t.detach();
|
||||
}
|
||||
9
src/global.h
Normal file
9
src/global.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#pragma once
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
|
||||
#include <emscripten.h>
|
||||
#include <emscripten/wasmfs.h>
|
||||
|
||||
void throwJS(const char* msg, bool err = false);
|
||||
int main();
|
||||
@@ -1,29 +1,34 @@
|
||||
// @externs
|
||||
let objs = []
|
||||
class Recognizer extends EventTarget {
|
||||
constructor(rec) {
|
||||
super()
|
||||
this.obj = rec
|
||||
objs.push(this)
|
||||
this.ptr = Module._malloc(512)
|
||||
this.arr = Module.HEAPF32.subarray(this.ptr, this.ptr+512)
|
||||
}
|
||||
getNode(ctx) {
|
||||
let channel = new MessageChannel()
|
||||
this.node = new AudioWorkletNode(ctx, 'BRProcessor', { channelCount: 1, numberOfInputs: 1, numberOfOutputs: 1 })
|
||||
node.port.postMessage({cmd : "init", ptr: this.ptr},[channel.port1])
|
||||
channel.port1.onmessage = (ev) => {
|
||||
this.obj.acceptWaveForm(this.ptr, 512)
|
||||
}
|
||||
return this.node
|
||||
async getNode(ctx, channelIndex = 0) {
|
||||
if(typeof this.node === "undefined") {
|
||||
let msgChannel = new MessageChannel()
|
||||
ctx.AudioWorklet.addModule("src/processor.js")
|
||||
this.node = new AudioWorkletNode(ctx, 'BRProcessor', { channelCountMode: "max", numberOfInputs: 1, numberOfOutputs: 1 })
|
||||
this.node.port.postMessage({cmd : "init", ptr: this.ptr, channel: channelIndex}, [msgChannel.port1])
|
||||
msgChannel.port1.onmessage = (ev) => {
|
||||
this.obj.acceptWaveForm()
|
||||
}
|
||||
return this.node
|
||||
}
|
||||
}
|
||||
recognize(buf) {
|
||||
buf.copyFromChannel()
|
||||
this.obj.acceptWaveForm(this.ptr, 512)
|
||||
recognize(buf, channelIndex = 0) {
|
||||
buf.copyFromChannel(this.arr, channelIndex)
|
||||
this.obj.acceptWaveForm()
|
||||
}
|
||||
delete() {
|
||||
this.obj.delete()
|
||||
if(typeof this.node !== "undefined") {
|
||||
this.node.port.postMessage({cmd : "deinit"})
|
||||
}
|
||||
Module.free(this.ptr)
|
||||
}
|
||||
setWords(words) {
|
||||
this.obj.setWords(words)
|
||||
@@ -46,7 +51,6 @@ class Recognizer extends EventTarget {
|
||||
}
|
||||
Module.deleteAll = () => {
|
||||
objs.forEach(obj => obj.delete())
|
||||
ctx.close()
|
||||
}
|
||||
Module.makeModel = async (url, path, id) => {
|
||||
let mdl
|
||||
@@ -71,11 +75,11 @@ Module.makeSpkModel = async (url, path, id) => {
|
||||
}
|
||||
objs.push(mdl)
|
||||
return mdl
|
||||
}, ctx.AudioWorklet
|
||||
Module.makeRecognizer = async (model) => {
|
||||
}
|
||||
Module.makeRecognizer = async (model, sampleRate) => {
|
||||
let rec
|
||||
try {
|
||||
rec = new Module.recognizer(model, ctx.sampleRate, objs.length)
|
||||
rec = new Module.recognizer(model, sampleRate, objs.length)
|
||||
}
|
||||
catch(e) {
|
||||
rec.delete()
|
||||
27
src/pre2.js
27
src/pre2.js
@@ -1,27 +0,0 @@
|
||||
// A copy and pass processor
|
||||
registerProcessor("BRProcessor", class extends AudioWorkletProcessor {
|
||||
constructor(options) {
|
||||
super(options)
|
||||
this.ret = true
|
||||
this.port.onmessage = (ev) => {
|
||||
switch(ev.cmd) {
|
||||
case "init":
|
||||
this.recognizerPort = ev.ports[0]
|
||||
this.wasmMem = new Float32Array(WebAssembly.Memory.buffer).subarray(ev.ptr, ev.ptr+512)
|
||||
this.channel = ev.channel
|
||||
this.input = ev.input
|
||||
break
|
||||
case "deinit":
|
||||
this.ret = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
process(inputs, outputs, params) {
|
||||
if(!this.ret) return false;
|
||||
inputs[this.input].copyFromChannel(this.wasmMem, this.channel)
|
||||
outputs = inputs
|
||||
this.recognizerPort.postMessage(".") // A
|
||||
return true
|
||||
}
|
||||
})
|
||||
28
src/processor.js
Normal file
28
src/processor.js
Normal file
@@ -0,0 +1,28 @@
|
||||
// A copy-and-pass processor: it forwards its input to its output unchanged,
// copies one input channel into a 512-sample float buffer inside the wasm heap
// and pings the recognizer each time that buffer is full.
//
// Messages accepted on this.port:
//   { cmd: "init", ptr, channel, memory } plus one transferred MessagePort
//       ptr     - byte address of the 512-float buffer in the wasm heap
//       channel - input channel to copy (default 0)
//       memory  - the wasm heap (ArrayBuffer/SharedArrayBuffer) or an object
//                 exposing it as `.buffer`; without it the node only passes
//                 audio through
//   { cmd: "deinit" } - stop processing
//
// Check if already registered.
if (typeof BRProcessor === "undefined") {
    var BRProcessor = class extends AudioWorkletProcessor {
        constructor(options) {
            super(options)
            this.done = false
            this.channel = 0
            this.filled = 0 // samples currently stored in wasmMem
            this.wasmMem = null
            this.recognizerPort = null
            this.port.onmessage = (ev) => {
                // The payload of a MessageEvent is in ev.data, not on the event.
                const msg = ev.data
                switch (msg.cmd) {
                    case "init": {
                        this.recognizerPort = ev.ports[0]
                        this.channel = msg.channel ?? 0
                        // `WebAssembly.Memory.buffer` is not a real property,
                        // so the heap has to be supplied by the sender.
                        const heap = msg.memory?.buffer ?? msg.memory
                        if (heap) {
                            // msg.ptr is a byte offset, so it is passed as
                            // byteOffset rather than used as an element index.
                            this.wasmMem = new Float32Array(heap, msg.ptr, 512)
                        }
                        this.filled = 0
                        break
                    }
                    case "deinit":
                        this.done = true
                        break
                }
            }
        }

        // Append samples to the wasm buffer, notifying the recognizer every
        // time the buffer becomes full.
        feed(samples) {
            let offset = 0
            while (offset < samples.length) {
                const room = this.wasmMem.length - this.filled
                const count = Math.min(room, samples.length - offset)
                this.wasmMem.set(samples.subarray(offset, offset + count), this.filled)
                this.filled += count
                offset += count
                if (this.filled === this.wasmMem.length) {
                    this.recognizerPort.postMessage(".") // Basically an empty message
                    this.filled = 0
                }
            }
        }

        process(inputs, outputs, params) {
            if (this.done) return false
            const input = inputs[0]
            const output = outputs[0]
            // Reassigning the `outputs` parameter has no effect; the samples
            // must be copied into the provided output arrays.
            for (let c = 0; c < output.length; c++) {
                if (input[c]) output[c].set(input[c])
            }
            // inputs[0] is an array of Float32Array channels (it has no
            // copyFromChannel); it is empty while nothing is connected.
            const samples = input[this.channel]
            if (samples && this.wasmMem && this.recognizerPort) {
                this.feed(samples)
            }
            return true
        }
    }
    registerProcessor("BRProcessor", BRProcessor)
}
|
||||
@@ -6,31 +6,35 @@ recognizer::recognizer(model* mdl, float sampleRate, int index) : index(index) {
|
||||
return;
|
||||
}
|
||||
controller.lock();
|
||||
std::thread t{[this](){
|
||||
std::thread t{[this](const pthread_t& caller){
|
||||
while(!done.test()) {
|
||||
controller.lock();
|
||||
if(!done.test()) {
|
||||
switch(vosk_recognizer_accept_waveform_f(rec, dataPtr, 512)) {
|
||||
case 0:
|
||||
fireEv("result", vosk_recognizer_result(rec));
|
||||
fireEv("result", vosk_recognizer_result(rec), caller);
|
||||
break;
|
||||
case 1:
|
||||
fireEv("partialResult", vosk_recognizer_partial_result(rec));
|
||||
fireEv("partialResult", vosk_recognizer_partial_result(rec), caller);
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
},pthread_self()};
|
||||
t.detach();
|
||||
}
|
||||
recognizer::~recognizer() {
|
||||
done.test_and_set(std::memory_order_relaxed);
|
||||
controller.unlock();
|
||||
vosk_recognizer_free(rec);
|
||||
free(dataPtr);
|
||||
}
|
||||
void recognizer::fireEv(const char *type, const char *content) {
|
||||
EM_ASM({
|
||||
recognizers[$0].dispatchEvent(new CustomEvent(UTF8ToString($1), {"details" : UTF8ToString($2)}));
|
||||
},this->index, type, content);
|
||||
void recognizer::fireEv(const char *type, const char *content, const pthread_t& caller) {
|
||||
static ProxyingQueue pq{};
|
||||
pq.proxyAsync(caller, [&](){
|
||||
EM_ASM({
|
||||
objs[$0].dispatchEvent(new CustomEvent(UTF8ToString($1), {"details" : UTF8ToString($2)}));
|
||||
},index, type, content);
|
||||
});
|
||||
}
|
||||
void recognizer::acceptWaveForm() {
|
||||
controller.unlock();
|
||||
|
||||
@@ -1,20 +1,17 @@
|
||||
#pragma once
|
||||
#include "model.h"
|
||||
#include "spkModel.h"
|
||||
#include "global.h"
|
||||
|
||||
#include <filesystem>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include <emscripten/wasmfs.h>
|
||||
#include <emscripten/webaudio.h>
|
||||
#include <AL/al.h>
|
||||
#include <AL/alc.h>
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
extern void throwJS(const char* msg, bool err = false);
|
||||
#include <emscripten/proxying.h>
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
using namespace emscripten;
|
||||
struct recognizer {
|
||||
std::atomic_flag done{};
|
||||
std::mutex controller{};
|
||||
@@ -24,7 +21,7 @@ struct recognizer {
|
||||
recognizer(model* model, float sampleRate, int index);
|
||||
~recognizer();
|
||||
void acceptWaveForm();
|
||||
void fireEv(const char* type, const char* content);
|
||||
void fireEv(const char* type, const char* content, const pthread_t& caller);
|
||||
void setSpkModel(spkModel* model);
|
||||
void setGrm(const std::string& grm);
|
||||
void setWords(bool words);
|
||||
|
||||
19
test/BrowserRecognizer.js
Normal file
19
test/BrowserRecognizer.js
Normal file
File diff suppressed because one or more lines are too long
1
test/BrowserRecognizer.worker.js
Normal file
1
test/BrowserRecognizer.worker.js
Normal file
@@ -0,0 +1 @@
|
||||
"use strict";var Module={};var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}loadBR(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else 
if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else if(e.data.cmd){err(`worker.js received unknown command ${e.data.cmd}`);err(e.data)}}catch(ex){Module["__emscripten_thread_crashed"]?.();throw ex}}self.onmessage=handleMessage;
|
||||
BIN
test/en-model.tzst
Normal file
BIN
test/en-model.tzst
Normal file
Binary file not shown.
12
test/index.html
Normal file
12
test/index.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<script src="BrowserRecognizer.js" async defer>
|
||||
</script>
|
||||
<!--
|
||||
<script src="src/genericObj.js"></script>
|
||||
<script src="src/model.js"></script>
|
||||
<script src="src/spkModel.js"></script>
|
||||
<script src="src/recognizer.js"></script>-->
|
||||
</head>
|
||||
</html>
|
||||
38
test/test.sh
Executable file
38
test/test.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
# Builds the test bundle (test/BrowserRecognizer.js) from the sources in src/.
# Must be run from the test/ directory. Tunables (environment variables):
#   MAX_MEMORY  - initial wasm memory, e.g. 300mb (default: 300mb)
#   MAX_THREADS - pthread pool size, at least 2 (default: 2)
#   EMSDK       - path to an emsdk checkout (default: ../emsdk)
# bash is required because of the [[ ... =~ ... ]] test below.

cd .. &&
SRC=$(realpath src) &&
KALDI=$(realpath kaldi) &&
VOSK=$(realpath vosk-api) &&
OPENFST=$KALDI/tools/openfst &&
LIBARCHIVE=$(realpath libarchive) &&
ZSTD=$(realpath zstd) &&
CLAPACK_WASM=$(realpath clapack-wasm) &&

MAX_MEMORY=${MAX_MEMORY:-300mb} &&
MAX_THREADS=${MAX_THREADS:-2} &&
EMSDK=${EMSDK:-$(realpath ../emsdk)} &&

# The variable is MAX_THREADS; the original tested the undefined $MAX_THREAD,
# so this check never ran.
if [ "$MAX_THREADS" -lt 2 ]; then
  echo "MAX_THREADS must be greater than or equal to 2" &&
  exit 1
fi
if ! [[ $MAX_MEMORY =~ ^[0-9]+([kmgt]b)?$ ]]; then
  echo "MAX_MEMORY valid suffixes are kb, mb, gb, tb, none (bytes)" &&
  exit 1
fi
if [ "$(realpath "$EMSDK")" == "$(realpath emsdk)" ]; then
  echo "EMSDK is current directory, installing emsdk and Emscripten..." &&
  git clone --depth=1 https://github.com/emscripten-core/emsdk.git &&
  cd emsdk &&
  ./emsdk install 3.1.51 &&
  ./emsdk activate 3.1.51
fi
# Checked after the optional install so a fresh ./emsdk checkout is accepted.
# The original tested the undefined $EMSDK_PATH, which never failed.
if [ ! -d "$EMSDK" ]; then
  echo "Invalid EMSDK path"
  exit 1
fi

. "$EMSDK/emsdk_env.sh" &&
# No leading ':' here: an empty PATH entry means the current directory.
export PATH="$PATH:$EMSDK/upstream/bin" &&

cd "$SRC" &&
em++ -O3 global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sEMBIND_STD_STRING_IS_UTF8 -sSUPPORT_LONGJMP=0 -sMODULARIZE -sEXPORT_NAME=loadBR -sENVIRONMENT=web,worker -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sPTHREAD_POOL_SIZE_STRICT -sPTHREAD_POOL_DELAY_LOAD -sALLOW_BLOCKING_ON_MAIN_THREAD=1 -sPOLYFILL=0 --pre-js pre.js -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$ZSTD/lib -lzstd -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -o ../test/BrowserRecognizer.js
|
||||
Reference in New Issue
Block a user