change in documentation

This commit is contained in:
msqr1
2024-02-20 19:57:18 -08:00
parent 7d6b0bb668
commit 0f9cf2916e
10 changed files with 234 additions and 70 deletions

View File

@@ -4,7 +4,7 @@
# # # # # # ## # # # # # # # # # # # #
# #### # # ### #### # #### ##### #### # #### # # ####
# 45 min build time
# 20 min build time
SHELL=/bin/bash
MAX_MEMORY?=300mb
MAX_THREADS?=2
@@ -20,14 +20,14 @@ CLAPACK_WASM:=$(realpath clapack-wasm)
BrowserRecognizer.js: | vosk libarchive
cd $(SRC) && \
em++ -O3 global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$(MAX_MEMORY) -sPTHREAD_POOL_SIZE=$(MAX_THREADS) -sPOLYFILL=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadBR -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$(LIBARCHIVE)/include -I$(VOSK)/src -L$(LIBARCHIVE)/lib -larchive -L$(KALDI)/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$(OPENFST)/lib -l:libfst.a -l:libfstngram.a -L$(CLAPACK_WASM) -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$(VOSK)/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --pre-js pre.js -o ../BrowserRecognizer.js && \
em++ -O3 global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS \ -sWASM_BIGINT -sSINGLE_FILE -sMODULARIZE -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$(MAX_MEMORY) -sPTHREAD_POOL_SIZE=$(MAX_THREADS) -sPOLYFILL=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadBR -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$(LIBARCHIVE)/include -I$(VOSK)/src -L$(LIBARCHIVE)/lib -larchive -L$(KALDI)/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$(OPENFST)/lib -l:libfst.a -l:libfstngram.a -L$(CLAPACK_WASM) -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$(VOSK)/src -l:vosk.a -lopfs.js -lembind -pthread -flto -msimd128 --pre-js pre.js -o ../BrowserRecognizer.js && \
cd .. && \
rm -f BrowserRecognizer.worker.js && \
sed -i 's/locateFile("BrowserRecognizer.worker.js")/pthreadUrl/g' BrowserRecognizer.js && \
prepare:
sudo apt install shtool libtool autogen autotools-dev pkg-config make && \
[ $(EMSDK) != ../emsdk ! -d $(EMSDK) ] && \
[ $(EMSDK) != ../emsdk ! -d $(EMSDK) ] && \
echo "Invalid emsdk path"; \
exit 1; \
[ $(MAX_THREADS) -lt 2 ] && \
@@ -39,21 +39,22 @@ prepare:
[[ ! $(MAX_MEMORY) =~ "$(^[0-9]+([kmgt]b)?$$)" ]] && \
echo "MAX_MEMORY valid suffixes are kb, mb, gb, tb, none (bytes)"; \
exit 1; \
[ $(EMSDK) = emsdk ] && \
[ $(EMSDK) = ../emsdk ] && \
echo "Installing emsdk + Emscripten..."; \
git clone --depth=1 https://github.com/emscripten-core/emsdk.git && \
cd emsdk && \
git clone --depth=1 https://github.com/emscripten-core/emsdk.git ../emsdk && \
cd ../emsdk &&
./emsdk install 3.1.54 && \
./emsdk activate 3.1.54; \
. $(EMSDK)/emsdk_env.sh && \
export PATH=:$$PATH:$(EMSDK)/upstream/bin
. ./emsdk_env.sh && \
export PATH=:$$PATH:$(realpath $(EMSDK))/upstream/bin && \
cd ../src
# Build libarchive with Emscripten as a static library and install it under
# $(LIBARCHIVE). All optional compression/crypto backends and the bsd* command
# line tools are switched off at configure time. The checkout lives in
# /tmp/libarchive and is removed both before cloning and after installing.
# configure is run exactly once, with the SIMD-enabled flags; an earlier run
# without -msimd128 would be overwritten by this one anyway.
libarchive: prepare
	rm -rf /tmp/libarchive && \
	git clone -b v3.7.2 --depth=1 https://github.com/libarchive/libarchive /tmp/libarchive && \
	cd /tmp/libarchive && \
	build/autogen.sh && \
	CPPFLAGS="-O3 -flto -msimd128" LDFLAGS="-O3 -flto" emconfigure ./configure --prefix=$(LIBARCHIVE) --without-lz4 --without-lzma --without-zlib --without-bz2lib --without-xml2 --without-expat --without-cng --without-openssl --without-libb2 --without-zstd --disable-bsdunzip --disable-xattr --disable-acl --disable-bsdcpio --disable-bsdcat --disable-rpath --disable-maintainer-mode --disable-dependency-tracking --enable-static --disable-shared && \
	emmake make -j$(COMPILE_JOBS) install && \
	rm -rf /tmp/libarchive
@@ -61,14 +62,14 @@ clapack-wasm: prepare
git clone --depth=1 https://gitlab.inria.fr/multispeech/kaldi.web/clapack-wasm.git $(CLAPACK_WASM) && \
cd $(CLAPACK_WASM) && \
git apply $(SRC)/clapack-wasm.patch &&
bash ./install_repo.sh emcc
bash install_repo.sh emcc
# Build OpenFst (alphacep fork) with Emscripten as a static library and install
# it under $(OPENFST), with the ngram and lookahead FST extensions enabled and
# the command line binaries disabled. The checkout lives in /tmp/openfst and is
# removed both before cloning and after installing.
# configure is run exactly once, with the SIMD-enabled flags; an earlier run
# without -msimd128 would be overwritten by this one anyway.
openfst: prepare
	rm -rf /tmp/openfst && \
	git clone --depth=1 https://github.com/alphacep/openfst /tmp/openfst && \
	cd /tmp/openfst && \
	autoreconf -i && \
	CXXFLAGS="-pthread -r -O3 -flto -msimd128" LDFLAGS="-O3 -pthread -flto" emconfigure ./configure --prefix=$(OPENFST) --enable-static --disable-shared --enable-ngram-fsts --enable-lookahead-fsts --disable-bin --with-pic && \
	emmake make -j$(COMPILE_JOBS) install && \
	echo "PACKAGE_VERSION = 1.8.0" >> $(OPENFST)/Makefile && \
	rm -rf /tmp/openfst
# NOTE(review): the PACKAGE_VERSION line appended to $(OPENFST)/Makefile is
# presumably read by a later configure step (Kaldi's --fst-root) - confirm.
@@ -77,7 +78,7 @@ kaldi: | openfst clapack-wasm
git clone -b vosk --depth=1 https://github.com/alphacep/kaldi $(KALDI) && \
cd $(KALDI)/src && \
git apply $(SRC)/kaldi.patch && \
CXXFLAGS="-O3 -msimd128 -UHAVE_EXECINFO_H -pthread -flto" LDFLAGS="-O3 -sERROR_ON_UNDEFINED_SYMBOLS=0 -lembind -pthread -flto" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --debug-level=0 --double-precision=yes --fst-root=$(OPENFST) --clapack-root=$(CLAPACK_WASM) --host=WASM && \
CXXFLAGS="-O3 -UHAVE_EXECINFO_H -pthread -flto -msimd128" LDFLAGS="-O3 -sERROR_ON_UNDEFINED_SYMBOLS=0 -lembind -pthread -flto" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --debug-level=0 --double-precision=yes --fst-root=$(OPENFST) --clapack-root=$(CLAPACK_WASM) --host=WASM && \
emmake make -j$(COMPILE_JOBS) online2 lm rnnlm
vosk: | kaldi
@@ -85,7 +86,7 @@ vosk: | kaldi
cd $(VOSK)/src && \
git apply $(SRC)/vosk.patch && \
VOSK_FILES="recognizer.cc language_model.cc model.cc spk_model.cc vosk_api.cc" && \
em++ -pthread -O3 -flto -Wno-deprecated -I. -I$(KALDI)/src -I$(OPENFST)/include $(VOSK_FILES) -c && \
em++ -pthread -O3 -flto -msimd128 -Wno-deprecated -I. -I$(KALDI)/src -I$(OPENFST)/include $(VOSK_FILES) -c && \
emar -rcs vosk.a $(VOSK_FILES:.cc=.o)
.PHONY: prepare

View File

@@ -7,7 +7,7 @@ index bf7eda6..5fedaf6 100644
#-----------------------------------------------------------------------------
-CFLAGS = -DADD_ -O3
+CFLAGS = -DADD_ -O3 -flto
+CFLAGS = -DADD_ -O3 -flto -msimd128
#-----------------------------------------------------------------------------
# Archive programs and flags
@@ -20,7 +20,7 @@ index 80037d0..a964b2d 100644
# if no wrapping of the blas library is needed, uncomment next line
CC = emcc # -DNO_BLAS_WRAP
-CFLAGS = -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c -O3
+CFLAGS = -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c -O3 -flto
+CFLAGS = -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c -O3 -flto -msimd128
LOADER = $(CC)
LOADOPTS =
NOOPT = -O0 -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c
@@ -33,7 +33,7 @@ index e071614..4647c2b 100644
#
CC = emcc
-CFLAGS = -I../libf2c -O3
+CFLAGS = -I../libf2c -O3 -flto
+CFLAGS = -I../libf2c -O3 -flto -msimd128
DRVOPTS = $(OPTS)
NOOPT =
LOADER = emcc
@@ -46,7 +46,7 @@ index 6221401..d93b87f 100644
CC = emcc
SHELL = /bin/sh
-CFLAGS = -DNON_UNIX_STDIO -O3
+CFLAGS = -DNON_UNIX_STDIO -O3 -flto
+CFLAGS = -DNON_UNIX_STDIO -O3 -flto -msimd128
LD = wasm-ld
RANLIB = emranlib

View File

@@ -32,18 +32,28 @@ void genericModel::checkModel() {
fireEv("_checkMdl", "fetch", index);
return;
}
std::ifstream file {"id", std::ifstream::in};
if(!file.is_open()) {
FILE* idFile {fopen("id", "r")};
if(idFile == nullptr) {
fireEv("_checkMdl", "Couldn't open id file", index);
return;
}
long long size {file.seekg(0, std::ios::end).tellg()};
std::string oldid(size, ' ');
file.seekg(0);
file.read(&oldid[0], size);
if(id.compare(oldid) == 0) fireEv("_checkMdl", nullptr, index);
else fireEv("_checkMdl", "fetch", index);
file.close();
if(fseek(idFile, 0, SEEK_END) != 0) {
fireEv("_checkMdl", "Id file end seeking fail", index);
fclose(idFile);
return;
};
long long oldsize{ftell(idFile)};
char* oldid {new char[oldsize]};
if(fseek(idFile, 0L, SEEK_SET) != 0) {
fireEv("_checkMdl", "Id file start seeking fail", index);
fclose(idFile);
return;
};
fread(oldid, 1, oldsize, idFile);
fclose(idFile);
if(strcmp(oldid, id.c_str()) != 0) fireEv("_checkMdl", "fetch", index);
else fireEv("_checkMdl", nullptr, index);
delete[] oldid;
});
}
void genericModel::afterFetch() {
@@ -58,18 +68,16 @@ void genericModel::afterFetch() {
fs::remove("/opfs/m0dEl.tar",tank);
fs::remove("README",tank);
if(!checkModelFiles()) {
fireEv("_continue", "URL contains invalid model files", index);
fireEv("_continue", "URL points to invalid model files", index);
return;
}
std::ofstream idFile{"id"};
if(!idFile.is_open()) {
fs::current_path("/opfs", tank);
fs::remove_all(storepath, tank);
fireEv("_continue", "Unable to write model ID", index);
int idFd {open("id", O_WRONLY | O_TRUNC)};
if(write(idFd, id.c_str(), id.size()) == -1) {
fireEv("_continue", "Unable to write new ID", index);
close(idFd);
return;
}
idFile << id;
idFile.close();
};
close(idFd);
load(false);
});
}

View File

@@ -3,9 +3,9 @@
#include <string>
#include <filesystem>
#include <fstream>
#include <cstring>
#include <fcntl.h>
#include <vosk_api.h>
#include <archive.h>
#include <archive_entry.h>

View File

@@ -1,12 +1,9 @@
let objs = []
let dStream = new DecompressionStream("gzip")
// Release the two blob URLs this script creates with URL.createObjectURL:
// pthreadUrl (the pthread worker script) and processorUrl (the audio worklet
// processor script). Takes no arguments and returns nothing.
Module.revokeURLs = () => {
URL.revokeObjectURL(pthreadUrl)
URL.revokeObjectURL(processorUrl)
}
// Tear down everything this module has handed out: call delete() on every
// object collected in `objs`, then release the two blob URLs (pthread worker
// script and audio worklet processor script). Each URL is revoked exactly
// once here; no helper is involved, so this works on its own.
Module.cleanUp = () => {
  objs.forEach(obj => obj.delete())
  URL.revokeObjectURL(pthreadUrl)
  URL.revokeObjectURL(processorUrl)
}
Module.locateFile = (path, scriptDir) => {
if(path === "BrowserRecognizer.worker.js") return pthreadUrl
@@ -27,27 +24,25 @@ class genericModel extends EventTarget {
mdl.delete()
reject(ev.detail)
}, {once : true})
mdl.addEventListener("_checkMdl", (ev) => {
mdl.addEventListener("_checkMdl", async (ev) => {
switch(ev.detail) {
case "0":
mdl.load(true);
break;
case "fetch":
(async () => {
let res = await fetch(url)
if(!res.ok) {
return reject("Unable to download model")
}
let wStream = await (await (await navigator.storage.getDirectory()).getFileHandle("m0dEl.tar", {create : true})).createWritable()
let tarReader = res.body.pipeThrough(dStream).getReader()
while(true) {
let readRes = await tarReader.read()
if(!readRes.done) await wStream.write(readRes.value)
else break
}
await wStream.close()
mdl.obj.afterFetch()
})()
let res = await fetch(url)
if(!res.ok) {
return reject("Unable to download model")
}
let wStream = await (await (await navigator.storage.getDirectory()).getFileHandle("m0dEl.tar", {create : true})).createWritable()
let tarReader = res.body.pipeThrough(dStream).getReader()
while(true) {
let readRes = await tarReader.read()
if(!readRes.done) await wStream.write(readRes.value)
else break
}
await wStream.close()
mdl.obj.afterFetch()
break;
default:
reject(ev.detail)
@@ -63,7 +58,7 @@ class genericModel extends EventTarget {
}
}
// Create a model through genericModel._init, passing `true` as the last
// argument (Module.makeSpkModel passes `false` in the same position).
//   url       - passed through to genericModel._init
//   storepath - passed through to genericModel._init
//   id        - passed through to genericModel._init
// Returns a promise that settles with whatever genericModel._init produces.
Module.makeModel = async (url, storepath, id) => {
  return genericModel._init(url, storepath, id, true)
}
Module.makeSpkModel = async (url, storepath, id) => {
return genericModel._init(url, storepath, id, false)
@@ -92,7 +87,7 @@ class Recognizer extends EventTarget {
let msgChannel = new MessageChannel()
await ctx.audioWorklet.addModule(processorUrl)
this.node = new AudioWorkletNode(ctx, 'BRProcessor', { channelCountMode: "max", numberOfInputs: 1, numberOfOutputs: 1, processorOptions: { ptr: this.ptr, channel: channelIndex, recognizerPort: msgChannel.port1 } })
msgChannel.port1.onmessage = (ev) => {
msgChannel.port1.onmessage = () => {
this.obj.acceptWaveForm()
}
}
@@ -104,7 +99,7 @@ class Recognizer extends EventTarget {
}
delete() {
if (this.obj) this.obj.delete()
if(this.node) this.node.postMessage("0")
if(this.node) this.node.postMessage(0)
}
setWords(words) {
this.obj.setWords(words)
@@ -150,6 +145,166 @@ let processorUrl = URL.createObjectURL(new Blob(['(',
, ')()'], {type : "text/javascript"}))
let pthreadUrl = URL.createObjectURL(new Blob(['(',
(() => {
// FIXME: Copy content of worker.js with -O0 here
/**
* @license
* Copyright 2015 The Emscripten Authors
* SPDX-License-Identifier: MIT
*/
// Pthread Web Worker startup routine:
// This is the entry point file that is loaded first by each Web Worker
// that executes pthreads on the Emscripten application.
'use strict';
var Module = {};
// Thread-local guard variable for one-time init of the JS state
var initializedJS = false;
// Guard helper: does nothing when `condition` is truthy, otherwise calls
// abort() with 'Assertion failed: ' followed by `text`.
// NOTE(review): abort() is not defined in this worker script itself.
function assert(condition, text) {
  if (condition) return;
  abort('Assertion failed: ' + text);
}
// Write all arguments, joined by single spaces, to console.error.
function threadPrintErr(...args) {
  console.error(args.join(' '));
}
// Post an 'alert' command from this worker: the arguments are joined by
// single spaces into `text`, and the current value of
// Module['_pthread_self']() is sent along as `threadId`.
function threadAlert(...args) {
  postMessage({
    cmd: 'alert',
    text: args.join(' '),
    threadId: Module['_pthread_self'](),
  });
}
// We don't need out() for now, but may need to add it if we want to use it
// here. Or, if this code all moves into the main JS, that problem will go
// away. (For now, adding it here increases code size for no benefit.)
// Until then, any call to out() throws a string naming the problem.
var out = () => { throw 'out() is not defined in worker.js.'; }
// Error output goes to console.error via threadPrintErr.
var err = threadPrintErr;
// Replace the worker's alert() with threadAlert, which posts an 'alert'
// message instead.
self.alert = threadAlert;
// Debug output shares the same console.error sink as err.
var dbg = threadPrintErr;
// Hook that instantiates WebAssembly inside this worker.
//   info            - import object handed to WebAssembly.Instance
//   receiveInstance - callback that is given the new instance
// The compiled module is taken from Module['wasmModule'] (stored there by the
// 'load' message) and instantiated synchronously. The reference on Module is
// cleared first because this worker does not need it again. Returns whatever
// receiveInstance returns.
Module['instantiateWasm'] = (info, receiveInstance) => {
  const compiledModule = Module['wasmModule'];
  Module['wasmModule'] = null;
  return receiveInstance(new WebAssembly.Instance(compiledModule, info));
}
// Turn unhandled rejected promises into errors so that the main thread will be
// notified about them.
// The rejection reason is thrown when it is truthy; otherwise the event
// object itself is thrown.
self.onunhandledrejection = (e) => {
throw e.reason || e;
};
// Dispatch one message posted to this worker, based on e.data.cmd:
//   'load'         - queue later messages, copy the posted wasm module, memory,
//                    worker id and handler stubs onto Module, import the main
//                    script and call loadBR(Module)
//   'run'          - initialise the pthread and invoke its entry point
//   'cancel'       - exit the current pthread, if there is one
//   'checkMailbox' - call Module['checkMailbox']() once initializedJS is set
// Messages with e.data.target === 'setimmediate' are ignored, and any other
// command is reported through err(). Any exception is logged, reported via
// Module['__emscripten_thread_crashed'] when that function exists, and then
// re-thrown.
function handleMessage(e) {
try {
if (e.data.cmd === 'load') { // Preload command that is called once per worker to parse and load the Emscripten code.
// Until we initialize the runtime, queue up any further incoming messages.
let messageQueue = [];
self.onmessage = (e) => messageQueue.push(e);
// And add a callback for when the runtime is initialized.
self.startWorker = (instance) => {
Module = instance;
// Notify the main thread that this thread has loaded.
postMessage({ 'cmd': 'loaded' });
// Process any messages that were queued before the thread was ready.
for (let msg of messageQueue) {
handleMessage(msg);
}
// Restore the real message handler.
self.onmessage = handleMessage;
};
// Module and memory were sent from main thread
Module['wasmModule'] = e.data.wasmModule;
// Use `const` here to ensure that the variable is scoped only to
// that iteration, allowing safe reference from a closure.
for (const handler of e.data.handlers) {
Module[handler] = (...args) => {
postMessage({ cmd: 'callHandler', handler, args: args });
}
}
Module['wasmMemory'] = e.data.wasmMemory;
Module['buffer'] = Module['wasmMemory'].buffer;
Module['workerID'] = e.data.workerID;
Module['ENVIRONMENT_IS_PTHREAD'] = true;
// The main script arrives either as a URL string or as a Blob; a Blob gets a
// temporary object URL that is revoked right after the import.
if (typeof e.data.urlOrBlob == 'string') {
importScripts(e.data.urlOrBlob);
} else {
var objectUrl = URL.createObjectURL(e.data.urlOrBlob);
importScripts(objectUrl);
URL.revokeObjectURL(objectUrl);
}
// loadBR is expected to be defined by the script imported just above.
loadBR(Module);
} else if (e.data.cmd === 'run') {
// Pass the thread address to wasm to store it for fast access.
Module['__emscripten_thread_init'](e.data.pthread_ptr, /*is_main=*/0, /*is_runtime=*/0, /*can_block=*/1);
// Await mailbox notifications with `Atomics.waitAsync` so we can start
// using the fast `Atomics.notify` notification path.
Module['__emscripten_thread_mailbox_await'](e.data.pthread_ptr);
// NOTE(review): this check runs after pthread_ptr has already been used by
// the two calls above.
assert(e.data.pthread_ptr);
// Also call inside JS module to set up the stack frame for this pthread in JS module scope
Module['establishStackSpace']();
Module['PThread'].receiveObjectTransfer(e.data);
Module['PThread'].threadInitTLS();
if (!initializedJS) {
// Embind must initialize itself on all threads, as it generates support JS.
// We only do this once per worker since they get reused
Module['__embind_initialize_bindings']();
initializedJS = true;
}
try {
Module['invokeEntryPoint'](e.data.start_routine, e.data.arg);
} catch(ex) {
if (ex != 'unwind') {
// The pthread "crashed". Do not call `_emscripten_thread_exit` (which
// would make this thread joinable). Instead, re-throw the exception
// and let the top level handler propagate it back to the main thread.
throw ex;
}
}
} else if (e.data.cmd === 'cancel') { // Main thread is asking for a pthread_cancel() on this thread.
if (Module['_pthread_self']()) {
Module['__emscripten_thread_exit'](-1);
}
} else if (e.data.target === 'setimmediate') {
// no-op
} else if (e.data.cmd === 'checkMailbox') {
if (initializedJS) {
Module['checkMailbox']();
}
} else if (e.data.cmd) {
// The received message looks like something that should be handled by this message
// handler, (since there is a e.data.cmd field present), but is not one of the
// recognized commands:
err(`worker.js received unknown command ${e.data.cmd}`);
// err() joins its arguments into one string, so the payload is logged in
// its string form.
err(e.data);
}
} catch(ex) {
err(`worker.js onmessage() captured an uncaught exception: ${ex}`);
if (ex?.stack) err(ex.stack);
Module['__emscripten_thread_crashed']?.();
throw ex;
}
};
self.onmessage = handleMessage;
}).toString()
, ')()'], {type : "text/javascript"}))