OpenFst is patched by the author.

This commit is contained in:
msqr1
2024-08-30 18:38:42 -07:00
parent 01a0dbf3a7
commit 1d5854cc27
12 changed files with 61 additions and 100 deletions

View File

@@ -20,7 +20,7 @@ index 80037d0..a964b2d 100644
# if no wrapping of the blas library is needed, uncomment next line
CC = emcc # -DNO_BLAS_WRAP
-CFLAGS = -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c -O3
+CFLAGS = -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c -O3 -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals
+CFLAGS = -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c -O3 -fno-rtti -fno-exceptions -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals
LOADER = $(CC)
LOADOPTS =
NOOPT = -O0 -I$(TOPDIR)/INCLUDE -I$(TOPDIR)/../libf2c
@@ -33,7 +33,7 @@ index e071614..4647c2b 100644
#
CC = emcc
-CFLAGS = -I../libf2c -O3
+CFLAGS = -I../libf2c -O3 -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals
+CFLAGS = -I../libf2c -O3 -fno-rtti -fno-exceptions -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals
DRVOPTS = $(OPTS)
NOOPT =
LOADER = emcc
@@ -46,7 +46,7 @@ index 6221401..d93b87f 100644
CC = emcc
SHELL = /bin/sh
-CFLAGS = -DNON_UNIX_STDIO -O3
+CFLAGS = -DNON_UNIX_STDIO -O3 -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals
+CFLAGS = -DNON_UNIX_STDIO -O3 -fno-rtti -fno-exceptions -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals
LD = wasm-ld
RANLIB = emranlib

View File

@@ -1,28 +0,0 @@
diff --git a/src/include/fst/bi-table.h b/src/include/fst/bi-table.h
index 7c5be23..4527001 100644
--- a/src/include/fst/bi-table.h
+++ b/src/include/fst/bi-table.h
@@ -327,7 +327,7 @@ class VectorHashBiTable {
}
VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
- : selector_(table.s_),
+ : selector_(table.selector_),
fp_(table.fp_),
h_(table.h_),
id2entry_(table.id2entry_),
diff --git a/src/include/fst/fst.h b/src/include/fst/fst.h
index 80e3948..57cf1fd 100644
--- a/src/include/fst/fst.h
+++ b/src/include/fst/fst.h
@@ -701,8 +701,8 @@ class FstImpl {
properties_.store(impl.properties_.load(std::memory_order_relaxed),
std::memory_order_relaxed);
type_ = impl.type_;
- isymbols_ = impl.isymbols_ ? impl.isymbols_->Copy() : nullptr;
- osymbols_ = impl.osymbols_ ? impl.osymbols_->Copy() : nullptr;
+ isymbols_.reset(impl.isymbols_ ? impl.isymbols_->Copy() : nullptr);
+ osymbols_.reset(impl.osymbols_ ? impl.osymbols_->Copy() : nullptr);
return *this;
}

View File

@@ -2,7 +2,6 @@
#include <emscripten/em_js.h>
#include <emscripten/wasm_worker.h>
WorkerPool globalPool;
EM_JS(void, _fireEv, (int index, int content, int type), {
objs[index].dispatchEvent(new CustomEvent(type === 0 ? "0" : UTF8ToString(type), { "detail" : UTF8ToString(content) }));
})
@@ -79,6 +78,9 @@ void Worker::startup(int _self, int _pool) {
self.fn();
}
}
static constexpr int workerStack{65536};
static std::array<std::byte, MAX_WORKERS * workerStack> stacks;
#undef MAX_WORKERS
WorkerPool::WorkerPool() {
for(int i = 0; i < workers.size(); i++) {
workers[i].handle = emscripten_create_wasm_worker(&stacks[i * workerStack], workerStack);
@@ -95,4 +97,5 @@ void WorkerPool::exec(std::function<void()> fn) {
taskQ.emplace(fn);
emscripten_atomic_store_u32(&qLock, false);
emscripten_atomic_notify(&qLock, 1);
}
}
WorkerPool globalPool;

View File

@@ -30,19 +30,16 @@ struct Worker {
#ifndef MAX_WORKERS
#define MAX_WORKERS 1
#endif
static constexpr int workerStack{65536};
static std::array<std::byte, MAX_WORKERS * workerStack> stacks;
struct WorkerPool {
bool qLock{true}; // True is locked, false is unlocked
bool done{};
std::queue<std::function<void()>> taskQ;
std::array<Worker, MAX_WORKERS> workers;
#undef MAX_WORKERS
WorkerPool();
~WorkerPool();
void exec(std::function<void()> fn);
};
extern WorkerPool globalPool;
void fireEv(int index, const char* _content, const char* _type = nullptr);
int untar(unsigned char* tar, int tarSize, const std::string& storepath);
int untar(unsigned char* tar, int tarSize, const std::string& storepath);
extern WorkerPool globalPool;

View File

@@ -1,8 +1,17 @@
diff --git a/src/model.cc b/src/model.cc
index 035ffee..7f5148a 100644
index 035ffee..18edcd3 100644
--- a/src/model.cc
+++ b/src/model.cc
@@ -74,38 +74,48 @@ static void KaldiLogHandler(const LogMessageEnvelope &env, const char *message)
@@ -23,7 +23,7 @@
#include <fst/register.h>
#include <fst/matcher-fst.h>
#include <fst/extensions/ngram/ngram-fst.h>
-
+#include <emscripten/console.h>
#ifdef HAVE_MKL
// We need to set num threads
@@ -74,38 +74,37 @@ static void KaldiLogHandler(const LogMessageEnvelope &env, const char *message)
#else
static void KaldiLogHandler(const LogMessageEnvelope &env, const char *message)
{
@@ -16,6 +25,20 @@ index 035ffee..7f5148a 100644
- if (env.severity > LogMessageEnvelope::kInfo) {
- full_message << "VLOG[" << env.severity << "] (";
- } else {
- switch (env.severity) {
- case LogMessageEnvelope::kInfo:
- full_message << "LOG (";
- break;
- case LogMessageEnvelope::kWarning:
- full_message << "WARNING (";
- break;
- case LogMessageEnvelope::kAssertFailed:
- full_message << "ASSERTION_FAILED (";
- break;
- case LogMessageEnvelope::kError:
- default: // If not the ERROR, it still an error!
- full_message << "ERROR (";
- break;
+ // Modified default Kaldi logging so we can disable LOG messages.
+ std::stringstream full_message;
+ if (env.severity > LogMessageEnvelope::kInfo) {
@@ -36,35 +59,6 @@ index 035ffee..7f5148a 100644
+ full_message << "ERROR (";
+ break;
+ }
+ }
+ // Add other info from the envelope and the message text.
+ full_message << "VoskAPI" << ':'
+ << env.func << "():" << env.file << ':'
+ << env.line << ") " << message;
+ if(env.severity > LogMessageEnvelope::kInfo) {
+ emscripten_console_log(full_message.str().c_str());
+ return;
+ }
switch (env.severity) {
case LogMessageEnvelope::kInfo:
- full_message << "LOG (";
- break;
+ emscripten_console_log(full_message.str().c_str());
+ break;
case LogMessageEnvelope::kWarning:
- full_message << "WARNING (";
- break;
- case LogMessageEnvelope::kAssertFailed:
- full_message << "ASSERTION_FAILED (";
- break;
- case LogMessageEnvelope::kError:
- default: // If not the ERROR, it still an error!
- full_message << "ERROR (";
- break;
+ emscripten_console_warn(full_message.str().c_str());
+ break;
+ default:
+ emscripten_console_error(full_message.str().c_str());
}
- }
- // Add other info from the envelope and the message text.
@@ -75,23 +69,18 @@ index 035ffee..7f5148a 100644
- // Print the complete message to stderr.
- full_message << "\n";
- std::cerr << full_message.str();
+ // Add other info from the envelope and the message text.
+ full_message << "VoskAPI" << ':'
+ << env.func << "():" << env.file << ':'
+ << env.line << ") " << message;
+ if(env.severity >= LogMessageEnvelope::kInfo) emscripten_console_log(full_message.str().c_str());
+ else if(env.severity == LogMessageEnvelope::kWarning) emscripten_console_warn(full_message.str().c_str());
+ else emscripten_console_error(full_message.str().c_str());
}
#endif
diff --git a/src/model.h b/src/model.h
index 7fc09df..608d28a 100644
--- a/src/model.h
+++ b/src/model.h
@@ -32,6 +32,7 @@
#include "rnnlm/rnnlm-utils.h"
#include "rnnlm/rnnlm-lattice-rescoring.h"
#include <atomic>
+#include <emscripten/console.h>
using namespace kaldi;
using namespace std;
diff --git a/src/recognizer.cc b/src/recognizer.cc
index 1da6e6b..44c4d8c 100644
index 55d9991..4802b92 100644
--- a/src/recognizer.cc
+++ b/src/recognizer.cc
@@ -384,8 +384,9 @@ bool Recognizer::AcceptWaveform(const float *fdata, int len)

View File

@@ -85,8 +85,8 @@ class CommonModel extends EventTarget {
throw e
}
}
let tarStart = Module._malloc(tar.byteLength)
Module.HEAPU8.set(new Uint8Array(tar), tarStart)
let tarStart = _malloc(tar.byteLength)
HEAPU8.set(new Uint8Array(tar), tarStart)
mdl.obj = new Module.CommonModel(objs.length - 1, normalMdl, "/" + storepath, id, tarStart, tar.byteLength)
return result
}
@@ -131,8 +131,8 @@ class Recognizer extends EventTarget {
return result
}
acceptWaveform(audioData) {
let start = Module._malloc(audioData.length * 4)
Module.HEAPF32.set(audioData, start / 4)
let start = _malloc(audioData.length * 4)
HEAPF32.set(audioData, start / 4)
this.obj.pushData(start, audioData.length)
}
}

View File

@@ -41,9 +41,8 @@ if [ ! -d "$OPENFST" ]; then
rm -rf /tmp/openfst &&
git clone --depth=1 https://github.com/alphacep/openfst /tmp/openfst &&
cd /tmp/openfst &&
git apply "$SRC"/Openfst.patch
autoreconf -is &&
CXXFLAGS="-r -O3 -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals" LDFLAGS="-O3 -flto" emconfigure ./configure --prefix="$OPENFST" --enable-static --disable-shared --enable-lookahead-fsts --enable-ngram-fsts --disable-bin &&
CXXFLAGS="-r -O3 -fno-rtti -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals" emconfigure ./configure --prefix="$OPENFST" --enable-static --disable-shared --enable-lookahead-fsts --enable-ngram-fsts --disable-bin &&
emmake make -j"$JOBS" install &&
echo "PACKAGE_VERSION = 1.8.0" >> "$OPENFST"/Makefile
fi
@@ -59,7 +58,7 @@ if [ ! -d "$KALDI" ]; then
git clone -b vosk --depth=1 https://github.com/alphacep/kaldi "$KALDI" &&
cd "$KALDI"/src &&
git apply "$SRC"/Kaldi.patch &&
CXXFLAGS="-O3 -UHAVE_EXECINFO_H -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals -Wno-unused-variable -Wno-unused-but-set-variable -g0" LDFLAGS="-O3 -lembind -flto -g0" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --debug-level=0 --fst-root="$OPENFST" --clapack-root="$CLAPACK_WASM" --host=WASM &&
CXXFLAGS="-O3 -UHAVE_EXECINFO_H -fno-rtti -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals -Wno-unused-variable -Wno-unused-but-set-variable -g0" LDFLAGS="-O3 -lembind -flto -g0" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --debug-level=0 --fst-root="$OPENFST" --clapack-root="$CLAPACK_WASM" --host=WASM &&
emmake make -j"$JOBS" online2 rnnlm
fi
@@ -67,13 +66,13 @@ if [ ! -d "$VOSK" ]; then
git clone -b v0.3.50 --depth=1 https://github.com/alphacep/vosk-api "$VOSK" &&
cd "$VOSK"/src &&
git apply "$SRC"/Vosk.patch &&
VOSK_FILES="Recognizer.cc language_model.cc model.cc spk_model.cc vosk_api.cc" &&
em++ -O3 -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals -Wno-deprecated -I. -I"$KALDI"/src -I"$OPENFST"/include "$VOSK_FILES" -c &&
emar -rcs vosk.a "${VOSK_FILES//.cc/.o}"
VOSK_FILES="recognizer.cc language_model.cc model.cc spk_model.cc vosk_api.cc" &&
em++ -O3 -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals -Wno-deprecated -I. -I"$KALDI"/src -I"$OPENFST"/include $VOSK_FILES -c &&
emar -rcs vosk.a ${VOSK_FILES//.cc/.o}
fi
cd "$SRC" &&
em++ Util.cc CommonModel.cc Recognizer.cc Bindings.cc -O3 -Wall -Werror -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti -DMAX_WORKERS="$MAX_THREADS" -sWASMFS -sWASM_BIGINT -sMODULARIZE -sTEXTDECODER=2 -sWASM_WORKERS=2 -sEVAL_CTORS=2 -sINITIAL_MEMORY="$INITIAL_MEMORY" -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I"$VOSK"/src -L"$KALDI"/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L"$OPENFST"/lib -l:libfst.a -l:libfstngram.a -L"$CLAPACK_WASM" -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L"$VOSK"/src -l:vosk.a -lembind -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals --pre-js Wrapper.js -o ../Vosklet.js
em++ Util.cc CommonModel.cc Recognizer.cc Bindings.cc -O3 -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fno-rtti -DMAX_WORKERS="$MAX_THREADS" -sWASMFS -sWASM_BIGINT -sMODULARIZE -sWASM_EXNREF -sTEXTDECODER=2 -sWASM_WORKERS=2 -sEVAL_CTORS=2 -sINITIAL_MEMORY="$INITIAL_MEMORY" -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString -sENVIRONMENT=web,worker -I. -I"$VOSK"/src -L"$KALDI"/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L"$OPENFST"/lib -l:libfst.a -l:libfstngram.a -L"$CLAPACK_WASM" -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L"$VOSK"/src -l:vosk.a -lembind -flto -msimd128 -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals --pre-js Wrapper.js -o ../Vosklet.js
cd .. &&
rm -f Vosklet.worker.js