Fix #29 by removing compression altogether

This commit is contained in:
msqr1
2025-04-03 19:42:02 -07:00
parent c64ae47ac7
commit 006199d103
11 changed files with 43 additions and 37 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -48,19 +48,20 @@ class CommonModel extends EventTarget {
resolve(mdl) resolve(mdl)
} }
else reject(ev.detail) else reject(ev.detail)
}, { once: true }) }, { once: true });
}); });
let cache = await caches.open('Vosklet'); let cache = await caches.open('Vosklet');
let req = (await cache.keys(storepath, { ignoreSearch: true }))[0] let req = (await cache.keys(storepath, { ignoreSearch: true }))[0];
let tar, res; let res;
if (typeof req == 'undefined' || req.url.split('?')[1] != id) { if (typeof req == 'undefined' || req.url.split('?')[1] != id) {
// Caching already handled explicitly // Caching already handled explicitly
res = await fetch(url, { cache: 'no-store' }); res = await fetch(url, { cache: 'no-store' });
if (!res.ok) throw 'Unable to fetch model, status: ' + res.status; if (!res.ok) throw 'Unable to fetch model, status: ' + res.status;
await cache.put(storepath + '?' + id, res.clone()); await cache.put(storepath + '?' + id, new Response(res.body.pipeThrough(new DecompressionStream('gzip'))));
} }
else res = await cache.match(req) else res = await cache.match(req)
tar = await new Response(res.body.pipeThrough(new DecompressionStream('gzip'))).arrayBuffer(); let tar = await res.arrayBuffer();
let tarStart = _malloc(tar.byteLength); let tarStart = _malloc(tar.byteLength);
HEAPU8.set(new Uint8Array(tar), tarStart); HEAPU8.set(new Uint8Array(tar), tarStart);
mdl.obj = new Module['CommonModel'](objs.length - 1, normalMdl, tarStart, tar.byteLength); mdl.obj = new Module['CommonModel'](objs.length - 1, normalMdl, tarStart, tar.byteLength);
@@ -70,6 +71,7 @@ class CommonModel extends EventTarget {
class Recognizer extends EventTarget { class Recognizer extends EventTarget {
constructor() { constructor() {
super(); super();
// Closure workaround to prevent acceptWaveform from getting removed // Closure workaround to prevent acceptWaveform from getting removed
this['acceptWaveform'] = audioData => { this['acceptWaveform'] = audioData => {
let start = _malloc(audioData.length * 4); let start = _malloc(audioData.length * 4);

View File

@@ -4,18 +4,18 @@
<script src="https://cdn.jsdelivr.net/gh/msqr1/Vosklet@1.2.1/Examples/Vosklet.js" async defer></script> <script src="https://cdn.jsdelivr.net/gh/msqr1/Vosklet@1.2.1/Examples/Vosklet.js" async defer></script>
<script> <script>
async function start() { async function start() {
// All data is collected and transferred to the main thread so the AudioContext won't output anything. Set sinkId type to none to save power // All data is collected and transferred to the main thread so the AudioContext won't output anything. Set sinkId type to none to save power
let ctx = new AudioContext({sinkId: {type: "none"}}); let ctx = new AudioContext({sinkId: {type: "none"}});
let module = await loadVosklet(); let module = await loadVosklet();
let model = await module.createModel("https://ccoreilly.github.io/vosk-browser/models/vosk-model-small-en-us-0.15.tar.gz", "English", "vosk-model-small-en-us-0.15"); let model = await module.createModel("https://ccoreilly.github.io/vosk-browser/models/vosk-model-small-en-us-0.15.tar.gz", "English", "vosk-model-small-en-us-0.15");
let recognizer = await module.createRecognizer(model, ctx.sampleRate); let recognizer = await module.createRecognizer(model, ctx.sampleRate);
// Listen for result and partial result // Listen for result and partial result
recognizer.addEventListener("result", ev => console.log("Result: ", ev.detail)); recognizer.addEventListener("result", ev => console.log("Result: ", ev.detail));
recognizer.addEventListener("partialResult", ev => console.log("Partial result: ", ev.detail)); recognizer.addEventListener("partialResult", ev => console.log("Partial result: ", ev.detail));
// Fetch, decode, and recognize the .wav // Fetch, decode, and recognize the .wav
let wav = await fetch("https://cdn.jsdelivr.net/gh/msqr1/Vosklet/examples/1to10-en.wav"); let wav = await fetch("https://cdn.jsdelivr.net/gh/msqr1/Vosklet/Examples/1to10-en.wav");
let audioBuf = await ctx.decodeAudioData(await wav.arrayBuffer()); let audioBuf = await ctx.decodeAudioData(await wav.arrayBuffer());
recognizer.acceptWaveform(audioBuf.getChannelData(0)); recognizer.acceptWaveform(audioBuf.getChannelData(0));
} }

View File

@@ -4,6 +4,7 @@
<script src="https://cdn.jsdelivr.net/gh/msqr1/Vosklet@1.2.1/Examples/Vosklet.js" async defer></script> <script src="https://cdn.jsdelivr.net/gh/msqr1/Vosklet@1.2.1/Examples/Vosklet.js" async defer></script>
<script> <script>
async function start() { async function start() {
// All data is collected and transferred to the main thread so the AudioContext won't output anything. Set sinkId type to none to save power // All data is collected and transferred to the main thread so the AudioContext won't output anything. Set sinkId type to none to save power
let ctx = new AudioContext({sinkId: {type: "none"}}); let ctx = new AudioContext({sinkId: {type: "none"}});

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -1,5 +1,6 @@
#pragma once #pragma once
#include "CommonModel.h" #include "CommonModel.h"
#include <queue>
// Prevent naming conflicts with Vosk's Recognizer class // Prevent naming conflicts with Vosk's Recognizer class
#define Recognizer Recognizer_ #define Recognizer Recognizer_

View File

@@ -3,6 +3,7 @@
#include <variant> #include <variant>
#include <fstream> #include <fstream>
#include <functional> #include <functional>
#include <queue>
#include "emscripten/atomic.h" #include "emscripten/atomic.h"
#include "emscripten/console.h" #include "emscripten/console.h"

View File

@@ -44,26 +44,24 @@ class CommonModel extends EventTarget {
let result = new Promise((resolve, reject) => { let result = new Promise((resolve, reject) => {
mdl.addEventListener('status', ev => { mdl.addEventListener('status', ev => {
if(!ev.detail) { if(!ev.detail) {
if(normalMdl) mdl['findWord'] = word => mdl.obj['findWord'](word) if(normalMdl) mdl['findWord'] = word => mdl.obj['findWord'](word);
resolve(mdl) resolve(mdl);
} }
else reject(ev.detail) else reject(ev.detail);
}, { once: true }) }, { once: true });
}); });
let cache = await caches.open('Vosklet'); let cache = await caches.open('Vosklet');
let req = (await cache.keys(storepath, { ignoreSearch: true }))[0] let req = (await cache.keys(storepath, { ignoreSearch: true }))[0];
let tar, res; let res;
if (typeof req == 'undefined' || req.url.split('?')[1] != id) { if (typeof req == 'undefined' || req.url.split('?')[1] != id) {
// Caching already handled explicitly // Caching already handled explicitly
res = await fetch(url, { cache: 'no-store' }); res = await fetch(url, { cache: 'no-store' });
if (!res.ok) throw 'Unable to fetch model, status: ' + res.status; if (!res.ok) throw 'Unable to fetch model, status: ' + res.status;
await cache.put( await cache.put(storepath + '?' + id, res.clone());
storepath + '?' + id,
new Response(res.clone().body.pipeThrough(new CompressionStream('gzip')))
);
} }
else res = await cache.match(req); else res = await cache.match(req);
tar = await new Response(res.body.pipeThrough(new DecompressionStream('gzip'))).arrayBuffer(); let tar = await res.arrayBuffer();
let tarStart = _malloc(tar.byteLength); let tarStart = _malloc(tar.byteLength);
HEAPU8.set(new Uint8Array(tar), tarStart); HEAPU8.set(new Uint8Array(tar), tarStart);
mdl.obj = new Module['CommonModel'](objs.length - 1, normalMdl, tarStart, tar.byteLength); mdl.obj = new Module['CommonModel'](objs.length - 1, normalMdl, tarStart, tar.byteLength);
@@ -73,6 +71,7 @@ class CommonModel extends EventTarget {
class Recognizer extends EventTarget { class Recognizer extends EventTarget {
constructor() { constructor() {
super(); super();
// Closure workaround to prevent acceptWaveform from getting removed // Closure workaround to prevent acceptWaveform from getting removed
this['acceptWaveform'] = audioData => { this['acceptWaveform'] = audioData => {
let start = _malloc(audioData.length * 4); let start = _malloc(audioData.length * 4);

View File

@@ -4,7 +4,7 @@ MAX_THREADS=${MAX_THREADS:-1}
EMSDK=${EMSDK:-../emsdk} EMSDK=${EMSDK:-../emsdk}
JOBS=${JOBS:-$(nproc)} JOBS=${JOBS:-$(nproc)}
if [ "$EMSDK" != ../emsdk ] && [ ! -d "$EMSDK" ]; then if [ "$EMSDK" != ../emsdk ] && [ ! -f "$EMSDK" ]; then
echo "Invalid emsdk path" echo "Invalid emsdk path"
exit 1 exit 1
fi fi
@@ -20,12 +20,12 @@ if ! [[ $INITIAL_MEMORY =~ ^[0-9]+([kmgt]b)?$ ]]; then
echo "INITIAL_MEMORY valid suffixes are kb, mb, gb, tb, none (bytes)" echo "INITIAL_MEMORY valid suffixes are kb, mb, gb, tb, none (bytes)"
exit 1 exit 1
fi fi
if [ "$EMSDK" = ../emsdk ] && [ ! -d "$EMSDK" ]; then if [ "$EMSDK" = ../emsdk ] && [ ! -f "$EMSDK" ]; then
echo "Installing emsdk + Emscripten..." echo "Installing emsdk + Emscripten..."
git clone --depth=1 https://github.com/emscripten-core/emsdk.git ../emsdk && git clone --depth=1 https://github.com/emscripten-core/emsdk.git ../emsdk &&
cd ../emsdk && cd ../emsdk &&
./emsdk install 3.1.69 && ./emsdk install 4.0.6 &&
./emsdk activate 3.1.69 ./emsdk activate 4.0.6
fi fi
. $(realpath "$EMSDK")/emsdk_env.sh && . $(realpath "$EMSDK")/emsdk_env.sh &&
export PATH=:$PATH:$(realpath "$EMSDK")/upstream/bin && export PATH=:$PATH:$(realpath "$EMSDK")/upstream/bin &&
@@ -37,23 +37,25 @@ VOSK=$(realpath vosk)
OPENFST=$(realpath openfst) OPENFST=$(realpath openfst)
OPENBLAS=$(realpath openblas) OPENBLAS=$(realpath openblas)
SHARED_FLAGS="-O3 -flto -msimd128 -matomics -mbulk-memory -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals" SHARED_FLAGS="-g0 -O3 -flto -msimd128 -matomics -mreference-types -mextended-const -msign-ext -mmutable-globals"
if [ ! -d "$OPENFST" ]; then if [ ! -f "$OPENFST/lib/libfst.a" ]; then
rm -rf /tmp/openfst && rm -rf /tmp/openfst &&
git clone --depth=1 https://github.com/alphacep/openfst /tmp/openfst && wget https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.8.4.tar.gz -O /tmp/openfst.tgz &&
mkdir /tmp/openfst &&
tar -xzf /tmp/openfst.tgz -C /tmp/openfst --strip-component 1 &&
cd /tmp/openfst && cd /tmp/openfst &&
autoreconf -is && autoreconf -is &&
CXXFLAGS="$SHARED_FLAGS -O3 -fno-rtti" emconfigure ./configure --prefix="$OPENFST" --enable-static --disable-shared --enable-ngram-fsts --disable-bin && CXXFLAGS="$SHARED_FLAGS -O3 -fno-rtti" emconfigure ./configure --prefix="$OPENFST" --enable-static --disable-shared --enable-ngram-fsts --disable-bin &&
emmake make -j"$JOBS" install > /dev/null && emmake make -j"$JOBS" install > /dev/null &&
echo "PACKAGE_VERSION = 1.8.0" > "$OPENFST"/Makefile
rm -rf /tmp/openfst rm -rf /tmp/openfst
fi fi
if [ ! -d "$OPENBLAS" ]; then if [ ! -f "$OPENBLAS/lib/libopenblas.a" ]; then
rm -rf /tmp/openblas && rm -rf /tmp/openblas &&
git clone -b v0.3.28 https://github.com/OpenMathLib/OpenBLAS --depth=1 /tmp/openblas && git clone -b v0.3.29 https://github.com/OpenMathLib/OpenBLAS --depth=1 /tmp/openblas &&
cd /tmp/openblas && cd /tmp/openblas &&
git apply "$SRC"/OpenBLAS.patch && git apply "$SRC"/OpenBLAS.patch &&
# Change HOSTCC to the default C compiler on your machine # Change HOSTCC to the default C compiler on your machine
openblasFlags="CC=emcc HOSTCC=clang-20 TARGET=RISCV64_GENERIC USE_THREAD=0 NO_SHARED=1 BINARY=32 BUILD_SINGLE=1 BUILD_DOUBLE=1 BUILD_BFLOAT16=0 BUILD_COMPLEX16=0 BUILD_COMPLEX=0" openblasFlags="CC=emcc HOSTCC=clang-20 TARGET=RISCV64_GENERIC USE_THREAD=0 NO_SHARED=1 BINARY=32 BUILD_SINGLE=1 BUILD_DOUBLE=1 BUILD_BFLOAT16=0 BUILD_COMPLEX16=0 BUILD_COMPLEX=0"
openblasCFlags="$SHARED_FLAGS -fno-exceptions -fno-rtti -Wno-implicit-function-declaration -Wno-unused-function -Wno-unused-but-set-variable" openblasCFlags="$SHARED_FLAGS -fno-exceptions -fno-rtti -Wno-implicit-function-declaration -Wno-unused-function -Wno-unused-but-set-variable"
@@ -62,21 +64,21 @@ if [ ! -d "$OPENBLAS" ]; then
rm -rf /tmp/openblas rm -rf /tmp/openblas
fi fi
if [ ! -d "$KALDI" ]; then if [ ! -f "$KALDI/src/kaldi.mk" ]; then
git clone -b vosk --depth=1 https://github.com/alphacep/kaldi "$KALDI" && git clone --depth=1 https://github.com/kaldi-asr/kaldi "$KALDI" &&
cd "$KALDI"/src && cd "$KALDI"/src &&
git apply "$SRC"/Kaldi.patch && CXXFLAGS="$SHARED_FLAGS -UHAVE_EXECINFO_H -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fwasm-exceptions -Wno-unused-variable -Wno-unused-but-set-variable" LDFLAGS="-lembind" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --fst-version=1.8.4 --debug-level=0 --fst-root="$OPENFST" --openblas-root="$OPENBLAS" --host=WASM &&
CXXFLAGS="$SHARED_FLAGS -UHAVE_EXECINFO_H -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fwasm-exceptions -Wno-unused-variable -Wno-unused-but-set-variable -g0" LDFLAGS="-lembind" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --debug-level=0 --fst-root="$OPENFST" --openblas-root="$OPENBLAS" --host=WASM &&
emmake make -j"$JOBS" online2 rnnlm > /dev/null emmake make -j"$JOBS" online2 rnnlm > /dev/null
fi fi
if [ ! -d "$VOSK" ]; then if [ ! -f "$VOSK/src/vosk.a" ]; then
git clone -b v0.3.50 --depth=1 https://github.com/alphacep/vosk-api "$VOSK" && git clone -b v0.3.50 --depth=1 https://github.com/alphacep/vosk-api "$VOSK" &&
cd "$VOSK"/src && cd "$VOSK"/src &&
git apply "$SRC"/Vosk.patch && git apply "$SRC"/Vosk.patch &&
voskFiles="recognizer.o language_model.o model.o spk_model.o vosk_api.o" && voskFiles="recognizer.o language_model.o model.o spk_model.o vosk_api.o" &&
# shellcheck disable=SC2086 # shellcheck disable=SC2086
em++ $SHARED_FLAGS -fwasm-exceptions -Wno-deprecated -I. -I"$KALDI"/src -I"$OPENFST"/include ${voskFiles//.o/.cc} -c && em++ $SHARED_FLAGS -DOPENFST_VER=10804 -fwasm-exceptions -Wno-deprecated -I. -I"$KALDI"/src -I"$OPENFST"/include ${voskFiles//.o/.cc} -c &&
emar -rcs vosk.a $voskFiles && emar -rcs vosk.a $voskFiles &&
rm -f $voskFiles rm -f $voskFiles
fi fi
@@ -84,7 +86,7 @@ fi
cd "$SRC" && cd "$SRC" &&
voskletFiles="Util.o CommonModel.o Recognizer.o Bindings.o" voskletFiles="Util.o CommonModel.o Recognizer.o Bindings.o"
voskletFlags="$SHARED_FLAGS -fno-rtti -sSTRICT -sWASM_WORKERS=2" voskletFlags="$SHARED_FLAGS -fno-rtti -sSTRICT -sWASM_WORKERS=2"
voskletLDFlags="-sWASMFS -sWASM_BIGINT -sMODULARIZE -sTEXTDECODER=2 -sEVAL_CTORS=2 -sALLOW_UNIMPLEMENTED_SYSCALLS -sINITIAL_MEMORY=$INITIAL_MEMORY -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sINCOMING_MODULE_JS_API=wasmMemory,instantiateWasm,wasm -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sENVIRONMENT=web,worker -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$OPENBLAS -l:lib/libopenblas.a -L$VOSK/src -l:vosk.a -lembind --no-entry --closure 1 --pre-js" voskletLDFlags="-sWASMFS -sMODULARIZE -sTEXTDECODER=2 -sEVAL_CTORS=2 -sALLOW_UNIMPLEMENTED_SYSCALLS -sINITIAL_MEMORY=$INITIAL_MEMORY -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sINCOMING_MODULE_JS_API=wasmMemory,instantiateWasm,wasm -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sENVIRONMENT=web,worker -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$OPENBLAS -l:lib/libopenblas.a -L$VOSK/src -l:vosk.a -lembind --no-entry --closure 1 --pre-js"
# shellcheck disable=SC2086 # shellcheck disable=SC2086
em++ ${voskletFiles//.o/.cc} $voskletFlags -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -DMAX_WORKERS="$MAX_THREADS" -fno-exceptions -std=c++23 -c -I. -I"$VOSK"/src && em++ ${voskletFiles//.o/.cc} $voskletFlags -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -DMAX_WORKERS="$MAX_THREADS" -fno-exceptions -std=c++23 -c -I. -I"$VOSK"/src &&