Remove zstd and use JS decompressionStream => support gzip, 4.4MB -> 3.9MB, change example model.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -7,4 +7,4 @@ libarchive
|
|||||||
clapack-wasm
|
clapack-wasm
|
||||||
openfst
|
openfst
|
||||||
emsdk
|
emsdk
|
||||||
devel
|
devel/index.html
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
# Overview
|
# Overview
|
||||||
- A speech recognizer built on Vosk that can be run in the browser, inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser), but built from scratch with no code taken from it!
|
- A speech recognizer built on Vosk that can be run in the browser, inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser), but built from scratch with no code taken from it!
|
||||||
- Designed with strong exception safety
|
- Designed with strong exception safety
|
||||||
|
- See the examples folder for ways to use the API
|
||||||
|
- See the devel folder for the latest build (not guaranteed to work) and the JS build script
|
||||||
|
|
||||||
# Additions to vosk-browser:
|
# Additions to vosk-browser:
|
||||||
- Download multiple models
|
- Download multiple models
|
||||||
@@ -24,7 +26,7 @@ Browser-recognizer needs the Emscripten WASMFS' OPFS to store its model, IDBFS w
|
|||||||
## JS ```window``` object
|
## JS ```window``` object
|
||||||
| Function signature | Description |
|
| Function signature | Description |
|
||||||
|---|---|
|
|---|---|
|
||||||
|```Promise<Module> loadBR()``` | Load the Emscripten Module
|
|```Promise<Module> loadBR()``` | Load Emscripten's Module |
|
||||||
|
|
||||||
## Shared interface
|
## Shared interface
|
||||||
| Function signature | Description |
|
| Function signature | Description |
|
||||||
@@ -34,7 +36,7 @@ Browser-recognizer needs the Emscripten WASMFS' OPFS to store its model, IDBFS w
|
|||||||
## ```Module``` object
|
## ```Module``` object
|
||||||
| Function signature | Description |
|
| Function signature | Description |
|
||||||
|---|---|
|
|---|---|
|
||||||
| ```Promise<Model> makeModel(path: string, url: string, id: string)```<br><br>```Promise<SpkModel> makeSpkModel(path: string, url: string, id: string)``` | Make a ```Model``` or ```SpkModel```<br>- If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. |
|
| ```Promise<Model> makeModel(path: string, url: string, id: string)```<br><br>```Promise<SpkModel> makeSpkModel(path: string, url: string, id: string)``` | Make a ```Model``` or ```SpkModel```<br>- If **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- If **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. Model files must be directly under the model root folder, and the compressed model must be in .tgz (gzip-compressed tar) format. |
|
||||||
| ```Promise<Recognizer> makeRecognizer(model: Model, sampleRate: float)``` | Make a ```Recognizer```, it will use **model**'s thread if it's the first user of **model**, else it will use a new thread. |
|
| ```Promise<Recognizer> makeRecognizer(model: Model, sampleRate: float)``` | Make a ```Recognizer```, it will use **model**'s thread if it's the first user of **model**, else it will use a new thread. |
|
||||||
| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: ```0```: Info) <br>```-2```: Error<br>```-1```: Warning<br>```1```: Verbose<br>```2```: More verbose<br>```3```: Debug |
|
| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: ```0```: Info) <br>```-2```: Error<br>```-1```: Warning<br>```1```: Verbose<br>```2```: More verbose<br>```3```: Debug |
|
||||||
| ```revokeURLs()``` | Revoke the Blob URLs of pthread worker and worklet processor |
|
| ```revokeURLs()``` | Revoke the Blob URLs of pthread worker and worklet processor |
|
||||||
@@ -70,6 +72,7 @@ cd Browser-recognizer &&
|
|||||||
| MAX_THREADS | Set the maximum number of threads (minimum 2) | ```2``` (1 OPFS thread + 1 model/recognizer thread) |
|
| MAX_THREADS | Set the maximum number of threads (minimum 2) | ```2``` (1 OPFS thread + 1 model/recognizer thread) |
|
||||||
| COMPILE_JOBS | Set the number of jobs (threads) when compiling | ```$(nproc)``` |
|
| COMPILE_JOBS | Set the number of jobs (threads) when compiling | ```$(nproc)``` |
|
||||||
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |
|
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |
|
||||||
|
|
||||||
# TODO:
|
# TODO:
|
||||||
- Fix libarchive extract closing issue
|
- Fix libarchive extract closing issue
|
||||||
- setSpkModel avoid spawning extra thread
|
- setSpkModel avoid spawning extra thread
|
||||||
|
|||||||
18
devel/BrowserRecognizer.js
Normal file
18
devel/BrowserRecognizer.js
Normal file
File diff suppressed because one or more lines are too long
45
devel/test.sh
Executable file
45
devel/test.sh
Executable file
@@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env bash
# Build the js file into here
#
# Run from a directory directly below the repository root (e.g. devel): the
# script steps up one level and resolves every dependency from there.
#
# Environment overrides:
#   MAX_MEMORY   value passed to -sINITIAL_MEMORY (default 300mb); a number
#                optionally followed by kb, mb, gb or tb
#   MAX_THREADS  value passed to -sPTHREAD_POOL_SIZE (default 2, minimum 2)
#   EMSDK        path to an existing emsdk checkout. When unset, ../emsdk
#                (relative to the repository root) is used if it exists;
#                otherwise emsdk 3.1.53 is installed into <root>/emsdk.

cd .. &&
SRC=$(realpath src) &&
KALDI=$(realpath kaldi) &&
VOSK=$(realpath vosk) &&
OPENFST=$(realpath openfst) &&
LIBARCHIVE=$(realpath libarchive) &&
CLAPACK_WASM=$(realpath clapack-wasm) || exit 1

MAX_MEMORY=${MAX_MEMORY:-300mb}
MAX_THREADS=${MAX_THREADS:-2}

# Validate the cheap inputs first so a typo never triggers an emsdk install.
# The regex guard keeps a non-numeric value from slipping past `-lt`.
if ! [[ $MAX_THREADS =~ ^[0-9]+$ ]] || [ "$MAX_THREADS" -lt 2 ]; then
    echo "MAX_THREADS must be greater or equal to 2"
    exit 1
fi
if ! [[ $MAX_MEMORY =~ ^[0-9]+([kmgt]b)?$ ]]; then
    echo "MAX_MEMORY valid suffixes are kb, mb, gb, tb, none (bytes)"
    exit 1
fi

if [ -z "${EMSDK:-}" ]; then
    if [ -d ../emsdk ]; then
        # Historical default location: next to the repository
        EMSDK=$(realpath ../emsdk)
    else
        # Nothing configured: install (or reuse) a copy in the repository root
        EMSDK=$PWD/emsdk
        if [ ! -f "$EMSDK/emsdk_env.sh" ]; then
            echo "EMSDK is unset, installing emsdk and Emscripten into $EMSDK..."
            git clone --depth=1 https://github.com/emscripten-core/emsdk.git "$EMSDK" &&
            # Subshell: keep the working directory of the rest of the script unchanged
            (cd "$EMSDK" && ./emsdk install 3.1.53 && ./emsdk activate 3.1.53) || exit 1
        fi
    fi
elif [ ! -d "$EMSDK" ]; then
    echo "Invalid EMSDK path"
    exit 1
else
    # Make a relative path absolute before the later directory changes
    EMSDK=$(realpath "$EMSDK") || exit 1
fi

. "$EMSDK/emsdk_env.sh" &&
# No leading ':' here: an empty PATH entry would put the current directory on PATH
export PATH="$PATH:$EMSDK/upstream/bin" &&

cd "$SRC" &&
# Small build
em++ -Oz global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sMODULARIZE -sTRUSTED_TYPES -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY="$MAX_MEMORY" -sPTHREAD_POOL_SIZE="$MAX_THREADS" -sPOLYFILL=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc,_main -sEXPORT_NAME=loadBR -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I"$LIBARCHIVE/include" -I"$VOSK/src" -L"$LIBARCHIVE/lib" -larchive -L"$KALDI/src" -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L"$OPENFST/lib" -l:libfst.a -l:libfstngram.a -L"$CLAPACK_WASM" -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L"$VOSK/src" -l:vosk.a -lopfs.js -lembind -pthread -flto --pre-js pre.js -o ../devel/BrowserRecognizer.js &&
cd ../devel &&
# The worker script is not shipped as a file: drop it and point both quoting
# variants of its locateFile() lookup at the pthreadUrl variable instead
rm -f BrowserRecognizer.worker.js &&
sed -i "s/locateFile('BrowserRecognizer.worker.js')/pthreadUrl/g" BrowserRecognizer.js &&
sed -i 's/locateFile("BrowserRecognizer.worker.js")/pthreadUrl/g' BrowserRecognizer.js
|
||||||
Binary file not shown.
11
src/Makefile
11
src/Makefile
@@ -50,19 +50,12 @@ prepare:
|
|||||||
. $(EMSDK)/emsdk_env.sh && \
|
. $(EMSDK)/emsdk_env.sh && \
|
||||||
export PATH=:$$PATH:$(EMSDK)/upstream/bin
|
export PATH=:$$PATH:$(EMSDK)/upstream/bin
|
||||||
|
|
||||||
zstd: prepare
|
libarchive: prepare
|
||||||
rm -rf /tmp/zstd && \
|
|
||||||
git clone -b v1.5.5 --depth=1 https://github.com/facebook/zstd /tmp/zstd && \
|
|
||||||
cd /tmp/zstd && \
|
|
||||||
HAVE_THREAD=0 ZSTD_LEGACY_SUPPORT=0 HAVE_ZLIB=0 HAVE_LZMA=0 HAVE_LZ4=0 ZSTD_NOBENCH=1 ZSTD_NODICT=1 ZSTD_NOCOMPRESS=1 BACKTRACE=0 PREFIX=$(ZSTD) CPPFLAGS="-O3 -flto" LDFLAGS="-O3 -flto" emmake make -j$(COMPILE_JOBS) install && \
|
|
||||||
rm -rf /tmp/zstd
|
|
||||||
|
|
||||||
libarchive: | zstd
|
|
||||||
rm -rf /tmp/libarchive && \
|
rm -rf /tmp/libarchive && \
|
||||||
git clone -b v3.7.2 --depth=1 https://github.com/libarchive/libarchive /tmp/libarchive && \
|
git clone -b v3.7.2 --depth=1 https://github.com/libarchive/libarchive /tmp/libarchive && \
|
||||||
cd /tmp/libarchive && \
|
cd /tmp/libarchive && \
|
||||||
build/autogen.sh && \
|
build/autogen.sh && \
|
||||||
CPPFLAGS="-I$(ZSTD)/include -flto" LDFLAGS="-L$(ZSTD)/lib -flto" emconfigure ./configure --prefix=$(LIBARCHIVE) --without-lz4 --without-lzma --without-zlib --without-bz2lib --without-xml2 --without-expat --without-cng --without-openssl --without-libb2 --disable-bsdunzip --disable-xattr --disable-acl --disable-bsdcpio --disable-bsdcat --disable-rpath --disable-maintainer-mode --disable-dependency-tracking --enable-static --disable-shared && \
|
CPPFLAGS="-O3 -flto" LDFLAGS="-O3 -flto" emconfigure ./configure --prefix=$(LIBARCHIVE) --without-lz4 --without-lzma --without-zlib --without-bz2lib --without-xml2 --without-expat --without-cng --without-openssl --without-libb2 --without-zstd --disable-bsdunzip --disable-xattr --disable-acl --disable-bsdcpio --disable-bsdcat --disable-rpath --disable-maintainer-mode --disable-dependency-tracking --enable-static --disable-shared && \
|
||||||
emmake make -j$(COMPILE_JOBS) install && \
|
emmake make -j$(COMPILE_JOBS) install && \
|
||||||
rm -rf /tmp/libarchive
|
rm -rf /tmp/libarchive
|
||||||
|
|
||||||
|
|||||||
@@ -28,13 +28,15 @@ bool genericModel::checkModel() {
|
|||||||
}
|
}
|
||||||
void genericModel::afterFetch() {
|
void genericModel::afterFetch() {
|
||||||
thrd.setTask1([this](){
|
thrd.setTask1([this](){
|
||||||
if(!extractModel() && fs::remove("/opfs/m0dEl.tzst",tank)) {
|
if(!extractModel()) {
|
||||||
fs::current_path("/opfs");
|
fs::remove("/opfs/m0dEl.tar",tank);
|
||||||
fs::remove_all(storepath);
|
fs::current_path("/opfs", tank);
|
||||||
|
fs::remove_all(storepath, tank);
|
||||||
fireEv("_continue", "Unable to extract model", index);
|
fireEv("_continue", "Unable to extract model", index);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fs::remove("/opfs/m0dEl.tzst",tank);
|
fs::remove("/opfs/m0dEl.tar",tank);
|
||||||
|
fs::remove("README",tank);
|
||||||
std::ofstream idFile("id");
|
std::ofstream idFile("id");
|
||||||
if(!idFile.is_open()) {
|
if(!idFile.is_open()) {
|
||||||
fs::current_path("/opfs");
|
fs::current_path("/opfs");
|
||||||
@@ -55,21 +57,25 @@ void genericModel::afterFetch() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
bool genericModel::extractModel() {
|
bool genericModel::extractModel() {
|
||||||
std::string path{};
|
static std::string path{};
|
||||||
archive* src {archive_read_new()};
|
archive* src {archive_read_new()};
|
||||||
archive_entry* entry {};
|
archive* dst{archive_write_disk_new()};
|
||||||
archive_read_support_filter_all(src);
|
static archive_entry* entry{};
|
||||||
archive_read_support_format_all(src);
|
archive_read_support_format_tar(src);
|
||||||
archive_read_open_filename(src, "/opfs/m0dEl.tzst", 10240);
|
archive_read_open_filename(src, "/opfs/m0dEl.tar", 10240);
|
||||||
|
archive_write_disk_set_standard_lookup(dst);
|
||||||
|
archive_write_disk_set_options(dst, ARCHIVE_EXTRACT_NO_AUTODIR | ARCHIVE_EXTRACT_UNLINK);
|
||||||
if(archive_errno(src) != 0) return false;
|
if(archive_errno(src) != 0) return false;
|
||||||
while (archive_read_next_header(src, &entry) == ARCHIVE_OK) {
|
if(archive_errno(dst) != 0) return false;
|
||||||
|
while(archive_read_next_header2(src, entry) == ARCHIVE_OK) {
|
||||||
path = archive_entry_pathname(entry);
|
path = archive_entry_pathname(entry);
|
||||||
// Strip first component, keep relative path
|
path = "." + path.substr(path.find("/")); // Strip 1st component
|
||||||
path = "." + path.substr(path.find("/"));
|
emscripten_console_log(archive_entry_pathname(entry));
|
||||||
archive_entry_set_pathname(entry, path.c_str());
|
archive_read_extract2(src, entry, dst);
|
||||||
archive_read_extract(src, entry, ARCHIVE_EXTRACT_UNLINK | ARCHIVE_EXTRACT_NO_AUTODIR);
|
|
||||||
if(archive_errno(src) != 0) return false;
|
if(archive_errno(src) != 0) return false;
|
||||||
|
if(archive_errno(dst) != 0) return false;
|
||||||
}
|
}
|
||||||
archive_read_free(src);
|
archive_read_free(src);
|
||||||
|
archive_write_free(dst);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
10
src/pre.js
10
src/pre.js
@@ -1,4 +1,5 @@
|
|||||||
let objs = []
|
let objs = []
|
||||||
|
let dStream = new DecompressionStream("gzip")
|
||||||
Module.revokeURLs = () => {
|
Module.revokeURLs = () => {
|
||||||
URL.revokeObjectURL(pthreadUrl)
|
URL.revokeObjectURL(pthreadUrl)
|
||||||
URL.revokeObjectURL(processorUrl)
|
URL.revokeObjectURL(processorUrl)
|
||||||
@@ -96,8 +97,13 @@ Module.makeModel = async (url, storepath, id) => {
|
|||||||
if(!res.ok) {
|
if(!res.ok) {
|
||||||
return reject("Unable to download model")
|
return reject("Unable to download model")
|
||||||
}
|
}
|
||||||
// Store the downloaded model in OPFS as an uncompressed tar archive.
// A DecompressionStream can only be piped through once, so a fresh one is
// created for every download instead of reusing a shared instance.
const opfsRoot = await navigator.storage.getDirectory()
const tarHandle = await opfsRoot.getFileHandle("m0dEl.tar", {create : true})
const wStream = await tarHandle.createWritable()
let stored = true
try {
    if(res.body === null) throw new TypeError("Response has no body")
    // pipeTo() closes wStream on success and aborts it on failure,
    // so no explicit close() is needed afterwards
    await res.body.pipeThrough(new DecompressionStream("gzip")).pipeTo(wStream)
} catch (err) {
    stored = false
    // Settle the surrounding promise instead of leaving the caller waiting forever
    reject("Unable to decompress model")
}
if(stored) mdl.obj.afterFetch()
|
||||||
})()
|
})()
|
||||||
|
|||||||
Reference in New Issue
Block a user