Fixed #1 in TODO by using archive_read_data_into_fd.
This commit is contained in:
@@ -2,12 +2,14 @@
|
||||
- A speech recognizer built on Vosk that runs in the browser, inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser), but built from scratch with no code taken from it!
|
||||
- Designed with strong exception safety
|
||||
- See the examples folder for ways to use the API
|
||||
- See the devel folder for the absolutely newest build (not guaranteed to work) and the JS build script
|
||||
- See the devel folder for the newest build (not guaranteed to work) and the JS build script
|
||||
|
||||
# Additions to vosk-browser:
|
||||
- Download multiple models
|
||||
- Model storage path management (for multiple models)
|
||||
- Model ID management (for model updates)
|
||||
- Smaller JS size
|
||||
- Doesn't need another file when using AudioWorkletNode
|
||||
|
||||
# User agent notes
|
||||
## SharedArrayBuffer
|
||||
@@ -20,7 +22,7 @@ If you can't set them, you may use a HACKY workaround at *src/addCOI.js*.
|
||||
## Origin Private Filesystem (OPFS)
|
||||
Browser-recognizer needs the Emscripten WASMFS OPFS to store its model. IDBFS was considered but dropped, because there is no direct way to read from IDBFS in C++ without copying to MEMFS (basically RAM). To use OPFS safely, always:
|
||||
- Wrap ```window.loadBR()``` in a try/catch to check for OPFS availability.
|
||||
- Check if there is enough space via ```navigator.storage.estimate()``` for **model + compressed model** before calling makeModel
|
||||
- Check if there is enough space via ```navigator.storage.estimate()``` for TWICE THE MODEL SIZE before calling Module.makeModel
|
||||
|
||||
# API interface
|
||||
## JS ```window``` object
|
||||
@@ -74,7 +76,6 @@ cd Browser-recognizer &&
|
||||
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |
|
||||
|
||||
# TODO:
|
||||
- Fix libarchive extract closing issue
|
||||
- Make setSpkModel avoid spawning an extra thread
|
||||
- Top level await in API usage
|
||||
- Write examples
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -6,7 +6,6 @@ KALDI=$(realpath kaldi) &&
|
||||
VOSK=$(realpath vosk) &&
|
||||
OPENFST=$(realpath openfst) &&
|
||||
LIBARCHIVE=$(realpath libarchive) &&
|
||||
ZSTD=$(realpath zstd) &&
|
||||
CLAPACK_WASM=$(realpath clapack-wasm) &&
|
||||
|
||||
MAX_MEMORY=${MAX_MEMORY:-300mb} &&
|
||||
@@ -38,7 +37,7 @@ export PATH=:$PATH:$EMSDK/upstream/bin &&
|
||||
|
||||
cd $SRC &&
|
||||
# Small build
|
||||
em++ -Oz global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sMODULARIZE -sTRUSTED_TYPES -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sPOLYFILL=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc,_main -sEXPORT_NAME=loadBR -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto --pre-js pre.js -o ../devel/BrowserRecognizer.js &&
|
||||
em++ -O0 global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sMODULARIZE -sTRUSTED_TYPES -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sPOLYFILL=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc,_main -sEXPORT_NAME=loadBR -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto --pre-js pre.js -o ../devel/BrowserRecognizer.js &&
|
||||
cd ../devel &&
|
||||
rm -f BrowserRecognizer.worker.js &&
|
||||
sed -i "s/locateFile('BrowserRecognizer.worker.js')/pthreadUrl/g" BrowserRecognizer.js &&
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# # # # # # ## # # # # # # # # # # # #
|
||||
# #### # # ### #### # #### ##### #### # #### # # ####
|
||||
|
||||
# 1 hour build time
|
||||
# 45 min build time
|
||||
SHELL=/bin/bash
|
||||
MAX_MEMORY?=300mb
|
||||
MAX_THREADS?=2
|
||||
@@ -16,16 +16,14 @@ KALDI:=$(realpath kaldi)
|
||||
VOSK:=$(realpath vosk)
|
||||
OPENFST:=$(realpath openfst)
|
||||
LIBARCHIVE:=$(realpath libarchive)
|
||||
ZSTD:=$(realpath zstd)
|
||||
CLAPACK_WASM:=$(realpath clapack-wasm)
|
||||
|
||||
BrowserRecognizer.js: | vosk libarchive
|
||||
cd $(SRC) && \
|
||||
em++ -O3 global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sMODULARIZE -sTRUSTED_TYPES -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$(MAX_MEMORY) -sPTHREAD_POOL_SIZE=$(MAX_THREADS) -sPOLYFILL=0 -sSUPPORT_LONGJMP=0 -sEXPORT_NAME=loadBR -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sMALLOC=emmalloc -sEXPORTED_FUNCTIONS=_malloc,_main -sENVIRONMENT=web,worker -I. -I$(LIBARCHIVE)/include -I$(VOSK)/src -L$(LIBARCHIVE)/lib -larchive -L$(ZSTD)/lib -lzstd -L$(KALDI)/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$(OPENFST)/lib -l:libfst.a -l:libfstngram.a -L$(CLAPACK_WASM) -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$(VOSK)/src -l:vosk.a -lopfs.js -lembind -pthread -flto --pre-js pre.js -o ../BrowserRecognizer.js && \
|
||||
em++ -O3 global.cc genericModel.cc model.cc spkModel.cc recognizer.cc bindings.cc -sWASMFS -sWASM_BIGINT -sSINGLE_FILE -sMODULARIZE -sTRUSTED_TYPES -sEMBIND_STD_STRING_IS_UTF8 -sPTHREAD_POOL_DELAY_LOAD -sTEXTDECODER=2 -sPTHREAD_POOL_SIZE_STRICT=2 -sINITIAL_MEMORY=$MAX_MEMORY -sPTHREAD_POOL_SIZE=$MAX_THREADS -sPOLYFILL=0 -sSUPPORT_LONGJMP=0 -sEXPORTED_FUNCTIONS=_malloc,_main -sEXPORT_NAME=loadBR -sMALLOC=emmalloc -sEXPORTED_RUNTIME_METHODS=UTF8ToString,stringToUTF8OnStack -sENVIRONMENT=web,worker -I. -I$LIBARCHIVE/include -I$VOSK/src -L$LIBARCHIVE/lib -larchive -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$CLAPACK_WASM -l:CBLAS/lib/cblas.a -l:CLAPACK-3.2.1/lapack.a -l:CLAPACK-3.2.1/libcblaswr.a -l:f2c_BLAS-3.8.0/blas.a -l:libf2c/libf2c.a -L$VOSK/src -l:vosk.a -lopfs.js -lembind -pthread -flto --pre-js pre.js -o ../BrowserRecognizer.js && \
|
||||
cd .. && \
|
||||
rm -f BrowserRecognizer.worker.js && \
|
||||
sed -i 's/locateFile("BrowserRecognizer.worker.js")/pthreadUrl/g' BrowserRecognizer.js && \
|
||||
sed -i 's/let root/var root/g' BrowserRecognizer.js
|
||||
|
||||
prepare:
|
||||
sudo apt install shtool libtool autogen autotools-dev pkg-config make && \
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
#include "spkModel.h"
|
||||
#include "model.h"
|
||||
#include "recognizer.h"
|
||||
|
||||
#include <emscripten/bind.h>
|
||||
using namespace emscripten;
|
||||
|
||||
EMSCRIPTEN_BINDINGS() {
|
||||
function("setLogLevel", &vosk_set_log_level, allow_raw_pointers());
|
||||
class_<model>("model")
|
||||
|
||||
@@ -46,36 +46,33 @@ void genericModel::afterFetch() {
|
||||
}
|
||||
idFile << id;
|
||||
idFile.close();
|
||||
// I wanna give up on this thing so bad...
|
||||
std::ifstream is("./conf/model.conf");
|
||||
emscripten_console_logf("%d", is.good());
|
||||
emscripten_console_logf("%d", is.bad());
|
||||
emscripten_console_logf("%d", is.eof());
|
||||
emscripten_console_logf("%d", is.fail());
|
||||
is.close();
|
||||
//load(false);
|
||||
load(false);
|
||||
});
|
||||
}
|
||||
bool genericModel::extractModel() {
|
||||
static std::string path{};
|
||||
std::string path{};
|
||||
archive* src {archive_read_new()};
|
||||
archive* dst{archive_write_disk_new()};
|
||||
static archive_entry* entry{};
|
||||
archive_entry* entry{};
|
||||
int fd{};
|
||||
archive_read_support_format_tar(src);
|
||||
archive_read_open_filename(src, "/opfs/m0dEl.tar", 10240);
|
||||
archive_write_disk_set_standard_lookup(dst);
|
||||
archive_write_disk_set_options(dst, ARCHIVE_EXTRACT_NO_AUTODIR | ARCHIVE_EXTRACT_UNLINK);
|
||||
if(archive_errno(src) != 0) return false;
|
||||
if(archive_errno(dst) != 0) return false;
|
||||
while(archive_read_next_header2(src, entry) == ARCHIVE_OK) {
|
||||
path = archive_entry_pathname(entry);
|
||||
path = "." + path.substr(path.find("/")); // Strip 1st component
|
||||
emscripten_console_log(archive_entry_pathname(entry));
|
||||
archive_read_extract2(src, entry, dst);
|
||||
if(archive_errno(src) != 0) return false;
|
||||
if(archive_errno(dst) != 0) return false;
|
||||
// Strip 1st component, keep relative path
|
||||
path = "." + path.substr(path.find("/"));
|
||||
if(!fs::path(path).has_extension()) {
|
||||
fs::create_directory(path);
|
||||
continue;
|
||||
}
|
||||
fd = open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC);
|
||||
archive_read_data_into_fd(src, fd);
|
||||
close(fd);
|
||||
if(archive_errno(src) != 0) {
|
||||
emscripten_console_log(archive_error_string(src));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
archive_read_free(src);
|
||||
archive_write_free(dst);
|
||||
return true;
|
||||
}
|
||||
Reference in New Issue
Block a user