Compare commits

...

10 Commits

Author SHA1 Message Date
90e4390d1a Fix Vosklet browser cache handling and recognizer event bridge
- fix broken cached model loading
- cache validated TAR models instead of gzip responses
- add proper fireEv bridge for wasm/js events
- fix recognizer initialization callbacks
- fix Embind waveform return type handling
- stabilize acceptWaveform in minified builds
- add optional debug logging
2026-05-12 19:25:31 +02:00
msqr1
166ecc501b Merge pull request #31 from arbdevml/main
Updated documentaion and added a example of x-vector (voice fingerprint)
2025-09-21 22:50:14 -07:00
msqr1
a53ce186d1 Merge pull request #32 from nicknmi/main
Update README.md
2025-09-21 22:46:09 -07:00
nicknmi
9a84d3b898 Update README.md 2025-09-21 22:41:37 -07:00
Alexandro
b41c6eef3d Update Documentation.md 2025-09-17 13:35:26 +00:00
Alexandro
74cec983a3 Create x-vector.html 2025-09-17 12:45:19 +00:00
msqr1
9e36733c10 Remove async 2025-09-01 20:51:18 -07:00
msqr1
5a705a3f56 Fix #29 (fr fr fr) 2025-04-05 22:19:01 -07:00
msqr1
9e5d039ee5 Fix #29 (for real) 2025-04-04 11:19:39 -07:00
msqr1
006199d103 Fix #29 by removing compression altogether 2025-04-03 19:42:02 -07:00
25 changed files with 2277 additions and 665 deletions

3
.gitignore vendored
View File

@@ -6,4 +6,5 @@ openblas
openfst
emsdk
test.js
test.wasm
test.wasm
src/*.o

View File

@@ -9,7 +9,7 @@
| Function/Object | Description |
|-|-|
| ```Promise<Model> createModel(url: string, path: string, id: string)```<br><br>```Promise<SpkModel> createSpkModel(url: string, path: string, id: string)``` | Create a ```Model``` or ```SpkModel```, model files must be directly under the model root, and compressed model must be in ```.tar.gz```/```.tgz``` format. Tar format must be USTAR. If:<br>- ```path``` contains valid model files and ```id``` is the same, there will not be a fetch from ```url```.<br>- ```path``` doesn't contain valid model files, or if it contains valid model files but ```id``` is different, there will be a fetch from ```url```, and the model is stored with ```id```. Models are thread-safe and reusable across recognizers. |
| ```Promise<Recognizer> createRecognizer(model: Model, sampleRate: float)```<br><br>```Promise<Recognizer> createRecognizerWithSpkModel(model: Model, spkModel: spkModel, sampleRate: float)```<br><br>```Promise<Recognizer> createRecognizerWithGrm(model: Model, grammar: string, sampleRate: float)``` | Create a ```Recognizer``` |
| ```Promise<Recognizer> createRecognizer(model: Model, sampleRate: float)```<br><br>```Promise<Recognizer> createRecognizerWithSpkModel(model: Model, sampleRate: float, spkModel: SpkModel)```<br><br>```Promise<Recognizer> createRecognizerWithGrm(model: Model, grammar: string, sampleRate: float)``` | Create a ```Recognizer``` |
| ```setLogLevel(lvl: int)``` | Set log level for Kaldi messages (default: ```0```: Info) <br>```-2```: Error<br>```-1```: Warning<br>```1```: Verbose<br>```2```: More verbose<br>```3```: Debug |
| ```Promise<AudioWorkletNode> createTransferer(ctx: AudioContext, bufferSize: int)``` | Create a node that transfers its inputs back to the main thread with a custom buffer size (must be a multiple of 128). Its port's ```onmessage``` handler can be set to receive the audio data. It has 1 input with 1 channel and no output. The larger the buffer, the less the audio breaks up, but the higher the latency. The recommended value is around ```128 * 150```. |
| ```Promise<void> cleanUp()``` | A convenience function that calls ```delete()``` on all objects and revokes all URLs. **Run this when you're done!** |
@@ -74,4 +74,4 @@ cd Vosklet/src &&
| INITIAL_MEMORY | Set initial memory, valid suffixes: kb, mb, gb, tb or none (bytes) | ```315mb``` as [recommended](https://alphacephei.com/vosk/models) plus a bit of leeway. This memory will grow if usage exceeds this value. |
| MAX_THREADS | Set the max number of threads (>=1), this should be equal to the number of recognizers used in the program | ```1``` |
| JOBS | Set the number of jobs (threads) when building | ```$(nproc)``` |
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |

View File

@@ -1,3 +0,0 @@
#### The file Vosklet.js in this folder, used by the examples and the outer [README.md](../README.md), has been set to decompress explicitly using ```DecompressionStream``` (instead of implicit browser decompression) because I can't set a third-party (Github's) server response header. You can utilize this if you run into the same situation. Otherwise, please use the outer Vosklet.js instead.
#### The motivation is that it will work right away when put into a HTML file. You can just make a local copy and try everything out quickly

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -1,154 +0,0 @@
/**
* @fileoverview
* @suppress {undefinedVars|checkTypes}
*/
if(ENVIRONMENT_IS_WEB) {
// 'var' to expose this outside the if
// Registry of every wrapper object created in this file: CommonModel and Recognizer
// push themselves here in their constructors, and the position in this array is the
// index handed to the wasm constructors.
var objs = [];
// Event type names; the wasm-to-JS event bridge selects one by numeric index.
var events = ['status', 'partialResult', 'result'];
// Promise for the 'Vosklet' Cache; returned as-is by Module['getModelCache'].
let _cache = caches.open('Vosklet');
// Blob URL of a self-invoking script that registers the 'VoskletTransferer'
// AudioWorklet processor. The processor appends each block of the first channel of
// its first input to a Float32Array of processorOptions[0] samples, advancing by 128
// per call; once full it posts the array to its port (transferring the underlying
// buffer) and allocates a fresh one. process() always returns true.
// The arrow function below is serialized with toString(), so no comments are placed
// inside it (they would become part of the generated script text).
let processorURL = URL.createObjectURL(new Blob(['(', (() => {
registerProcessor('VoskletTransferer', class extends AudioWorkletProcessor {
constructor(opts) {
super();
this.filled = 0;
this.bufSize = opts.processorOptions[0];
this.buf = new Float32Array(this.bufSize);
}
process(inputs) {
if(inputs[0][0]) {
this.buf.set(inputs[0][0], this.filled);
this.filled += 128;
if(this.filled >= this.bufSize) {
this.filled = 0;
this.port.postMessage(this.buf, [this.buf.buffer]);
this.buf = new Float32Array(this.bufSize);
}
}
return true;
}
})
}).toString(), ')()'], { type: 'text/javascript' }));
/**
 * JS-side wrapper for a wasm 'CommonModel' (either a recognition model or a
 * speaker model). The embind object lives in `this.obj`; load success or
 * failure is reported through a 'status' event dispatched on this wrapper.
 */
class CommonModel extends EventTarget {
  constructor() {
    super();
    // Register so the wasm side can address this wrapper by its index in objs.
    objs.push(this);
  }
  /**
   * Release the embind object, if one was created.
   * `obj` is only assigned at the end of mk(); when the fetch in mk() fails the
   * wrapper stays registered in objs without an `obj`, and cleanUp() would
   * otherwise crash on it.
   */
  delete() {
    if (this.obj) this.obj.delete();
  }
  /**
   * Fetch (or load from the 'Vosklet' cache) a gzip-compressed tar model,
   * copy the decompressed tar into wasm memory and construct the wasm model.
   *
   * @param url       Where to download the model from when it is not cached under `id`.
   * @param storepath Cache key path the model is stored under.
   * @param id        Identifier stored as the query part of the cache key; a
   *                  mismatch forces a new download.
   * @param normalMdl true for a recognition model (gets `findWord`), false for
   *                  a speaker model.
   * @returns Promise resolving to the wrapper on a 'status' event without
   *          detail, rejecting with the detail otherwise.
   * @throws A string when the HTTP response is not ok.
   */
  static async mk(url, storepath, id, normalMdl) {
    let mdl = new CommonModel();
    // Capture the registry index now. The awaits below yield to other code
    // that may push more wrappers, so objs.length - 1 would no longer point at
    // this model by the time the wasm object is constructed.
    let index = objs.length - 1;
    let result = new Promise((resolve, reject) => {
      mdl.addEventListener('status', ev => {
        if(!ev.detail) {
          if(normalMdl) mdl['findWord'] = word => mdl.obj['findWord'](word)
          resolve(mdl)
        }
        else reject(ev.detail)
      }, { once: true })
    });
    let cache = await caches.open('Vosklet');
    let req = (await cache.keys(storepath, { ignoreSearch: true }))[0]
    let tar, res;
    if (typeof req == 'undefined' || req.url.split('?')[1] != id) {
      // Caching already handled explicitly
      res = await fetch(url, { cache: 'no-store' });
      if (!res.ok) throw 'Unable to fetch model, status: ' + res.status;
      await cache.put(storepath + '?' + id, res.clone());
    }
    else res = await cache.match(req)
    // The cached/fetched body is gzip data; decompress it to the raw tar.
    tar = await new Response(res.body.pipeThrough(new DecompressionStream('gzip'))).arrayBuffer();
    // Copy the tar into wasm memory and hand its address and size to the wasm side.
    let tarStart = _malloc(tar.byteLength);
    HEAPU8.set(new Uint8Array(tar), tarStart);
    mdl.obj = new Module['CommonModel'](index, normalMdl, tarStart, tar.byteLength);
    return result;
  }
}
// JS-side wrapper around the embind 'Recognizer' object (kept in this.obj).
// The constructor returns a Proxy so that property reads not satisfied by the
// wrapper itself are forwarded to the wasm object.
class Recognizer extends EventTarget {
constructor() {
super();
// Closure workaround to prevent acceptWaveform from getting removed
// Copies the samples into a newly malloc'ed wasm buffer (4 bytes per sample) and
// passes its address and sample count to the wasm recognizer.
// NOTE(review): this wrapper never frees the buffer; presumably the wasm side
// releases it after processing - confirm.
this['acceptWaveform'] = audioData => {
let start = _malloc(audioData.length * 4);
HEAPF32.set(audioData, start / 4);
this.obj['acceptWaveform'](start, audioData.length);
}
// Register so the wasm side can address this wrapper by its index in objs.
objs.push(this);
return new Proxy(this, {
// Lookup order: undefined when neither side has the property; otherwise the
// wrapper's own (truthy) property, else the wasm object's property. Values that
// have a bind method are bound to the object they came from.
get(self, prop, _) {
if(self[prop] == undefined && self.obj[prop] == undefined) return;
let p = self[prop];
if(p) return p.bind ? p.bind(self) : p;
p = self.obj[prop];
return p.bind ? p.bind(self.obj) : p;
}
})
}
// Asks the wasm side to shut the recognizer down via safeDelete(processCurrent) and
// resolves once the next 'status' event arrives, at which point the embind object
// is deleted.
async delete(processCurrent = false) {
let result = new Promise((resolve, _) => this.addEventListener('status', _ => {
this.obj.delete();
resolve();
}, { once: true }));
this.obj['safeDelete'](processCurrent);
return result;
}
// Creates a wrapper plus its wasm recognizer. mode 1: model only; mode 2: model and
// speaker model; any other mode: model and grammar. Resolves with the wrapper on a
// 'status' event without detail, rejects with the detail otherwise.
// objs.length - 1 is this wrapper's index: it was pushed by the constructor just
// above and nothing is awaited in between.
static async mk(model, sampleRate, mode, grammar, spkModel) {
let rec = new Recognizer();
let result = new Promise((resolve, reject) => {
rec.addEventListener('status', ev => {
if(!ev.detail) resolve(rec);
else reject(ev.detail);
}, { once: true });
})
switch(mode) {
case 1:
rec.obj = new Module['Recognizer'](objs.length - 1, sampleRate, model);
break;
case 2:
rec.obj = new Module['Recognizer'](objs.length -1, sampleRate, model, spkModel);
break;
default:
// NOTE(review): the trailing 0 presumably only selects the five-argument
// (grammar) constructor overload - confirm against the embind bindings.
rec.obj = new Module['Recognizer'](objs.length - 1, sampleRate, model, grammar, 0);
}
return result;
}
}
// Public API object. Keys are quoted so their names are kept as written
// (the file is processed by Closure, see the header annotation).
Module = {
// Returns the promise for the 'Vosklet' Cache opened at start-up.
'getModelCache': () => _cache,
// Deletes every registered wrapper in creation order, then revokes the worklet blob URL.
'cleanUp': async () => {
for(let obj of objs) await obj.delete();
URL.revokeObjectURL(processorURL);
},
// Loads the transferer worklet into ctx and returns a node with one mono input
// and no outputs; bufSize is forwarded to the processor as processorOptions[0].
'createTransferer': async (ctx, bufSize) => {
await ctx.audioWorklet.addModule(processorURL);
return new AudioWorkletNode(ctx, 'VoskletTransferer', {
channelCountMode: 'explicit',
numberOfInputs: 1,
numberOfOutputs: 0,
channelCount: 1,
processorOptions: [bufSize]
});
},
// Recognition model (normalMdl = true).
'createModel': (url, storepath, id) =>
CommonModel.mk(url, storepath, id, true),
// Speaker model (normalMdl = false).
'createSpkModel': (url, storepath, id) =>
CommonModel.mk(url, storepath, id, false),
'createRecognizer': (model, sampleRate) =>
Recognizer.mk(model.obj, sampleRate, 1),
// NOTE(review): parameter order here is (model, sampleRate, grammar), while
// Documentation.md lists (model, grammar, sampleRate) - confirm which is intended.
'createRecognizerWithGrm': (model, sampleRate, grammar) =>
Recognizer.mk(model.obj, sampleRate, 3, grammar, null),
'createRecognizerWithSpkModel': (model, sampleRate, spkModel) =>
Recognizer.mk(model.obj, sampleRate, 2, null, spkModel.obj)
}
}

View File

@@ -1,21 +1,21 @@
<!DOCTYPE html>
<html>
<head>
<script src="https://cdn.jsdelivr.net/gh/msqr1/Vosklet@1.2.1/Examples/Vosklet.js" async defer></script>
<script src="../Vosklet.js" async defer></script>
<script>
async function start() {
// All data is collected and transfered to the main thread so the AudioContext won't output anything. Set sinkId type to none to save power
let ctx = new AudioContext({sinkId: {type: "none"}});
let module = await loadVosklet();
let model = await module.createModel("https://ccoreilly.github.io/vosk-browser/models/vosk-model-small-en-us-0.15.tar.gz", "English", "vosk-model-small-en-us-0.15");
let recognizer = await module.createRecognizer(model, ctx.sampleRate);
// Listen for result and partial result
recognizer.addEventListener("result", ev => console.log("Result: ", ev.detail));
recognizer.addEventListener("partialResult", ev => console.log("Partial result: ", ev.detail));
// Fetch, decode, and recognize the .wav
let wav = await fetch("https://cdn.jsdelivr.net/gh/msqr1/Vosklet/examples/1to10-en.wav");
let wav = await fetch("https://cdn.jsdelivr.net/gh/msqr1/Vosklet/Examples/1to10-en.wav");
let audioBuf = await ctx.decodeAudioData(await wav.arrayBuffer());
recognizer.acceptWaveform(audioBuf.getChannelData(0));
}

View File

@@ -1,42 +1,45 @@
<!DOCTYPE html>
<html>
<head>
<script src="https://cdn.jsdelivr.net/gh/msqr1/Vosklet@1.2.1/Examples/Vosklet.js" async defer></script>
<script>
async function start() {
// All data is collected and transfered to the main thread so the AudioContext won't output anything. Set sinkId type to none to save power
let ctx = new AudioContext({sinkId: {type: "none"}});
// Setup microphone
let micNode = ctx.createMediaStreamSource(await navigator.mediaDevices.getUserMedia({
video: false,
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1
},
}));
<head>
<script src="../Vosklet.js" async defer></script>
<script>
async function start() {
// Load Vosklet module, model and recognizer
let module = await loadVosklet();
let model = await module.createModel("https://ccoreilly.github.io/vosk-browser/models/vosk-model-small-en-us-0.15.tar.gz", "English", "vosk-model-small-en-us-0.15");
let recognizer = await module.createRecognizer(model, ctx.sampleRate);
// All data is collected and transfered to the main thread so the AudioContext won't output anything. Set sinkId type to none to save power
let ctx = new AudioContext({ sinkId: { type: "none" } });
// Listen for result and partial result
recognizer.addEventListener("result", ev => console.log("Result: ", ev.detail));
recognizer.addEventListener("partialResult", ev => console.log("Partial result: ", ev.detail));
// Setup microphone
let micNode = ctx.createMediaStreamSource(await navigator.mediaDevices.getUserMedia({
video: false,
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1
},
}));
// Create a transferer node to get audio data on the main thread
let transferer = await module.createTransferer(ctx, 128 * 150);
// Load Vosklet module, model and recognizer
let module = await loadVosklet();
let model = await module.createModel("https://ccoreilly.github.io/vosk-browser/models/vosk-model-small-en-us-0.15.tar.gz", "English", "vosk-model-small-en-us-0.15");
let recognizer = await module.createRecognizer(model, ctx.sampleRate);
// Recognize data on arrival
transferer.port.onmessage = ev => recognizer.acceptWaveform(ev.data);
// Listen for result and partial result
recognizer.addEventListener("result", ev => console.log("Result: ", ev.detail));
recognizer.addEventListener("partialResult", ev => console.log("Partial result: ", ev.detail));
// Create a transferer node to get audio data on the main thread
let transferer = await module.createTransferer(ctx, 128 * 150);
// Recognize data on arrival
transferer.port.onmessage = ev => recognizer.acceptWaveform(ev.data);
// Connect transferer to microphone
micNode.connect(transferer);
}
</script>
<!-- Start and create audio context only as a result of user's action -->
<button onclick="start()">Start</button>
</head>
// Connect transferer to microphone
micNode.connect(transferer);
}
</script>
<!-- Start and create audio context only as a result of user's action -->
<button onclick="start()">Start</button>
</head>
</html>

94
Examples/x-vector.html Normal file
View File

@@ -0,0 +1,94 @@
<!DOCTYPE html>
<html lang="en">
<head>
<script src="https://cdn.jsdelivr.net/gh/msqr1/Vosklet@1.2.1/Examples/Vosklet.js" async defer></script>
<script>
async function start() {
  // Marker returned by attempt() when a setup step throws.
  const FAILED = Symbol("failed");

  // Runs one asynchronous setup step. Logs okMsg when it succeeds and returns
  // its value; logs errMsg together with the error and returns FAILED otherwise.
  const attempt = async (step, okMsg, errMsg) => {
    try {
      const value = await step();
      console.log(okMsg);
      return value;
    } catch (error) {
      console.error(errMsg, error);
      return FAILED;
    }
  };

  console.log("Starting speech recognition...");

  // Audio context plus a microphone source node feeding it.
  const ctx = new AudioContext({sinkId: {type: "none"}});
  const micConstraints = {
    video: false,
    audio: {
      echoCancellation: true,
      noiseSuppression: true,
      channelCount: 1
    },
  };
  const micNode = ctx.createMediaStreamSource(
    await navigator.mediaDevices.getUserMedia(micConstraints)
  );
  console.log("Microphone connected.");

  const vosklet = await attempt(
    () => loadVosklet(),
    "Vosklet module loaded.",
    "Error loading Vosklet module:"
  );
  if (vosklet === FAILED) return;

  const model = await attempt(
    () => vosklet.createModel(
      "https://ccoreilly.github.io/vosk-browser/models/vosk-model-small-en-us-0.15.tar.gz",
      "English",
      "vosk-model-small-en-us-0.15"
    ),
    "Speech recognition model loaded.",
    "Error loading speech recognition model:"
  );
  if (model === FAILED) return;

  const spkModel = await attempt(
    () => vosklet.createSpkModel(
      "https://arbdevml.github.io/x-vector/vosk-model-spk-0.4.tar.gz",
      "vosk-model-spk-0.4",
      "vosk-model-spk-0.4"
    ),
    "Speaker identification model loaded.",
    "Error loading speaker identification model:"
  );
  if (spkModel === FAILED) return;

  const recognizer = await attempt(
    () => vosklet.createRecognizerWithSpkModel(model, ctx.sampleRate, spkModel),
    "Recognizer with speaker model created.",
    "Error creating recognizer with speaker model:"
  );
  if (recognizer === FAILED) return;

  // Final results arrive as a JSON string in ev.detail; log the parsed form
  // and the speaker vector it carries.
  const onResult = ev => {
    const parsed = JSON.parse(ev.detail);
    console.log("typeof ev.detail:", typeof ev.detail);
    console.log("typeof result:", typeof parsed);
    console.log("Speech recognized:", parsed);
    console.log("spk:", parsed.spk);
    console.log("ev:", ev);
  };
  const onPartialResult = ev => {
    console.log("Partial result:", ev.detail);
  };
  recognizer.addEventListener("result", onResult);
  recognizer.addEventListener("partialResult", onPartialResult);

  const transferer = await attempt(
    () => vosklet.createTransferer(ctx, 128 * 150),
    "Transferer created.",
    "Error creating transferer:"
  );
  if (transferer === FAILED) return;

  // Feed every audio buffer delivered by the transferer to the recognizer.
  transferer.port.onmessage = ev => recognizer.acceptWaveform(ev.data);
  micNode.connect(transferer);
  console.log("Microphone data connected to Vosklet.");
}
</script>
</head>
<body>
<button onclick="start()">Start</button>
</body>
</html>

View File

@@ -1,7 +1,7 @@
# Overview
- A fast, lightweight, actively maintained speech recognizer in the browser with total brotlied (used by JSDelivr) size of **under a megabyte** (614 KB)
- Live Demo (ASR in 20 languages): https://msqr1-github-io.pages.dev/Vosklet
- Inspired by [vosk-browser](https://github.com/ccoreilly/vosk-browser)
- Inspired by vosk-browser by [ccoreilly](https://github.com/ccoreilly)
# Documentation
- See [Documentation.md](Documentation.md)

66
Vosklet.d.ts vendored Normal file
View File

@@ -0,0 +1,66 @@
// vosklet.d.ts
// Global augmentation: adds loadVosklet() to Window; it resolves to the
// Module interface declared in this file.
// NOTE(review): presumably provided by the Vosklet.js script (the examples call
// loadVosklet() after including it) - confirm.
declare global {
interface Window {
loadVosklet(): Promise<Module>;
}
}
/**
 * Names of the endpointer modes exposed by the wasm bindings
 * (`enum_<VoskEndpointerMode>("EpMode")`): `ANSWER_DEFAULT`, `ANSWER_SHORT`,
 * `ANSWER_LONG` and `ANSWER_VERY_LONG`.
 *
 * NOTE(review): the bindings register these through Embind's `enum_`; whether
 * the runtime values are plain strings or Embind enum objects should be
 * confirmed against the generated module.
 */
export type EpMode =
  | "ANSWER_DEFAULT"
  | "ANSWER_SHORT"
  | "ANSWER_LONG"
  | "ANSWER_VERY_LONG";
/**
 * The Vosklet API object that `loadVosklet()` resolves to.
 */
export interface Module {
  /**
   * Create a recognition model.
   * @param url Where to fetch the model archive from when it is not already stored.
   * @param path Storage path of the model.
   * @param id Identifier the stored model is tagged with; a different id triggers a new fetch.
   */
  createModel(url: string, path: string, id: string): Promise<Model>;
  /** Create a speaker model; parameters as for {@link Module.createModel}. */
  createSpkModel(url: string, path: string, id: string): Promise<SpkModel>;
  /** Create a recognizer for `model` at the given sample rate. */
  createRecognizer(model: Model, sampleRate: number): Promise<Recognizer>;
  /**
   * Create a recognizer that also uses a speaker model.
   * The sample rate comes before the speaker model, as in Documentation.md
   * and the x-vector example.
   */
  createRecognizerWithSpkModel(
    model: Model,
    sampleRate: number,
    spkModel: SpkModel
  ): Promise<Recognizer>;
  /**
   * Create a recognizer restricted to a grammar.
   * NOTE(review): Documentation.md lists `(model, grammar, sampleRate)`;
   * confirm the runtime wrapper uses the same order.
   */
  createRecognizerWithGrm(
    model: Model,
    grammar: string,
    sampleRate: number
  ): Promise<Recognizer>;
  /** Set the log level for Kaldi messages. */
  setLogLevel(level: number): void;
  /**
   * Create a worklet node that transfers its input audio back to the main
   * thread in buffers of `bufferSize` samples (a multiple of 128).
   */
  createTransferer(
    ctx: AudioContext,
    bufferSize: number
  ): Promise<AudioWorkletNode>;
  /** Call `delete()` on all created objects and revoke all URLs. */
  cleanUp(): Promise<void>;
  /** The cache that models are stored in. */
  getModelCache(): Promise<Cache>;
  /** Endpointer mode enumeration exposed by the wasm bindings. */
  EpMode: EpMode;
}
/** A recognition model created by `Module.createModel`. */
export interface Model {
/**
 * Look a word up in the model.
 * NOTE(review): the meaning of the returned number is defined by Vosk's
 * `vosk_model_find_word` - confirm before relying on specific values.
 */
findWord(word: string): number;
/** Release the underlying wasm object. */
delete(): void;
}
/** A speaker model created by `Module.createSpkModel`. */
export interface SpkModel {
/** Release the underlying wasm object. */
delete(): void;
}
/**
 * A speech recognizer. Recognition output is delivered as `"result"` and
 * `"partialResult"` events whose `detail` carries the result text.
 */
export interface Recognizer extends EventTarget {
  /** Feed a block of audio samples to the recognizer. */
  acceptWaveform(audioData: Float32Array): void;
  /** Reset the recognizer state (bound to `vosk_recognizer_reset`). */
  reset(): void;
  /** Enable or disable word-level information in results. */
  setWords(words: boolean): void;
  /** Enable or disable word-level information in partial results. */
  setPartialWords(partialWords: boolean): void;
  /** Enable or disable NLSML output. */
  setNLSML(nlsml: boolean): void;
  /** Set the maximum number of alternatives. */
  setMaxAlternatives(alts: number): void;
  /** Replace the recognizer's grammar. */
  setGrm(grammar: string): void;
  /** Attach a speaker model to the recognizer. */
  setSpkModel(model: SpkModel): void;
  /** Select the endpointer mode. */
  setEndpointerMode(mode: EpMode): void;
  /** Set the endpointer delays. */
  setEndpointerDelays(
    tStartMax: number,
    tEnd: number,
    tMax: number
  ): void;
  /**
   * Delete the recognizer.
   * NOTE(review): the exact effect of `processCurrent` on queued audio should
   * be confirmed against the wasm implementation.
   */
  delete(processCurrent?: boolean): Promise<void>;
}

1597
Vosklet.js

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -4,32 +4,32 @@
#include <emscripten/bind.h>
using namespace emscripten;
EMSCRIPTEN_BINDINGS() {
EMSCRIPTEN_BINDINGS()
{
function("setLogLevel", &vosk_set_log_level);
enum_<VoskEndpointerMode>("EpMode")
.value("ANSWER_DEFAULT", VOSK_EP_ANSWER_DEFAULT)
.value("ANSWER_SHORT", VOSK_EP_ANSWER_SHORT)
.value("ANSWER_LONG", VOSK_EP_ANSWER_LONG)
.value("ANSWER_VERY_LONG", VOSK_EP_ANSWER_VERY_LONG);
.value("ANSWER_DEFAULT", VOSK_EP_ANSWER_DEFAULT)
.value("ANSWER_SHORT", VOSK_EP_ANSWER_SHORT)
.value("ANSWER_LONG", VOSK_EP_ANSWER_LONG)
.value("ANSWER_VERY_LONG", VOSK_EP_ANSWER_VERY_LONG);
class_<CommonModel>("CommonModel")
.constructor<int, bool, int, int>(return_value_policy::take_ownership())
.function("findWord", &CommonModel::findWord);
.constructor<int, bool, int, int>(return_value_policy::take_ownership())
.function("findWord", &CommonModel::findWord);
class_<Recognizer>("Recognizer")
.constructor<int, float, CommonModel*>(return_value_policy::take_ownership())
.constructor<int, float, CommonModel*, CommonModel*>(return_value_policy::take_ownership())
.constructor<int, float, CommonModel*, const std::string&, int>(return_value_policy::take_ownership())
.function("safeDelete", &Recognizer::safeDelete)
.function("acceptWaveform", &Recognizer::acceptWaveform)
.function("reset", &Recognizer::reset)
.function("setEndpointerMode", &Recognizer::setEndpointerMode)
.function("setEndpointerDelays", &Recognizer::setEndpointerDelays)
.function("setWords", &Recognizer::setWords)
.function("setPartialWords", &Recognizer::setPartialWords)
.function("setGrm", &Recognizer::setGrm)
.function("setNLSML", &Recognizer::setNLSML)
.function("setSpkModel", &Recognizer::setSpkModel, allow_raw_pointers())
.function("setMaxAlternatives", &Recognizer::setMaxAlternatives);
.constructor<int, float, CommonModel *>(return_value_policy::take_ownership())
.constructor<int, float, CommonModel *, CommonModel *>(return_value_policy::take_ownership())
.constructor<int, float, CommonModel *, const std::string &, int>(return_value_policy::take_ownership())
.function("acceptWaveform", &Recognizer::acceptWaveform, allow_raw_pointers())
.function("reset", &Recognizer::reset)
.function("setEndpointerMode", &Recognizer::setEndpointerMode)
.function("setEndpointerDelays", &Recognizer::setEndpointerDelays)
.function("setWords", &Recognizer::setWords)
.function("setPartialWords", &Recognizer::setPartialWords)
.function("setGrm", &Recognizer::setGrm)
.function("setNLSML", &Recognizer::setNLSML)
.function("setSpkModel", &Recognizer::setSpkModel, allow_raw_pointers())
.function("setMaxAlternatives", &Recognizer::setMaxAlternatives);
};

View File

@@ -1,19 +1,19 @@
#include "CommonModel.h"
#include "FireEv.h"
CommonModel::CommonModel(int index, bool normalMdl, int tarStart, int tarSize) :
normalMdl{normalMdl}, index{index}
CommonModel::CommonModel(int index, bool normalMdl, int tarStart, int tarSize) : normalMdl{normalMdl}, index{index}
{
globalPool.exec([this, tarStart, tarSize]{
extractAndLoad(reinterpret_cast<unsigned char*>(tarStart), tarSize);
});
extractAndLoad(reinterpret_cast<unsigned char *>(tarStart), tarSize);
}
void CommonModel::extractAndLoad(unsigned char* tar, int tarSize) {
void CommonModel::extractAndLoad(unsigned char *tar, int tarSize)
{
// Map index onto [A-Z]
const char storepath[3]{'/', static_cast<char>(index % 26 + 'A')};
int res{untar(tar, tarSize, storepath)};
free(tar);
const char* untarErr{};
switch(res) {
const char *untarErr{};
switch (res)
{
case IncorrectFormat:
untarErr = "Untar: Incorrect tar format, must be USTAR";
break;
@@ -29,20 +29,29 @@ void CommonModel::extractAndLoad(unsigned char* tar, int tarSize) {
case FailedClose:
untarErr = "Untar: Unable to close file after write";
};
if(untarErr != nullptr) {
fireEv(index, Event::status, untarErr);
if (untarErr != nullptr)
{
fireEv(index, untarErr);
return;
}
if(normalMdl) mdl = vosk_model_new(storepath);
else mdl = vosk_spk_model_new(storepath);
if(normalMdl ? std::get<VoskModel*>(mdl) != nullptr : std::get<VoskSpkModel*>(mdl) != nullptr) fireEv(index, status);
else fireEv(index, status, "Unable to load model for recognition");
if (normalMdl)
mdl = vosk_model_new(storepath);
else
mdl = vosk_spk_model_new(storepath);
if (normalMdl ? std::get<VoskModel *>(mdl) != nullptr : std::get<VoskSpkModel *>(mdl) != nullptr)
fireEv(index);
else
fireEv(index, "Unable to load model for recognition");
fs::remove_all(storepath);
}
int CommonModel::findWord(std::string word) {
return vosk_model_find_word(std::get<VoskModel*>(mdl), word.c_str());
int CommonModel::findWord(std::string word)
{
return vosk_model_find_word(std::get<VoskModel *>(mdl), word.c_str());
}
CommonModel::~CommonModel() {
if(normalMdl) vosk_model_free(std::get<VoskModel*>(mdl));
else vosk_spk_model_free(std::get<VoskSpkModel*>(mdl));
CommonModel::~CommonModel()
{
if (normalMdl)
vosk_model_free(std::get<VoskModel *>(mdl));
else
vosk_spk_model_free(std::get<VoskSpkModel *>(mdl));
}

18
src/FireEv.cc Normal file
View File

@@ -0,0 +1,18 @@
#include <emscripten.h>
// JS implementation of fireEv(idx, msgPtr), defined through EM_JS.
// - msgPtr is converted with UTF8ToString; a null pointer becomes a null message.
// - The target is globalThis.__voskletObjs[idx]; when it is missing an error is
//   logged to the console and nothing is dispatched.
// - Otherwise a CustomEvent carrying the message in `detail` is dispatched on it.
// NOTE(review): the event type passed to CustomEvent is the empty string, so only
// listeners registered for "" receive it - confirm this matches the JS wrapper.
EM_JS(void, fireEv, (int idx, const char *msgPtr), {
const msg = msgPtr ? UTF8ToString(msgPtr) : null;
const obj =
globalThis.__voskletObjs &&
globalThis.__voskletObjs[idx];
if (!obj) {
console.error("fireEv: unknown object index", idx);
return;
}
obj.dispatchEvent(new CustomEvent("", {
detail: msg
}));
});

12
src/FireEv.h Normal file
View File

@@ -0,0 +1,12 @@
// FireEv.h
#pragma once
/* C-linkage declaration so both C and C++ translation units can call fireEv. */
#ifdef __cplusplus
extern "C" {
#endif
/* Dispatch an event to the JS object registered under idx; msgPtr is the
 * message text, or NULL for no message.
 * NOTE(review): some call sites pass only the index (fireEv(index)); this
 * declaration has no default for msgPtr, so confirm how those calls resolve. */
void fireEv(int idx, const char *msgPtr);
#ifdef __cplusplus
}
#endif

View File

@@ -1,58 +0,0 @@
diff --git a/src/configure b/src/configure
index fc3aee6..c93e4e8 100755
--- a/src/configure
+++ b/src/configure
@@ -1261,14 +1261,7 @@ or try another math library, e.g. --mathlib=OPENBLAS (Kaldi may be slower)."
** You can also use other matrix algebra libraries. For information, see:
** http://kaldi-asr.org/doc/matrixwrap.html"
fi
- if [ -f $OPENBLASROOT/lib/libopenblas.so ]; then
- OPENBLASLIBDIR=$OPENBLASROOT/lib
- elif [ -f $OPENBLASROOT/lib64/libopenblas.so ]; then
- # in REDHAT/CentOS package installs, the library is located here
- OPENBLASLIBDIR=$OPENBLASROOT/lib64
- else
- failure "Expected to find the file $OPENBLASROOT/lib/libopenblas.so"
- fi
+ OPENBLASLIBDIR=$OPENBLASROOT/lib
if [ -f $OPENBLASROOT/include/cblas.h ] ; then
OPENBLASINCDIR=$OPENBLASROOT/include
elif [ -f $OPENBLASROOT/include/openblas/cblas.h ] ; then
diff --git a/src/ivector/ivector-extractor.cc b/src/ivector/ivector-extractor.cc
index c3a1222..71d3725 100644
--- a/src/ivector/ivector-extractor.cc
+++ b/src/ivector/ivector-extractor.cc
@@ -195,7 +195,7 @@ void IvectorExtractor::ComputeDerivedVars() {
// could because some tasks finish before others.
{
TaskSequencerConfig sequencer_opts;
- sequencer_opts.num_threads = g_num_threads;
+ sequencer_opts.num_threads = 0;
TaskSequencer<IvectorExtractorComputeDerivedVarsClass> sequencer(
sequencer_opts);
for (int32 i = 0; i < NumGauss(); i++)
diff --git a/src/matrix/Makefile b/src/matrix/Makefile
index 398179a..c903fbf 100644
--- a/src/matrix/Makefile
+++ b/src/matrix/Makefile
@@ -10,7 +10,6 @@ include ../kaldi.mk
# you can uncomment matrix-lib-speed-test if you want to do the speed tests.
-TESTFILES = matrix-lib-test sparse-matrix-test numpy-array-test #matrix-lib-speed-test
OBJFILES = kaldi-matrix.o kaldi-vector.o packed-matrix.o sp-matrix.o tp-matrix.o \
matrix-functions.o qr.o srfft.o compressed-matrix.o \
diff --git a/src/util/kaldi-thread.cc b/src/util/kaldi-thread.cc
index 4573e24..4af4e73 100644
--- a/src/util/kaldi-thread.cc
+++ b/src/util/kaldi-thread.cc
@@ -22,7 +22,7 @@
#include "util/kaldi-thread.h"
namespace kaldi {
-int32 g_num_threads = 4; // Initialize this global variable.
+int32 g_num_threads = 1; // Initialize this global variable.
MultiThreadable::~MultiThreadable() {
// default implementation does nothing

View File

@@ -1,102 +1,88 @@
#include "Recognizer.h"
#include "FireEv.h"
#include <atomic>
const char* recognizerInitErr{"Unable to initialize recognizer"};
Recognizer::Recognizer(int index, float sampleRate, CommonModel* model) :
rec{vosk_recognizer_new(std::get<VoskModel*>(model->mdl), sampleRate)} {
if(rec == nullptr) fireEv(index, Event::status, recognizerInitErr);
else globalPool.exec([this, index]{main(index);});
const char *recognizerInitErr{"Unable to initialize recognizer"};
Recognizer::Recognizer(int index, float sampleRate, CommonModel *model) : rec{vosk_recognizer_new(std::get<VoskModel *>(model->mdl), sampleRate)}
{
if (rec == nullptr)
fireEv(index, recognizerInitErr);
else
fireEv(index);
}
Recognizer::Recognizer(int index, float sampleRate, CommonModel* model, CommonModel* spkModel) :
rec{vosk_recognizer_new_spk(std::get<VoskModel*>(model->mdl), sampleRate, std::get<VoskSpkModel*>(spkModel->mdl))} {
if(rec == nullptr) fireEv(index, Event::status, recognizerInitErr);
else globalPool.exec([this, index]{main(index);});
Recognizer::Recognizer(int index, float sampleRate, CommonModel *model, CommonModel *spkModel) : rec{vosk_recognizer_new_spk(std::get<VoskModel *>(model->mdl), sampleRate, std::get<VoskSpkModel *>(spkModel->mdl))}
{
if (rec == nullptr)
fireEv(index, recognizerInitErr);
else
fireEv(index);
}
Recognizer::Recognizer(int index, float sampleRate, CommonModel* model, const std::string& grm, int) :
rec{vosk_recognizer_new_grm(std::get<VoskModel*>(model->mdl), sampleRate, grm.c_str())} {
if(rec == nullptr) fireEv(index, Event::status, recognizerInitErr);
else globalPool.exec([this, index]{main(index);});
Recognizer::Recognizer(int index, float sampleRate, CommonModel *model, const std::string &grm, int) : rec{vosk_recognizer_new_grm(std::get<VoskModel *>(model->mdl), sampleRate, grm.c_str())}
{
if (rec == nullptr)
fireEv(index, recognizerInitErr);
else
fireEv(index);
}
void Recognizer::safeDelete(bool _processCurrent) {
emscripten_atomic_store_u8(&processCurrent, _processCurrent);
emscripten_atomic_store_u8(&done, true);
emscripten_atomic_store_u32(&haveData, true);
emscripten_atomic_notify(&haveData, 1);
}
void Recognizer::main(int index) {
fireEv(index, Event::status);
AudioData* next;
while(!emscripten_atomic_load_u8(&done)) {
if(dataQ.empty()) {
emscripten_atomic_store_u32(&haveData, false);
emscripten_atomic_wait_u32(&haveData, false, -1);
}
else {
next = &dataQ.front();
switch(vosk_recognizer_accept_waveform_f(rec, next->data, next->len)) {
case 0:
fireEv(index, Event::partialResult, vosk_recognizer_partial_result(rec));
break;
case 1:
fireEv(index, Event::result, vosk_recognizer_result(rec));
}
free(next->data);
dataQ.pop();
}
std::string Recognizer::acceptWaveform(int start, int len)
{
const char *res = nullptr;
switch (vosk_recognizer_accept_waveform_f(
rec,
reinterpret_cast<float *>(start),
len))
{
case 0:
res = vosk_recognizer_partial_result(rec);
break;
case 1:
res = vosk_recognizer_result(rec);
break;
default:
res = "";
break;
}
if(emscripten_atomic_load_u8(&processCurrent)) {
while(!dataQ.empty()) {
free(dataQ.front().data);
dataQ.pop();
}
}
else {
while(!dataQ.empty()) {
next = &dataQ.front();
switch(vosk_recognizer_accept_waveform_f(rec, next->data, next->len)) {
case 0: [[likely]]
fireEv(index, Event::partialResult, vosk_recognizer_partial_result(rec));
break;
case 1: [[unlikely]]
fireEv(index, Event::result, vosk_recognizer_result(rec));
}
free(next->data);
dataQ.pop();
}
}
fireEv(index, Event::result, vosk_recognizer_final_result(rec));
vosk_recognizer_free(rec);
fireEv(index, Event::status);
return res ? std::string(res) : std::string();
}
void Recognizer::acceptWaveform(int start, int len) {
dataQ.emplace(start, len);
emscripten_atomic_store_u32(&haveData, true);
emscripten_atomic_notify(&haveData, 1);
}
void Recognizer::reset() {
void Recognizer::reset()
{
vosk_recognizer_reset(rec);
}
void Recognizer::setEndpointerMode(VoskEndpointerMode mode) {
void Recognizer::setEndpointerMode(VoskEndpointerMode mode)
{
vosk_recognizer_set_endpointer_mode(rec, mode);
}
void Recognizer::setEndpointerDelays(float tStartMax, float tEnd, float tMax) {
void Recognizer::setEndpointerDelays(float tStartMax, float tEnd, float tMax)
{
vosk_recognizer_set_endpointer_delays(rec, tStartMax, tEnd, tMax);
}
void Recognizer::setGrm(const std::string& grm) {
void Recognizer::setGrm(const std::string &grm)
{
vosk_recognizer_set_grm(rec, grm.c_str());
}
void Recognizer::setSpkModel(CommonModel* spkModel) {
vosk_recognizer_set_spk_model(rec, std::get<VoskSpkModel*>(spkModel->mdl));
void Recognizer::setSpkModel(CommonModel *spkModel)
{
vosk_recognizer_set_spk_model(rec, std::get<VoskSpkModel *>(spkModel->mdl));
}
void Recognizer::setWords(bool words) {
void Recognizer::setWords(bool words)
{
vosk_recognizer_set_words(rec, words);
}
void Recognizer::setPartialWords(bool partialWords) {
void Recognizer::setPartialWords(bool partialWords)
{
vosk_recognizer_set_partial_words(rec, partialWords);
}
void Recognizer::setNLSML(bool nlsml) {
void Recognizer::setNLSML(bool nlsml)
{
vosk_recognizer_set_nlsml(rec, nlsml);
}
void Recognizer::setMaxAlternatives(int alts) {
void Recognizer::setMaxAlternatives(int alts)
{
vosk_recognizer_set_max_alternatives(rec, alts);
}

View File

@@ -1,25 +1,22 @@
#pragma once
#include "CommonModel.h"
#include <queue>
// Prevent naming conflicts with Vosk's Recognizer class
#define Recognizer Recognizer_
struct Recognizer {
int haveData{};
bool processCurrent{};
bool done{};
VoskRecognizer* rec;
std::queue<AudioData> dataQ;
Recognizer(int index, float sampleRate, CommonModel* model);
Recognizer(int index, float sampleRate, CommonModel* model, CommonModel* spkModel);
Recognizer(int index, float sampleRate, CommonModel* model, const std::string& grm, int);
void main(int index);
void safeDelete(bool _processCurrent);
void acceptWaveform(int start, int len);
struct Recognizer
{
int index;
VoskRecognizer *rec;
Recognizer(int index, float sampleRate, CommonModel *model);
Recognizer(int index, float sampleRate, CommonModel *model, CommonModel *spkModel);
Recognizer(int index, float sampleRate, CommonModel *model, const std::string &grm, int);
std::string acceptWaveform(int start, int len);
void reset();
void setEndpointerMode(VoskEndpointerMode mode);
void setEndpointerDelays(float tStartMax, float tEnd, float tMax);
void setSpkModel(CommonModel* model);
void setGrm(const std::string& grm);
void setSpkModel(CommonModel *model);
void setGrm(const std::string &grm);
void setWords(bool words);
void setPartialWords(bool partialWords);
void setNLSML(bool nlsml);

View File

@@ -1,108 +1,57 @@
#include "Util.h"
#include "emscripten/wasm_worker.h"
#include "emscripten/em_js.h"
#include <cstring>
EM_JS(void, fireEv, (int idx, int typeIdx, const char* content), {
if(ENVIRONMENT_IS_WEB) objs[idx].dispatchEvent(new CustomEvent(events[typeIdx], {
"detail": content == 0 ? null : UTF8ToString(content)
}));
else self.postMessage([idx, typeIdx, content]);
});
int untar(unsigned char* tar, int tarSize, const char* storepath) {
if(std::memcmp(tar + 257, "ustar", 5)) return IncorrectFormat;
int untar(unsigned char *tar, int tarSize, const char *storepath)
{
if (std::memcmp(tar + 257, "ustar", 5))
return IncorrectFormat;
size_t size{};
std::string path;
path.reserve(100); // Max length
unsigned char* end = tar + tarSize;
while(tar <= end) {
if(tar[156] != '5' && tar[156] != 0 &&
tar[156] != '0') {
unsigned char *end = tar + tarSize;
while (tar <= end)
{
if (tar[156] != '5' && tar[156] != 0 &&
tar[156] != '0')
{
return IncorrectFiletype;
}
path.clear();
path += reinterpret_cast<char*>(tar + 345);
path += reinterpret_cast<char*>(tar);
path += reinterpret_cast<char *>(tar + 345);
path += reinterpret_cast<char *>(tar);
tar += 124;
for(int i{0}; i < 11; i++) {
for (int i{0}; i < 11; i++)
{
size *= 8;
size += *tar - 48;
tar++;
}
tar += 377;
size_t firstSlash = path.find_first_of("/");
if(firstSlash == std::string::npos) {
if(size != 0) tar += size + 512 - size % 512;
if (firstSlash == std::string::npos)
{
if (size != 0)
tar += size + 512 - size % 512;
continue;
}
path = storepath + path.substr(firstSlash);
std::ofstream file;
if(size == 0) fs::create_directory(path);
else {
if (size == 0)
fs::create_directory(path);
else
{
file.open(path, std::ios::trunc | std::ios::binary);
if(!file) return FailedOpen;
if(!file.write(reinterpret_cast<char*>(tar), size)) return FailedWrite;
if (!file)
return FailedOpen;
if (!file.write(reinterpret_cast<char *>(tar), size))
return FailedWrite;
file.close();
if(!file) return FailedClose;
if (!file)
return FailedClose;
tar += size + 512 - size % 512;
}
}
return Successful;
}
void workerStartup(int _pool) {
WorkerPool& pool{*reinterpret_cast<WorkerPool*>(_pool)};
std::function<void()> fn;
while(!pool.done) {
// Wait until unlocked
emscripten_atomic_wait_u32(&pool.qLock, true, -1);
if(pool.done) break;
// If there is no task then everyone has to wait until there is more
if(pool.taskQ.empty()) {
emscripten_atomic_store_u32(&pool.qLock, true);
continue;
}
// If this locks, the returned (loaded) value will be false, and we move on
if(emscripten_atomic_cas_u32(&pool.qLock, false, true)) continue;
fn = pool.taskQ.front();
pool.taskQ.pop();
// Unlock
emscripten_atomic_store_u32(&pool.qLock, false);
emscripten_atomic_notify(&pool.qLock, 1);
fn();
}
}
using _startupFn = void(*)(int);
EM_JS(void, startupWorkers, (_startupFn startupFn, WorkerPool* pool), {
for(let worker of Object.values(_wasmWorkers)) {
worker.postMessage({
"_wsc": startupFn,
"x": [ pool ]
});
worker.onmessage = msg => fireEv(...msg.data);
}
})
constexpr int workerStack{32768};
std::array<std::byte, MAX_WORKERS * workerStack> stacks;
WorkerPool::WorkerPool() {
for(int i{}; i < MAX_WORKERS; ++i) {
emscripten_create_wasm_worker(&stacks[i * workerStack], workerStack);
}
startupWorkers(workerStartup, this);
}
#undef MAX_WORKERS
WorkerPool::~WorkerPool() {
// LTO will remove the EM_JS definition for some reason if it isn't called in the same translation unit (I get undefined symbols), even though it is annotated with EMSCRIPTEN_KEEPALIVE. "Call" it here (this destructor is never called) to workaround that. I'm going to file an issue on Emscripten
fireEv(0, 0);
/*
done = true;
emscripten_atomic_store_u32(&qLock, false);
emscripten_atomic_notify(&qLock, -1);
emscripten_terminate_all_wasm_workers();
*/
}
void WorkerPool::exec(std::function<void()> fn) {
taskQ.emplace(fn);
emscripten_atomic_store_u32(&qLock, false);
emscripten_atomic_notify(&qLock, 1);
}
WorkerPool globalPool;
}

View File

@@ -3,20 +3,15 @@
#include <variant>
#include <fstream>
#include <functional>
#include <queue>
#include "emscripten/atomic.h"
#include "emscripten/console.h"
namespace fs = std::filesystem;
enum Event {
// Shared
status,
// Recognizer
partialResult,
result,
};
enum UntarStatus {
enum UntarStatus
{
Successful,
IncorrectFormat,
IncorrectFiletype,
@@ -24,25 +19,7 @@ enum UntarStatus {
FailedWrite,
FailedClose
};
struct AudioData {
float* data;
int len;
AudioData(int start, int len) : data{reinterpret_cast<float*>(start)}, len{len} {}
};
#ifndef MAX_WORKERS
#define MAX_WORKERS 1
#endif
struct WorkerPool {
bool qLock{true}; // True is locked, false is unlocked
bool done{};
std::queue<std::function<void()>> taskQ;
WorkerPool();
~WorkerPool();
void exec(std::function<void()> fn);
};
extern "C" void fireEv(int idx, int typeIdx, const char* content = nullptr);
extern "C" void fireEv(int idx, const char *content = nullptr);
int untar(unsigned char* tar, int tarSize, const char* storepath);
extern WorkerPool globalPool;
int untar(unsigned char *tar, int tarSize, const char *storepath);

View File

@@ -80,7 +80,7 @@ index 035ffee..18edcd3 100644
#endif
diff --git a/src/recognizer.cc b/src/recognizer.cc
index 55d9991..c111038 100644
index 55d9991..44ac113 100644
--- a/src/recognizer.cc
+++ b/src/recognizer.cc
@@ -17,6 +17,7 @@
@@ -98,8 +98,7 @@ index 55d9991..c111038 100644
-
+const v128_t _32768fx4{wasm_f32x4_const_splat(32768.0f)};
bool Recognizer::AcceptWaveform(const float *fdata, int len)
-{
+{
{
Vector<BaseFloat> wave;
wave.Resize(len, kUndefined);
- for (int i = 0; i < len; i++)
@@ -116,3 +115,12 @@ index 55d9991..c111038 100644
return AcceptWaveform(wave);
}
@@ -844,7 +852,7 @@ const char* Recognizer::PartialResult()
clat = decoder_->GetLattice(decoder_->NumFramesInLattice(), false);
if (model_->winfo_) {
- WordAlignLatticePartial(clat, *model_->trans_model_, *model_->winfo_, 0, &aligned_lat);
+ WordAlignLattice(clat, *model_->trans_model_, *model_->winfo_, 0, &aligned_lat);
} else {
CopyLatticeForMbr(clat, &aligned_lat);
}

View File

@@ -3,155 +3,270 @@
* @suppress {undefinedVars|checkTypes}
*/
if (ENVIRONMENT_IS_WEB) {
  // 'var' to expose this outside the if
  var objs = [];
  globalThis.__voskletObjs = objs;

  // Switch for the load-progress traces emitted by voskLog / voskWarn.
  const VOSKLET_DEBUG = false;

  // console.log that stays silent unless VOSKLET_DEBUG is set.
  function voskLog(...args) {
    if (VOSKLET_DEBUG) {
      console.log(...args);
    }
  }

  // console.warn that stays silent unless VOSKLET_DEBUG is set.
  function voskWarn(...args) {
    if (VOSKLET_DEBUG) {
      console.warn(...args);
    }
  }

  var events = ['status', 'partialResult', 'result'];
  let _cache = caches.open('Vosklet');

  // The worklet source is the arrow function below turned into text and served
  // from a Blob URL, so its body cannot reference anything from this scope.
  let processorURL = URL.createObjectURL(new Blob(['(', (() => {
    registerProcessor('VoskletTransferer', class extends AudioWorkletProcessor {
      constructor(opts) {
        super();
        this.filled = 0;
        this.bufSize = opts.processorOptions[0];
        this.buf = new Float32Array(this.bufSize);
      }

      // Collects the first channel of the first input into chunks of bufSize
      // samples and posts every full chunk (transferring its buffer) to the port.
      process(inputs) {
        const samples = inputs[0][0];
        if (!samples || !(this.bufSize >= 1)) return true;
        // Copy by the actual frame length and split across chunk boundaries, so
        // a bufSize that is not a multiple of the frame length cannot overflow buf.
        let offset = 0;
        while (offset < samples.length) {
          const count = Math.min(samples.length - offset, this.bufSize - this.filled);
          this.buf.set(samples.subarray(offset, offset + count), this.filled);
          this.filled += count;
          offset += count;
          if (this.filled >= this.bufSize) {
            this.filled = 0;
            this.port.postMessage(this.buf, [this.buf.buffer]);
            this.buf = new Float32Array(this.bufSize);
          }
        }
        return true;
      }
    })
  }).toString(), ')()'], { type: 'text/javascript' }));

  // JS-side handle for a Vosk model or speaker model; `obj` is the native
  // (Embind) object once mk() has constructed it.
  class CommonModel extends EventTarget {
    constructor() {
      super();
      // Remember this object's own slot in objs. The slot is handed to the native
      // constructor later; objs.length may have grown by then.
      this.idx = objs.push(this) - 1;
    }

    // Releases the native object. Safe to call when loading failed before the
    // native object existed, and safe to call more than once.
    delete() {
      if (this.obj) {
        this.obj.delete();
        this.obj = undefined;
      }
    }

    // True when bytes start with the gzip magic number (0x1f 0x8b).
    static isGzip(bytes) {
      return bytes.length >= 2 && bytes[0] === 0x1f && bytes[1] === 0x8b;
    }

    // True when bytes carry the "ustar" magic at offset 257 of the first header.
    static isTar(bytes) {
      return (
        bytes.length > 262 &&
        bytes[257] === 0x75 &&
        bytes[258] === 0x73 &&
        bytes[259] === 0x74 &&
        bytes[260] === 0x61 &&
        bytes[261] === 0x72
      );
    }

    // Decompresses a gzip ArrayBuffer and resolves with the inflated ArrayBuffer.
    static async gunzipArrayBuffer(buffer) {
      const ds = new DecompressionStream('gzip');
      const stream = new Response(buffer).body.pipeThrough(ds);
      return await new Response(stream).arrayBuffer();
    }

    // Loads a model: takes the validated TAR from the 'Vosklet' cache under
    // `storepath?id`, or fetches `url` (gunzipping when needed), validates and
    // caches it. The TAR is then copied into the wasm heap and passed to the
    // native CommonModel constructor. Resolves with the model once an event with
    // an empty detail arrives, rejects with the detail otherwise. Throws a string
    // when the download fails or is not a USTAR archive.
    static async mk(url, storepath, id, normalMdl) {
      let mdl = new CommonModel();
      let result = new Promise((resolve, reject) => {
        // NOTE(review): the listener is registered for the event type '' —
        // presumably what the fireEv bridge dispatches; confirm against FireEv.cc.
        mdl.addEventListener('', ev => {
          if (!ev.detail) {
            if (normalMdl) mdl['findWord'] = word => mdl.obj['findWord'](word);
            resolve(mdl);
          }
          else reject(ev.detail);
        }, { once: true });
      });

      const cache = await caches.open('Vosklet');
      const cacheKey = storepath + '?' + id;
      let res = await cache.match(cacheKey);
      let tar;

      if (res) {
        tar = await res.arrayBuffer();
        // Drop cache entries that are empty or not a TAR and fall through to a fresh download
        if (tar.byteLength === 0 || !CommonModel.isTar(new Uint8Array(tar))) {
          voskWarn('Vosklet: kaputter Cache-Eintrag, lösche...');
          await cache.delete(cacheKey);
          tar = null;
        }
      }

      if (!tar) {
        voskLog("Vosklet: fetch start");
        let fetchRes = await fetch(url, { cache: 'no-store' });
        voskLog("Vosklet: fetch ok", fetchRes.status);
        if (!fetchRes.ok) {
          throw 'Unable to fetch model, status: ' + fetchRes.status;
        }
        let buf = await fetchRes.arrayBuffer();
        voskLog("Vosklet: arrayBuffer size", buf.byteLength);
        let bytes = new Uint8Array(buf);
        voskLog("Vosklet: gzip?", CommonModel.isGzip(bytes));
        voskLog("Vosklet: tar?", CommonModel.isTar(bytes));
        if (bytes.byteLength === 0) {
          throw 'Vosklet: Modell-Download ergab 0 Bytes.';
        }
        if (CommonModel.isGzip(bytes)) {
          buf = await CommonModel.gunzipArrayBuffer(buf);
          bytes = new Uint8Array(buf);
        }
        if (!CommonModel.isTar(bytes)) {
          throw 'Vosklet: Modell ist kein gültiges USTAR/TAR.';
        }
        voskLog("Vosklet: cache put start");
        // Only the validated, uncompressed TAR is cached
        await cache.put(
          cacheKey,
          new Response(buf, {
            headers: {
              'Content-Type': 'application/x-tar',
              'X-Vosklet-Format': 'tar',
              'X-Vosklet-Model-Id': String(id)
            }
          })
        );
        voskLog("Vosklet: cache put done");
        tar = buf;
      }

      voskLog("Vosklet: malloc start", tar.byteLength);
      let tarStart = _malloc(tar.byteLength);
      voskLog("Vosklet: malloc done", tarStart);
      voskLog("Vosklet: HEAPU8.set start");
      HEAPU8.set(new Uint8Array(tar), tarStart);
      voskLog("Vosklet: HEAPU8.set done");
      voskLog("Vosklet: CommonModel constructor start");
      // Use the slot captured at construction: other objects may have been
      // created while the awaits above were pending.
      mdl.obj = new Module['CommonModel'](
        mdl.idx,
        normalMdl,
        tarStart,
        tar.byteLength
      );
      voskLog("Vosklet: CommonModel constructor returned");
      return result;
    }
  }

  // JS-side handle for a native recognizer; `obj` is the native (Embind) object.
  class Recognizer extends EventTarget {
    constructor() {
      super();
      // Remember this object's own slot in objs (see CommonModel)
      this.idx = objs.push(this) - 1;

      // Quoted so the minifier keeps the public name. Copies the samples into the
      // wasm heap and returns whatever the native acceptWaveform returns.
      this['acceptWaveform'] = (audioData) => {
        let start = _malloc(audioData.length * 4);
        HEAPF32.set(audioData, start / 4);
        return this.obj['acceptWaveform'](start, audioData.length);
      };
    }

    // Releases the native object. Safe to call when construction failed and
    // safe to call more than once.
    delete() {
      if (this.obj) {
        this.obj.delete();
        this.obj = undefined;
      }
    }

    // Creates the native recognizer. mode 1: plain, mode 2: with speaker model,
    // anything else: with grammar. Resolves with the recognizer once an event
    // with an empty detail arrives, rejects with the detail otherwise.
    static async mk(model, sampleRate, mode, grammar, spkModel) {
      let rec = new Recognizer();

      let result = new Promise((resolve, reject) => {
        // NOTE(review): event type '' — see CommonModel.mk; confirm against FireEv.cc.
        rec.addEventListener('', ev => {
          if (!ev.detail) {
            resolve(rec);
          } else {
            reject(ev.detail);
          }
        }, { once: true });
      });

      switch (mode) {
        case 1:
          rec.obj = new Module['Recognizer'](
            rec.idx,
            sampleRate,
            model
          );
          break;

        case 2:
          rec.obj = new Module['Recognizer'](
            rec.idx,
            sampleRate,
            model,
            spkModel
          );
          break;

        default:
          rec.obj = new Module['Recognizer'](
            rec.idx,
            sampleRate,
            model,
            grammar,
            0
          );
      }

      return result;
    }
  }

  // Public API. Keys are quoted so the minifier keeps their names.
  Module = {
    'getModelCache': () => _cache,

    // Deletes every object created so far and releases the worklet Blob URL
    'cleanUp': async () => {
      for (let obj of objs) await obj.delete();
      URL.revokeObjectURL(processorURL);
    },

    // Registers the worklet module on ctx and returns a mono, output-less node
    // that posts Float32Array chunks of bufSize samples on its port
    'createTransferer': async (ctx, bufSize) => {
      await ctx.audioWorklet.addModule(processorURL);
      return new AudioWorkletNode(ctx, 'VoskletTransferer', {
        channelCountMode: 'explicit',
        numberOfInputs: 1,
        numberOfOutputs: 0,
        channelCount: 1,
        processorOptions: [bufSize]
      });
    },

    'createModel': (url, storepath, id) =>
      CommonModel.mk(url, storepath, id, true),

    'createSpkModel': (url, storepath, id) =>
      CommonModel.mk(url, storepath, id, false),

    'createRecognizer': (model, sampleRate) =>
      Recognizer.mk(model.obj, sampleRate, 1),

    'createRecognizerWithGrm': (model, sampleRate, grammar) =>
      Recognizer.mk(model.obj, sampleRate, 3, grammar, null),

    'createRecognizerWithSpkModel': (model, sampleRate, spkModel) =>
      Recognizer.mk(model.obj, sampleRate, 2, null, spkModel.obj)
  }
}

View File

@@ -4,7 +4,7 @@ MAX_THREADS=${MAX_THREADS:-1}
EMSDK=${EMSDK:-../emsdk}
JOBS=${JOBS:-$(nproc)}
# A user-supplied EMSDK must be an existing emsdk checkout. It is a directory
# (emsdk_env.sh is sourced from inside it), so test with -d: -f is never true
# for a directory and would reject every valid path.
if [ "$EMSDK" != ../emsdk ] && [ ! -d "$EMSDK" ]; then
  echo "Invalid emsdk path"
  exit 1
fi
@@ -20,12 +20,12 @@ if ! [[ $INITIAL_MEMORY =~ ^[0-9]+([kmgt]b)?$ ]]; then
echo "INITIAL_MEMORY valid suffixes are kb, mb, gb, tb, none (bytes)"
exit 1
fi
# Install the default emsdk only when it has not been cloned yet. The test must
# be -d: with -f the condition stays true after the first install and git clone
# then fails on the existing directory.
if [ "$EMSDK" = ../emsdk ] && [ ! -d "$EMSDK" ]; then
  echo "Installing emsdk + Emscripten..."
  git clone --depth=1 https://github.com/emscripten-core/emsdk.git ../emsdk &&
  cd ../emsdk &&
  ./emsdk install 4.0.13 &&
  ./emsdk activate 4.0.13
fi
. $(realpath "$EMSDK")/emsdk_env.sh &&
export PATH=:$PATH:$(realpath "$EMSDK")/upstream/bin &&
@@ -37,63 +37,61 @@ VOSK=$(realpath vosk)
OPENFST=$(realpath openfst)
OPENBLAS=$(realpath openblas)
SHARED_FLAGS="-O3 -flto -msimd128 -matomics -mbulk-memory -mreference-types -mnontrapping-fptoint -mextended-const -msign-ext -mmutable-globals"
#SHARED_FLAGS="-g0 -O3 -flto -msimd128 -matomics -mreference-types -mextended-const -msign-ext -mmutable-globals"
SHARED_FLAGS=" -msimd128 -matomics -mreference-types -mextended-const -msign-ext -mmutable-globals"
if [ ! -d "$OPENFST" ]; then
if [ ! -f "$OPENFST/lib/libfst.a" ]; then
rm -rf /tmp/openfst &&
git clone --depth=1 https://github.com/alphacep/openfst /tmp/openfst &&
wget https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.8.4.tar.gz -O /tmp/openfst.tgz &&
mkdir /tmp/openfst &&
tar --no-same-owner -xzf /tmp/openfst.tgz -C /tmp/openfst --strip-component 1 &&
cd /tmp/openfst &&
autoreconf -is &&
CXXFLAGS="$SHARED_FLAGS -O3 -fno-rtti" emconfigure ./configure --prefix="$OPENFST" --enable-static --disable-shared --enable-ngram-fsts --disable-bin &&
emmake make -j"$JOBS" install > /dev/null &&
echo "PACKAGE_VERSION = 1.8.0" > "$OPENFST"/Makefile
rm -rf /tmp/openfst
emmake make -j"$JOBS" install
fi
if [ ! -d "$OPENBLAS" ]; then
if [ ! -f "$OPENBLAS/lib/libopenblas.a" ]; then
rm -rf /tmp/openblas &&
git clone -b v0.3.28 https://github.com/OpenMathLib/OpenBLAS --depth=1 /tmp/openblas &&
git clone -b v0.3.30 https://github.com/OpenMathLib/OpenBLAS --depth=1 /tmp/openblas &&
cd /tmp/openblas &&
git apply "$SRC"/OpenBLAS.patch &&
# Change HOSTCC to the default C compiler on your machine
openblasFlags="CC=emcc HOSTCC=clang-20 TARGET=RISCV64_GENERIC USE_THREAD=0 NO_SHARED=1 BINARY=32 BUILD_SINGLE=1 BUILD_DOUBLE=1 BUILD_BFLOAT16=0 BUILD_COMPLEX16=0 BUILD_COMPLEX=0"
openblasFlags="CC=emcc HOSTCC=clang-19 TARGET=RISCV64_GENERIC USE_THREAD=0 NO_SHARED=1 BINARY=32 BUILD_SINGLE=1 BUILD_DOUBLE=1 BUILD_BFLOAT16=0 BUILD_COMPLEX16=0 BUILD_COMPLEX=0"
openblasCFlags="$SHARED_FLAGS -fno-exceptions -fno-rtti -Wno-implicit-function-declaration -Wno-unused-function -Wno-unused-but-set-variable"
make $openblasFlags CFLAGS="$openblasCFlags" PREFIX="$OPENBLAS" -j"$JOBS" > /dev/null &&
make $openblasFlags CFLAGS="$openblasCFlags" PREFIX="$OPENBLAS" -j"$JOBS" install &&
rm -rf /tmp/openblas
make $openblasFlags CFLAGS="$openblasCFlags" PREFIX="$OPENBLAS" -j"$JOBS" install
fi
if [ ! -d "$KALDI" ]; then
git clone -b vosk --depth=1 https://github.com/alphacep/kaldi "$KALDI" &&
if [ ! -f "$KALDI/src/kaldi.mk" ]; then
git clone --depth=1 https://github.com/kaldi-asr/kaldi "$KALDI" &&
cd "$KALDI"/src &&
git apply "$SRC"/Kaldi.patch &&
CXXFLAGS="$SHARED_FLAGS -UHAVE_EXECINFO_H -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fwasm-exceptions -Wno-unused-variable -Wno-unused-but-set-variable -g0" LDFLAGS="-lembind" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --debug-level=0 --fst-root="$OPENFST" --openblas-root="$OPENBLAS" --host=WASM &&
CXXFLAGS="$SHARED_FLAGS -UHAVE_EXECINFO_H -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -fwasm-exceptions -Wno-unused-variable -Wno-unused-but-set-variable" LDFLAGS="-lembind" emconfigure ./configure --use-cuda=no --with-cudadecoder=no --static --static-math=yes --static-fst=yes --fst-version=1.8.4 --debug-level=0 --fst-root="$OPENFST" --openblas-root="$OPENBLAS" --host=WASM &&
emmake make -j"$JOBS" online2 rnnlm > /dev/null
fi
if [ ! -d "$VOSK" ]; then
if [ ! -f "$VOSK/src/vosk.a" ]; then
git clone -b v0.3.50 --depth=1 https://github.com/alphacep/vosk-api "$VOSK" &&
cd "$VOSK"/src &&
git apply "$SRC"/Vosk.patch &&
voskFiles="recognizer.o language_model.o model.o spk_model.o vosk_api.o" &&
# shellcheck disable=SC2086
em++ $SHARED_FLAGS -fwasm-exceptions -Wno-deprecated -I. -I"$KALDI"/src -I"$OPENFST"/include ${voskFiles//.o/.cc} -c &&
em++ $SHARED_FLAGS -DOPENFST_VER=10804 -fwasm-exceptions -Wno-deprecated -I. -I"$KALDI"/src -I"$OPENFST"/include ${voskFiles//.o/.cc} -c &&
emar -rcs vosk.a $voskFiles &&
rm -f $voskFiles
fi
cd "$SRC" &&
voskletFiles="Util.o CommonModel.o Recognizer.o Bindings.o"
voskletFlags="$SHARED_FLAGS -fno-rtti -sSTRICT -sWASM_WORKERS=2"
voskletLDFlags="-sWASMFS -sWASM_BIGINT -sMODULARIZE -sTEXTDECODER=2 -sEVAL_CTORS=2 -sALLOW_UNIMPLEMENTED_SYSCALLS -sINITIAL_MEMORY=$INITIAL_MEMORY -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sINCOMING_MODULE_JS_API=wasmMemory,instantiateWasm,wasm -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sENVIRONMENT=web,worker -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$OPENBLAS -l:lib/libopenblas.a -L$VOSK/src -l:vosk.a -lembind --no-entry --closure 1 --pre-js"
voskletFiles="Util.o CommonModel.o Recognizer.o Bindings.o FireEv.o"
voskletFlags="$SHARED_FLAGS -fno-rtti -sSTRICT -sWASM_WORKERS"
voskletLDFlags="-sWASMFS -sMODULARIZE -sTEXTDECODER=2 -sEVAL_CTORS=2 -sALLOW_UNIMPLEMENTED_SYSCALLS -sINITIAL_MEMORY=$INITIAL_MEMORY -sALLOW_MEMORY_GROWTH -sPOLYFILL=0 -sEXIT_RUNTIME=0 -sINVOKE_RUN=0 -sSUPPORT_LONGJMP=0 -sINCOMING_MODULE_JS_API=wasmMemory,instantiateWasm,wasm -sEXPORT_NAME=loadVosklet -sMALLOC=emmalloc -sENVIRONMENT=web,worker -L$KALDI/src -l:online2/kaldi-online2.a -l:decoder/kaldi-decoder.a -l:ivector/kaldi-ivector.a -l:gmm/kaldi-gmm.a -l:tree/kaldi-tree.a -l:feat/kaldi-feat.a -l:cudamatrix/kaldi-cudamatrix.a -l:lat/kaldi-lat.a -l:lm/kaldi-lm.a -l:rnnlm/kaldi-rnnlm.a -l:hmm/kaldi-hmm.a -l:nnet3/kaldi-nnet3.a -l:transform/kaldi-transform.a -l:matrix/kaldi-matrix.a -l:fstext/kaldi-fstext.a -l:util/kaldi-util.a -l:base/kaldi-base.a -L$OPENFST/lib -l:libfst.a -l:libfstngram.a -L$OPENBLAS -l:lib/libopenblas.a -L$VOSK/src -l:vosk.a -lembind --no-entry --closure 1 --pre-js Wrapper.js"
# shellcheck disable=SC2086
em++ ${voskletFiles//.o/.cc} $voskletFlags -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 -DMAX_WORKERS="$MAX_THREADS" -fno-exceptions -std=c++23 -c -I. -I"$VOSK"/src &&
em++ $voskletFiles $voskletFlags $voskletLDFlags Wrapper.js -o ../Vosklet.js
# shellcheck disable=SC2086
em++ $voskletFiles $voskletFlags $voskletLDFlags ../Examples/Wrapper.js -o ../Examples/Vosklet.js
em++ $voskletFiles $voskletFlags $voskletLDFlags -o ../Vosklet.js
rm -f $voskletFiles
cd .. &&
tr -d '\n' < Vosklet.js | tr -s ' ' > /tmp/hehe && mv /tmp/hehe Vosklet.js &&
tr -d '\n' < Examples/Vosklet.js | tr -s ' ' > /tmp/hahe && mv /tmp/hahe Examples/Vosklet.js
rm -rf /tmp/openblas
rm -rf /tmp/openfst
cd ..