Version 1.0.0

This commit is contained in:
msqr1
2024-03-27 16:55:34 -07:00
parent b2e70b24f2
commit 0a7a974dee
14 changed files with 72 additions and 38 deletions

6
API.md
View File

@@ -15,7 +15,7 @@
| ```Promise<Model> createModel(path: string, url: string, id: string)```<br><br>```Promise<SpkModel> createSpkModel(path: string, url: string, id: string)``` | Create a ```Model``` or ```SpkModel```, model files must be directly under the model root, and compressed model must be in .tgz format. If:<br>- **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. Models are thread-safe, reuse them as much as possible! |
| ```Promise<Recognizer> createRecognizer(model: Model, sampleRate: float)```<br><br>```Promise<Recognizer> createRecognizerWithSpkModel(model: Model, spkModel: spkModel, sampleRate: float)```<br><br>```Promise<Recognizer> createRecognizerWithGrm(model: Model, grammar: string, sampleRate: float)``` | Create a ```Recognizer```, it will use **model**'s thread if it's the first user of **model**, else it will use a new thread. |
| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: ```0```: Info) <br>```-2```: Error<br>```-1```: Warning<br>```1```: Verbose<br>```2```: More verbose<br>```3```: Debug |
| ```Promise<AudioWorkletNode> createTransferer(ctx: AudioContext)``` | Create a node that transfer its inputs back to the main thread. Its port's ```onmessage``` handler can be set to get audio data. Has 1 input with 1 channel and 0 output.
| ```Promise<AudioWorkletNode> createTransferer(ctx: AudioContext, bufferSize: int)``` | Create a node that transfers its inputs back to the main thread with a custom buffer size (must be a multiple of 128). Its port's ```onmessage``` handler can be set to get audio data. Has 1 input with 1 channel and 0 output. The larger the size, the less the audio breaks up, but the higher the latency. The recommended value is around ```128 * 150```. |
| ```cleanUp()``` | A convenience function that calls ```delete()``` on all objects and revokes all URLs. **Put this at the end of your code!** |
| ```epMode``` | Endpointer modes (enum) | See Vosk's description |
@@ -54,7 +54,7 @@ cd Vosklet/src &&
```
| Option | Description | Default value |
|---|---|---|
| MAX_MEMORY | Set max memory, valid suffixes: kb, mb, gb, tb or none (bytes) | ```300mb```, as [recommended](https://alphacephei.com/vosk/models) |
| MAX_THREADS | Set the max number of thread, this should be equal to the number of model or speaker model that is used in the program (>1) | ```1``` |
| MAX_MEMORY | Set max memory, valid suffixes: kb, mb, gb, tb or none (bytes) | ```375mb```, 300mb as [recommended](https://alphacephei.com/vosk/models) plus 75mb typical uncompressed model size (loaded to memory) |
| MAX_THREADS | Set the max number of threads (>=1); this should be equal to the number of models and speaker models used in the program | ```1``` (1 recognizer, 1 model, 0 speaker models) |
| COMPILE_JOBS | Set the number of jobs (threads) when building | ```$(nproc)``` |
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |

View File

@@ -13,7 +13,7 @@
- Shorter from-scratch build time
- Faster loading and processing time
# Basic usage
# Basic usage (microphone recognition)
```
<!DOCTYPE html>
<html>
@@ -40,7 +40,7 @@
recognizer.addEventListener("partialResult", ev => {
console.log("Partial result: ", ev.detail)
})
let transferer = await module.createTransferer(ctx)
let transferer = await module.createTransferer(ctx, 128 * 150)
transferer.port.onmessage = ev => {
recognizer.acceptWaveform(ev.data)
}

18
Vosklet.js Normal file

File diff suppressed because one or more lines are too long

5
devel/Vosklet.d.ts vendored
View File

@@ -23,12 +23,15 @@ export interface genericModel {
}
/**
 * Type declaration for the recognizer object exposed by the Vosklet module.
 *
 * Parameter names are positional placeholders (`_0`, `_1`, ...); wherever a
 * meaning is given below and marked "presumably", it is not established by
 * this declaration and should be confirmed against the native implementation.
 */
export interface recognizer {
/** Resets the recognizer; the native implementation calls `vosk_recognizer_reset`. */
reset(): void;
/** Selects the endpointer mode from the `epMode` enum. */
setEndpointerMode(_0: epMode): void;
/** Attaches a speaker model to this recognizer. */
setSpkModel(_0: genericModel): void;
/**
 * NOTE(review): the usage examples call `recognizer.acceptWaveform(ev.data)`
 * (lowercase "f", one argument), while this declaration is spelled
 * `acceptWaveForm` and takes no arguments - confirm which form is actually exposed.
 */
acceptWaveForm(): void;
/** Toggles a boolean option; presumably per-word details in results - confirm. */
setWords(_0: boolean): void;
/** Toggles a boolean option; presumably per-word details in partial results - confirm. */
setPartialWords(_0: boolean): void;
/** Toggles a boolean option; presumably NLSML-formatted output - confirm. */
setNLSML(_0: boolean): void;
/**
 * Queues a chunk of audio for the recognizer thread. The native side receives
 * the two numbers as `(start, len)` and reinterprets `start` as a `float*`.
 */
pushData(_0: number, _1: number): void;
/** Sets a numeric option; presumably the maximum number of alternative results - confirm. */
setMaxAlternatives(_0: number): void;
/** Sets three numeric endpointer delays; their order and units are not shown here - confirm. */
setEndpointerDelays(_0: number, _1: number, _2: number): void;
/** Sets the grammar from a string or binary buffer. */
setGrm(_0: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string): void;
/** Releases the underlying object. */
delete(): void;
}

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,7 @@
<!DOCTYPE html>
<html>
<head>
<script src="Vosklet.js"></script>
<script src="Vosklet.js" async defer></script>
<script>
async function start() {
let ctx = new AudioContext({sampleRate : 16000})
@@ -15,7 +15,7 @@
},
}))
let module = await loadVosklet()
let model = await module.createModel("../usage/en-model.tgz","model","ID")
let model = await module.createModel("en-model.tgz","model","ID")
let recognizer = await module.createRecognizer(model, 16000)
recognizer.addEventListener("result", ev => {
console.log("Result: ", ev.detail)
@@ -23,7 +23,7 @@
recognizer.addEventListener("partialResult", ev => {
console.log("Partial result: ", ev.detail)
})
let transferer = await module.createTransferer(ctx)
let transferer = await module.createTransferer(ctx, 25600)
transferer.port.onmessage = ev => {
recognizer.acceptWaveform(ev.data)
}

View File

@@ -8,7 +8,7 @@ OPENFST=$(realpath openfst) &&
LIBARCHIVE=$(realpath libarchive) &&
CLAPACK_WASM=$(realpath clapack-wasm) &&
MAX_MEMORY=${MAX_MEMORY:-500mb} &&
MAX_MEMORY=${MAX_MEMORY:-375mb} &&
MAX_THREADS=${MAX_THREADS:-1} &&
EMSDK=${EMSDK:-$(realpath emsdk)} &&

View File

@@ -15,7 +15,7 @@
},
}))
let module = await loadVosklet()
let model = await module.createModel("../usage/en-model.tgz","model","ID")
let model = await module.createModel("en-model.tgz","model","ID")
let recognizer = await module.createRecognizer(model, 16000)
recognizer.addEventListener("result", ev => {
console.log("Result: ", ev.detail)
@@ -23,7 +23,7 @@
recognizer.addEventListener("partialResult", ev => {
console.log("Partial result: ", ev.detail)
})
let transferer = await module.createTransferer(ctx)
let transferer = await module.createTransferer(ctx, 128 * 150)
transferer.port.onmessage = ev => {
recognizer.acceptWaveform(ev.data)
}

View File

@@ -1,9 +1,9 @@
<!DOCTYPE html>
<html>
<head>
<script src="../Vosklet.js" async defer></script>
<script src="Vosklet.js" async defer></script>
<script>
window.onload = async () => {
async function start() {
let ctx = new AudioContext({sampleRate : 16000})
let module = await loadVosklet()
let model = await module.createModel("en-model.tgz","model","ID")
@@ -20,4 +20,5 @@
}
</script>
</head>
<button onclick="start()">Start</button>
</html>

View File

@@ -1,8 +1,6 @@
#include "genericModel.h"
genericModel::genericModel(int index, bool normalMdl, std::string storepath, std::string id) : index{index}, normalMdl{normalMdl}, storepath{std::move(storepath)}, id{std::move(id)}, entry{archive_entry_new()} {
blocker.acquire();
}
genericModel::genericModel(int index, bool normalMdl, std::string storepath, std::string id) : index{index}, normalMdl{normalMdl}, storepath{std::move(storepath)}, id{std::move(id)}, entry{archive_entry_new()} {}
void genericModel::extractAndLoad(int tarStart, int tarSize) {
static fs::path path{};
static int fd{};
@@ -50,8 +48,7 @@ void genericModel::extractAndLoad(int tarStart, int tarSize) {
};
std::thread t{[this](){
func();
blocker.acquire();
blocker.release();
blocker.wait(false, std::memory_order_relaxed);
func();
}};
t.detach();

View File

@@ -1,27 +1,24 @@
#pragma once
#include "link.h"
#include <string>
#include <filesystem>
#include <variant>
#include <thread>
#include <semaphore>
#include <fcntl.h>
#include <emscripten/console.h>
#include <vosk_api.h>
#include <archive.h>
#include <archive_entry.h>
namespace fs = std::filesystem;
extern void free(void*);
struct genericModel {
bool normalMdl;
bool resourceUsed{};
std::atomic_bool blocker{};
int index;
std::string storepath;
std::string id;
std::variant<VoskModel*, VoskSpkModel*> mdl;
std::binary_semaphore blocker{1};
std::function<void()> func;
archive_entry* entry;
void extractAndLoad(int tarStart, int tarSize);

View File

@@ -1,8 +1,20 @@
let objs = []
let processorURL = URL.createObjectURL(new Blob(['(', (() => {
// Audio worklet processor that accumulates incoming audio and posts it to the
// main thread once `maxCount` blocks have been collected.
registerProcessor("VoskletTransferer", class extends AudioWorkletProcessor {
  // opts.processorOptions.maxCount: number of 128-sample blocks to gather
  // before one message is posted.
  constructor(opts) {
    super()
    this.count = 0
    this.maxCount = opts.processorOptions.maxCount
    this.buffer = new Float32Array(this.maxCount * 128)
  }
  // Copies the first channel of the first input into the accumulator; every
  // block is assumed to hold 128 samples (write offsets are multiples of 128).
  // Always returns true so the processor stays alive.
  process(inputs) {
    const channel = inputs[0] && inputs[0][0]
    // Nothing connected yet (or no channel data): skip this block instead of
    // throwing inside the audio rendering thread.
    if (!channel) return true
    this.buffer.set(channel, this.count * 128)
    this.count++
    if (this.count >= this.maxCount) {
      this.count = 0
      // The underlying ArrayBuffer is transferred (not copied), so it becomes
      // unusable here and a fresh accumulator has to be allocated.
      this.port.postMessage(this.buffer, [this.buffer.buffer])
      this.buffer = new Float32Array(this.maxCount * 128)
    }
    return true
  }
})
@@ -176,13 +188,14 @@ Module.cleanUp = () => {
URL.revokeObjectURL(pthreadURL)
URL.revokeObjectURL(processorURL)
}
// Create the AudioWorkletNode that forwards its input audio to the main thread.
//   ctx        - AudioContext whose audioWorklet loads the transferer processor.
//   bufferSize - samples gathered per posted message; must be a positive
//                multiple of 128. Defaults to 128 (one message per block), so
//                the one-argument call keeps working.
// Rejects with a RangeError when bufferSize is not a positive multiple of 128;
// such a value would otherwise only fail later, inside the worklet.
Module.createTransferer = async (ctx, bufferSize = 128) => {
  const size = Number(bufferSize)
  if (!Number.isInteger(size) || size <= 0 || size % 128 !== 0) {
    throw new RangeError("bufferSize must be a positive multiple of 128")
  }
  await ctx.audioWorklet.addModule(processorURL)
  return new AudioWorkletNode(ctx, "VoskletTransferer", {
    channelCountMode : "explicit",
    numberOfInputs : 1,
    numberOfOutputs : 1,
    channelCount : 1,
    // The processor counts 128-sample blocks, not samples.
    processorOptions : { maxCount: size / 128 }
  })
}
Module.locateFile = (path, scriptDir) => {

View File

@@ -21,8 +21,8 @@ void recognizer::finishConstruction(genericModel* model, genericModel* spkModel)
auto main {[this](){
fireEv(index, "0");
while(!done) {
blocker.acquire();
blocker.release();
blocker.wait(done, std::memory_order_relaxed);
blocker = false;
while(!dataQ.empty()) {
switch(vosk_recognizer_accept_waveform_f(rec, dataQ.front().data, dataQ.front().len)) {
case 0:
@@ -39,13 +39,15 @@ void recognizer::finishConstruction(genericModel* model, genericModel* spkModel)
if(!model->resourceUsed) {
model->resourceUsed = true;
model->func = main;
model->blocker.release();
model->blocker = true;
model->blocker.notify_one();
return;
}
if(spkModel != nullptr && !spkModel->resourceUsed) {
spkModel->resourceUsed = true;
spkModel->func = main;
spkModel->blocker.release();
spkModel->blocker = true;
// Wake the thread waiting on the speaker model's own flag; notifying
// model->blocker here would leave that thread asleep.
spkModel->blocker.notify_one();
return;
}
std::thread t{main};
@@ -53,8 +55,8 @@ void recognizer::finishConstruction(genericModel* model, genericModel* spkModel)
}
void recognizer::pushData(int start, int len) {
dataQ.emplace(start, len);
blocker.release();
blocker.acquire();
blocker = true;
blocker.notify_one();
}
void recognizer::reset() {
vosk_recognizer_reset(rec);

View File

@@ -1,18 +1,21 @@
#pragma once
#include "genericModel.h"
#include <queue>
#include <emscripten/console.h>
// One queued chunk of audio, described by a raw address and a length.
struct audioData {
  float* data; // samples, reinterpreted from the integer address given to the constructor
  int len;     // length stored alongside data (presumably a sample count - confirm)
  // Stores the address and length as given; nothing is copied.
  audioData(int start, int len) {
    this->data = reinterpret_cast<float*>(start);
    this->len = len;
  }
};
struct recognizer {
std::atomic_bool done;
std::atomic_bool done{};
std::atomic_bool blocker{};
int index;
std::binary_semaphore blocker{1};
std::queue<audioData> dataQ{};
VoskRecognizer* rec;
std::queue<audioData> dataQ{};
recognizer(int index, float sampleRate, genericModel* model);
recognizer(int index, float sampleRate, genericModel* model, genericModel* spkModel);
recognizer(int index, float sampleRate, genericModel* model, const std::string& grm, int dummy);