Version 1.0.0
This commit is contained in:
6
API.md
6
API.md
@@ -15,7 +15,7 @@
|
||||
| ```Promise<Model> createModel(path: string, url: string, id: string)```<br><br>```Promise<SpkModel> createSpkModel(path: string, url: string, id: string)``` | Create a ```Model``` or ```SpkModel```, model files must be directly under the model root, and compressed model must be in .tgz format. If:<br>- **path** contains valid model files and **id** is the same, there will not be a fetch from **url**.<br>- **path** doesn't contain valid model files, or if it contains valid model files but **id** is different, there will be a fetch from **url**, and the model is stored with **id**. Models are thread-safe, reuse them as much as possible! |
|
||||
| ```Promise<Recognizer> createRecognizer(model: Model, sampleRate: float)```<br><br>```Promise<Recognizer> createRecognizerWithSpkModel(model: Model, spkModel: SpkModel, sampleRate: float)```<br><br>```Promise<Recognizer> createRecognizerWithGrm(model: Model, grammar: string, sampleRate: float)``` | Create a ```Recognizer```. It will use **model**'s thread if it is the first user of **model**; otherwise it will use a new thread. |
|
||||
| ```setLogLevel(lvl: int)``` | Set Vosk's log level (default: ```0```: Info) <br>```-2```: Error<br>```-1```: Warning<br>```1```: Verbose<br>```2```: More verbose<br>```3```: Debug |
|
||||
| ```Promise<AudioWorkletNode> createTransferer(ctx: AudioContext)``` | Create a node that transfers its inputs back to the main thread. Its port's ```onmessage``` handler can be set to get audio data. Has 1 input with 1 channel and 0 outputs.
|
||||
| ```Promise<AudioWorkletNode> createTransferer(ctx: AudioContext, bufferSize: int)``` | Create a node that transfers its inputs back to the main thread with a custom buffer size (must be a multiple of 128). Its port's ```onmessage``` handler can be set to get audio data. Has 1 input with 1 channel and 0 outputs. The higher the size, the less the audio breaks up, but the higher the latency. The recommended value is around ```128 * 150```. |
|
||||
| ```cleanUp()``` | A convenience function that calls ```delete()``` on all objects and revokes all URLs. **Put this at the end of your code!** |
|
||||
| ```epMode``` | Endpointer modes (enum) | See Vosk's description |
|
||||
|
||||
@@ -54,7 +54,7 @@ cd Vosklet/src &&
|
||||
```
|
||||
| Option | Description | Default value |
|
||||
|---|---|---|
|
||||
| MAX_MEMORY | Set max memory, valid suffixes: kb, mb, gb, tb or none (bytes) | ```300mb```, as [recommended](https://alphacephei.com/vosk/models) |
|
||||
| MAX_THREADS | Set the max number of thread, this should be equal to the number of model or speaker model that is used in the program (>1) | ```1``` |
|
||||
| MAX_MEMORY | Set max memory, valid suffixes: kb, mb, gb, tb or none (bytes) | ```375mb```, 300mb as [recommended](https://alphacephei.com/vosk/models) plus 75mb typical uncompressed model size (loaded to memory) |
|
||||
| MAX_THREADS | Set the max number of threads (>=1); this should be equal to the number of models or speaker models that are used in the program | ```1``` (1 recognizer, 1 model, 0 speaker models) |
|
||||
| COMPILE_JOBS | Set the number of jobs (threads) when building | ```$(nproc)``` |
|
||||
| EMSDK | Set EMSDK's path (will install EMSDK in root folder if unset) | ```../emsdk``` |
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
- Shorter from-scratch build time
|
||||
- Faster loading and processing time
|
||||
|
||||
# Basic usage
|
||||
# Basic usage (microphone recognition)
|
||||
```
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
@@ -40,7 +40,7 @@
|
||||
recognizer.addEventListener("partialResult", ev => {
|
||||
console.log("Partial result: ", ev.detail)
|
||||
})
|
||||
let transferer = await module.createTransferer(ctx)
|
||||
let transferer = await module.createTransferer(ctx, 128 * 150)
|
||||
transferer.port.onmessage = ev => {
|
||||
recognizer.acceptWaveform(ev.data)
|
||||
}
|
||||
|
||||
18
Vosklet.js
Normal file
18
Vosklet.js
Normal file
File diff suppressed because one or more lines are too long
5
devel/Vosklet.d.ts
vendored
5
devel/Vosklet.d.ts
vendored
@@ -23,12 +23,15 @@ export interface genericModel {
|
||||
}
|
||||
|
||||
export interface recognizer {
|
||||
reset(): void;
|
||||
setEndpointerMode(_0: epMode): void;
|
||||
setSpkModel(_0: genericModel): void;
|
||||
acceptWaveForm(): void;
|
||||
setWords(_0: boolean): void;
|
||||
setPartialWords(_0: boolean): void;
|
||||
setNLSML(_0: boolean): void;
|
||||
pushData(_0: number, _1: number): void;
|
||||
setMaxAlternatives(_0: number): void;
|
||||
setEndpointerDelays(_0: number, _1: number, _2: number): void;
|
||||
setGrm(_0: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string): void;
|
||||
delete(): void;
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<script src="Vosklet.js"></script>
|
||||
<script src="Vosklet.js" async defer></script>
|
||||
<script>
|
||||
async function start() {
|
||||
let ctx = new AudioContext({sampleRate : 16000})
|
||||
@@ -15,7 +15,7 @@
|
||||
},
|
||||
}))
|
||||
let module = await loadVosklet()
|
||||
let model = await module.createModel("../usage/en-model.tgz","model","ID")
|
||||
let model = await module.createModel("en-model.tgz","model","ID")
|
||||
let recognizer = await module.createRecognizer(model, 16000)
|
||||
recognizer.addEventListener("result", ev => {
|
||||
console.log("Result: ", ev.detail)
|
||||
@@ -23,7 +23,7 @@
|
||||
recognizer.addEventListener("partialResult", ev => {
|
||||
console.log("Partial result: ", ev.detail)
|
||||
})
|
||||
let transferer = await module.createTransferer(ctx)
|
||||
let transferer = await module.createTransferer(ctx, 25600)
|
||||
transferer.port.onmessage = ev => {
|
||||
recognizer.acceptWaveform(ev.data)
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ OPENFST=$(realpath openfst) &&
|
||||
LIBARCHIVE=$(realpath libarchive) &&
|
||||
CLAPACK_WASM=$(realpath clapack-wasm) &&
|
||||
|
||||
MAX_MEMORY=${MAX_MEMORY:-500mb} &&
|
||||
MAX_MEMORY=${MAX_MEMORY:-375mb} &&
|
||||
MAX_THREADS=${MAX_THREADS:-1} &&
|
||||
EMSDK=${EMSDK:-$(realpath emsdk)} &&
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
},
|
||||
}))
|
||||
let module = await loadVosklet()
|
||||
let model = await module.createModel("../usage/en-model.tgz","model","ID")
|
||||
let model = await module.createModel("en-model.tgz","model","ID")
|
||||
let recognizer = await module.createRecognizer(model, 16000)
|
||||
recognizer.addEventListener("result", ev => {
|
||||
console.log("Result: ", ev.detail)
|
||||
@@ -23,7 +23,7 @@
|
||||
recognizer.addEventListener("partialResult", ev => {
|
||||
console.log("Partial result: ", ev.detail)
|
||||
})
|
||||
let transferer = await module.createTransferer(ctx)
|
||||
let transferer = await module.createTransferer(ctx, 128 * 150)
|
||||
transferer.port.onmessage = ev => {
|
||||
recognizer.acceptWaveform(ev.data)
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<script src="../Vosklet.js" async defer></script>
|
||||
<script src="Vosklet.js" async defer></script>
|
||||
<script>
|
||||
window.onload = async () => {
|
||||
async function start() {
|
||||
let ctx = new AudioContext({sampleRate : 16000})
|
||||
let module = await loadVosklet()
|
||||
let model = await module.createModel("en-model.tgz","model","ID")
|
||||
@@ -20,4 +20,5 @@
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<button onclick="start()">Start</button>
|
||||
</html>
|
||||
@@ -1,8 +1,6 @@
|
||||
#include "genericModel.h"
|
||||
|
||||
genericModel::genericModel(int index, bool normalMdl, std::string storepath, std::string id) : index{index}, normalMdl{normalMdl}, storepath{std::move(storepath)}, id{std::move(id)}, entry{archive_entry_new()} {
|
||||
blocker.acquire();
|
||||
}
|
||||
genericModel::genericModel(int index, bool normalMdl, std::string storepath, std::string id) : index{index}, normalMdl{normalMdl}, storepath{std::move(storepath)}, id{std::move(id)}, entry{archive_entry_new()} {}
|
||||
void genericModel::extractAndLoad(int tarStart, int tarSize) {
|
||||
static fs::path path{};
|
||||
static int fd{};
|
||||
@@ -50,8 +48,7 @@ void genericModel::extractAndLoad(int tarStart, int tarSize) {
|
||||
};
|
||||
std::thread t{[this](){
|
||||
func();
|
||||
blocker.acquire();
|
||||
blocker.release();
|
||||
blocker.wait(false, std::memory_order_relaxed);
|
||||
func();
|
||||
}};
|
||||
t.detach();
|
||||
|
||||
@@ -1,27 +1,24 @@
|
||||
#pragma once
|
||||
#include "link.h"
|
||||
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <variant>
|
||||
#include <thread>
|
||||
#include <semaphore>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <emscripten/console.h>
|
||||
#include <vosk_api.h>
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
extern void free(void*);
|
||||
struct genericModel {
|
||||
bool normalMdl;
|
||||
bool resourceUsed{};
|
||||
std::atomic_bool blocker{};
|
||||
int index;
|
||||
std::string storepath;
|
||||
std::string id;
|
||||
std::variant<VoskModel*, VoskSpkModel*> mdl;
|
||||
std::binary_semaphore blocker{1};
|
||||
std::function<void()> func;
|
||||
archive_entry* entry;
|
||||
void extractAndLoad(int tarStart, int tarSize);
|
||||
|
||||
21
src/pre.js
21
src/pre.js
@@ -1,8 +1,20 @@
|
||||
let objs = []
|
||||
let processorURL = URL.createObjectURL(new Blob(['(', (() => {
|
||||
registerProcessor("VoskletTransferer", class extends AudioWorkletProcessor {
|
||||
constructor(opts) {
|
||||
super()
|
||||
this.count = 0
|
||||
this.maxCount = opts.processorOptions.maxCount
|
||||
this.buffer = new Float32Array(this.maxCount * 128)
|
||||
}
|
||||
process(inputs) {
|
||||
this.port.postMessage(inputs[0][0].buffer, [inputs[0][0].buffer])
|
||||
this.buffer.set(inputs[0][0], this.count * 128)
|
||||
this.count++
|
||||
if(this.count >= this.maxCount) {
|
||||
this.count = 0
|
||||
this.port.postMessage(this.buffer, [this.buffer.buffer])
|
||||
this.buffer = new Float32Array(this.maxCount * 128)
|
||||
}
|
||||
return true
|
||||
}
|
||||
})
|
||||
@@ -176,13 +188,14 @@ Module.cleanUp = () => {
|
||||
URL.revokeObjectURL(pthreadURL)
|
||||
URL.revokeObjectURL(processorURL)
|
||||
}
|
||||
Module.createTransferer = async (ctx) => {
|
||||
Module.createTransferer = async (ctx, bufferSize) => {
|
||||
await ctx.audioWorklet.addModule(processorURL)
|
||||
return new AudioWorkletNode(ctx, "VoskletTransferer", {
|
||||
channelCountMode : "explicit",
|
||||
numberOfInputs : 1,
|
||||
numberOfOutputs : 0,
|
||||
channelCount : 1
|
||||
numberOfOutputs : 1,
|
||||
channelCount : 1,
|
||||
processorOptions : { maxCount: bufferSize / 128 }
|
||||
})
|
||||
}
|
||||
Module.locateFile = (path, scriptDir) => {
|
||||
|
||||
@@ -21,8 +21,8 @@ void recognizer::finishConstruction(genericModel* model, genericModel* spkModel)
|
||||
auto main {[this](){
|
||||
fireEv(index, "0");
|
||||
while(!done) {
|
||||
blocker.acquire();
|
||||
blocker.release();
|
||||
blocker.wait(done, std::memory_order_relaxed);
|
||||
blocker = false;
|
||||
while(!dataQ.empty()) {
|
||||
switch(vosk_recognizer_accept_waveform_f(rec, dataQ.front().data, dataQ.front().len)) {
|
||||
case 0:
|
||||
@@ -39,13 +39,15 @@ void recognizer::finishConstruction(genericModel* model, genericModel* spkModel)
|
||||
if(!model->resourceUsed) {
|
||||
model->resourceUsed = true;
|
||||
model->func = main;
|
||||
model->blocker.release();
|
||||
model->blocker = true;
|
||||
model->blocker.notify_one();
|
||||
return;
|
||||
}
|
||||
if(spkModel != nullptr && !spkModel->resourceUsed) {
|
||||
spkModel->resourceUsed = true;
|
||||
spkModel->func = main;
|
||||
spkModel->blocker.release();
|
||||
spkModel->blocker = true;
|
||||
model->blocker.notify_one();
|
||||
return;
|
||||
}
|
||||
std::thread t{main};
|
||||
@@ -53,8 +55,8 @@ void recognizer::finishConstruction(genericModel* model, genericModel* spkModel)
|
||||
}
|
||||
void recognizer::pushData(int start, int len) {
|
||||
dataQ.emplace(start, len);
|
||||
blocker.release();
|
||||
blocker.acquire();
|
||||
blocker = true;
|
||||
blocker.notify_one();
|
||||
}
|
||||
void recognizer::reset() {
|
||||
vosk_recognizer_reset(rec);
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
#pragma once
|
||||
#include "genericModel.h"
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include <emscripten/console.h>
|
||||
|
||||
struct audioData {
|
||||
float* data;
|
||||
int len;
|
||||
audioData(int start, int len) : data{reinterpret_cast<float*>(start)}, len{len} {}
|
||||
};
|
||||
struct recognizer {
|
||||
std::atomic_bool done;
|
||||
std::atomic_bool done{};
|
||||
std::atomic_bool blocker{};
|
||||
int index;
|
||||
std::binary_semaphore blocker{1};
|
||||
std::queue<audioData> dataQ{};
|
||||
VoskRecognizer* rec;
|
||||
std::queue<audioData> dataQ{};
|
||||
recognizer(int index, float sampleRate, genericModel* model);
|
||||
recognizer(int index, float sampleRate, genericModel* model, genericModel* spkModel);
|
||||
recognizer(int index, float sampleRate, genericModel* model, const std::string& grm, int dummy);
|
||||
|
||||
Reference in New Issue
Block a user