Great progress

This commit is contained in:
msqr1
2024-03-10 22:52:01 -07:00
parent 9a40cdaa7f
commit 2091845b04
12 changed files with 8977 additions and 9264 deletions

View File

@@ -9,8 +9,7 @@ EMSCRIPTEN_BINDINGS() {
class_<genericModel>("genericModel")
.constructor<std::string, std::string, int, bool>(allow_raw_pointers())
.function("check", &genericModel::check, allow_raw_pointers())
.function("afterFetch", &genericModel::afterFetch, allow_raw_pointers())
.function("load", &genericModel::load, allow_raw_pointers());
.function("afterFetch", &genericModel::afterFetch, allow_raw_pointers());
class_<recognizer>("recognizer")
.constructor<genericModel*, float, int>(allow_raw_pointers())

View File

@@ -52,7 +52,7 @@ void genericModel::check() {
thrd.addTask([this](){
if(OPFSTried && !OPFSOk) {
emscripten_console_log("OPFS isn't available");
fireEv("_checkMdl", "OPFS isn't available", index);
fireEv("_continue", "OPFS isn't available", index);
return;
}
if(!OPFSTried){
@@ -63,7 +63,7 @@ void genericModel::check() {
emscripten_console_log("Initializing OPFS");
if(!OPFSOk) {
emscripten_console_log("OPFS initialization failed");
fireEv("_checkMdl", "OPFS initialization failed", index);
fireEv("_continue", "OPFS initialization failed", index);
return;
}
fs::current_path("/opfs", tank);
@@ -84,41 +84,24 @@ void genericModel::check() {
}
if(!checkFiles() && !fs::exists("id", tank)) {
emscripten_console_log("Model is not available, fetching...");
fireEv("_checkMdl", "fetch", index);
fireEv("_continue", "1", index);
return;
}
emscripten_console_log("Model is available, verifying ID");
FILE* idFile {fopen("id", "r")};
if(idFile == nullptr) {
emscripten_console_log("Couldn't open id file");
fireEv("_checkMdl", "Couldn't open id file", index);
return;
}
if(fseek(idFile, 0, SEEK_END) != 0) {
emscripten_console_log("Id file end seeking fail");
fireEv("_checkMdl", "Id file end seeking fail", index);
fclose(idFile);
return;
};
long long oldsize{ftell(idFile)};
char* oldid {new char[oldsize]};
if(fseek(idFile, 0L, SEEK_SET) != 0) {
emscripten_console_log("Id file start seeking fail");
fireEv("_checkMdl", "Id file start seeking fail", index);
fclose(idFile);
return;
};
fread(oldid, 1, oldsize, idFile);
fclose(idFile);
if(strcmp(oldid, id.c_str()) != 0) {
std::ifstream idFile("id");
idFile.seekg(0, std::ios::end);
size_t oldSize = idFile.tellg();
std::string oldID(oldSize, ' ');
idFile.seekg(0);
idFile.read(&oldID[0], oldSize);
if(id.compare(oldID.c_str()) != 0) {
emscripten_console_log("ID doesn't match, fetching...");
fireEv("_checkMdl", "fetch", index);
fireEv("_continue", "1", index);
}
else {
emscripten_console_log("ID match, returning instance");
fireEv("_checkMdl", nullptr, index);
emscripten_console_log("ID matches, loading...");
//load();
}
delete[] oldid;
emscripten_console_log("Success! Model is ready!");
});
}
@@ -126,9 +109,9 @@ void genericModel::afterFetch() {
thrd.addTask([this](){
emscripten_console_log("Trying to extract...");
if(!extract()) {
fs::remove("/opfs/m0dEl.tar",tank);
//fs::remove("/opfs/m0dEl.tar",tank);
fs::current_path("/opfs", tank);
fs::remove_all(storepath, tank);
//fs::remove_all(storepath, tank);
emscripten_console_log("Unable to extract model");
fireEv("_continue", "Unable to extract model", index);
return;
@@ -167,13 +150,25 @@ void genericModel::afterFetch() {
bool genericModel::extract() {
static fs::path path{};
static int fd{};
static archive_entry* entry{archive_entry_new()};
archive_entry* entry{archive_entry_new()};
archive* src {archive_read_new()};
auto cleanup {[&](){
archive_entry_free(entry);
archive_read_free(src);
}};
archive_read_support_format_tar(src);
archive_read_open_filename(src, "/opfs/m0dEl.tar", 10240);
if(archive_errno(src) != 0) return false;
if(archive_errno(src) != 0) {
cleanup();
emscripten_console_log(archive_error_string(src));
return false;
}
while(archive_read_next_header2(src, entry) == ARCHIVE_OK) {
if(archive_errno(src) != 0) return false;
if(archive_errno(src) != 0) {
cleanup();
emscripten_console_log(archive_error_string(src));
return false;
}
path = archive_entry_pathname(entry);
// Strip 1st component, keep relative path
path = "." + path.generic_string().substr(path.generic_string().find("/"));
@@ -183,16 +178,20 @@ bool genericModel::extract() {
continue;
}
fd = creat(path.c_str(),0777);
if(fd == -1) return false;
if(fd == -1) {
cleanup();
return false;
}
archive_read_data_into_fd(src, fd);
close(fd);
if(archive_errno(src) != 0) {
cleanup();
emscripten_console_log(archive_error_string(src));
return false;
}
}
fs::remove("README",tank);
fs::remove("/opfs/m0dEl.tar",tank);
archive_read_free(src);
cleanup();
return true;
}

View File

@@ -3,7 +3,7 @@
#include <string>
#include <filesystem>
#include <cstring>
#include <fstream>
#include <fcntl.h>
#include <variant>
@@ -13,14 +13,14 @@
namespace fs = std::filesystem;
struct genericModel {
bool normalMdl{};
bool recognizerUsedThrd{};
int index{};
bool normalMdl;
bool recognizerUsedThrd;
int index;
std::string storepath;
std::string id;
std::variant<VoskModel*, VoskSpkModel*> mdl{};
reusableThrd thrd{};
static bool extract();
std::variant<VoskModel*, VoskSpkModel*> mdl;
reusableThrd thrd;
bool extract();
void load();
void check();
bool checkFiles();

View File

@@ -20,7 +20,6 @@ reusableThrd::reusableThrd() {
blocker.wait(done.test(std::memory_order_relaxed) || queue.empty(), std::memory_order_relaxed);
blocker.clear(std::memory_order_relaxed);
while(!queue.empty()) {
emscripten_console_log("Executing task...");
queue.front()();
queue.pop();
}

View File

@@ -1,5 +1,4 @@
let objs = []
let dStream = new DecompressionStream("gzip")
Module.cleanUp = () => {
objs.forEach(obj => obj.delete())
URL.revokeObjectURL(pthreadUrl)
@@ -9,33 +8,31 @@ Module.locateFile = (path, scriptDir) => {
if(path === "Vosklet.js") return pthreadUrl
return scriptDir+path
}
class genericModel extends EventTarget {
constructor() {
constructor(url, storepath, id, normalMdl) {
super()
objs.push(this)
this.url = url
this.storepath = storepath
this.id = id
this.normalMdl = normalMdl
}
static async _init(url, storepath, id, normalMdl) {
let mdl = new genericModel()
return new Promise((resolve, reject) => {
mdl.addEventListener("_continue", (ev) => {
if(ev.detail === "0") {
return resolve(mdl)
}
mdl.delete()
reject(ev.detail)
}, {once : true})
mdl.addEventListener("_checkMdl", async (ev) => {
let mdl = new genericModel(url, storepath, id, normalMdl)
mdl.addEventListener("_continue", async function listener(ev) {
switch(ev.detail) {
case "0":
mdl.load(true);
break;
case "fetch":
mdl.removeEventListener("_continue", listener)
return resolve(mdl)
case "1":
let res = await fetch(url)
if(!res.ok) {
return reject("Unable to download model")
}
let wStream = await (await (await navigator.storage.getDirectory()).getFileHandle("m0dEl.tar", {create : true})).createWritable()
let tarReader = res.body.pipeThrough(dStream).getReader()
let tarReader = res.body.pipeThrough(new DecompressionStream("gzip")).getReader()
while(true) {
let readRes = await tarReader.read()
if(!readRes.done) await wStream.write(readRes.value)
@@ -46,9 +43,11 @@ class genericModel extends EventTarget {
mdl.obj.afterFetch()
break;
default:
mdl.delete()
mdl.removeEventListener("_continue", listener)
reject(ev.detail)
}
}, {once : true})
})
mdl.obj = new Module.genericModel(storepath, id, objs.length-1, normalMdl)
mdl.obj.check()
})
@@ -58,9 +57,17 @@ class genericModel extends EventTarget {
}
}
// Returns a normal (non-speaker) model for the given url/storepath/id.
// A model already registered in `objs` with the same three values is reused;
// otherwise a new one is created through genericModel._init.
Module.makeModel = async (url, storepath, id) => {
  // for...of yields the stored objects themselves; for...in would yield index
  // strings, on which normalMdl/url/storepath/id are always undefined.
  for (const obj of objs) {
    if (typeof obj.normalMdl !== "undefined" && obj.normalMdl && obj.url === url && obj.storepath === storepath && obj.id === id) return obj
  }
  return genericModel._init(url, storepath, id, true)
}
// Returns a speaker model for the given url/storepath/id.
// A speaker model already registered in `objs` with the same three values is
// reused; otherwise a new one is created through genericModel._init.
Module.makeSpkModel = async (url, storepath, id) => {
  // for...of yields the stored objects themselves; for...in would yield index
  // strings, so the property comparisons below could never succeed.
  for (const obj of objs) {
    // Entries without a normalMdl property (e.g. recognizers) are skipped.
    if (typeof obj.normalMdl !== "undefined" && !obj.normalMdl && obj.url === url && obj.storepath === storepath && obj.id === id) {
      return obj
    }
  }
  return genericModel._init(url, storepath, id, false)
}
class Recognizer extends EventTarget {
@@ -68,48 +75,29 @@ class Recognizer extends EventTarget {
super()
objs.push(this)
}
static async _init1(model, sampleRate) {
let rec = new Recognizer()
static async _init(model, sampleRate, mode, grammar, spkModel) {
return new Promise((resolve, reject) => {
let rec = new Recognizer()
rec.addEventListener("_continue", (ev) => {
if(ev.detail == "0") {
if(ev.detail === "0") {
rec.ptr = Module._malloc(512)
return resolve(rec)
}
rec.delete()
reject(ev.detail)
}, {once : true})
rec.obj = new Module.recognizer(model, sampleRate, objs.length-1)
switch(mode) {
case 1:
rec.obj = new Module.recognizer(model, sampleRate, objs.length-1)
break
case 2:
rec.obj = new Module.recognizer(model, spkModel, sampleRate, objs.length-1)
break
default:
rec.obj = new Module.recognizer(model, grammar, sampleRate, objs.length-1, 0)
}
})
}
static async _init2(model, spkModel, sampleRate) {
let rec = new Recognizer()
return new Promise((resolve, reject) => {
rec.addEventListener("_continue", (ev) => {
if(ev.detail == "0") {
rec.ptr = Module._malloc(512)
return resolve(rec)
}
rec.delete()
reject(ev.detail)
}, {once : true})
rec.obj = new Module.recognizer(model, spkModel, sampleRate, objs.length-1)
})
}
static async _init3(model, grammar, sampleRate) {
let rec = new Recognizer()
return new Promise((resolve, reject) => {
rec.addEventListener("_continue", (ev) => {
if(ev.detail == "0") {
rec.ptr = Module._malloc(512)
return resolve(rec)
}
rec.delete()
reject(ev.detail)
}, {once : true})
rec.obj = new Module.recognizer(model, grammar, sampleRate, objs.length-1, 0)
})
}
}
async getNode(ctx, channelIndex = 0) {
if(typeof this.node === "undefined") {
let msgChannel = new MessageChannel()
@@ -149,13 +137,13 @@ class Recognizer extends EventTarget {
}
}
Module.makeRecognizer = (model, sampleRate) => {
return Recognizer._init(model.obj, sampleRate)
return Recognizer._init(model.obj, sampleRate, 1)
}
// Creates a recognizer that also uses a speaker model (mode 2 of Recognizer._init).
Module.makeRecognizerWithSpkModel = (model, sampleRate, spkModel) => {
  // Recognizer._init forwards this argument straight to Module.recognizer, so it
  // must be the wrapped native object (.obj), exactly as is done for `model`.
  return Recognizer._init(model.obj, sampleRate, 2, null, spkModel.obj)
}
Module.makeRecognizerWithGrm = (model, grammar, sampleRate) => {
return Recognizer._init3(model.obj, grammar, sampleRate)
Module.makeRecognizerWithGrm = (model, sampleRate, grammar) => {
return Recognizer._init(model.obj, sampleRate, 3, grammar, null)
}
let processorUrl = URL.createObjectURL(new Blob(['(',
(() => {
@@ -177,168 +165,10 @@ let processorUrl = URL.createObjectURL(new Blob(['(',
})
}).toString()
, ')()'], {type : "text/javascript"}))
/*
let pthreadUrl = URL.createObjectURL(new Blob(['(',
(() => {
/**
* @license
* Copyright 2015 The Emscripten Authors
* SPDX-License-Identifier: MIT
*/
// Pthread Web Worker startup routine:
// This is the entry point file that is loaded first by each Web Worker
// that executes pthreads on the Emscripten application.
'use strict';
var Module = {};
// Thread-local guard variable for one-time init of the JS state
var initializedJS = false;
function assert(condition, text) {
if (!condition) abort('Assertion failed: ' + text);
}
function threadPrintErr(...args) {
var text = args.join(' ');
console.error(text);
}
function threadAlert(...args) {
var text = args.join(' ');
postMessage({cmd: 'alert', text, threadId: Module['_pthread_self']()});
}
// We don't need out() for now, but may need to add it if we want to use it
// here. Or, if this code all moves into the main JS, that problem will go
// away. (For now, adding it here increases code size for no benefit.)
var out = () => { throw 'out() is not defined in worker.js.'; }
var err = threadPrintErr;
self.alert = threadAlert;
var dbg = threadPrintErr;
Module['instantiateWasm'] = (info, receiveInstance) => {
// Instantiate from the module posted from the main thread.
// We can just use sync instantiation in the worker.
var module = Module['wasmModule'];
// We don't need the module anymore; new threads will be spawned from the main thread.
Module['wasmModule'] = null;
var instance = new WebAssembly.Instance(module, info);
// TODO: Due to Closure regression https://github.com/google/closure-compiler/issues/3193,
// the above line no longer optimizes out down to the following line.
// When the regression is fixed, we can remove this if/else.
return receiveInstance(instance);
}
// Turn unhandled rejected promises into errors so that the main thread will be
// notified about them.
self.onunhandledrejection = (e) => {
throw e.reason || e;
};
function handleMessage(e) {
try {
if (e.data.cmd === 'load') { // Preload command that is called once per worker to parse and load the Emscripten code.
// Until we initialize the runtime, queue up any further incoming messages.
let messageQueue = [];
self.onmessage = (e) => messageQueue.push(e);
// And add a callback for when the runtime is initialized.
self.startWorker = (instance) => {
Module = instance;
// Notify the main thread that this thread has loaded.
postMessage({ 'cmd': 'loaded' });
// Process any messages that were queued before the thread was ready.
for (let msg of messageQueue) {
handleMessage(msg);
}
// Restore the real message handler.
self.onmessage = handleMessage;
};
// Module and memory were sent from main thread
Module['wasmModule'] = e.data.wasmModule;
// Use `const` here to ensure that the variable is scoped only to
// that iteration, allowing safe reference from a closure.
for (const handler of e.data.handlers) {
Module[handler] = (...args) => {
postMessage({ cmd: 'callHandler', handler, args: args });
}
}
Module['wasmMemory'] = e.data.wasmMemory;
Module['buffer'] = Module['wasmMemory'].buffer;
Module['workerID'] = e.data.workerID;
Module['ENVIRONMENT_IS_PTHREAD'] = true;
if (typeof e.data.urlOrBlob == 'string') {
importScripts(e.data.urlOrBlob);
} else {
var objectUrl = URL.createObjectURL(e.data.urlOrBlob);
importScripts(objectUrl);
URL.revokeObjectURL(objectUrl);
}
loadBR(Module);
} else if (e.data.cmd === 'run') {
// Pass the thread address to wasm to store it for fast access.
Module['__emscripten_thread_init'](e.data.pthread_ptr, /*is_main=*/0, /*is_runtime=*/0, /*can_block=*/1);
// Await mailbox notifications with `Atomics.waitAsync` so we can start
// using the fast `Atomics.notify` notification path.
Module['__emscripten_thread_mailbox_await'](e.data.pthread_ptr);
assert(e.data.pthread_ptr);
// Also call inside JS module to set up the stack frame for this pthread in JS module scope
Module['establishStackSpace']();
Module['PThread'].receiveObjectTransfer(e.data);
Module['PThread'].threadInitTLS();
if (!initializedJS) {
// Embind must initialize itself on all threads, as it generates support JS.
// We only do this once per worker since they get reused
Module['__embind_initialize_bindings']();
initializedJS = true;
}
try {
Module['invokeEntryPoint'](e.data.start_routine, e.data.arg);
} catch(ex) {
if (ex != 'unwind') {
// The pthread "crashed". Do not call `_emscripten_thread_exit` (which
// would make this thread joinable). Instead, re-throw the exception
// and let the top level handler propagate it back to the main thread.
throw ex;
}
}
} else if (e.data.cmd === 'cancel') { // Main thread is asking for a pthread_cancel() on this thread.
if (Module['_pthread_self']()) {
Module['__emscripten_thread_exit'](-1);
}
} else if (e.data.target === 'setimmediate') {
// no-op
} else if (e.data.cmd === 'checkMailbox') {
if (initializedJS) {
Module['checkMailbox']();
}
} else if (e.data.cmd) {
// The received message looks like something that should be handled by this message
// handler, (since there is a e.data.cmd field present), but is not one of the
// recognized commands:
err(`worker.js received unknown command ${e.data.cmd}`);
err(e.data);
}
} catch(ex) {
err(`worker.js onmessage() captured an uncaught exception: ${ex}`);
if (ex?.stack) err(ex.stack);
Module['__emscripten_thread_crashed']?.();
throw ex;
}
};
self.onmessage = handleMessage;
}).toString()
, ')()'], {type : "text/javascript"}))
, ')()'], {type : "text/javascript"}))
*/

View File

@@ -5,9 +5,9 @@
struct recognizer {
std::atomic_flag done{};
std::atomic_flag controller{};
float* dataPtr{};
int index{};
VoskRecognizer* rec{};
float* dataPtr;
int index;
VoskRecognizer* rec;
recognizer(genericModel* model, float sampleRate, int index);
recognizer(genericModel* model, genericModel* spkModel, float sampleRate, int index);
recognizer(genericModel* model, const std::string& grm, float sampleRate, int index, int dummy);