summaryrefslogtreecommitdiff
path: root/makima/src/server/state.rs
diff options
context:
space:
mode:
Diffstat (limited to 'makima/src/server/state.rs')
-rw-r--r--makima/src/server/state.rs22
1 files changed, 22 insertions, 0 deletions
diff --git a/makima/src/server/state.rs b/makima/src/server/state.rs
index 1bc7d7e..bf8f6f2 100644
--- a/makima/src/server/state.rs
+++ b/makima/src/server/state.rs
@@ -8,6 +8,7 @@ use uuid::Uuid;
use crate::listen::{DiarizationConfig, ParakeetEOU, ParakeetTDT, Sortformer};
use crate::server::auth::{AuthConfig, JwtVerifier};
+use crate::tts::TtsEngine;
/// Notification payload for file updates (broadcast to WebSocket subscribers).
#[derive(Debug, Clone)]
@@ -599,6 +600,8 @@ pub struct AppState {
pub jwt_verifier: Option<JwtVerifier>,
/// Pending worktree info requests awaiting daemon response (keyed by task_id)
pub pending_worktree_info: DashMap<Uuid, oneshot::Sender<WorktreeInfoResponse>>,
+ /// Lazily-loaded TTS engine (initialized on first Speak connection)
+ pub tts_engine: OnceCell<Box<dyn TtsEngine>>,
}
impl AppState {
@@ -673,9 +676,28 @@ impl AppState {
tool_keys: DashMap::new(),
jwt_verifier,
pending_worktree_info: DashMap::new(),
+ tts_engine: OnceCell::new(),
}
}
+ /// Get the shared TTS engine, creating it on first use (lazy loading).
+ ///
+ /// The engine is built by `TtsEngineFactory::create` with the Qwen3 backend and cached in `self.tts_engine`.
+ /// Returns a borrow of the cached engine; a creation failure is boxed and returned as the `Err` value.
+ pub async fn get_tts_engine(&self) -> Result<&dyn TtsEngine, Box<dyn std::error::Error + Send + Sync>> {
+ self.tts_engine.get_or_try_init(|| async { // NOTE(review): presumably tokio's OnceCell (async initializer) — confirm against imports
+ tracing::info!("Lazy-loading TTS engine (Qwen3) on first Speak connection...");
+ let engine = crate::tts::TtsEngineFactory::create(
+ crate::tts::TtsBackend::Qwen3,
+ None, // No model directory override; presumably the factory falls back to its default — confirm
+ ).map_err(|e| -> Box<dyn std::error::Error + Send + Sync> { // erase the factory's error type to match the signature
+ Box::new(e)
+ })?;
+ tracing::info!("TTS engine loaded successfully");
+ Ok(engine)
+ }).await.map(|b| b.as_ref()) // hand out `&dyn TtsEngine` rather than the `&Box<dyn TtsEngine>` stored in the cell
+ }
+
/// Get or initialize ML models (lazy loading).
///
/// Models are loaded on first call and cached for subsequent calls.