diff options
| author | soryu <soryu@soryu.co> | 2026-02-01 03:04:36 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2026-02-01 03:04:36 +0000 |
| commit | a2c147ddd59f55a07b5be0c8970169726b55c876 (patch) | |
| tree | e41a80f2dfdd8fcaf6b6e91c899392f4e619ca3e /makima/src/server | |
| parent | 65eebd078af712d004a5a9e28863a16df30792a6 (diff) | |
| download | soryu-a2c147ddd59f55a07b5be0c8970169726b55c876.tar.gz soryu-a2c147ddd59f55a07b5be0c8970169726b55c876.zip | |
Use chatterbox TTS
Diffstat (limited to 'makima/src/server')
| -rw-r--r-- | makima/src/server/handlers/speak.rs | 5 | ||||
| -rw-r--r-- | makima/src/server/state.rs | 16 |
2 files changed, 10 insertions, 11 deletions
```diff
diff --git a/makima/src/server/handlers/speak.rs b/makima/src/server/handlers/speak.rs
index 3ed2620..b235c65 100644
--- a/makima/src/server/handlers/speak.rs
+++ b/makima/src/server/handlers/speak.rs
@@ -1,19 +1,18 @@
 //! WebSocket handler for TTS streaming (direct in-process inference).
 //!
 //! This module implements the `/api/v1/speak` endpoint which performs
-//! text-to-speech synthesis directly using the candle-based TTS engine.
+//! text-to-speech synthesis directly using the Chatterbox ONNX TTS engine.
 //! No external Python service or proxy — the model runs in-process.
 //!
 //! ## Architecture
 //!
 //! The speak handler will:
 //! 1. Accept a WebSocket connection from the client
-//! 2. Lazily load the TTS model (candle) on first request
+//! 2. Lazily load the TTS model (Chatterbox ONNX) on first request
 //! 3. Parse JSON control messages (start, speak, stop, cancel)
 //! 4. Run inference directly and stream audio chunks back
 //!
 //! See `makima/src/tts/` for the TTS engine implementation.
-//! See `docs/specs/qwen3-tts-spec.md` for the full protocol specification.
 
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
diff --git a/makima/src/server/state.rs b/makima/src/server/state.rs
index f662e30..bd6864f 100644
--- a/makima/src/server/state.rs
+++ b/makima/src/server/state.rs
@@ -560,7 +560,7 @@ pub struct ModelConfig {
     pub parakeet_model_dir: String,
     pub parakeet_eou_dir: String,
     pub sortformer_model_path: String,
-    pub qwen3_tts_dir: String,
+    pub chatterbox_model_dir: String,
 }
 
 /// Lazily-loaded ML models.
@@ -619,12 +619,12 @@ impl AppState {
     /// * `parakeet_model_dir` - Path to the Parakeet TDT model directory
     /// * `parakeet_eou_dir` - Path to the Parakeet EOU model directory
     /// * `sortformer_model_path` - Path to the Sortformer diarization model file
-    /// * `qwen3_tts_dir` - Path to the Qwen3-TTS model directory
+    /// * `chatterbox_model_dir` - Path to the Chatterbox TTS model directory
     pub fn new(
         parakeet_model_dir: &str,
         parakeet_eou_dir: &str,
         sortformer_model_path: &str,
-        qwen3_tts_dir: &str,
+        chatterbox_model_dir: &str,
     ) -> Self {
         // Create broadcast channels with buffer for 256 messages
         let (file_updates, _) = broadcast::channel(256);
@@ -668,7 +668,7 @@ impl AppState {
                 parakeet_model_dir: parakeet_model_dir.to_string(),
                 parakeet_eou_dir: parakeet_eou_dir.to_string(),
                 sortformer_model_path: sortformer_model_path.to_string(),
-                qwen3_tts_dir: qwen3_tts_dir.to_string(),
+                chatterbox_model_dir: chatterbox_model_dir.to_string(),
             }),
             ml_models: OnceCell::new(),
             db_pool: None,
@@ -691,17 +691,17 @@ impl AppState {
 
     /// Get or initialize the TTS engine (lazy loading).
     ///
-    /// The TTS engine is loaded on first Speak connection using the Qwen3 backend.
+    /// The TTS engine is loaded on first Speak connection using the Chatterbox backend.
     /// Returns a reference to the engine, or an error if loading fails.
     pub async fn get_tts_engine(&self) -> Result<&dyn TtsEngine, Box<dyn std::error::Error + Send + Sync>> {
-        let tts_dir = self.model_config.as_ref().map(|c| c.qwen3_tts_dir.as_str());
+        let tts_dir = self.model_config.as_ref().map(|c| c.chatterbox_model_dir.as_str());
         self.tts_engine.get_or_try_init(|| async {
             tracing::info!(
                 model_dir = ?tts_dir,
-                "Lazy-loading TTS engine (Qwen3) on first Speak connection..."
+                "Lazy-loading TTS engine (Chatterbox) on first Speak connection..."
             );
             let engine = crate::tts::TtsEngineFactory::create(
-                crate::tts::TtsBackend::Qwen3,
+                crate::tts::TtsBackend::Chatterbox,
                 tts_dir,
             ).map_err(|e| -> Box<dyn std::error::Error + Send + Sync> {
                 Box::new(e)
```
