summary | refs | log | tree | commit | diff
path: root/makima/src/server
diff options
context:
space:
mode:
author: soryu <soryu@soryu.co> 2026-02-01 03:04:36 +0000
committer: soryu <soryu@soryu.co> 2026-02-01 03:04:36 +0000
commit: a2c147ddd59f55a07b5be0c8970169726b55c876 (patch)
tree: e41a80f2dfdd8fcaf6b6e91c899392f4e619ca3e /makima/src/server
parent: 65eebd078af712d004a5a9e28863a16df30792a6 (diff)
download: soryu-a2c147ddd59f55a07b5be0c8970169726b55c876.tar.gz
soryu-a2c147ddd59f55a07b5be0c8970169726b55c876.zip
Use chatterbox TTS
Diffstat (limited to 'makima/src/server')
-rw-r--r-- makima/src/server/handlers/speak.rs | 5
-rw-r--r-- makima/src/server/state.rs | 16
2 files changed, 10 insertions, 11 deletions
diff --git a/makima/src/server/handlers/speak.rs b/makima/src/server/handlers/speak.rs
index 3ed2620..b235c65 100644
--- a/makima/src/server/handlers/speak.rs
+++ b/makima/src/server/handlers/speak.rs
@@ -1,19 +1,18 @@
//! WebSocket handler for TTS streaming (direct in-process inference).
//!
//! This module implements the `/api/v1/speak` endpoint which performs
-//! text-to-speech synthesis directly using the candle-based TTS engine.
+//! text-to-speech synthesis directly using the Chatterbox ONNX TTS engine.
//! No external Python service or proxy — the model runs in-process.
//!
//! ## Architecture
//!
//! The speak handler will:
//! 1. Accept a WebSocket connection from the client
-//! 2. Lazily load the TTS model (candle) on first request
+//! 2. Lazily load the TTS model (Chatterbox ONNX) on first request
//! 3. Parse JSON control messages (start, speak, stop, cancel)
//! 4. Run inference directly and stream audio chunks back
//!
//! See `makima/src/tts/` for the TTS engine implementation.
-//! See `docs/specs/qwen3-tts-spec.md` for the full protocol specification.
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
diff --git a/makima/src/server/state.rs b/makima/src/server/state.rs
index f662e30..bd6864f 100644
--- a/makima/src/server/state.rs
+++ b/makima/src/server/state.rs
@@ -560,7 +560,7 @@ pub struct ModelConfig {
pub parakeet_model_dir: String,
pub parakeet_eou_dir: String,
pub sortformer_model_path: String,
- pub qwen3_tts_dir: String,
+ pub chatterbox_model_dir: String,
}
/// Lazily-loaded ML models.
@@ -619,12 +619,12 @@ impl AppState {
/// * `parakeet_model_dir` - Path to the Parakeet TDT model directory
/// * `parakeet_eou_dir` - Path to the Parakeet EOU model directory
/// * `sortformer_model_path` - Path to the Sortformer diarization model file
- /// * `qwen3_tts_dir` - Path to the Qwen3-TTS model directory
+ /// * `chatterbox_model_dir` - Path to the Chatterbox TTS model directory
pub fn new(
parakeet_model_dir: &str,
parakeet_eou_dir: &str,
sortformer_model_path: &str,
- qwen3_tts_dir: &str,
+ chatterbox_model_dir: &str,
) -> Self {
// Create broadcast channels with buffer for 256 messages
let (file_updates, _) = broadcast::channel(256);
@@ -668,7 +668,7 @@ impl AppState {
parakeet_model_dir: parakeet_model_dir.to_string(),
parakeet_eou_dir: parakeet_eou_dir.to_string(),
sortformer_model_path: sortformer_model_path.to_string(),
- qwen3_tts_dir: qwen3_tts_dir.to_string(),
+ chatterbox_model_dir: chatterbox_model_dir.to_string(),
}),
ml_models: OnceCell::new(),
db_pool: None,
@@ -691,17 +691,17 @@ impl AppState {
/// Get or initialize the TTS engine (lazy loading).
///
- /// The TTS engine is loaded on first Speak connection using the Qwen3 backend.
+ /// The TTS engine is loaded on first Speak connection using the Chatterbox backend.
/// Returns a reference to the engine, or an error if loading fails.
pub async fn get_tts_engine(&self) -> Result<&dyn TtsEngine, Box<dyn std::error::Error + Send + Sync>> {
- let tts_dir = self.model_config.as_ref().map(|c| c.qwen3_tts_dir.as_str());
+ let tts_dir = self.model_config.as_ref().map(|c| c.chatterbox_model_dir.as_str());
self.tts_engine.get_or_try_init(|| async {
tracing::info!(
model_dir = ?tts_dir,
- "Lazy-loading TTS engine (Qwen3) on first Speak connection..."
+ "Lazy-loading TTS engine (Chatterbox) on first Speak connection..."
);
let engine = crate::tts::TtsEngineFactory::create(
- crate::tts::TtsBackend::Qwen3,
+ crate::tts::TtsBackend::Chatterbox,
tts_dir,
).map_err(|e| -> Box<dyn std::error::Error + Send + Sync> {
Box::new(e)