diff options
Diffstat (limited to 'makima/src/tts/mod.rs')
| -rw-r--r-- | makima/src/tts/mod.rs | 44 |
1 files changed, 6 insertions, 38 deletions
diff --git a/makima/src/tts/mod.rs b/makima/src/tts/mod.rs index b66f4a5..31f4204 100644 --- a/makima/src/tts/mod.rs +++ b/makima/src/tts/mod.rs @@ -1,19 +1,15 @@ //! TTS engine abstraction and implementations. //! -//! Provides a trait-based TTS engine interface with two backends: -//! - **Chatterbox**: ONNX-based TTS (legacy) -//! - **Qwen3**: Pure Rust candle-based Qwen3-TTS-12Hz-0.6B +//! Provides a trait-based TTS engine interface using Chatterbox ONNX-based TTS. use std::path::Path; use std::sync::atomic::AtomicBool; use std::sync::Arc; pub mod chatterbox; -pub mod qwen3; // Re-export primary types pub use chatterbox::ChatterboxTTS; -pub use qwen3::Qwen3Tts; /// Audio output sample rate (both engines output 24kHz). pub const SAMPLE_RATE: u32 = 24_000; @@ -51,8 +47,6 @@ pub enum TtsError { Audio(crate::audio::AudioError), Io(std::io::Error), VoiceRequired, - Config(String), - Candle(String), } impl std::fmt::Display for TtsError { @@ -66,8 +60,6 @@ impl std::fmt::Display for TtsError { TtsError::VoiceRequired => { write!(f, "voice reference audio is required") } - TtsError::Config(msg) => write!(f, "config error: {msg}"), - TtsError::Candle(msg) => write!(f, "candle error: {msg}"), } } } @@ -92,22 +84,7 @@ impl From<ort::Error> for TtsError { } } -impl From<candle_core::Error> for TtsError { - fn from(value: candle_core::Error) -> Self { - TtsError::Candle(value.to_string()) - } -} - -/// Which TTS backend to use. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum TtsBackend { - /// ONNX-based Chatterbox TTS (legacy). - Chatterbox, - /// Candle-based Qwen3-TTS (preferred). - Qwen3, -} - -/// TTS engine trait — implemented by both Chatterbox and Qwen3. +/// TTS engine trait for text-to-speech synthesis. #[async_trait::async_trait] pub trait TtsEngine: Send + Sync { /// Generate complete audio from text with a voice reference. @@ -137,19 +114,10 @@ pub trait TtsEngine: Send + Sync { pub struct TtsEngineFactory; impl TtsEngineFactory { - /// Create a TTS engine of the specified backend type. - pub fn create(backend: TtsBackend, model_dir: Option<&str>) -> Result<Box<dyn TtsEngine>, TtsError> { - match backend { - TtsBackend::Chatterbox => { - let engine = ChatterboxTTS::from_pretrained(model_dir)?; - Ok(Box::new(engine)) - } - TtsBackend::Qwen3 => { - let device = candle_core::Device::Cpu; // Default to CPU; GPU selection happens at higher level - let engine = Qwen3Tts::from_pretrained(model_dir, &device)?; - Ok(Box::new(engine)) - } - } + /// Create a Chatterbox TTS engine. + pub fn create(model_dir: Option<&str>) -> Result<Box<dyn TtsEngine>, TtsError> { + let engine = ChatterboxTTS::from_pretrained(model_dir)?; + Ok(Box::new(engine)) } } |
