summary | refs | log | tree | commit | diff
path: root/makima/src/tts/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'makima/src/tts/mod.rs')
-rw-r--r-- makima/src/tts/mod.rs 44
1 files changed, 6 insertions, 38 deletions
diff --git a/makima/src/tts/mod.rs b/makima/src/tts/mod.rs
index b66f4a5..31f4204 100644
--- a/makima/src/tts/mod.rs
+++ b/makima/src/tts/mod.rs
@@ -1,19 +1,15 @@
//! TTS engine abstraction and implementations.
//!
-//! Provides a trait-based TTS engine interface with two backends:
-//! - **Chatterbox**: ONNX-based TTS (legacy)
-//! - **Qwen3**: Pure Rust candle-based Qwen3-TTS-12Hz-0.6B
+//! Provides a trait-based TTS engine interface using Chatterbox ONNX-based TTS.
use std::path::Path;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
pub mod chatterbox;
-pub mod qwen3;
// Re-export primary types
pub use chatterbox::ChatterboxTTS;
-pub use qwen3::Qwen3Tts;
/// Audio output sample rate in Hz (24 kHz).
pub const SAMPLE_RATE: u32 = 24_000;
@@ -51,8 +47,6 @@ pub enum TtsError {
Audio(crate::audio::AudioError),
Io(std::io::Error),
VoiceRequired,
- Config(String),
- Candle(String),
}
impl std::fmt::Display for TtsError {
@@ -66,8 +60,6 @@ impl std::fmt::Display for TtsError {
TtsError::VoiceRequired => {
write!(f, "voice reference audio is required")
}
- TtsError::Config(msg) => write!(f, "config error: {msg}"),
- TtsError::Candle(msg) => write!(f, "candle error: {msg}"),
}
}
}
@@ -92,22 +84,7 @@ impl From<ort::Error> for TtsError {
}
}
-impl From<candle_core::Error> for TtsError {
- fn from(value: candle_core::Error) -> Self {
- TtsError::Candle(value.to_string())
- }
-}
-
-/// Which TTS backend to use.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum TtsBackend {
- /// ONNX-based Chatterbox TTS (legacy).
- Chatterbox,
- /// Candle-based Qwen3-TTS (preferred).
- Qwen3,
-}
-
-/// TTS engine trait — implemented by both Chatterbox and Qwen3.
+/// TTS engine trait for text-to-speech synthesis.
#[async_trait::async_trait]
pub trait TtsEngine: Send + Sync {
/// Generate complete audio from text with a voice reference.
@@ -137,19 +114,10 @@ pub trait TtsEngine: Send + Sync {
pub struct TtsEngineFactory;
impl TtsEngineFactory {
- /// Create a TTS engine of the specified backend type.
- pub fn create(backend: TtsBackend, model_dir: Option<&str>) -> Result<Box<dyn TtsEngine>, TtsError> {
- match backend {
- TtsBackend::Chatterbox => {
- let engine = ChatterboxTTS::from_pretrained(model_dir)?;
- Ok(Box::new(engine))
- }
- TtsBackend::Qwen3 => {
- let device = candle_core::Device::Cpu; // Default to CPU; GPU selection happens at higher level
- let engine = Qwen3Tts::from_pretrained(model_dir, &device)?;
- Ok(Box::new(engine))
- }
- }
+ /// Create a Chatterbox TTS engine.
+ pub fn create(model_dir: Option<&str>) -> Result<Box<dyn TtsEngine>, TtsError> {
+ let engine = ChatterboxTTS::from_pretrained(model_dir)?;
+ Ok(Box::new(engine))
}
}