From 9b53f6c6b01da85ef73bd5960b32ec319df0b947 Mon Sep 17 00:00:00 2001 From: soryu Date: Wed, 28 Jan 2026 03:50:45 +0000 Subject: Replace TTS endpoint with Rust-native Qwen3-TTS (#41) * chore: fix unused import warnings in qwen3-tts module - Remove unused import 'IndexOp' in model.rs - Remove unused import 'DType' in speech_tokenizer.rs - Add #[allow(dead_code)] to codebook_dim field in RvqCodebook Co-Authored-By: Claude Opus 4.5 * feat: add voice loading and selection for TTS cloning Add voice reference audio loading so the TTS speak handler can perform voice cloning using reference WAV files from the voices/ directory. - Add voice.rs module: loads manifest.json and reference.wav for a given voice_id, decodes via symphonia, resamples to 24kHz for the TTS engine - Update speak.rs: resolve voice_id from the speak request (default "makima"), load reference audio, pass it to engine.generate() - Add voices/makima/README.md with instructions for obtaining reference audio (extraction from YouTube, recording, ffmpeg conversion) - Graceful fallback: if reference audio is missing, TTS proceeds without voice cloning using the model's default voice Co-Authored-By: Claude Opus 4.5 * [WIP] Heartbeat checkpoint - 2026-01-28 03:49:13 UTC --------- Co-authored-by: Claude Opus 4.5 --- makima/src/tts/chatterbox.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'makima/src/tts/chatterbox.rs') diff --git a/makima/src/tts/chatterbox.rs b/makima/src/tts/chatterbox.rs index e26bc06..712910f 100644 --- a/makima/src/tts/chatterbox.rs +++ b/makima/src/tts/chatterbox.rs @@ -6,7 +6,8 @@ use std::borrow::Cow; use std::fs; use std::path::{Path, PathBuf}; -use std::sync::Mutex; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex}; use hf_hub::api::sync::Api; use ndarray::{Array2, Array3, Array4, ArrayD, IxDyn}; @@ -427,6 +428,7 @@ impl TtsEngine for ChatterboxTTS { text: &str, reference_audio: Option<&[f32]>, reference_sample_rate: Option, + _cancel_flag: 
Option<Arc<AtomicBool>>, ) -> Result, TtsError> { let samples = match reference_audio { Some(audio) => { -- cgit v1.2.3