From ab9166170043ba5e0ce974e5b7accf0939d686e3 Mon Sep 17 00:00:00 2001
From: soryu
Date: Fri, 19 Dec 2025 04:43:59 +0000
Subject: Experiment: ChatterBoxTTS

---
 makima/src/main.rs | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'makima/src/main.rs')

diff --git a/makima/src/main.rs b/makima/src/main.rs
index 9097ef6..2348b23 100644
--- a/makima/src/main.rs
+++ b/makima/src/main.rs
@@ -1,6 +1,44 @@
+use std::path::Path;
+use crate::tts::{save_wav, ChatterboxTTS};
+
+mod audio;
 mod listen;
+pub mod tts;
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("Loading ChatterboxTTS...");
+    let mut tts = ChatterboxTTS::from_pretrained(None)?;
+    println!("Model loaded successfully!");
+
+    // // Voice cloning using existing audio file
+    // println!("Generating TTS with voice cloning...");
+    // let audio = tts.generate_tts_with_voice(
+    //     "Hello, this is a test of the voice cloning system.",
+    //     Path::new("audio.wav")
+    // )?;
+    //
+    // println!("Generated {} samples", audio.len());
+    // save_wav(&audio, Path::new("output.wav"))?;
+    // println!("Saved to output.wav");
+
+
+    // Load reference audio from mp3
+    println!("Loading reference audio...");
+    let reference = audio::to_16k_mono_from_path(Path::new("audio.mp3"))?;
+    let samples = &reference.samples;
+    let sample_rate = reference.sample_rate;
+
+    // Voice cloning using audio samples
+    println!("Generating TTS with voice cloning...");
+    let audio = tts.generate_tts_with_samples(
+        "Hello, this is a test of the voice cloning system [chuckles]. Repeat after me \" I am Steve Jobs!\"",
+        samples,
+        sample_rate,
+    )?;
+
+    println!("Generated {} samples", audio.len());
+    save_wav(&audio, Path::new("output.wav"))?;
+    println!("Saved to output.wav");
     let segments = listen::listen()?;
     println!("Captured {} diarized segments", segments.len());
     Ok(())
-- 
cgit v1.2.3