summary refs log tree commit diff
path: root/parakeet-rs/examples/transcribe.rs
diff options
context:
space:
mode:
Diffstat (limited to 'parakeet-rs/examples/transcribe.rs')
-rw-r--r-- parakeet-rs/examples/transcribe.rs 106
1 files changed, 106 insertions, 0 deletions
diff --git a/parakeet-rs/examples/transcribe.rs b/parakeet-rs/examples/transcribe.rs
new file mode 100644
index 0000000..685e8de
--- /dev/null
+++ b/parakeet-rs/examples/transcribe.rs
@@ -0,0 +1,106 @@
+/*
+transcribes entire audio, no diarization
+wget https://github.com/thewh1teagle/pyannote-rs/releases/download/v0.1.0/6_speakers.wav
+
+CTC (English-only):
+cargo run --example transcribe 6_speakers.wav
+
+TDT (Multilingual):
+cargo run --example transcribe 6_speakers.wav tdt
+
+NOTE: For manual audio loading without using transcribe_file(), see examples/raw.rs
+- Shows transcribe_samples(audio, sample_rate, channels, timestamps) usage
+
+WARNING: This may fail on very long audio files (>8 min).
+For longer audio, use the pyannote example which processes segments, or split your audio into chunks.
+
+Note: The coreml feature flag is only for reproducing a known ONNX Runtime bug.
+Just ignore it :). See: https://github.com/microsoft/onnxruntime/issues/26355
+*/
+use parakeet_rs::{Parakeet, TimestampMode};
+use std::env;
+use std::time::Instant;
+
+#[cfg(feature = "coreml")]
+use parakeet_rs::{ExecutionConfig, ExecutionProvider};
+
+/// Transcribes an entire audio file and prints the text with timestamps.
+///
+/// Command-line arguments:
+/// - 1st (optional): path to the audio file; defaults to `6_speakers.wav`.
+/// - 2nd (optional): `tdt` selects the TDT model loaded from `./tdt`; anything
+///   else (or nothing) selects the CTC model loaded from `.`.
+///
+/// With the `coreml` feature enabled, both models are created with the CoreML
+/// execution provider; otherwise no execution config is passed (`None`).
+///
+/// # Errors
+///
+/// Propagates any error returned while loading the model or transcribing.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Started before the model is loaded, so the reported time includes loading.
+    let start_time = Instant::now();
+
+    let args: Vec<String> = env::args().collect();
+    let audio_path = args.get(1).map(String::as_str).unwrap_or("6_speakers.wav");
+    let use_tdt = args.get(2).map(String::as_str) == Some("tdt");
+
+    if use_tdt {
+        // TDT model (multilingual, 25 languages)
+        // Only model construction depends on the `coreml` feature; transcription
+        // and printing are shared so the two configurations cannot drift apart.
+        #[cfg(feature = "coreml")]
+        let mut parakeet = {
+            let config = ExecutionConfig::new().with_execution_provider(ExecutionProvider::CoreML);
+            parakeet_rs::ParakeetTDT::from_pretrained("./tdt", Some(config))?
+        };
+
+        #[cfg(not(feature = "coreml"))]
+        let mut parakeet = parakeet_rs::ParakeetTDT::from_pretrained("./tdt", None)?;
+
+        let result = parakeet.transcribe_file(audio_path, Some(TimestampMode::Sentences))?;
+
+        // Print transcription
+        println!("{}", result.text);
+
+        // Sentence-level timestamps
+        println!("\nSentences:");
+        for segment in result.tokens.iter() {
+            println!("[{:.2}s - {:.2}s]: {}", segment.start, segment.end, segment.text);
+        }
+    } else {
+        // CTC model (English-only)
+        #[cfg(feature = "coreml")]
+        let mut parakeet = {
+            let config = ExecutionConfig::new().with_execution_provider(ExecutionProvider::CoreML);
+            Parakeet::from_pretrained(".", Some(config))?
+        };
+
+        // Default: CPU execution provider (works correctly)
+        // Auto-detects model with priority: model.onnx > model_fp16.onnx > model_int8.onnx > model_q4.onnx
+        // Or specify exact model: Parakeet::from_pretrained("model_q4.onnx", None)?
+        #[cfg(not(feature = "coreml"))]
+        let mut parakeet = Parakeet::from_pretrained(".", None)?;
+
+        // CTC model doesn't predict punctuation (lowercase alphabet only)
+        // This means no sentence boundaries - use Words mode instead of Sentences
+        let result = parakeet.transcribe_file(audio_path, Some(TimestampMode::Words))?;
+
+        // Print transcription
+        println!("{}", result.text);
+
+        // Access word-level timestamps (showing first 10 for brevity)
+        // Note: CTC generates word-level timestamps but cannot segment into sentences
+        // due to lack of punctuation prediction - this is a model limitation
+        println!("\nWords (first 10):");
+        for word in result.tokens.iter().take(10) {
+            println!("[{:.2}s - {:.2}s]: {}", word.start, word.end, word.text);
+        }
+    }
+
+    // Both branches fall through to here, so timing is reported exactly once.
+    let elapsed = start_time.elapsed();
+    println!("\n✓ Transcription completed in {:.2}s", elapsed.as_secs_f32());
+
+    Ok(())
+}