summaryrefslogtreecommitdiff
path: root/parakeet-rs/examples/transcribe.rs
diff options
context:
space:
mode:
Diffstat (limited to 'parakeet-rs/examples/transcribe.rs')
-rw-r--r--parakeet-rs/examples/transcribe.rs106
1 files changed, 0 insertions, 106 deletions
diff --git a/parakeet-rs/examples/transcribe.rs b/parakeet-rs/examples/transcribe.rs
deleted file mode 100644
index 685e8de..0000000
--- a/parakeet-rs/examples/transcribe.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
-transcribes entire audio, no diarization
-wget https://github.com/thewh1teagle/pyannote-rs/releases/download/v0.1.0/6_speakers.wav
-
-CTC (English-only):
-cargo run --example transcribe 6_speakers.wav
-
-TDT (Multilingual):
-cargo run --example transcribe 6_speakers.wav tdt
-
-NOTE: For manual audio loading without using transcribe_file(), see examples/raw.rs
-- Shows transcribe_samples(audio, sample_rate, channels, timestamps) usage
-
-WARNING: This may fail on very long audio files (>8 min).
-For longer audio, use the pyannote example which processes segments, or split your audio into chunks.
-
-Note: The coreml feature flag is only for reproducing a known ONNX Runtime bug.
-Just ignore it :). See: https://github.com/microsoft/onnxruntime/issues/26355
-*/
-use parakeet_rs::{Parakeet, TimestampMode};
-use std::env;
-use std::time::Instant;
-
-#[cfg(feature = "coreml")]
-use parakeet_rs::{ExecutionConfig, ExecutionProvider};
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- let start_time = Instant::now();
- let args: Vec<String> = env::args().collect();
- let audio_path = if args.len() > 1 {
- &args[1]
- } else {
- "6_speakers.wav"
- };
-
- let use_tdt = args.len() > 2 && args[2] == "tdt";
-
- // TDT model (multilingual, 25 languages)
- if use_tdt {
- #[cfg(feature = "coreml")]
- {
- let config = ExecutionConfig::new().with_execution_provider(ExecutionProvider::CoreML);
- let mut parakeet = parakeet_rs::ParakeetTDT::from_pretrained("./tdt", Some(config))?;
- let result = parakeet.transcribe_file(audio_path, Some(TimestampMode::Sentences))?;
- println!("{}", result.text);
-
- println!("\nSentencess:");
- for segment in result.tokens.iter() {
- println!("[{:.2}s - {:.2}s]: {}", segment.start, segment.end, segment.text);
- }
-
- let elapsed = start_time.elapsed();
- println!("\n✓ Transcription completed in {:.2}s", elapsed.as_secs_f32());
- return Ok(());
- }
-
- #[cfg(not(feature = "coreml"))]
- {
- let mut parakeet = parakeet_rs::ParakeetTDT::from_pretrained("./tdt", None)?;
- let result = parakeet.transcribe_file(audio_path, Some(TimestampMode::Sentences))?;
- println!("{}", result.text);
-
- println!("\nSentencess:");
- for segment in result.tokens.iter() {
- println!("[{:.2}s - {:.2}s]: {}", segment.start, segment.end, segment.text);
- }
-
- let elapsed = start_time.elapsed();
- println!("\n✓ Transcription completed in {:.2}s", elapsed.as_secs_f32());
- return Ok(());
- }
- }
-
- // CTC model (English-only)
- #[cfg(feature = "coreml")]
- let mut parakeet = {
- let config = ExecutionConfig::new().with_execution_provider(ExecutionProvider::CoreML);
- Parakeet::from_pretrained(".", Some(config))?
- };
-
- // Default: CPU execution provider (works correctly)
- // Auto-detects model with priority: model.onnx > model_fp16.onnx > model_int8.onnx > model_q4.onnx
- // Or specify exact model: Parakeet::from_pretrained("model_q4.onnx", None)?
- #[cfg(not(feature = "coreml"))]
- let mut parakeet = Parakeet::from_pretrained(".", None)?;
-
- // CTC model doesn't predict punctuation (lowercase alphabet only)
- // This means no sentence boundaries - use Words mode instead of Sentences
- let result = parakeet.transcribe_file(audio_path, Some(TimestampMode::Words))?;
-
- // Print transcription
- println!("{}", result.text);
-
- // Access word-level timestamps (showing first 10 for brevity)
- // Note: CTC generates word-level timestamps but cannot segment into sentences
- // due to lack of punctuation prediction - this is a model limitation
- println!("\nWords (first 10):");
- for word in result.tokens.iter().take(10) {
- println!("[{:.2}s - {:.2}s]: {}", word.start, word.end, word.text);
- }
-
- let elapsed = start_time.elapsed();
- println!("\n✓ Transcription completed in {:.2}s", elapsed.as_secs_f32());
-
- Ok(())
-}