summary refs log tree commit diff
path: root/parakeet-rs/src/lib.rs
diff options
context:
space:
mode:
author soryu <soryu@soryu.co> 2025-12-21 00:40:04 +0000
committer soryu <soryu@soryu.co> 2025-12-23 14:47:18 +0000
commit 55cacf6e1a087c0fa6950a1ddeb09060f787e541 (patch)
tree 0b8e754eb16c829fc0ee7c8f4ba66fe75b4f3ebf /parakeet-rs/src/lib.rs
parent 84fee5ce2ae30fb2381c99b9b223b8235b962869 (diff)
download soryu-55cacf6e1a087c0fa6950a1ddeb09060f787e541.tar.gz
soryu-55cacf6e1a087c0fa6950a1ddeb09060f787e541.zip
Add EOU detection and streaming diarization
Diffstat (limited to 'parakeet-rs/src/lib.rs')
-rw-r--r-- parakeet-rs/src/lib.rs 74
1 files changed, 74 insertions, 0 deletions
diff --git a/parakeet-rs/src/lib.rs b/parakeet-rs/src/lib.rs
new file mode 100644
index 0000000..0aaefd1
--- /dev/null
+++ b/parakeet-rs/src/lib.rs
@@ -0,0 +1,74 @@
+//! # parakeet-rs
+//!
+//! Rust bindings for NVIDIA's Parakeet speech recognition model using ONNX Runtime.
+//!
+//! Parakeet is a state-of-the-art automatic speech recognition (ASR) model developed by NVIDIA,
+//! based on the FastConformer-TDT architecture with 600 million parameters.
+//!
+//! ## Features
+//!
+//! - Easy-to-use API for speech-to-text transcription
+//! - Support for ONNX format models
+//! - 16kHz mono audio input
+//! - Punctuation and capitalization included in output
+//! - Fast inference using ONNX Runtime
+//!
+//! ## Quick Start
+//!
+//! ```ignore
+//! use parakeet_rs::Parakeet;
+//!
+//! // Load the model
+//! let parakeet = Parakeet::from_pretrained(".")?;
+//!
+//! // Transcribe audio file
+//! let text = parakeet.transcribe_file("audio.wav")?;
+//! println!("Transcription: {}", text);
+//! ```
+//!
+//! ## Model Requirements
+//!
+//! Your model directory should contain:
+//! - `model.onnx` - The ONNX model file
+//! - `model.onnx_data` - External model weights
+//! - `config.json` - Model configuration
+//! - `preprocessor_config.json` - Audio preprocessing configuration
+//! - `tokenizer.json` - Tokenizer vocabulary
+//! - `tokenizer_config.json` - Tokenizer configuration
+//!
+//! ## Audio Requirements
+//!
+//! - Format: WAV
+//! - Sample Rate: 16kHz
+//! - Channels: Mono (stereo will be converted automatically)
+//! - Bit Depth: 16-bit PCM or 32-bit float
+
+mod audio;
+mod config;
+mod decoder;
+mod decoder_tdt;
+mod error;
+mod execution;
+mod model;
+mod model_tdt;
+mod parakeet;
+mod parakeet_tdt;
+mod timestamps;
+mod vocab;
+mod model_eou;
+mod parakeet_eou;
+#[cfg(feature = "sortformer")]
+pub mod sortformer;
+
+pub use error::{Error, Result};
+pub use execution::{ExecutionProvider, ModelConfig as ExecutionConfig};
+pub use parakeet::Parakeet;
+pub use parakeet_tdt::ParakeetTDT;
+pub use timestamps::TimestampMode;
+
+pub use config::{ModelConfig as ModelConfigJson, PreprocessorConfig};
+
+pub use decoder::{ParakeetDecoder, TimedToken, TranscriptionResult};
+pub use model::ParakeetModel;
+pub use model_eou::ParakeetEOUModel;
+pub use parakeet_eou::ParakeetEOU;
\ No newline at end of file