diff options
| -rw-r--r-- | Dockerfile | 2 | ||||
| -rw-r--r-- | makima/sh/download-models.sh | 56 | ||||
| -rw-r--r-- | makima/src/bin/server.rs | 2 |
3 files changed, 47 insertions, 13 deletions
@@ -39,7 +39,7 @@ ENV PORT=8080 ENV RUST_LOG=makima=info,tower_http=info ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3 ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx -ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.onnx +ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx EXPOSE 8080 diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh index 7aecefe..0381e15 100644 --- a/makima/sh/download-models.sh +++ b/makima/sh/download-models.sh @@ -28,7 +28,6 @@ download_from_url() { download_from_hf() { local dest=$1 local repo=$2 - local include=${3:-} if [ -d "$dest" ] && [ "$(ls -A $dest 2>/dev/null)" ]; then echo "Model $dest already exists, skipping..." @@ -37,21 +36,46 @@ download_from_hf() { echo "Downloading from Hugging Face ($repo)..." mkdir -p "$dest" + hf download "$repo" --local-dir "$dest" + echo "Downloaded to $dest successfully" +} - if [ -n "$include" ]; then - hf download "$repo" --include "$include" --local-dir "$dest" - else - hf download "$repo" --local-dir "$dest" +download_from_hf_subdir() { + local dest=$1 + local repo=$2 + local subdir=$3 + + if [ -d "$dest" ] && [ "$(ls -A $dest 2>/dev/null)" ]; then + echo "Model $dest already exists, skipping..." + return 0 fi + echo "Downloading $subdir from Hugging Face ($repo)..." + local tmpdir=$(mktemp -d) + hf download "$repo" --include "$subdir/*" --local-dir "$tmpdir" + + # Move subdirectory contents to destination + mkdir -p "$dest" + mv "$tmpdir/$subdir"/* "$dest"/ + rm -rf "$tmpdir" echo "Downloaded to $dest successfully" } # Check if models exist +# TDT: encoder-model.onnx, encoder-model.onnx.data, decoder_joint-model.onnx, vocab.txt +# EOU: encoder.onnx, decoder_joint.onnx, tokenizer.json +# Diarization: diar_streaming_sortformer_4spk-v2.1.onnx check_models_exist() { - [ -d "$MODELS_DIR/$PARAKEET_DIR" ] && \ - [ -d "$MODELS_DIR/$EOU_DIR" ] && \ - [ -f "$MODELS_DIR/$DIARIZATION_DIR/diar_streaming_sortformer_4spk-v2.onnx" ] + # TDT model files + [ -f "$MODELS_DIR/$PARAKEET_DIR/encoder-model.onnx" ] && \ + [ -f "$MODELS_DIR/$PARAKEET_DIR/decoder_joint-model.onnx" ] && \ + [ -f "$MODELS_DIR/$PARAKEET_DIR/vocab.txt" ] && \ + # EOU model files + [ -f "$MODELS_DIR/$EOU_DIR/encoder.onnx" ] && \ + [ -f "$MODELS_DIR/$EOU_DIR/decoder_joint.onnx" ] && \ + [ -f "$MODELS_DIR/$EOU_DIR/tokenizer.json" ] && \ + # Diarization model + [ -f "$MODELS_DIR/$DIARIZATION_DIR/diar_streaming_sortformer_4spk-v2.1.onnx" ] } if check_models_exist; then @@ -68,13 +92,23 @@ else echo "Downloading models from Hugging Face..." # Parakeet TDT from istupakov/parakeet-tdt-0.6b-v3-onnx + # Required: encoder-model.onnx, encoder-model.onnx.data, decoder_joint-model.onnx, vocab.txt download_from_hf "$MODELS_DIR/$PARAKEET_DIR" "istupakov/parakeet-tdt-0.6b-v3-onnx" + # Verify TDT files were downloaded + if [ ! -f "$MODELS_DIR/$PARAKEET_DIR/vocab.txt" ]; then + echo "ERROR: vocab.txt not found in parakeet TDT model" + echo "Contents of $MODELS_DIR/$PARAKEET_DIR:" + ls -la "$MODELS_DIR/$PARAKEET_DIR" + exit 1 + fi + # EOU model from altunenes/parakeet-rs (subdirectory) - download_from_hf "$MODELS_DIR/$EOU_DIR" "altunenes/parakeet-rs" "realtime_eou_120m-v1-onnx/*" + download_from_hf_subdir "$MODELS_DIR/$EOU_DIR" "altunenes/parakeet-rs" "realtime_eou_120m-v1-onnx" - # Diarization model from altunenes/parakeet-rs (subdirectory) - download_from_hf "$MODELS_DIR/$DIARIZATION_DIR" "altunenes/parakeet-rs" "diarization/*" + # Diarization model from altunenes/parakeet-rs (single file at root) + mkdir -p "$MODELS_DIR/$DIARIZATION_DIR" + hf download "altunenes/parakeet-rs" "diar_streaming_sortformer_4spk-v2.1.onnx" --local-dir "$MODELS_DIR/$DIARIZATION_DIR" fi echo "All models downloaded successfully" diff --git a/makima/src/bin/server.rs b/makima/src/bin/server.rs index 470e295..3ea3a67 100644 --- a/makima/src/bin/server.rs +++ b/makima/src/bin/server.rs @@ -11,7 +11,7 @@ use makima::server::{run_server, state::AppState}; /// Default model paths (can be overridden via environment variables). const DEFAULT_PARAKEET_MODEL_DIR: &str = "models/parakeet-tdt-0.6b-v3"; const DEFAULT_PARAKEET_EOU_DIR: &str = "models/realtime_eou_120m-v1-onnx"; -const DEFAULT_SORTFORMER_MODEL_PATH: &str = "models/diarization/diar_streaming_sortformer_4spk-v2.onnx"; +const DEFAULT_SORTFORMER_MODEL_PATH: &str = "models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx"; #[tokio::main] async fn main() -> anyhow::Result<()> { |
