summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Dockerfile 2
-rw-r--r-- makima/sh/download-models.sh 56
-rw-r--r-- makima/src/bin/server.rs 2
3 files changed, 47 insertions, 13 deletions
diff --git a/Dockerfile b/Dockerfile
index 27b2153..79cc223 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -39,7 +39,7 @@ ENV PORT=8080
ENV RUST_LOG=makima=info,tower_http=info
ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3
ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx
-ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.onnx
+ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx
EXPOSE 8080
diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh
index 7aecefe..0381e15 100644
--- a/makima/sh/download-models.sh
+++ b/makima/sh/download-models.sh
@@ -28,7 +28,6 @@ download_from_url() {
download_from_hf() {
local dest=$1
local repo=$2
- local include=${3:-}
if [ -d "$dest" ] && [ "$(ls -A $dest 2>/dev/null)" ]; then
echo "Model $dest already exists, skipping..."
@@ -37,21 +36,46 @@ download_from_hf() {
echo "Downloading from Hugging Face ($repo)..."
mkdir -p "$dest"
+ hf download "$repo" --local-dir "$dest"
+ echo "Downloaded to $dest successfully"
+}
- if [ -n "$include" ]; then
- hf download "$repo" --include "$include" --local-dir "$dest"
- else
- hf download "$repo" --local-dir "$dest"
+download_from_hf_subdir() {
+ local dest=$1
+ local repo=$2
+ local subdir=$3
+
+ if [ -d "$dest" ] && [ "$(ls -A $dest 2>/dev/null)" ]; then
+ echo "Model $dest already exists, skipping..."
+ return 0
fi
+ echo "Downloading $subdir from Hugging Face ($repo)..."
+ local tmpdir=$(mktemp -d)
+ hf download "$repo" --include "$subdir/*" --local-dir "$tmpdir"
+
+ # Move subdirectory contents to destination
+ mkdir -p "$dest"
+ mv "$tmpdir/$subdir"/* "$dest"/
+ rm -rf "$tmpdir"
echo "Downloaded to $dest successfully"
}
# Check if models exist
+# TDT: encoder-model.onnx, encoder-model.onnx.data, decoder_joint-model.onnx, vocab.txt
+# EOU: encoder.onnx, decoder_joint.onnx, tokenizer.json
+# Diarization: diar_streaming_sortformer_4spk-v2.1.onnx
check_models_exist() {
- [ -d "$MODELS_DIR/$PARAKEET_DIR" ] && \
- [ -d "$MODELS_DIR/$EOU_DIR" ] && \
- [ -f "$MODELS_DIR/$DIARIZATION_DIR/diar_streaming_sortformer_4spk-v2.onnx" ]
+ # TDT model files
+ [ -f "$MODELS_DIR/$PARAKEET_DIR/encoder-model.onnx" ] && \
+ [ -f "$MODELS_DIR/$PARAKEET_DIR/decoder_joint-model.onnx" ] && \
+ [ -f "$MODELS_DIR/$PARAKEET_DIR/vocab.txt" ] && \
+ # EOU model files
+ [ -f "$MODELS_DIR/$EOU_DIR/encoder.onnx" ] && \
+ [ -f "$MODELS_DIR/$EOU_DIR/decoder_joint.onnx" ] && \
+ [ -f "$MODELS_DIR/$EOU_DIR/tokenizer.json" ] && \
+ # Diarization model
+ [ -f "$MODELS_DIR/$DIARIZATION_DIR/diar_streaming_sortformer_4spk-v2.1.onnx" ]
}
if check_models_exist; then
@@ -68,13 +92,23 @@ else
echo "Downloading models from Hugging Face..."
# Parakeet TDT from istupakov/parakeet-tdt-0.6b-v3-onnx
+ # Required: encoder-model.onnx, encoder-model.onnx.data, decoder_joint-model.onnx, vocab.txt
download_from_hf "$MODELS_DIR/$PARAKEET_DIR" "istupakov/parakeet-tdt-0.6b-v3-onnx"
+ # Verify TDT files were downloaded
+ if [ ! -f "$MODELS_DIR/$PARAKEET_DIR/vocab.txt" ]; then
+ echo "ERROR: vocab.txt not found in parakeet TDT model"
+ echo "Contents of $MODELS_DIR/$PARAKEET_DIR:"
+ ls -la "$MODELS_DIR/$PARAKEET_DIR"
+ exit 1
+ fi
+
# EOU model from altunenes/parakeet-rs (subdirectory)
- download_from_hf "$MODELS_DIR/$EOU_DIR" "altunenes/parakeet-rs" "realtime_eou_120m-v1-onnx/*"
+ download_from_hf_subdir "$MODELS_DIR/$EOU_DIR" "altunenes/parakeet-rs" "realtime_eou_120m-v1-onnx"
- # Diarization model from altunenes/parakeet-rs (subdirectory)
- download_from_hf "$MODELS_DIR/$DIARIZATION_DIR" "altunenes/parakeet-rs" "diarization/*"
+ # Diarization model from altunenes/parakeet-rs (single file at root)
+ mkdir -p "$MODELS_DIR/$DIARIZATION_DIR"
+ hf download "altunenes/parakeet-rs" "diar_streaming_sortformer_4spk-v2.1.onnx" --local-dir "$MODELS_DIR/$DIARIZATION_DIR"
fi
echo "All models downloaded successfully"
diff --git a/makima/src/bin/server.rs b/makima/src/bin/server.rs
index 470e295..3ea3a67 100644
--- a/makima/src/bin/server.rs
+++ b/makima/src/bin/server.rs
@@ -11,7 +11,7 @@ use makima::server::{run_server, state::AppState};
/// Default model paths (can be overridden via environment variables).
const DEFAULT_PARAKEET_MODEL_DIR: &str = "models/parakeet-tdt-0.6b-v3";
const DEFAULT_PARAKEET_EOU_DIR: &str = "models/realtime_eou_120m-v1-onnx";
-const DEFAULT_SORTFORMER_MODEL_PATH: &str = "models/diarization/diar_streaming_sortformer_4spk-v2.onnx";
+const DEFAULT_SORTFORMER_MODEL_PATH: &str = "models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx";
#[tokio::main]
async fn main() -> anyhow::Result<()> {