diff options
| author | soryu <soryu@soryu.co> | 2025-12-21 04:09:18 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2025-12-23 14:47:18 +0000 |
| commit | dbec21683cad0c61736ef5d376c44a30451b46c8 (patch) | |
| tree | 7d7abcbba0999e92f98bedeec013563b0e820586 /makima/sh | |
| parent | 3787fd5f5358c0bf9cba6a359593fbfc2c334727 (diff) | |
| download | soryu-dbec21683cad0c61736ef5d376c44a30451b46c8.tar.gz soryu-dbec21683cad0c61736ef5d376c44a30451b46c8.zip | |
Use HF to download models
Diffstat (limited to 'makima/sh')
| -rw-r--r-- | makima/sh/download-models.sh | 60 |
1 file changed, 42 insertions, 18 deletions
diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh index ddb7454..58ee0ac 100644 --- a/makima/sh/download-models.sh +++ b/makima/sh/download-models.sh @@ -4,12 +4,12 @@ set -e MODELS_DIR="${MODELS_DIR:-/app/models}" MODEL_BASE_URL="${MODEL_BASE_URL:-}" -# Model directories to check/download +# Model directories PARAKEET_DIR="parakeet-tdt-0.6b-v3" EOU_DIR="realtime_eou_120m-v1-onnx" DIARIZATION_DIR="diarization" -download_model() { +download_from_url() { local name=$1 local url=$2 local dest="$MODELS_DIR/$name" @@ -19,15 +19,34 @@ download_model() { return 0 fi - echo "Downloading $name..." + echo "Downloading $name from URL..." mkdir -p "$dest" - - # Download and extract tar.gz curl -L "$url" | tar -xz -C "$dest" --strip-components=1 - echo "Downloaded $name successfully" } +download_from_hf() { + local dest=$1 + local repo=$2 + local include=${3:-} + + if [ -d "$dest" ] && [ "$(ls -A $dest 2>/dev/null)" ]; then + echo "Model $dest already exists, skipping..." + return 0 + fi + + echo "Downloading from Hugging Face ($repo)..." 
+ mkdir -p "$dest" + + if [ -n "$include" ]; then + huggingface-cli download "$repo" --include "$include" --local-dir "$dest" + else + huggingface-cli download "$repo" --local-dir "$dest" + fi + + echo "Downloaded to $dest successfully" +} + # Check if models exist check_models_exist() { [ -d "$MODELS_DIR/$PARAKEET_DIR" ] && \ @@ -38,20 +57,25 @@ check_models_exist() { if check_models_exist; then echo "All models present" else - if [ -z "$MODEL_BASE_URL" ]; then - echo "ERROR: Models not found and MODEL_BASE_URL not set" - echo "Please set MODEL_BASE_URL to the base URL containing model archives:" - echo " - \${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz" - echo " - \${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz" - echo " - \${MODEL_BASE_URL}/diarization.tar.gz" - exit 1 - fi - mkdir -p "$MODELS_DIR" - download_model "$PARAKEET_DIR" "${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz" - download_model "$EOU_DIR" "${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz" - download_model "$DIARIZATION_DIR" "${MODEL_BASE_URL}/diarization.tar.gz" + if [ -n "$MODEL_BASE_URL" ]; then + echo "Downloading models from custom URL..." + download_from_url "$PARAKEET_DIR" "${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz" + download_from_url "$EOU_DIR" "${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz" + download_from_url "$DIARIZATION_DIR" "${MODEL_BASE_URL}/diarization.tar.gz" + else + echo "Downloading models from Hugging Face..." + + # Parakeet TDT from istupakov/parakeet-tdt-0.6b-v3-onnx + download_from_hf "$MODELS_DIR/$PARAKEET_DIR" "istupakov/parakeet-tdt-0.6b-v3-onnx" + + # EOU model from altunenes/parakeet-rs (subdirectory) + download_from_hf "$MODELS_DIR/$EOU_DIR" "altunenes/parakeet-rs" "realtime_eou_120m-v1-onnx/*" + + # Diarization model from altunenes/parakeet-rs (subdirectory) + download_from_hf "$MODELS_DIR/$DIARIZATION_DIR" "altunenes/parakeet-rs" "diarization/*" + fi echo "All models downloaded successfully" fi |
