summary | refs | log | tree | commit | diff
path: root/makima/sh
diff options
context:
space:
mode:
author: soryu <soryu@soryu.co> 2025-12-21 04:09:18 +0000
committer: soryu <soryu@soryu.co> 2025-12-23 14:47:18 +0000
commit: dbec21683cad0c61736ef5d376c44a30451b46c8 (patch)
tree: 7d7abcbba0999e92f98bedeec013563b0e820586 /makima/sh
parent: 3787fd5f5358c0bf9cba6a359593fbfc2c334727 (diff)
download: soryu-dbec21683cad0c61736ef5d376c44a30451b46c8.tar.gz
download: soryu-dbec21683cad0c61736ef5d376c44a30451b46c8.zip
Use HF to download models
Diffstat (limited to 'makima/sh')
-rw-r--r-- makima/sh/download-models.sh 60
1 files changed, 42 insertions, 18 deletions
diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh
index ddb7454..58ee0ac 100644
--- a/makima/sh/download-models.sh
+++ b/makima/sh/download-models.sh
@@ -4,12 +4,12 @@ set -e
MODELS_DIR="${MODELS_DIR:-/app/models}"
MODEL_BASE_URL="${MODEL_BASE_URL:-}"
-# Model directories to check/download
+# Model directories
PARAKEET_DIR="parakeet-tdt-0.6b-v3"
EOU_DIR="realtime_eou_120m-v1-onnx"
DIARIZATION_DIR="diarization"
-download_model() {
+download_from_url() {
local name=$1
local url=$2
local dest="$MODELS_DIR/$name"
@@ -19,15 +19,34 @@ download_model() {
return 0
fi
- echo "Downloading $name..."
+ echo "Downloading $name from URL..."
mkdir -p "$dest"
-
- # Download and extract tar.gz
curl -L "$url" | tar -xz -C "$dest" --strip-components=1
-
echo "Downloaded $name successfully"
}
+# Fetch Hugging Face repo $2 into directory $1; optional $3 is an --include glob.
+download_from_hf() {
+  local dest=$1
+  local repo=$2
+  local include=${3:-}
+
+  # Non-empty dir means already downloaded; "$dest" is quoted so ls sees one path.
+  if [ -d "$dest" ] && [ -n "$(ls -A "$dest" 2>/dev/null)" ]; then
+    echo "Model $dest already exists, skipping..."
+    return 0
+  fi
+
+  echo "Downloading from Hugging Face ($repo)..."
+  mkdir -p "$dest"
+  # NOTE(review): --include may keep the repo's subdirectory under $dest — confirm layout.
+  if [ -n "$include" ]; then
+    huggingface-cli download "$repo" --include "$include" --local-dir "$dest"
+  else
+    huggingface-cli download "$repo" --local-dir "$dest"
+  fi
+  echo "Downloaded to $dest successfully"
+}
+
# Check if models exist
check_models_exist() {
[ -d "$MODELS_DIR/$PARAKEET_DIR" ] && \
@@ -38,20 +57,25 @@ check_models_exist() {
if check_models_exist; then
echo "All models present"
else
- if [ -z "$MODEL_BASE_URL" ]; then
- echo "ERROR: Models not found and MODEL_BASE_URL not set"
- echo "Please set MODEL_BASE_URL to the base URL containing model archives:"
- echo " - \${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz"
- echo " - \${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz"
- echo " - \${MODEL_BASE_URL}/diarization.tar.gz"
- exit 1
- fi
-
mkdir -p "$MODELS_DIR"
- download_model "$PARAKEET_DIR" "${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz"
- download_model "$EOU_DIR" "${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz"
- download_model "$DIARIZATION_DIR" "${MODEL_BASE_URL}/diarization.tar.gz"
+ if [ -n "$MODEL_BASE_URL" ]; then
+ echo "Downloading models from custom URL..."
+ download_from_url "$PARAKEET_DIR" "${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz"
+ download_from_url "$EOU_DIR" "${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz"
+ download_from_url "$DIARIZATION_DIR" "${MODEL_BASE_URL}/diarization.tar.gz"
+ else
+ echo "Downloading models from Hugging Face..."
+
+ # Parakeet TDT from istupakov/parakeet-tdt-0.6b-v3-onnx
+ download_from_hf "$MODELS_DIR/$PARAKEET_DIR" "istupakov/parakeet-tdt-0.6b-v3-onnx"
+
+ # EOU model from altunenes/parakeet-rs (subdirectory)
+ download_from_hf "$MODELS_DIR/$EOU_DIR" "altunenes/parakeet-rs" "realtime_eou_120m-v1-onnx/*"
+
+ # Diarization model from altunenes/parakeet-rs (subdirectory)
+ download_from_hf "$MODELS_DIR/$DIARIZATION_DIR" "altunenes/parakeet-rs" "diarization/*"
+ fi
echo "All models downloaded successfully"
fi