summary refs log tree commit diff
path: root/makima/sh/download-models.sh
diff options
context:
space:
mode:
Diffstat (limited to 'makima/sh/download-models.sh')
-rwxr-xr-x makima/sh/download-models.sh 69
1 files changed, 34 insertions, 35 deletions
diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh
index 4f188f3..14fc46e 100755
--- a/makima/sh/download-models.sh
+++ b/makima/sh/download-models.sh
@@ -114,47 +114,46 @@ else
echo "All models downloaded successfully"
fi
-# Download Qwen3-TTS models (for TTS functionality)
-QWEN3_TTS_DIR="${QWEN3_TTS_DIR:-/app/models/qwen3-tts}"
-
-download_qwen3_tts() {
- if [ -d "$QWEN3_TTS_DIR" ] && \
- [ -f "$QWEN3_TTS_DIR/model.safetensors" ] && \
- [ -f "$QWEN3_TTS_DIR/speech_tokenizer.safetensors" ] && \
- [ -f "$QWEN3_TTS_DIR/vocab.json" ] && \
- [ -f "$QWEN3_TTS_DIR/merges.txt" ] && \
- [ -f "$QWEN3_TTS_DIR/config.json" ]; then
- echo "Qwen3-TTS models already exist, skipping..."
+# Download Chatterbox TTS models (for TTS functionality)
+CHATTERBOX_MODEL_DIR="${CHATTERBOX_MODEL_DIR:-/app/models/chatterbox-turbo}"
+
+download_chatterbox_tts() {
+ if [ -d "$CHATTERBOX_MODEL_DIR" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/speech_encoder.onnx" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/language_model.onnx" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/conditional_decoder.onnx" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/tokenizer.json" ]; then
+ echo "Chatterbox TTS models already exist, skipping..."
return 0
fi
- echo "Downloading Qwen3-TTS models..."
- mkdir -p "$QWEN3_TTS_DIR"
-
- # Download base TTS model files from Qwen/Qwen3-TTS-12Hz-0.6B-Base
- # Note: This repo uses vocab.json + merges.txt (not tokenizer.json)
- echo "Downloading Qwen3-TTS-12Hz-0.6B-Base..."
- hf download Qwen/Qwen3-TTS-12Hz-0.6B-Base \
- model.safetensors \
- config.json \
- vocab.json \
- merges.txt \
- tokenizer_config.json \
- --local-dir "$QWEN3_TTS_DIR"
-
- # Download speech tokenizer from Qwen/Qwen3-TTS-Tokenizer-12Hz
- echo "Downloading Qwen3-TTS-Tokenizer-12Hz..."
- local tmpdir=$(mktemp -d)
- hf download Qwen/Qwen3-TTS-Tokenizer-12Hz \
- model.safetensors \
- --local-dir "$tmpdir"
- mv "$tmpdir/model.safetensors" "$QWEN3_TTS_DIR/speech_tokenizer.safetensors"
- rm -rf "$tmpdir"
+ echo "Downloading Chatterbox TTS models..."
+ mkdir -p "$CHATTERBOX_MODEL_DIR"
+
+ # Download ONNX models from ResembleAI/chatterbox-turbo-ONNX
+ echo "Downloading ResembleAI/chatterbox-turbo-ONNX..."
+ hf download ResembleAI/chatterbox-turbo-ONNX \
+ onnx/speech_encoder.onnx \
+ onnx/speech_encoder.onnx_data \
+ onnx/embed_tokens.onnx \
+ onnx/embed_tokens.onnx_data \
+ onnx/language_model.onnx \
+ onnx/language_model.onnx_data \
+ onnx/conditional_decoder.onnx \
+ onnx/conditional_decoder.onnx_data \
+ tokenizer.json \
+ --local-dir "$CHATTERBOX_MODEL_DIR"
+
+ # Move ONNX files from onnx/ subdirectory to root
+ if [ -d "$CHATTERBOX_MODEL_DIR/onnx" ]; then
+ mv "$CHATTERBOX_MODEL_DIR/onnx"/* "$CHATTERBOX_MODEL_DIR/"
+ rmdir "$CHATTERBOX_MODEL_DIR/onnx"
+ fi
- echo "Qwen3-TTS models downloaded successfully"
+ echo "Chatterbox TTS models downloaded successfully"
}
-download_qwen3_tts
+download_chatterbox_tts
# Execute the main command
exec "$@"