summaryrefslogtreecommitdiff
path: root/makima/sh
diff options
context:
space:
mode:
authorsoryu <soryu@soryu.co>2026-01-28 12:51:46 +0000
committersoryu <soryu@soryu.co>2026-01-28 12:51:46 +0000
commit6026e169e8cbc892ead3643608e20b03605ecd93 (patch)
treef8850b565ac37187ac6d79a307ea92f33c43db43 /makima/sh
parentd0436686f047f1d82c30da26cf83f9eca6727292 (diff)
downloadsoryu-6026e169e8cbc892ead3643608e20b03605ecd93.tar.gz
soryu-6026e169e8cbc892ead3643608e20b03605ecd93.zip
Add Qwen3-TTS model download to build process
Fix TTS engine failure due to missing tokenizer by downloading Qwen3-TTS models during Docker build:
- Download model.safetensors, config.json, tokenizer.json, and tokenizer_config.json from Qwen/Qwen3-TTS-12Hz-0.6B-Base
- Download speech tokenizer from Qwen/Qwen3-TTS-Tokenizer-12Hz
- Add QWEN3_TTS_DIR environment variable to Dockerfile
- Script supports both env var override and default path

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'makima/sh')
-rwxr-xr-xmakima/sh/download-models.sh35
1 files changed, 35 insertions, 0 deletions
diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh
index 0381e15..1aefad8 100755
--- a/makima/sh/download-models.sh
+++ b/makima/sh/download-models.sh
@@ -114,5 +114,40 @@ else
echo "All models downloaded successfully"
fi
+# Download Qwen3-TTS models (for TTS functionality).
+# QWEN3_TTS_DIR may be set in the environment to override the target
+# directory; when it is unset or empty the default below is used.
+QWEN3_TTS_DIR="${QWEN3_TTS_DIR:-/app/models/qwen3-tts}"
+
+#######################################
+# Download the Qwen3-TTS base model files and the speech tokenizer into
+# QWEN3_TTS_DIR, unless both weight files are already present.
+# Globals:
+#   QWEN3_TTS_DIR (read) - destination directory for the model files
+# Arguments:
+#   None
+# Outputs:
+#   Progress messages on stdout, failure messages on stderr
+# Returns:
+#   0 if the models already exist or were downloaded, 1 on any failure
+#######################################
+download_qwen3_tts() {
+  # speech_tokenizer.safetensors is moved into place as the very last step
+  # below, so finding it next to model.safetensors means a previous run
+  # got all the way through.
+  if [ -f "$QWEN3_TTS_DIR/model.safetensors" ] &&
+    [ -f "$QWEN3_TTS_DIR/speech_tokenizer.safetensors" ]; then
+    echo "Qwen3-TTS models already exist, skipping..."
+    return 0
+  fi
+
+  echo "Downloading Qwen3-TTS models..."
+  if ! mkdir -p "$QWEN3_TTS_DIR"; then
+    echo "Failed to create directory: $QWEN3_TTS_DIR" >&2
+    return 1
+  fi
+
+  # Download base TTS model files from Qwen/Qwen3-TTS-12Hz-0.6B-Base
+  echo "Downloading Qwen3-TTS-12Hz-0.6B-Base..."
+  if ! huggingface-cli download Qwen/Qwen3-TTS-12Hz-0.6B-Base \
+    model.safetensors \
+    config.json \
+    tokenizer.json \
+    tokenizer_config.json \
+    --local-dir "$QWEN3_TTS_DIR"; then
+    echo "Failed to download Qwen3-TTS-12Hz-0.6B-Base" >&2
+    return 1
+  fi
+
+  # Download speech tokenizer from Qwen/Qwen3-TTS-Tokenizer-12Hz.
+  # Its weights file is also named model.safetensors, so it is fetched into
+  # a scratch directory and renamed instead of overwriting the base model.
+  echo "Downloading Qwen3-TTS-Tokenizer-12Hz..."
+  # Declared separately from the assignment: `local tmpdir=$(mktemp -d)`
+  # would report the status of `local` and hide a mktemp failure.
+  local tmpdir
+  if ! tmpdir=$(mktemp -d); then
+    echo "Failed to create a temporary directory" >&2
+    return 1
+  fi
+
+  # Capture the status instead of returning immediately so the scratch
+  # directory is removed on both the success and the failure path.
+  local status=0
+  huggingface-cli download Qwen/Qwen3-TTS-Tokenizer-12Hz \
+    model.safetensors \
+    --local-dir "$tmpdir" &&
+    mv "$tmpdir/model.safetensors" "$QWEN3_TTS_DIR/speech_tokenizer.safetensors" ||
+    status=$?
+  rm -rf -- "$tmpdir"
+
+  if [ "$status" -ne 0 ]; then
+    echo "Failed to download Qwen3-TTS-Tokenizer-12Hz" >&2
+    return 1
+  fi
+
+  echo "Qwen3-TTS models downloaded successfully"
+}
+
+download_qwen3_tts
+
# Execute the main command: exec replaces this shell process with the
# command given as the script's arguments, so nothing after this line runs.
exec "$@"