summary refs log tree commit diff
path: root/makima/sh
diff options
context:
space:
mode:
author soryu <soryu@soryu.co> 2026-02-01 03:04:36 +0000
committer soryu <soryu@soryu.co> 2026-02-01 03:04:36 +0000
commit a2c147ddd59f55a07b5be0c8970169726b55c876 (patch)
tree e41a80f2dfdd8fcaf6b6e91c899392f4e619ca3e /makima/sh
parent 65eebd078af712d004a5a9e28863a16df30792a6 (diff)
download soryu-a2c147ddd59f55a07b5be0c8970169726b55c876.tar.gz
soryu-a2c147ddd59f55a07b5be0c8970169726b55c876.zip
Use chatterbox TTS
Diffstat (limited to 'makima/sh')
-rwxr-xr-x makima/sh/download-models.sh 54
1 files changed, 19 insertions, 35 deletions
diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh
index 4f188f3..b44e091 100755
--- a/makima/sh/download-models.sh
+++ b/makima/sh/download-models.sh
@@ -114,47 +114,31 @@ else
echo "All models downloaded successfully"
fi
-# Download Qwen3-TTS models (for TTS functionality)
-QWEN3_TTS_DIR="${QWEN3_TTS_DIR:-/app/models/qwen3-tts}"
-
-download_qwen3_tts() {
- if [ -d "$QWEN3_TTS_DIR" ] && \
- [ -f "$QWEN3_TTS_DIR/model.safetensors" ] && \
- [ -f "$QWEN3_TTS_DIR/speech_tokenizer.safetensors" ] && \
- [ -f "$QWEN3_TTS_DIR/vocab.json" ] && \
- [ -f "$QWEN3_TTS_DIR/merges.txt" ] && \
- [ -f "$QWEN3_TTS_DIR/config.json" ]; then
- echo "Qwen3-TTS models already exist, skipping..."
+# Download Chatterbox TTS models (for TTS functionality)
+CHATTERBOX_MODEL_DIR="${CHATTERBOX_MODEL_DIR:-/app/models/chatterbox-turbo}"
+
+download_chatterbox_tts() {
+ if [ -d "$CHATTERBOX_MODEL_DIR" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/speech_encoder.onnx" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/embed_tokens.onnx" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/language_model.onnx" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/conditional_decoder.onnx" ] && \
+ [ -f "$CHATTERBOX_MODEL_DIR/tokenizer.json" ]; then
+ echo "Chatterbox TTS models already exist, skipping..."
return 0
fi
- echo "Downloading Qwen3-TTS models..."
- mkdir -p "$QWEN3_TTS_DIR"
-
- # Download base TTS model files from Qwen/Qwen3-TTS-12Hz-0.6B-Base
- # Note: This repo uses vocab.json + merges.txt (not tokenizer.json)
- echo "Downloading Qwen3-TTS-12Hz-0.6B-Base..."
- hf download Qwen/Qwen3-TTS-12Hz-0.6B-Base \
- model.safetensors \
- config.json \
- vocab.json \
- merges.txt \
- tokenizer_config.json \
- --local-dir "$QWEN3_TTS_DIR"
-
- # Download speech tokenizer from Qwen/Qwen3-TTS-Tokenizer-12Hz
- echo "Downloading Qwen3-TTS-Tokenizer-12Hz..."
- local tmpdir=$(mktemp -d)
- hf download Qwen/Qwen3-TTS-Tokenizer-12Hz \
- model.safetensors \
- --local-dir "$tmpdir"
- mv "$tmpdir/model.safetensors" "$QWEN3_TTS_DIR/speech_tokenizer.safetensors"
- rm -rf "$tmpdir"
+ echo "Downloading Chatterbox TTS models..."
+ mkdir -p "$CHATTERBOX_MODEL_DIR"
+
+ # No file list is given, so the full ResembleAI/chatterbox-turbo-ONNX repo is requested into the model dir
+ echo "Downloading ResembleAI/chatterbox-turbo-ONNX..."
+ hf download ResembleAI/chatterbox-turbo-ONNX --local-dir "$CHATTERBOX_MODEL_DIR"
- echo "Qwen3-TTS models downloaded successfully"
+ echo "Chatterbox TTS models downloaded successfully"
}
-download_qwen3_tts
+download_chatterbox_tts
# Execute the main command
exec "$@"