diff options
| -rw-r--r-- | Dockerfile | 6 | ||||
| -rw-r--r-- | makima/src/tts/qwen3/mod.rs | 32 | ||||
| -rw-r--r-- | voices/makima/reference.wav | bin | 0 -> 1726528 bytes |
3 files changed, 34 insertions, 4 deletions
@@ -22,11 +22,17 @@ ENV MODELS_DIR=/app/models ENV QWEN3_TTS_DIR=/app/models/qwen3-tts RUN /app/download-models.sh echo "Models downloaded" +# Download missing Qwen3-TTS tokenizer files (vocab.json + merges.txt) +# The main download script tries to get tokenizer.json which doesn't exist in the HF repo +RUN curl -sL "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-Base/resolve/main/vocab.json" -o /app/models/qwen3-tts/vocab.json \ + && curl -sL "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-Base/resolve/main/merges.txt" -o /app/models/qwen3-tts/merges.txt + # Copy workspace files COPY Cargo.toml Cargo.lock ./ COPY makima ./makima COPY vendor ./vendor COPY tools/stt-client ./tools/stt-client +COPY voices ./voices # Build release binary RUN cargo build --release --package makima --bin makima diff --git a/makima/src/tts/qwen3/mod.rs b/makima/src/tts/qwen3/mod.rs index 9bac794..1520be6 100644 --- a/makima/src/tts/qwen3/mod.rs +++ b/makima/src/tts/qwen3/mod.rs @@ -99,10 +99,34 @@ impl Qwen3Tts { Qwen3TtsConfig::default() }; - // Load text tokenizer - let tokenizer_path = model_dir.join("tokenizer.json"); - let tokenizer = Tokenizer::from_file(&tokenizer_path) - .map_err(|e| TtsError::Tokenizer(format!("failed to load tokenizer: {e}")))?; + // Load text tokenizer (supports both tokenizer.json and vocab.json+merges.txt formats) + let tokenizer_json_path = model_dir.join("tokenizer.json"); + let tokenizer = if tokenizer_json_path.exists() { + Tokenizer::from_file(&tokenizer_json_path) + .map_err(|e| TtsError::Tokenizer(format!("failed to load tokenizer.json: {e}")))? + } else { + // Fall back to vocab.json + merges.txt (HuggingFace Qwen3-TTS format) + let vocab_path = model_dir.join("vocab.json"); + let merges_path = model_dir.join("merges.txt"); + + if !vocab_path.exists() || !merges_path.exists() { + return Err(TtsError::Tokenizer(format!( + "tokenizer files not found: need either tokenizer.json or vocab.json+merges.txt in {}", + model_dir.display() + ))); + } + + tokenizers::Tokenizer::from_file(&vocab_path) + .or_else(|_| { + // Build BPE tokenizer from vocab and merges + use tokenizers::models::bpe::BPE; + let bpe = BPE::from_file(&vocab_path.to_string_lossy(), &merges_path.to_string_lossy()) + .build() + .map_err(|e| TtsError::Tokenizer(format!("failed to build BPE tokenizer: {e}")))?; + Ok(Tokenizer::new(bpe)) + }) + .map_err(|e: TtsError| TtsError::Tokenizer(format!("failed to load tokenizer: {e}")))? + }; // Load LM weights from safetensors let lm_weights_path = model_dir.join("model.safetensors"); diff --git a/voices/makima/reference.wav b/voices/makima/reference.wav Binary files differnew file mode 100644 index 0000000..c07586b --- /dev/null +++ b/voices/makima/reference.wav |
