summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Dockerfile6
-rw-r--r--makima/src/tts/qwen3/mod.rs32
-rw-r--r--voices/makima/reference.wavbin0 -> 1726528 bytes
3 files changed, 34 insertions, 4 deletions
diff --git a/Dockerfile b/Dockerfile
index 0e21e74..4b6b432 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,11 +22,17 @@ ENV MODELS_DIR=/app/models
ENV QWEN3_TTS_DIR=/app/models/qwen3-tts
RUN /app/download-models.sh echo "Models downloaded"
+# Download the Qwen3-TTS tokenizer files (vocab.json + merges.txt) that the main download script misses.
+# The main download script tries to fetch tokenizer.json, which does not exist in the Hugging Face repo.
+RUN curl -sL "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-Base/resolve/main/vocab.json" -o /app/models/qwen3-tts/vocab.json \
+ && curl -sL "https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-Base/resolve/main/merges.txt" -o /app/models/qwen3-tts/merges.txt
+
# Copy workspace files
COPY Cargo.toml Cargo.lock ./
COPY makima ./makima
COPY vendor ./vendor
COPY tools/stt-client ./tools/stt-client
+COPY voices ./voices
# Build release binary
RUN cargo build --release --package makima --bin makima
diff --git a/makima/src/tts/qwen3/mod.rs b/makima/src/tts/qwen3/mod.rs
index 9bac794..1520be6 100644
--- a/makima/src/tts/qwen3/mod.rs
+++ b/makima/src/tts/qwen3/mod.rs
@@ -99,10 +99,34 @@ impl Qwen3Tts {
Qwen3TtsConfig::default()
};
- // Load text tokenizer
- let tokenizer_path = model_dir.join("tokenizer.json");
- let tokenizer = Tokenizer::from_file(&tokenizer_path)
- .map_err(|e| TtsError::Tokenizer(format!("failed to load tokenizer: {e}")))?;
+ // Load text tokenizer (supports both tokenizer.json and vocab.json+merges.txt formats)
+ let tokenizer_json_path = model_dir.join("tokenizer.json");
+ let tokenizer = if tokenizer_json_path.exists() {
+ Tokenizer::from_file(&tokenizer_json_path)
+ .map_err(|e| TtsError::Tokenizer(format!("failed to load tokenizer.json: {e}")))?
+ } else {
+ // Fall back to vocab.json + merges.txt (HuggingFace Qwen3-TTS format)
+ let vocab_path = model_dir.join("vocab.json");
+ let merges_path = model_dir.join("merges.txt");
+
+ if !vocab_path.exists() || !merges_path.exists() {
+ return Err(TtsError::Tokenizer(format!(
+ "tokenizer files not found: need either tokenizer.json or vocab.json+merges.txt in {}",
+ model_dir.display()
+ )));
+ }
+
+ tokenizers::Tokenizer::from_file(&vocab_path)
+ .or_else(|_| {
+ // Build BPE tokenizer from vocab and merges
+ use tokenizers::models::bpe::BPE;
+ let bpe = BPE::from_file(&vocab_path.to_string_lossy(), &merges_path.to_string_lossy())
+ .build()
+ .map_err(|e| TtsError::Tokenizer(format!("failed to build BPE tokenizer: {e}")))?;
+ Ok(Tokenizer::new(bpe))
+ })
+ .map_err(|e: TtsError| TtsError::Tokenizer(format!("failed to load tokenizer: {e}")))?
+ };
// Load LM weights from safetensors
let lm_weights_path = model_dir.join("model.safetensors");
diff --git a/voices/makima/reference.wav b/voices/makima/reference.wav
new file mode 100644
index 0000000..c07586b
--- /dev/null
+++ b/voices/makima/reference.wav
Binary files differ