diff options
| -rw-r--r-- | Dockerfile | 34 | ||||
| -rw-r--r-- | makima/sh/download-models.sh | 60 |
2 files changed, 60 insertions, 34 deletions
@@ -1,47 +1,49 @@ -# Build stage -FROM rust:1.91-bookworm AS builder +FROM rust:1.91-bookworm WORKDIR /app -# Install build dependencies +# Install dependencies RUN apt-get update && apt-get install -y \ pkg-config \ libssl-dev \ + curl \ + python3 \ + python3-pip \ + && pip3 install --break-system-packages huggingface_hub[cli] \ && rm -rf /var/lib/apt/lists/* +# Copy and run model download script +COPY makima/sh/download-models.sh /app/download-models.sh +RUN chmod +x /app/download-models.sh + +ARG MODEL_BASE_URL +ENV MODEL_BASE_URL=${MODEL_BASE_URL} +ENV MODELS_DIR=/app/models +RUN /app/download-models.sh echo "Models downloaded" + # Copy workspace files COPY Cargo.toml Cargo.lock ./ COPY makima ./makima COPY vendor ./vendor COPY tools/stt-client ./tools/stt-client - # Build release binary RUN cargo build --release --package makima --bin makima-server - RUN cp /app/target/release/makima-server /app/makima-server -# Copy model download script -COPY makima/sh/download-models.sh /app/download-models.sh -RUN chmod +x /app/download-models.sh - -# Create models directory -RUN mkdir -p /app/models +# Clean up build artifacts to reduce image size +RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry # Set default environment variables ENV PORT=8080 ENV RUST_LOG=makima=info,tower_http=info -ENV MODELS_DIR=/app/models ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3 ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.onnx EXPOSE 8080 -# Health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \ +HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1 -# Use download script as entrypoint to ensure models exist -ENTRYPOINT ["/app/download-models.sh"] CMD ["/app/makima-server"] diff --git a/makima/sh/download-models.sh b/makima/sh/download-models.sh index ddb7454..58ee0ac 100644 --- a/makima/sh/download-models.sh +++ b/makima/sh/download-models.sh @@ -4,12 +4,12 @@ set -e MODELS_DIR="${MODELS_DIR:-/app/models}" MODEL_BASE_URL="${MODEL_BASE_URL:-}" -# Model directories to check/download +# Model directories PARAKEET_DIR="parakeet-tdt-0.6b-v3" EOU_DIR="realtime_eou_120m-v1-onnx" DIARIZATION_DIR="diarization" -download_model() { +download_from_url() { local name=$1 local url=$2 local dest="$MODELS_DIR/$name" @@ -19,15 +19,34 @@ download_model() { return 0 fi - echo "Downloading $name..." + echo "Downloading $name from URL..." mkdir -p "$dest" - - # Download and extract tar.gz curl -L "$url" | tar -xz -C "$dest" --strip-components=1 - echo "Downloaded $name successfully" } +download_from_hf() { + local dest=$1 + local repo=$2 + local include=${3:-} + + if [ -d "$dest" ] && [ "$(ls -A $dest 2>/dev/null)" ]; then + echo "Model $dest already exists, skipping..." + return 0 + fi + + echo "Downloading from Hugging Face ($repo)..." + mkdir -p "$dest" + + if [ -n "$include" ]; then + huggingface-cli download "$repo" --include "$include" --local-dir "$dest" + else + huggingface-cli download "$repo" --local-dir "$dest" + fi + + echo "Downloaded to $dest successfully" +} + # Check if models exist check_models_exist() { [ -d "$MODELS_DIR/$PARAKEET_DIR" ] && \ @@ -38,20 +57,25 @@ check_models_exist() { if check_models_exist; then echo "All models present" else - if [ -z "$MODEL_BASE_URL" ]; then - echo "ERROR: Models not found and MODEL_BASE_URL not set" - echo "Please set MODEL_BASE_URL to the base URL containing model archives:" - echo " - \${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz" - echo " - \${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz" - echo " - \${MODEL_BASE_URL}/diarization.tar.gz" - exit 1 - fi - mkdir -p "$MODELS_DIR" - download_model "$PARAKEET_DIR" "${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz" - download_model "$EOU_DIR" "${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz" - download_model "$DIARIZATION_DIR" "${MODEL_BASE_URL}/diarization.tar.gz" + if [ -n "$MODEL_BASE_URL" ]; then + echo "Downloading models from custom URL..." + download_from_url "$PARAKEET_DIR" "${MODEL_BASE_URL}/parakeet-tdt-0.6b-v3.tar.gz" + download_from_url "$EOU_DIR" "${MODEL_BASE_URL}/realtime_eou_120m-v1-onnx.tar.gz" + download_from_url "$DIARIZATION_DIR" "${MODEL_BASE_URL}/diarization.tar.gz" + else + echo "Downloading models from Hugging Face..." + + # Parakeet TDT from istupakov/parakeet-tdt-0.6b-v3-onnx + download_from_hf "$MODELS_DIR/$PARAKEET_DIR" "istupakov/parakeet-tdt-0.6b-v3-onnx" + + # EOU model from altunenes/parakeet-rs (subdirectory) + download_from_hf "$MODELS_DIR/$EOU_DIR" "altunenes/parakeet-rs" "realtime_eou_120m-v1-onnx/*" + + # Diarization model from altunenes/parakeet-rs (subdirectory) + download_from_hf "$MODELS_DIR/$DIARIZATION_DIR" "altunenes/parakeet-rs" "diarization/*" + fi echo "All models downloaded successfully" fi |
