From 70a83104292c4e1fe5f43dd5f50e5214928c8dd6 Mon Sep 17 00:00:00 2001
From: soryu
Date: Sat, 2 May 2026 15:26:39 +0100
Subject: build(server): split Dockerfiles, make ML model paths optional (#120)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing Dockerfile (with LLM/STT/TTS model download) is now
`Dockerfile.full`. The new top-level `Dockerfile` builds a slim image
without python, without huggingface_hub, and without the model download step.

The slim image is the new default for users who only want the orchestration
surface — the directive folder UI, the mesh/task system, the API.

## Slim Dockerfile

* No python / huggingface_hub / model downloads.
* Same runtime tooling as `k8s/daemon/Dockerfile` (git, gh CLI, ssh, jq,
  curl, ca-certs, libssl3).
* Embeds the daemon binary at /app/daemon-binaries/makima-linux-x86_64
  for the in-server download endpoint.
* PARAKEET_MODEL_DIR / PARAKEET_EOU_DIR / SORTFORMER_MODEL_PATH /
  CHATTERBOX_MODEL_DIR are intentionally NOT set — Listen and Speak return
  "ML models not configured" if a client tries to use them.

## ML model paths now optional

`ServerArgs.parakeet_model_dir`, `parakeet_eou_dir`,
`sortformer_model_path`, `chatterbox_model_dir` are now `Option` (no
defaults). The bin constructor inspects them: if all four are present, it
configures `AppState::new`; if all four are absent, it uses the new
`AppState::new_slim()` which leaves `model_config = None`. The lazy load
path in `get_ml_models` already returned a clean error for None.

Speak (TTS) was already optional via `model_config.as_ref()` — still works.

Mixed configurations log a warning and degrade to slim mode.

## Ops note

The old `Dockerfile.full` retains the original behaviour for anyone who
needs STT/diarization/TTS in production. CI still builds the daemon image
from `k8s/daemon/Dockerfile` (untouched).

Co-authored-by: Claude Opus 4.7 (1M context)
---
 Dockerfile | 83 ++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 51 insertions(+), 32 deletions(-)

(limited to 'Dockerfile')

diff --git a/Dockerfile b/Dockerfile
index a8a9245..1a751d4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,57 +1,76 @@
-FROM rust:1.91-bookworm
+# ==============================================================================
+# Makima Server — slim image (no LLM/STT/TTS models)
+# ==============================================================================
+# This builds the smallest viable Makima server image: the Rust binary plus
+# the runtime tools the orchestrator needs. The Listen and Speak websocket
+# endpoints will respond with "ML models not configured" — everything else
+# (mesh, directives, files, repo CRUD) works normally.
+#
+# Use Dockerfile.full when you need STT (Parakeet), diarization (Sortformer),
+# or TTS (Chatterbox) — that variant is ~5GB larger because it downloads the
+# model weights at build time.
+# ==============================================================================
+
+# ---------- Builder stage ----------
+FROM rust:1.91-bookworm AS builder
 
 WORKDIR /app
 
-# Install dependencies
 RUN apt-get update && apt-get install -y \
     pkg-config \
     libssl-dev \
-    curl \
-    python3 \
-    python3-pip \
-    && pip3 install --break-system-packages huggingface_hub[cli] \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy and run model download script
-COPY makima/sh/download-models.sh /app/download-models.sh
-RUN chmod +x /app/download-models.sh
-
-ARG MODEL_BASE_URL
-ENV MODEL_BASE_URL=${MODEL_BASE_URL}
-ENV MODELS_DIR=/app/models
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
-RUN /app/download-models.sh echo "Models downloaded"
-
-# Copy workspace files
+# Copy workspace files. We deliberately do NOT copy `voices/` or any model
+# data — the slim build doesn't ship them.
 COPY Cargo.toml Cargo.lock ./
 COPY makima ./makima
 COPY vendor ./vendor
 COPY tools/stt-client ./tools/stt-client
-COPY voices ./voices
 
-# Build release binary
 RUN cargo build --release --package makima --bin makima
-RUN cp /app/target/release/makima /makima
 
-# Embed daemon binary for download endpoint
-RUN mkdir -p /app/daemon-binaries
-RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64
-ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
+# ---------- Runtime stage ----------
+FROM debian:bookworm-slim
+
+# Runtime deps — same as the daemon image. No python, no huggingface_hub,
+# no model download script.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl3 \
+    git \
+    curl \
+    openssh-client \
+    jq \
+    && rm -rf /var/lib/apt/lists/*
+
+# GitHub CLI for orchestrator PR operations.
+RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+    -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+        > /etc/apt/sources.list.d/github-cli.list \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends gh \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /app/target/release/makima /makima
 
-# Clean up build artifacts to reduce image size
-RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry
+# Embed daemon binary for the download endpoint (same binary, served as
+# `/api/v1/daemons/binaries/...` to clients).
+RUN mkdir -p /app/daemon-binaries \
+    && cp /makima /app/daemon-binaries/makima-linux-x86_64
+ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
 
-# Set default environment variables
 ENV PORT=8080
 ENV RUST_LOG=makima=info,tower_http=info
-ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3
-ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx
-ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+# NOTE: PARAKEET_MODEL_DIR / SORTFORMER_MODEL_PATH / CHATTERBOX_MODEL_DIR are
+# DELIBERATELY not set. The server will start without them and Listen/Speak
+# will return "ML models not configured" if a client attempts to use them.
 
 EXPOSE 8080
 
-HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
     CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1
 
 CMD ["/makima", "server"]
-- 
cgit v1.2.3