From 70a83104292c4e1fe5f43dd5f50e5214928c8dd6 Mon Sep 17 00:00:00 2001
From: soryu <soryu@soryu.co>
Date: Sat, 2 May 2026 15:26:39 +0100
Subject: build(server): split Dockerfiles, make ML model paths optional (#120)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Existing Dockerfile (with LLM/STT/TTS model download) is now `Dockerfile.full`.
The new top-level `Dockerfile` builds a slim image without python, without
huggingface_hub, without the model download step. The slim image is the new
default for users who only want the orchestration surface — the directive
folder UI, the mesh/task system, the API.

## Slim Dockerfile
* No python / huggingface_hub / model downloads.
* Same runtime tooling as `k8s/daemon/Dockerfile` (git, gh CLI, ssh, jq,
  curl, ca-certs, libssl3).
* Embeds the daemon binary at /app/daemon-binaries/makima-linux-x86_64
  for the in-server download endpoint.
* PARAKEET_MODEL_DIR / PARAKEET_EOU_DIR / SORTFORMER_MODEL_PATH /
  CHATTERBOX_MODEL_DIR are intentionally NOT set — Listen and Speak return
  "ML models not configured" if a client tries to use them.

## ML model paths now optional
`ServerArgs.parakeet_model_dir`, `parakeet_eou_dir`, `sortformer_model_path`,
`chatterbox_model_dir` are now `Option<String>` (no defaults). The bin
constructor inspects them: if all four are present, configures
`AppState::new`; if all four are absent, uses the new
`AppState::new_slim()` which leaves `model_config = None`. The lazy load
path in `get_ml_models` already returned a clean error for None.

Speak (TTS) was already optional via `model_config.as_ref()` — still works.

Mixed configurations (only some of the four paths set) log a warning and
degrade to slim mode.

## Ops note
`Dockerfile.full` (the previous `Dockerfile`, renamed) retains the original
behaviour for anyone who needs STT/diarization/TTS in production. CI still
builds the daemon image from `k8s/daemon/Dockerfile` (untouched).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Dockerfile | 83 ++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 51 insertions(+), 32 deletions(-)

(limited to 'Dockerfile')
diff --git a/Dockerfile b/Dockerfile
index a8a9245..1a751d4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,57 +1,76 @@
-FROM rust:1.91-bookworm
+# ==============================================================================
+# Makima Server — slim image (no LLM/STT/TTS models)
+# ==============================================================================
+# This builds the smallest viable Makima server image: the Rust binary plus
+# the runtime tools the orchestrator needs. The Listen and Speak websocket
+# endpoints will respond with "ML models not configured" — everything else
+# (mesh, directives, files, repo CRUD) works normally.
+#
+# Use Dockerfile.full when you need STT (Parakeet), diarization (Sortformer),
+# or TTS (Chatterbox) — that variant is ~5GB larger because it downloads the
+# model weights at build time.
+# ==============================================================================
+
+# ---------- Builder stage ----------
+FROM rust:1.91-bookworm AS builder
 
 WORKDIR /app
 
-# Install dependencies
 RUN apt-get update && apt-get install -y \
     pkg-config \
     libssl-dev \
-    curl \
-    python3 \
-    python3-pip \
-    && pip3 install --break-system-packages huggingface_hub[cli] \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy and run model download script
-COPY makima/sh/download-models.sh /app/download-models.sh
-RUN chmod +x /app/download-models.sh
-
-ARG MODEL_BASE_URL
-ENV MODEL_BASE_URL=${MODEL_BASE_URL}
-ENV MODELS_DIR=/app/models
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
-RUN /app/download-models.sh echo "Models downloaded"
-
-# Copy workspace files
+# Copy workspace files. We deliberately do NOT copy `voices/` or any model
+# data — the slim build doesn't ship them.
 COPY Cargo.toml Cargo.lock ./
 COPY makima ./makima
 COPY vendor ./vendor
 COPY tools/stt-client ./tools/stt-client
-COPY voices ./voices
 
-# Build release binary
 RUN cargo build --release --package makima --bin makima
-RUN cp /app/target/release/makima /makima
 
-# Embed daemon binary for download endpoint
-RUN mkdir -p /app/daemon-binaries
-RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64
-ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
+# ---------- Runtime stage ----------
+FROM debian:bookworm-slim
+
+# Runtime deps — same as the daemon image. No python, no huggingface_hub,
+# no model download script.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl3 \
+    git \
+    curl \
+    openssh-client \
+    jq \
+    && rm -rf /var/lib/apt/lists/*
+
+# GitHub CLI for orchestrator PR operations.
+RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+        -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+        > /etc/apt/sources.list.d/github-cli.list \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends gh \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /app/target/release/makima /makima
 
-# Clean up build artifacts to reduce image size
-RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry
+# Embed daemon binary for the download endpoint (same binary, served as
+# `/api/v1/daemons/binaries/...` to clients).
+RUN mkdir -p /app/daemon-binaries \
+    && cp /makima /app/daemon-binaries/makima-linux-x86_64
+ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
 
-# Set default environment variables
 ENV PORT=8080
 ENV RUST_LOG=makima=info,tower_http=info
-ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3
-ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx
-ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+# NOTE: PARAKEET_MODEL_DIR / PARAKEET_EOU_DIR / SORTFORMER_MODEL_PATH /
+# CHATTERBOX_MODEL_DIR are DELIBERATELY not set. The server will start without
+# them; Listen/Speak return "ML models not configured" if a client uses them.
 
 EXPOSE 8080
 
-HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
     CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1
 
 CMD ["/makima", "server"]
-- 
cgit v1.2.3