build(server): split Dockerfiles, make ML model paths optional (#120)

The existing Dockerfile (with LLM/STT/TTS model download) is now `Dockerfile.full`. The new top-level `Dockerfile` builds a slim image without python, without huggingface_hub, and without the model download step. The slim image is the new default for users who only want the orchestration surface — the directive folder UI, the mesh/task system, the API.

## Slim Dockerfile

* No python / huggingface_hub / model downloads.
* Same runtime tooling as `k8s/daemon/Dockerfile` (git, gh CLI, ssh, jq, curl, ca-certs, libssl3).
* Embeds the daemon binary at /app/daemon-binaries/makima-linux-x86_64 for the in-server download endpoint.
* PARAKEET_MODEL_DIR / PARAKEET_EOU_DIR / SORTFORMER_MODEL_PATH / CHATTERBOX_MODEL_DIR are intentionally NOT set — Listen and Speak return "ML models not configured" if a client tries to use them.

## ML model paths now optional

`ServerArgs.parakeet_model_dir`, `parakeet_eou_dir`, `sortformer_model_path`, `chatterbox_model_dir` are now `Option<String>` (no defaults). The bin constructor inspects them: if all four are present, it calls `AppState::new`; if all four are absent, it uses the new `AppState::new_slim()`, which leaves `model_config = None`. The lazy load path in `get_ml_models` already returned a clean error for None. Speak (TTS) was already optional via `model_config.as_ref()` — still works. Mixed configurations (only some of the four paths set) log a warning and degrade to slim mode.

## Ops note

The old Dockerfile, now `Dockerfile.full`, retains the original behaviour for anyone who needs STT/diarization/TTS in production. CI still builds the daemon image from `k8s/daemon/Dockerfile` (untouched).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
author: soryu <soryu@soryu.co> 2026-05-02 15:26:39 +0100
committer: GitHub <noreply@github.com> 2026-05-02 15:26:39 +0100
commit: 70a83104292c4e1fe5f43dd5f50e5214928c8dd6 (patch)
tree: f423621f1cd7a06ccdeb2cf8b7a011080c49a278
parent: 760516b2e7b97fa389fb3902e8d2314eea052ff0 (diff)
download: soryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.tar.gz
soryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.zip
5 files changed, 181 insertions, 79 deletions
diff --git a/Dockerfile b/Dockerfile
index a8a9245..1a751d4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,57 +1,76 @@
-FROM rust:1.91-bookworm
+# ==============================================================================
+# Makima Server — slim image (no LLM/STT/TTS models)
+# ==============================================================================
+# This builds the smallest viable Makima server image: the Rust binary plus
+# the runtime tools the orchestrator needs. The Listen and Speak websocket
+# endpoints will respond with "ML models not configured" — everything else
+# (mesh, directives, files, repo CRUD) works normally.
+#
+# Use Dockerfile.full when you need STT (Parakeet), diarization (Sortformer),
+# or TTS (Chatterbox) — that variant is ~5GB larger because it downloads the
+# model weights at build time.
+# ==============================================================================
+
+# ---------- Builder stage ----------
+FROM rust:1.91-bookworm AS builder
 
 WORKDIR /app
 
-# Install dependencies
 RUN apt-get update && apt-get install -y \
     pkg-config \
     libssl-dev \
-    curl \
-    python3 \
-    python3-pip \
-    && pip3 install --break-system-packages huggingface_hub[cli] \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy and run model download script
-COPY makima/sh/download-models.sh /app/download-models.sh
-RUN chmod +x /app/download-models.sh
-
-ARG MODEL_BASE_URL
-ENV MODEL_BASE_URL=${MODEL_BASE_URL}
-ENV MODELS_DIR=/app/models
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
-RUN /app/download-models.sh echo "Models downloaded"
-
-# Copy workspace files
+# Copy workspace files. We deliberately do NOT copy `voices/` or any model
+# data — the slim build doesn't ship them.
 COPY Cargo.toml Cargo.lock ./
 COPY makima ./makima
 COPY vendor ./vendor
 COPY tools/stt-client ./tools/stt-client
-COPY voices ./voices
 
-# Build release binary
 RUN cargo build --release --package makima --bin makima
-RUN cp /app/target/release/makima /makima
 
-# Embed daemon binary for download endpoint
-RUN mkdir -p /app/daemon-binaries
-RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64
-ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
+# ---------- Runtime stage ----------
+FROM debian:bookworm-slim
+
+# Runtime deps — same as the daemon image. No python, no huggingface_hub,
+# no model download script.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl3 \
+    git \
+    curl \
+    openssh-client \
+    jq \
+    && rm -rf /var/lib/apt/lists/*
+
+# GitHub CLI for orchestrator PR operations.
+RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+        -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+        > /etc/apt/sources.list.d/github-cli.list \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends gh \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /app/target/release/makima /makima
 
-# Clean up build artifacts to reduce image size
-RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry
+# Embed daemon binary for the download endpoint (same binary, served as
+# `/api/v1/daemons/binaries/...` to clients).
+RUN mkdir -p /app/daemon-binaries \
+    && cp /makima /app/daemon-binaries/makima-linux-x86_64
+ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
 
-# Set default environment variables
 ENV PORT=8080
 ENV RUST_LOG=makima=info,tower_http=info
-ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3
-ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx
-ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+# NOTE: PARAKEET_MODEL_DIR / PARAKEET_EOU_DIR / SORTFORMER_MODEL_PATH /
+# CHATTERBOX_MODEL_DIR are DELIBERATELY not set. The server starts without them
+# and Listen/Speak return "ML models not configured" if a client uses them.
 
 EXPOSE 8080
 
-HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
     CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1
 
 CMD ["/makima", "server"]
diff --git a/Dockerfile.full b/Dockerfile.full
new file mode 100644
index 0000000..a8a9245
--- /dev/null
+++ b/Dockerfile.full
@@ -0,0 +1,57 @@
+FROM rust:1.91-bookworm
+
+WORKDIR /app
+
+# Install dependencies
+RUN apt-get update && apt-get install -y \
+    pkg-config \
+    libssl-dev \
+    curl \
+    python3 \
+    python3-pip \
+    && pip3 install --break-system-packages huggingface_hub[cli] \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy and run model download script
+COPY makima/sh/download-models.sh /app/download-models.sh
+RUN chmod +x /app/download-models.sh
+
+ARG MODEL_BASE_URL
+ENV MODEL_BASE_URL=${MODEL_BASE_URL}
+ENV MODELS_DIR=/app/models
+ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+RUN /app/download-models.sh echo "Models downloaded"
+
+# Copy workspace files
+COPY Cargo.toml Cargo.lock ./
+COPY makima ./makima
+COPY vendor ./vendor
+COPY tools/stt-client ./tools/stt-client
+COPY voices ./voices
+
+# Build release binary
+RUN cargo build --release --package makima --bin makima
+RUN cp /app/target/release/makima /makima
+
+# Embed daemon binary for download endpoint
+RUN mkdir -p /app/daemon-binaries
+RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64
+ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
+
+# Clean up build artifacts to reduce image size
+RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry
+
+# Set default environment variables
+ENV PORT=8080
+ENV RUST_LOG=makima=info,tower_http=info
+ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3
+ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx
+ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx
+ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+
+EXPOSE 8080
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+    CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1
+
+CMD ["/makima", "server"]
diff --git a/makima/src/bin/makima.rs b/makima/src/bin/makima.rs
index 338d8f9..a84c581 100644
--- a/makima/src/bin/makima.rs
+++ b/makima/src/bin/makima.rs
@@ -42,13 +42,33 @@ async fn run_server(
     eprintln!("=== Makima Server Starting ===");
     eprintln!("Port: {}", args.port);
 
-    // Create app state
-    let mut app_state = makima::server::state::AppState::new(
-        &args.parakeet_model_dir,
-        &args.parakeet_eou_dir,
-        &args.sortformer_model_path,
-        &args.chatterbox_model_dir,
-    );
+    // Create app state. ML model paths are optional now — when none of
+    // them are supplied (the slim Dockerfile case) Listen and Speak
+    // websocket endpoints respond with "not configured" and everything
+    // else works normally.
+    let mut app_state = match (
+        args.parakeet_model_dir.as_deref(),
+        args.parakeet_eou_dir.as_deref(),
+        args.sortformer_model_path.as_deref(),
+        args.chatterbox_model_dir.as_deref(),
+    ) {
+        (Some(p), Some(eou), Some(sf), Some(cb)) => {
+            eprintln!("ML models configured (lazy load on first use)");
+            makima::server::state::AppState::new(p, eou, sf, cb)
+        }
+        (None, None, None, None) => {
+            eprintln!("ML models NOT configured — Listen/Speak disabled");
+            makima::server::state::AppState::new_slim()
+        }
+        _ => {
+            eprintln!(
+                "WARNING: only some ML model paths provided. Pass all four \
+                 (parakeet/parakeet_eou/sortformer/chatterbox) to enable ML, \
+                 or none to run in slim mode. Continuing in slim mode."
+            );
+            makima::server::state::AppState::new_slim()
+        }
+    };
 
     // Connect to database if URL provided
     if let Some(ref db_url) = args.database_url {
diff --git a/makima/src/daemon/cli/server.rs b/makima/src/daemon/cli/server.rs
index adb765d..667f9ea 100644
--- a/makima/src/daemon/cli/server.rs
+++ b/makima/src/daemon/cli/server.rs
@@ -3,39 +3,34 @@
 use clap::Args;
 
 /// Run the makima server.
+///
+/// ML model paths (parakeet / parakeet EOU / sortformer / chatterbox) are
+/// optional, but all-or-nothing: supply all four to enable Listen and Speak.
+/// When none (or only some) are provided the server still starts; those
+/// websocket endpoints just return a "not configured" error if a client uses
+/// them. This is the supported way to run a slim deployment of makima without
+/// the STT/diarization/TTS models — see `Dockerfile` vs `Dockerfile.full`.
 #[derive(Args, Debug)]
 pub struct ServerArgs {
     /// Server port
     #[arg(long, env = "PORT", default_value = "8080")]
     pub port: u16,
 
-    /// Path to parakeet model directory
-    #[arg(
-        long,
-        env = "PARAKEET_MODEL_DIR",
-        default_value = "models/parakeet-tdt-0.6b-v3"
-    )]
-    pub parakeet_model_dir: String,
-
-    /// Path to parakeet EOU model directory
-    #[arg(
-        long,
-        env = "PARAKEET_EOU_DIR",
-        default_value = "models/realtime_eou_120m-v1-onnx"
-    )]
-    pub parakeet_eou_dir: String,
-
-    /// Path to sortformer model
-    #[arg(
-        long,
-        env = "SORTFORMER_MODEL_PATH",
-        default_value = "models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx"
-    )]
-    pub sortformer_model_path: String,
-
-    /// Path to Chatterbox TTS model directory
-    #[arg(long, env = "CHATTERBOX_MODEL_DIR", default_value = "models/chatterbox-turbo")]
-    pub chatterbox_model_dir: String,
+    /// Path to parakeet model directory (optional; STT disabled when unset).
+    #[arg(long, env = "PARAKEET_MODEL_DIR")]
+    pub parakeet_model_dir: Option<String>,
+
+    /// Path to parakeet EOU model directory (optional).
+    #[arg(long, env = "PARAKEET_EOU_DIR")]
+    pub parakeet_eou_dir: Option<String>,
+
+    /// Path to sortformer model (optional; diarization disabled when unset).
+    #[arg(long, env = "SORTFORMER_MODEL_PATH")]
+    pub sortformer_model_path: Option<String>,
+
+    /// Path to Chatterbox TTS model directory (optional; TTS disabled when unset).
+    #[arg(long, env = "CHATTERBOX_MODEL_DIR")]
+    pub chatterbox_model_dir: Option<String>,
 
     /// PostgreSQL connection URI
     #[arg(long, env = "POSTGRES_CONNECTION_URI")]
diff --git a/makima/src/server/state.rs b/makima/src/server/state.rs
index 6bd9e2b..e267da1 100644
--- a/makima/src/server/state.rs
+++ b/makima/src/server/state.rs
@@ -691,21 +691,37 @@ pub struct AppState {
 }
 
 impl AppState {
+    /// Create AppState WITHOUT ML model configuration. Listen and Speak
+    /// endpoints will return "not configured" errors if used; everything
+    /// else (mesh, directives, files, contracts-free CRUD) works normally.
+    /// This is the constructor used by the slim Dockerfile.
+    pub fn new_slim() -> Self {
+        Self::new_inner(None)
+    }
+
     /// Create AppState with ML model configuration for lazy loading.
+    /// All four paths are required here — Listen needs all three of
+    /// parakeet/parakeet_eou/sortformer; Speak needs chatterbox. The
+    /// parameters are plain `&str`, so there is no way to omit one: callers
+    /// without model paths must use `new_slim()` instead.
     ///
-    /// Models are NOT loaded at startup - they will be loaded on first Listen connection.
-    ///
-    /// # Arguments
-    /// * `parakeet_model_dir` - Path to the Parakeet TDT model directory
-    /// * `parakeet_eou_dir` - Path to the Parakeet EOU model directory
-    /// * `sortformer_model_path` - Path to the Sortformer diarization model file
-    /// * `chatterbox_model_dir` - Path to the Chatterbox TTS model directory
+    /// Models are NOT loaded at startup — they're loaded on first use.
     pub fn new(
         parakeet_model_dir: &str,
         parakeet_eou_dir: &str,
         sortformer_model_path: &str,
         chatterbox_model_dir: &str,
     ) -> Self {
+        Self::new_inner(Some(ModelConfig {
+            parakeet_model_dir: parakeet_model_dir.to_string(),
+            parakeet_eou_dir: parakeet_eou_dir.to_string(),
+            sortformer_model_path: sortformer_model_path.to_string(),
+            chatterbox_model_dir: chatterbox_model_dir.to_string(),
+        }))
+    }
+
+    /// Internal constructor — model_config can be None for the slim build.
+    fn new_inner(model_config: Option<ModelConfig>) -> Self {
         // Create broadcast channels with buffer for 256 messages
         let (file_updates, _) = broadcast::channel(256);
         let (task_updates, _) = broadcast::channel(256);
@@ -744,12 +760,7 @@ impl AppState {
         };
 
         Self {
-            model_config: Some(ModelConfig {
-                parakeet_model_dir: parakeet_model_dir.to_string(),
-                parakeet_eou_dir: parakeet_eou_dir.to_string(),
-                sortformer_model_path: sortformer_model_path.to_string(),
-                chatterbox_model_dir: chatterbox_model_dir.to_string(),
-            }),
+            model_config,
             ml_models: OnceCell::new(),
             db_pool: None,
             file_updates,
author	soryu <soryu@soryu.co>	2026-05-02 15:26:39 +0100
committer	GitHub <noreply@github.com>	2026-05-02 15:26:39 +0100
commit	70a83104292c4e1fe5f43dd5f50e5214928c8dd6 (patch)
tree	f423621f1cd7a06ccdeb2cf8b7a011080c49a278
parent	760516b2e7b97fa389fb3902e8d2314eea052ff0 (diff)
download	soryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.tar.gz soryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.zip