summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsoryu <soryu@soryu.co>2026-05-02 15:26:39 +0100
committerGitHub <noreply@github.com>2026-05-02 15:26:39 +0100
commit70a83104292c4e1fe5f43dd5f50e5214928c8dd6 (patch)
treef423621f1cd7a06ccdeb2cf8b7a011080c49a278
parent760516b2e7b97fa389fb3902e8d2314eea052ff0 (diff)
downloadsoryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.tar.gz
soryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.zip
build(server): split Dockerfiles, make ML model paths optional (#120)
Existing Dockerfile (with LLM/STT/TTS model download) is now `Dockerfile.full`. The new top-level `Dockerfile` builds a slim image without python, without huggingface_hub, without the model download step. The slim image is the new default for users who only want the orchestration surface — the directive folder UI, the mesh/task system, the API.

## Slim Dockerfile

* No python / huggingface_hub / model downloads.
* Same runtime tooling as `k8s/daemon/Dockerfile` (git, gh CLI, ssh, jq, curl, ca-certs, libssl3).
* Embeds the daemon binary at /app/daemon-binaries/makima-linux-x86_64 for the in-server download endpoint.
* PARAKEET_MODEL_DIR / SORTFORMER_MODEL_PATH / CHATTERBOX_MODEL_DIR are intentionally NOT set — Listen and Speak return "ML models not configured" if a client tries to use them.

## ML model paths now optional

`ServerArgs.parakeet_model_dir`, `parakeet_eou_dir`, `sortformer_model_path`, `chatterbox_model_dir` are now `Option<String>` (no defaults). The bin constructor inspects them: if all four are present, configures `AppState::new`; if all four are absent, uses the new `AppState::new_slim()` which leaves `model_config = None`. The lazy load path in `get_ml_models` already returned a clean error for None. Speak (TTS) was already optional via `model_config.as_ref()` — still works. Mixed configurations log a warning and degrade to slim mode.

## Ops note

The old `Dockerfile.full` retains the original behaviour for anyone who needs STT/diarization/TTS in production. CI still builds the daemon image from `k8s/daemon/Dockerfile` (untouched).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
-rw-r--r--Dockerfile83
-rw-r--r--Dockerfile.full57
-rw-r--r--makima/src/bin/makima.rs34
-rw-r--r--makima/src/daemon/cli/server.rs49
-rw-r--r--makima/src/server/state.rs37
5 files changed, 181 insertions, 79 deletions
diff --git a/Dockerfile b/Dockerfile
index a8a9245..1a751d4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,57 +1,76 @@
-FROM rust:1.91-bookworm
+# ==============================================================================
+# Makima Server — slim image (no LLM/STT/TTS models)
+# ==============================================================================
+# This builds the smallest viable Makima server image: the Rust binary plus
+# the runtime tools the orchestrator needs. The Listen and Speak websocket
+# endpoints will respond with "ML models not configured" — everything else
+# (mesh, directives, files, repo CRUD) works normally.
+#
+# Use Dockerfile.full when you need STT (Parakeet), diarization (Sortformer),
+# or TTS (Chatterbox) — that variant is ~5GB larger because it downloads the
+# model weights at build time.
+# ==============================================================================
+
+# ---------- Builder stage ----------
+FROM rust:1.91-bookworm AS builder
WORKDIR /app
-# Install dependencies
RUN apt-get update && apt-get install -y \
pkg-config \
libssl-dev \
- curl \
- python3 \
- python3-pip \
- && pip3 install --break-system-packages huggingface_hub[cli] \
&& rm -rf /var/lib/apt/lists/*
-# Copy and run model download script
-COPY makima/sh/download-models.sh /app/download-models.sh
-RUN chmod +x /app/download-models.sh
-
-ARG MODEL_BASE_URL
-ENV MODEL_BASE_URL=${MODEL_BASE_URL}
-ENV MODELS_DIR=/app/models
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
-RUN /app/download-models.sh echo "Models downloaded"
-
-# Copy workspace files
+# Copy workspace files. We deliberately do NOT copy `voices/` or any model
+# data — the slim build doesn't ship them.
COPY Cargo.toml Cargo.lock ./
COPY makima ./makima
COPY vendor ./vendor
COPY tools/stt-client ./tools/stt-client
-COPY voices ./voices
-# Build release binary
RUN cargo build --release --package makima --bin makima
-RUN cp /app/target/release/makima /makima
-# Embed daemon binary for download endpoint
-RUN mkdir -p /app/daemon-binaries
-RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64
-ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
+# ---------- Runtime stage ----------
+FROM debian:bookworm-slim
+
+# Runtime deps — same as the daemon image. No python, no huggingface_hub,
+# no model download script.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ ca-certificates \
+ libssl3 \
+ git \
+ curl \
+ openssh-client \
+ jq \
+ && rm -rf /var/lib/apt/lists/*
+
+# GitHub CLI for orchestrator PR operations.
+RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+ -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
+ && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
+ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+ > /etc/apt/sources.list.d/github-cli.list \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends gh \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /app/target/release/makima /makima
-# Clean up build artifacts to reduce image size
-RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry
+# Embed daemon binary for the download endpoint (same binary, served as
+# `/api/v1/daemons/binaries/...` to clients).
+RUN mkdir -p /app/daemon-binaries \
+ && cp /makima /app/daemon-binaries/makima-linux-x86_64
+ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
-# Set default environment variables
ENV PORT=8080
ENV RUST_LOG=makima=info,tower_http=info
-ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3
-ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx
-ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx
-ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+# NOTE: PARAKEET_MODEL_DIR / PARAKEET_EOU_DIR / SORTFORMER_MODEL_PATH /
+# CHATTERBOX_MODEL_DIR are DELIBERATELY not set. The server will start without
+# them and Listen/Speak will return "ML models not configured" if a client
+# attempts to use them.
EXPOSE 8080
-HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1
CMD ["/makima", "server"]
diff --git a/Dockerfile.full b/Dockerfile.full
new file mode 100644
index 0000000..a8a9245
--- /dev/null
+++ b/Dockerfile.full
@@ -0,0 +1,57 @@
+FROM rust:1.91-bookworm
+
+WORKDIR /app
+
+# Install dependencies
+RUN apt-get update && apt-get install -y \
+ pkg-config \
+ libssl-dev \
+ curl \
+ python3 \
+ python3-pip \
+ && pip3 install --break-system-packages huggingface_hub[cli] \
+ && rm -rf /var/lib/apt/lists/*
+
+# Copy and run model download script
+COPY makima/sh/download-models.sh /app/download-models.sh
+RUN chmod +x /app/download-models.sh
+
+ARG MODEL_BASE_URL
+ENV MODEL_BASE_URL=${MODEL_BASE_URL}
+ENV MODELS_DIR=/app/models
+ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+RUN /app/download-models.sh echo "Models downloaded"
+
+# Copy workspace files
+COPY Cargo.toml Cargo.lock ./
+COPY makima ./makima
+COPY vendor ./vendor
+COPY tools/stt-client ./tools/stt-client
+COPY voices ./voices
+
+# Build release binary
+RUN cargo build --release --package makima --bin makima
+RUN cp /app/target/release/makima /makima
+
+# Embed daemon binary for download endpoint
+RUN mkdir -p /app/daemon-binaries
+RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64
+ENV DAEMON_BINARIES_DIR=/app/daemon-binaries
+
+# Clean up build artifacts to reduce image size
+RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry
+
+# Set default environment variables
+ENV PORT=8080
+ENV RUST_LOG=makima=info,tower_http=info
+ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3
+ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx
+ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx
+ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo
+
+EXPOSE 8080
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+ CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1
+
+CMD ["/makima", "server"]
diff --git a/makima/src/bin/makima.rs b/makima/src/bin/makima.rs
index 338d8f9..a84c581 100644
--- a/makima/src/bin/makima.rs
+++ b/makima/src/bin/makima.rs
@@ -42,13 +42,33 @@ async fn run_server(
eprintln!("=== Makima Server Starting ===");
eprintln!("Port: {}", args.port);
- // Create app state
- let mut app_state = makima::server::state::AppState::new(
- &args.parakeet_model_dir,
- &args.parakeet_eou_dir,
- &args.sortformer_model_path,
- &args.chatterbox_model_dir,
- );
+ // Create app state. ML model paths are optional now — when none of
+ // them are supplied (the slim Dockerfile case) Listen and Speak
+ // websocket endpoints respond with "not configured" and everything
+ // else works normally.
+ let mut app_state = match (
+ args.parakeet_model_dir.as_deref(),
+ args.parakeet_eou_dir.as_deref(),
+ args.sortformer_model_path.as_deref(),
+ args.chatterbox_model_dir.as_deref(),
+ ) {
+ (Some(p), Some(eou), Some(sf), Some(cb)) => {
+ eprintln!("ML models configured (lazy load on first use)");
+ makima::server::state::AppState::new(p, eou, sf, cb)
+ }
+ (None, None, None, None) => {
+ eprintln!("ML models NOT configured — Listen/Speak disabled");
+ makima::server::state::AppState::new_slim()
+ }
+ _ => {
+ eprintln!(
+ "WARNING: only some ML model paths provided. Pass all four \
+ (parakeet/parakeet_eou/sortformer/chatterbox) to enable ML, \
+ or none to run in slim mode. Continuing in slim mode."
+ );
+ makima::server::state::AppState::new_slim()
+ }
+ };
// Connect to database if URL provided
if let Some(ref db_url) = args.database_url {
diff --git a/makima/src/daemon/cli/server.rs b/makima/src/daemon/cli/server.rs
index adb765d..667f9ea 100644
--- a/makima/src/daemon/cli/server.rs
+++ b/makima/src/daemon/cli/server.rs
@@ -3,39 +3,34 @@
use clap::Args;
/// Run the makima server.
+///
+/// ML model paths (parakeet / parakeet EOU / sortformer / chatterbox) are
+/// optional. When none are provided the server still starts; the Listen and
+/// Speak websocket endpoints just return a "not configured" error if a client
+/// tries to use them. Supplying only some of the four paths logs a warning
+/// and is treated the same as supplying none. This is the supported way to
+/// run a slim deployment of makima without the LLM/STT/TTS dependencies —
+/// see `Dockerfile` (slim) vs `Dockerfile.full` (with models).
#[derive(Args, Debug)]
pub struct ServerArgs {
/// Server port
#[arg(long, env = "PORT", default_value = "8080")]
pub port: u16,
- /// Path to parakeet model directory
- #[arg(
- long,
- env = "PARAKEET_MODEL_DIR",
- default_value = "models/parakeet-tdt-0.6b-v3"
- )]
- pub parakeet_model_dir: String,
-
- /// Path to parakeet EOU model directory
- #[arg(
- long,
- env = "PARAKEET_EOU_DIR",
- default_value = "models/realtime_eou_120m-v1-onnx"
- )]
- pub parakeet_eou_dir: String,
-
- /// Path to sortformer model
- #[arg(
- long,
- env = "SORTFORMER_MODEL_PATH",
- default_value = "models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx"
- )]
- pub sortformer_model_path: String,
-
- /// Path to Chatterbox TTS model directory
- #[arg(long, env = "CHATTERBOX_MODEL_DIR", default_value = "models/chatterbox-turbo")]
- pub chatterbox_model_dir: String,
+ /// Path to parakeet model directory (optional; STT disabled when unset).
+ #[arg(long, env = "PARAKEET_MODEL_DIR")]
+ pub parakeet_model_dir: Option<String>,
+
+ /// Path to parakeet EOU model directory (optional).
+ #[arg(long, env = "PARAKEET_EOU_DIR")]
+ pub parakeet_eou_dir: Option<String>,
+
+ /// Path to sortformer model (optional; diarization disabled when unset).
+ #[arg(long, env = "SORTFORMER_MODEL_PATH")]
+ pub sortformer_model_path: Option<String>,
+
+ /// Path to Chatterbox TTS model directory (optional; TTS disabled when unset).
+ #[arg(long, env = "CHATTERBOX_MODEL_DIR")]
+ pub chatterbox_model_dir: Option<String>,
/// PostgreSQL connection URI
#[arg(long, env = "POSTGRES_CONNECTION_URI")]
diff --git a/makima/src/server/state.rs b/makima/src/server/state.rs
index 6bd9e2b..e267da1 100644
--- a/makima/src/server/state.rs
+++ b/makima/src/server/state.rs
@@ -691,21 +691,37 @@ pub struct AppState {
}
impl AppState {
+ /// Create AppState WITHOUT ML model configuration. Listen and Speak
+ /// endpoints will return "not configured" errors if used; everything
+ /// else (mesh, directives, files, contracts-free CRUD) works normally.
+ /// This is the constructor used by the slim Dockerfile.
+ pub fn new_slim() -> Self {
+ Self::new_inner(None)
+ }
+
/// Create AppState with ML model configuration for lazy loading.
+ /// All four paths are required by this constructor — Listen needs all
+ /// three of parakeet/parakeet_eou/sortformer; Speak needs chatterbox.
+ /// To run without any ML model configuration, call `new_slim()`
+ /// instead, which leaves `model_config` as `None`.
///
- /// Models are NOT loaded at startup - they will be loaded on first Listen connection.
- ///
- /// # Arguments
- /// * `parakeet_model_dir` - Path to the Parakeet TDT model directory
- /// * `parakeet_eou_dir` - Path to the Parakeet EOU model directory
- /// * `sortformer_model_path` - Path to the Sortformer diarization model file
- /// * `chatterbox_model_dir` - Path to the Chatterbox TTS model directory
+ /// Models are NOT loaded at startup — they're loaded on first use.
pub fn new(
parakeet_model_dir: &str,
parakeet_eou_dir: &str,
sortformer_model_path: &str,
chatterbox_model_dir: &str,
) -> Self {
+ Self::new_inner(Some(ModelConfig {
+ parakeet_model_dir: parakeet_model_dir.to_string(),
+ parakeet_eou_dir: parakeet_eou_dir.to_string(),
+ sortformer_model_path: sortformer_model_path.to_string(),
+ chatterbox_model_dir: chatterbox_model_dir.to_string(),
+ }))
+ }
+
+ /// Internal constructor — model_config can be None for the slim build.
+ fn new_inner(model_config: Option<ModelConfig>) -> Self {
// Create broadcast channels with buffer for 256 messages
let (file_updates, _) = broadcast::channel(256);
let (task_updates, _) = broadcast::channel(256);
@@ -744,12 +760,7 @@ impl AppState {
};
Self {
- model_config: Some(ModelConfig {
- parakeet_model_dir: parakeet_model_dir.to_string(),
- parakeet_eou_dir: parakeet_eou_dir.to_string(),
- sortformer_model_path: sortformer_model_path.to_string(),
- chatterbox_model_dir: chatterbox_model_dir.to_string(),
- }),
+ model_config,
ml_models: OnceCell::new(),
db_pool: None,
file_updates,