diff options
| author | soryu <soryu@soryu.co> | 2026-05-02 15:26:39 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-05-02 15:26:39 +0100 |
| commit | 70a83104292c4e1fe5f43dd5f50e5214928c8dd6 (patch) | |
| tree | f423621f1cd7a06ccdeb2cf8b7a011080c49a278 | |
| parent | 760516b2e7b97fa389fb3902e8d2314eea052ff0 (diff) | |
| download | soryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.tar.gz soryu-70a83104292c4e1fe5f43dd5f50e5214928c8dd6.zip | |
build(server): split Dockerfiles, make ML model paths optional (#120)
Existing Dockerfile (with LLM/STT/TTS model download) is now `Dockerfile.full`.
The new top-level `Dockerfile` builds a slim image without python, without
huggingface_hub, without the model download step. The slim image is the new
default for users who only want the orchestration surface — the directive
folder UI, the mesh/task system, the API.
## Slim Dockerfile
* No python / huggingface_hub / model downloads.
* Same runtime tooling as `k8s/daemon/Dockerfile` (git, gh CLI, ssh, jq,
curl, ca-certs, libssl3).
* Embeds the daemon binary at /app/daemon-binaries/makima-linux-x86_64
for the in-server download endpoint.
* PARAKEET_MODEL_DIR / PARAKEET_EOU_DIR / SORTFORMER_MODEL_PATH /
CHATTERBOX_MODEL_DIR are intentionally NOT set — Listen and Speak return
"ML models not configured" if a client tries to use them.
## ML model paths now optional
`ServerArgs.parakeet_model_dir`, `parakeet_eou_dir`, `sortformer_model_path`,
`chatterbox_model_dir` are now `Option<String>` (no defaults). The bin
constructor inspects them: if all four are present, configures
`AppState::new`; if all four are absent, uses the new
`AppState::new_slim()` which leaves `model_config = None`. The lazy load
path in `get_ml_models` already returned a clean error for None.
Speak (TTS) was already optional via `model_config.as_ref()` — still works.
Mixed configurations (only some of the four paths set) log a warning and
degrade to slim mode.
## Ops note
`Dockerfile.full` retains the old `Dockerfile`'s behaviour for anyone who
needs STT/diarization/TTS in production. CI still builds the daemon image
from `k8s/daemon/Dockerfile` (untouched).
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
| -rw-r--r-- | Dockerfile | 83 | ||||
| -rw-r--r-- | Dockerfile.full | 57 | ||||
| -rw-r--r-- | makima/src/bin/makima.rs | 34 | ||||
| -rw-r--r-- | makima/src/daemon/cli/server.rs | 49 | ||||
| -rw-r--r-- | makima/src/server/state.rs | 37 |
5 files changed, 181 insertions, 79 deletions
@@ -1,57 +1,76 @@ -FROM rust:1.91-bookworm +# ============================================================================== +# Makima Server — slim image (no LLM/STT/TTS models) +# ============================================================================== +# This builds the smallest viable Makima server image: the Rust binary plus +# the runtime tools the orchestrator needs. The Listen and Speak websocket +# endpoints will respond with "ML models not configured" — everything else +# (mesh, directives, files, repo CRUD) works normally. +# +# Use Dockerfile.full when you need STT (Parakeet), diarization (Sortformer), +# or TTS (Chatterbox) — that variant is ~5GB larger because it downloads the +# model weights at build time. +# ============================================================================== + +# ---------- Builder stage ---------- +FROM rust:1.91-bookworm AS builder WORKDIR /app -# Install dependencies RUN apt-get update && apt-get install -y \ pkg-config \ libssl-dev \ - curl \ - python3 \ - python3-pip \ - && pip3 install --break-system-packages huggingface_hub[cli] \ && rm -rf /var/lib/apt/lists/* -# Copy and run model download script -COPY makima/sh/download-models.sh /app/download-models.sh -RUN chmod +x /app/download-models.sh - -ARG MODEL_BASE_URL -ENV MODEL_BASE_URL=${MODEL_BASE_URL} -ENV MODELS_DIR=/app/models -ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo -RUN /app/download-models.sh echo "Models downloaded" - -# Copy workspace files +# Copy workspace files. We deliberately do NOT copy `voices/` or any model +# data — the slim build doesn't ship them. 
COPY Cargo.toml Cargo.lock ./ COPY makima ./makima COPY vendor ./vendor COPY tools/stt-client ./tools/stt-client -COPY voices ./voices -# Build release binary RUN cargo build --release --package makima --bin makima -RUN cp /app/target/release/makima /makima -# Embed daemon binary for download endpoint -RUN mkdir -p /app/daemon-binaries -RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64 -ENV DAEMON_BINARIES_DIR=/app/daemon-binaries +# ---------- Runtime stage ---------- +FROM debian:bookworm-slim + +# Runtime deps — same as the daemon image. No python, no huggingface_hub, +# no model download script. +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + libssl3 \ + git \ + curl \ + openssh-client \ + jq \ + && rm -rf /var/lib/apt/lists/* + +# GitHub CLI for orchestrator PR operations. +RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg \ + && chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list \ + && apt-get update \ + && apt-get install -y --no-install-recommends gh \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /app/target/release/makima /makima -# Clean up build artifacts to reduce image size -RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry +# Embed daemon binary for the download endpoint (same binary, served as +# `/api/v1/daemons/binaries/...` to clients). 
+RUN mkdir -p /app/daemon-binaries \ + && cp /makima /app/daemon-binaries/makima-linux-x86_64 +ENV DAEMON_BINARIES_DIR=/app/daemon-binaries -# Set default environment variables ENV PORT=8080 ENV RUST_LOG=makima=info,tower_http=info -ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3 -ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx -ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx -ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo +# NOTE: PARAKEET_MODEL_DIR / SORTFORMER_MODEL_PATH / CHATTERBOX_MODEL_DIR are +# DELIBERATELY not set. The server will start without them and Listen/Speak +# will return "ML models not configured" if a client attempts to use them. EXPOSE 8080 -HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ +HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \ CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1 CMD ["/makima", "server"] diff --git a/Dockerfile.full b/Dockerfile.full new file mode 100644 index 0000000..a8a9245 --- /dev/null +++ b/Dockerfile.full @@ -0,0 +1,57 @@ +FROM rust:1.91-bookworm + +WORKDIR /app + +# Install dependencies +RUN apt-get update && apt-get install -y \ + pkg-config \ + libssl-dev \ + curl \ + python3 \ + python3-pip \ + && pip3 install --break-system-packages huggingface_hub[cli] \ + && rm -rf /var/lib/apt/lists/* + +# Copy and run model download script +COPY makima/sh/download-models.sh /app/download-models.sh +RUN chmod +x /app/download-models.sh + +ARG MODEL_BASE_URL +ENV MODEL_BASE_URL=${MODEL_BASE_URL} +ENV MODELS_DIR=/app/models +ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo +RUN /app/download-models.sh echo "Models downloaded" + +# Copy workspace files +COPY Cargo.toml Cargo.lock ./ +COPY makima ./makima +COPY vendor ./vendor +COPY tools/stt-client ./tools/stt-client +COPY voices ./voices + +# Build release binary +RUN cargo build --release --package makima --bin makima +RUN cp 
/app/target/release/makima /makima + +# Embed daemon binary for download endpoint +RUN mkdir -p /app/daemon-binaries +RUN cp /app/target/release/makima /app/daemon-binaries/makima-linux-x86_64 +ENV DAEMON_BINARIES_DIR=/app/daemon-binaries + +# Clean up build artifacts to reduce image size +RUN rm -rf /app/target /app/makima/src /app/vendor /app/tools /usr/local/cargo/registry + +# Set default environment variables +ENV PORT=8080 +ENV RUST_LOG=makima=info,tower_http=info +ENV PARAKEET_MODEL_DIR=/app/models/parakeet-tdt-0.6b-v3 +ENV PARAKEET_EOU_DIR=/app/models/realtime_eou_120m-v1-onnx +ENV SORTFORMER_MODEL_PATH=/app/models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx +ENV CHATTERBOX_MODEL_DIR=/app/models/chatterbox-turbo + +EXPOSE 8080 + +HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ + CMD curl -f http://localhost:${PORT}/api/v1/healthcheck || exit 1 + +CMD ["/makima", "server"] diff --git a/makima/src/bin/makima.rs b/makima/src/bin/makima.rs index 338d8f9..a84c581 100644 --- a/makima/src/bin/makima.rs +++ b/makima/src/bin/makima.rs @@ -42,13 +42,33 @@ async fn run_server( eprintln!("=== Makima Server Starting ==="); eprintln!("Port: {}", args.port); - // Create app state - let mut app_state = makima::server::state::AppState::new( - &args.parakeet_model_dir, - &args.parakeet_eou_dir, - &args.sortformer_model_path, - &args.chatterbox_model_dir, - ); + // Create app state. ML model paths are optional now — when none of + // them are supplied (the slim Dockerfile case) Listen and Speak + // websocket endpoints respond with "not configured" and everything + // else works normally. 
+ let mut app_state = match ( + args.parakeet_model_dir.as_deref(), + args.parakeet_eou_dir.as_deref(), + args.sortformer_model_path.as_deref(), + args.chatterbox_model_dir.as_deref(), + ) { + (Some(p), Some(eou), Some(sf), Some(cb)) => { + eprintln!("ML models configured (lazy load on first use)"); + makima::server::state::AppState::new(p, eou, sf, cb) + } + (None, None, None, None) => { + eprintln!("ML models NOT configured — Listen/Speak disabled"); + makima::server::state::AppState::new_slim() + } + _ => { + eprintln!( + "WARNING: only some ML model paths provided. Pass all four \ + (parakeet/parakeet_eou/sortformer/chatterbox) to enable ML, \ + or none to run in slim mode. Continuing in slim mode." + ); + makima::server::state::AppState::new_slim() + } + }; // Connect to database if URL provided if let Some(ref db_url) = args.database_url { diff --git a/makima/src/daemon/cli/server.rs b/makima/src/daemon/cli/server.rs index adb765d..667f9ea 100644 --- a/makima/src/daemon/cli/server.rs +++ b/makima/src/daemon/cli/server.rs @@ -3,39 +3,34 @@ use clap::Args; /// Run the makima server. +/// +/// ML model paths (parakeet / sortformer / chatterbox) are optional. When +/// none are provided the server still starts; the Listen and Speak websocket +/// endpoints just return a "not configured" error if a client tries to use +/// them. This is the supported way to run a slim deployment of makima +/// without the LLM/STT/TTS dependencies — see `Dockerfile` (slim) vs +/// `Dockerfile.full` (with models). 
#[derive(Args, Debug)] pub struct ServerArgs { /// Server port #[arg(long, env = "PORT", default_value = "8080")] pub port: u16, - /// Path to parakeet model directory - #[arg( - long, - env = "PARAKEET_MODEL_DIR", - default_value = "models/parakeet-tdt-0.6b-v3" - )] - pub parakeet_model_dir: String, - - /// Path to parakeet EOU model directory - #[arg( - long, - env = "PARAKEET_EOU_DIR", - default_value = "models/realtime_eou_120m-v1-onnx" - )] - pub parakeet_eou_dir: String, - - /// Path to sortformer model - #[arg( - long, - env = "SORTFORMER_MODEL_PATH", - default_value = "models/diarization/diar_streaming_sortformer_4spk-v2.1.onnx" - )] - pub sortformer_model_path: String, - - /// Path to Chatterbox TTS model directory - #[arg(long, env = "CHATTERBOX_MODEL_DIR", default_value = "models/chatterbox-turbo")] - pub chatterbox_model_dir: String, + /// Path to parakeet model directory (optional; STT disabled when unset). + #[arg(long, env = "PARAKEET_MODEL_DIR")] + pub parakeet_model_dir: Option<String>, + + /// Path to parakeet EOU model directory (optional). + #[arg(long, env = "PARAKEET_EOU_DIR")] + pub parakeet_eou_dir: Option<String>, + + /// Path to sortformer model (optional; diarization disabled when unset). + #[arg(long, env = "SORTFORMER_MODEL_PATH")] + pub sortformer_model_path: Option<String>, + + /// Path to Chatterbox TTS model directory (optional; TTS disabled when unset). + #[arg(long, env = "CHATTERBOX_MODEL_DIR")] + pub chatterbox_model_dir: Option<String>, /// PostgreSQL connection URI #[arg(long, env = "POSTGRES_CONNECTION_URI")] diff --git a/makima/src/server/state.rs b/makima/src/server/state.rs index 6bd9e2b..e267da1 100644 --- a/makima/src/server/state.rs +++ b/makima/src/server/state.rs @@ -691,21 +691,37 @@ pub struct AppState { } impl AppState { + /// Create AppState WITHOUT ML model configuration. 
Listen and Speak + /// endpoints will return "not configured" errors if used; everything + /// else (mesh, directives, files, contracts-free CRUD) works normally. + /// This is the constructor used by the slim Dockerfile. + pub fn new_slim() -> Self { + Self::new_inner(None) + } + /// Create AppState with ML model configuration for lazy loading. + /// Pass None to disable a specific model family — Listen needs all + /// three of parakeet/parakeet_eou/sortformer; Speak needs chatterbox. + /// If `parakeet_model_dir` is None we skip the whole ModelConfig and + /// behave like `new_slim()`. /// - /// Models are NOT loaded at startup - they will be loaded on first Listen connection. - /// - /// # Arguments - /// * `parakeet_model_dir` - Path to the Parakeet TDT model directory - /// * `parakeet_eou_dir` - Path to the Parakeet EOU model directory - /// * `sortformer_model_path` - Path to the Sortformer diarization model file - /// * `chatterbox_model_dir` - Path to the Chatterbox TTS model directory + /// Models are NOT loaded at startup — they're loaded on first use. pub fn new( parakeet_model_dir: &str, parakeet_eou_dir: &str, sortformer_model_path: &str, chatterbox_model_dir: &str, ) -> Self { + Self::new_inner(Some(ModelConfig { + parakeet_model_dir: parakeet_model_dir.to_string(), + parakeet_eou_dir: parakeet_eou_dir.to_string(), + sortformer_model_path: sortformer_model_path.to_string(), + chatterbox_model_dir: chatterbox_model_dir.to_string(), + })) + } + + /// Internal constructor — model_config can be None for the slim build. 
+ fn new_inner(model_config: Option<ModelConfig>) -> Self { // Create broadcast channels with buffer for 256 messages let (file_updates, _) = broadcast::channel(256); let (task_updates, _) = broadcast::channel(256); @@ -744,12 +760,7 @@ impl AppState { }; Self { - model_config: Some(ModelConfig { - parakeet_model_dir: parakeet_model_dir.to_string(), - parakeet_eou_dir: parakeet_eou_dir.to_string(), - sortformer_model_path: sortformer_model_path.to_string(), - chatterbox_model_dir: chatterbox_model_dir.to_string(), - }), + model_config, ml_models: OnceCell::new(), db_pool: None, file_updates, |
