From 3efdab36ca61a6795454668881d5b925abe22bd3 Mon Sep 17 00:00:00 2001 From: soryu Date: Thu, 15 Jan 2026 11:57:43 +0000 Subject: Fixup: Add cleanup and isolation features to makima Add comprehensive CLI documentation - Create makima/docs/CLI.md with complete command reference for: - makima server: HTTP/WebSocket server options - makima daemon: Worker daemon configuration - makima supervisor: Contract orchestration commands - makima contract: Task-contract interaction commands - Include configuration file examples and environment variables - Add usage workflows for common scenarios - Update makima/README.md with CLI overview and link to docs Add GitHub Actions release workflow for v0.1.0 Creates automated release workflow that: - Triggers on v* tag pushes - Builds binaries for Linux x86_64, macOS x86_64, and macOS ARM64 - Uses Rust nightly toolchain (required for edition 2024) - Packages binaries as .tar.gz archives - Creates GitHub release with installation instructions fix(ci): update macOS runner for x86_64 builds Replace deprecated macos-13 runner with macos-15-intel for x86_64-apple-darwin target. The macos-13 runner has been retired by GitHub Actions. Co-Authored-By: Claude Opus 4.5 Add dismissing notifications and fix CLI task ID arg Add worktree cleanup when contracts complete or are deleted (#21) - Add CleanupWorktree daemon command variant - Handle CleanupWorktree in daemon task manager - Add cleanup_contract_worktrees helper function - Trigger cleanup when contract status becomes 'completed' - Trigger cleanup before contract deletion Add Autonomous Loop Mode for persistent task completion (#20) Implements the "Autonomous Loop Mode" feature inspired by Ralph for Claude Code. This enables tasks to automatically restart and continue working until they explicitly signal completion via a COMPLETION_GATE block. Key features: - Exit confirmation via COMPLETION_GATE: Tasks must output a block with `ready: true` to signal completion. Without this, the task auto-restarts using `claude --continue` to resume the conversation. - Circuit breaker: Prevents infinite loops by detecting: * Maximum iteration limit (default: 10) * No progress for N consecutive iterations (default: 3) * Same error repeated N times (default: 5) - spawn_continue: New ProcessManager method to spawn Claude with the `--continue` flag, resuming from the previous session state. Toggle: Enable via `autonomous_loop` flag on contracts. When set, all tasks spawned for that contract will run in autonomous loop mode. Files changed: - completion_gate.rs: COMPLETION_GATE parser and CircuitBreaker logic - claude.rs: spawn_continue() for --continue mode spawning - manager.rs: Autonomous loop iteration logic in run_task() - protocol.rs: autonomousLoop field in DaemonCommand::SpawnTask - models.rs/repository.rs: autonomous_loop column on contracts/tasks - Migration: Adds autonomous_loop columns to contracts and tasks tables Add get-task and output commands to supervisor CLI (#24) Add two new supervisor subcommands: - `makima supervisor task ` - Get individual task details - `makima supervisor output ` - Get task output/claude log This allows supervisors to fetch task details and claude output directly from the CLI instead of using curl to call the task API. Add optional bubblewrap sandboxing for Claude processes (#23) Add --bubblewrap flag and process.bubblewrap config section to enable running Claude Code in a bubblewrap sandbox for process isolation. When enabled, claude processes run with filesystem restrictions: - Root filesystem mounted read-only - Working directory (worktree) mounted read-write - Fresh /dev, /proc, /tmp - Network access preserved for API calls Co-authored-by: Claude Opus 4.5 --- makima/src/daemon/task/completion_gate.rs | 402 ++++++++++++++++++++++++++++++ 1 file changed, 402 insertions(+) create mode 100644 makima/src/daemon/task/completion_gate.rs (limited to 'makima/src/daemon/task/completion_gate.rs') diff --git a/makima/src/daemon/task/completion_gate.rs b/makima/src/daemon/task/completion_gate.rs new file mode 100644 index 0000000..69b7c6a --- /dev/null +++ b/makima/src/daemon/task/completion_gate.rs @@ -0,0 +1,402 @@ +//! Completion gate parsing for autonomous loop mode. +//! +//! This module parses COMPLETION_GATE blocks from Claude's output to determine +//! if the task is truly complete. The format is inspired by Ralph's autonomous +//! development framework. +//! +//! Format: +//! ``` +//! +//! ready: true|false +//! reason: "explanation of completion status" +//! progress: "summary of what was accomplished" +//! blockers: ["list", "of", "blockers"] (optional, only when ready: false) +//! +//! ``` + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// Represents a parsed COMPLETION_GATE block from Claude's output. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct CompletionGate { + /// Whether the task is ready to complete. + pub ready: bool, + /// Explanation of the completion status. + pub reason: Option, + /// Summary of what was accomplished. + pub progress: Option, + /// List of blockers if not ready. + pub blockers: Option>, + /// Any additional fields that were parsed. + #[serde(flatten)] + pub extra: HashMap, +} + +impl CompletionGate { + /// Parse a COMPLETION_GATE block from text output. + /// + /// Returns None if no valid COMPLETION_GATE is found. + pub fn parse(text: &str) -> Option { + // Find the COMPLETION_GATE block + let start_tag = ""; + let end_tag = ""; + + let start_idx = text.find(start_tag)?; + let end_idx = text.find(end_tag)?; + + if end_idx <= start_idx { + return None; + } + + let content = &text[start_idx + start_tag.len()..end_idx]; + let content = content.trim(); + + // Try to parse as JSON first + if content.starts_with('{') { + if let Ok(gate) = serde_json::from_str::(content) { + return Some(gate); + } + } + + // Fall back to YAML-like parsing + Self::parse_yaml_like(content) + } + + /// Parse a YAML-like format (key: value lines). + fn parse_yaml_like(content: &str) -> Option { + let mut gate = CompletionGate::default(); + + for line in content.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + if let Some((key, value)) = line.split_once(':') { + let key = key.trim().to_lowercase(); + let value = value.trim(); + + match key.as_str() { + "ready" => { + gate.ready = value.to_lowercase() == "true" + || value == "yes" + || value == "1"; + } + "reason" => { + gate.reason = Some(Self::unquote(value)); + } + "progress" => { + gate.progress = Some(Self::unquote(value)); + } + "blockers" => { + // Try to parse as JSON array + if let Ok(blockers) = serde_json::from_str::>(value) { + gate.blockers = Some(blockers); + } else { + // Single blocker as string + gate.blockers = Some(vec![Self::unquote(value)]); + } + } + _ => { + // Store unknown fields + if let Ok(json_val) = serde_json::from_str(value) { + gate.extra.insert(key, json_val); + } else { + gate.extra.insert( + key, + serde_json::Value::String(Self::unquote(value)), + ); + } + } + } + } + } + + Some(gate) + } + + /// Remove surrounding quotes from a string value. + fn unquote(s: &str) -> String { + let s = s.trim(); + if (s.starts_with('"') && s.ends_with('"')) + || (s.starts_with('\'') && s.ends_with('\'')) + { + s[1..s.len() - 1].to_string() + } else { + s.to_string() + } + } + + /// Find all COMPLETION_GATE blocks in the output and return the last one. + /// + /// This is useful when Claude produces multiple completion gates during + /// a long-running task, and we want to use the final status. + pub fn parse_last(text: &str) -> Option { + let end_tag = ""; + let mut last_gate = None; + let mut search_start = 0; + + while let Some(end_idx) = text[search_start..].find(end_tag) { + let absolute_end = search_start + end_idx + end_tag.len(); + if let Some(gate) = Self::parse(&text[..absolute_end]) { + last_gate = Some(gate); + } + search_start = absolute_end; + } + + last_gate + } +} + +/// State tracking for the circuit breaker in autonomous loop mode. +#[derive(Debug, Clone, Default)] +pub struct CircuitBreaker { + /// Number of consecutive runs without file changes. + pub runs_without_changes: u32, + /// Threshold for opening circuit due to no changes (default: 3). + pub no_change_threshold: u32, + /// Number of consecutive runs with the same error. + pub same_error_count: u32, + /// Threshold for opening circuit due to same error (default: 5). + pub same_error_threshold: u32, + /// Last error message seen. + pub last_error: Option, + /// Total number of loop iterations. + pub iteration_count: u32, + /// Maximum allowed iterations (default: 10). + pub max_iterations: u32, + /// Whether the circuit is open (task should stop). + pub is_open: bool, + /// Reason why circuit was opened. + pub open_reason: Option, +} + +impl CircuitBreaker { + /// Create a new circuit breaker with default thresholds. + pub fn new() -> Self { + Self { + no_change_threshold: 3, + same_error_threshold: 5, + max_iterations: 10, + ..Default::default() + } + } + + /// Create with custom thresholds. + pub fn with_thresholds(no_change: u32, same_error: u32, max_iterations: u32) -> Self { + Self { + no_change_threshold: no_change, + same_error_threshold: same_error, + max_iterations, + ..Default::default() + } + } + + /// Record a new iteration. Returns true if circuit should remain closed. + pub fn record_iteration(&mut self, had_changes: bool, error: Option<&str>) -> bool { + self.iteration_count += 1; + + // Check max iterations + if self.iteration_count >= self.max_iterations { + self.is_open = true; + self.open_reason = Some(format!( + "Maximum iterations ({}) reached", + self.max_iterations + )); + return false; + } + + // Track file changes + if had_changes { + self.runs_without_changes = 0; + } else { + self.runs_without_changes += 1; + if self.runs_without_changes >= self.no_change_threshold { + self.is_open = true; + self.open_reason = Some(format!( + "No file changes for {} consecutive runs", + self.runs_without_changes + )); + return false; + } + } + + // Track errors + match (error, &self.last_error) { + (Some(err), Some(last)) if err == last => { + self.same_error_count += 1; + if self.same_error_count >= self.same_error_threshold { + self.is_open = true; + self.open_reason = Some(format!( + "Same error repeated {} times: {}", + self.same_error_count, err + )); + return false; + } + } + (Some(err), _) => { + self.last_error = Some(err.to_string()); + self.same_error_count = 1; + } + (None, _) => { + self.same_error_count = 0; + self.last_error = None; + } + } + + true // Circuit remains closed + } + + /// Check if the circuit breaker is open. + pub fn should_stop(&self) -> bool { + self.is_open + } + + /// Reset the circuit breaker. + pub fn reset(&mut self) { + *self = Self::with_thresholds( + self.no_change_threshold, + self.same_error_threshold, + self.max_iterations, + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_yaml_format() { + let text = r#" +Some output before + +ready: true +reason: "All tests pass" +progress: "Implemented feature X" + +More output after +"#; + + let gate = CompletionGate::parse(text).unwrap(); + assert!(gate.ready); + assert_eq!(gate.reason.as_deref(), Some("All tests pass")); + assert_eq!(gate.progress.as_deref(), Some("Implemented feature X")); + } + + #[test] + fn test_parse_not_ready() { + let text = r#" + +ready: false +reason: "Tests are failing" +blockers: ["Fix test_foo", "Fix test_bar"] + +"#; + + let gate = CompletionGate::parse(text).unwrap(); + assert!(!gate.ready); + assert_eq!(gate.reason.as_deref(), Some("Tests are failing")); + assert_eq!( + gate.blockers, + Some(vec!["Fix test_foo".to_string(), "Fix test_bar".to_string()]) + ); + } + + #[test] + fn test_parse_json_format() { + let text = r#" + +{ + "ready": true, + "reason": "Done", + "progress": "All good" +} + +"#; + + let gate = CompletionGate::parse(text).unwrap(); + assert!(gate.ready); + assert_eq!(gate.reason.as_deref(), Some("Done")); + } + + #[test] + fn test_parse_last_gate() { + let text = r#" + +ready: false +reason: "Still working" + +Some more work... + +ready: true +reason: "Finally done" + +"#; + + let gate = CompletionGate::parse_last(text).unwrap(); + assert!(gate.ready); + assert_eq!(gate.reason.as_deref(), Some("Finally done")); + } + + #[test] + fn test_no_gate() { + let text = "No completion gate here"; + assert!(CompletionGate::parse(text).is_none()); + } + + #[test] + fn test_circuit_breaker_max_iterations() { + let mut cb = CircuitBreaker::with_thresholds(3, 5, 5); + for _ in 0..4 { + assert!(cb.record_iteration(true, None)); + } + assert!(!cb.record_iteration(true, None)); // 5th iteration should trip + assert!(cb.is_open); + assert!(cb.open_reason.as_ref().unwrap().contains("Maximum iterations")); + } + + #[test] + fn test_circuit_breaker_no_changes() { + let mut cb = CircuitBreaker::with_thresholds(3, 5, 10); + assert!(cb.record_iteration(false, None)); // 1st no change + assert!(cb.record_iteration(false, None)); // 2nd no change + assert!(!cb.record_iteration(false, None)); // 3rd no change - trips + assert!(cb.is_open); + assert!(cb.open_reason.as_ref().unwrap().contains("No file changes")); + } + + #[test] + fn test_circuit_breaker_same_error() { + let mut cb = CircuitBreaker::with_thresholds(10, 3, 10); + let err = "Test failed"; + assert!(cb.record_iteration(true, Some(err))); + assert!(cb.record_iteration(true, Some(err))); + assert!(!cb.record_iteration(true, Some(err))); // 3rd same error - trips + assert!(cb.is_open); + assert!(cb.open_reason.as_ref().unwrap().contains("Same error")); + } + + #[test] + fn test_circuit_breaker_different_errors_ok() { + let mut cb = CircuitBreaker::with_thresholds(10, 3, 10); + assert!(cb.record_iteration(true, Some("error 1"))); + assert!(cb.record_iteration(true, Some("error 2"))); + assert!(cb.record_iteration(true, Some("error 3"))); + // Different errors don't trip the circuit + assert!(!cb.is_open); + } + + #[test] + fn test_circuit_breaker_changes_reset() { + let mut cb = CircuitBreaker::with_thresholds(3, 5, 10); + assert!(cb.record_iteration(false, None)); // 1 no change + assert!(cb.record_iteration(false, None)); // 2 no changes + assert!(cb.record_iteration(true, None)); // has changes - resets + assert!(cb.record_iteration(false, None)); // 1 no change again + assert!(cb.record_iteration(false, None)); // 2 no changes + // Still shouldn't trip because we had a change in between + assert!(!cb.is_open); + } +} -- cgit v1.2.3