From cb2aa9a73163ce392d7c3f1dd81888b039312a67 Mon Sep 17 00:00:00 2001 From: soryu Date: Sat, 24 Jan 2026 00:52:28 +0000 Subject: feat: Add maximum iterations limit for autonomous loop mode Adds configurable iteration limits to prevent runaway autonomous loops and provide predictable behavior, inspired by Ralph's design patterns. Changes: - Add AutonomousLoopConfig to daemon config with: - default_max_iterations: 10 (default for new tasks) - hard_limit: 50 (absolute maximum that cannot be exceeded) - no_change_threshold: 3 (consecutive runs without progress) - same_error_threshold: 5 (consecutive runs with same error) - Add max_iterations and iteration_count fields to Task model - Add iteration_limit_reached status to TaskStatus enum - Pass max_iterations through DaemonCommand::SpawnTask - Apply limits in CircuitBreaker during autonomous loop execution When a task hits the iteration limit: - Task status is set to "iteration_limit_reached" (not "failed") - Clear message is logged about hitting the limit - Task can be resumed with a higher limit if needed Co-Authored-By: Claude Opus 4.5 --- makima/src/daemon/config.rs | 56 +++++++++++++++++++ makima/src/daemon/task/manager.rs | 111 +++++++++++++++++++++++++++++++------- makima/src/daemon/task/state.rs | 8 ++- makima/src/daemon/ws/protocol.rs | 4 ++ 4 files changed, 160 insertions(+), 19 deletions(-) (limited to 'makima/src/daemon') diff --git a/makima/src/daemon/config.rs b/makima/src/daemon/config.rs index b7cb1e8..735a56a 100644 --- a/makima/src/daemon/config.rs +++ b/makima/src/daemon/config.rs @@ -37,6 +37,57 @@ fn default_true() -> bool { true } +/// Autonomous loop configuration for controlling iteration limits. +/// Inspired by Ralph's pattern for preventing runaway autonomous loops. +#[derive(Debug, Clone, Deserialize)] +#[serde(default)] +pub struct AutonomousLoopConfig { + /// Default maximum iterations for autonomous loop mode (default: 10). 
+ /// Tasks will stop after this many iterations if no COMPLETION_GATE with ready: true is found. + #[serde(default = "default_max_iterations", alias = "defaultmaxiterations")] + pub default_max_iterations: u32, + + /// Hard limit on maximum iterations (default: 50). + /// This is an absolute maximum that cannot be exceeded, even if task specifies higher. + #[serde(default = "default_hard_limit", alias = "hardlimit")] + pub hard_limit: u32, + + /// Number of consecutive runs without file changes before stopping (default: 3). + #[serde(default = "default_no_change_threshold", alias = "nochangethreshold")] + pub no_change_threshold: u32, + + /// Number of consecutive runs with same error before stopping (default: 5). + #[serde(default = "default_same_error_threshold", alias = "sameerrorthreshold")] + pub same_error_threshold: u32, +} + +fn default_max_iterations() -> u32 { + 10 +} + +fn default_hard_limit() -> u32 { + 50 +} + +fn default_no_change_threshold() -> u32 { + 3 +} + +fn default_same_error_threshold() -> u32 { + 5 +} + +impl Default for AutonomousLoopConfig { + fn default() -> Self { + Self { + default_max_iterations: default_max_iterations(), + hard_limit: default_hard_limit(), + no_change_threshold: default_no_change_threshold(), + same_error_threshold: default_same_error_threshold(), + } + } +} + /// Root daemon configuration. #[derive(Debug, Clone, Deserialize)] pub struct DaemonConfig { @@ -63,6 +114,10 @@ pub struct DaemonConfig { /// Repositories to auto-clone on startup. #[serde(default)] pub repos: ReposConfig, + + /// Autonomous loop configuration for iteration limits. + #[serde(default)] + pub autonomous_loop: AutonomousLoopConfig, } /// Server connection configuration. 
@@ -626,6 +681,7 @@ impl DaemonConfig { }, logging: LoggingConfig::default(), repos: ReposConfig::default(), + autonomous_loop: AutonomousLoopConfig::default(), } } } diff --git a/makima/src/daemon/task/manager.rs b/makima/src/daemon/task/manager.rs index 3fdde9b..9020f27 100644 --- a/makima/src/daemon/task/manager.rs +++ b/makima/src/daemon/task/manager.rs @@ -953,6 +953,8 @@ pub struct ManagedTask { pub concurrency_key: Uuid, /// Whether to run in autonomous loop mode. pub autonomous_loop: bool, + /// Maximum iterations for autonomous loop mode (None = use default from config). + pub max_iterations: Option<u32>, /// Time task was created. pub created_at: Instant, /// Time task started running. @@ -995,6 +997,8 @@ pub struct TaskConfig { pub heartbeat_commit_interval_secs: u64, /// Checkpoint patch storage configuration. pub checkpoint_patches: CheckpointPatchConfig, + /// Autonomous loop configuration for iteration limits. + pub autonomous_loop_config: crate::daemon::config::AutonomousLoopConfig, } impl Default for TaskConfig { @@ -1014,6 +1018,7 @@ impl Default for TaskConfig { api_key: String::new(), heartbeat_commit_interval_secs: 300, // 5 minutes checkpoint_patches: CheckpointPatchConfig::default(), + autonomous_loop_config: crate::daemon::config::AutonomousLoopConfig::default(), } } } @@ -1650,6 +1655,7 @@ impl TaskManager { conversation_history, patch_data, patch_base_sha, + max_iterations, } => { tracing::info!( task_id = %task_id, @@ -1662,6 +1668,7 @@ impl TaskManager { is_orchestrator = is_orchestrator, is_supervisor = is_supervisor, autonomous_loop = autonomous_loop, + max_iterations = ?max_iterations, resume_session = resume_session, target_repo_path = ?target_repo_path, completion_action = ?completion_action, @@ -1676,7 +1683,7 @@ impl TaskManager { parent_task_id, depth, is_orchestrator, is_supervisor, target_repo_path, completion_action, continue_from_task_id, copy_files, contract_id, autonomous_loop, resume_session, - conversation_history, patch_data, 
patch_base_sha, + conversation_history, patch_data, patch_base_sha, max_iterations, ).await?; } DaemonCommand::PauseTask { task_id } => { @@ -1776,6 +1783,7 @@ impl TaskManager { None, // conversation_history - not needed for fresh respawn None, // patch_data - not available for respawn None, // patch_base_sha - not available for respawn + None, // max_iterations - not used for supervisors ).await { tracing::error!( task_id = %task_id, @@ -2004,6 +2012,7 @@ impl TaskManager { conversation_history: Option, patch_data: Option, patch_base_sha: Option, + max_iterations: Option<u32>, ) -> TaskResult<()> { tracing::info!(task_id = %task_id, is_orchestrator = is_orchestrator, is_supervisor = is_supervisor, depth = depth, patch_available = patch_data.is_some(), "=== SPAWN_TASK START ==="); @@ -2054,6 +2063,7 @@ impl TaskManager { contract_id, concurrency_key, autonomous_loop, + max_iterations, created_at: Instant::now(), started_at: None, completed_at: None, @@ -2080,7 +2090,7 @@ impl TaskManager { task_id, task_name, plan, repo_url, base_branch, target_branch, is_orchestrator, is_supervisor, target_repo_path, completion_action, continue_from_task_id, copy_files, contract_id, autonomous_loop, resume_session, - conversation_history, patch_data, patch_base_sha, + conversation_history, patch_data, patch_base_sha, max_iterations, ).await { tracing::error!(task_id = %task_id, error = %e, "Task execution failed"); inner.mark_failed(task_id, &e.to_string()).await; @@ -2112,6 +2122,7 @@ impl TaskManager { contract_task_counts: self.contract_task_counts.clone(), checkpoint_patches: self.config.checkpoint_patches.clone(), local_db: self.local_db.clone(), + autonomous_loop_config: self.config.autonomous_loop_config.clone(), } } @@ -3433,6 +3444,8 @@ struct TaskManagerInner { checkpoint_patches: CheckpointPatchConfig, /// Local SQLite database for crash recovery. local_db: Arc>, + /// Autonomous loop configuration for iteration limits. 
+ autonomous_loop_config: crate::daemon::config::AutonomousLoopConfig, } impl TaskManagerInner { @@ -3486,6 +3499,7 @@ impl TaskManagerInner { conversation_history: Option, patch_data: Option, patch_base_sha: Option, + max_iterations: Option<u32>, ) -> Result<(), DaemonError> { tracing::info!(task_id = %task_id, is_orchestrator = is_orchestrator, is_supervisor = is_supervisor, resume_session = resume_session, has_patch = patch_data.is_some(), "=== RUN_TASK START ==="); @@ -4208,9 +4222,31 @@ impl TaskManagerInner { // For autonomous loop mode: track accumulated output for COMPLETION_GATE detection let mut accumulated_output = String::new(); - let mut circuit_breaker = CircuitBreaker::new(); + + // Calculate effective max iterations: use task-specific value if provided, + // otherwise use daemon config default, but never exceed hard limit + let loop_config = &self.autonomous_loop_config; + let effective_max_iterations = max_iterations + .unwrap_or(loop_config.default_max_iterations) + .min(loop_config.hard_limit); + + tracing::info!( + task_id = %task_id, + task_max_iterations = ?max_iterations, + config_default = loop_config.default_max_iterations, + hard_limit = loop_config.hard_limit, + effective_max_iterations = effective_max_iterations, + "Autonomous loop configuration" + ); + + let mut circuit_breaker = CircuitBreaker::with_thresholds( + loop_config.no_change_threshold, + loop_config.same_error_threshold, + effective_max_iterations, + ); let mut iteration_count = 0u32; let mut final_exit_code: i64 = -1; // Track the final exit code across iterations + let mut iteration_limit_reached = false; // Track if we hit max iterations // Autonomous loop: we may run multiple iterations 'autonomous_loop: loop { @@ -4467,17 +4503,20 @@ impl TaskManagerInner { let error = gate.blockers.as_ref().and_then(|b| b.first()).map(|s| s.as_str()); if !circuit_breaker.record_iteration(had_progress, error) { - // Circuit breaker tripped + // Circuit breaker tripped - check if it was due to 
max iterations + let reason = circuit_breaker.open_reason.as_deref().unwrap_or("Unknown reason"); + if reason.contains("Maximum iterations") { + iteration_limit_reached = true; + } tracing::warn!( task_id = %task_id, reason = ?circuit_breaker.open_reason, + iteration_limit_reached = iteration_limit_reached, "Circuit breaker tripped, stopping autonomous loop" ); let msg = DaemonMessage::task_output( task_id, - format!("\n[Autonomous Loop] Circuit breaker tripped: {}\n", - circuit_breaker.open_reason.as_deref().unwrap_or("Unknown reason") - ), + format!("\n[Autonomous Loop] Circuit breaker tripped: {}\n", reason), false, ); let _ = self.ws_tx.send(msg).await; @@ -4506,16 +4545,20 @@ impl TaskManagerInner { let had_progress = output_bytes > 0; if !circuit_breaker.record_iteration(had_progress, None) { + // Circuit breaker tripped - check if it was due to max iterations + let reason = circuit_breaker.open_reason.as_deref().unwrap_or("Unknown reason"); + if reason.contains("Maximum iterations") { + iteration_limit_reached = true; + } tracing::warn!( task_id = %task_id, reason = ?circuit_breaker.open_reason, + iteration_limit_reached = iteration_limit_reached, "Circuit breaker tripped (no COMPLETION_GATE), stopping" ); let msg = DaemonMessage::task_output( task_id, - format!("\n[Autonomous Loop] Circuit breaker tripped: {}\n", - circuit_breaker.open_reason.as_deref().unwrap_or("Unknown reason") - ), + format!("\n[Autonomous Loop] Circuit breaker tripped: {}\n", reason), false, ); let _ = self.ws_tx.send(msg).await; @@ -4538,9 +4581,12 @@ impl TaskManagerInner { } } // end 'autonomous_loop - // Update state based on exit code + // Update state based on exit code and iteration limit let success = final_exit_code == 0; - let new_state = if success { + let new_state = if iteration_limit_reached { + // Task hit the max iteration limit - special state that allows resuming + TaskState::IterationLimitReached + } else if success { TaskState::Completed } else { TaskState::Failed 
@@ -4550,6 +4596,7 @@ impl TaskManagerInner { task_id = %task_id, exit_code = final_exit_code, success = success, + iteration_limit_reached = iteration_limit_reached, new_state = ?new_state, "Claude process exited, updating task state" ); @@ -4559,7 +4606,12 @@ impl TaskManagerInner { if let Some(task) = tasks.get_mut(&task_id) { task.state = new_state; task.completed_at = Some(Instant::now()); - if !success { + if iteration_limit_reached { + task.error = Some(format!( + "Task stopped after {} iterations (max: {}). Can be resumed with higher limit.", + iteration_count, effective_max_iterations + )); + } else if !success { task.error = Some(format!("Process exited with code {}", final_exit_code)); } } @@ -4621,17 +4673,39 @@ impl TaskManagerInner { ); let _ = self.ws_tx.send(msg).await; } else { - let error = if success { + let error = if iteration_limit_reached { + Some(format!( + "Iteration limit reached ({}/{}). Task can be resumed with higher limit.", + iteration_count, effective_max_iterations + )) + } else if success { None } else { Some(format!("Exit code: {}", final_exit_code)) }; - tracing::info!(task_id = %task_id, success = success, "Notifying server of task completion"); - let msg = DaemonMessage::task_complete(task_id, success, error); + // Mark iteration_limit_reached as successful for status purposes (not a failure) + // but send the specific status via send_status_change + let task_success = success || iteration_limit_reached; + tracing::info!( + task_id = %task_id, + success = task_success, + iteration_limit_reached = iteration_limit_reached, + "Notifying server of task completion" + ); + + if iteration_limit_reached { + // Send specific status change for iteration limit + self.send_status_change(task_id, "running", "iteration_limit_reached").await; + } + + // Send task complete message + let msg = DaemonMessage::task_complete(task_id, task_success, error); let _ = self.ws_tx.send(msg).await; - // Remove completed task from local database (no longer 
needs crash recovery) - self.remove_task_from_local_db(task_id); + // Only remove from local database if fully completed (not paused at limit) + if !iteration_limit_reached { + self.remove_task_from_local_db(task_id); + } } // Note: Worktrees are kept until explicitly deleted (per user preference) @@ -5098,6 +5172,7 @@ impl Clone for TaskManagerInner { contract_task_counts: self.contract_task_counts.clone(), checkpoint_patches: self.checkpoint_patches.clone(), local_db: self.local_db.clone(), + autonomous_loop_config: self.autonomous_loop_config.clone(), } } } diff --git a/makima/src/daemon/task/state.rs b/makima/src/daemon/task/state.rs index 7b59b62..ed7c177 100644 --- a/makima/src/daemon/task/state.rs +++ b/makima/src/daemon/task/state.rs @@ -21,6 +21,9 @@ pub enum TaskState { Failed, /// Task interrupted by user. Interrupted, + /// Task stopped due to reaching maximum iteration limit in autonomous loop mode. + /// Task can be resumed with a higher limit if needed. + IterationLimitReached, } impl TaskState { @@ -44,6 +47,7 @@ impl TaskState { | (Running, Completed) | (Running, Failed) | (Running, Interrupted) + | (Running, IterationLimitReached) // From Paused | (Paused, Running) | (Paused, Interrupted) @@ -59,7 +63,7 @@ impl TaskState { pub fn is_terminal(&self) -> bool { matches!( self, - TaskState::Completed | TaskState::Failed | TaskState::Interrupted + TaskState::Completed | TaskState::Failed | TaskState::Interrupted | TaskState::IterationLimitReached ) } @@ -91,6 +95,7 @@ impl TaskState { TaskState::Completed => "done", TaskState::Failed => "failed", TaskState::Interrupted => "interrupted", + TaskState::IterationLimitReached => "iteration_limit_reached", } } @@ -105,6 +110,7 @@ impl TaskState { "done" | "completed" => Some(TaskState::Completed), "failed" => Some(TaskState::Failed), "interrupted" => Some(TaskState::Interrupted), + "iteration_limit_reached" => Some(TaskState::IterationLimitReached), _ => None, } } diff --git 
a/makima/src/daemon/ws/protocol.rs b/makima/src/daemon/ws/protocol.rs index 2e7caef..4ea0c5e 100644 --- a/makima/src/daemon/ws/protocol.rs +++ b/makima/src/daemon/ws/protocol.rs @@ -422,6 +422,10 @@ pub enum DaemonCommand { /// Commit SHA to apply the patch on top of. #[serde(rename = "patchBaseSha", default, skip_serializing_if = "Option::is_none")] patch_base_sha: Option, + /// Maximum iterations for autonomous loop mode (None = use daemon default). + /// Task stops with "iteration_limit_reached" status when limit is hit. + #[serde(rename = "maxIterations", default, skip_serializing_if = "Option::is_none")] + max_iterations: Option<u32>, }, /// Pause a running task. -- cgit v1.2.3