diff options
Diffstat (limited to 'makima/src/server')
| -rw-r--r-- | makima/src/server/handlers/mesh_supervisor.rs | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/makima/src/server/handlers/mesh_supervisor.rs b/makima/src/server/handlers/mesh_supervisor.rs index a654a05..bcc8f3a 100644 --- a/makima/src/server/handlers/mesh_supervisor.rs +++ b/makima/src/server/handlers/mesh_supervisor.rs @@ -742,6 +742,67 @@ pub async fn spawn_task( updated_fields: vec!["status".to_string(), "daemon_id".to_string()], updated_by: "supervisor".to_string(), }); + + // Check if we should spawn a red team task + // Conditions: + // 1. This is not a supervisor task + // 2. This is not already a red team task + // 3. Contract has red_team_enabled = true + // 4. No red team task exists for this contract yet + if !updated_task.is_supervisor && !updated_task.is_red_team && contract.red_team_enabled { + if let Some(contract_id) = updated_task.contract_id { + // Check if a red team task already exists + match repository::get_red_team_task_for_contract(pool, contract_id).await { + Ok(None) => { + // No red team task exists, spawn one + tracing::info!( + contract_id = %contract_id, + work_task_id = %updated_task.id, + "Spawning red team task for contract (first work task started)" + ); + match spawn_red_team_task( + pool, + &state, + contract_id, + owner_id, + &contract.name, + &contract.phase, + contract.red_team_prompt.as_deref(), + ).await { + Ok(red_team_task) => { + tracing::info!( + contract_id = %contract_id, + red_team_task_id = %red_team_task.id, + "Red team task spawned successfully" + ); + } + Err(e) => { + // Log error but don't fail the work task spawn + tracing::error!( + contract_id = %contract_id, + error = %e, + "Failed to spawn red team task" + ); + } + } + } + Ok(Some(existing)) => { + tracing::debug!( + contract_id = %contract_id, + red_team_task_id = %existing.id, + "Red team task already exists for contract" + ); + } + Err(e) => { + tracing::error!( + contract_id = %contract_id, + error = %e, + "Error checking for existing red team task" + ); + } + } + } + } } break; } @@ -2396,3 +2457,233 @@ pub async fn 
// =============================================================================
// Red Team Task Spawning
// =============================================================================

/// Build the plan (system prompt) handed to a contract's red team monitor task.
///
/// The returned markdown document describes the monitor's mission, what it may
/// inspect, when and how to notify the supervisor, and closes with the contract
/// name and phase so the monitor knows what it is reviewing.
///
/// # Arguments
///
/// * `contract_name` - Name of the contract, rendered in the "Contract Context" section.
/// * `contract_phase` - Current contract phase, rendered next to the name.
/// * `custom_prompt` - Optional owner-supplied review criteria. When present and
///   non-blank it is inserted verbatim as a "Custom Review Criteria" section just
///   before the contract context. `None`, an empty string, or a whitespace-only
///   string produces no such section.
///
/// # Returns
///
/// The complete plan text as an owned `String`.
pub fn generate_red_team_plan(
    contract_name: &str,
    contract_phase: &str,
    custom_prompt: Option<&str>,
) -> String {
    // A blank prompt carries no criteria; rendering it would leave an empty
    // "Custom Review Criteria" heading in the plan, so treat it as absent.
    let custom_criteria = custom_prompt
        .filter(|prompt| !prompt.trim().is_empty())
        .map(|prompt| {
            format!(
                r#"

## Custom Review Criteria

The contract owner has specified additional review criteria:
{}
"#,
                prompt
            )
        })
        .unwrap_or_default();

    format!(
        r#"# Red Team Monitor

You are an adversarial quality reviewer for a software development contract. Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems.

## Your Mission

Monitor all task outputs and verify:
1. **Plan Adherence**: Are tasks following the implementation plan?
2. **Code Quality**: Does the code meet repository standards?
3. **Contract Requirements**: Does the implementation match the specification?
4. **Best Practices**: Are there obvious anti-patterns or issues?

## Access Available

You have read-only access to:
- Task outputs (streamed in real-time)
- Task diffs (code changes)
- Contract specifications and plan documents
- Repository configuration files (CONTRIBUTING.md, linting configs, etc.)

## How to Monitor

1. **Subscribe to task outputs**: You'll receive outputs from all work tasks
2. **Analyze code changes**: Request diffs for completed tasks
3. **Cross-reference**: Compare outputs against the plan and specifications
4. **Report issues**: Use `makima red-team notify` when you detect problems

## When to Notify

NOTIFY the supervisor when you observe:
- **Critical**: Security vulnerabilities, data loss risks, breaking changes
- **High**: Significant deviations from the plan, major code quality issues
- **Medium**: Missing tests, suboptimal implementations, minor standard violations
- **Low**: Style inconsistencies, documentation gaps (use sparingly)

## What NOT to Do

- Do NOT nitpick minor style issues (that's what linters are for)
- Do NOT block progress for trivial concerns
- Do NOT write code or make changes yourself
- Do NOT notify for things that are already in progress and being addressed
- Do NOT create duplicate notifications for the same issue

## Notification Format

When notifying, always include:
1. A clear, concise description of the issue
2. The severity level (critical/high/medium/low)
3. The related task ID if applicable
4. The specific file or code location if known
5. Why this matters (reference to plan, spec, or standards)

## Example Notification

```
makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \
  --severity critical \
  --task <task_id> \
  --file "src/auth/user.rs" \
  --context "Specification section 3.2 requires bcrypt hashing for all passwords"
```
{custom_criteria}
## Contract Context

Contract: {contract_name}
Phase: {contract_phase}

Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt.
"#,
        custom_criteria = custom_criteria,
        contract_name = contract_name,
        contract_phase = contract_phase
    )
}
+pub async fn spawn_red_team_task( + pool: &sqlx::PgPool, + state: &SharedState, + contract_id: Uuid, + owner_id: Uuid, + contract_name: &str, + contract_phase: &str, + red_team_prompt: Option<&str>, +) -> Result<Task, String> { + // Generate the red team plan/prompt + let plan = generate_red_team_plan(contract_name, contract_phase, red_team_prompt); + + // Create task request + let create_req = CreateTaskRequest { + name: "Red Team Monitor".to_string(), + description: Some("Adversarial review task monitoring work task outputs".to_string()), + plan, + contract_id: Some(contract_id), + parent_task_id: None, + is_supervisor: false, + is_red_team: true, + priority: 0, + repository_url: None, // Red team doesn't need a repo + base_branch: None, + target_branch: None, + merge_mode: None, + target_repo_path: None, + completion_action: None, + continue_from_task_id: None, + copy_files: None, + checkpoint_sha: None, + branched_from_task_id: None, + conversation_history: None, + }; + + // Create task in DB + let task = repository::create_task_for_owner(pool, owner_id, create_req) + .await + .map_err(|e| format!("Failed to create red team task: {}", e))?; + + tracing::info!( + contract_id = %contract_id, + red_team_task_id = %task.id, + "Created red team task for contract" + ); + + // Find a daemon to run the red team task + for entry in state.daemon_connections.iter() { + let daemon = entry.value(); + if daemon.owner_id == owner_id { + // Update task with daemon assignment + let update_req = UpdateTaskRequest { + status: Some("starting".to_string()), + daemon_id: Some(daemon.id), + version: Some(task.version), + ..Default::default() + }; + + match repository::update_task_for_owner(pool, task.id, owner_id, update_req).await { + Ok(Some(updated_task)) => { + // Send spawn command to daemon + let cmd = DaemonCommand::SpawnTask { + task_id: updated_task.id, + task_name: updated_task.name.clone(), + plan: updated_task.plan.clone(), + repo_url: None, // Red team doesn't need a 
repo + base_branch: None, + target_branch: None, + parent_task_id: None, + depth: 0, + is_orchestrator: false, + target_repo_path: None, + completion_action: None, + continue_from_task_id: None, + copy_files: None, + contract_id: Some(contract_id), + is_supervisor: false, + autonomous_loop: false, + resume_session: false, + conversation_history: None, + patch_data: None, + patch_base_sha: None, + local_only: true, // Red team is always local-only + }; + + if let Err(e) = state.send_daemon_command(daemon.id, cmd).await { + tracing::warn!( + error = %e, + daemon_id = %daemon.id, + red_team_task_id = %task.id, + "Failed to send red team spawn command" + ); + // Rollback + let rollback_req = UpdateTaskRequest { + status: Some("pending".to_string()), + clear_daemon_id: true, + ..Default::default() + }; + let _ = repository::update_task_for_owner(pool, task.id, owner_id, rollback_req).await; + } else { + tracing::info!( + red_team_task_id = %task.id, + daemon_id = %daemon.id, + "Red team task spawn command sent" + ); + return Ok(updated_task); + } + } + Ok(None) => { + tracing::warn!(red_team_task_id = %task.id, "Red team task not found when updating daemon_id"); + } + Err(e) => { + tracing::error!(red_team_task_id = %task.id, error = %e, "Failed to update red team task with daemon_id"); + } + } + break; + } + } + + // Return the task even if we couldn't start it on a daemon + // It will remain pending and can be started later + Ok(task) +} |
