summaryrefslogtreecommitdiff
path: root/makima/src/server/handlers/mesh_supervisor.rs
diff options
context:
space:
mode:
authorsoryu <soryu@soryu.co>2026-01-27 01:05:25 +0000
committerGitHub <noreply@github.com>2026-01-27 01:05:25 +0000
commit6cd5b20670d7ecd3d48539ff898e021988f2a503 (patch)
tree0232ecd8411b01db07230f8ea2003cbbcdc070a6 /makima/src/server/handlers/mesh_supervisor.rs
parent64cc98783d067625d633eea1142d114e324f76bb (diff)
downloadsoryu-6cd5b20670d7ecd3d48539ff898e021988f2a503.tar.gz
soryu-6cd5b20670d7ecd3d48539ff898e021988f2a503.zip
Add Red Team adversarial review system for contract monitoring (#35)
Implements a parallel "red team" task that monitors work task outputs in real-time, verifying implementations stick to contract requirements, repository standards, and the execution plan. Key features: - New `red_team_enabled` and `red_team_prompt` contract configuration - Red team tasks auto-spawn when first work task is created - `makima red-team notify` CLI command for alerting supervisors - POST /api/v1/mesh/red-team/notify and /status endpoints - Alert delivery to supervisor via SendMessage daemon command - Notification audit trail via history_events table Database changes: - Add red_team_enabled/red_team_prompt columns to contracts - Add is_red_team flag to tasks with partial index - Create red_team_notifications table for audit logging Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'makima/src/server/handlers/mesh_supervisor.rs')
-rw-r--r--makima/src/server/handlers/mesh_supervisor.rs292
1 files changed, 292 insertions, 0 deletions
diff --git a/makima/src/server/handlers/mesh_supervisor.rs b/makima/src/server/handlers/mesh_supervisor.rs
index a654a05..6d9f8fb 100644
--- a/makima/src/server/handlers/mesh_supervisor.rs
+++ b/makima/src/server/handlers/mesh_supervisor.rs
@@ -610,6 +610,7 @@ pub async fn spawn_task(
contract_id: Some(request.contract_id),
parent_task_id: request.parent_task_id,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: request.checkpoint_sha.clone(),
merge_mode: Some("manual".to_string()),
priority: 0,
@@ -742,6 +743,67 @@ pub async fn spawn_task(
updated_fields: vec!["status".to_string(), "daemon_id".to_string()],
updated_by: "supervisor".to_string(),
});
+
+ // Check if we should spawn a red team task
+ // Conditions:
+ // 1. This is not a supervisor task
+ // 2. This is not already a red team task
+ // 3. Contract has red_team_enabled = true
+ // 4. No red team task exists for this contract yet
+ if !updated_task.is_supervisor && !updated_task.is_red_team && contract.red_team_enabled {
+ if let Some(contract_id) = updated_task.contract_id {
+ // Check if a red team task already exists
+ match repository::get_red_team_task_for_contract(pool, contract_id).await {
+ Ok(None) => {
+ // No red team task exists, spawn one
+ tracing::info!(
+ contract_id = %contract_id,
+ work_task_id = %updated_task.id,
+ "Spawning red team task for contract (first work task started)"
+ );
+ match spawn_red_team_task(
+ pool,
+ &state,
+ contract_id,
+ owner_id,
+ &contract.name,
+ &contract.phase,
+ contract.red_team_prompt.as_deref(),
+ ).await {
+ Ok(red_team_task) => {
+ tracing::info!(
+ contract_id = %contract_id,
+ red_team_task_id = %red_team_task.id,
+ "Red team task spawned successfully"
+ );
+ }
+ Err(e) => {
+ // Log error but don't fail the work task spawn
+ tracing::error!(
+ contract_id = %contract_id,
+ error = %e,
+ "Failed to spawn red team task"
+ );
+ }
+ }
+ }
+ Ok(Some(existing)) => {
+ tracing::debug!(
+ contract_id = %contract_id,
+ red_team_task_id = %existing.id,
+ "Red team task already exists for contract"
+ );
+ }
+ Err(e) => {
+ tracing::error!(
+ contract_id = %contract_id,
+ error = %e,
+ "Error checking for existing red team task"
+ );
+ }
+ }
+ }
+ }
}
break;
}
@@ -2396,3 +2458,233 @@ pub async fn rewind_conversation(
})
.into_response()
}
+
+// =============================================================================
+// Red Team Task Spawning
+// =============================================================================
+
+/// Generate the system prompt/plan for a red team task.
+///
+/// This creates detailed instructions for the red team monitor, including
+/// what to look for, severity levels, and how to report issues.
+pub fn generate_red_team_plan(
+ contract_name: &str,
+ contract_phase: &str,
+ custom_prompt: Option<&str>,
+) -> String {
+ let custom_criteria = if let Some(prompt) = custom_prompt {
+ format!(
+ r#"
+
+## Custom Review Criteria
+
+The contract owner has specified additional review criteria:
+{}
+"#,
+ prompt
+ )
+ } else {
+ String::new()
+ };
+
+ format!(
+ r#"# Red Team Monitor
+
+You are an adversarial quality reviewer for a software development contract. Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems.
+
+## Your Mission
+
+Monitor all task outputs and verify:
+1. **Plan Adherence**: Are tasks following the implementation plan?
+2. **Code Quality**: Does the code meet repository standards?
+3. **Contract Requirements**: Does the implementation match the specification?
+4. **Best Practices**: Are there obvious anti-patterns or issues?
+
+## Access Available
+
+You have read-only access to:
+- Task outputs (streamed in real-time)
+- Task diffs (code changes)
+- Contract specifications and plan documents
+- Repository configuration files (CONTRIBUTING.md, linting configs, etc.)
+
+## How to Monitor
+
+1. **Subscribe to task outputs**: You'll receive outputs from all work tasks
+2. **Analyze code changes**: Request diffs for completed tasks
+3. **Cross-reference**: Compare outputs against the plan and specifications
+4. **Report issues**: Use `makima red-team notify` when you detect problems
+
+## When to Notify
+
+NOTIFY the supervisor when you observe:
+- **Critical**: Security vulnerabilities, data loss risks, breaking changes
+- **High**: Significant deviations from the plan, major code quality issues
+- **Medium**: Missing tests, suboptimal implementations, minor standard violations
+- **Low**: Style inconsistencies, documentation gaps (use sparingly)
+
+## What NOT to Do
+
+- Do NOT nitpick minor style issues (that's what linters are for)
+- Do NOT block progress for trivial concerns
+- Do NOT write code or make changes yourself
+- Do NOT notify for things that are already in progress and being addressed
+- Do NOT create duplicate notifications for the same issue
+
+## Notification Format
+
+When notifying, always include:
+1. A clear, concise description of the issue
+2. The severity level (critical/high/medium/low)
+3. The related task ID if applicable
+4. The specific file or code location if known
+5. Why this matters (reference to plan, spec, or standards)
+
+## Example Notification
+
+```
+makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \
+ --severity critical \
+ --task <task_id> \
+ --file "src/auth/user.rs" \
+ --context "Specification section 3.2 requires bcrypt hashing for all passwords"
+```
+{}
+## Contract Context
+
+Contract: {}
+Phase: {}
+
+Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt.
+"#,
+ custom_criteria, contract_name, contract_phase
+ )
+}
+
+/// Spawn a red team task for a contract.
+///
+/// This creates a red team monitor task that will observe work task outputs
+/// and can notify the supervisor about potential issues.
+pub async fn spawn_red_team_task(
+ pool: &sqlx::PgPool,
+ state: &SharedState,
+ contract_id: Uuid,
+ owner_id: Uuid,
+ contract_name: &str,
+ contract_phase: &str,
+ red_team_prompt: Option<&str>,
+) -> Result<Task, String> {
+ // Generate the red team plan/prompt
+ let plan = generate_red_team_plan(contract_name, contract_phase, red_team_prompt);
+
+ // Create task request
+ let create_req = CreateTaskRequest {
+ name: "Red Team Monitor".to_string(),
+ description: Some("Adversarial review task monitoring work task outputs".to_string()),
+ plan,
+ contract_id: Some(contract_id),
+ parent_task_id: None,
+ is_supervisor: false,
+ is_red_team: true,
+ priority: 0,
+ repository_url: None, // Red team doesn't need a repo
+ base_branch: None,
+ target_branch: None,
+ merge_mode: None,
+ target_repo_path: None,
+ completion_action: None,
+ continue_from_task_id: None,
+ copy_files: None,
+ checkpoint_sha: None,
+ branched_from_task_id: None,
+ conversation_history: None,
+ };
+
+ // Create task in DB
+ let task = repository::create_task_for_owner(pool, owner_id, create_req)
+ .await
+ .map_err(|e| format!("Failed to create red team task: {}", e))?;
+
+ tracing::info!(
+ contract_id = %contract_id,
+ red_team_task_id = %task.id,
+ "Created red team task for contract"
+ );
+
+ // Find a daemon to run the red team task
+ for entry in state.daemon_connections.iter() {
+ let daemon = entry.value();
+ if daemon.owner_id == owner_id {
+ // Update task with daemon assignment
+ let update_req = UpdateTaskRequest {
+ status: Some("starting".to_string()),
+ daemon_id: Some(daemon.id),
+ version: Some(task.version),
+ ..Default::default()
+ };
+
+ match repository::update_task_for_owner(pool, task.id, owner_id, update_req).await {
+ Ok(Some(updated_task)) => {
+ // Send spawn command to daemon
+ let cmd = DaemonCommand::SpawnTask {
+ task_id: updated_task.id,
+ task_name: updated_task.name.clone(),
+ plan: updated_task.plan.clone(),
+ repo_url: None, // Red team doesn't need a repo
+ base_branch: None,
+ target_branch: None,
+ parent_task_id: None,
+ depth: 0,
+ is_orchestrator: false,
+ target_repo_path: None,
+ completion_action: None,
+ continue_from_task_id: None,
+ copy_files: None,
+ contract_id: Some(contract_id),
+ is_supervisor: false,
+ autonomous_loop: false,
+ resume_session: false,
+ conversation_history: None,
+ patch_data: None,
+ patch_base_sha: None,
+ local_only: true, // Red team is always local-only
+ };
+
+ if let Err(e) = state.send_daemon_command(daemon.id, cmd).await {
+ tracing::warn!(
+ error = %e,
+ daemon_id = %daemon.id,
+ red_team_task_id = %task.id,
+ "Failed to send red team spawn command"
+ );
+ // Rollback
+ let rollback_req = UpdateTaskRequest {
+ status: Some("pending".to_string()),
+ clear_daemon_id: true,
+ ..Default::default()
+ };
+ let _ = repository::update_task_for_owner(pool, task.id, owner_id, rollback_req).await;
+ } else {
+ tracing::info!(
+ red_team_task_id = %task.id,
+ daemon_id = %daemon.id,
+ "Red team task spawn command sent"
+ );
+ return Ok(updated_task);
+ }
+ }
+ Ok(None) => {
+ tracing::warn!(red_team_task_id = %task.id, "Red team task not found when updating daemon_id");
+ }
+ Err(e) => {
+ tracing::error!(red_team_task_id = %task.id, error = %e, "Failed to update red team task with daemon_id");
+ }
+ }
+ break;
+ }
+ }
+
+ // Return the task even if we couldn't start it on a daemon
+ // It will remain pending and can be started later
+ Ok(task)
+}