diff options
| author | soryu <soryu@soryu.co> | 2026-01-27 00:19:16 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2026-01-27 00:19:16 +0000 |
| commit | 61e3b3f92adb791fd47d2de3027c46d08c03d901 (patch) | |
| tree | d818cfcaf0f400d739155ddb729861beb3ccdc25 | |
| parent | f6b4d06a0158fb7803a2d7a861cf891cb3b202b4 (diff) | |
| download | soryu-61e3b3f92adb791fd47d2de3027c46d08c03d901.tar.gz soryu-61e3b3f92adb791fd47d2de3027c46d08c03d901.zip | |
[WIP] Heartbeat checkpoint - 2026-01-27 00:19:16 UTC
| -rw-r--r-- | makima/src/db/models.rs | 28 | ||||
| -rw-r--r-- | makima/src/db/repository.rs | 31 | ||||
| -rw-r--r-- | makima/src/server/handlers/mesh_supervisor.rs | 291 |
3 files changed, 346 insertions, 4 deletions
diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs index 9c2d072..625a572 100644 --- a/makima/src/db/models.rs +++ b/makima/src/db/models.rs @@ -441,6 +441,12 @@ pub struct Task { #[serde(default)] pub is_supervisor: bool, + // Red team flag + /// True for red team monitor tasks. Red team tasks monitor work tasks + /// and can alert the supervisor about potential issues. + #[serde(default)] + pub is_red_team: bool, + // Daemon/container info pub daemon_id: Option<Uuid>, pub container_id: Option<String>, @@ -627,6 +633,9 @@ pub struct CreateTaskRequest { /// True for contract supervisor tasks. Only supervisors can spawn new tasks. #[serde(default)] pub is_supervisor: bool, + /// True for red team monitor tasks that watch work tasks. + #[serde(default)] + pub is_red_team: bool, /// Priority (higher = more urgent) #[serde(default)] pub priority: i32, @@ -1331,6 +1340,16 @@ pub struct Contract { /// allowing users to manually handle code changes via patch files or other means. #[serde(default)] pub local_only: bool, + /// Whether to spawn a red team task to monitor work tasks. + /// When enabled, a parallel task monitors outputs and can alert + /// the supervisor about potential issues. + #[serde(default)] + pub red_team_enabled: bool, + /// Optional custom prompt/criteria for the red team to use + /// when evaluating task outputs. If not provided, uses default + /// quality criteria. + #[serde(skip_serializing_if = "Option::is_none")] + pub red_team_prompt: Option<String>, pub version: i32, pub created_at: DateTime<Utc>, pub updated_at: DateTime<Utc>, @@ -1508,6 +1527,15 @@ pub struct CreateContractRequest { /// allowing users to manually handle code changes via patch files or other means. #[serde(default)] pub local_only: Option<bool>, + /// Enable red team monitoring for this contract. + /// When enabled, a parallel task monitors work task outputs + /// and can alert the supervisor about potential issues. + #[serde(default)] + pub red_team_enabled: Option<bool>, + /// Optional custom criteria for the red team to evaluate. + /// Examples: "Focus on security vulnerabilities", + /// "Ensure all functions have tests", etc. + pub red_team_prompt: Option<String>, } /// Request payload for updating a contract diff --git a/makima/src/db/repository.rs b/makima/src/db/repository.rs index 6d6642b..0282984 100644 --- a/makima/src/db/repository.rs +++ b/makima/src/db/repository.rs @@ -1100,11 +1100,11 @@ pub async fn create_task_for_owner( r#" INSERT INTO tasks ( owner_id, contract_id, parent_task_id, depth, name, description, plan, priority, - is_supervisor, repository_url, base_branch, target_branch, merge_mode, + is_supervisor, is_red_team, repository_url, base_branch, target_branch, merge_mode, target_repo_path, completion_action, continue_from_task_id, copy_files, branched_from_task_id, conversation_state ) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20) RETURNING * "#, ) @@ -1117,6 +1117,7 @@ pub async fn create_task_for_owner( .bind(&req.plan) .bind(req.priority) .bind(req.is_supervisor) + .bind(req.is_red_team) .bind(&repo_url) .bind(&base_branch) .bind(&target_branch) @@ -1131,6 +1132,24 @@ pub async fn create_task_for_owner( .await } +/// Get the red team task for a contract, if one exists. +pub async fn get_red_team_task_for_contract( + pool: &PgPool, + contract_id: Uuid, +) -> Result<Option<Task>, sqlx::Error> { + sqlx::query_as::<_, Task>( + r#" + SELECT * + FROM tasks + WHERE contract_id = $1 AND is_red_team = true + LIMIT 1 + "#, + ) + .bind(contract_id) + .fetch_optional(pool) + .await +} + /// Get a task by ID, scoped to owner. pub async fn get_task_for_owner( pool: &PgPool, @@ -2176,11 +2195,13 @@ pub async fn create_contract_for_owner( let autonomous_loop = req.autonomous_loop.unwrap_or(false); let phase_guard = req.phase_guard.unwrap_or(false); let local_only = req.local_only.unwrap_or(false); + let red_team_enabled = req.red_team_enabled.unwrap_or(false); + let red_team_prompt = req.red_team_prompt.as_ref(); sqlx::query_as::<_, Contract>( r#" - INSERT INTO contracts (owner_id, name, description, contract_type, phase, autonomous_loop, phase_guard, local_only) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + INSERT INTO contracts (owner_id, name, description, contract_type, phase, autonomous_loop, phase_guard, local_only, red_team_enabled, red_team_prompt) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) RETURNING * "#, ) @@ -2192,6 +2213,8 @@ pub async fn create_contract_for_owner( .bind(autonomous_loop) .bind(phase_guard) .bind(local_only) + .bind(red_team_enabled) + .bind(red_team_prompt) .fetch_one(pool) .await } diff --git a/makima/src/server/handlers/mesh_supervisor.rs b/makima/src/server/handlers/mesh_supervisor.rs index a654a05..bcc8f3a 100644 --- a/makima/src/server/handlers/mesh_supervisor.rs +++ b/makima/src/server/handlers/mesh_supervisor.rs @@ -742,6 +742,67 @@ pub async fn spawn_task( updated_fields: vec!["status".to_string(), "daemon_id".to_string()], updated_by: "supervisor".to_string(), }); + + // Check if we should spawn a red team task + // Conditions: + // 1. This is not a supervisor task + // 2. This is not already a red team task + // 3. Contract has red_team_enabled = true + // 4. No red team task exists for this contract yet + if !updated_task.is_supervisor && !updated_task.is_red_team && contract.red_team_enabled { + if let Some(contract_id) = updated_task.contract_id { + // Check if a red team task already exists + match repository::get_red_team_task_for_contract(pool, contract_id).await { + Ok(None) => { + // No red team task exists, spawn one + tracing::info!( + contract_id = %contract_id, + work_task_id = %updated_task.id, + "Spawning red team task for contract (first work task started)" + ); + match spawn_red_team_task( + pool, + &state, + contract_id, + owner_id, + &contract.name, + &contract.phase, + contract.red_team_prompt.as_deref(), + ).await { + Ok(red_team_task) => { + tracing::info!( + contract_id = %contract_id, + red_team_task_id = %red_team_task.id, + "Red team task spawned successfully" + ); + } + Err(e) => { + // Log error but don't fail the work task spawn + tracing::error!( + contract_id = %contract_id, + error = %e, + "Failed to spawn red team task" + ); + } + } + } + Ok(Some(existing)) => { + tracing::debug!( + contract_id = %contract_id, + red_team_task_id = %existing.id, + "Red team task already exists for contract" + ); + } + Err(e) => { + tracing::error!( + contract_id = %contract_id, + error = %e, + "Error checking for existing red team task" + ); + } + } + } + } } break; } @@ -2396,3 +2457,233 @@ pub async fn rewind_conversation( }) .into_response() } + +// ============================================================================= +// Red Team Task Spawning +// ============================================================================= + +/// Generate the system prompt/plan for a red team task. +/// +/// This creates detailed instructions for the red team monitor, including +/// what to look for, severity levels, and how to report issues. +pub fn generate_red_team_plan( + contract_name: &str, + contract_phase: &str, + custom_prompt: Option<&str>, +) -> String { + let custom_criteria = if let Some(prompt) = custom_prompt { + format!( + r#" + +## Custom Review Criteria + +The contract owner has specified additional review criteria: +{} +"#, + prompt + ) + } else { + String::new() + }; + + format!( + r#"# Red Team Monitor + +You are an adversarial quality reviewer for a software development contract. Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems. + +## Your Mission + +Monitor all task outputs and verify: +1. **Plan Adherence**: Are tasks following the implementation plan? +2. **Code Quality**: Does the code meet repository standards? +3. **Contract Requirements**: Does the implementation match the specification? +4. **Best Practices**: Are there obvious anti-patterns or issues? + +## Access Available + +You have read-only access to: +- Task outputs (streamed in real-time) +- Task diffs (code changes) +- Contract specifications and plan documents +- Repository configuration files (CONTRIBUTING.md, linting configs, etc.) + +## How to Monitor + +1. **Subscribe to task outputs**: You'll receive outputs from all work tasks +2. **Analyze code changes**: Request diffs for completed tasks +3. **Cross-reference**: Compare outputs against the plan and specifications +4. **Report issues**: Use `makima red-team notify` when you detect problems + +## When to Notify + +NOTIFY the supervisor when you observe: +- **Critical**: Security vulnerabilities, data loss risks, breaking changes +- **High**: Significant deviations from the plan, major code quality issues +- **Medium**: Missing tests, suboptimal implementations, minor standard violations +- **Low**: Style inconsistencies, documentation gaps (use sparingly) + +## What NOT to Do + +- Do NOT nitpick minor style issues (that's what linters are for) +- Do NOT block progress for trivial concerns +- Do NOT write code or make changes yourself +- Do NOT notify for things that are already in progress and being addressed +- Do NOT create duplicate notifications for the same issue + +## Notification Format + +When notifying, always include: +1. A clear, concise description of the issue +2. The severity level (critical/high/medium/low) +3. The related task ID if applicable +4. The specific file or code location if known +5. Why this matters (reference to plan, spec, or standards) + +## Example Notification + +``` +makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \ + --severity critical \ + --task <task_id> \ + --file "src/auth/user.rs" \ + --context "Specification section 3.2 requires bcrypt hashing for all passwords" +``` +{} +## Contract Context + +Contract: {} +Phase: {} + +Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt. +"#, + custom_criteria, contract_name, contract_phase + ) +} + +/// Spawn a red team task for a contract. +/// +/// This creates a red team monitor task that will observe work task outputs +/// and can notify the supervisor about potential issues. +pub async fn spawn_red_team_task( + pool: &sqlx::PgPool, + state: &SharedState, + contract_id: Uuid, + owner_id: Uuid, + contract_name: &str, + contract_phase: &str, + red_team_prompt: Option<&str>, +) -> Result<Task, String> { + // Generate the red team plan/prompt + let plan = generate_red_team_plan(contract_name, contract_phase, red_team_prompt); + + // Create task request + let create_req = CreateTaskRequest { + name: "Red Team Monitor".to_string(), + description: Some("Adversarial review task monitoring work task outputs".to_string()), + plan, + contract_id: Some(contract_id), + parent_task_id: None, + is_supervisor: false, + is_red_team: true, + priority: 0, + repository_url: None, // Red team doesn't need a repo + base_branch: None, + target_branch: None, + merge_mode: None, + target_repo_path: None, + completion_action: None, + continue_from_task_id: None, + copy_files: None, + checkpoint_sha: None, + branched_from_task_id: None, + conversation_history: None, + }; + + // Create task in DB + let task = repository::create_task_for_owner(pool, owner_id, create_req) + .await + .map_err(|e| format!("Failed to create red team task: {}", e))?; + + tracing::info!( + contract_id = %contract_id, + red_team_task_id = %task.id, + "Created red team task for contract" + ); + + // Find a daemon to run the red team task + for entry in state.daemon_connections.iter() { + let daemon = entry.value(); + if daemon.owner_id == owner_id { + // Update task with daemon assignment + let update_req = UpdateTaskRequest { + status: Some("starting".to_string()), + daemon_id: Some(daemon.id), + version: Some(task.version), + ..Default::default() + }; + + match repository::update_task_for_owner(pool, task.id, owner_id, update_req).await { + Ok(Some(updated_task)) => { + // Send spawn command to daemon + let cmd = DaemonCommand::SpawnTask { + task_id: updated_task.id, + task_name: updated_task.name.clone(), + plan: updated_task.plan.clone(), + repo_url: None, // Red team doesn't need a repo + base_branch: None, + target_branch: None, + parent_task_id: None, + depth: 0, + is_orchestrator: false, + target_repo_path: None, + completion_action: None, + continue_from_task_id: None, + copy_files: None, + contract_id: Some(contract_id), + is_supervisor: false, + autonomous_loop: false, + resume_session: false, + conversation_history: None, + patch_data: None, + patch_base_sha: None, + local_only: true, // Red team is always local-only + }; + + if let Err(e) = state.send_daemon_command(daemon.id, cmd).await { + tracing::warn!( + error = %e, + daemon_id = %daemon.id, + red_team_task_id = %task.id, + "Failed to send red team spawn command" + ); + // Rollback + let rollback_req = UpdateTaskRequest { + status: Some("pending".to_string()), + clear_daemon_id: true, + ..Default::default() + }; + let _ = repository::update_task_for_owner(pool, task.id, owner_id, rollback_req).await; + } else { + tracing::info!( + red_team_task_id = %task.id, + daemon_id = %daemon.id, + "Red team task spawn command sent" + ); + return Ok(updated_task); + } + } + Ok(None) => { + tracing::warn!(red_team_task_id = %task.id, "Red team task not found when updating daemon_id"); + } + Err(e) => { + tracing::error!(red_team_task_id = %task.id, error = %e, "Failed to update red team task with daemon_id"); + } + } + break; + } + } + + // Return the task even if we couldn't start it on a daemon + // It will remain pending and can be started later + Ok(task) +} |
