summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author soryu <soryu@soryu.co> 2026-01-27 00:19:16 +0000
committer soryu <soryu@soryu.co> 2026-01-27 00:19:16 +0000
commit 61e3b3f92adb791fd47d2de3027c46d08c03d901 (patch)
tree d818cfcaf0f400d739155ddb729861beb3ccdc25
parent f6b4d06a0158fb7803a2d7a861cf891cb3b202b4 (diff)
download soryu-61e3b3f92adb791fd47d2de3027c46d08c03d901.tar.gz
soryu-61e3b3f92adb791fd47d2de3027c46d08c03d901.zip
[WIP] Heartbeat checkpoint - 2026-01-27 00:19:16 UTC
-rw-r--r-- makima/src/db/models.rs 28
-rw-r--r-- makima/src/db/repository.rs 31
-rw-r--r-- makima/src/server/handlers/mesh_supervisor.rs 291
3 files changed, 346 insertions, 4 deletions
diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs
index 9c2d072..625a572 100644
--- a/makima/src/db/models.rs
+++ b/makima/src/db/models.rs
@@ -441,6 +441,12 @@ pub struct Task {
#[serde(default)]
pub is_supervisor: bool,
+ // Red team flag
+ /// True for red team monitor tasks. Red team tasks monitor work tasks
+ /// and can alert the supervisor about potential issues.
+ #[serde(default)]
+ pub is_red_team: bool,
+
// Daemon/container info
pub daemon_id: Option<Uuid>,
pub container_id: Option<String>,
@@ -627,6 +633,9 @@ pub struct CreateTaskRequest {
/// True for contract supervisor tasks. Only supervisors can spawn new tasks.
#[serde(default)]
pub is_supervisor: bool,
+ /// True for red team monitor tasks that watch work tasks.
+ #[serde(default)]
+ pub is_red_team: bool,
/// Priority (higher = more urgent)
#[serde(default)]
pub priority: i32,
@@ -1331,6 +1340,16 @@ pub struct Contract {
/// allowing users to manually handle code changes via patch files or other means.
#[serde(default)]
pub local_only: bool,
+ /// Whether to spawn a red team task to monitor work tasks.
+ /// When enabled, a parallel task monitors outputs and can alert
+ /// the supervisor about potential issues.
+ #[serde(default)]
+ pub red_team_enabled: bool,
+ /// Optional custom prompt/criteria for the red team to use
+ /// when evaluating task outputs. If not provided, uses default
+ /// quality criteria.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub red_team_prompt: Option<String>,
pub version: i32,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
@@ -1508,6 +1527,15 @@ pub struct CreateContractRequest {
/// allowing users to manually handle code changes via patch files or other means.
#[serde(default)]
pub local_only: Option<bool>,
+ /// Enable red team monitoring for this contract.
+ /// When enabled, a parallel task monitors work task outputs
+ /// and can alert the supervisor about potential issues.
+ #[serde(default)]
+ pub red_team_enabled: Option<bool>,
+ /// Optional custom criteria for the red team to evaluate.
+ /// Examples: "Focus on security vulnerabilities",
+ /// "Ensure all functions have tests", etc.
+ pub red_team_prompt: Option<String>,
}
/// Request payload for updating a contract
diff --git a/makima/src/db/repository.rs b/makima/src/db/repository.rs
index 6d6642b..0282984 100644
--- a/makima/src/db/repository.rs
+++ b/makima/src/db/repository.rs
@@ -1100,11 +1100,11 @@ pub async fn create_task_for_owner(
r#"
INSERT INTO tasks (
owner_id, contract_id, parent_task_id, depth, name, description, plan, priority,
- is_supervisor, repository_url, base_branch, target_branch, merge_mode,
+ is_supervisor, is_red_team, repository_url, base_branch, target_branch, merge_mode,
target_repo_path, completion_action, continue_from_task_id, copy_files,
branched_from_task_id, conversation_state
)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20)
RETURNING *
"#,
)
@@ -1117,6 +1117,7 @@ pub async fn create_task_for_owner(
.bind(&req.plan)
.bind(req.priority)
.bind(req.is_supervisor)
+ .bind(req.is_red_team)
.bind(&repo_url)
.bind(&base_branch)
.bind(&target_branch)
@@ -1131,6 +1132,24 @@ pub async fn create_task_for_owner(
.await
}
+/// Look up the red team monitor task attached to a contract.
+///
+/// Returns `Ok(None)` when no row in `tasks` matches `contract_id` with
+/// `is_red_team = true`. The query uses `LIMIT 1` without an `ORDER BY`, so
+/// if several matching rows exist, which one comes back is up to the database.
+///
+/// # Errors
+///
+/// Propagates any `sqlx::Error` raised while running the query.
+pub async fn get_red_team_task_for_contract(
+    pool: &PgPool,
+    contract_id: Uuid,
+) -> Result<Option<Task>, sqlx::Error> {
+    // Build the parameterised lookup first, then execute it against the pool.
+    let lookup = sqlx::query_as::<_, Task>(
+        r#"
+ SELECT *
+ FROM tasks
+ WHERE contract_id = $1 AND is_red_team = true
+ LIMIT 1
+ "#,
+    )
+    .bind(contract_id);
+
+    lookup.fetch_optional(pool).await
+}
+
/// Get a task by ID, scoped to owner.
pub async fn get_task_for_owner(
pool: &PgPool,
@@ -2176,11 +2195,13 @@ pub async fn create_contract_for_owner(
let autonomous_loop = req.autonomous_loop.unwrap_or(false);
let phase_guard = req.phase_guard.unwrap_or(false);
let local_only = req.local_only.unwrap_or(false);
+ let red_team_enabled = req.red_team_enabled.unwrap_or(false);
+ let red_team_prompt = req.red_team_prompt.as_ref();
sqlx::query_as::<_, Contract>(
r#"
- INSERT INTO contracts (owner_id, name, description, contract_type, phase, autonomous_loop, phase_guard, local_only)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+ INSERT INTO contracts (owner_id, name, description, contract_type, phase, autonomous_loop, phase_guard, local_only, red_team_enabled, red_team_prompt)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
RETURNING *
"#,
)
@@ -2192,6 +2213,8 @@ pub async fn create_contract_for_owner(
.bind(autonomous_loop)
.bind(phase_guard)
.bind(local_only)
+ .bind(red_team_enabled)
+ .bind(red_team_prompt)
.fetch_one(pool)
.await
}
diff --git a/makima/src/server/handlers/mesh_supervisor.rs b/makima/src/server/handlers/mesh_supervisor.rs
index a654a05..bcc8f3a 100644
--- a/makima/src/server/handlers/mesh_supervisor.rs
+++ b/makima/src/server/handlers/mesh_supervisor.rs
@@ -742,6 +742,67 @@ pub async fn spawn_task(
updated_fields: vec!["status".to_string(), "daemon_id".to_string()],
updated_by: "supervisor".to_string(),
});
+
+ // Check if we should spawn a red team task
+ // Conditions:
+ // 1. This is not a supervisor task
+ // 2. This is not already a red team task
+ // 3. Contract has red_team_enabled = true
+ // 4. No red team task exists for this contract yet
+ if !updated_task.is_supervisor && !updated_task.is_red_team && contract.red_team_enabled {
+ if let Some(contract_id) = updated_task.contract_id {
+ // Check if a red team task already exists
+ match repository::get_red_team_task_for_contract(pool, contract_id).await {
+ Ok(None) => {
+ // No red team task exists, spawn one
+ tracing::info!(
+ contract_id = %contract_id,
+ work_task_id = %updated_task.id,
+ "Spawning red team task for contract (first work task started)"
+ );
+ match spawn_red_team_task(
+ pool,
+ &state,
+ contract_id,
+ owner_id,
+ &contract.name,
+ &contract.phase,
+ contract.red_team_prompt.as_deref(),
+ ).await {
+ Ok(red_team_task) => {
+ tracing::info!(
+ contract_id = %contract_id,
+ red_team_task_id = %red_team_task.id,
+ "Red team task spawned successfully"
+ );
+ }
+ Err(e) => {
+ // Log error but don't fail the work task spawn
+ tracing::error!(
+ contract_id = %contract_id,
+ error = %e,
+ "Failed to spawn red team task"
+ );
+ }
+ }
+ }
+ Ok(Some(existing)) => {
+ tracing::debug!(
+ contract_id = %contract_id,
+ red_team_task_id = %existing.id,
+ "Red team task already exists for contract"
+ );
+ }
+ Err(e) => {
+ tracing::error!(
+ contract_id = %contract_id,
+ error = %e,
+ "Error checking for existing red team task"
+ );
+ }
+ }
+ }
+ }
}
break;
}
@@ -2396,3 +2457,233 @@ pub async fn rewind_conversation(
})
.into_response()
}
+
+// =============================================================================
+// Red Team Task Spawning
+// =============================================================================
+
+/// Generate the system prompt/plan for a red team task.
+///
+/// The returned text is a fixed set of instructions for the red team monitor
+/// (what to watch, severity levels, how to notify the supervisor), followed by
+/// the contract name and phase.
+///
+/// # Arguments
+///
+/// * `contract_name` - inserted verbatim after `Contract:` in the context section.
+/// * `contract_phase` - inserted verbatim after `Phase:` in the context section.
+/// * `custom_prompt` - optional owner-supplied review criteria. When present
+///   and not blank, it is inserted verbatim under a "Custom Review Criteria"
+///   heading. `None`, an empty string, or a whitespace-only string all produce
+///   no such section.
+pub fn generate_red_team_plan(
+    contract_name: &str,
+    contract_phase: &str,
+    custom_prompt: Option<&str>,
+) -> String {
+    // A blank prompt would otherwise yield a heading that claims the owner
+    // supplied criteria with nothing underneath it, so treat it as absent.
+    let custom_criteria = custom_prompt
+        .filter(|prompt| !prompt.trim().is_empty())
+        .map(|prompt| {
+            format!(
+                r#"
+
+## Custom Review Criteria
+
+The contract owner has specified additional review criteria:
+{}
+"#,
+                prompt
+            )
+        })
+        .unwrap_or_default();
+
+    format!(
+        r#"# Red Team Monitor
+
+You are an adversarial quality reviewer for a software development contract. Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems.
+
+## Your Mission
+
+Monitor all task outputs and verify:
+1. **Plan Adherence**: Are tasks following the implementation plan?
+2. **Code Quality**: Does the code meet repository standards?
+3. **Contract Requirements**: Does the implementation match the specification?
+4. **Best Practices**: Are there obvious anti-patterns or issues?
+
+## Access Available
+
+You have read-only access to:
+- Task outputs (streamed in real-time)
+- Task diffs (code changes)
+- Contract specifications and plan documents
+- Repository configuration files (CONTRIBUTING.md, linting configs, etc.)
+
+## How to Monitor
+
+1. **Subscribe to task outputs**: You'll receive outputs from all work tasks
+2. **Analyze code changes**: Request diffs for completed tasks
+3. **Cross-reference**: Compare outputs against the plan and specifications
+4. **Report issues**: Use `makima red-team notify` when you detect problems
+
+## When to Notify
+
+NOTIFY the supervisor when you observe:
+- **Critical**: Security vulnerabilities, data loss risks, breaking changes
+- **High**: Significant deviations from the plan, major code quality issues
+- **Medium**: Missing tests, suboptimal implementations, minor standard violations
+- **Low**: Style inconsistencies, documentation gaps (use sparingly)
+
+## What NOT to Do
+
+- Do NOT nitpick minor style issues (that's what linters are for)
+- Do NOT block progress for trivial concerns
+- Do NOT write code or make changes yourself
+- Do NOT notify for things that are already in progress and being addressed
+- Do NOT create duplicate notifications for the same issue
+
+## Notification Format
+
+When notifying, always include:
+1. A clear, concise description of the issue
+2. The severity level (critical/high/medium/low)
+3. The related task ID if applicable
+4. The specific file or code location if known
+5. Why this matters (reference to plan, spec, or standards)
+
+## Example Notification
+
+```
+makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \
+ --severity critical \
+ --task <task_id> \
+ --file "src/auth/user.rs" \
+ --context "Specification section 3.2 requires bcrypt hashing for all passwords"
+```
+{}
+## Contract Context
+
+Contract: {}
+Phase: {}
+
+Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt.
+"#,
+        custom_criteria, contract_name, contract_phase
+    )
+}
+
+/// Create a red team monitor task for a contract and try to start it on a daemon.
+///
+/// Steps, in order:
+/// 1. Build the monitor's plan with [`generate_red_team_plan`].
+/// 2. Insert the task through `repository::create_task_for_owner` with
+///    `is_red_team: true` and no repository settings.
+/// 3. Pick the first entry in `state.daemon_connections` whose `owner_id`
+///    equals `owner_id`. Only that one daemon is tried.
+/// 4. Set the task to `"starting"` on that daemon and send it a
+///    `DaemonCommand::SpawnTask` with `local_only: true`.
+///
+/// # Returns
+///
+/// The updated task when the spawn command was sent. In every other
+/// non-error case (no matching daemon, the status update did not apply, the
+/// send failed) the task as originally created is returned so a caller can
+/// start it later.
+///
+/// # Errors
+///
+/// Returns `Err` with a message only when the task row cannot be created.
+/// Later failures are logged and do not produce an `Err`.
+pub async fn spawn_red_team_task(
+    pool: &sqlx::PgPool,
+    state: &SharedState,
+    contract_id: Uuid,
+    owner_id: Uuid,
+    contract_name: &str,
+    contract_phase: &str,
+    red_team_prompt: Option<&str>,
+) -> Result<Task, String> {
+    // Generate the red team plan/prompt
+    let plan = generate_red_team_plan(contract_name, contract_phase, red_team_prompt);
+
+    // Create task request
+    let create_req = CreateTaskRequest {
+        name: "Red Team Monitor".to_string(),
+        description: Some("Adversarial review task monitoring work task outputs".to_string()),
+        plan,
+        contract_id: Some(contract_id),
+        parent_task_id: None,
+        is_supervisor: false,
+        is_red_team: true,
+        priority: 0,
+        repository_url: None, // Red team doesn't need a repo
+        base_branch: None,
+        target_branch: None,
+        merge_mode: None,
+        target_repo_path: None,
+        completion_action: None,
+        continue_from_task_id: None,
+        copy_files: None,
+        checkpoint_sha: None,
+        branched_from_task_id: None,
+        conversation_history: None,
+    };
+
+    // Create task in DB
+    let task = repository::create_task_for_owner(pool, owner_id, create_req)
+        .await
+        .map_err(|e| format!("Failed to create red team task: {}", e))?;
+
+    tracing::info!(
+        contract_id = %contract_id,
+        red_team_task_id = %task.id,
+        "Created red team task for contract"
+    );
+
+    // Copy the daemon id out and finish iterating before any `.await` below,
+    // so no reference into `daemon_connections` is held while this future is
+    // suspended.
+    // NOTE(review): `daemon_connections` looks like a guard-based concurrent
+    // map; if so, holding an entry across an await can block writers. Confirm.
+    let mut selected_daemon = None;
+    for entry in state.daemon_connections.iter() {
+        let daemon = entry.value();
+        if daemon.owner_id == owner_id {
+            selected_daemon = Some(daemon.id);
+            break;
+        }
+    }
+
+    let daemon_id = match selected_daemon {
+        Some(id) => id,
+        // No daemon for this owner: leave the task as created so it can be
+        // started later.
+        None => return Ok(task),
+    };
+
+    // Update task with daemon assignment
+    let update_req = UpdateTaskRequest {
+        status: Some("starting".to_string()),
+        daemon_id: Some(daemon_id),
+        version: Some(task.version),
+        ..Default::default()
+    };
+
+    let updated_task =
+        match repository::update_task_for_owner(pool, task.id, owner_id, update_req).await {
+            Ok(Some(updated)) => updated,
+            Ok(None) => {
+                tracing::warn!(red_team_task_id = %task.id, "Red team task not found when updating daemon_id");
+                return Ok(task);
+            }
+            Err(e) => {
+                tracing::error!(red_team_task_id = %task.id, error = %e, "Failed to update red team task with daemon_id");
+                return Ok(task);
+            }
+        };
+
+    // Send spawn command to daemon
+    let cmd = DaemonCommand::SpawnTask {
+        task_id: updated_task.id,
+        task_name: updated_task.name.clone(),
+        plan: updated_task.plan.clone(),
+        repo_url: None, // Red team doesn't need a repo
+        base_branch: None,
+        target_branch: None,
+        parent_task_id: None,
+        depth: 0,
+        is_orchestrator: false,
+        target_repo_path: None,
+        completion_action: None,
+        continue_from_task_id: None,
+        copy_files: None,
+        contract_id: Some(contract_id),
+        is_supervisor: false,
+        autonomous_loop: false,
+        resume_session: false,
+        conversation_history: None,
+        patch_data: None,
+        patch_base_sha: None,
+        local_only: true, // Red team is always local-only
+    };
+
+    if let Err(e) = state.send_daemon_command(daemon_id, cmd).await {
+        tracing::warn!(
+            error = %e,
+            daemon_id = %daemon_id,
+            red_team_task_id = %task.id,
+            "Failed to send red team spawn command"
+        );
+
+        // Rollback: put the task back to pending with no daemon. A failure
+        // here would leave the row in "starting" on a daemon that never got
+        // the command, so log it instead of dropping the result.
+        let rollback_req = UpdateTaskRequest {
+            status: Some("pending".to_string()),
+            clear_daemon_id: true,
+            ..Default::default()
+        };
+        if let Err(rollback_err) =
+            repository::update_task_for_owner(pool, task.id, owner_id, rollback_req).await
+        {
+            tracing::error!(
+                red_team_task_id = %task.id,
+                error = %rollback_err,
+                "Failed to roll back red team task after spawn command failure"
+            );
+        }
+
+        // Return the task even though we couldn't start it on a daemon.
+        return Ok(task);
+    }
+
+    tracing::info!(
+        red_team_task_id = %task.id,
+        daemon_id = %daemon_id,
+        "Red team task spawn command sent"
+    );
+    Ok(updated_task)
+}