From 6cd5b20670d7ecd3d48539ff898e021988f2a503 Mon Sep 17 00:00:00 2001 From: soryu Date: Tue, 27 Jan 2026 01:05:25 +0000 Subject: Add Red Team adversarial review system for contract monitoring (#35) Implements a parallel "red team" task that monitors work task outputs in real-time, verifying implementations stick to contract requirements, repository standards, and the execution plan. Key features: - New `red_team_enabled` and `red_team_prompt` contract configuration - Red team tasks auto-spawn when first work task is created - `makima red-team notify` CLI command for alerting supervisors - POST /api/v1/mesh/red-team/notify and /status endpoints - Alert delivery to supervisor via SendMessage daemon command - Notification audit trail via history_events table Database changes: - Add red_team_enabled/red_team_prompt columns to contracts - Add is_red_team flag to tasks with partial index - Create red_team_notifications table for audit logging Co-authored-by: Claude Opus 4.5 --- makima/src/db/models.rs | 91 ++++++++++++++++++++++++++++++++ makima/src/db/repository.rs | 125 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 203 insertions(+), 13 deletions(-) (limited to 'makima/src/db') diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs index 9c2d072..d5f2814 100644 --- a/makima/src/db/models.rs +++ b/makima/src/db/models.rs @@ -440,6 +440,11 @@ pub struct Task { /// True for contract supervisor tasks. Only supervisors can spawn new tasks. #[serde(default)] pub is_supervisor: bool, + /// Whether this is a red team monitoring task. + /// Red team tasks monitor work task outputs and can notify + /// the supervisor about potential issues. 
+ #[serde(default)] + pub is_red_team: bool, // Daemon/container info pub daemon_id: Option, @@ -570,6 +575,9 @@ pub struct TaskSummary { /// True for contract supervisor tasks #[serde(default)] pub is_supervisor: bool, + /// True for red team tasks that monitor and review other tasks' work + #[serde(default)] + pub is_red_team: bool, /// Whether this task is hidden from the UI (user dismissed it) #[serde(default)] pub hidden: bool, @@ -595,6 +603,7 @@ impl From for TaskSummary { subtask_count: 0, // Would need separate query version: task.version, is_supervisor: task.is_supervisor, + is_red_team: task.is_red_team, hidden: task.hidden, created_at: task.created_at, updated_at: task.updated_at, @@ -627,6 +636,9 @@ pub struct CreateTaskRequest { /// True for contract supervisor tasks. Only supervisors can spawn new tasks. #[serde(default)] pub is_supervisor: bool, + /// True for red team tasks that monitor and review other tasks' work. + #[serde(default)] + pub is_red_team: bool, /// Priority (higher = more urgent) #[serde(default)] pub priority: i32, @@ -1331,6 +1343,15 @@ pub struct Contract { /// allowing users to manually handle code changes via patch files or other means. #[serde(default)] pub local_only: bool, + /// Whether to spawn a red team task to monitor work tasks. + /// When enabled, a parallel task monitors outputs and can alert + /// the supervisor about potential issues. + #[serde(default)] + pub red_team_enabled: bool, + /// Optional custom prompt/criteria for the red team to use + /// when evaluating task outputs. + #[serde(skip_serializing_if = "Option::is_none")] + pub red_team_prompt: Option, pub version: i32, pub created_at: DateTime, pub updated_at: DateTime, @@ -1508,6 +1529,15 @@ pub struct CreateContractRequest { /// allowing users to manually handle code changes via patch files or other means. #[serde(default)] pub local_only: Option, + /// Enable red team monitoring for this contract. 
+ /// When enabled, a parallel task monitors work task outputs + /// and can alert the supervisor about potential issues. + #[serde(default)] + pub red_team_enabled: Option, + /// Optional custom criteria for the red team to evaluate. + /// Examples: "Focus on security vulnerabilities", + /// "Ensure all functions have tests", etc. + pub red_team_prompt: Option, } /// Request payload for updating a contract @@ -2074,3 +2104,64 @@ pub struct CheckpointPatchInfo { pub created_at: DateTime, pub expires_at: DateTime, } + +// ============================================================================ +// Red Team Types +// ============================================================================ + +/// Red Team notification record +#[derive(Debug, Clone, FromRow, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamNotification { + pub id: Uuid, + pub contract_id: Uuid, + pub red_team_task_id: Uuid, + pub related_task_id: Option, + + pub message: String, + pub severity: String, + pub file_path: Option, + pub context: Option, + + pub delivered: bool, + pub delivered_at: Option>, + pub acknowledged: bool, + pub acknowledged_at: Option>, + + pub created_at: DateTime, +} + +/// Severity levels for red team notifications +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum NotificationSeverity { + Low, + Medium, + High, + Critical, +} + +impl std::fmt::Display for NotificationSeverity { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Low => write!(f, "low"), + Self::Medium => write!(f, "medium"), + Self::High => write!(f, "high"), + Self::Critical => write!(f, "critical"), + } + } +} + +impl std::str::FromStr for NotificationSeverity { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "low" => Ok(Self::Low), + "medium" => Ok(Self::Medium), + "high" => Ok(Self::High), + "critical" => 
Ok(Self::Critical), + _ => Err(format!("Invalid severity: {}", s)), + } + } +} diff --git a/makima/src/db/repository.rs b/makima/src/db/repository.rs index 6d6642b..de1712d 100644 --- a/makima/src/db/repository.rs +++ b/makima/src/db/repository.rs @@ -11,8 +11,8 @@ use super::models::{ ConversationMessage, ConversationSnapshot, CreateContractRequest, CreateFileRequest, CreateTaskRequest, Daemon, DaemonTaskAssignment, DaemonWithCapacity, File, FileSummary, FileVersion, HistoryEvent, HistoryQueryFilters, MeshChatConversation, MeshChatMessageRecord, - SupervisorState, Task, TaskCheckpoint, TaskEvent, TaskSummary, UpdateContractRequest, - UpdateFileRequest, UpdateTaskRequest, + RedTeamNotification, SupervisorState, Task, TaskCheckpoint, TaskEvent, TaskSummary, + UpdateContractRequest, UpdateFileRequest, UpdateTaskRequest, }; /// Repository error types. @@ -689,11 +689,11 @@ pub async fn create_task(pool: &PgPool, req: CreateTaskRequest) -> Result Result Result, sqlx::Error> t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.parent_task_id IS NULL AND COALESCE(t.hidden, false) = false @@ -765,7 +767,8 @@ pub async fn list_subtasks(pool: &PgPool, parent_id: Uuid) -> Result, + file_path: Option<&str>, + context: Option<&str>, +) -> Result { + sqlx::query_as::<_, RedTeamNotification>( + r#" + INSERT INTO red_team_notifications + (contract_id, red_team_task_id, related_task_id, message, severity, file_path, context) + VALUES ($1, $2, $3, $4, $5, $6, $7) + RETURNING * + "#, + ) + .bind(contract_id) + .bind(red_team_task_id) + .bind(related_task_id) + .bind(message) + 
.bind(severity) + .bind(file_path) + .bind(context) + .fetch_one(pool) + .await + .map_err(RepositoryError::Database) +} + +/// Mark a notification as delivered to the supervisor. +pub async fn mark_notification_delivered( + pool: &PgPool, + notification_id: Uuid, +) -> Result<RedTeamNotification, RepositoryError> { + sqlx::query_as::<_, RedTeamNotification>( + r#" + UPDATE red_team_notifications + SET delivered = TRUE, delivered_at = NOW() + WHERE id = $1 + RETURNING * + "#, + ) + .bind(notification_id) + .fetch_one(pool) + .await + .map_err(RepositoryError::Database) +} + +/// Get the red team task for a contract (if one exists). +/// Returns the most recently created red team task for the contract. +pub async fn get_red_team_task_for_contract( + pool: &PgPool, + contract_id: Uuid, +) -> Result<Option<Task>, RepositoryError> { + sqlx::query_as::<_, Task>( + r#" + SELECT * FROM tasks + WHERE contract_id = $1 AND is_red_team = TRUE + ORDER BY created_at DESC + LIMIT 1 + "#, + ) + .bind(contract_id) + .fetch_optional(pool) + .await + .map_err(RepositoryError::Database) +} + +/// Get the count of notifications for a red team task. +pub async fn get_notification_count_for_task( + pool: &PgPool, + red_team_task_id: Uuid, +) -> Result<i64, RepositoryError> { + let result: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM red_team_notifications WHERE red_team_task_id = $1", + ) + .bind(red_team_task_id) + .fetch_one(pool) + .await + .map_err(RepositoryError::Database)?; + Ok(result.0) +} -- cgit v1.2.3