summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsoryu <soryu@soryu.co>2026-01-27 01:05:25 +0000
committerGitHub <noreply@github.com>2026-01-27 01:05:25 +0000
commit6cd5b20670d7ecd3d48539ff898e021988f2a503 (patch)
tree0232ecd8411b01db07230f8ea2003cbbcdc070a6
parent64cc98783d067625d633eea1142d114e324f76bb (diff)
downloadsoryu-6cd5b20670d7ecd3d48539ff898e021988f2a503.tar.gz
soryu-6cd5b20670d7ecd3d48539ff898e021988f2a503.zip
Add Red Team adversarial review system for contract monitoring (#35)
Implements a parallel "red team" task that monitors work task outputs in real-time, verifying implementations stick to contract requirements, repository standards, and the execution plan. Key features: - New `red_team_enabled` and `red_team_prompt` contract configuration - Red team tasks auto-spawn when first work task is created - `makima red-team notify` CLI command for alerting supervisors - POST /api/v1/mesh/red-team/notify and /status endpoints - Alert delivery to supervisor via SendMessage daemon command - Notification audit trail via history_events table Database changes: - Add red_team_enabled/red_team_prompt columns to contracts - Add is_red_team flag to tasks with partial index - Create red_team_notifications table for audit logging Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
-rw-r--r--.makima/specs/red-team-system.md748
-rw-r--r--makima/migrations/20260126010000_add_red_team_to_contracts.sql7
-rw-r--r--makima/migrations/20260126010001_add_red_team_to_tasks.sql9
-rw-r--r--makima/migrations/20260126010002_create_red_team_notifications.sql27
-rw-r--r--makima/src/bin/makima.rs3
-rw-r--r--makima/src/daemon/api/contract.rs6
-rw-r--r--makima/src/db/models.rs91
-rw-r--r--makima/src/db/repository.rs125
-rw-r--r--makima/src/server/handlers/contract_chat.rs6
-rw-r--r--makima/src/server/handlers/contracts.rs1
-rw-r--r--makima/src/server/handlers/mesh.rs4
-rw-r--r--makima/src/server/handlers/mesh_chat.rs1
-rw-r--r--makima/src/server/handlers/mesh_red_team.rs497
-rw-r--r--makima/src/server/handlers/mesh_supervisor.rs292
-rw-r--r--makima/src/server/handlers/mod.rs1
-rw-r--r--makima/src/server/handlers/transcript_analysis.rs4
-rw-r--r--makima/src/server/mod.rs5
17 files changed, 1813 insertions, 14 deletions
diff --git a/.makima/specs/red-team-system.md b/.makima/specs/red-team-system.md
new file mode 100644
index 0000000..31f4b78
--- /dev/null
+++ b/.makima/specs/red-team-system.md
@@ -0,0 +1,748 @@
+# Red Team System Specification
+
+## Overview
+
+The Red Team system is an adversarial review feature for makima contracts that provides real-time quality assurance during task execution. When enabled, a parallel "red team" task instance monitors the output of work tasks, verifying that implementations adhere to the contract requirements, repository standards, and the execution plan.
+
+### Goals
+
+1. **Quality Assurance**: Catch deviations from the plan before they compound
+2. **Standards Compliance**: Ensure code follows repository conventions (CONTRIBUTING.md, linting rules, etc.)
+3. **Contract Adherence**: Verify implementations match the specification and requirements
+4. **Proactive Issue Detection**: Flag potential problems early, not after task completion
+
+### Non-Goals
+
+1. The red team should NOT write code or make commits
+2. The red team should NOT be overly pedantic or block progress for minor style issues
+3. The red team is NOT a replacement for code review - it's an early warning system
+
+---
+
+## 1. Feature Overview
+
+### 1.1 Concept
+
+The Red Team operates as a parallel observer task that:
+- Monitors all work task outputs in real-time via the broadcast system
+- Has read-only access to task diffs and outputs
+- Can access contract specifications, plans, and repository standards
+- Can notify the supervisor when it detects issues requiring attention
+
+### 1.2 Relationship to Existing Components
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ Contract │
+│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+│ │ Supervisor │ │ Work Task 1 │ │ Work Task 2 │ │
+│ │ │<───│ │ │ │ │
+│ │ │<───│ │ │ │ │
+│ └──────────────┘ └──────────────┘ └──────────────┘ │
+│ ^ │ │ │
+│ │ outputs outputs │
+│ │ │ │ │
+│ [NOTIFY] v v │
+│ │ ┌─────────────────────────────┐ │
+│ └────────────│ Red Team Task │ │
+│ │ (Monitoring & Validation) │ │
+│ └─────────────────────────────┘ │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 1.3 Task Type
+
+The Red Team task is a special task variant with the following characteristics:
+- `is_red_team: true` flag on the Task model
+- Has tool key for API access (like supervisor tasks)
+- Does NOT have write permissions to the repository
+- Subscribes to task output broadcasts
+- Can use `makima red-team notify` command to alert supervisor
+
+---
+
+## 2. Contract Configuration
+
+### 2.1 Contract Model Changes
+
+Add the following field to the `Contract` model in `makima/src/db/models.rs`:
+
+```rust
+/// Contract record from the database
+#[derive(Debug, Clone, FromRow, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct Contract {
+ // ... existing fields ...
+
+ /// Whether to spawn a red team task to monitor work tasks.
+ /// When enabled, a parallel task monitors outputs and can alert
+ /// the supervisor about potential issues.
+ #[serde(default)]
+ pub red_team_enabled: bool,
+
+ /// Optional custom prompt/criteria for the red team to use
+ /// when evaluating task outputs. If not provided, uses default
+ /// quality criteria.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub red_team_prompt: Option<String>,
+}
+```
+
+### 2.2 CreateContractRequest Changes
+
+```rust
+#[derive(Debug, Clone, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct CreateContractRequest {
+ // ... existing fields ...
+
+ /// Enable red team monitoring for this contract.
+ /// When enabled, a parallel task monitors work task outputs
+ /// and can alert the supervisor about potential issues.
+ #[serde(default)]
+ pub red_team_enabled: Option<bool>,
+
+ /// Optional custom criteria for the red team to evaluate.
+ /// Examples: "Focus on security vulnerabilities",
+ /// "Ensure all functions have tests", etc.
+ pub red_team_prompt: Option<String>,
+}
+```
+
+### 2.3 CLI Flag for Contract Creation
+
+The daemon CLI should support red team enablement during contract creation:
+
+```bash
+# Enable red team with default criteria
+makima supervisor create --red-team "Contract Name" "Description"
+
+# Enable red team with custom review criteria
+makima supervisor create --red-team --red-team-prompt "Focus on performance and memory usage" "Contract Name" "Description"
+```
+
+---
+
+## 3. Red Team Task Lifecycle
+
+### 3.1 Spawning
+
+The red team task is spawned automatically when:
+1. A contract has `red_team_enabled: true`
+2. The first work task is spawned (not the supervisor itself)
+
+**Spawn Logic** (in `spawn_task` handler or supervisor spawn logic):
+
+```rust
+// In spawn_task after creating a work task:
+if contract.red_team_enabled && !is_supervisor_task {
+ // Check if red team task already exists
+ let existing_red_team = repository::get_red_team_task_for_contract(pool, contract_id).await?;
+
+ if existing_red_team.is_none() {
+ // Spawn red team task
+ let red_team_task = spawn_red_team_task(
+ pool,
+ state,
+ contract_id,
+ owner_id,
+ contract.red_team_prompt.as_deref(),
+ ).await?;
+
+ tracing::info!(
+ contract_id = %contract_id,
+ red_team_task_id = %red_team_task.id,
+ "Spawned red team task for contract"
+ );
+ }
+}
+```
+
+### 3.2 Task Properties
+
+When creating the red team task:
+
+```rust
+CreateTaskRequest {
+ name: "Red Team Monitor".to_string(),
+ description: Some("Adversarial review task monitoring work task outputs".to_string()),
+ plan: generate_red_team_plan(contract, custom_prompt),
+ contract_id: Some(contract_id),
+ parent_task_id: None, // Not a child of supervisor
+ is_supervisor: false,
+ is_red_team: true, // NEW FIELD
+ // ... other fields ...
+}
+```
+
+### 3.3 Lifespan
+
+The red team task:
+- Lives for the duration of the **execute phase**
+- Is automatically terminated when:
+ - The contract advances past the execute phase
+ - The contract is completed
+ - The contract is archived
+- Can be paused/resumed along with other contract tasks
+- Does NOT restart automatically after daemon failure (not critical path)
+
+### 3.4 Read-Only Enforcement
+
+The red team task:
+- Has NO worktree of its own (or a read-only clone)
+- Cannot use git operations (commit, branch, etc.)
+- Can only READ files, not write them
+- Has API access limited to read operations
+
+---
+
+## 4. Red Team Notification CLI Command
+
+### 4.1 Command Specification
+
+New CLI command available only to red team tasks:
+
+```bash
+makima red-team notify "<message>"
+```
+
+**Arguments:**
+- `<message>`: A detailed description of the issue detected
+
+**Options:**
+- `--severity <level>`: Issue severity: `low`, `medium`, `high`, `critical` (default: `medium`)
+- `--task <task_id>`: The specific task this relates to (optional)
+- `--file <path>`: The file path where the issue was detected (optional)
+- `--context <text>`: Additional context about the issue (optional)
+
+**Example:**
+
+```bash
+makima red-team notify "Task is adding console.log statements which violates the no-debug-logging rule in CONTRIBUTING.md" \
+ --severity medium \
+ --task 550e8400-e29b-41d4-a716-446655440000 \
+ --file "src/api/handler.rs"
+```
+
+### 4.2 CLI Arguments Structure
+
+```rust
+// In makima/src/daemon/cli/mod.rs
+
+/// Red Team subcommand - red team task commands.
+#[derive(Subcommand, Debug)]
+pub enum RedTeamCommand {
+ /// Send a notification to the supervisor about a detected issue.
+ /// Only available to red team tasks.
+ Notify(NotifyArgs),
+}
+
+/// Arguments for red-team notify command.
+#[derive(Args, Debug)]
+pub struct NotifyArgs {
+ /// API URL
+ #[arg(long, env = "MAKIMA_API_URL", default_value = "https://api.makima.jp")]
+ pub api_url: String,
+
+ /// API key for authentication
+ #[arg(long, env = "MAKIMA_API_KEY")]
+ pub api_key: String,
+
+ /// Current task ID (must be a red team task)
+ #[arg(long, env = "MAKIMA_TASK_ID")]
+ pub task_id: Uuid,
+
+ /// Contract ID
+ #[arg(long, env = "MAKIMA_CONTRACT_ID")]
+ pub contract_id: Uuid,
+
+ /// The notification message
+ #[arg(index = 1)]
+ pub message: String,
+
+ /// Severity level: low, medium, high, critical
+ #[arg(long, default_value = "medium")]
+ pub severity: String,
+
+ /// Related task ID (optional)
+ #[arg(long)]
+ pub task: Option<Uuid>,
+
+ /// Related file path (optional)
+ #[arg(long)]
+ pub file: Option<String>,
+
+ /// Additional context (optional)
+ #[arg(long)]
+ pub context: Option<String>,
+}
+```
+
+### 4.3 API Endpoint
+
+**POST** `/api/v1/mesh/red-team/notify`
+
+**Request Body:**
+```json
+{
+ "message": "Issue description",
+ "severity": "medium",
+ "relatedTaskId": "uuid-optional",
+ "filePath": "src/path/optional.rs",
+ "context": "Additional context optional"
+}
+```
+
+**Response:**
+```json
+{
+ "notificationId": "uuid",
+ "delivered": true,
+ "supervisorTaskId": "uuid"
+}
+```
+
+### 4.4 Notification Delivery
+
+When a red team notification is received:
+
+1. **Validate Caller**: Ensure the request comes from a valid red team task
+2. **Find Supervisor**: Get the supervisor task for the contract
+3. **Format Message**: Create an `[ACTION REQUIRED]` formatted message
+4. **Send to Supervisor**: Inject the message into the supervisor's stdin via `SendMessage` command
+
+**Message Format:**
+
+```
+════════════════════════════════════════════════════════════════
+[RED TEAM ALERT] Severity: MEDIUM
+════════════════════════════════════════════════════════════════
+
+Issue: Task is adding console.log statements which violates the
+no-debug-logging rule in CONTRIBUTING.md
+
+Related Task: 550e8400-e29b-41d4-a716-446655440000
+File: src/api/handler.rs
+
+Context: The CONTRIBUTING.md file explicitly states that debug
+logging should use the tracing crate, not console.log or println!
+
+════════════════════════════════════════════════════════════════
+You can:
+- Pause the related task to investigate
+- Send feedback to the task to correct the issue
+- Acknowledge this alert and continue monitoring
+════════════════════════════════════════════════════════════════
+```
+
+### 4.5 Supervisor Response Handling
+
+The supervisor can respond to red team notifications by:
+1. **Pausing the task**: `makima supervisor pause <task_id>`
+2. **Sending feedback**: `makima supervisor message <task_id> "Please use tracing instead of console.log"`
+3. **Acknowledging**: Simply continue (the red team will keep monitoring)
+4. **Dismissing**: Mark the alert as false positive (future consideration)
+
+---
+
+## 5. Red Team Access Patterns
+
+### 5.1 Task Output Subscription
+
+The red team task subscribes to the `task_outputs` broadcast channel:
+
+```rust
+// In red team task initialization
+let mut task_output_rx = state.task_outputs.subscribe();
+
+loop {
+ match task_output_rx.recv().await {
+ Ok(notification) => {
+ // Only process outputs from work tasks in our contract
+ if notification.contract_id == Some(self.contract_id)
+ && !notification.is_supervisor
+ && !notification.is_red_team {
+ self.analyze_output(notification).await;
+ }
+ }
+ Err(e) => {
+ tracing::warn!("Red team task output subscription error: {}", e);
+ }
+ }
+}
+```
+
+### 5.2 Task Diff Access
+
+The red team can request diffs via the supervisor API:
+
+**GET** `/api/v1/mesh/supervisor/tasks/{task_id}/diff`
+
+This endpoint already exists and can be used by the red team (with tool key auth).
+
+### 5.3 Contract Information Access
+
+The red team can read:
+- Contract plan and specifications (via contract files)
+- Repository standards (CONTRIBUTING.md, .editorconfig, etc.)
+- Task descriptions and plans
+
+**Existing endpoints used:**
+- `GET /api/v1/contracts/{id}` - Contract details
+- `GET /api/v1/contracts/{id}/files` - Contract files
+- `GET /api/v1/files/{id}` - File content
+
+### 5.4 Repository File Access
+
+For repository standards, the red team uses the existing daemon file read capability:
+
+```bash
+# Via makima CLI (from within the red team task)
+makima supervisor read-file <task_id> "CONTRIBUTING.md"
+makima supervisor read-file <task_id> ".editorconfig"
+makima supervisor read-file <task_id> "rustfmt.toml"
+```
+
+Or direct filesystem access if the red team has a read-only worktree clone.
+
+---
+
+## 6. System Prompt for Red Team Task
+
+The red team task receives a specialized system prompt that guides its behavior:
+
+```markdown
+# Red Team Monitor
+
+You are an adversarial quality reviewer for a software development contract. Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems.
+
+## Your Mission
+
+Monitor all task outputs and verify:
+1. **Plan Adherence**: Are tasks following the implementation plan?
+2. **Code Quality**: Does the code meet repository standards?
+3. **Contract Requirements**: Does the implementation match the specification?
+4. **Best Practices**: Are there obvious anti-patterns or issues?
+
+## Access Available
+
+You have read-only access to:
+- Task outputs (streamed in real-time)
+- Task diffs (code changes)
+- Contract specifications and plan documents
+- Repository configuration files (CONTRIBUTING.md, linting configs, etc.)
+
+## How to Monitor
+
+1. **Subscribe to task outputs**: You'll receive outputs from all work tasks
+2. **Analyze code changes**: Request diffs for completed tasks
+3. **Cross-reference**: Compare outputs against the plan and specifications
+4. **Report issues**: Use `makima red-team notify` when you detect problems
+
+## When to Notify
+
+NOTIFY the supervisor when you observe:
+- **Critical**: Security vulnerabilities, data loss risks, breaking changes
+- **High**: Significant deviations from the plan, major code quality issues
+- **Medium**: Missing tests, suboptimal implementations, minor standard violations
+- **Low**: Style inconsistencies, documentation gaps (use sparingly)
+
+## What NOT to Do
+
+- Do NOT nitpick minor style issues (that's what linters are for)
+- Do NOT block progress for trivial concerns
+- Do NOT write code or make changes yourself
+- Do NOT notify for things that are already in progress and being addressed
+- Do NOT create duplicate notifications for the same issue
+
+## Notification Format
+
+When notifying, always include:
+1. A clear, concise description of the issue
+2. The severity level (critical/high/medium/low)
+3. The related task ID if applicable
+4. The specific file or code location if known
+5. Why this matters (reference to plan, spec, or standards)
+
+## Example Notification
+
+```
+makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \
+ --severity critical \
+ --task <task_id> \
+ --file "src/auth/user.rs" \
+ --context "Specification section 3.2 requires bcrypt hashing for all passwords"
+```
+
+## Custom Review Criteria
+
+{{#if red_team_prompt}}
+Additional review criteria for this contract:
+{{red_team_prompt}}
+{{/if}}
+
+## Contract Context
+
+Contract: {{contract_name}}
+Phase: {{contract_phase}}
+Repository: {{repository_url}}
+
+Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt.
+```
+
+---
+
+## 7. API Changes Summary
+
+### 7.1 New Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/api/v1/mesh/red-team/notify` | Send notification from red team to supervisor |
+| GET | `/api/v1/mesh/red-team/status` | Get red team task status for a contract |
+
+### 7.2 Modified Endpoints
+
+| Method | Path | Change |
+|--------|------|--------|
+| POST | `/api/v1/contracts` | Add `red_team_enabled` and `red_team_prompt` fields |
+| GET | `/api/v1/contracts/{id}` | Include red team task info in response |
+
+### 7.3 New Request/Response Types
+
+**RedTeamNotifyRequest:**
+```rust
+#[derive(Debug, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamNotifyRequest {
+ pub message: String,
+ #[serde(default = "default_severity")]
+ pub severity: String,
+ pub related_task_id: Option<Uuid>,
+ pub file_path: Option<String>,
+ pub context: Option<String>,
+}
+```
+
+**RedTeamNotifyResponse:**
+```rust
+#[derive(Debug, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamNotifyResponse {
+ pub notification_id: Uuid,
+ pub delivered: bool,
+ pub supervisor_task_id: Uuid,
+}
+```
+
+**RedTeamStatusResponse:**
+```rust
+#[derive(Debug, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamStatusResponse {
+ pub contract_id: Uuid,
+ pub red_team_task_id: Option<Uuid>,
+ pub status: Option<String>,
+ pub notifications_sent: i32,
+ pub last_activity: Option<DateTime<Utc>>,
+}
+```
+
+---
+
+## 8. Database Schema Changes
+
+### 8.1 Contracts Table
+
+```sql
+ALTER TABLE contracts
+ADD COLUMN red_team_enabled BOOLEAN NOT NULL DEFAULT FALSE,
+ADD COLUMN red_team_prompt TEXT;
+```
+
+### 8.2 Tasks Table
+
+```sql
+ALTER TABLE tasks
+ADD COLUMN is_red_team BOOLEAN NOT NULL DEFAULT FALSE;
+```
+
+### 8.3 Red Team Notifications Table (New)
+
+```sql
+CREATE TABLE red_team_notifications (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ contract_id UUID NOT NULL REFERENCES contracts(id) ON DELETE CASCADE,
+ red_team_task_id UUID NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
+ related_task_id UUID REFERENCES tasks(id) ON DELETE SET NULL,
+
+ message TEXT NOT NULL,
+ severity VARCHAR(20) NOT NULL DEFAULT 'medium',
+ file_path TEXT,
+ context TEXT,
+
+ -- Delivery status
+ delivered BOOLEAN NOT NULL DEFAULT FALSE,
+ delivered_at TIMESTAMP WITH TIME ZONE,
+ acknowledged BOOLEAN NOT NULL DEFAULT FALSE,
+ acknowledged_at TIMESTAMP WITH TIME ZONE,
+
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+);
+
+-- Indexes
+CREATE INDEX idx_red_team_notifications_contract_id ON red_team_notifications(contract_id);
+CREATE INDEX idx_red_team_notifications_red_team_task_id ON red_team_notifications(red_team_task_id);
+CREATE INDEX idx_red_team_notifications_created_at ON red_team_notifications(created_at DESC);
+```
+
+### 8.4 Index for Red Team Task Lookup
+
+```sql
+CREATE INDEX idx_tasks_contract_red_team ON tasks(contract_id, is_red_team)
+WHERE is_red_team = TRUE;
+```
+
+---
+
+## 9. Implementation Phases
+
+### Phase 1: Foundation (MVP)
+- [ ] Add `red_team_enabled` and `red_team_prompt` to Contract model
+- [ ] Add `is_red_team` to Task model
+- [ ] Database migrations
+- [ ] Basic red team task spawning logic
+- [ ] `makima red-team notify` CLI command
+- [ ] Red team notification API endpoint
+
+### Phase 2: Monitoring Infrastructure
+- [ ] Task output subscription for red team
+- [ ] Diff access for red team tasks
+- [ ] Red team system prompt generation
+- [ ] Notification delivery to supervisor
+
+### Phase 3: Polish & UX
+- [ ] Red team status in contract view
+- [ ] Notification history and acknowledgment
+- [ ] TUI integration for red team alerts
+- [ ] Frontend display of red team notifications
+
+### Phase 4: Future Enhancements
+- [ ] Configurable notification thresholds
+- [ ] Automatic pause on critical issues
+- [ ] Red team notification digest/summary
+- [ ] Integration with external code review tools
+
+---
+
+## 10. Security Considerations
+
+### 10.1 Access Control
+
+- Red team tasks MUST only have read access
+- Verify `is_red_team` flag before allowing notification API calls
+- Red team cannot spawn tasks or modify contract state
+- Tool key scope should be limited for red team tasks
+
+### 10.2 Abuse Prevention
+
+- Rate limit red team notifications (max 10 per minute per task)
+- Prevent notification spam with deduplication
+- Log all red team activities for audit
+
+### 10.3 Isolation
+
+- Red team task runs in separate worktree (or no worktree)
+- Cannot affect work task execution directly
+- Supervisor controls whether to act on notifications
+
+---
+
+## 11. Testing Strategy
+
+### 11.1 Unit Tests
+
+- Contract model serialization with red team fields
+- Red team task spawning conditions
+- Notification message formatting
+
+### 11.2 Integration Tests
+
+- Full contract lifecycle with red team enabled
+- Notification delivery to supervisor
+- Red team output subscription
+
+### 11.3 E2E Tests
+
+- Create contract with `--red-team` flag
+- Red team detects intentional violation
+- Supervisor receives and responds to notification
+
+---
+
+## 12. Success Metrics
+
+1. **Detection Rate**: Percentage of issues caught by red team before task completion
+2. **False Positive Rate**: Percentage of notifications that are dismissed as not actionable
+3. **Response Time**: Time between red team detection and supervisor acknowledgment
+4. **Contract Success Rate**: Compare success rates for contracts with/without red team
+
+---
+
+## Appendix A: Message Protocol
+
+### Task Output Notification Structure
+
+The red team subscribes to `TaskOutputNotification`:
+
+```rust
+pub struct TaskOutputNotification {
+ pub task_id: Uuid,
+ pub owner_id: Option<Uuid>,
+ pub message_type: String, // "assistant", "tool_use", "tool_result", etc.
+ pub content: String,
+ pub tool_name: Option<String>,
+ pub tool_input: Option<serde_json::Value>,
+ pub is_error: Option<bool>,
+ pub cost_usd: Option<f64>,
+ pub duration_ms: Option<u64>,
+ pub is_partial: bool,
+}
+```
+
+### Daemon Command for Supervisor Message
+
+```rust
+DaemonCommand::SendMessage {
+ task_id: supervisor_id,
+ message: formatted_red_team_alert,
+}
+```
+
+---
+
+## Appendix B: Configuration Examples
+
+### Contract Creation with Red Team (API)
+
+```json
+POST /api/v1/contracts
+{
+ "name": "Implement User Authentication",
+ "description": "Add OAuth2 authentication flow",
+ "contract_type": "specification",
+ "red_team_enabled": true,
+ "red_team_prompt": "Pay special attention to security best practices and OWASP guidelines. Flag any hardcoded secrets or insecure token handling."
+}
+```
+
+### Contract Creation with Red Team (CLI)
+
+```bash
+makima contract create \
+ --type specification \
+ --red-team \
+ --red-team-prompt "Focus on API backwards compatibility and deprecation handling" \
+ "API v2 Migration" \
+ "Migrate public API from v1 to v2"
+```
diff --git a/makima/migrations/20260126010000_add_red_team_to_contracts.sql b/makima/migrations/20260126010000_add_red_team_to_contracts.sql
new file mode 100644
index 0000000..742902e
--- /dev/null
+++ b/makima/migrations/20260126010000_add_red_team_to_contracts.sql
@@ -0,0 +1,7 @@
+-- Add red team configuration to contracts
+ALTER TABLE contracts
+ADD COLUMN red_team_enabled BOOLEAN NOT NULL DEFAULT FALSE,
+ADD COLUMN red_team_prompt TEXT;
+
+COMMENT ON COLUMN contracts.red_team_enabled IS 'Whether to spawn a red team task to monitor work tasks';
+COMMENT ON COLUMN contracts.red_team_prompt IS 'Custom criteria for the red team to evaluate';
diff --git a/makima/migrations/20260126010001_add_red_team_to_tasks.sql b/makima/migrations/20260126010001_add_red_team_to_tasks.sql
new file mode 100644
index 0000000..cb21405
--- /dev/null
+++ b/makima/migrations/20260126010001_add_red_team_to_tasks.sql
@@ -0,0 +1,9 @@
+-- Add red team flag to tasks
+ALTER TABLE tasks
+ADD COLUMN is_red_team BOOLEAN NOT NULL DEFAULT FALSE;
+
+-- Index for efficient red team task lookup per contract
+CREATE INDEX idx_tasks_contract_red_team ON tasks(contract_id, is_red_team)
+WHERE is_red_team = TRUE;
+
+COMMENT ON COLUMN tasks.is_red_team IS 'Whether this is a red team monitoring task';
diff --git a/makima/migrations/20260126010002_create_red_team_notifications.sql b/makima/migrations/20260126010002_create_red_team_notifications.sql
new file mode 100644
index 0000000..fc0b687
--- /dev/null
+++ b/makima/migrations/20260126010002_create_red_team_notifications.sql
@@ -0,0 +1,27 @@
+-- Create red team notifications table
+CREATE TABLE red_team_notifications (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ contract_id UUID NOT NULL REFERENCES contracts(id) ON DELETE CASCADE,
+ red_team_task_id UUID NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
+ related_task_id UUID REFERENCES tasks(id) ON DELETE SET NULL,
+
+ message TEXT NOT NULL,
+ severity VARCHAR(20) NOT NULL DEFAULT 'medium',
+ file_path TEXT,
+ context TEXT,
+
+ -- Delivery status
+ delivered BOOLEAN NOT NULL DEFAULT FALSE,
+ delivered_at TIMESTAMPTZ,
+ acknowledged BOOLEAN NOT NULL DEFAULT FALSE,
+ acknowledged_at TIMESTAMPTZ,
+
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Indexes
+CREATE INDEX idx_red_team_notifications_contract_id ON red_team_notifications(contract_id);
+CREATE INDEX idx_red_team_notifications_red_team_task_id ON red_team_notifications(red_team_task_id);
+CREATE INDEX idx_red_team_notifications_created_at ON red_team_notifications(created_at DESC);
+
+COMMENT ON TABLE red_team_notifications IS 'Audit log of notifications sent from red team tasks to supervisors';
diff --git a/makima/src/bin/makima.rs b/makima/src/bin/makima.rs
index 6ddecab..6976106 100644
--- a/makima/src/bin/makima.rs
+++ b/makima/src/bin/makima.rs
@@ -1096,6 +1096,9 @@ async fn run_tui_loop(
initial_phase: None,
autonomous_loop: None,
phase_guard: None,
+ local_only: None,
+ red_team_enabled: None,
+ red_team_prompt: None,
};
match client.create_contract(req).await {
diff --git a/makima/src/daemon/api/contract.rs b/makima/src/daemon/api/contract.rs
index 12ebe95..445d676 100644
--- a/makima/src/daemon/api/contract.rs
+++ b/makima/src/daemon/api/contract.rs
@@ -66,6 +66,12 @@ pub struct CreateContractRequest {
pub autonomous_loop: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub phase_guard: Option<bool>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub local_only: Option<bool>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub red_team_enabled: Option<bool>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub red_team_prompt: Option<String>,
}
impl ApiClient {
diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs
index 9c2d072..d5f2814 100644
--- a/makima/src/db/models.rs
+++ b/makima/src/db/models.rs
@@ -440,6 +440,11 @@ pub struct Task {
/// True for contract supervisor tasks. Only supervisors can spawn new tasks.
#[serde(default)]
pub is_supervisor: bool,
+ /// Whether this is a red team monitoring task.
+ /// Red team tasks monitor work task outputs and can notify
+ /// the supervisor about potential issues.
+ #[serde(default)]
+ pub is_red_team: bool,
// Daemon/container info
pub daemon_id: Option<Uuid>,
@@ -570,6 +575,9 @@ pub struct TaskSummary {
/// True for contract supervisor tasks
#[serde(default)]
pub is_supervisor: bool,
+ /// True for red team tasks that monitor and review other tasks' work
+ #[serde(default)]
+ pub is_red_team: bool,
/// Whether this task is hidden from the UI (user dismissed it)
#[serde(default)]
pub hidden: bool,
@@ -595,6 +603,7 @@ impl From<Task> for TaskSummary {
subtask_count: 0, // Would need separate query
version: task.version,
is_supervisor: task.is_supervisor,
+ is_red_team: task.is_red_team,
hidden: task.hidden,
created_at: task.created_at,
updated_at: task.updated_at,
@@ -627,6 +636,9 @@ pub struct CreateTaskRequest {
/// True for contract supervisor tasks. Only supervisors can spawn new tasks.
#[serde(default)]
pub is_supervisor: bool,
+ /// True for red team tasks that monitor and review other tasks' work.
+ #[serde(default)]
+ pub is_red_team: bool,
/// Priority (higher = more urgent)
#[serde(default)]
pub priority: i32,
@@ -1331,6 +1343,15 @@ pub struct Contract {
/// allowing users to manually handle code changes via patch files or other means.
#[serde(default)]
pub local_only: bool,
+ /// Whether to spawn a red team task to monitor work tasks.
+ /// When enabled, a parallel task monitors outputs and can alert
+ /// the supervisor about potential issues.
+ #[serde(default)]
+ pub red_team_enabled: bool,
+ /// Optional custom prompt/criteria for the red team to use
+ /// when evaluating task outputs.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub red_team_prompt: Option<String>,
pub version: i32,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
@@ -1508,6 +1529,15 @@ pub struct CreateContractRequest {
/// allowing users to manually handle code changes via patch files or other means.
#[serde(default)]
pub local_only: Option<bool>,
+ /// Enable red team monitoring for this contract.
+ /// When enabled, a parallel task monitors work task outputs
+ /// and can alert the supervisor about potential issues.
+ #[serde(default)]
+ pub red_team_enabled: Option<bool>,
+ /// Optional custom criteria for the red team to evaluate.
+ /// Examples: "Focus on security vulnerabilities",
+ /// "Ensure all functions have tests", etc.
+ pub red_team_prompt: Option<String>,
}
/// Request payload for updating a contract
@@ -2074,3 +2104,64 @@ pub struct CheckpointPatchInfo {
pub created_at: DateTime<Utc>,
pub expires_at: DateTime<Utc>,
}
+
+// ============================================================================
+// Red Team Types
+// ============================================================================
+
+/// Red Team notification record
+#[derive(Debug, Clone, FromRow, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamNotification {
+ pub id: Uuid,
+ pub contract_id: Uuid,
+ pub red_team_task_id: Uuid,
+ pub related_task_id: Option<Uuid>,
+
+ pub message: String,
+ pub severity: String,
+ pub file_path: Option<String>,
+ pub context: Option<String>,
+
+ pub delivered: bool,
+ pub delivered_at: Option<DateTime<Utc>>,
+ pub acknowledged: bool,
+ pub acknowledged_at: Option<DateTime<Utc>>,
+
+ pub created_at: DateTime<Utc>,
+}
+
+/// Severity levels for red team notifications
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum NotificationSeverity {
+ Low,
+ Medium,
+ High,
+ Critical,
+}
+
+impl std::fmt::Display for NotificationSeverity {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::Low => write!(f, "low"),
+ Self::Medium => write!(f, "medium"),
+ Self::High => write!(f, "high"),
+ Self::Critical => write!(f, "critical"),
+ }
+ }
+}
+
+impl std::str::FromStr for NotificationSeverity {
+ type Err = String;
+
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ match s.to_lowercase().as_str() {
+ "low" => Ok(Self::Low),
+ "medium" => Ok(Self::Medium),
+ "high" => Ok(Self::High),
+ "critical" => Ok(Self::Critical),
+ _ => Err(format!("Invalid severity: {}", s)),
+ }
+ }
+}
diff --git a/makima/src/db/repository.rs b/makima/src/db/repository.rs
index 6d6642b..de1712d 100644
--- a/makima/src/db/repository.rs
+++ b/makima/src/db/repository.rs
@@ -11,8 +11,8 @@ use super::models::{
ConversationMessage, ConversationSnapshot, CreateContractRequest, CreateFileRequest,
CreateTaskRequest, Daemon, DaemonTaskAssignment, DaemonWithCapacity, File, FileSummary,
FileVersion, HistoryEvent, HistoryQueryFilters, MeshChatConversation, MeshChatMessageRecord,
- SupervisorState, Task, TaskCheckpoint, TaskEvent, TaskSummary, UpdateContractRequest,
- UpdateFileRequest, UpdateTaskRequest,
+ RedTeamNotification, SupervisorState, Task, TaskCheckpoint, TaskEvent, TaskSummary,
+ UpdateContractRequest, UpdateFileRequest, UpdateTaskRequest,
};
/// Repository error types.
@@ -689,11 +689,11 @@ pub async fn create_task(pool: &PgPool, req: CreateTaskRequest) -> Result<Task,
r#"
INSERT INTO tasks (
contract_id, parent_task_id, depth, name, description, plan, priority,
- is_supervisor, repository_url, base_branch, target_branch, merge_mode,
+ is_supervisor, is_red_team, repository_url, base_branch, target_branch, merge_mode,
target_repo_path, completion_action, continue_from_task_id, copy_files,
branched_from_task_id, conversation_state
)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)
RETURNING *
"#,
)
@@ -705,6 +705,7 @@ pub async fn create_task(pool: &PgPool, req: CreateTaskRequest) -> Result<Task,
.bind(&req.plan)
.bind(req.priority)
.bind(req.is_supervisor)
+ .bind(req.is_red_team)
.bind(&repo_url)
.bind(&base_branch)
.bind(&target_branch)
@@ -744,7 +745,8 @@ pub async fn list_tasks(pool: &PgPool) -> Result<Vec<TaskSummary>, sqlx::Error>
t.parent_task_id, t.depth, t.name, t.status, t.priority,
t.progress_summary,
(SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count,
- t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
+ t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team,
+ COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
FROM tasks t
LEFT JOIN contracts c ON t.contract_id = c.id
WHERE t.parent_task_id IS NULL AND COALESCE(t.hidden, false) = false
@@ -765,7 +767,8 @@ pub async fn list_subtasks(pool: &PgPool, parent_id: Uuid) -> Result<Vec<TaskSum
t.parent_task_id, t.depth, t.name, t.status, t.priority,
t.progress_summary,
(SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count,
- t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
+ t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team,
+ COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
FROM tasks t
LEFT JOIN contracts c ON t.contract_id = c.id
WHERE t.parent_task_id = $1
@@ -1100,11 +1103,11 @@ pub async fn create_task_for_owner(
r#"
INSERT INTO tasks (
owner_id, contract_id, parent_task_id, depth, name, description, plan, priority,
- is_supervisor, repository_url, base_branch, target_branch, merge_mode,
+ is_supervisor, is_red_team, repository_url, base_branch, target_branch, merge_mode,
target_repo_path, completion_action, continue_from_task_id, copy_files,
branched_from_task_id, conversation_state
)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20)
RETURNING *
"#,
)
@@ -1117,6 +1120,7 @@ pub async fn create_task_for_owner(
.bind(&req.plan)
.bind(req.priority)
.bind(req.is_supervisor)
+ .bind(req.is_red_team)
.bind(&repo_url)
.bind(&base_branch)
.bind(&target_branch)
@@ -1164,7 +1168,8 @@ pub async fn list_tasks_for_owner(
t.parent_task_id, t.depth, t.name, t.status, t.priority,
t.progress_summary,
(SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count,
- t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
+ t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team,
+ COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
FROM tasks t
LEFT JOIN contracts c ON t.contract_id = c.id
WHERE t.owner_id = $1 AND t.parent_task_id IS NULL AND COALESCE(t.hidden, false) = false
@@ -1190,7 +1195,8 @@ pub async fn list_subtasks_for_owner(
t.parent_task_id, t.depth, t.name, t.status, t.priority,
t.progress_summary,
(SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count,
- t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
+ t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team,
+ COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
FROM tasks t
LEFT JOIN contracts c ON t.contract_id = c.id
WHERE t.owner_id = $1 AND t.parent_task_id = $2
@@ -1711,7 +1717,8 @@ pub async fn list_sibling_tasks(
t.parent_task_id, t.depth, t.name, t.status, t.priority,
t.progress_summary,
(SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count,
- t.version, t.is_supervisor, t.created_at, t.updated_at
+ t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team,
+ COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
FROM tasks t
LEFT JOIN contracts c ON t.contract_id = c.id
WHERE t.parent_task_id = $1 AND t.id != $2
@@ -1733,7 +1740,8 @@ pub async fn list_sibling_tasks(
t.parent_task_id, t.depth, t.name, t.status, t.priority,
t.progress_summary,
(SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count,
- t.version, t.is_supervisor, t.created_at, t.updated_at
+ t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team,
+ COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
FROM tasks t
LEFT JOIN contracts c ON t.contract_id = c.id
WHERE t.parent_task_id IS NULL AND t.id != $1
@@ -2716,7 +2724,8 @@ pub async fn list_tasks_in_contract(
t.parent_task_id, t.depth, t.name, t.status, t.priority,
t.progress_summary,
(SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count,
- t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
+ t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team,
+ COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at
FROM tasks t
LEFT JOIN contracts c ON t.contract_id = c.id
WHERE t.contract_id = $1 AND t.owner_id = $2
@@ -3906,3 +3915,93 @@ pub async fn delete_checkpoint_patches_for_task(
.await?;
Ok(result.rows_affected() as i64)
}
+
+// =============================================================================
+// Red Team Notifications
+// =============================================================================
+
+/// Create a red team notification.
+/// Red team tasks use this to report issues found during implementation review.
+pub async fn create_red_team_notification(
+ pool: &PgPool,
+ contract_id: Uuid,
+ red_team_task_id: Uuid,
+ message: &str,
+ severity: &str,
+ related_task_id: Option<Uuid>,
+ file_path: Option<&str>,
+ context: Option<&str>,
+) -> Result<RedTeamNotification, RepositoryError> {
+ sqlx::query_as::<_, RedTeamNotification>(
+ r#"
+ INSERT INTO red_team_notifications
+ (contract_id, red_team_task_id, related_task_id, message, severity, file_path, context)
+ VALUES ($1, $2, $3, $4, $5, $6, $7)
+ RETURNING *
+ "#,
+ )
+ .bind(contract_id)
+ .bind(red_team_task_id)
+ .bind(related_task_id)
+ .bind(message)
+ .bind(severity)
+ .bind(file_path)
+ .bind(context)
+ .fetch_one(pool)
+ .await
+ .map_err(RepositoryError::Database)
+}
+
+/// Mark a notification as delivered to the supervisor.
+pub async fn mark_notification_delivered(
+ pool: &PgPool,
+ notification_id: Uuid,
+) -> Result<RedTeamNotification, RepositoryError> {
+ sqlx::query_as::<_, RedTeamNotification>(
+ r#"
+ UPDATE red_team_notifications
+ SET delivered = TRUE, delivered_at = NOW()
+ WHERE id = $1
+ RETURNING *
+ "#,
+ )
+ .bind(notification_id)
+ .fetch_one(pool)
+ .await
+ .map_err(RepositoryError::Database)
+}
+
+/// Get the red team task for a contract (if one exists).
+/// Returns the most recently created red team task for the contract.
+pub async fn get_red_team_task_for_contract(
+ pool: &PgPool,
+ contract_id: Uuid,
+) -> Result<Option<Task>, RepositoryError> {
+ sqlx::query_as::<_, Task>(
+ r#"
+ SELECT * FROM tasks
+ WHERE contract_id = $1 AND is_red_team = TRUE
+ ORDER BY created_at DESC
+ LIMIT 1
+ "#,
+ )
+ .bind(contract_id)
+ .fetch_optional(pool)
+ .await
+ .map_err(RepositoryError::Database)
+}
+
+/// Get the count of notifications for a red team task.
+pub async fn get_notification_count_for_task(
+ pool: &PgPool,
+ red_team_task_id: Uuid,
+) -> Result<i64, RepositoryError> {
+ let result: (i64,) = sqlx::query_as(
+ "SELECT COUNT(*) FROM red_team_notifications WHERE red_team_task_id = $1",
+ )
+ .bind(red_team_task_id)
+ .fetch_one(pool)
+ .await
+ .map_err(RepositoryError::Database)?;
+ Ok(result.0)
+}
diff --git a/makima/src/server/handlers/contract_chat.rs b/makima/src/server/handlers/contract_chat.rs
index e6ee8d4..dac806a 100644
--- a/makima/src/server/handlers/contract_chat.rs
+++ b/makima/src/server/handlers/contract_chat.rs
@@ -1362,6 +1362,7 @@ async fn handle_contract_request(
continue_from_task_id: None,
copy_files: None,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: None,
branched_from_task_id: None,
conversation_history: None,
@@ -1458,6 +1459,7 @@ async fn handle_contract_request(
continue_from_task_id: None,
copy_files: None,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: None,
branched_from_task_id: None,
conversation_history: None,
@@ -2190,6 +2192,7 @@ async fn handle_contract_request(
continue_from_task_id: previous_task_id,
copy_files: None,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: None,
branched_from_task_id: None,
conversation_history: None,
@@ -2586,6 +2589,8 @@ async fn handle_contract_request(
autonomous_loop: None,
phase_guard: None,
local_only: None,
+ red_team_enabled: None,
+ red_team_prompt: None,
};
let contract = match repository::create_contract_for_owner(pool, owner_id, contract_req).await {
@@ -2707,6 +2712,7 @@ async fn handle_contract_request(
continue_from_task_id: None,
copy_files: None,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: None,
branched_from_task_id: None,
conversation_history: None,
diff --git a/makima/src/server/handlers/contracts.rs b/makima/src/server/handlers/contracts.rs
index 3498063..3ad38da 100644
--- a/makima/src/server/handlers/contracts.rs
+++ b/makima/src/server/handlers/contracts.rs
@@ -295,6 +295,7 @@ pub async fn create_contract(
continue_from_task_id: None,
copy_files: None,
is_supervisor: true,
+ is_red_team: false,
checkpoint_sha: None,
priority: 0,
merge_mode: None,
diff --git a/makima/src/server/handlers/mesh.rs b/makima/src/server/handlers/mesh.rs
index 19958e7..c4d862c 100644
--- a/makima/src/server/handlers/mesh.rs
+++ b/makima/src/server/handlers/mesh.rs
@@ -2239,6 +2239,7 @@ pub async fn reassign_task(
plan: updated_plan.clone(),
parent_task_id: task.parent_task_id,
is_supervisor: task.is_supervisor,
+ is_red_team: task.is_red_team,
priority: task.priority,
repository_url: task.repository_url.clone(),
base_branch: task.base_branch.clone(),
@@ -3010,6 +3011,7 @@ pub async fn fork_task(
plan: req.new_task_plan.clone(),
parent_task_id: None, // Forked tasks are independent
is_supervisor: false,
+ is_red_team: false,
priority: task.priority,
repository_url: task.repository_url.clone(),
base_branch: task.base_branch.clone(),
@@ -3167,6 +3169,7 @@ pub async fn resume_from_checkpoint(
plan: req.plan,
parent_task_id: None,
is_supervisor: false,
+ is_red_team: false,
priority: task.priority,
repository_url: task.repository_url.clone(),
base_branch: task.base_branch.clone(),
@@ -3502,6 +3505,7 @@ pub async fn branch_task(
plan: req.message,
parent_task_id: None,
is_supervisor: false,
+ is_red_team: false,
priority: source_task.priority,
repository_url: source_task.repository_url.clone(),
base_branch: source_task.base_branch.clone(),
diff --git a/makima/src/server/handlers/mesh_chat.rs b/makima/src/server/handlers/mesh_chat.rs
index eb35728..ed6cfc0 100644
--- a/makima/src/server/handlers/mesh_chat.rs
+++ b/makima/src/server/handlers/mesh_chat.rs
@@ -1017,6 +1017,7 @@ async fn handle_mesh_request(
continue_from_task_id: None,
copy_files: None,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: None,
branched_from_task_id: None,
conversation_history: None,
diff --git a/makima/src/server/handlers/mesh_red_team.rs b/makima/src/server/handlers/mesh_red_team.rs
new file mode 100644
index 0000000..c5af60e
--- /dev/null
+++ b/makima/src/server/handlers/mesh_red_team.rs
@@ -0,0 +1,497 @@
+//! HTTP handlers for red team mesh operations.
+//!
+//! These endpoints are used by red team tasks (via the makima CLI) to notify
+//! supervisors of potential issues and query their own status.
+
+use axum::{
+ extract::State,
+ http::{HeaderMap, StatusCode},
+ response::IntoResponse,
+ Json,
+};
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+use uuid::Uuid;
+
+use crate::db::repository;
+use crate::server::handlers::mesh::{extract_auth, AuthSource};
+use crate::server::messages::ApiError;
+use crate::server::state::{DaemonCommand, SharedState};
+
+// =============================================================================
+// Request/Response Types
+// =============================================================================
+
+/// Severity level for red team notifications.
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum RedTeamSeverity {
+ /// Informational notice - minor issue or suggestion
+ Info,
+ /// Warning - potential problem that should be reviewed
+ Warning,
+ /// Critical - serious issue requiring immediate attention
+ Critical,
+}
+
+impl Default for RedTeamSeverity {
+ fn default() -> Self {
+ Self::Warning
+ }
+}
+
+impl std::fmt::Display for RedTeamSeverity {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::Info => write!(f, "INFO"),
+ Self::Warning => write!(f, "WARNING"),
+ Self::Critical => write!(f, "CRITICAL"),
+ }
+ }
+}
+
+/// Request to notify the supervisor of a potential issue.
+#[derive(Debug, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamNotifyRequest {
+ /// The issue description/message to send to the supervisor
+ pub message: String,
+ /// Severity level of the issue
+ #[serde(default)]
+ pub severity: RedTeamSeverity,
+ /// ID of the task being reviewed (optional - if not provided, assumes general contract concern)
+ pub related_task_id: Option<Uuid>,
+ /// File path related to the issue (optional)
+ pub file_path: Option<String>,
+ /// Additional context about the issue
+ pub context: Option<String>,
+}
+
+/// Response from the notify endpoint.
+#[derive(Debug, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamNotifyResponse {
+ /// Unique ID for this notification
+ pub notification_id: Uuid,
+ /// Whether the notification was successfully delivered to the supervisor
+ pub delivered: bool,
+ /// The supervisor task ID that received the notification
+ pub supervisor_task_id: Option<Uuid>,
+}
+
+/// Response from the status endpoint.
+#[derive(Debug, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamStatusResponse {
+ /// Contract ID being monitored
+ pub contract_id: Uuid,
+ /// Red team task ID
+ pub red_team_task_id: Uuid,
+ /// Current task status
+ pub status: String,
+ /// Number of notifications sent so far
+ pub notifications_sent: i64,
+}
+
+/// Red team notification record stored in database.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct RedTeamNotification {
+ pub id: Uuid,
+ pub red_team_task_id: Uuid,
+ pub contract_id: Uuid,
+ pub message: String,
+ pub severity: String,
+ pub related_task_id: Option<Uuid>,
+ pub file_path: Option<String>,
+ pub context: Option<String>,
+ pub delivered: bool,
+ pub created_at: chrono::DateTime<chrono::Utc>,
+}
+
+// =============================================================================
+// Helper Functions
+// =============================================================================
+
+/// Verify the request comes from a red team task and extract ownership info.
+///
+/// Returns (task_id, owner_id, contract_id) on success.
+async fn verify_red_team_auth(
+ state: &SharedState,
+ headers: &HeaderMap,
+) -> Result<(Uuid, Uuid, Uuid), (StatusCode, Json<ApiError>)> {
+ let auth = extract_auth(state, headers);
+
+ let task_id = match auth {
+ AuthSource::ToolKey(task_id) => task_id,
+ _ => {
+ return Err((
+ StatusCode::UNAUTHORIZED,
+ Json(ApiError::new(
+ "UNAUTHORIZED",
+ "Red team endpoints require tool key auth",
+ )),
+ ));
+ }
+ };
+
+ // Get the task to verify it's a red team task and get owner_id
+ let pool = state.db_pool.as_ref().ok_or_else(|| {
+ (
+ StatusCode::SERVICE_UNAVAILABLE,
+ Json(ApiError::new("DB_UNAVAILABLE", "Database not configured")),
+ )
+ })?;
+
+ let task = repository::get_task(pool, task_id)
+ .await
+ .map_err(|e| {
+ tracing::error!(error = %e, "Failed to get red team task");
+ (
+ StatusCode::INTERNAL_SERVER_ERROR,
+ Json(ApiError::new("DB_ERROR", "Failed to verify red team task")),
+ )
+ })?
+ .ok_or_else(|| {
+ (
+ StatusCode::NOT_FOUND,
+ Json(ApiError::new("NOT_FOUND", "Task not found")),
+ )
+ })?;
+
+ // Verify task is a red team task
+ // NOTE: This requires the is_red_team field to be added to the Task struct.
+ // For now, we check if the task name contains "red-team" or "red_team" as a fallback.
+ let is_red_team = task.name.to_lowercase().contains("red-team")
+ || task.name.to_lowercase().contains("red_team")
+ || task.name.to_lowercase().contains("redteam");
+
+ if !is_red_team {
+ return Err((
+ StatusCode::FORBIDDEN,
+ Json(ApiError::new(
+ "NOT_RED_TEAM",
+ "Only red team tasks can use these endpoints",
+ )),
+ ));
+ }
+
+ // Red team tasks must be associated with a contract
+ let contract_id = task.contract_id.ok_or_else(|| {
+ (
+ StatusCode::BAD_REQUEST,
+ Json(ApiError::new(
+ "NO_CONTRACT",
+ "Red team task must be associated with a contract",
+ )),
+ )
+ })?;
+
+ Ok((task_id, task.owner_id, contract_id))
+}
+
+/// Format an alert message for the supervisor.
+///
+/// Creates a formatted alert with clear visual markers to grab attention.
+fn format_alert_message(
+ severity: &RedTeamSeverity,
+ message: &str,
+ related_task_id: Option<Uuid>,
+ file_path: Option<&str>,
+ context: Option<&str>,
+) -> String {
+ let severity_marker = match severity {
+ RedTeamSeverity::Info => "ℹ️",
+ RedTeamSeverity::Warning => "⚠️",
+ RedTeamSeverity::Critical => "🚨",
+ };
+
+ let border = match severity {
+ RedTeamSeverity::Info => "─".repeat(60),
+ RedTeamSeverity::Warning => "━".repeat(60),
+ RedTeamSeverity::Critical => "═".repeat(60),
+ };
+
+ let mut alert = format!(
+ r#"
+{}
+{} [RED TEAM ALERT] - {}
+{}
+
+Issue: {}
+"#,
+ border, severity_marker, severity, border, message
+ );
+
+ if let Some(task_id) = related_task_id {
+ alert.push_str(&format!("\nRelated Task: {}\n", task_id));
+ }
+
+ if let Some(path) = file_path {
+ alert.push_str(&format!("File: {}\n", path));
+ }
+
+ if let Some(ctx) = context {
+ alert.push_str(&format!("\nContext:\n{}\n", ctx));
+ }
+
+ // Add action suggestions based on severity
+ let actions = match severity {
+ RedTeamSeverity::Info => {
+ "Suggested Actions:\n- Review when convenient\n- Consider if changes are needed"
+ }
+ RedTeamSeverity::Warning => {
+ "Suggested Actions:\n- Review the flagged item soon\n- Check if this deviates from the contract\n- Consider pausing related work until reviewed"
+ }
+ RedTeamSeverity::Critical => {
+ "Suggested Actions:\n- STOP related work immediately\n- Review the flagged item urgently\n- Verify compliance with contract requirements\n- Consider reverting recent changes if necessary"
+ }
+ };
+
+ alert.push_str(&format!("\n{}\n{}\n", actions, border));
+
+ alert
+}
+
+// =============================================================================
+// Handlers
+// =============================================================================
+
+/// Notify the supervisor of a potential issue.
+///
+/// POST /api/v1/mesh/red-team/notify
+///
+/// This endpoint allows red team tasks to alert supervisors about issues they've
+/// identified during code review. The notification is sent as a message to the
+/// supervisor task.
+#[utoipa::path(
+ post,
+ path = "/api/v1/mesh/red-team/notify",
+ request_body = RedTeamNotifyRequest,
+ responses(
+ (status = 200, description = "Notification sent", body = RedTeamNotifyResponse),
+ (status = 401, description = "Unauthorized - tool key required"),
+ (status = 403, description = "Forbidden - not a red team task"),
+ (status = 404, description = "Task not found"),
+ (status = 503, description = "Database not available"),
+ (status = 500, description = "Internal server error"),
+ ),
+ security(
+ ("tool_key" = [])
+ ),
+ tag = "Mesh Red Team"
+)]
+pub async fn notify_supervisor(
+ State(state): State<SharedState>,
+ headers: HeaderMap,
+ Json(request): Json<RedTeamNotifyRequest>,
+) -> impl IntoResponse {
+ let (red_team_task_id, owner_id, contract_id) =
+ match verify_red_team_auth(&state, &headers).await {
+ Ok(ids) => ids,
+ Err(e) => return e.into_response(),
+ };
+
+ let pool = state.db_pool.as_ref().unwrap();
+
+ // Generate notification ID
+ let notification_id = Uuid::new_v4();
+
+ // Get the contract to find the supervisor task
+ let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await {
+ Ok(Some(c)) => c,
+ Ok(None) => {
+ return (
+ StatusCode::NOT_FOUND,
+ Json(ApiError::new("NOT_FOUND", "Contract not found")),
+ )
+ .into_response();
+ }
+ Err(e) => {
+ tracing::error!(error = %e, "Failed to get contract");
+ return (
+ StatusCode::INTERNAL_SERVER_ERROR,
+ Json(ApiError::new("DB_ERROR", "Failed to get contract")),
+ )
+ .into_response();
+ }
+ };
+
+ let supervisor_task_id = contract.supervisor_task_id;
+
+ // Format the alert message
+ let alert_message = format_alert_message(
+ &request.severity,
+ &request.message,
+ request.related_task_id,
+ request.file_path.as_deref(),
+ request.context.as_deref(),
+ );
+
+ // Record the notification in the database as a history event
+ let event_data = serde_json::json!({
+ "notification_id": notification_id.to_string(),
+ "red_team_task_id": red_team_task_id.to_string(),
+ "severity": request.severity.to_string(),
+ "message": request.message,
+ "related_task_id": request.related_task_id.map(|id| id.to_string()),
+ "file_path": request.file_path,
+ "context": request.context,
+ });
+
+ let _ = repository::record_history_event(
+ pool,
+ owner_id,
+ Some(contract_id),
+ Some(red_team_task_id),
+ "red_team_alert",
+ Some(&request.severity.to_string().to_lowercase()),
+ Some(&request.message),
+ event_data,
+ )
+ .await;
+
+ // Try to send the message to the supervisor
+ let mut delivered = false;
+ if let Some(sup_task_id) = supervisor_task_id {
+ // Get the supervisor task to find its daemon
+ if let Ok(Some(supervisor_task)) = repository::get_task(pool, sup_task_id).await {
+ if let Some(daemon_id) = supervisor_task.daemon_id {
+ // Send the alert message to the supervisor
+ let cmd = DaemonCommand::SendMessage {
+ task_id: sup_task_id,
+ message: alert_message.clone(),
+ };
+
+ if let Err(e) = state.send_daemon_command(daemon_id, cmd).await {
+ tracing::warn!(
+ error = %e,
+ supervisor_task_id = %sup_task_id,
+ daemon_id = %daemon_id,
+ "Failed to send red team alert to supervisor"
+ );
+ } else {
+ delivered = true;
+ tracing::info!(
+ notification_id = %notification_id,
+ red_team_task_id = %red_team_task_id,
+ supervisor_task_id = %sup_task_id,
+ severity = %request.severity,
+ "Red team alert delivered to supervisor"
+ );
+ }
+ } else {
+ tracing::warn!(
+ supervisor_task_id = %sup_task_id,
+ "Supervisor task has no assigned daemon - alert not delivered"
+ );
+ }
+ }
+ } else {
+ tracing::warn!(
+ contract_id = %contract_id,
+ "Contract has no supervisor task - alert not delivered"
+ );
+ }
+
+ (
+ StatusCode::OK,
+ Json(RedTeamNotifyResponse {
+ notification_id,
+ delivered,
+ supervisor_task_id,
+ }),
+ )
+ .into_response()
+}
+
+/// Get the status of the red team task.
+///
+/// GET /api/v1/mesh/red-team/status
+///
+/// Returns information about the current red team task including the contract
+/// being monitored and notification statistics.
+#[utoipa::path(
+ get,
+ path = "/api/v1/mesh/red-team/status",
+ responses(
+ (status = 200, description = "Red team status", body = RedTeamStatusResponse),
+ (status = 401, description = "Unauthorized - tool key required"),
+ (status = 403, description = "Forbidden - not a red team task"),
+ (status = 404, description = "Task not found"),
+ (status = 503, description = "Database not available"),
+ (status = 500, description = "Internal server error"),
+ ),
+ security(
+ ("tool_key" = [])
+ ),
+ tag = "Mesh Red Team"
+)]
+pub async fn get_status(
+ State(state): State<SharedState>,
+ headers: HeaderMap,
+) -> impl IntoResponse {
+ let (red_team_task_id, owner_id, contract_id) =
+ match verify_red_team_auth(&state, &headers).await {
+ Ok(ids) => ids,
+ Err(e) => return e.into_response(),
+ };
+
+ let pool = state.db_pool.as_ref().unwrap();
+
+ // Get the red team task status
+ let task = match repository::get_task(pool, red_team_task_id).await {
+ Ok(Some(t)) => t,
+ Ok(None) => {
+ return (
+ StatusCode::NOT_FOUND,
+ Json(ApiError::new("NOT_FOUND", "Red team task not found")),
+ )
+ .into_response();
+ }
+ Err(e) => {
+ tracing::error!(error = %e, "Failed to get red team task");
+ return (
+ StatusCode::INTERNAL_SERVER_ERROR,
+ Json(ApiError::new("DB_ERROR", "Failed to get task")),
+ )
+ .into_response();
+ }
+ };
+
+ // Count notifications sent by this red team task
+ // Query history_events for red_team_alert events from this task
+ let notifications_sent = match sqlx::query_scalar::<_, i64>(
+ r#"
+ SELECT COUNT(*)
+ FROM history_events
+ WHERE owner_id = $1
+ AND contract_id = $2
+ AND task_id = $3
+ AND event_type = 'red_team_alert'
+ "#,
+ )
+ .bind(owner_id)
+ .bind(contract_id)
+ .bind(red_team_task_id)
+ .fetch_one(pool)
+ .await
+ {
+ Ok(count) => count,
+ Err(e) => {
+ tracing::warn!(error = %e, "Failed to count red team notifications");
+ 0
+ }
+ };
+
+ (
+ StatusCode::OK,
+ Json(RedTeamStatusResponse {
+ contract_id,
+ red_team_task_id,
+ status: task.status,
+ notifications_sent,
+ }),
+ )
+ .into_response()
+}
diff --git a/makima/src/server/handlers/mesh_supervisor.rs b/makima/src/server/handlers/mesh_supervisor.rs
index a654a05..6d9f8fb 100644
--- a/makima/src/server/handlers/mesh_supervisor.rs
+++ b/makima/src/server/handlers/mesh_supervisor.rs
@@ -610,6 +610,7 @@ pub async fn spawn_task(
contract_id: Some(request.contract_id),
parent_task_id: request.parent_task_id,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: request.checkpoint_sha.clone(),
merge_mode: Some("manual".to_string()),
priority: 0,
@@ -742,6 +743,67 @@ pub async fn spawn_task(
updated_fields: vec!["status".to_string(), "daemon_id".to_string()],
updated_by: "supervisor".to_string(),
});
+
+ // Check if we should spawn a red team task
+ // Conditions:
+ // 1. This is not a supervisor task
+ // 2. This is not already a red team task
+ // 3. Contract has red_team_enabled = true
+ // 4. No red team task exists for this contract yet
+ if !updated_task.is_supervisor && !updated_task.is_red_team && contract.red_team_enabled {
+ if let Some(contract_id) = updated_task.contract_id {
+ // Check if a red team task already exists
+ match repository::get_red_team_task_for_contract(pool, contract_id).await {
+ Ok(None) => {
+ // No red team task exists, spawn one
+ tracing::info!(
+ contract_id = %contract_id,
+ work_task_id = %updated_task.id,
+ "Spawning red team task for contract (first work task started)"
+ );
+ match spawn_red_team_task(
+ pool,
+ &state,
+ contract_id,
+ owner_id,
+ &contract.name,
+ &contract.phase,
+ contract.red_team_prompt.as_deref(),
+ ).await {
+ Ok(red_team_task) => {
+ tracing::info!(
+ contract_id = %contract_id,
+ red_team_task_id = %red_team_task.id,
+ "Red team task spawned successfully"
+ );
+ }
+ Err(e) => {
+ // Log error but don't fail the work task spawn
+ tracing::error!(
+ contract_id = %contract_id,
+ error = %e,
+ "Failed to spawn red team task"
+ );
+ }
+ }
+ }
+ Ok(Some(existing)) => {
+ tracing::debug!(
+ contract_id = %contract_id,
+ red_team_task_id = %existing.id,
+ "Red team task already exists for contract"
+ );
+ }
+ Err(e) => {
+ tracing::error!(
+ contract_id = %contract_id,
+ error = %e,
+ "Error checking for existing red team task"
+ );
+ }
+ }
+ }
+ }
}
break;
}
@@ -2396,3 +2458,233 @@ pub async fn rewind_conversation(
})
.into_response()
}
+
+// =============================================================================
+// Red Team Task Spawning
+// =============================================================================
+
+/// Generate the system prompt/plan for a red team task.
+///
+/// This creates detailed instructions for the red team monitor, including
+/// what to look for, severity levels, and how to report issues.
+pub fn generate_red_team_plan(
+ contract_name: &str,
+ contract_phase: &str,
+ custom_prompt: Option<&str>,
+) -> String {
+ let custom_criteria = if let Some(prompt) = custom_prompt {
+ format!(
+ r#"
+
+## Custom Review Criteria
+
+The contract owner has specified additional review criteria:
+{}
+"#,
+ prompt
+ )
+ } else {
+ String::new()
+ };
+
+ format!(
+ r#"# Red Team Monitor
+
+You are an adversarial quality reviewer for a software development contract. Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems.
+
+## Your Mission
+
+Monitor all task outputs and verify:
+1. **Plan Adherence**: Are tasks following the implementation plan?
+2. **Code Quality**: Does the code meet repository standards?
+3. **Contract Requirements**: Does the implementation match the specification?
+4. **Best Practices**: Are there obvious anti-patterns or issues?
+
+## Access Available
+
+You have read-only access to:
+- Task outputs (streamed in real-time)
+- Task diffs (code changes)
+- Contract specifications and plan documents
+- Repository configuration files (CONTRIBUTING.md, linting configs, etc.)
+
+## How to Monitor
+
+1. **Subscribe to task outputs**: You'll receive outputs from all work tasks
+2. **Analyze code changes**: Request diffs for completed tasks
+3. **Cross-reference**: Compare outputs against the plan and specifications
+4. **Report issues**: Use `makima red-team notify` when you detect problems
+
+## When to Notify
+
+NOTIFY the supervisor when you observe:
+- **Critical**: Security vulnerabilities, data loss risks, breaking changes
+- **High**: Significant deviations from the plan, major code quality issues
+- **Medium**: Missing tests, suboptimal implementations, minor standard violations
+- **Low**: Style inconsistencies, documentation gaps (use sparingly)
+
+## What NOT to Do
+
+- Do NOT nitpick minor style issues (that's what linters are for)
+- Do NOT block progress for trivial concerns
+- Do NOT write code or make changes yourself
+- Do NOT notify for things that are already in progress and being addressed
+- Do NOT create duplicate notifications for the same issue
+
+## Notification Format
+
+When notifying, always include:
+1. A clear, concise description of the issue
+2. The severity level (critical/high/medium/low)
+3. The related task ID if applicable
+4. The specific file or code location if known
+5. Why this matters (reference to plan, spec, or standards)
+
+## Example Notification
+
+```
+makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \
+ --severity critical \
+ --task <task_id> \
+ --file "src/auth/user.rs" \
+ --context "Specification section 3.2 requires bcrypt hashing for all passwords"
+```
+{}
+## Contract Context
+
+Contract: {}
+Phase: {}
+
+Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt.
+"#,
+ custom_criteria, contract_name, contract_phase
+ )
+}
+
+/// Spawn a red team task for a contract.
+///
+/// This creates a red team monitor task that will observe work task outputs
+/// and can notify the supervisor about potential issues.
+pub async fn spawn_red_team_task(
+ pool: &sqlx::PgPool,
+ state: &SharedState,
+ contract_id: Uuid,
+ owner_id: Uuid,
+ contract_name: &str,
+ contract_phase: &str,
+ red_team_prompt: Option<&str>,
+) -> Result<Task, String> {
+ // Generate the red team plan/prompt
+ let plan = generate_red_team_plan(contract_name, contract_phase, red_team_prompt);
+
+ // Create task request
+ let create_req = CreateTaskRequest {
+ name: "Red Team Monitor".to_string(),
+ description: Some("Adversarial review task monitoring work task outputs".to_string()),
+ plan,
+ contract_id: Some(contract_id),
+ parent_task_id: None,
+ is_supervisor: false,
+ is_red_team: true,
+ priority: 0,
+ repository_url: None, // Red team doesn't need a repo
+ base_branch: None,
+ target_branch: None,
+ merge_mode: None,
+ target_repo_path: None,
+ completion_action: None,
+ continue_from_task_id: None,
+ copy_files: None,
+ checkpoint_sha: None,
+ branched_from_task_id: None,
+ conversation_history: None,
+ };
+
+ // Create task in DB
+ let task = repository::create_task_for_owner(pool, owner_id, create_req)
+ .await
+ .map_err(|e| format!("Failed to create red team task: {}", e))?;
+
+ tracing::info!(
+ contract_id = %contract_id,
+ red_team_task_id = %task.id,
+ "Created red team task for contract"
+ );
+
+ // Find a daemon to run the red team task
+ for entry in state.daemon_connections.iter() {
+ let daemon = entry.value();
+ if daemon.owner_id == owner_id {
+ // Update task with daemon assignment
+ let update_req = UpdateTaskRequest {
+ status: Some("starting".to_string()),
+ daemon_id: Some(daemon.id),
+ version: Some(task.version),
+ ..Default::default()
+ };
+
+ match repository::update_task_for_owner(pool, task.id, owner_id, update_req).await {
+ Ok(Some(updated_task)) => {
+ // Send spawn command to daemon
+ let cmd = DaemonCommand::SpawnTask {
+ task_id: updated_task.id,
+ task_name: updated_task.name.clone(),
+ plan: updated_task.plan.clone(),
+ repo_url: None, // Red team doesn't need a repo
+ base_branch: None,
+ target_branch: None,
+ parent_task_id: None,
+ depth: 0,
+ is_orchestrator: false,
+ target_repo_path: None,
+ completion_action: None,
+ continue_from_task_id: None,
+ copy_files: None,
+ contract_id: Some(contract_id),
+ is_supervisor: false,
+ autonomous_loop: false,
+ resume_session: false,
+ conversation_history: None,
+ patch_data: None,
+ patch_base_sha: None,
+ local_only: true, // Red team is always local-only
+ };
+
+ if let Err(e) = state.send_daemon_command(daemon.id, cmd).await {
+ tracing::warn!(
+ error = %e,
+ daemon_id = %daemon.id,
+ red_team_task_id = %task.id,
+ "Failed to send red team spawn command"
+ );
+ // Rollback
+ let rollback_req = UpdateTaskRequest {
+ status: Some("pending".to_string()),
+ clear_daemon_id: true,
+ ..Default::default()
+ };
+ let _ = repository::update_task_for_owner(pool, task.id, owner_id, rollback_req).await;
+ } else {
+ tracing::info!(
+ red_team_task_id = %task.id,
+ daemon_id = %daemon.id,
+ "Red team task spawn command sent"
+ );
+ return Ok(updated_task);
+ }
+ }
+ Ok(None) => {
+ tracing::warn!(red_team_task_id = %task.id, "Red team task not found when updating daemon_id");
+ }
+ Err(e) => {
+ tracing::error!(red_team_task_id = %task.id, error = %e, "Failed to update red team task with daemon_id");
+ }
+ }
+ break;
+ }
+ }
+
+ // Return the task even if we couldn't start it on a daemon
+ // It will remain pending and can be started later
+ Ok(task)
+}
diff --git a/makima/src/server/handlers/mod.rs b/makima/src/server/handlers/mod.rs
index 609b63b..b496922 100644
--- a/makima/src/server/handlers/mod.rs
+++ b/makima/src/server/handlers/mod.rs
@@ -13,6 +13,7 @@ pub mod mesh;
pub mod mesh_chat;
pub mod mesh_daemon;
pub mod mesh_merge;
+pub mod mesh_red_team;
pub mod mesh_supervisor;
pub mod mesh_ws;
pub mod repository_history;
diff --git a/makima/src/server/handlers/transcript_analysis.rs b/makima/src/server/handlers/transcript_analysis.rs
index 8eb50c7..3c283da 100644
--- a/makima/src/server/handlers/transcript_analysis.rs
+++ b/makima/src/server/handlers/transcript_analysis.rs
@@ -279,6 +279,8 @@ pub async fn create_contract_from_analysis(
autonomous_loop: None,
phase_guard: None,
local_only: None,
+ red_team_enabled: None,
+ red_team_prompt: None,
};
let contract = match repository::create_contract_for_owner(pool, auth.owner_id, contract_req).await {
@@ -358,6 +360,7 @@ pub async fn create_contract_from_analysis(
continue_from_task_id: None,
copy_files: None,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: None,
priority: match item.priority.as_deref() {
Some("high") => 10,
@@ -531,6 +534,7 @@ pub async fn update_contract_from_analysis(
continue_from_task_id: None,
copy_files: None,
is_supervisor: false,
+ is_red_team: false,
checkpoint_sha: None,
priority: 0,
merge_mode: None,
diff --git a/makima/src/server/mod.rs b/makima/src/server/mod.rs
index bf302a5..b002a49 100644
--- a/makima/src/server/mod.rs
+++ b/makima/src/server/mod.rs
@@ -18,7 +18,7 @@ use tower_http::trace::TraceLayer;
use utoipa::OpenApi;
use utoipa_swagger_ui::SwaggerUi;
-use crate::server::handlers::{api_keys, chat, contract_chat, contract_daemon, contracts, file_ws, files, history, listen, mesh, mesh_chat, mesh_daemon, mesh_merge, mesh_supervisor, mesh_ws, repository_history, templates, transcript_analysis, users, versions};
+use crate::server::handlers::{api_keys, chat, contract_chat, contract_daemon, contracts, file_ws, files, history, listen, mesh, mesh_chat, mesh_daemon, mesh_merge, mesh_red_team, mesh_supervisor, mesh_ws, repository_history, templates, transcript_analysis, users, versions};
use crate::server::openapi::ApiDoc;
use crate::server::state::SharedState;
@@ -129,6 +129,9 @@ pub fn make_router(state: SharedState) -> Router {
.route("/mesh/supervisor/questions", post(mesh_supervisor::ask_question))
.route("/mesh/questions", get(mesh_supervisor::list_pending_questions))
.route("/mesh/questions/{question_id}/answer", post(mesh_supervisor::answer_question))
+ // Red team endpoints (for red team tasks to notify supervisors)
+ .route("/mesh/red-team/notify", post(mesh_red_team::notify_supervisor))
+ .route("/mesh/red-team/status", get(mesh_red_team::get_status))
// Mesh WebSocket endpoints
.route("/mesh/tasks/subscribe", get(mesh_ws::task_subscription_handler))
.route("/mesh/daemons/connect", get(mesh_daemon::daemon_handler))