diff options
| author | soryu <soryu@soryu.co> | 2026-01-27 01:05:25 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-27 01:05:25 +0000 |
| commit | 6cd5b20670d7ecd3d48539ff898e021988f2a503 (patch) | |
| tree | 0232ecd8411b01db07230f8ea2003cbbcdc070a6 | |
| parent | 64cc98783d067625d633eea1142d114e324f76bb (diff) | |
| download | soryu-6cd5b20670d7ecd3d48539ff898e021988f2a503.tar.gz soryu-6cd5b20670d7ecd3d48539ff898e021988f2a503.zip | |
Add Red Team adversarial review system for contract monitoring (#35)
Implements a parallel "red team" task that monitors work task outputs in
real-time, verifying implementations stick to contract requirements,
repository standards, and the execution plan.
Key features:
- New `red_team_enabled` and `red_team_prompt` contract configuration
- Red team tasks auto-spawn when first work task is created
- `makima red-team notify` CLI command for alerting supervisors
- POST /api/v1/mesh/red-team/notify and GET /api/v1/mesh/red-team/status endpoints
- Alert delivery to supervisor via SendMessage daemon command
- Notification audit trail via history_events table
Database changes:
- Add red_team_enabled/red_team_prompt columns to contracts
- Add is_red_team flag to tasks with partial index
- Create red_team_notifications table for audit logging
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
| -rw-r--r-- | .makima/specs/red-team-system.md | 748 | ||||
| -rw-r--r-- | makima/migrations/20260126010000_add_red_team_to_contracts.sql | 7 | ||||
| -rw-r--r-- | makima/migrations/20260126010001_add_red_team_to_tasks.sql | 9 | ||||
| -rw-r--r-- | makima/migrations/20260126010002_create_red_team_notifications.sql | 27 | ||||
| -rw-r--r-- | makima/src/bin/makima.rs | 3 | ||||
| -rw-r--r-- | makima/src/daemon/api/contract.rs | 6 | ||||
| -rw-r--r-- | makima/src/db/models.rs | 91 | ||||
| -rw-r--r-- | makima/src/db/repository.rs | 125 | ||||
| -rw-r--r-- | makima/src/server/handlers/contract_chat.rs | 6 | ||||
| -rw-r--r-- | makima/src/server/handlers/contracts.rs | 1 | ||||
| -rw-r--r-- | makima/src/server/handlers/mesh.rs | 4 | ||||
| -rw-r--r-- | makima/src/server/handlers/mesh_chat.rs | 1 | ||||
| -rw-r--r-- | makima/src/server/handlers/mesh_red_team.rs | 497 | ||||
| -rw-r--r-- | makima/src/server/handlers/mesh_supervisor.rs | 292 | ||||
| -rw-r--r-- | makima/src/server/handlers/mod.rs | 1 | ||||
| -rw-r--r-- | makima/src/server/handlers/transcript_analysis.rs | 4 | ||||
| -rw-r--r-- | makima/src/server/mod.rs | 5 |
17 files changed, 1813 insertions, 14 deletions
diff --git a/.makima/specs/red-team-system.md b/.makima/specs/red-team-system.md new file mode 100644 index 0000000..31f4b78 --- /dev/null +++ b/.makima/specs/red-team-system.md @@ -0,0 +1,748 @@ +# Red Team System Specification + +## Overview + +The Red Team system is an adversarial review feature for makima contracts that provides real-time quality assurance during task execution. When enabled, a parallel "red team" task instance monitors the output of work tasks, verifying that implementations adhere to the contract requirements, repository standards, and the execution plan. + +### Goals + +1. **Quality Assurance**: Catch deviations from the plan before they compound +2. **Standards Compliance**: Ensure code follows repository conventions (CONTRIBUTING.md, linting rules, etc.) +3. **Contract Adherence**: Verify implementations match the specification and requirements +4. **Proactive Issue Detection**: Flag potential problems early, not after task completion + +### Non-Goals + +1. The red team should NOT write code or make commits +2. The red team should NOT be overly pedantic or block progress for minor style issues +3. The red team is NOT a replacement for code review - it's an early warning system + +--- + +## 1. 
Feature Overview + +### 1.1 Concept + +The Red Team operates as a parallel observer task that: +- Monitors all work task outputs in real-time via the broadcast system +- Has read-only access to task diffs and outputs +- Can access contract specifications, plans, and repository standards +- Can notify the supervisor when it detects issues requiring attention + +### 1.2 Relationship to Existing Components + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Contract │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Supervisor │ │ Work Task 1 │ │ Work Task 2 │ │ +│ │ │<───│ │ │ │ │ +│ │ │<───│ │ │ │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ ^ │ │ │ +│ │ outputs outputs │ +│ │ │ │ │ +│ [NOTIFY] v v │ +│ │ ┌─────────────────────────────┐ │ +│ └────────────│ Red Team Task │ │ +│ │ (Monitoring & Validation) │ │ +│ └─────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### 1.3 Task Type + +The Red Team task is a special task variant with the following characteristics: +- `is_red_team: true` flag on the Task model +- Has tool key for API access (like supervisor tasks) +- Does NOT have write permissions to the repository +- Subscribes to task output broadcasts +- Can use `makima red-team notify` command to alert supervisor + +--- + +## 2. Contract Configuration + +### 2.1 Contract Model Changes + +Add the following field to the `Contract` model in `makima/src/db/models.rs`: + +```rust +/// Contract record from the database +#[derive(Debug, Clone, FromRow, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct Contract { + // ... existing fields ... + + /// Whether to spawn a red team task to monitor work tasks. + /// When enabled, a parallel task monitors outputs and can alert + /// the supervisor about potential issues. 
+ #[serde(default)] + pub red_team_enabled: bool, + + /// Optional custom prompt/criteria for the red team to use + /// when evaluating task outputs. If not provided, uses default + /// quality criteria. + #[serde(skip_serializing_if = "Option::is_none")] + pub red_team_prompt: Option<String>, +} +``` + +### 2.2 CreateContractRequest Changes + +```rust +#[derive(Debug, Clone, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CreateContractRequest { + // ... existing fields ... + + /// Enable red team monitoring for this contract. + /// When enabled, a parallel task monitors work task outputs + /// and can alert the supervisor about potential issues. + #[serde(default)] + pub red_team_enabled: Option<bool>, + + /// Optional custom criteria for the red team to evaluate. + /// Examples: "Focus on security vulnerabilities", + /// "Ensure all functions have tests", etc. + pub red_team_prompt: Option<String>, +} +``` + +### 2.3 CLI Flag for Contract Creation + +The daemon CLI should support red team enablement during contract creation: + +```bash +# Enable red team with default criteria +makima supervisor create --red-team "Contract Name" "Description" + +# Enable red team with custom review criteria +makima supervisor create --red-team --red-team-prompt "Focus on performance and memory usage" "Contract Name" "Description" +``` + +--- + +## 3. Red Team Task Lifecycle + +### 3.1 Spawning + +The red team task is spawned automatically when: +1. A contract has `red_team_enabled: true` +2. 
The first work task is spawned (not the supervisor itself) + +**Spawn Logic** (in `spawn_task` handler or supervisor spawn logic): + +```rust +// In spawn_task after creating a work task: +if contract.red_team_enabled && !is_supervisor_task { + // Check if red team task already exists + let existing_red_team = repository::get_red_team_task_for_contract(pool, contract_id).await?; + + if existing_red_team.is_none() { + // Spawn red team task + let red_team_task = spawn_red_team_task( + pool, + state, + contract_id, + owner_id, + contract.red_team_prompt.as_deref(), + ).await?; + + tracing::info!( + contract_id = %contract_id, + red_team_task_id = %red_team_task.id, + "Spawned red team task for contract" + ); + } +} +``` + +### 3.2 Task Properties + +When creating the red team task: + +```rust +CreateTaskRequest { + name: "Red Team Monitor".to_string(), + description: Some("Adversarial review task monitoring work task outputs".to_string()), + plan: generate_red_team_plan(contract, custom_prompt), + contract_id: Some(contract_id), + parent_task_id: None, // Not a child of supervisor + is_supervisor: false, + is_red_team: true, // NEW FIELD + // ... other fields ... +} +``` + +### 3.3 Lifespan + +The red team task: +- Lives for the duration of the **execute phase** +- Is automatically terminated when: + - The contract advances past the execute phase + - The contract is completed + - The contract is archived +- Can be paused/resumed along with other contract tasks +- Does NOT restart automatically after daemon failure (not critical path) + +### 3.4 Read-Only Enforcement + +The red team task: +- Has NO worktree of its own (or a read-only clone) +- Cannot use git operations (commit, branch, etc.) +- Can only READ files, not write them +- Has API access limited to read operations + +--- + +## 4. 
Red Team Notification CLI Command + +### 4.1 Command Specification + +New CLI command available only to red team tasks: + +```bash +makima red-team notify "<message>" +``` + +**Arguments:** +- `<message>`: A detailed description of the issue detected + +**Options:** +- `--severity <level>`: Issue severity: `low`, `medium`, `high`, `critical` (default: `medium`) +- `--task <task_id>`: The specific task this relates to (optional) +- `--file <path>`: The file path where the issue was detected (optional) +- `--context <text>`: Additional context about the issue (optional) + +**Example:** + +```bash +makima red-team notify "Task is adding console.log statements which violates the no-debug-logging rule in CONTRIBUTING.md" \ + --severity medium \ + --task 550e8400-e29b-41d4-a716-446655440000 \ + --file "src/api/handler.rs" +``` + +### 4.2 CLI Arguments Structure + +```rust +// In makima/src/daemon/cli/mod.rs + +/// Red Team subcommand - red team task commands. +#[derive(Subcommand, Debug)] +pub enum RedTeamCommand { + /// Send a notification to the supervisor about a detected issue. + /// Only available to red team tasks. + Notify(NotifyArgs), +} + +/// Arguments for red-team notify command. 
+#[derive(Args, Debug)] +pub struct NotifyArgs { + /// API URL + #[arg(long, env = "MAKIMA_API_URL", default_value = "https://api.makima.jp")] + pub api_url: String, + + /// API key for authentication + #[arg(long, env = "MAKIMA_API_KEY")] + pub api_key: String, + + /// Current task ID (must be a red team task) + #[arg(long, env = "MAKIMA_TASK_ID")] + pub task_id: Uuid, + + /// Contract ID + #[arg(long, env = "MAKIMA_CONTRACT_ID")] + pub contract_id: Uuid, + + /// The notification message + #[arg(index = 1)] + pub message: String, + + /// Severity level: low, medium, high, critical + #[arg(long, default_value = "medium")] + pub severity: String, + + /// Related task ID (optional) + #[arg(long)] + pub task: Option<Uuid>, + + /// Related file path (optional) + #[arg(long)] + pub file: Option<String>, + + /// Additional context (optional) + #[arg(long)] + pub context: Option<String>, +} +``` + +### 4.3 API Endpoint + +**POST** `/api/v1/mesh/red-team/notify` + +**Request Body:** +```json +{ + "message": "Issue description", + "severity": "medium", + "relatedTaskId": "uuid-optional", + "filePath": "src/path/optional.rs", + "context": "Additional context optional" +} +``` + +**Response:** +```json +{ + "notificationId": "uuid", + "delivered": true, + "supervisorTaskId": "uuid" +} +``` + +### 4.4 Notification Delivery + +When a red team notification is received: + +1. **Validate Caller**: Ensure the request comes from a valid red team task +2. **Find Supervisor**: Get the supervisor task for the contract +3. **Format Message**: Create an `[ACTION REQUIRED]` formatted message +4. 
**Send to Supervisor**: Inject the message into the supervisor's stdin via `SendMessage` command + +**Message Format:** + +``` +════════════════════════════════════════════════════════════════ +[RED TEAM ALERT] Severity: MEDIUM +════════════════════════════════════════════════════════════════ + +Issue: Task is adding console.log statements which violates the +no-debug-logging rule in CONTRIBUTING.md + +Related Task: 550e8400-e29b-41d4-a716-446655440000 +File: src/api/handler.rs + +Context: The CONTRIBUTING.md file explicitly states that debug +logging should use the tracing crate, not console.log or println! + +════════════════════════════════════════════════════════════════ +You can: +- Pause the related task to investigate +- Send feedback to the task to correct the issue +- Acknowledge this alert and continue monitoring +════════════════════════════════════════════════════════════════ +``` + +### 4.5 Supervisor Response Handling + +The supervisor can respond to red team notifications by: +1. **Pausing the task**: `makima supervisor pause <task_id>` +2. **Sending feedback**: `makima supervisor message <task_id> "Please use tracing instead of console.log"` +3. **Acknowledging**: Simply continue (the red team will keep monitoring) +4. **Dismissing**: Mark the alert as false positive (future consideration) + +--- + +## 5. 
Red Team Access Patterns + +### 5.1 Task Output Subscription + +The red team task subscribes to the `task_outputs` broadcast channel: + +```rust +// In red team task initialization +let mut task_output_rx = state.task_outputs.subscribe(); + +loop { + match task_output_rx.recv().await { + Ok(notification) => { + // Only process outputs from work tasks in our contract + if notification.contract_id == Some(self.contract_id) + && !notification.is_supervisor + && !notification.is_red_team { + self.analyze_output(notification).await; + } + } + Err(e) => { + tracing::warn!("Red team task output subscription error: {}", e); + } + } +} +``` + +### 5.2 Task Diff Access + +The red team can request diffs via the supervisor API: + +**GET** `/api/v1/mesh/supervisor/tasks/{task_id}/diff` + +This endpoint already exists and can be used by the red team (with tool key auth). + +### 5.3 Contract Information Access + +The red team can read: +- Contract plan and specifications (via contract files) +- Repository standards (CONTRIBUTING.md, .editorconfig, etc.) +- Task descriptions and plans + +**Existing endpoints used:** +- `GET /api/v1/contracts/{id}` - Contract details +- `GET /api/v1/contracts/{id}/files` - Contract files +- `GET /api/v1/files/{id}` - File content + +### 5.4 Repository File Access + +For repository standards, the red team uses the existing daemon file read capability: + +```bash +# Via makima CLI (from within the red team task) +makima supervisor read-file <task_id> "CONTRIBUTING.md" +makima supervisor read-file <task_id> ".editorconfig" +makima supervisor read-file <task_id> "rustfmt.toml" +``` + +Or direct filesystem access if the red team has a read-only worktree clone. + +--- + +## 6. System Prompt for Red Team Task + +The red team task receives a specialized system prompt that guides its behavior: + +```markdown +# Red Team Monitor + +You are an adversarial quality reviewer for a software development contract. 
Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems. + +## Your Mission + +Monitor all task outputs and verify: +1. **Plan Adherence**: Are tasks following the implementation plan? +2. **Code Quality**: Does the code meet repository standards? +3. **Contract Requirements**: Does the implementation match the specification? +4. **Best Practices**: Are there obvious anti-patterns or issues? + +## Access Available + +You have read-only access to: +- Task outputs (streamed in real-time) +- Task diffs (code changes) +- Contract specifications and plan documents +- Repository configuration files (CONTRIBUTING.md, linting configs, etc.) + +## How to Monitor + +1. **Subscribe to task outputs**: You'll receive outputs from all work tasks +2. **Analyze code changes**: Request diffs for completed tasks +3. **Cross-reference**: Compare outputs against the plan and specifications +4. **Report issues**: Use `makima red-team notify` when you detect problems + +## When to Notify + +NOTIFY the supervisor when you observe: +- **Critical**: Security vulnerabilities, data loss risks, breaking changes +- **High**: Significant deviations from the plan, major code quality issues +- **Medium**: Missing tests, suboptimal implementations, minor standard violations +- **Low**: Style inconsistencies, documentation gaps (use sparingly) + +## What NOT to Do + +- Do NOT nitpick minor style issues (that's what linters are for) +- Do NOT block progress for trivial concerns +- Do NOT write code or make changes yourself +- Do NOT notify for things that are already in progress and being addressed +- Do NOT create duplicate notifications for the same issue + +## Notification Format + +When notifying, always include: +1. A clear, concise description of the issue +2. The severity level (critical/high/medium/low) +3. The related task ID if applicable +4. The specific file or code location if known +5. 
Why this matters (reference to plan, spec, or standards) + +## Example Notification + +``` +makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \ + --severity critical \ + --task <task_id> \ + --file "src/auth/user.rs" \ + --context "Specification section 3.2 requires bcrypt hashing for all passwords" +``` + +## Custom Review Criteria + +{{#if red_team_prompt}} +Additional review criteria for this contract: +{{red_team_prompt}} +{{/if}} + +## Contract Context + +Contract: {{contract_name}} +Phase: {{contract_phase}} +Repository: {{repository_url}} + +Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt. +``` + +--- + +## 7. API Changes Summary + +### 7.1 New Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/api/v1/mesh/red-team/notify` | Send notification from red team to supervisor | +| GET | `/api/v1/mesh/red-team/status` | Get red team task status for a contract | + +### 7.2 Modified Endpoints + +| Method | Path | Change | +|--------|------|--------| +| POST | `/api/v1/contracts` | Add `red_team_enabled` and `red_team_prompt` fields | +| GET | `/api/v1/contracts/{id}` | Include red team task info in response | + +### 7.3 New Request/Response Types + +**RedTeamNotifyRequest:** +```rust +#[derive(Debug, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamNotifyRequest { + pub message: String, + #[serde(default = "default_severity")] + pub severity: String, + pub related_task_id: Option<Uuid>, + pub file_path: Option<String>, + pub context: Option<String>, +} +``` + +**RedTeamNotifyResponse:** +```rust +#[derive(Debug, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamNotifyResponse { + pub notification_id: Uuid, + pub delivered: bool, + pub 
supervisor_task_id: Uuid, +} +``` + +**RedTeamStatusResponse:** +```rust +#[derive(Debug, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamStatusResponse { + pub contract_id: Uuid, + pub red_team_task_id: Option<Uuid>, + pub status: Option<String>, + pub notifications_sent: i32, + pub last_activity: Option<DateTime<Utc>>, +} +``` + +--- + +## 8. Database Schema Changes + +### 8.1 Contracts Table + +```sql +ALTER TABLE contracts +ADD COLUMN red_team_enabled BOOLEAN NOT NULL DEFAULT FALSE, +ADD COLUMN red_team_prompt TEXT; +``` + +### 8.2 Tasks Table + +```sql +ALTER TABLE tasks +ADD COLUMN is_red_team BOOLEAN NOT NULL DEFAULT FALSE; +``` + +### 8.3 Red Team Notifications Table (New) + +```sql +CREATE TABLE red_team_notifications ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + contract_id UUID NOT NULL REFERENCES contracts(id) ON DELETE CASCADE, + red_team_task_id UUID NOT NULL REFERENCES tasks(id) ON DELETE CASCADE, + related_task_id UUID REFERENCES tasks(id) ON DELETE SET NULL, + + message TEXT NOT NULL, + severity VARCHAR(20) NOT NULL DEFAULT 'medium', + file_path TEXT, + context TEXT, + + -- Delivery status + delivered BOOLEAN NOT NULL DEFAULT FALSE, + delivered_at TIMESTAMP WITH TIME ZONE, + acknowledged BOOLEAN NOT NULL DEFAULT FALSE, + acknowledged_at TIMESTAMP WITH TIME ZONE, + + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() +); + +-- Indexes +CREATE INDEX idx_red_team_notifications_contract_id ON red_team_notifications(contract_id); +CREATE INDEX idx_red_team_notifications_red_team_task_id ON red_team_notifications(red_team_task_id); +CREATE INDEX idx_red_team_notifications_created_at ON red_team_notifications(created_at DESC); +``` + +### 8.4 Index for Red Team Task Lookup + +```sql +CREATE INDEX idx_tasks_contract_red_team ON tasks(contract_id, is_red_team) +WHERE is_red_team = TRUE; +``` + +--- + +## 9. 
Implementation Phases + +### Phase 1: Foundation (MVP) +- [ ] Add `red_team_enabled` and `red_team_prompt` to Contract model +- [ ] Add `is_red_team` to Task model +- [ ] Database migrations +- [ ] Basic red team task spawning logic +- [ ] `makima red-team notify` CLI command +- [ ] Red team notification API endpoint + +### Phase 2: Monitoring Infrastructure +- [ ] Task output subscription for red team +- [ ] Diff access for red team tasks +- [ ] Red team system prompt generation +- [ ] Notification delivery to supervisor + +### Phase 3: Polish & UX +- [ ] Red team status in contract view +- [ ] Notification history and acknowledgment +- [ ] TUI integration for red team alerts +- [ ] Frontend display of red team notifications + +### Phase 4: Future Enhancements +- [ ] Configurable notification thresholds +- [ ] Automatic pause on critical issues +- [ ] Red team notification digest/summary +- [ ] Integration with external code review tools + +--- + +## 10. Security Considerations + +### 10.1 Access Control + +- Red team tasks MUST only have read access +- Verify `is_red_team` flag before allowing notification API calls +- Red team cannot spawn tasks or modify contract state +- Tool key scope should be limited for red team tasks + +### 10.2 Abuse Prevention + +- Rate limit red team notifications (max 10 per minute per task) +- Prevent notification spam with deduplication +- Log all red team activities for audit + +### 10.3 Isolation + +- Red team task runs in separate worktree (or no worktree) +- Cannot affect work task execution directly +- Supervisor controls whether to act on notifications + +--- + +## 11. 
Testing Strategy + +### 11.1 Unit Tests + +- Contract model serialization with red team fields +- Red team task spawning conditions +- Notification message formatting + +### 11.2 Integration Tests + +- Full contract lifecycle with red team enabled +- Notification delivery to supervisor +- Red team output subscription + +### 11.3 E2E Tests + +- Create contract with `--red-team` flag +- Red team detects intentional violation +- Supervisor receives and responds to notification + +--- + +## 12. Success Metrics + +1. **Detection Rate**: Percentage of issues caught by red team before task completion +2. **False Positive Rate**: Percentage of notifications that are dismissed as not actionable +3. **Response Time**: Time between red team detection and supervisor acknowledgment +4. **Contract Success Rate**: Compare success rates for contracts with/without red team + +--- + +## Appendix A: Message Protocol + +### Task Output Notification Structure + +The red team subscribes to `TaskOutputNotification`: + +```rust +pub struct TaskOutputNotification { + pub task_id: Uuid, + pub owner_id: Option<Uuid>, + pub message_type: String, // "assistant", "tool_use", "tool_result", etc. + pub content: String, + pub tool_name: Option<String>, + pub tool_input: Option<serde_json::Value>, + pub is_error: Option<bool>, + pub cost_usd: Option<f64>, + pub duration_ms: Option<u64>, + pub is_partial: bool, +} +``` + +### Daemon Command for Supervisor Message + +```rust +DaemonCommand::SendMessage { + task_id: supervisor_id, + message: formatted_red_team_alert, +} +``` + +--- + +## Appendix B: Configuration Examples + +### Contract Creation with Red Team (API) + +```json +POST /api/v1/contracts +{ + "name": "Implement User Authentication", + "description": "Add OAuth2 authentication flow", + "contractType": "specification", + "redTeamEnabled": true, + "redTeamPrompt": "Pay special attention to security best practices and OWASP guidelines.
Flag any hardcoded secrets or insecure token handling." +} +``` + +### Contract Creation with Red Team (CLI) + +```bash +makima contract create \ + --type specification \ + --red-team \ + --red-team-prompt "Focus on API backwards compatibility and deprecation handling" \ + "API v2 Migration" \ + "Migrate public API from v1 to v2" +``` diff --git a/makima/migrations/20260126010000_add_red_team_to_contracts.sql b/makima/migrations/20260126010000_add_red_team_to_contracts.sql new file mode 100644 index 0000000..742902e --- /dev/null +++ b/makima/migrations/20260126010000_add_red_team_to_contracts.sql @@ -0,0 +1,7 @@ +-- Add red team configuration to contracts +ALTER TABLE contracts +ADD COLUMN red_team_enabled BOOLEAN NOT NULL DEFAULT FALSE, +ADD COLUMN red_team_prompt TEXT; + +COMMENT ON COLUMN contracts.red_team_enabled IS 'Whether to spawn a red team task to monitor work tasks'; +COMMENT ON COLUMN contracts.red_team_prompt IS 'Custom criteria for the red team to evaluate'; diff --git a/makima/migrations/20260126010001_add_red_team_to_tasks.sql b/makima/migrations/20260126010001_add_red_team_to_tasks.sql new file mode 100644 index 0000000..cb21405 --- /dev/null +++ b/makima/migrations/20260126010001_add_red_team_to_tasks.sql @@ -0,0 +1,9 @@ +-- Add red team flag to tasks +ALTER TABLE tasks +ADD COLUMN is_red_team BOOLEAN NOT NULL DEFAULT FALSE; + +-- Index for efficient red team task lookup per contract +CREATE INDEX idx_tasks_contract_red_team ON tasks(contract_id, is_red_team) +WHERE is_red_team = TRUE; + +COMMENT ON COLUMN tasks.is_red_team IS 'Whether this is a red team monitoring task'; diff --git a/makima/migrations/20260126010002_create_red_team_notifications.sql b/makima/migrations/20260126010002_create_red_team_notifications.sql new file mode 100644 index 0000000..fc0b687 --- /dev/null +++ b/makima/migrations/20260126010002_create_red_team_notifications.sql @@ -0,0 +1,27 @@ +-- Create red team notifications table +CREATE TABLE red_team_notifications ( + id 
UUID PRIMARY KEY DEFAULT gen_random_uuid(), + contract_id UUID NOT NULL REFERENCES contracts(id) ON DELETE CASCADE, + red_team_task_id UUID NOT NULL REFERENCES tasks(id) ON DELETE CASCADE, + related_task_id UUID REFERENCES tasks(id) ON DELETE SET NULL, + + message TEXT NOT NULL, + severity VARCHAR(20) NOT NULL DEFAULT 'medium', + file_path TEXT, + context TEXT, + + -- Delivery status + delivered BOOLEAN NOT NULL DEFAULT FALSE, + delivered_at TIMESTAMPTZ, + acknowledged BOOLEAN NOT NULL DEFAULT FALSE, + acknowledged_at TIMESTAMPTZ, + + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- Indexes +CREATE INDEX idx_red_team_notifications_contract_id ON red_team_notifications(contract_id); +CREATE INDEX idx_red_team_notifications_red_team_task_id ON red_team_notifications(red_team_task_id); +CREATE INDEX idx_red_team_notifications_created_at ON red_team_notifications(created_at DESC); + +COMMENT ON TABLE red_team_notifications IS 'Audit log of notifications sent from red team tasks to supervisors'; diff --git a/makima/src/bin/makima.rs b/makima/src/bin/makima.rs index 6ddecab..6976106 100644 --- a/makima/src/bin/makima.rs +++ b/makima/src/bin/makima.rs @@ -1096,6 +1096,9 @@ async fn run_tui_loop( initial_phase: None, autonomous_loop: None, phase_guard: None, + local_only: None, + red_team_enabled: None, + red_team_prompt: None, }; match client.create_contract(req).await { diff --git a/makima/src/daemon/api/contract.rs b/makima/src/daemon/api/contract.rs index 12ebe95..445d676 100644 --- a/makima/src/daemon/api/contract.rs +++ b/makima/src/daemon/api/contract.rs @@ -66,6 +66,12 @@ pub struct CreateContractRequest { pub autonomous_loop: Option<bool>, #[serde(skip_serializing_if = "Option::is_none")] pub phase_guard: Option<bool>, + #[serde(skip_serializing_if = "Option::is_none")] + pub local_only: Option<bool>, + #[serde(skip_serializing_if = "Option::is_none")] + pub red_team_enabled: Option<bool>, + #[serde(skip_serializing_if = "Option::is_none")] + pub 
red_team_prompt: Option<String>, } impl ApiClient { diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs index 9c2d072..d5f2814 100644 --- a/makima/src/db/models.rs +++ b/makima/src/db/models.rs @@ -440,6 +440,11 @@ pub struct Task { /// True for contract supervisor tasks. Only supervisors can spawn new tasks. #[serde(default)] pub is_supervisor: bool, + /// Whether this is a red team monitoring task. + /// Red team tasks monitor work task outputs and can notify + /// the supervisor about potential issues. + #[serde(default)] + pub is_red_team: bool, // Daemon/container info pub daemon_id: Option<Uuid>, @@ -570,6 +575,9 @@ pub struct TaskSummary { /// True for contract supervisor tasks #[serde(default)] pub is_supervisor: bool, + /// True for red team tasks that monitor and review other tasks' work + #[serde(default)] + pub is_red_team: bool, /// Whether this task is hidden from the UI (user dismissed it) #[serde(default)] pub hidden: bool, @@ -595,6 +603,7 @@ impl From<Task> for TaskSummary { subtask_count: 0, // Would need separate query version: task.version, is_supervisor: task.is_supervisor, + is_red_team: task.is_red_team, hidden: task.hidden, created_at: task.created_at, updated_at: task.updated_at, @@ -627,6 +636,9 @@ pub struct CreateTaskRequest { /// True for contract supervisor tasks. Only supervisors can spawn new tasks. #[serde(default)] pub is_supervisor: bool, + /// True for red team tasks that monitor and review other tasks' work. + #[serde(default)] + pub is_red_team: bool, /// Priority (higher = more urgent) #[serde(default)] pub priority: i32, @@ -1331,6 +1343,15 @@ pub struct Contract { /// allowing users to manually handle code changes via patch files or other means. #[serde(default)] pub local_only: bool, + /// Whether to spawn a red team task to monitor work tasks. + /// When enabled, a parallel task monitors outputs and can alert + /// the supervisor about potential issues. 
+ #[serde(default)] + pub red_team_enabled: bool, + /// Optional custom prompt/criteria for the red team to use + /// when evaluating task outputs. + #[serde(skip_serializing_if = "Option::is_none")] + pub red_team_prompt: Option<String>, pub version: i32, pub created_at: DateTime<Utc>, pub updated_at: DateTime<Utc>, @@ -1508,6 +1529,15 @@ pub struct CreateContractRequest { /// allowing users to manually handle code changes via patch files or other means. #[serde(default)] pub local_only: Option<bool>, + /// Enable red team monitoring for this contract. + /// When enabled, a parallel task monitors work task outputs + /// and can alert the supervisor about potential issues. + #[serde(default)] + pub red_team_enabled: Option<bool>, + /// Optional custom criteria for the red team to evaluate. + /// Examples: "Focus on security vulnerabilities", + /// "Ensure all functions have tests", etc. + pub red_team_prompt: Option<String>, } /// Request payload for updating a contract @@ -2074,3 +2104,64 @@ pub struct CheckpointPatchInfo { pub created_at: DateTime<Utc>, pub expires_at: DateTime<Utc>, } + +// ============================================================================ +// Red Team Types +// ============================================================================ + +/// Red Team notification record +#[derive(Debug, Clone, FromRow, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamNotification { + pub id: Uuid, + pub contract_id: Uuid, + pub red_team_task_id: Uuid, + pub related_task_id: Option<Uuid>, + + pub message: String, + pub severity: String, + pub file_path: Option<String>, + pub context: Option<String>, + + pub delivered: bool, + pub delivered_at: Option<DateTime<Utc>>, + pub acknowledged: bool, + pub acknowledged_at: Option<DateTime<Utc>>, + + pub created_at: DateTime<Utc>, +} + +/// Severity levels for red team notifications +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = 
"lowercase")] +pub enum NotificationSeverity { + Low, + Medium, + High, + Critical, +} + +impl std::fmt::Display for NotificationSeverity { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Low => write!(f, "low"), + Self::Medium => write!(f, "medium"), + Self::High => write!(f, "high"), + Self::Critical => write!(f, "critical"), + } + } +} + +impl std::str::FromStr for NotificationSeverity { + type Err = String; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s.to_lowercase().as_str() { + "low" => Ok(Self::Low), + "medium" => Ok(Self::Medium), + "high" => Ok(Self::High), + "critical" => Ok(Self::Critical), + _ => Err(format!("Invalid severity: {}", s)), + } + } +} diff --git a/makima/src/db/repository.rs b/makima/src/db/repository.rs index 6d6642b..de1712d 100644 --- a/makima/src/db/repository.rs +++ b/makima/src/db/repository.rs @@ -11,8 +11,8 @@ use super::models::{ ConversationMessage, ConversationSnapshot, CreateContractRequest, CreateFileRequest, CreateTaskRequest, Daemon, DaemonTaskAssignment, DaemonWithCapacity, File, FileSummary, FileVersion, HistoryEvent, HistoryQueryFilters, MeshChatConversation, MeshChatMessageRecord, - SupervisorState, Task, TaskCheckpoint, TaskEvent, TaskSummary, UpdateContractRequest, - UpdateFileRequest, UpdateTaskRequest, + RedTeamNotification, SupervisorState, Task, TaskCheckpoint, TaskEvent, TaskSummary, + UpdateContractRequest, UpdateFileRequest, UpdateTaskRequest, }; /// Repository error types. 
@@ -689,11 +689,11 @@ pub async fn create_task(pool: &PgPool, req: CreateTaskRequest) -> Result<Task, r#" INSERT INTO tasks ( contract_id, parent_task_id, depth, name, description, plan, priority, - is_supervisor, repository_url, base_branch, target_branch, merge_mode, + is_supervisor, is_red_team, repository_url, base_branch, target_branch, merge_mode, target_repo_path, completion_action, continue_from_task_id, copy_files, branched_from_task_id, conversation_state ) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19) RETURNING * "#, ) @@ -705,6 +705,7 @@ pub async fn create_task(pool: &PgPool, req: CreateTaskRequest) -> Result<Task, .bind(&req.plan) .bind(req.priority) .bind(req.is_supervisor) + .bind(req.is_red_team) .bind(&repo_url) .bind(&base_branch) .bind(&target_branch) @@ -744,7 +745,8 @@ pub async fn list_tasks(pool: &PgPool) -> Result<Vec<TaskSummary>, sqlx::Error> t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.parent_task_id IS NULL AND COALESCE(t.hidden, false) = false @@ -765,7 +767,8 @@ pub async fn list_subtasks(pool: &PgPool, parent_id: Uuid) -> Result<Vec<TaskSum t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as 
hidden, t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.parent_task_id = $1 @@ -1100,11 +1103,11 @@ pub async fn create_task_for_owner( r#" INSERT INTO tasks ( owner_id, contract_id, parent_task_id, depth, name, description, plan, priority, - is_supervisor, repository_url, base_branch, target_branch, merge_mode, + is_supervisor, is_red_team, repository_url, base_branch, target_branch, merge_mode, target_repo_path, completion_action, continue_from_task_id, copy_files, branched_from_task_id, conversation_state ) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20) RETURNING * "#, ) @@ -1117,6 +1120,7 @@ pub async fn create_task_for_owner( .bind(&req.plan) .bind(req.priority) .bind(req.is_supervisor) + .bind(req.is_red_team) .bind(&repo_url) .bind(&base_branch) .bind(&target_branch) @@ -1164,7 +1168,8 @@ pub async fn list_tasks_for_owner( t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.owner_id = $1 AND t.parent_task_id IS NULL AND COALESCE(t.hidden, false) = false @@ -1190,7 +1195,8 @@ pub async fn list_subtasks_for_owner( t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as hidden, 
t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.owner_id = $1 AND t.parent_task_id = $2 @@ -1711,7 +1717,8 @@ pub async fn list_sibling_tasks( t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.parent_task_id = $1 AND t.id != $2 @@ -1733,7 +1740,8 @@ pub async fn list_sibling_tasks( t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.parent_task_id IS NULL AND t.id != $1 @@ -2716,7 +2724,8 @@ pub async fn list_tasks_in_contract( t.parent_task_id, t.depth, t.name, t.status, t.priority, t.progress_summary, (SELECT COUNT(*) FROM tasks WHERE parent_task_id = t.id) as subtask_count, - t.version, t.is_supervisor, COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at + t.version, t.is_supervisor, COALESCE(t.is_red_team, false) as is_red_team, + COALESCE(t.hidden, false) as hidden, t.created_at, t.updated_at FROM tasks t LEFT JOIN contracts c ON t.contract_id = c.id WHERE t.contract_id = $1 AND t.owner_id = $2 @@ -3906,3 +3915,93 @@ pub async fn delete_checkpoint_patches_for_task( .await?; Ok(result.rows_affected() as i64) } + +// ============================================================================= +// Red Team Notifications +// 
============================================================================= + +/// Create a red team notification. +/// Red team tasks use this to report issues found during implementation review. +pub async fn create_red_team_notification( + pool: &PgPool, + contract_id: Uuid, + red_team_task_id: Uuid, + message: &str, + severity: &str, + related_task_id: Option<Uuid>, + file_path: Option<&str>, + context: Option<&str>, +) -> Result<RedTeamNotification, RepositoryError> { + sqlx::query_as::<_, RedTeamNotification>( + r#" + INSERT INTO red_team_notifications + (contract_id, red_team_task_id, related_task_id, message, severity, file_path, context) + VALUES ($1, $2, $3, $4, $5, $6, $7) + RETURNING * + "#, + ) + .bind(contract_id) + .bind(red_team_task_id) + .bind(related_task_id) + .bind(message) + .bind(severity) + .bind(file_path) + .bind(context) + .fetch_one(pool) + .await + .map_err(RepositoryError::Database) +} + +/// Mark a notification as delivered to the supervisor. +pub async fn mark_notification_delivered( + pool: &PgPool, + notification_id: Uuid, +) -> Result<RedTeamNotification, RepositoryError> { + sqlx::query_as::<_, RedTeamNotification>( + r#" + UPDATE red_team_notifications + SET delivered = TRUE, delivered_at = NOW() + WHERE id = $1 + RETURNING * + "#, + ) + .bind(notification_id) + .fetch_one(pool) + .await + .map_err(RepositoryError::Database) +} + +/// Get the red team task for a contract (if one exists). +/// Returns the most recently created red team task for the contract. +pub async fn get_red_team_task_for_contract( + pool: &PgPool, + contract_id: Uuid, +) -> Result<Option<Task>, RepositoryError> { + sqlx::query_as::<_, Task>( + r#" + SELECT * FROM tasks + WHERE contract_id = $1 AND is_red_team = TRUE + ORDER BY created_at DESC + LIMIT 1 + "#, + ) + .bind(contract_id) + .fetch_optional(pool) + .await + .map_err(RepositoryError::Database) +} + +/// Get the count of notifications for a red team task. 
+pub async fn get_notification_count_for_task( + pool: &PgPool, + red_team_task_id: Uuid, +) -> Result<i64, RepositoryError> { + let result: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM red_team_notifications WHERE red_team_task_id = $1", + ) + .bind(red_team_task_id) + .fetch_one(pool) + .await + .map_err(RepositoryError::Database)?; + Ok(result.0) +} diff --git a/makima/src/server/handlers/contract_chat.rs b/makima/src/server/handlers/contract_chat.rs index e6ee8d4..dac806a 100644 --- a/makima/src/server/handlers/contract_chat.rs +++ b/makima/src/server/handlers/contract_chat.rs @@ -1362,6 +1362,7 @@ async fn handle_contract_request( continue_from_task_id: None, copy_files: None, is_supervisor: false, + is_red_team: false, checkpoint_sha: None, branched_from_task_id: None, conversation_history: None, @@ -1458,6 +1459,7 @@ async fn handle_contract_request( continue_from_task_id: None, copy_files: None, is_supervisor: false, + is_red_team: false, checkpoint_sha: None, branched_from_task_id: None, conversation_history: None, @@ -2190,6 +2192,7 @@ async fn handle_contract_request( continue_from_task_id: previous_task_id, copy_files: None, is_supervisor: false, + is_red_team: false, checkpoint_sha: None, branched_from_task_id: None, conversation_history: None, @@ -2586,6 +2589,8 @@ async fn handle_contract_request( autonomous_loop: None, phase_guard: None, local_only: None, + red_team_enabled: None, + red_team_prompt: None, }; let contract = match repository::create_contract_for_owner(pool, owner_id, contract_req).await { @@ -2707,6 +2712,7 @@ async fn handle_contract_request( continue_from_task_id: None, copy_files: None, is_supervisor: false, + is_red_team: false, checkpoint_sha: None, branched_from_task_id: None, conversation_history: None, diff --git a/makima/src/server/handlers/contracts.rs b/makima/src/server/handlers/contracts.rs index 3498063..3ad38da 100644 --- a/makima/src/server/handlers/contracts.rs +++ b/makima/src/server/handlers/contracts.rs @@ 
-295,6 +295,7 @@ pub async fn create_contract( continue_from_task_id: None, copy_files: None, is_supervisor: true, + is_red_team: false, checkpoint_sha: None, priority: 0, merge_mode: None, diff --git a/makima/src/server/handlers/mesh.rs b/makima/src/server/handlers/mesh.rs index 19958e7..c4d862c 100644 --- a/makima/src/server/handlers/mesh.rs +++ b/makima/src/server/handlers/mesh.rs @@ -2239,6 +2239,7 @@ pub async fn reassign_task( plan: updated_plan.clone(), parent_task_id: task.parent_task_id, is_supervisor: task.is_supervisor, + is_red_team: task.is_red_team, priority: task.priority, repository_url: task.repository_url.clone(), base_branch: task.base_branch.clone(), @@ -3010,6 +3011,7 @@ pub async fn fork_task( plan: req.new_task_plan.clone(), parent_task_id: None, // Forked tasks are independent is_supervisor: false, + is_red_team: false, priority: task.priority, repository_url: task.repository_url.clone(), base_branch: task.base_branch.clone(), @@ -3167,6 +3169,7 @@ pub async fn resume_from_checkpoint( plan: req.plan, parent_task_id: None, is_supervisor: false, + is_red_team: false, priority: task.priority, repository_url: task.repository_url.clone(), base_branch: task.base_branch.clone(), @@ -3502,6 +3505,7 @@ pub async fn branch_task( plan: req.message, parent_task_id: None, is_supervisor: false, + is_red_team: false, priority: source_task.priority, repository_url: source_task.repository_url.clone(), base_branch: source_task.base_branch.clone(), diff --git a/makima/src/server/handlers/mesh_chat.rs b/makima/src/server/handlers/mesh_chat.rs index eb35728..ed6cfc0 100644 --- a/makima/src/server/handlers/mesh_chat.rs +++ b/makima/src/server/handlers/mesh_chat.rs @@ -1017,6 +1017,7 @@ async fn handle_mesh_request( continue_from_task_id: None, copy_files: None, is_supervisor: false, + is_red_team: false, checkpoint_sha: None, branched_from_task_id: None, conversation_history: None, diff --git a/makima/src/server/handlers/mesh_red_team.rs 
b/makima/src/server/handlers/mesh_red_team.rs new file mode 100644 index 0000000..c5af60e --- /dev/null +++ b/makima/src/server/handlers/mesh_red_team.rs @@ -0,0 +1,497 @@ +//! HTTP handlers for red team mesh operations. +//! +//! These endpoints are used by red team tasks (via the makima CLI) to notify +//! supervisors of potential issues and query their own status. + +use axum::{ + extract::State, + http::{HeaderMap, StatusCode}, + response::IntoResponse, + Json, +}; +use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; +use uuid::Uuid; + +use crate::db::repository; +use crate::server::handlers::mesh::{extract_auth, AuthSource}; +use crate::server::messages::ApiError; +use crate::server::state::{DaemonCommand, SharedState}; + +// ============================================================================= +// Request/Response Types +// ============================================================================= + +/// Severity level for red team notifications. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "lowercase")] +pub enum RedTeamSeverity { + /// Informational notice - minor issue or suggestion + Info, + /// Warning - potential problem that should be reviewed + Warning, + /// Critical - serious issue requiring immediate attention + Critical, +} + +impl Default for RedTeamSeverity { + fn default() -> Self { + Self::Warning + } +} + +impl std::fmt::Display for RedTeamSeverity { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Info => write!(f, "INFO"), + Self::Warning => write!(f, "WARNING"), + Self::Critical => write!(f, "CRITICAL"), + } + } +} + +/// Request to notify the supervisor of a potential issue. 
+#[derive(Debug, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamNotifyRequest { + /// The issue description/message to send to the supervisor + pub message: String, + /// Severity level of the issue + #[serde(default)] + pub severity: RedTeamSeverity, + /// ID of the task being reviewed (optional - if not provided, assumes general contract concern) + pub related_task_id: Option<Uuid>, + /// File path related to the issue (optional) + pub file_path: Option<String>, + /// Additional context about the issue + pub context: Option<String>, +} + +/// Response from the notify endpoint. +#[derive(Debug, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamNotifyResponse { + /// Unique ID for this notification + pub notification_id: Uuid, + /// Whether the notification was successfully delivered to the supervisor + pub delivered: bool, + /// The supervisor task ID that received the notification + pub supervisor_task_id: Option<Uuid>, +} + +/// Response from the status endpoint. +#[derive(Debug, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamStatusResponse { + /// Contract ID being monitored + pub contract_id: Uuid, + /// Red team task ID + pub red_team_task_id: Uuid, + /// Current task status + pub status: String, + /// Number of notifications sent so far + pub notifications_sent: i64, +} + +/// Red team notification record stored in database. 
+#[derive(Debug, Clone, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct RedTeamNotification { + pub id: Uuid, + pub red_team_task_id: Uuid, + pub contract_id: Uuid, + pub message: String, + pub severity: String, + pub related_task_id: Option<Uuid>, + pub file_path: Option<String>, + pub context: Option<String>, + pub delivered: bool, + pub created_at: chrono::DateTime<chrono::Utc>, +} + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/// Verify the request comes from a red team task and extract ownership info. +/// +/// Returns (task_id, owner_id, contract_id) on success. +async fn verify_red_team_auth( + state: &SharedState, + headers: &HeaderMap, +) -> Result<(Uuid, Uuid, Uuid), (StatusCode, Json<ApiError>)> { + let auth = extract_auth(state, headers); + + let task_id = match auth { + AuthSource::ToolKey(task_id) => task_id, + _ => { + return Err(( + StatusCode::UNAUTHORIZED, + Json(ApiError::new( + "UNAUTHORIZED", + "Red team endpoints require tool key auth", + )), + )); + } + }; + + // Get the task to verify it's a red team task and get owner_id + let pool = state.db_pool.as_ref().ok_or_else(|| { + ( + StatusCode::SERVICE_UNAVAILABLE, + Json(ApiError::new("DB_UNAVAILABLE", "Database not configured")), + ) + })?; + + let task = repository::get_task(pool, task_id) + .await + .map_err(|e| { + tracing::error!(error = %e, "Failed to get red team task"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiError::new("DB_ERROR", "Failed to verify red team task")), + ) + })? + .ok_or_else(|| { + ( + StatusCode::NOT_FOUND, + Json(ApiError::new("NOT_FOUND", "Task not found")), + ) + })?; + + // Verify task is a red team task + // NOTE: This requires the is_red_team field to be added to the Task struct. + // For now, we check if the task name contains "red-team" or "red_team" as a fallback. 
+ let is_red_team = task.name.to_lowercase().contains("red-team") + || task.name.to_lowercase().contains("red_team") + || task.name.to_lowercase().contains("redteam"); + + if !is_red_team { + return Err(( + StatusCode::FORBIDDEN, + Json(ApiError::new( + "NOT_RED_TEAM", + "Only red team tasks can use these endpoints", + )), + )); + } + + // Red team tasks must be associated with a contract + let contract_id = task.contract_id.ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(ApiError::new( + "NO_CONTRACT", + "Red team task must be associated with a contract", + )), + ) + })?; + + Ok((task_id, task.owner_id, contract_id)) +} + +/// Format an alert message for the supervisor. +/// +/// Creates a formatted alert with clear visual markers to grab attention. +fn format_alert_message( + severity: &RedTeamSeverity, + message: &str, + related_task_id: Option<Uuid>, + file_path: Option<&str>, + context: Option<&str>, +) -> String { + let severity_marker = match severity { + RedTeamSeverity::Info => "ℹ️", + RedTeamSeverity::Warning => "⚠️", + RedTeamSeverity::Critical => "🚨", + }; + + let border = match severity { + RedTeamSeverity::Info => "─".repeat(60), + RedTeamSeverity::Warning => "━".repeat(60), + RedTeamSeverity::Critical => "═".repeat(60), + }; + + let mut alert = format!( + r#" +{} +{} [RED TEAM ALERT] - {} +{} + +Issue: {} +"#, + border, severity_marker, severity, border, message + ); + + if let Some(task_id) = related_task_id { + alert.push_str(&format!("\nRelated Task: {}\n", task_id)); + } + + if let Some(path) = file_path { + alert.push_str(&format!("File: {}\n", path)); + } + + if let Some(ctx) = context { + alert.push_str(&format!("\nContext:\n{}\n", ctx)); + } + + // Add action suggestions based on severity + let actions = match severity { + RedTeamSeverity::Info => { + "Suggested Actions:\n- Review when convenient\n- Consider if changes are needed" + } + RedTeamSeverity::Warning => { + "Suggested Actions:\n- Review the flagged item soon\n- Check if this 
deviates from the contract\n- Consider pausing related work until reviewed" + } + RedTeamSeverity::Critical => { + "Suggested Actions:\n- STOP related work immediately\n- Review the flagged item urgently\n- Verify compliance with contract requirements\n- Consider reverting recent changes if necessary" + } + }; + + alert.push_str(&format!("\n{}\n{}\n", actions, border)); + + alert +} + +// ============================================================================= +// Handlers +// ============================================================================= + +/// Notify the supervisor of a potential issue. +/// +/// POST /api/v1/mesh/red-team/notify +/// +/// This endpoint allows red team tasks to alert supervisors about issues they've +/// identified during code review. The notification is sent as a message to the +/// supervisor task. +#[utoipa::path( + post, + path = "/api/v1/mesh/red-team/notify", + request_body = RedTeamNotifyRequest, + responses( + (status = 200, description = "Notification sent", body = RedTeamNotifyResponse), + (status = 401, description = "Unauthorized - tool key required"), + (status = 403, description = "Forbidden - not a red team task"), + (status = 404, description = "Task not found"), + (status = 503, description = "Database not available"), + (status = 500, description = "Internal server error"), + ), + security( + ("tool_key" = []) + ), + tag = "Mesh Red Team" +)] +pub async fn notify_supervisor( + State(state): State<SharedState>, + headers: HeaderMap, + Json(request): Json<RedTeamNotifyRequest>, +) -> impl IntoResponse { + let (red_team_task_id, owner_id, contract_id) = + match verify_red_team_auth(&state, &headers).await { + Ok(ids) => ids, + Err(e) => return e.into_response(), + }; + + let pool = state.db_pool.as_ref().unwrap(); + + // Generate notification ID + let notification_id = Uuid::new_v4(); + + // Get the contract to find the supervisor task + let contract = match repository::get_contract_for_owner(pool, contract_id, 
owner_id).await { + Ok(Some(c)) => c, + Ok(None) => { + return ( + StatusCode::NOT_FOUND, + Json(ApiError::new("NOT_FOUND", "Contract not found")), + ) + .into_response(); + } + Err(e) => { + tracing::error!(error = %e, "Failed to get contract"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiError::new("DB_ERROR", "Failed to get contract")), + ) + .into_response(); + } + }; + + let supervisor_task_id = contract.supervisor_task_id; + + // Format the alert message + let alert_message = format_alert_message( + &request.severity, + &request.message, + request.related_task_id, + request.file_path.as_deref(), + request.context.as_deref(), + ); + + // Record the notification in the database as a history event + let event_data = serde_json::json!({ + "notification_id": notification_id.to_string(), + "red_team_task_id": red_team_task_id.to_string(), + "severity": request.severity.to_string(), + "message": request.message, + "related_task_id": request.related_task_id.map(|id| id.to_string()), + "file_path": request.file_path, + "context": request.context, + }); + + let _ = repository::record_history_event( + pool, + owner_id, + Some(contract_id), + Some(red_team_task_id), + "red_team_alert", + Some(&request.severity.to_string().to_lowercase()), + Some(&request.message), + event_data, + ) + .await; + + // Try to send the message to the supervisor + let mut delivered = false; + if let Some(sup_task_id) = supervisor_task_id { + // Get the supervisor task to find its daemon + if let Ok(Some(supervisor_task)) = repository::get_task(pool, sup_task_id).await { + if let Some(daemon_id) = supervisor_task.daemon_id { + // Send the alert message to the supervisor + let cmd = DaemonCommand::SendMessage { + task_id: sup_task_id, + message: alert_message.clone(), + }; + + if let Err(e) = state.send_daemon_command(daemon_id, cmd).await { + tracing::warn!( + error = %e, + supervisor_task_id = %sup_task_id, + daemon_id = %daemon_id, + "Failed to send red team alert to supervisor" 
+ ); + } else { + delivered = true; + tracing::info!( + notification_id = %notification_id, + red_team_task_id = %red_team_task_id, + supervisor_task_id = %sup_task_id, + severity = %request.severity, + "Red team alert delivered to supervisor" + ); + } + } else { + tracing::warn!( + supervisor_task_id = %sup_task_id, + "Supervisor task has no assigned daemon - alert not delivered" + ); + } + } + } else { + tracing::warn!( + contract_id = %contract_id, + "Contract has no supervisor task - alert not delivered" + ); + } + + ( + StatusCode::OK, + Json(RedTeamNotifyResponse { + notification_id, + delivered, + supervisor_task_id, + }), + ) + .into_response() +} + +/// Get the status of the red team task. +/// +/// GET /api/v1/mesh/red-team/status +/// +/// Returns information about the current red team task including the contract +/// being monitored and notification statistics. +#[utoipa::path( + get, + path = "/api/v1/mesh/red-team/status", + responses( + (status = 200, description = "Red team status", body = RedTeamStatusResponse), + (status = 401, description = "Unauthorized - tool key required"), + (status = 403, description = "Forbidden - not a red team task"), + (status = 404, description = "Task not found"), + (status = 503, description = "Database not available"), + (status = 500, description = "Internal server error"), + ), + security( + ("tool_key" = []) + ), + tag = "Mesh Red Team" +)] +pub async fn get_status( + State(state): State<SharedState>, + headers: HeaderMap, +) -> impl IntoResponse { + let (red_team_task_id, owner_id, contract_id) = + match verify_red_team_auth(&state, &headers).await { + Ok(ids) => ids, + Err(e) => return e.into_response(), + }; + + let pool = state.db_pool.as_ref().unwrap(); + + // Get the red team task status + let task = match repository::get_task(pool, red_team_task_id).await { + Ok(Some(t)) => t, + Ok(None) => { + return ( + StatusCode::NOT_FOUND, + Json(ApiError::new("NOT_FOUND", "Red team task not found")), + ) + 
.into_response(); + } + Err(e) => { + tracing::error!(error = %e, "Failed to get red team task"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiError::new("DB_ERROR", "Failed to get task")), + ) + .into_response(); + } + }; + + // Count notifications sent by this red team task + // Query history_events for red_team_alert events from this task + let notifications_sent = match sqlx::query_scalar::<_, i64>( + r#" + SELECT COUNT(*) + FROM history_events + WHERE owner_id = $1 + AND contract_id = $2 + AND task_id = $3 + AND event_type = 'red_team_alert' + "#, + ) + .bind(owner_id) + .bind(contract_id) + .bind(red_team_task_id) + .fetch_one(pool) + .await + { + Ok(count) => count, + Err(e) => { + tracing::warn!(error = %e, "Failed to count red team notifications"); + 0 + } + }; + + ( + StatusCode::OK, + Json(RedTeamStatusResponse { + contract_id, + red_team_task_id, + status: task.status, + notifications_sent, + }), + ) + .into_response() +} diff --git a/makima/src/server/handlers/mesh_supervisor.rs b/makima/src/server/handlers/mesh_supervisor.rs index a654a05..6d9f8fb 100644 --- a/makima/src/server/handlers/mesh_supervisor.rs +++ b/makima/src/server/handlers/mesh_supervisor.rs @@ -610,6 +610,7 @@ pub async fn spawn_task( contract_id: Some(request.contract_id), parent_task_id: request.parent_task_id, is_supervisor: false, + is_red_team: false, checkpoint_sha: request.checkpoint_sha.clone(), merge_mode: Some("manual".to_string()), priority: 0, @@ -742,6 +743,67 @@ pub async fn spawn_task( updated_fields: vec!["status".to_string(), "daemon_id".to_string()], updated_by: "supervisor".to_string(), }); + + // Check if we should spawn a red team task + // Conditions: + // 1. This is not a supervisor task + // 2. This is not already a red team task + // 3. Contract has red_team_enabled = true + // 4. 
No red team task exists for this contract yet + if !updated_task.is_supervisor && !updated_task.is_red_team && contract.red_team_enabled { + if let Some(contract_id) = updated_task.contract_id { + // Check if a red team task already exists + match repository::get_red_team_task_for_contract(pool, contract_id).await { + Ok(None) => { + // No red team task exists, spawn one + tracing::info!( + contract_id = %contract_id, + work_task_id = %updated_task.id, + "Spawning red team task for contract (first work task started)" + ); + match spawn_red_team_task( + pool, + &state, + contract_id, + owner_id, + &contract.name, + &contract.phase, + contract.red_team_prompt.as_deref(), + ).await { + Ok(red_team_task) => { + tracing::info!( + contract_id = %contract_id, + red_team_task_id = %red_team_task.id, + "Red team task spawned successfully" + ); + } + Err(e) => { + // Log error but don't fail the work task spawn + tracing::error!( + contract_id = %contract_id, + error = %e, + "Failed to spawn red team task" + ); + } + } + } + Ok(Some(existing)) => { + tracing::debug!( + contract_id = %contract_id, + red_team_task_id = %existing.id, + "Red team task already exists for contract" + ); + } + Err(e) => { + tracing::error!( + contract_id = %contract_id, + error = %e, + "Error checking for existing red team task" + ); + } + } + } + } } break; } @@ -2396,3 +2458,233 @@ pub async fn rewind_conversation( }) .into_response() } + +// ============================================================================= +// Red Team Task Spawning +// ============================================================================= + +/// Generate the system prompt/plan for a red team task. +/// +/// This creates detailed instructions for the red team monitor, including +/// what to look for, severity levels, and how to report issues. 
+pub fn generate_red_team_plan( + contract_name: &str, + contract_phase: &str, + custom_prompt: Option<&str>, +) -> String { + let custom_criteria = if let Some(prompt) = custom_prompt { + format!( + r#" + +## Custom Review Criteria + +The contract owner has specified additional review criteria: +{} +"#, + prompt + ) + } else { + String::new() + }; + + format!( + r#"# Red Team Monitor + +You are an adversarial quality reviewer for a software development contract. Your role is to monitor work task outputs in real-time and flag potential issues BEFORE they compound into larger problems. + +## Your Mission + +Monitor all task outputs and verify: +1. **Plan Adherence**: Are tasks following the implementation plan? +2. **Code Quality**: Does the code meet repository standards? +3. **Contract Requirements**: Does the implementation match the specification? +4. **Best Practices**: Are there obvious anti-patterns or issues? + +## Access Available + +You have read-only access to: +- Task outputs (streamed in real-time) +- Task diffs (code changes) +- Contract specifications and plan documents +- Repository configuration files (CONTRIBUTING.md, linting configs, etc.) + +## How to Monitor + +1. **Subscribe to task outputs**: You'll receive outputs from all work tasks +2. **Analyze code changes**: Request diffs for completed tasks +3. **Cross-reference**: Compare outputs against the plan and specifications +4. 
**Report issues**: Use `makima red-team notify` when you detect problems + +## When to Notify + +NOTIFY the supervisor when you observe: +- **Critical**: Security vulnerabilities, data loss risks, breaking changes +- **High**: Significant deviations from the plan, major code quality issues +- **Medium**: Missing tests, suboptimal implementations, minor standard violations +- **Low**: Style inconsistencies, documentation gaps (use sparingly) + +## What NOT to Do + +- Do NOT nitpick minor style issues (that's what linters are for) +- Do NOT block progress for trivial concerns +- Do NOT write code or make changes yourself +- Do NOT notify for things that are already in progress and being addressed +- Do NOT create duplicate notifications for the same issue + +## Notification Format + +When notifying, always include: +1. A clear, concise description of the issue +2. The severity level (critical/high/medium/low) +3. The related task ID if applicable +4. The specific file or code location if known +5. Why this matters (reference to plan, spec, or standards) + +## Example Notification + +``` +makima red-team notify "Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document" \ + --severity critical \ + --task <task_id> \ + --file "src/auth/user.rs" \ + --context "Specification section 3.2 requires bcrypt hashing for all passwords" +``` +{} +## Contract Context + +Contract: {} +Phase: {} + +Focus your monitoring on outputs that relate to the active work tasks. Prioritize issues that could affect the success of the contract or introduce technical debt. +"#, + custom_criteria, contract_name, contract_phase + ) +} + +/// Spawn a red team task for a contract. +/// +/// This creates a red team monitor task that will observe work task outputs +/// and can notify the supervisor about potential issues. 
+pub async fn spawn_red_team_task( + pool: &sqlx::PgPool, + state: &SharedState, + contract_id: Uuid, + owner_id: Uuid, + contract_name: &str, + contract_phase: &str, + red_team_prompt: Option<&str>, +) -> Result<Task, String> { + // Generate the red team plan/prompt + let plan = generate_red_team_plan(contract_name, contract_phase, red_team_prompt); + + // Create task request + let create_req = CreateTaskRequest { + name: "Red Team Monitor".to_string(), + description: Some("Adversarial review task monitoring work task outputs".to_string()), + plan, + contract_id: Some(contract_id), + parent_task_id: None, + is_supervisor: false, + is_red_team: true, + priority: 0, + repository_url: None, // Red team doesn't need a repo + base_branch: None, + target_branch: None, + merge_mode: None, + target_repo_path: None, + completion_action: None, + continue_from_task_id: None, + copy_files: None, + checkpoint_sha: None, + branched_from_task_id: None, + conversation_history: None, + }; + + // Create task in DB + let task = repository::create_task_for_owner(pool, owner_id, create_req) + .await + .map_err(|e| format!("Failed to create red team task: {}", e))?; + + tracing::info!( + contract_id = %contract_id, + red_team_task_id = %task.id, + "Created red team task for contract" + ); + + // Find a daemon to run the red team task + for entry in state.daemon_connections.iter() { + let daemon = entry.value(); + if daemon.owner_id == owner_id { + // Update task with daemon assignment + let update_req = UpdateTaskRequest { + status: Some("starting".to_string()), + daemon_id: Some(daemon.id), + version: Some(task.version), + ..Default::default() + }; + + match repository::update_task_for_owner(pool, task.id, owner_id, update_req).await { + Ok(Some(updated_task)) => { + // Send spawn command to daemon + let cmd = DaemonCommand::SpawnTask { + task_id: updated_task.id, + task_name: updated_task.name.clone(), + plan: updated_task.plan.clone(), + repo_url: None, // Red team doesn't need a 
repo + base_branch: None, + target_branch: None, + parent_task_id: None, + depth: 0, + is_orchestrator: false, + target_repo_path: None, + completion_action: None, + continue_from_task_id: None, + copy_files: None, + contract_id: Some(contract_id), + is_supervisor: false, + autonomous_loop: false, + resume_session: false, + conversation_history: None, + patch_data: None, + patch_base_sha: None, + local_only: true, // Red team is always local-only + }; + + if let Err(e) = state.send_daemon_command(daemon.id, cmd).await { + tracing::warn!( + error = %e, + daemon_id = %daemon.id, + red_team_task_id = %task.id, + "Failed to send red team spawn command" + ); + // Rollback + let rollback_req = UpdateTaskRequest { + status: Some("pending".to_string()), + clear_daemon_id: true, + ..Default::default() + }; + let _ = repository::update_task_for_owner(pool, task.id, owner_id, rollback_req).await; + } else { + tracing::info!( + red_team_task_id = %task.id, + daemon_id = %daemon.id, + "Red team task spawn command sent" + ); + return Ok(updated_task); + } + } + Ok(None) => { + tracing::warn!(red_team_task_id = %task.id, "Red team task not found when updating daemon_id"); + } + Err(e) => { + tracing::error!(red_team_task_id = %task.id, error = %e, "Failed to update red team task with daemon_id"); + } + } + break; + } + } + + // Return the task even if we couldn't start it on a daemon + // It will remain pending and can be started later + Ok(task) +} diff --git a/makima/src/server/handlers/mod.rs b/makima/src/server/handlers/mod.rs index 609b63b..b496922 100644 --- a/makima/src/server/handlers/mod.rs +++ b/makima/src/server/handlers/mod.rs @@ -13,6 +13,7 @@ pub mod mesh; pub mod mesh_chat; pub mod mesh_daemon; pub mod mesh_merge; +pub mod mesh_red_team; pub mod mesh_supervisor; pub mod mesh_ws; pub mod repository_history; diff --git a/makima/src/server/handlers/transcript_analysis.rs b/makima/src/server/handlers/transcript_analysis.rs index 8eb50c7..3c283da 100644 --- 
a/makima/src/server/handlers/transcript_analysis.rs +++ b/makima/src/server/handlers/transcript_analysis.rs @@ -279,6 +279,8 @@ pub async fn create_contract_from_analysis( autonomous_loop: None, phase_guard: None, local_only: None, + red_team_enabled: None, + red_team_prompt: None, }; let contract = match repository::create_contract_for_owner(pool, auth.owner_id, contract_req).await { @@ -358,6 +360,7 @@ pub async fn create_contract_from_analysis( continue_from_task_id: None, copy_files: None, is_supervisor: false, + is_red_team: false, checkpoint_sha: None, priority: match item.priority.as_deref() { Some("high") => 10, @@ -531,6 +534,7 @@ pub async fn update_contract_from_analysis( continue_from_task_id: None, copy_files: None, is_supervisor: false, + is_red_team: false, checkpoint_sha: None, priority: 0, merge_mode: None, diff --git a/makima/src/server/mod.rs b/makima/src/server/mod.rs index bf302a5..b002a49 100644 --- a/makima/src/server/mod.rs +++ b/makima/src/server/mod.rs @@ -18,7 +18,7 @@ use tower_http::trace::TraceLayer; use utoipa::OpenApi; use utoipa_swagger_ui::SwaggerUi; -use crate::server::handlers::{api_keys, chat, contract_chat, contract_daemon, contracts, file_ws, files, history, listen, mesh, mesh_chat, mesh_daemon, mesh_merge, mesh_supervisor, mesh_ws, repository_history, templates, transcript_analysis, users, versions}; +use crate::server::handlers::{api_keys, chat, contract_chat, contract_daemon, contracts, file_ws, files, history, listen, mesh, mesh_chat, mesh_daemon, mesh_merge, mesh_red_team, mesh_supervisor, mesh_ws, repository_history, templates, transcript_analysis, users, versions}; use crate::server::openapi::ApiDoc; use crate::server::state::SharedState; @@ -129,6 +129,9 @@ pub fn make_router(state: SharedState) -> Router { .route("/mesh/supervisor/questions", post(mesh_supervisor::ask_question)) .route("/mesh/questions", get(mesh_supervisor::list_pending_questions)) .route("/mesh/questions/{question_id}/answer", 
post(mesh_supervisor::answer_question)) + // Red team endpoints (for red team tasks to notify supervisors) + .route("/mesh/red-team/notify", post(mesh_red_team::notify_supervisor)) + .route("/mesh/red-team/status", get(mesh_red_team::get_status)) // Mesh WebSocket endpoints .route("/mesh/tasks/subscribe", get(mesh_ws::task_subscription_handler)) .route("/mesh/daemons/connect", get(mesh_daemon::daemon_handler)) |
