[WIP] Heartbeat checkpoint - 2026-01-31 22:46:47 UTC

author: soryu <soryu@soryu.co> 2026-01-31 22:46:47 +0000
committer: soryu <soryu@soryu.co> 2026-01-31 22:46:47 +0000
commit: c26889e42310bd0c3514f633a299a72d6ba2dc80 (patch)
tree: 7a05ccdfdf132c19b322b2d20a41692af713fc27
parent: dac1adb138f532245a36fa16524f1e4fb9990173 (diff)
download: soryu-c26889e42310bd0c3514f633a299a72d6ba2dc80.tar.gz
soryu-c26889e42310bd0c3514f633a299a72d6ba2dc80.zip
1 files changed, 273 insertions, 0 deletions
diff --git a/makima/src/llm/red_team_prompt.rs b/makima/src/llm/red_team_prompt.rs
new file mode 100644
index 0000000..1be5b0d
--- /dev/null
+++ b/makima/src/llm/red_team_prompt.rs
@@ -0,0 +1,273 @@
+//! Red team system prompt generation.
+//!
+//! This module generates the system prompt and the initial task plan for red team
+//! tasks that monitor work task outputs and verify adherence to contract requirements.
+
+use serde::Serialize;
+
+/// Inputs for the red team system prompt and task plan; only the name and phase are required.
+#[derive(Debug, Clone, Serialize)]
+pub struct RedTeamPromptContext {
+    /// Name of the monitored contract; printed as `Contract: ...` in the system prompt.
+    pub contract_name: String,
+    /// Current contract phase (usually "execute"); printed as `Phase: ...`.
+    pub contract_phase: String,
+    /// Repository URL; printed as `Repository: ...` when present, otherwise omitted.
+    pub repository_url: Option<String>,
+    /// User-supplied review criteria; inserted verbatim into the prompt and the task plan.
+    pub custom_prompt: Option<String>,
+    /// Contract specification summary; rendered as its own prompt section when present.
+    pub spec_summary: Option<String>,
+    /// Implementation plan summary; rendered as its own prompt section when present.
+    pub plan_summary: Option<String>,
+}
+
+impl RedTeamPromptContext {
+    /// Build a context from the two required fields.
+    ///
+    /// All optional fields start as `None`; chain the `with_*` builders to populate them.
+    pub fn new(contract_name: String, contract_phase: String) -> Self {
+        Self {
+            contract_name,
+            contract_phase,
+            plan_summary: None,
+            spec_summary: None,
+            custom_prompt: None,
+            repository_url: None,
+        }
+    }
+
+    // Builder-style setters: each consumes `self` and returns it with one optional field
+    // replaced, so calls can be chained off `new`.
+    /// Return the context with `repository_url` replaced by `url` (`None` clears it).
+    pub fn with_repository_url(self, url: Option<String>) -> Self {
+        Self { repository_url: url, ..self }
+    }
+
+    /// Return the context with `custom_prompt` replaced by `prompt` (`None` clears it).
+    pub fn with_custom_prompt(self, prompt: Option<String>) -> Self {
+        Self { custom_prompt: prompt, ..self }
+    }
+
+    /// Return the context with `spec_summary` replaced by `summary` (`None` clears it).
+    pub fn with_spec_summary(self, summary: Option<String>) -> Self {
+        Self { spec_summary: summary, ..self }
+    }
+
+    /// Return the context with `plan_summary` replaced by `summary` (`None` clears it).
+    pub fn with_plan_summary(self, summary: Option<String>) -> Self {
+        Self { plan_summary: summary, ..self }
+    }
+}
+
+/// Generate the red team system prompt for `context`.
+///
+/// Returns fixed monitoring guidance, then the custom criteria / specification / plan sections
+/// (each skipped when `None` or blank), then the contract name, phase and optional repository.
+pub fn generate_red_team_system_prompt(context: &RedTeamPromptContext) -> String {
+    let mut prompt = String::new();
+
+    // Header
+    prompt.push_str("# Red Team Monitor\n\n");
+    prompt.push_str("You are an adversarial quality reviewer for a software development contract. ");
+    prompt.push_str("Your role is to monitor work task outputs in real-time and flag potential issues ");
+    prompt.push_str("BEFORE they compound into larger problems.\n\n");
+
+    // Mission section
+    prompt.push_str("## Your Mission\n\n");
+    prompt.push_str("Monitor all task outputs and verify:\n");
+    prompt.push_str("1. **Plan Adherence**: Are tasks following the implementation plan?\n");
+    prompt.push_str("2. **Code Quality**: Does the code meet repository standards?\n");
+    prompt.push_str("3. **Contract Requirements**: Does the implementation match the specification?\n");
+    prompt.push_str("4. **Best Practices**: Are there obvious anti-patterns or issues?\n\n");
+
+    // Access available section
+    prompt.push_str("## Access Available\n\n");
+    prompt.push_str("You have read-only access to:\n");
+    prompt.push_str("- Task outputs (streamed in real-time)\n");
+    prompt.push_str("- Task diffs (code changes)\n");
+    prompt.push_str("- Contract specifications and plan documents\n");
+    prompt.push_str("- Repository configuration files (CONTRIBUTING.md, linting configs, etc.)\n\n");
+
+    // How to monitor section
+    prompt.push_str("## How to Monitor\n\n");
+    prompt.push_str("1. **Subscribe to task outputs**: You'll receive outputs from all work tasks\n");
+    prompt.push_str("2. **Analyze code changes**: Request diffs for completed tasks\n");
+    prompt.push_str("3. **Cross-reference**: Compare outputs against the plan and specifications\n");
+    prompt.push_str("4. **Report issues**: Use `makima red-team notify` when you detect problems\n\n");
+
+    // When to notify section. NOTE(review): lists four tiers but the format section shows three.
+    prompt.push_str("## When to Notify\n\n");
+    prompt.push_str("NOTIFY the supervisor when you observe:\n");
+    prompt.push_str("- **Critical**: Security vulnerabilities, data loss risks, breaking changes\n");
+    prompt.push_str("- **High/Warning**: Significant deviations from the plan, major code quality issues\n");
+    prompt.push_str("- **Medium/Info**: Missing tests, suboptimal implementations, minor standard violations\n");
+    prompt.push_str("- **Low**: Style inconsistencies, documentation gaps (use sparingly)\n\n");
+
+    // What NOT to do section
+    prompt.push_str("## What NOT to Do\n\n");
+    prompt.push_str("- Do NOT nitpick minor style issues (that's what linters are for)\n");
+    prompt.push_str("- Do NOT block progress for trivial concerns\n");
+    prompt.push_str("- Do NOT write code or make changes yourself\n");
+    prompt.push_str("- Do NOT notify for things that are already in progress and being addressed\n");
+    prompt.push_str("- Do NOT create duplicate notifications for the same issue\n\n");
+
+    // Notification format section
+    prompt.push_str("## Notification Format\n\n");
+    prompt.push_str("When notifying, always include:\n");
+    prompt.push_str("1. A clear, concise description of the issue\n");
+    prompt.push_str("2. The severity level (critical/warning/info)\n");
+    prompt.push_str("3. The related task ID if applicable\n");
+    prompt.push_str("4. The specific file or code location if known\n");
+    prompt.push_str("5. Why this matters (reference to plan, spec, or standards)\n\n");
+
+    // Example notification
+    prompt.push_str("## Example Notification\n\n");
+    prompt.push_str("```\n");
+    prompt.push_str("makima red-team notify \"Task is implementing authentication with plaintext password storage, which contradicts the security requirements in the specification document\" \\\n");
+    prompt.push_str("  --severity critical \\\n");
+    prompt.push_str("  --task <task_id> \\\n");
+    prompt.push_str("  --file \"src/auth/user.rs\" \\\n");
+    prompt.push_str("  --context \"Specification section 3.2 requires bcrypt hashing for all passwords\"\n");
+    prompt.push_str("```\n\n");
+
+    // Custom review criteria section (a blank value would leave an empty heading, so skip it)
+    if let Some(custom) = context.custom_prompt.as_deref().filter(|s| !s.trim().is_empty()) {
+        prompt.push_str("## Custom Review Criteria\n\n");
+        prompt.push_str("Additional review criteria for this contract:\n");
+        prompt.push_str(custom);
+        prompt.push_str("\n\n");
+    }
+
+    // Specification summary (skipped when absent or blank)
+    if let Some(spec) = context.spec_summary.as_deref().filter(|s| !s.trim().is_empty()) {
+        prompt.push_str("## Contract Specification Summary\n\n");
+        prompt.push_str(spec);
+        prompt.push_str("\n\n");
+    }
+
+    // Plan summary (skipped when absent or blank)
+    if let Some(plan) = context.plan_summary.as_deref().filter(|s| !s.trim().is_empty()) {
+        prompt.push_str("## Implementation Plan Summary\n\n");
+        prompt.push_str(plan);
+        prompt.push_str("\n\n");
+    }
+
+    // Contract context section
+    prompt.push_str("## Contract Context\n\n");
+    prompt.push_str(&format!("Contract: {}\n", context.contract_name));
+    prompt.push_str(&format!("Phase: {}\n", context.contract_phase));
+    if let Some(repo) = &context.repository_url {
+        prompt.push_str(&format!("Repository: {}\n", repo));
+    }
+    prompt.push('\n');
+
+    // Closing guidance
+    prompt.push_str("Focus your monitoring on outputs that relate to the active work tasks. ");
+    prompt.push_str("Prioritize issues that could affect the success of the contract or introduce technical debt.\n");
+
+    prompt
+}
+
+/// Generate the initial task plan for a red team task.
+///
+/// Becomes the "plan" field sent to the spawned task; blank custom criteria are left out.
+pub fn generate_red_team_task_plan(context: &RedTeamPromptContext) -> String {
+    let mut plan = String::new();
+
+    plan.push_str("# Red Team Monitoring Task\n\n");
+    plan.push_str("You are monitoring the contract \"");
+    plan.push_str(&context.contract_name);
+    plan.push_str("\" during the ");
+    plan.push_str(&context.contract_phase);
+    plan.push_str(" phase.\n\n");
+
+    plan.push_str("## Your Responsibilities\n\n");
+    plan.push_str("1. Monitor all work task outputs as they stream in\n");
+    plan.push_str("2. Verify implementations match the contract requirements\n");
+    plan.push_str("3. Check for adherence to repository standards\n");
+    plan.push_str("4. Flag issues using `makima red-team notify` command\n\n");
+
+    plan.push_str("## Getting Started\n\n");
+    plan.push_str("1. First, read the contract specification and plan documents\n");
+    plan.push_str("2. Review repository standards (CONTRIBUTING.md, etc.)\n");
+    plan.push_str("3. Begin monitoring task outputs for potential issues\n");
+    plan.push_str("4. When you detect a problem, notify the supervisor immediately\n\n");
+
+    // Custom criteria (a blank value would leave an empty heading, so skip it)
+    if let Some(custom) = context.custom_prompt.as_deref().filter(|s| !s.trim().is_empty()) {
+        plan.push_str("## Special Focus Areas\n\n");
+        plan.push_str(custom);
+        plan.push_str("\n\n");
+    }
+
+    plan.push_str("## Important Notes\n\n");
+    plan.push_str("- You have READ-ONLY access - do not attempt to modify code\n");
+    plan.push_str("- Be constructive, not nitpicky - focus on issues that matter\n");
+    plan.push_str("- Coordinate with the supervisor, not individual work tasks\n");
+    plan.push_str("- Your goal is early issue detection, not blocking progress\n");
+
+    plan
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Context for `name` in the "execute" phase with every optional field unset.
+    /// Shared by all tests so each one only spells out what it changes.
+    fn execute_context(name: &str) -> RedTeamPromptContext {
+        RedTeamPromptContext::new(name.to_string(), "execute".to_string())
+    }
+
+    /// A bare context still yields the title, contract name, phase and notify command.
+    #[test]
+    fn test_generate_basic_prompt() {
+        let context = execute_context("Test Contract");
+
+        let rendered = generate_red_team_system_prompt(&context);
+
+        assert!(rendered.contains("Red Team Monitor"));
+        assert!(rendered.contains("Test Contract"));
+        assert!(rendered.contains("execute"));
+        assert!(rendered.contains("makima red-team notify"));
+    }
+
+    /// Custom criteria appear under their own heading with the text intact.
+    #[test]
+    fn test_generate_prompt_with_custom_criteria() {
+        let criteria = "Focus on security vulnerabilities and OWASP guidelines";
+        let context =
+            execute_context("Security Contract").with_custom_prompt(Some(criteria.to_string()));
+
+        let rendered = generate_red_team_system_prompt(&context);
+
+        assert!(rendered.contains("Custom Review Criteria"));
+        assert!(rendered.contains("security vulnerabilities"));
+        assert!(rendered.contains("OWASP"));
+    }
+
+    /// A configured repository URL is echoed in the prompt.
+    #[test]
+    fn test_generate_prompt_with_repository() {
+        let url = "https://github.com/example/repo";
+        let context =
+            execute_context("Open Source Project").with_repository_url(Some(url.to_string()));
+
+        let rendered = generate_red_team_system_prompt(&context);
+
+        assert!(rendered.contains("https://github.com/example/repo"));
+    }
+
+    /// The task plan names the contract and states the read-only restriction.
+    #[test]
+    fn test_generate_task_plan() {
+        let context = execute_context("My Contract");
+
+        let rendered = generate_red_team_task_plan(&context);
+
+        assert!(rendered.contains("Red Team Monitoring Task"));
+        assert!(rendered.contains("My Contract"));
+        assert!(rendered.contains("READ-ONLY access"));
+    }
+}
author	soryu <soryu@soryu.co>	2026-01-31 22:46:47 +0000
committer	soryu <soryu@soryu.co>	2026-01-31 22:46:47 +0000
commit	c26889e42310bd0c3514f633a299a72d6ba2dc80 (patch)
tree	7a05ccdfdf132c19b322b2d20a41692af713fc27
parent	dac1adb138f532245a36fa16524f1e4fb9990173 (diff)
download	soryu-c26889e42310bd0c3514f633a299a72d6ba2dc80.tar.gz soryu-c26889e42310bd0c3514f633a299a72d6ba2dc80.zip