Add makima directives

author: soryu <soryu@soryu.co> 2026-02-05 01:42:59 +0000
committer: soryu <soryu@soryu.co> 2026-02-05 01:42:59 +0000
commit: 6a0c912a3fbd8e9b3e87ef40e960803d819d966d (patch)
tree: b2c50c490811286d163e40f8d624ee8d43c0ce43 /makima/src/llm
parent: 0302b4596e14210884df5d645df9a179d8f0c1c6 (diff)
download: soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.tar.gz
soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.zip
3 files changed, 1048 insertions, 0 deletions
diff --git a/makima/src/llm/contract_evaluator.rs b/makima/src/llm/contract_evaluator.rs
new file mode 100644
index 0000000..fcc4826
--- /dev/null
+++ b/makima/src/llm/contract_evaluator.rs
@@ -0,0 +1,555 @@
+//! Contract Evaluator - LLM-based evaluation of completed contracts against directive.
+//!
+//! This module provides functionality for:
+//! - Gathering deliverables, files, and task outputs from completed contracts
+//! - Building evaluation prompts using directive and acceptance criteria
+//! - Calling LLM to evaluate work against requirements
+//! - Parsing evaluation responses
+
+use serde::{Deserialize, Serialize};
+use sqlx::PgPool;
+use uuid::Uuid;
+
+use crate::db::{
+    models::{
+        ChainContract, ChainDirective, Contract, ContractEvaluation, CreateContractEvaluationRequest,
+        DirectiveAcceptanceCriterion, DirectiveRequirement, EvaluationCriterionResult,
+    },
+    repository,
+};
+
+use super::claude::{ClaudeClient, ClaudeModel, Message, MessageContent};
+use super::tools::Tool;
+
+/// Result of contract evaluation
+///
+/// Produced by `ContractEvaluator::parse_evaluation_response` from the LLM's
+/// JSON reply and persisted by `ContractEvaluator::save_evaluation`.
+/// Serialized with camelCase field names.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ContractEvaluationResult {
+    /// Whether the contract passed evaluation
+    ///
+    /// True only when the LLM reported a pass AND `overall_score` reached the
+    /// evaluator's pass threshold.
+    pub passed: bool,
+    /// Overall score from 0.0 to 1.0
+    pub overall_score: f64,
+    /// Results for each acceptance criterion
+    pub criteria_results: Vec<EvaluationCriterionResult>,
+    /// Summary feedback from the evaluator
+    pub summary_feedback: String,
+    /// Instructions for rework if failed
+    ///
+    /// Always `None` when `passed` is true; may also be `None` for a failed
+    /// evaluation if the LLM supplied no instructions.
+    pub rework_instructions: Option<String>,
+}
+
+/// Context gathered for evaluation
+///
+/// Assembled by `ContractEvaluator::gather_context` and rendered into the LLM
+/// prompt by `ContractEvaluator::build_evaluation_prompt`.
+#[derive(Debug, Clone)]
+pub struct EvaluationContext {
+    /// The contract being evaluated
+    pub contract: Contract,
+    /// The chain contract record
+    pub chain_contract: ChainContract,
+    /// The directive document
+    pub directive: ChainDirective,
+    /// Files associated with the contract
+    pub files: Vec<FileContent>,
+    /// Task outputs from the contract
+    pub task_outputs: Vec<TaskOutput>,
+    /// Deliverables marked as complete
+    ///
+    /// As built by `gather_context`, this holds one entry per contract file.
+    pub deliverables: Vec<DeliverableInfo>,
+    /// Acceptance criteria specific to this contract
+    ///
+    /// As built by `gather_context`, this is currently every acceptance
+    /// criterion of the directive (no per-contract filtering yet).
+    pub acceptance_criteria: Vec<DirectiveAcceptanceCriterion>,
+    /// Requirements mapped to this contract
+    ///
+    /// As built by `gather_context`, this is every requirement parsed from the
+    /// directive.
+    pub requirements: Vec<DirectiveRequirement>,
+}
+
+/// File content for evaluation
+///
+/// One entry is rendered per file under the "Files Created/Modified" section
+/// of the evaluation prompt.
+#[derive(Debug, Clone, Serialize)]
+pub struct FileContent {
+    /// Heading shown for the file in the prompt. `gather_context` uses the
+    /// repository file path when present, otherwise the file name.
+    pub path: String,
+    /// Optional description, rendered in italics below the heading.
+    pub description: Option<String>,
+    /// Text placed in the file's code fence. `gather_context` fills this with a
+    /// placeholder because the file summaries it reads carry no content.
+    pub content: String,
+    /// When true the prompt heading is tagged ` [DELIVERABLE]`.
+    /// `gather_context` always sets this to false.
+    pub is_deliverable: bool,
+}
+
+/// Task output for evaluation
+///
+/// One entry is rendered per task under the "Task Outputs" section of the
+/// evaluation prompt.
+#[derive(Debug, Clone, Serialize)]
+pub struct TaskOutput {
+    /// Task name, used as the section heading in the prompt.
+    pub task_name: String,
+    /// Text shown for the task. `gather_context` uses the task's progress
+    /// summary, falling back to `Status: <status>` when there is none.
+    pub output_summary: String,
+    /// Exit code of the task, if known. `gather_context` always sets `None`,
+    /// and the prompt builder does not render this field.
+    pub exit_code: Option<i32>,
+}
+
+/// Deliverable info for evaluation
+///
+/// Rendered as `- <name> (<status>)` under the "Deliverables" section of the
+/// evaluation prompt.
+#[derive(Debug, Clone, Serialize)]
+pub struct DeliverableInfo {
+    /// Deliverable name; `gather_context` uses the file name.
+    pub name: String,
+    /// Status label; `gather_context` sets `"complete"` for every file.
+    pub status: String,
+    /// Repository path of the backing file, if any. Not rendered in the prompt.
+    pub file_path: Option<String>,
+}
+
+/// Error types for evaluation
+#[derive(Debug, thiserror::Error)]
+pub enum EvaluationError {
+    /// A repository/database call failed; converted automatically from
+    /// `sqlx::Error` by the `?` operator.
+    #[error("Database error: {0}")]
+    Database(#[from] sqlx::Error),
+
+    /// No contract with this ID was found for the requesting owner.
+    #[error("Contract not found: {0}")]
+    ContractNotFound(Uuid),
+
+    /// The contract (identified by its contract ID) has no chain contract record.
+    #[error("Chain contract not found for contract: {0}")]
+    ChainContractNotFound(Uuid),
+
+    /// The chain (identified by its chain ID) has no directive.
+    #[error("Directive not found for chain: {0}")]
+    DirectiveNotFound(Uuid),
+
+    /// The LLM call failed; carries the stringified client error.
+    #[error("LLM evaluation failed: {0}")]
+    LlmError(String),
+
+    /// The LLM reply could not be turned into an evaluation result.
+    #[error("Failed to parse evaluation response: {0}")]
+    ParseError(String),
+}
+
+/// Contract evaluator for directive-driven evaluation
+///
+/// Construct with `ContractEvaluator::new` and optionally adjust with the
+/// `with_model` / `with_pass_threshold` builder methods.
+pub struct ContractEvaluator {
+    /// Connection pool used for all repository calls.
+    pool: PgPool,
+    /// Client used to send the evaluation prompt to the LLM.
+    claude_client: ClaudeClient,
+    /// Model recorded (via its `Debug` representation) on saved evaluations.
+    // NOTE(review): the visible LLM call does not pass this model to the
+    // client — confirm whether the client selects the model on its own.
+    model: ClaudeModel,
+    /// Minimum score required to pass (default 0.8)
+    pass_threshold: f64,
+}
+
+impl ContractEvaluator {
+    /// Build an evaluator with the default settings.
+    ///
+    /// Defaults: model `ClaudeModel::Sonnet`, pass threshold `0.8`. Both can be
+    /// changed afterwards with the `with_*` builder methods.
+    pub fn new(pool: PgPool, claude_client: ClaudeClient) -> Self {
+        let model = ClaudeModel::Sonnet;
+        let pass_threshold = 0.8;
+
+        Self {
+            pool,
+            claude_client,
+            model,
+            pass_threshold,
+        }
+    }
+
+    /// Replace the configured model, consuming and returning the evaluator
+    /// (builder style).
+    pub fn with_model(self, model: ClaudeModel) -> Self {
+        Self { model, ..self }
+    }
+
+    /// Replace the minimum overall score required to pass, consuming and
+    /// returning the evaluator (builder style).
+    pub fn with_pass_threshold(self, threshold: f64) -> Self {
+        Self {
+            pass_threshold: threshold,
+            ..self
+        }
+    }
+
+    /// Run the full evaluation pipeline for a completed contract.
+    ///
+    /// Steps: gather the evaluation context, render it into a prompt, send the
+    /// prompt to the LLM, and parse the reply. The result is returned to the
+    /// caller and is NOT persisted here; use `save_evaluation` for that.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any [`EvaluationError`] raised by one of the steps.
+    pub async fn evaluate_contract(
+        &self,
+        contract_id: Uuid,
+        owner_id: Uuid,
+    ) -> Result<ContractEvaluationResult, EvaluationError> {
+        let ctx = self.gather_context(contract_id, owner_id).await?;
+        let rendered_prompt = self.build_evaluation_prompt(&ctx);
+        let llm_reply = self.call_llm_for_evaluation(&rendered_prompt).await?;
+
+        self.parse_evaluation_response(&llm_reply, &ctx)
+    }
+
+    /// Gather all context needed for evaluation.
+    ///
+    /// Loads the contract (scoped to `owner_id`), its chain contract record and
+    /// the chain's directive, then lists the contract's files and tasks and maps
+    /// them into the prompt-facing [`FileContent`], [`TaskOutput`] and
+    /// [`DeliverableInfo`] values.
+    ///
+    /// # Errors
+    ///
+    /// - [`EvaluationError::Database`] when any repository call fails, including
+    ///   the file and task listings.
+    /// - [`EvaluationError::ContractNotFound`],
+    ///   [`EvaluationError::ChainContractNotFound`] or
+    ///   [`EvaluationError::DirectiveNotFound`] when the matching lookup returns
+    ///   nothing.
+    async fn gather_context(
+        &self,
+        contract_id: Uuid,
+        owner_id: Uuid,
+    ) -> Result<EvaluationContext, EvaluationError> {
+        // Get contract
+        let contract = repository::get_contract_for_owner(&self.pool, contract_id, owner_id)
+            .await?
+            .ok_or(EvaluationError::ContractNotFound(contract_id))?;
+
+        // Get chain contract
+        let chain_contract = repository::get_chain_contract_by_contract_id(&self.pool, contract_id)
+            .await?
+            .ok_or(EvaluationError::ChainContractNotFound(contract_id))?;
+
+        // Get directive
+        let directive = repository::get_chain_directive(&self.pool, chain_contract.chain_id)
+            .await?
+            .ok_or(EvaluationError::DirectiveNotFound(chain_contract.chain_id))?;
+
+        // Files and tasks are the evidence the LLM judges. A failed listing must
+        // abort the evaluation instead of silently becoming an empty list:
+        // otherwise the contract would be evaluated (and most likely failed)
+        // without the evaluator ever seeing the work.
+        let contract_files =
+            repository::list_files_in_contract(&self.pool, contract_id, owner_id).await?;
+        let contract_tasks =
+            repository::list_tasks_in_contract(&self.pool, contract_id, owner_id).await?;
+
+        // Build file contents from FileSummary
+        // Note: FileSummary doesn't have content, so we use name and description
+        let files: Vec<FileContent> = contract_files
+            .iter()
+            .map(|f| FileContent {
+                path: f.repo_file_path.clone().unwrap_or_else(|| f.name.clone()),
+                description: f.description.clone(),
+                content: format!("[File: {} - content not loaded in summary view]", f.name),
+                is_deliverable: false, // FileSummary doesn't track deliverable status
+            })
+            .collect();
+
+        // Build task outputs from TaskSummary
+        let task_outputs: Vec<TaskOutput> = contract_tasks
+            .iter()
+            .map(|t| TaskOutput {
+                task_name: t.name.clone(),
+                output_summary: t
+                    .progress_summary
+                    .clone()
+                    .unwrap_or_else(|| format!("Status: {}", t.status)),
+                exit_code: None,
+            })
+            .collect();
+
+        // Build deliverables info from files marked as deliverables
+        // Since FileSummary doesn't have deliverable info, we treat all files as potential deliverables
+        let deliverables: Vec<DeliverableInfo> = contract_files
+            .iter()
+            .map(|f| DeliverableInfo {
+                name: f.name.clone(),
+                status: "complete".to_string(),
+                file_path: f.repo_file_path.clone(),
+            })
+            .collect();
+
+        // Parse requirements and acceptance criteria from directive.
+        // JSON that does not deserialize into the expected list falls back to
+        // an empty list.
+        let requirements: Vec<DirectiveRequirement> =
+            serde_json::from_value(directive.requirements.clone()).unwrap_or_default();
+
+        let all_criteria: Vec<DirectiveAcceptanceCriterion> =
+            serde_json::from_value(directive.acceptance_criteria.clone()).unwrap_or_default();
+
+        // Get contract definition to find mapped requirements
+        // For now, use all acceptance criteria
+        let acceptance_criteria = all_criteria;
+
+        Ok(EvaluationContext {
+            contract,
+            chain_contract,
+            directive,
+            files,
+            task_outputs,
+            deliverables,
+            acceptance_criteria,
+            requirements,
+        })
+    }
+
+    /// Build the evaluation prompt.
+    ///
+    /// Renders a Markdown document with, in order: contract information,
+    /// requirements, acceptance criteria, deliverables, files, task outputs and
+    /// the evaluation instructions (JSON reply format and pass threshold).
+    /// Sections whose source list is empty are omitted.
+    ///
+    /// File content longer than 5000 characters is cut after the 5000th
+    /// character and followed by a note stating the total character count.
+    fn build_evaluation_prompt(&self, context: &EvaluationContext) -> String {
+        /// Maximum number of characters of a file's content placed in the prompt.
+        const MAX_FILE_CONTENT_CHARS: usize = 5000;
+
+        let mut prompt = String::new();
+
+        prompt.push_str("# Contract Completion Evaluation\n\n");
+        prompt.push_str("You are evaluating whether a contract has been completed successfully against its requirements.\n\n");
+
+        // Contract info
+        prompt.push_str("## Contract Information\n\n");
+        prompt.push_str(&format!("**Name:** {}\n", context.contract.name));
+        if let Some(ref desc) = context.contract.description {
+            prompt.push_str(&format!("**Description:** {}\n", desc));
+        }
+        prompt.push_str(&format!("**Type:** {}\n", context.contract.contract_type));
+        prompt.push_str(&format!("**Phase:** {}\n", context.contract.phase));
+        prompt.push('\n');
+
+        // Requirements
+        if !context.requirements.is_empty() {
+            prompt.push_str("## Requirements\n\n");
+            for req in &context.requirements {
+                prompt.push_str(&format!("- **{}** ({}): {}\n", req.id, req.priority, req.title));
+                if !req.description.is_empty() {
+                    prompt.push_str(&format!("  {}\n", req.description));
+                }
+            }
+            prompt.push('\n');
+        }
+
+        // Acceptance criteria (numbered from 1 so the LLM can refer to them by index)
+        if !context.acceptance_criteria.is_empty() {
+            prompt.push_str("## Acceptance Criteria\n\n");
+            for (i, criterion) in context.acceptance_criteria.iter().enumerate() {
+                prompt.push_str(&format!("{}. **{}**\n", i + 1, criterion.description));
+                prompt.push_str(&format!("   - Testable: {}\n", criterion.testable));
+                if !criterion.requirement_ids.is_empty() {
+                    prompt.push_str(&format!("   - Covers: {}\n", criterion.requirement_ids.join(", ")));
+                }
+            }
+            prompt.push('\n');
+        }
+
+        // Deliverables
+        if !context.deliverables.is_empty() {
+            prompt.push_str("## Deliverables\n\n");
+            for d in &context.deliverables {
+                prompt.push_str(&format!("- {} ({})\n", d.name, d.status));
+            }
+            prompt.push('\n');
+        }
+
+        // Files
+        if !context.files.is_empty() {
+            prompt.push_str("## Files Created/Modified\n\n");
+            for file in &context.files {
+                prompt.push_str(&format!("### {}", file.path));
+                if file.is_deliverable {
+                    prompt.push_str(" [DELIVERABLE]");
+                }
+                prompt.push('\n');
+                if let Some(ref desc) = file.description {
+                    prompt.push_str(&format!("*{}*\n", desc));
+                }
+                prompt.push_str("```\n");
+                // Truncate content if too long. The cut position comes from
+                // `char_indices`, so it always lies on a character boundary;
+                // slicing at a fixed byte offset would panic whenever that
+                // offset falls inside a multi-byte UTF-8 character.
+                match file.content.char_indices().nth(MAX_FILE_CONTENT_CHARS) {
+                    Some((cut, _)) => {
+                        prompt.push_str(&file.content[..cut]);
+                        prompt.push_str(&format!(
+                            "...\n[Content truncated - {} chars total]",
+                            file.content.chars().count()
+                        ));
+                    }
+                    None => prompt.push_str(&file.content),
+                }
+                prompt.push_str("\n```\n\n");
+            }
+        }
+
+        // Task outputs
+        if !context.task_outputs.is_empty() {
+            prompt.push_str("## Task Outputs\n\n");
+            for task in &context.task_outputs {
+                prompt.push_str(&format!("### {}\n", task.task_name));
+                prompt.push_str(&format!("{}\n\n", task.output_summary));
+            }
+        }
+
+        // Evaluation instructions
+        prompt.push_str("## Evaluation Instructions\n\n");
+        prompt.push_str("Please evaluate the completed work against the requirements and acceptance criteria.\n\n");
+        prompt.push_str("For each acceptance criterion, determine if it has been met and provide a brief explanation.\n\n");
+        prompt.push_str("Respond with a JSON object in the following format:\n\n");
+        prompt.push_str("```json\n");
+        prompt.push_str(r#"{
+  "passed": true/false,
+  "overallScore": 0.0-1.0,
+  "criteriaResults": [
+    {
+      "criterionId": "criterion identifier or index",
+      "met": true/false,
+      "score": 0.0-1.0,
+      "feedback": "explanation of why criterion was/wasn't met"
+    }
+  ],
+  "summaryFeedback": "overall summary of the evaluation",
+  "reworkInstructions": "if failed, specific instructions for what needs to be fixed (null if passed)"
+}
+"#);
+        prompt.push_str("```\n\n");
+        prompt.push_str(&format!("The pass threshold is {}. ", self.pass_threshold));
+        prompt.push_str("A contract passes if the overall score is >= the threshold AND all critical criteria are met.\n");
+
+        prompt
+    }
+
+    /// Send the evaluation prompt to the LLM and return the text of its reply.
+    ///
+    /// The prompt is sent as a single `user` message through `chat_with_tools`
+    /// with an empty tool list (plain chat). A reply that carries no text
+    /// content yields an empty string.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`EvaluationError::LlmError`] with the stringified client error
+    /// when the chat call fails.
+    async fn call_llm_for_evaluation(&self, prompt: &str) -> Result<String, EvaluationError> {
+        // No tools are offered: this is a plain question/answer exchange.
+        let no_tools: Vec<Tool> = Vec::new();
+        let user_message = Message {
+            role: "user".to_string(),
+            content: MessageContent::Text(prompt.to_string()),
+        };
+
+        match self
+            .claude_client
+            .chat_with_tools(vec![user_message], &no_tools)
+            .await
+        {
+            // The reply's text content is optional; treat "absent" as empty.
+            Ok(chat) => Ok(chat.content.unwrap_or_default()),
+            Err(e) => Err(EvaluationError::LlmError(e.to_string())),
+        }
+    }
+
+    /// Parse the LLM response into an evaluation result.
+    ///
+    /// Extracts the JSON object from `response`, deserializes it and converts
+    /// it into a [`ContractEvaluationResult`]. The contract only passes when the
+    /// LLM reported `passed` AND the overall score reaches `pass_threshold`;
+    /// rework instructions are dropped for passing results.
+    ///
+    /// When a `criterionId` is a 1-based index into `context.acceptance_criteria`
+    /// (the numbering used in the prompt), that criterion's description is
+    /// stored as `criterion_text`; otherwise the ID itself is used.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`EvaluationError::ParseError`] when no JSON can be found, the
+    /// JSON does not deserialize, or `overallScore` lies outside `0.0..=1.0`.
+    fn parse_evaluation_response(
+        &self,
+        response: &str,
+        context: &EvaluationContext,
+    ) -> Result<ContractEvaluationResult, EvaluationError> {
+        // Extract JSON from response (may be wrapped in markdown code blocks)
+        let json_str = extract_json_from_response(response)?;
+
+        // Parse the JSON
+        let parsed: EvaluationResponseJson = serde_json::from_str(&json_str)
+            .map_err(|e| EvaluationError::ParseError(format!("JSON parse error: {}", e)))?;
+
+        // A score on another scale (e.g. 85 instead of 0.85) would trivially
+        // clear the threshold, so reject anything outside the documented range.
+        if !(0.0..=1.0).contains(&parsed.overall_score) {
+            return Err(EvaluationError::ParseError(format!(
+                "overallScore {} is outside the expected range 0.0-1.0",
+                parsed.overall_score
+            )));
+        }
+
+        // Convert to our result type
+        let criteria_results: Vec<EvaluationCriterionResult> = parsed
+            .criteria_results
+            .into_iter()
+            .map(|cr| {
+                // Resolve "1", "2", ... to the description of the matching
+                // criterion; any other ID is kept verbatim as the text.
+                let criterion_text = cr
+                    .criterion_id
+                    .trim()
+                    .parse::<usize>()
+                    .ok()
+                    .and_then(|number| number.checked_sub(1))
+                    .and_then(|index| context.acceptance_criteria.get(index))
+                    .map(|criterion| criterion.description.clone())
+                    .unwrap_or_else(|| cr.criterion_id.clone());
+
+                EvaluationCriterionResult {
+                    criterion_id: cr.criterion_id,
+                    criterion_text,
+                    passed: cr.passed,
+                    score: cr.score,
+                    feedback: cr.feedback,
+                    evidence: vec![],
+                }
+            })
+            .collect();
+
+        // Determine pass/fail based on threshold and results
+        let passed = parsed.passed && parsed.overall_score >= self.pass_threshold;
+
+        Ok(ContractEvaluationResult {
+            passed,
+            overall_score: parsed.overall_score,
+            criteria_results,
+            summary_feedback: parsed.summary_feedback,
+            rework_instructions: if passed { None } else { parsed.rework_instructions },
+        })
+    }
+
+    /// Persist an evaluation result and record its outcome on the chain contract.
+    ///
+    /// Two repository writes happen in sequence: the evaluation record is
+    /// created first, then the chain contract's evaluation status is set to
+    /// `"passed"` or `"failed"` with a reference to the new evaluation and any
+    /// rework instructions. The created evaluation is returned.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`EvaluationError::Database`] if either write fails.
+    pub async fn save_evaluation(
+        &self,
+        contract_id: Uuid,
+        chain_id: Uuid,
+        chain_contract_id: Uuid,
+        result: &ContractEvaluationResult,
+    ) -> Result<ContractEvaluation, EvaluationError> {
+        let outcome = if result.passed { "passed" } else { "failed" };
+
+        let request = CreateContractEvaluationRequest {
+            contract_id,
+            chain_id: Some(chain_id),
+            chain_contract_id: Some(chain_contract_id),
+            // The model is stored using its Debug representation.
+            evaluator_model: Some(format!("{:?}", self.model)),
+            passed: result.passed,
+            overall_score: Some(result.overall_score),
+            criteria_results: result.criteria_results.clone(),
+            summary_feedback: result.summary_feedback.clone(),
+            rework_instructions: result.rework_instructions.clone(),
+        };
+        let saved = repository::create_contract_evaluation(&self.pool, request).await?;
+
+        // Update chain contract status
+        repository::update_chain_contract_evaluation_status(
+            &self.pool,
+            chain_contract_id,
+            outcome,
+            Some(saved.id),
+            result.rework_instructions.as_deref(),
+        )
+        .await?;
+
+        Ok(saved)
+    }
+}
+
+/// JSON structure for parsing LLM response
+///
+/// Mirrors the reply format requested in the evaluation prompt; keys are
+/// camelCase (`overallScore`, `criteriaResults`, `summaryFeedback`,
+/// `reworkInstructions`).
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct EvaluationResponseJson {
+    /// The LLM's own pass/fail verdict.
+    passed: bool,
+    /// Overall score reported by the LLM (requested as 0.0-1.0).
+    overall_score: f64,
+    /// Per-criterion results.
+    criteria_results: Vec<CriterionResultJson>,
+    /// Overall summary of the evaluation.
+    summary_feedback: String,
+    /// Rework instructions; `None` when the key is absent or `null`.
+    rework_instructions: Option<String>,
+}
+
+/// One entry of `criteriaResults` in the LLM's JSON reply.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct CriterionResultJson {
+    /// Criterion identifier or index, as reported by the LLM.
+    criterion_id: String,
+    /// Whether the criterion was met. Accepts the key `passed` as well as
+    /// `met`, which is the key the prompt asks the LLM to use.
+    #[serde(alias = "met")]
+    passed: bool,
+    /// Per-criterion score; defaults to 0.0 when the key is missing.
+    #[serde(default)]
+    score: f64,
+    /// Explanation of why the criterion was or was not met.
+    feedback: String,
+}
+
+/// Extract JSON from a response that may contain markdown code blocks.
+///
+/// Three strategies are tried in order; the first that succeeds wins:
+///
+/// 1. the trimmed content of the first closed ```` ```json ```` fence,
+/// 2. the trimmed content of the first closed plain ```` ``` ```` fence (the
+///    remainder of the opening fence line is skipped as a language tag),
+/// 3. the first balanced `{ ... }` object found in the raw text.
+///
+/// The returned text is not validated as JSON; that is left to the caller.
+///
+/// # Errors
+///
+/// Returns [`EvaluationError::ParseError`] when none of the strategies finds a
+/// candidate.
+fn extract_json_from_response(response: &str) -> Result<String, EvaluationError> {
+    const JSON_FENCE: &str = "```json";
+    const FENCE: &str = "```";
+
+    // Try to find JSON in code blocks first
+    if let Some(start) = response.find(JSON_FENCE) {
+        let json_start = start + JSON_FENCE.len();
+        if let Some(end) = response[json_start..].find(FENCE) {
+            return Ok(response[json_start..json_start + end].trim().to_string());
+        }
+    }
+
+    // Try plain code blocks
+    if let Some(start) = response.find(FENCE) {
+        let json_start = start + FENCE.len();
+        // Skip any language identifier on the same line
+        let actual_start = response[json_start..]
+            .find('\n')
+            .map(|i| json_start + i + 1)
+            .unwrap_or(json_start);
+        if let Some(end) = response[actual_start..].find(FENCE) {
+            return Ok(response[actual_start..actual_start + end].trim().to_string());
+        }
+    }
+
+    // Try to find raw JSON (starts with {)
+    if let Some(start) = response.find('{') {
+        // Find the matching closing brace. Braces inside JSON string literals
+        // (for example feedback text that mentions `}`) must not be counted,
+        // so track whether the scanner is inside a string and whether the
+        // previous character was an escaping backslash.
+        let mut depth = 0usize;
+        let mut in_string = false;
+        let mut escaped = false;
+        for (i, c) in response[start..].char_indices() {
+            if in_string {
+                if escaped {
+                    escaped = false;
+                } else if c == '\\' {
+                    escaped = true;
+                } else if c == '"' {
+                    in_string = false;
+                }
+                continue;
+            }
+            match c {
+                '"' => in_string = true,
+                '{' => depth += 1,
+                '}' => {
+                    // The scan starts on '{' and returns as soon as depth is
+                    // back to zero, so depth is at least 1 here.
+                    depth -= 1;
+                    if depth == 0 {
+                        return Ok(response[start..start + i + 1].to_string());
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+
+    Err(EvaluationError::ParseError(
+        "Could not find JSON in response".to_string(),
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// JSON wrapped in a fenced `json` code block is extracted from the
+    /// surrounding prose.
+    #[test]
+    fn test_extract_json_from_code_block() {
+        let reply = r#"Here is the evaluation:
+
+```json
+{
+  "passed": true,
+  "overallScore": 0.85
+}
+```
+
+Done."#;
+
+        let extracted = extract_json_from_response(reply).unwrap();
+        let has_verdict = extracted.contains("\"passed\": true");
+        assert!(has_verdict);
+    }
+
+    /// A bare JSON object embedded in a sentence is extracted without fences.
+    #[test]
+    fn test_extract_json_raw() {
+        let reply = r#"The result is {"passed": false, "overallScore": 0.5}"#;
+
+        let extracted = extract_json_from_response(reply).unwrap();
+        let has_verdict = extracted.contains("\"passed\": false");
+        assert!(has_verdict);
+    }
+}
diff --git a/makima/src/llm/contract_tools.rs b/makima/src/llm/contract_tools.rs
index 0f50132..7f7e849 100644
--- a/makima/src/llm/contract_tools.rs
+++ b/makima/src/llm/contract_tools.rs
@@ -460,6 +460,214 @@ pub static CONTRACT_TOOLS: once_cell::sync::Lazy<Vec<Tool>> = once_cell::sync::L
                 "required": ["file_id"]
             }),
         },
+        // =============================================================================
+        // Chain Directive Tools (for directive contracts orchestrating chains)
+        // =============================================================================
+        Tool {
+            name: "create_chain_from_directive".to_string(),
+            description: "Create a new chain that this directive contract will orchestrate. The chain starts in 'pending' status and contract definitions can be added. Only available to directive contracts.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "name": {
+                        "type": "string",
+                        "description": "Name for the chain"
+                    },
+                    "description": {
+                        "type": "string",
+                        "description": "Description of what the chain accomplishes"
+                    }
+                },
+                "required": ["name"]
+            }),
+        },
+        Tool {
+            name: "add_chain_contract".to_string(),
+            description: "Add a contract definition to the chain being orchestrated. The contract will be created when its dependencies are met.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "name": {
+                        "type": "string",
+                        "description": "Contract name"
+                    },
+                    "description": {
+                        "type": "string",
+                        "description": "What this contract accomplishes"
+                    },
+                    "contract_type": {
+                        "type": "string",
+                        "enum": ["simple", "execute", "checkpoint"],
+                        "description": "Contract type (default: simple)"
+                    },
+                    "depends_on": {
+                        "type": "array",
+                        "items": { "type": "string" },
+                        "description": "Names of contracts this depends on"
+                    },
+                    "requirement_ids": {
+                        "type": "array",
+                        "items": { "type": "string" },
+                        "description": "Requirement IDs this contract addresses (for traceability)"
+                    }
+                },
+                "required": ["name"]
+            }),
+        },
+        Tool {
+            name: "set_chain_dependencies".to_string(),
+            description: "Set which contracts depend on which other contracts in the chain.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "contract_name": {
+                        "type": "string",
+                        "description": "Name of contract that has dependencies"
+                    },
+                    "depends_on": {
+                        "type": "array",
+                        "items": { "type": "string" },
+                        "description": "Names of contracts it depends on"
+                    }
+                },
+                "required": ["contract_name", "depends_on"]
+            }),
+        },
+        Tool {
+            name: "modify_chain_contract".to_string(),
+            description: "Update a contract definition in the chain.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "name": {
+                        "type": "string",
+                        "description": "Name of the contract to modify"
+                    },
+                    "new_name": {
+                        "type": "string",
+                        "description": "New name for the contract"
+                    },
+                    "description": {
+                        "type": "string",
+                        "description": "New description"
+                    },
+                    "add_requirement_ids": {
+                        "type": "array",
+                        "items": { "type": "string" },
+                        "description": "Requirement IDs to add"
+                    },
+                    "remove_requirement_ids": {
+                        "type": "array",
+                        "items": { "type": "string" },
+                        "description": "Requirement IDs to remove"
+                    }
+                },
+                "required": ["name"]
+            }),
+        },
+        Tool {
+            name: "remove_chain_contract".to_string(),
+            description: "Remove a contract definition from the chain (only if not yet instantiated).".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "name": {
+                        "type": "string",
+                        "description": "Name of the contract to remove"
+                    }
+                },
+                "required": ["name"]
+            }),
+        },
+        Tool {
+            name: "preview_chain_dag".to_string(),
+            description: "Generate a visual preview of the chain DAG structure for review.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {}
+            }),
+        },
+        Tool {
+            name: "validate_chain_directive".to_string(),
+            description: "Validate the chain specification is complete and valid (no cycles, all dependencies exist, all requirements covered).".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {}
+            }),
+        },
+        Tool {
+            name: "finalize_chain_directive".to_string(),
+            description: "Lock the directive and start chain execution. Call this after validation passes and user has approved (if phase_guard enabled).".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "auto_start": {
+                        "type": "boolean",
+                        "description": "Whether to immediately start the chain (default: true)"
+                    }
+                }
+            }),
+        },
+        Tool {
+            name: "get_chain_status".to_string(),
+            description: "Get current status of the chain being orchestrated, including contract statuses and progress.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {}
+            }),
+        },
+        Tool {
+            name: "get_uncovered_requirements".to_string(),
+            description: "List requirements from the directive that are not yet mapped to any contract.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {}
+            }),
+        },
+        Tool {
+            name: "evaluate_contract_completion".to_string(),
+            description: "Evaluate whether a completed chain contract meets the directive requirements. Use this after a contract completes to assess if it satisfies acceptance criteria.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "contract_id": {
+                        "type": "string",
+                        "description": "ID of the completed contract to evaluate"
+                    },
+                    "passed": {
+                        "type": "boolean",
+                        "description": "Whether the evaluation passed"
+                    },
+                    "feedback": {
+                        "type": "string",
+                        "description": "Evaluation feedback and rationale"
+                    },
+                    "rework_instructions": {
+                        "type": "string",
+                        "description": "Instructions for rework if evaluation failed"
+                    }
+                },
+                "required": ["contract_id", "passed", "feedback"]
+            }),
+        },
+        Tool {
+            name: "request_rework".to_string(),
+            description: "Request rework on a completed contract that didn't meet requirements. This will block chain progression and notify the contract to address issues.".to_string(),
+            parameters: json!({
+                "type": "object",
+                "properties": {
+                    "contract_id": {
+                        "type": "string",
+                        "description": "ID of the contract needing rework"
+                    },
+                    "feedback": {
+                        "type": "string",
+                        "description": "Detailed feedback on what needs to be fixed"
+                    }
+                },
+                "required": ["contract_id", "feedback"]
+            }),
+        },
     ]
 });
 
@@ -546,6 +754,50 @@ pub enum ContractToolRequest {
         include_decisions: bool,
         include_action_items: bool,
     },
+
+    // Chain directive tools (for directive contracts)
+    CreateChainFromDirective {
+        name: String,
+        description: Option<String>,
+    },
+    AddChainContract {
+        name: String,
+        description: Option<String>,
+        contract_type: Option<String>,
+        depends_on: Option<Vec<String>>,
+        requirement_ids: Option<Vec<String>>,
+    },
+    SetChainDependencies {
+        contract_name: String,
+        depends_on: Vec<String>,
+    },
+    ModifyChainContract {
+        name: String,
+        new_name: Option<String>,
+        description: Option<String>,
+        add_requirement_ids: Option<Vec<String>>,
+        remove_requirement_ids: Option<Vec<String>>,
+    },
+    RemoveChainContract {
+        name: String,
+    },
+    PreviewChainDag,
+    ValidateChainDirective,
+    FinalizeChainDirective {
+        auto_start: bool,
+    },
+    GetChainStatus,
+    GetUncoveredRequirements,
+    EvaluateContractCompletion {
+        contract_id: Uuid,
+        passed: bool,
+        feedback: String,
+        rework_instructions: Option<String>,
+    },
+    RequestRework {
+        contract_id: Uuid,
+        feedback: String,
+    },
 }
 
 /// Task definition for chained task creation
@@ -617,6 +869,20 @@ pub fn parse_contract_tool_call(call: &super::tools::ToolCall) -> ContractToolEx
         "analyze_transcript" => parse_analyze_transcript(call),
         "create_contract_from_transcript" => parse_create_contract_from_transcript(call),
 
+        // Chain directive tools
+        "create_chain_from_directive" => parse_create_chain_from_directive(call),
+        "add_chain_contract" => parse_add_chain_contract(call),
+        "set_chain_dependencies" => parse_set_chain_dependencies(call),
+        "modify_chain_contract" => parse_modify_chain_contract(call),
+        "remove_chain_contract" => parse_remove_chain_contract(call),
+        "preview_chain_dag" => parse_preview_chain_dag(),
+        "validate_chain_directive" => parse_validate_chain_directive(),
+        "finalize_chain_directive" => parse_finalize_chain_directive(call),
+        "get_chain_status" => parse_get_chain_status(),
+        "get_uncovered_requirements" => parse_get_uncovered_requirements(),
+        "evaluate_contract_completion" => parse_evaluate_contract_completion(call),
+        "request_rework" => parse_request_rework(call),
+
         _ => ContractToolExecutionResult {
             success: false,
             message: format!("Unknown contract tool: {}", call.name),
@@ -1206,6 +1472,229 @@ fn parse_create_contract_from_transcript(call: &super::tools::ToolCall) -> Contr
 }
 
 // =============================================================================
+// Chain Directive Tool Parsing
+// =============================================================================
+
+/// Parses a `create_chain_from_directive` call: `name` is required, `description` is optional.
+fn parse_create_chain_from_directive(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    // String argument by key; `None` when the key is absent or `as_str()` yields nothing.
+    let text_arg = |key: &str| call.arguments.get(key).and_then(|value| value.as_str()).map(String::from);
+    match text_arg("name") {
+        None => error_result("Missing required parameter: name"),
+        Some(name) => ContractToolExecutionResult {
+            success: true,
+            message: String::from("Creating chain from directive..."),
+            data: None,
+            request: Some(ContractToolRequest::CreateChainFromDirective { name, description: text_arg("description") }),
+            pending_questions: None,
+        },
+    }
+}
+
+/// Parses an `add_chain_contract` tool call into an `AddChainContract` request.
+///
+/// `name` is required. `description` and `contract_type` are optional strings;
+/// `depends_on` and `requirement_ids` are optional arrays, and entries that are
+/// not readable as strings are dropped. A wrongly-typed argument counts as absent.
+fn parse_add_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    let args = &call.arguments;
+    let text_arg = |key: &str| args.get(key).and_then(|value| value.as_str()).map(String::from);
+    let list_arg = |key: &str| -> Option<Vec<String>> {
+        let entries = args.get(key)?.as_array()?;
+        Some(entries.iter().filter_map(|entry| entry.as_str()).map(String::from).collect())
+    };
+
+    let Some(name) = text_arg("name") else {
+        return error_result("Missing required parameter: name");
+    };
+    let message = format!("Adding contract '{}' to chain...", name);
+    let request = ContractToolRequest::AddChainContract {
+        name,
+        description: text_arg("description"),
+        contract_type: text_arg("contract_type"),
+        depends_on: list_arg("depends_on"),
+        requirement_ids: list_arg("requirement_ids"),
+    };
+
+    ContractToolExecutionResult {
+        success: true,
+        message,
+        data: None,
+        request: Some(request),
+        pending_questions: None,
+    }
+}
+
+/// Parses a `set_chain_dependencies` call; `contract_name` is required, while `depends_on`
+/// becomes an empty list when it is missing or not an array (non-string entries are skipped).
+fn parse_set_chain_dependencies(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    let Some(contract_name) = call.arguments.get("contract_name").and_then(|value| value.as_str()) else {
+        return error_result("Missing required parameter: contract_name");
+    };
+    let contract_name = contract_name.to_owned();
+    let depends_on: Vec<String> = match call.arguments.get("depends_on").and_then(|value| value.as_array()) {
+        Some(entries) => entries.iter().filter_map(|entry| entry.as_str()).map(String::from).collect(),
+        None => Vec::new(),
+    };
+
+    ContractToolExecutionResult {
+        success: true,
+        message: format!("Setting dependencies for '{}'...", contract_name),
+        data: None,
+        request: Some(ContractToolRequest::SetChainDependencies { contract_name, depends_on }),
+        pending_questions: None,
+    }
+}
+
+/// Parses a `modify_chain_contract` tool call into a `ModifyChainContract` request.
+///
+/// `name` is required. `new_name` and `description` are optional strings; the two
+/// requirement-id arguments are optional arrays whose non-string entries are dropped.
+fn parse_modify_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    let args = &call.arguments;
+    let string_at = |key: &str| args.get(key).and_then(|value| value.as_str()).map(str::to_owned);
+    let strings_at = |key: &str| -> Option<Vec<String>> {
+        let items = args.get(key).and_then(|value| value.as_array())?;
+        Some(items.iter().filter_map(|item| item.as_str()).map(str::to_owned).collect())
+    };
+
+    let name = match string_at("name") {
+        Some(found) => found,
+        None => return error_result("Missing required parameter: name"),
+    };
+    let message = format!("Modifying contract '{}'...", name);
+    let request = Some(ContractToolRequest::ModifyChainContract {
+        name,
+        new_name: string_at("new_name"),
+        description: string_at("description"),
+        add_requirement_ids: strings_at("add_requirement_ids"),
+        remove_requirement_ids: strings_at("remove_requirement_ids"),
+    });
+
+    ContractToolExecutionResult {
+        success: true,
+        message,
+        data: None,
+        request,
+        pending_questions: None,
+    }
+}
+
+/// Parses a `remove_chain_contract` tool call into a `RemoveChainContract` request.
+///
+/// Returns an error result when `name` is missing or is not readable as a string.
+fn parse_remove_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    let name = match call.arguments.get("name").and_then(|value| value.as_str()) {
+        Some(raw) => raw.to_owned(),
+        None => return error_result("Missing required parameter: name"),
+    };
+    // Format the status text first: `name` is moved into the request below.
+    let message = format!("Removing contract '{}'...", name);
+    let request = Some(ContractToolRequest::RemoveChainContract { name });
+
+    ContractToolExecutionResult { success: true, message, data: None, request, pending_questions: None }
+}
+
+/// Builds the result for `preview_chain_dag`; no tool-call arguments are read,
+/// so this only wraps a `PreviewChainDag` request with a status message.
+fn parse_preview_chain_dag() -> ContractToolExecutionResult {
+    let request = Some(ContractToolRequest::PreviewChainDag);
+    let message = String::from("Generating chain DAG preview...");
+
+    // `data` and `pending_questions` are left empty.
+    ContractToolExecutionResult { success: true, message, data: None, request, pending_questions: None }
+}
+
+/// Builds the result for `validate_chain_directive`.
+///
+/// Reads no tool-call arguments; the returned result is marked successful and
+/// carries a `ValidateChainDirective` request.
+fn parse_validate_chain_directive() -> ContractToolExecutionResult {
+    let request = Some(ContractToolRequest::ValidateChainDirective);
+    let message = "Validating chain directive...".to_owned();
+    ContractToolExecutionResult { success: true, message, data: None, request, pending_questions: None }
+}
+
+/// Parses a `finalize_chain_directive` tool call into a `FinalizeChainDirective` request.
+/// `auto_start` defaults to `true` when it is missing or is not readable as a boolean.
+fn parse_finalize_chain_directive(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    let auto_start = match call.arguments.get("auto_start") {
+        Some(flag) => flag.as_bool().unwrap_or(true),
+        None => true,
+    };
+    let request = Some(ContractToolRequest::FinalizeChainDirective { auto_start });
+    let message = String::from("Finalizing chain directive...");
+    ContractToolExecutionResult { success: true, message, data: None, request, pending_questions: None }
+}
+
+/// Builds the result for `get_chain_status`, which reads no tool-call arguments.
+///
+/// The result is marked successful and carries a `GetChainStatus` request.
+fn parse_get_chain_status() -> ContractToolExecutionResult {
+    let message = String::from("Getting chain status...");
+    let request = Some(ContractToolRequest::GetChainStatus);
+
+    ContractToolExecutionResult { success: true, message, data: None, request, pending_questions: None }
+}
+
+/// Builds the result for `get_uncovered_requirements`; no tool-call arguments are read.
+/// The result is marked successful and carries a `GetUncoveredRequirements` request.
+fn parse_get_uncovered_requirements() -> ContractToolExecutionResult {
+    let message = "Getting uncovered requirements...".to_owned();
+    let request = Some(ContractToolRequest::GetUncoveredRequirements);
+
+    // `data` and `pending_questions` are left empty.
+    ContractToolExecutionResult { success: true, message, data: None, request, pending_questions: None }
+}
+
+/// Parses an `evaluate_contract_completion` tool call into an
+/// `EvaluateContractCompletion` request.
+///
+/// `contract_id` (resolved through `parse_uuid_arg`) and `feedback` are required and
+/// are checked in that order. `passed` is `false` unless the argument reads as the
+/// boolean `true`; `rework_instructions` is an optional string.
+fn parse_evaluate_contract_completion(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    let args = &call.arguments;
+    let text_arg = |key: &str| args.get(key).and_then(|value| value.as_str()).map(String::from);
+
+    let contract_id = match parse_uuid_arg(call, "contract_id") {
+        Some(id) => id,
+        None => return error_result("Missing or invalid required parameter: contract_id"),
+    };
+    // A missing or non-boolean `passed` is treated the same as an explicit `false`.
+    let passed = matches!(args.get("passed").and_then(|value| value.as_bool()), Some(true));
+    let feedback = match text_arg("feedback") {
+        Some(text) => text,
+        None => return error_result("Missing required parameter: feedback"),
+    };
+    let rework_instructions = text_arg("rework_instructions");
+    let message = format!("Evaluating contract completion (passed: {})...", passed);
+    let request = Some(ContractToolRequest::EvaluateContractCompletion { contract_id, passed, feedback, rework_instructions });
+
+    ContractToolExecutionResult { success: true, message, data: None, request, pending_questions: None }
+}
+
+/// Parses a `request_rework` tool call into a `RequestRework` request.
+///
+/// Both `contract_id` (resolved through `parse_uuid_arg`) and `feedback` are
+/// required; the first one found missing yields an error result.
+fn parse_request_rework(call: &super::tools::ToolCall) -> ContractToolExecutionResult {
+    let Some(contract_id) = parse_uuid_arg(call, "contract_id") else {
+        return error_result("Missing or invalid required parameter: contract_id");
+    };
+    let Some(feedback) = call.arguments.get("feedback").and_then(|value| value.as_str()) else {
+        return error_result("Missing required parameter: feedback");
+    };
+    ContractToolExecutionResult {
+        success: true,
+        message: String::from("Requesting rework..."),
+        data: None,
+        request: Some(ContractToolRequest::RequestRework { contract_id, feedback: feedback.to_owned() }),
+        pending_questions: None,
+    }
+}
+
+// =============================================================================
 // Helper Functions
 // =============================================================================
 
diff --git a/makima/src/llm/mod.rs b/makima/src/llm/mod.rs
index 4c84ced..702e1fd 100644
--- a/makima/src/llm/mod.rs
+++ b/makima/src/llm/mod.rs
@@ -1,6 +1,7 @@
 //! LLM integration module for file editing via tool calling.
 
 pub mod claude;
+pub mod contract_evaluator;
 pub mod contract_tools;
 pub mod discuss_tools;
 pub mod groq;
@@ -44,6 +45,9 @@ pub use transcript_analyzer::{
     ExtractedActionItem, SpeakerStats, format_transcript_for_analysis,
     calculate_speaker_stats, build_analysis_prompt, parse_analysis_response,
 };
+pub use contract_evaluator::{
+    ContractEvaluator, ContractEvaluationResult, EvaluationContext, EvaluationError,
+};
 
 /// Available LLM providers and models
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
author	soryu <soryu@soryu.co>	2026-02-05 01:42:59 +0000
committer	soryu <soryu@soryu.co>	2026-02-05 01:42:59 +0000
commit	6a0c912a3fbd8e9b3e87ef40e960803d819d966d (patch)
tree	b2c50c490811286d163e40f8d624ee8d43c0ce43 /makima/src/llm
parent	0302b4596e14210884df5d645df9a179d8f0c1c6 (diff)
download	soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.tar.gz soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.zip