diff options
| author | soryu <soryu@soryu.co> | 2026-02-05 01:42:59 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2026-02-05 01:42:59 +0000 |
| commit | 6a0c912a3fbd8e9b3e87ef40e960803d819d966d (patch) | |
| tree | b2c50c490811286d163e40f8d624ee8d43c0ce43 /makima/src/llm | |
| parent | 0302b4596e14210884df5d645df9a179d8f0c1c6 (diff) | |
| download | soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.tar.gz soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.zip | |
Add makima directives
Diffstat (limited to 'makima/src/llm')
| -rw-r--r-- | makima/src/llm/contract_evaluator.rs | 555 | ||||
| -rw-r--r-- | makima/src/llm/contract_tools.rs | 489 | ||||
| -rw-r--r-- | makima/src/llm/mod.rs | 4 |
3 files changed, 1048 insertions, 0 deletions
diff --git a/makima/src/llm/contract_evaluator.rs b/makima/src/llm/contract_evaluator.rs new file mode 100644 index 0000000..fcc4826 --- /dev/null +++ b/makima/src/llm/contract_evaluator.rs @@ -0,0 +1,555 @@ +//! Contract Evaluator - LLM-based evaluation of completed contracts against directive. +//! +//! This module provides functionality for: +//! - Gathering deliverables, files, and task outputs from completed contracts +//! - Building evaluation prompts using directive and acceptance criteria +//! - Calling LLM to evaluate work against requirements +//! - Parsing evaluation responses + +use serde::{Deserialize, Serialize}; +use sqlx::PgPool; +use uuid::Uuid; + +use crate::db::{ + models::{ + ChainContract, ChainDirective, Contract, ContractEvaluation, CreateContractEvaluationRequest, + DirectiveAcceptanceCriterion, DirectiveRequirement, EvaluationCriterionResult, + }, + repository, +}; + +use super::claude::{ClaudeClient, ClaudeModel, Message, MessageContent}; +use super::tools::Tool; + +/// Result of contract evaluation +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ContractEvaluationResult { + /// Whether the contract passed evaluation + pub passed: bool, + /// Overall score from 0.0 to 1.0 + pub overall_score: f64, + /// Results for each acceptance criterion + pub criteria_results: Vec<EvaluationCriterionResult>, + /// Summary feedback from the evaluator + pub summary_feedback: String, + /// Instructions for rework if failed + pub rework_instructions: Option<String>, +} + +/// Context gathered for evaluation +#[derive(Debug, Clone)] +pub struct EvaluationContext { + /// The contract being evaluated + pub contract: Contract, + /// The chain contract record + pub chain_contract: ChainContract, + /// The directive document + pub directive: ChainDirective, + /// Files associated with the contract + pub files: Vec<FileContent>, + /// Task outputs from the contract + pub task_outputs: Vec<TaskOutput>, + /// Deliverables marked as complete + pub deliverables: Vec<DeliverableInfo>, + /// Acceptance criteria specific to this contract + pub acceptance_criteria: Vec<DirectiveAcceptanceCriterion>, + /// Requirements mapped to this contract + pub requirements: Vec<DirectiveRequirement>, +} + +/// File content for evaluation +#[derive(Debug, Clone, Serialize)] +pub struct FileContent { + pub path: String, + pub description: Option<String>, + pub content: String, + pub is_deliverable: bool, +} + +/// Task output for evaluation +#[derive(Debug, Clone, Serialize)] +pub struct TaskOutput { + pub task_name: String, + pub output_summary: String, + pub exit_code: Option<i32>, +} + +/// Deliverable info for evaluation +#[derive(Debug, Clone, Serialize)] +pub struct DeliverableInfo { + pub name: String, + pub status: String, + pub file_path: Option<String>, +} + +/// Error types for evaluation +#[derive(Debug, thiserror::Error)] +pub enum EvaluationError { + #[error("Database error: {0}")] + Database(#[from] sqlx::Error), + + #[error("Contract not found: {0}")] + ContractNotFound(Uuid), + + #[error("Chain contract not found for contract: {0}")] + ChainContractNotFound(Uuid), + + #[error("Directive not found for chain: {0}")] + DirectiveNotFound(Uuid), + + #[error("LLM evaluation failed: {0}")] + LlmError(String), + + #[error("Failed to parse evaluation response: {0}")] + ParseError(String), +} + +/// Contract evaluator for directive-driven evaluation +pub struct ContractEvaluator { + pool: PgPool, + claude_client: ClaudeClient, + model: ClaudeModel, + /// Minimum score required to pass (default 0.8) + pass_threshold: f64, +} + +impl ContractEvaluator { + /// Create a new evaluator + pub fn new(pool: PgPool, claude_client: ClaudeClient) -> Self { + Self { + pool, + claude_client, + model: ClaudeModel::Sonnet, + pass_threshold: 0.8, + } + } + + /// Set the LLM model to use for evaluation + pub fn with_model(mut self, model: ClaudeModel) -> Self { + self.model = model; + self + } + + /// Set the pass threshold + pub fn with_pass_threshold(mut self, threshold: f64) -> Self { + self.pass_threshold = threshold; + self + } + + /// Evaluate a completed contract against the directive + pub async fn evaluate_contract( + &self, + contract_id: Uuid, + owner_id: Uuid, + ) -> Result<ContractEvaluationResult, EvaluationError> { + // Gather evaluation context + let context = self.gather_context(contract_id, owner_id).await?; + + // Build evaluation prompt + let prompt = self.build_evaluation_prompt(&context); + + // Call LLM for evaluation + let response = self.call_llm_for_evaluation(&prompt).await?; + + // Parse the response + let result = self.parse_evaluation_response(&response, &context)?; + + Ok(result) + } + + /// Gather all context needed for evaluation + async fn gather_context( + &self, + contract_id: Uuid, + owner_id: Uuid, + ) -> Result<EvaluationContext, EvaluationError> { + // Get contract + let contract = repository::get_contract_for_owner(&self.pool, contract_id, owner_id) + .await? + .ok_or(EvaluationError::ContractNotFound(contract_id))?; + + // Get chain contract + let chain_contract = repository::get_chain_contract_by_contract_id(&self.pool, contract_id) + .await? + .ok_or(EvaluationError::ChainContractNotFound(contract_id))?; + + // Get directive + let directive = repository::get_chain_directive(&self.pool, chain_contract.chain_id) + .await? + .ok_or(EvaluationError::DirectiveNotFound(chain_contract.chain_id))?; + + // Get files directly from repository + let contract_files = repository::list_files_in_contract(&self.pool, contract_id, owner_id) + .await + .unwrap_or_default(); + + // Get tasks directly from repository + let contract_tasks = repository::list_tasks_in_contract(&self.pool, contract_id, owner_id) + .await + .unwrap_or_default(); + + // Build file contents from FileSummary + // Note: FileSummary doesn't have content, so we use name and description + let files: Vec<FileContent> = contract_files.iter().map(|f| { + FileContent { + path: f.repo_file_path.clone().unwrap_or_else(|| f.name.clone()), + description: f.description.clone(), + content: format!("[File: {} - content not loaded in summary view]", f.name), + is_deliverable: false, // FileSummary doesn't track deliverable status + } + }).collect(); + + // Build task outputs from TaskSummary + let task_outputs: Vec<TaskOutput> = contract_tasks.iter().map(|t| { + TaskOutput { + task_name: t.name.clone(), + output_summary: t.progress_summary.clone().unwrap_or_else(|| format!("Status: {}", t.status)), + exit_code: None, + } + }).collect(); + + // Build deliverables info from files marked as deliverables + // Since FileSummary doesn't have deliverable info, we treat all files as potential deliverables + let deliverables: Vec<DeliverableInfo> = contract_files.iter() + .map(|f| DeliverableInfo { + name: f.name.clone(), + status: "complete".to_string(), + file_path: f.repo_file_path.clone(), + }) + .collect(); + + // Parse requirements and acceptance criteria from directive + let requirements: Vec<DirectiveRequirement> = + serde_json::from_value(directive.requirements.clone()).unwrap_or_default(); + + let all_criteria: Vec<DirectiveAcceptanceCriterion> = + serde_json::from_value(directive.acceptance_criteria.clone()).unwrap_or_default(); + + // Get contract definition to find mapped requirements + // For now, use all acceptance criteria + let acceptance_criteria = all_criteria; + + Ok(EvaluationContext { + contract, + chain_contract, + directive, + files, + task_outputs, + deliverables, + acceptance_criteria, + requirements, + }) + } + + /// Build the evaluation prompt + fn build_evaluation_prompt(&self, context: &EvaluationContext) -> String { + let mut prompt = String::new(); + + prompt.push_str("# Contract Completion Evaluation\n\n"); + prompt.push_str("You are evaluating whether a contract has been completed successfully against its requirements.\n\n"); + + // Contract info + prompt.push_str("## Contract Information\n\n"); + prompt.push_str(&format!("**Name:** {}\n", context.contract.name)); + if let Some(ref desc) = context.contract.description { + prompt.push_str(&format!("**Description:** {}\n", desc)); + } + prompt.push_str(&format!("**Type:** {}\n", context.contract.contract_type)); + prompt.push_str(&format!("**Phase:** {}\n", context.contract.phase)); + prompt.push_str("\n"); + + // Requirements + if !context.requirements.is_empty() { + prompt.push_str("## Requirements\n\n"); + for req in &context.requirements { + prompt.push_str(&format!("- **{}** ({}): {}\n", req.id, req.priority, req.title)); + if !req.description.is_empty() { + prompt.push_str(&format!(" {}\n", req.description)); + } + } + prompt.push_str("\n"); + } + + // Acceptance criteria + if !context.acceptance_criteria.is_empty() { + prompt.push_str("## Acceptance Criteria\n\n"); + for (i, criterion) in context.acceptance_criteria.iter().enumerate() { + prompt.push_str(&format!("{}. **{}**\n", i + 1, criterion.description)); + prompt.push_str(&format!(" - Testable: {}\n", criterion.testable)); + if !criterion.requirement_ids.is_empty() { + prompt.push_str(&format!(" - Covers: {}\n", criterion.requirement_ids.join(", "))); + } + } + prompt.push_str("\n"); + } + + // Deliverables + if !context.deliverables.is_empty() { + prompt.push_str("## Deliverables\n\n"); + for d in &context.deliverables { + prompt.push_str(&format!("- {} ({})\n", d.name, d.status)); + } + prompt.push_str("\n"); + } + + // Files + if !context.files.is_empty() { + prompt.push_str("## Files Created/Modified\n\n"); + for file in &context.files { + prompt.push_str(&format!("### {}", file.path)); + if file.is_deliverable { + prompt.push_str(" [DELIVERABLE]"); + } + prompt.push_str("\n"); + if let Some(ref desc) = file.description { + prompt.push_str(&format!("*{}*\n", desc)); + } + // Truncate content if too long + let content = if file.content.len() > 5000 { + format!("{}...\n[Content truncated - {} chars total]", + &file.content[..5000], file.content.len()) + } else { + file.content.clone() + }; + prompt.push_str("```\n"); + prompt.push_str(&content); + prompt.push_str("\n```\n\n"); + } + } + + // Task outputs + if !context.task_outputs.is_empty() { + prompt.push_str("## Task Outputs\n\n"); + for task in &context.task_outputs { + prompt.push_str(&format!("### {}\n", task.task_name)); + prompt.push_str(&format!("{}\n\n", task.output_summary)); + } + } + + // Evaluation instructions + prompt.push_str("## Evaluation Instructions\n\n"); + prompt.push_str("Please evaluate the completed work against the requirements and acceptance criteria.\n\n"); + prompt.push_str("For each acceptance criterion, determine if it has been met and provide a brief explanation.\n\n"); + prompt.push_str("Respond with a JSON object in the following format:\n\n"); + prompt.push_str("```json\n"); + prompt.push_str(r#"{ + "passed": true/false, + "overallScore": 0.0-1.0, + "criteriaResults": [ + { + "criterionId": "criterion identifier or index", + "met": true/false, + "score": 0.0-1.0, + "feedback": "explanation of why criterion was/wasn't met" + } + ], + "summaryFeedback": "overall summary of the evaluation", + "reworkInstructions": "if failed, specific instructions for what needs to be fixed (null if passed)" +} +"#); + prompt.push_str("```\n\n"); + prompt.push_str(&format!("The pass threshold is {}. ", self.pass_threshold)); + prompt.push_str("A contract passes if the overall score is >= the threshold AND all critical criteria are met.\n"); + + prompt + } + + /// Call LLM for evaluation + async fn call_llm_for_evaluation(&self, prompt: &str) -> Result<String, EvaluationError> { + let messages = vec![Message { + role: "user".to_string(), + content: MessageContent::Text(prompt.to_string()), + }]; + + // Use chat_with_tools with empty tools array for simple chat + let empty_tools: Vec<Tool> = vec![]; + let result = self + .claude_client + .chat_with_tools(messages, &empty_tools) + .await + .map_err(|e| EvaluationError::LlmError(e.to_string()))?; + + // ChatResult.content is already an Option<String> + let text = result.content.unwrap_or_default(); + + Ok(text) + } + + /// Parse the LLM response into an evaluation result + fn parse_evaluation_response( + &self, + response: &str, + context: &EvaluationContext, + ) -> Result<ContractEvaluationResult, EvaluationError> { + // Extract JSON from response (may be wrapped in markdown code blocks) + let json_str = extract_json_from_response(response)?; + + // Parse the JSON + let parsed: EvaluationResponseJson = serde_json::from_str(&json_str) + .map_err(|e| EvaluationError::ParseError(format!("JSON parse error: {}", e)))?; + + // Convert to our result type + let criteria_results: Vec<EvaluationCriterionResult> = parsed + .criteria_results + .into_iter() + .map(|cr| EvaluationCriterionResult { + criterion_id: cr.criterion_id.clone(), + criterion_text: cr.criterion_id, // Use ID as text if not provided + passed: cr.passed, + score: cr.score, + feedback: cr.feedback, + evidence: vec![], + }) + .collect(); + + // Determine pass/fail based on threshold and results + let passed = parsed.passed && parsed.overall_score >= self.pass_threshold; + + Ok(ContractEvaluationResult { + passed, + overall_score: parsed.overall_score, + criteria_results, + summary_feedback: parsed.summary_feedback, + rework_instructions: if passed { None } else { parsed.rework_instructions }, + }) + } + + /// Save evaluation result to database + pub async fn save_evaluation( + &self, + contract_id: Uuid, + chain_id: Uuid, + chain_contract_id: Uuid, + result: &ContractEvaluationResult, + ) -> Result<ContractEvaluation, EvaluationError> { + let req = CreateContractEvaluationRequest { + contract_id, + chain_id: Some(chain_id), + chain_contract_id: Some(chain_contract_id), + evaluator_model: Some(format!("{:?}", self.model)), + passed: result.passed, + overall_score: Some(result.overall_score), + criteria_results: result.criteria_results.clone(), + summary_feedback: result.summary_feedback.clone(), + rework_instructions: result.rework_instructions.clone(), + }; + + let evaluation = repository::create_contract_evaluation(&self.pool, req).await?; + + // Update chain contract status + let status = if result.passed { "passed" } else { "failed" }; + repository::update_chain_contract_evaluation_status( + &self.pool, + chain_contract_id, + status, + Some(evaluation.id), + result.rework_instructions.as_deref(), + ) + .await?; + + Ok(evaluation) + } +} + +/// JSON structure for parsing LLM response +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct EvaluationResponseJson { + passed: bool, + overall_score: f64, + criteria_results: Vec<CriterionResultJson>, + summary_feedback: String, + rework_instructions: Option<String>, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct CriterionResultJson { + criterion_id: String, + #[serde(alias = "met")] + passed: bool, + #[serde(default)] + score: f64, + feedback: String, +} + +/// Extract JSON from a response that may contain markdown code blocks +fn extract_json_from_response(response: &str) -> Result<String, EvaluationError> { + // Try to find JSON in code blocks first + if let Some(start) = response.find("```json") { + let json_start = start + 7; + if let Some(end) = response[json_start..].find("```") { + return Ok(response[json_start..json_start + end].trim().to_string()); + } + } + + // Try plain code blocks + if let Some(start) = response.find("```") { + let json_start = start + 3; + // Skip any language identifier on the same line + let actual_start = response[json_start..] + .find('\n') + .map(|i| json_start + i + 1) + .unwrap_or(json_start); + if let Some(end) = response[actual_start..].find("```") { + return Ok(response[actual_start..actual_start + end].trim().to_string()); + } + } + + // Try to find raw JSON (starts with {) + if let Some(start) = response.find('{') { + // Find matching closing brace + let mut depth = 0; + let mut end = start; + for (i, c) in response[start..].char_indices() { + match c { + '{' => depth += 1, + '}' => { + depth -= 1; + if depth == 0 { + end = start + i + 1; + break; + } + } + _ => {} + } + } + if end > start { + return Ok(response[start..end].to_string()); + } + } + + Err(EvaluationError::ParseError( + "Could not find JSON in response".to_string(), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_json_from_code_block() { + let response = r#"Here is the evaluation: + +```json +{ + "passed": true, + "overallScore": 0.85 +} +``` + +Done."#; + + let json = extract_json_from_response(response).unwrap(); + assert!(json.contains("\"passed\": true")); + } + + #[test] + fn test_extract_json_raw() { + let response = r#"The result is {"passed": false, "overallScore": 0.5}"#; + let json = extract_json_from_response(response).unwrap(); + assert!(json.contains("\"passed\": false")); + } +} diff --git a/makima/src/llm/contract_tools.rs b/makima/src/llm/contract_tools.rs index 0f50132..7f7e849 100644 --- a/makima/src/llm/contract_tools.rs +++ b/makima/src/llm/contract_tools.rs @@ -460,6 +460,214 @@ pub static CONTRACT_TOOLS: once_cell::sync::Lazy<Vec<Tool>> = once_cell::sync::L "required": ["file_id"] }), }, + // ============================================================================= + // Chain Directive Tools (for directive contracts orchestrating chains) + // ============================================================================= + Tool { + name: "create_chain_from_directive".to_string(), + description: "Create a new chain that this directive contract will orchestrate. The chain starts in 'pending' status and contract definitions can be added. Only available to directive contracts.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name for the chain" + }, + "description": { + "type": "string", + "description": "Description of what the chain accomplishes" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "add_chain_contract".to_string(), + description: "Add a contract definition to the chain being orchestrated. The contract will be created when its dependencies are met.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Contract name" + }, + "description": { + "type": "string", + "description": "What this contract accomplishes" + }, + "contract_type": { + "type": "string", + "enum": ["simple", "execute", "checkpoint"], + "description": "Contract type (default: simple)" + }, + "depends_on": { + "type": "array", + "items": { "type": "string" }, + "description": "Names of contracts this depends on" + }, + "requirement_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Requirement IDs this contract addresses (for traceability)" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "set_chain_dependencies".to_string(), + description: "Set which contracts depend on which other contracts in the chain.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "contract_name": { + "type": "string", + "description": "Name of contract that has dependencies" + }, + "depends_on": { + "type": "array", + "items": { "type": "string" }, + "description": "Names of contracts it depends on" + } + }, + "required": ["contract_name", "depends_on"] + }), + }, + Tool { + name: "modify_chain_contract".to_string(), + description: "Update a contract definition in the chain.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the contract to modify" + }, + "new_name": { + "type": "string", + "description": "New name for the contract" + }, + "description": { + "type": "string", + "description": "New description" + }, + "add_requirement_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Requirement IDs to add" + }, + "remove_requirement_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Requirement IDs to remove" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "remove_chain_contract".to_string(), + description: "Remove a contract definition from the chain (only if not yet instantiated).".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the contract to remove" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "preview_chain_dag".to_string(), + description: "Generate a visual preview of the chain DAG structure for review.".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "validate_chain_directive".to_string(), + description: "Validate the chain specification is complete and valid (no cycles, all dependencies exist, all requirements covered).".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "finalize_chain_directive".to_string(), + description: "Lock the directive and start chain execution. Call this after validation passes and user has approved (if phase_guard enabled).".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "auto_start": { + "type": "boolean", + "description": "Whether to immediately start the chain (default: true)" + } + } + }), + }, + Tool { + name: "get_chain_status".to_string(), + description: "Get current status of the chain being orchestrated, including contract statuses and progress.".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "get_uncovered_requirements".to_string(), + description: "List requirements from the directive that are not yet mapped to any contract.".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "evaluate_contract_completion".to_string(), + description: "Evaluate whether a completed chain contract meets the directive requirements. Use this after a contract completes to assess if it satisfies acceptance criteria.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "contract_id": { + "type": "string", + "description": "ID of the completed contract to evaluate" + }, + "passed": { + "type": "boolean", + "description": "Whether the evaluation passed" + }, + "feedback": { + "type": "string", + "description": "Evaluation feedback and rationale" + }, + "rework_instructions": { + "type": "string", + "description": "Instructions for rework if evaluation failed" + } + }, + "required": ["contract_id", "passed", "feedback"] + }), + }, + Tool { + name: "request_rework".to_string(), + description: "Request rework on a completed contract that didn't meet requirements. This will block chain progression and notify the contract to address issues.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "contract_id": { + "type": "string", + "description": "ID of the contract needing rework" + }, + "feedback": { + "type": "string", + "description": "Detailed feedback on what needs to be fixed" + } + }, + "required": ["contract_id", "feedback"] + }), + }, ] }); @@ -546,6 +754,50 @@ pub enum ContractToolRequest { include_decisions: bool, include_action_items: bool, }, + + // Chain directive tools (for directive contracts) + CreateChainFromDirective { + name: String, + description: Option<String>, + }, + AddChainContract { + name: String, + description: Option<String>, + contract_type: Option<String>, + depends_on: Option<Vec<String>>, + requirement_ids: Option<Vec<String>>, + }, + SetChainDependencies { + contract_name: String, + depends_on: Vec<String>, + }, + ModifyChainContract { + name: String, + new_name: Option<String>, + description: Option<String>, + add_requirement_ids: Option<Vec<String>>, + remove_requirement_ids: Option<Vec<String>>, + }, + RemoveChainContract { + name: String, + }, + PreviewChainDag, + ValidateChainDirective, + FinalizeChainDirective { + auto_start: bool, + }, + GetChainStatus, + GetUncoveredRequirements, + EvaluateContractCompletion { + contract_id: Uuid, + passed: bool, + feedback: String, + rework_instructions: Option<String>, + }, + RequestRework { + contract_id: Uuid, + feedback: String, + }, } /// Task definition for chained task creation @@ -617,6 +869,20 @@ pub fn parse_contract_tool_call(call: &super::tools::ToolCall) -> ContractToolEx "analyze_transcript" => parse_analyze_transcript(call), "create_contract_from_transcript" => parse_create_contract_from_transcript(call), + // Chain directive tools + "create_chain_from_directive" => parse_create_chain_from_directive(call), + "add_chain_contract" => parse_add_chain_contract(call), + "set_chain_dependencies" => parse_set_chain_dependencies(call), + "modify_chain_contract" => parse_modify_chain_contract(call), + "remove_chain_contract" => parse_remove_chain_contract(call), + "preview_chain_dag" => parse_preview_chain_dag(), + "validate_chain_directive" => parse_validate_chain_directive(), + "finalize_chain_directive" => parse_finalize_chain_directive(call), + "get_chain_status" => parse_get_chain_status(), + "get_uncovered_requirements" => parse_get_uncovered_requirements(), + "evaluate_contract_completion" => parse_evaluate_contract_completion(call), + "request_rework" => parse_request_rework(call), + _ => ContractToolExecutionResult { success: false, message: format!("Unknown contract tool: {}", call.name), @@ -1206,6 +1472,229 @@ fn parse_create_contract_from_transcript(call: &super::tools::ToolCall) -> Contr } // ============================================================================= +// Chain Directive Tool Parsing +// ============================================================================= + +fn parse_create_chain_from_directive(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + let description = call.arguments.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()); + + ContractToolExecutionResult { + success: true, + message: "Creating chain from directive...".to_string(), + data: None, + request: Some(ContractToolRequest::CreateChainFromDirective { name, description }), + pending_questions: None, + } +} + +fn parse_add_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + + let description = call.arguments.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()); + let contract_type = call.arguments.get("contract_type").and_then(|v| v.as_str()).map(|s| s.to_string()); + let depends_on = call.arguments.get("depends_on").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + let requirement_ids = call.arguments.get("requirement_ids").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + + ContractToolExecutionResult { + success: true, + message: format!("Adding contract '{}' to chain...", name), + data: None, + request: Some(ContractToolRequest::AddChainContract { + name, + description, + contract_type, + depends_on, + requirement_ids, + }), + pending_questions: None, + } +} + +fn parse_set_chain_dependencies(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let contract_name = call.arguments.get("contract_name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(contract_name) = contract_name else { + return error_result("Missing required parameter: contract_name"); + }; + + let depends_on = call.arguments.get("depends_on").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }).unwrap_or_default(); + + ContractToolExecutionResult { + success: true, + message: format!("Setting dependencies for '{}'...", contract_name), + data: None, + request: Some(ContractToolRequest::SetChainDependencies { contract_name, depends_on }), + pending_questions: None, + } +} + +fn parse_modify_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + + let new_name = call.arguments.get("new_name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let description = call.arguments.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()); + let add_requirement_ids = call.arguments.get("add_requirement_ids").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + let remove_requirement_ids = call.arguments.get("remove_requirement_ids").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + + ContractToolExecutionResult { + success: true, + message: format!("Modifying contract '{}'...", name), + data: None, + request: Some(ContractToolRequest::ModifyChainContract { + name, + new_name, + description, + add_requirement_ids, + remove_requirement_ids, + }), + pending_questions: None, + } +} + +fn parse_remove_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + + ContractToolExecutionResult { + success: true, + message: format!("Removing contract '{}'...", name), + data: None, + request: Some(ContractToolRequest::RemoveChainContract { name }), + pending_questions: None, + } +} + +fn parse_preview_chain_dag() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Generating chain DAG preview...".to_string(), + data: None, + request: Some(ContractToolRequest::PreviewChainDag), + pending_questions: None, + } +} + +fn parse_validate_chain_directive() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Validating chain directive...".to_string(), + data: None, + request: Some(ContractToolRequest::ValidateChainDirective), + pending_questions: None, + } +} + +fn parse_finalize_chain_directive(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let auto_start = call.arguments.get("auto_start").and_then(|v| v.as_bool()).unwrap_or(true); + + ContractToolExecutionResult { + success: true, + message: "Finalizing chain directive...".to_string(), + data: None, + request: Some(ContractToolRequest::FinalizeChainDirective { auto_start }), + pending_questions: None, + } +} + +fn parse_get_chain_status() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Getting chain status...".to_string(), + data: None, + request: Some(ContractToolRequest::GetChainStatus), + pending_questions: None, + } +} + +fn parse_get_uncovered_requirements() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Getting uncovered requirements...".to_string(), + data: None, + request: Some(ContractToolRequest::GetUncoveredRequirements), + pending_questions: None, + } +} + +fn parse_evaluate_contract_completion(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let contract_id = parse_uuid_arg(call, "contract_id"); + let Some(contract_id) = contract_id else { + return error_result("Missing or invalid required parameter: contract_id"); + }; + + let passed = call.arguments.get("passed").and_then(|v| v.as_bool()).unwrap_or(false); + let feedback = call.arguments.get("feedback").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(feedback) = feedback else { + return error_result("Missing required parameter: feedback"); + }; + let rework_instructions = call.arguments.get("rework_instructions").and_then(|v| v.as_str()).map(|s| s.to_string()); + + ContractToolExecutionResult { + success: true, + message: format!("Evaluating contract completion (passed: {})...", passed), + data: None, + request: Some(ContractToolRequest::EvaluateContractCompletion { + contract_id, + passed, + feedback, + rework_instructions, + }), + pending_questions: None, + } +} + +fn parse_request_rework(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let contract_id = parse_uuid_arg(call, "contract_id"); + let Some(contract_id) = contract_id else { + return error_result("Missing or invalid required parameter: contract_id"); + }; + + let feedback = call.arguments.get("feedback").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(feedback) = feedback else { + return error_result("Missing required parameter: feedback"); + }; + + ContractToolExecutionResult { + success: true, + message: "Requesting rework...".to_string(), + data: None, + request: Some(ContractToolRequest::RequestRework { contract_id, feedback }), + pending_questions: None, + } +} + +// ============================================================================= // Helper Functions // ============================================================================= diff --git a/makima/src/llm/mod.rs b/makima/src/llm/mod.rs index 4c84ced..702e1fd 100644 --- a/makima/src/llm/mod.rs +++ b/makima/src/llm/mod.rs @@ -1,6 +1,7 @@ //! LLM integration module for file editing via tool calling. pub mod claude; +pub mod contract_evaluator; pub mod contract_tools; pub mod discuss_tools; pub mod groq; @@ -44,6 +45,9 @@ pub use transcript_analyzer::{ ExtractedActionItem, SpeakerStats, format_transcript_for_analysis, calculate_speaker_stats, build_analysis_prompt, parse_analysis_response, }; +pub use contract_evaluator::{ + ContractEvaluator, ContractEvaluationResult, EvaluationContext, EvaluationError, +}; /// Available LLM providers and models #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] |
