diff options
| author | soryu <soryu@soryu.co> | 2026-02-05 01:42:59 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2026-02-05 01:42:59 +0000 |
| commit | 6a0c912a3fbd8e9b3e87ef40e960803d819d966d (patch) | |
| tree | b2c50c490811286d163e40f8d624ee8d43c0ce43 /makima | |
| parent | 0302b4596e14210884df5d645df9a179d8f0c1c6 (diff) | |
| download | soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.tar.gz soryu-6a0c912a3fbd8e9b3e87ef40e960803d819d966d.zip | |
Add makima directives
Diffstat (limited to 'makima')
| -rw-r--r-- | makima/migrations/20260205000000_chain_directives.sql | 129 | ||||
| -rw-r--r-- | makima/src/bin/makima.rs | 11 | ||||
| -rw-r--r-- | makima/src/daemon/skills/chain_directive.md | 224 | ||||
| -rw-r--r-- | makima/src/db/models.rs | 351 | ||||
| -rw-r--r-- | makima/src/db/repository.rs | 511 | ||||
| -rw-r--r-- | makima/src/llm/contract_evaluator.rs | 555 | ||||
| -rw-r--r-- | makima/src/llm/contract_tools.rs | 489 | ||||
| -rw-r--r-- | makima/src/llm/mod.rs | 4 | ||||
| -rw-r--r-- | makima/src/server/handlers/chains.rs | 62 | ||||
| -rw-r--r-- | makima/src/server/handlers/contract_chat.rs | 1214 | ||||
| -rw-r--r-- | makima/src/server/handlers/contracts.rs | 46 | ||||
| -rw-r--r-- | makima/src/server/mod.rs | 1 |
12 files changed, 3579 insertions, 18 deletions
diff --git a/makima/migrations/20260205000000_chain_directives.sql b/makima/migrations/20260205000000_chain_directives.sql new file mode 100644 index 0000000..d3d29c7 --- /dev/null +++ b/makima/migrations/20260205000000_chain_directives.sql @@ -0,0 +1,129 @@ +-- Chain Directives: Structured specification documents that drive chain creation and evaluation +-- Contract Evaluations: LLM evaluation results for completed contracts + +-- ============================================================================= +-- New Tables +-- ============================================================================= + +-- Chain directives: formal specification documents for directive-driven chains +CREATE TABLE IF NOT EXISTS chain_directives ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + chain_id UUID NOT NULL REFERENCES chains(id) ON DELETE CASCADE, + version INTEGER NOT NULL DEFAULT 1, + + -- Directive content (structured JSON) + -- Requirements: [{ id, title, description, priority, category, parentId? 
}] + requirements JSONB NOT NULL DEFAULT '[]', + -- Acceptance criteria: [{ id, requirementIds[], description, testable, verificationMethod }] + acceptance_criteria JSONB NOT NULL DEFAULT '[]', + -- Constraints: [{ id, type, description, impact }] + constraints JSONB NOT NULL DEFAULT '[]', + -- External dependencies: [{ id, name, type, status, requiredBy[] }] + external_dependencies JSONB NOT NULL DEFAULT '[]', + + -- Metadata + source_type VARCHAR(32) NOT NULL DEFAULT 'llm_generated', -- 'manual', 'llm_generated', 'imported' + + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + + CONSTRAINT chain_directives_chain_unique UNIQUE (chain_id) +); + +CREATE INDEX IF NOT EXISTS idx_chain_directives_chain_id ON chain_directives(chain_id); + +-- Contract evaluations: LLM evaluation results after contract completion +CREATE TABLE IF NOT EXISTS contract_evaluations ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + contract_id UUID NOT NULL REFERENCES contracts(id) ON DELETE CASCADE, + chain_id UUID REFERENCES chains(id) ON DELETE SET NULL, + chain_contract_id UUID REFERENCES chain_contracts(id) ON DELETE SET NULL, + + -- Evaluation metadata + evaluation_number INTEGER NOT NULL DEFAULT 1, + evaluator_model VARCHAR(100), + + -- Results + passed BOOLEAN NOT NULL, + overall_score DECIMAL(3,2), -- 0.00 to 1.00 + + -- Structured feedback + -- criteria_results: [{ criterionId, criterionText, passed, score, feedback, evidence[] }] + criteria_results JSONB NOT NULL DEFAULT '[]', + summary_feedback TEXT NOT NULL, + rework_instructions TEXT, + + -- Context snapshot for reproducibility + directive_snapshot JSONB, + deliverables_snapshot JSONB, + + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_contract_evaluations_contract ON contract_evaluations(contract_id); +CREATE INDEX IF NOT EXISTS idx_contract_evaluations_chain ON 
contract_evaluations(chain_id); +CREATE INDEX IF NOT EXISTS idx_contract_evaluations_chain_contract ON contract_evaluations(chain_contract_id); + +-- ============================================================================= +-- Table Modifications +-- ============================================================================= + +-- Extend chains table with directive support +ALTER TABLE chains + ADD COLUMN IF NOT EXISTS directive_contract_id UUID REFERENCES contracts(id) ON DELETE SET NULL, + ADD COLUMN IF NOT EXISTS directive_document TEXT, + ADD COLUMN IF NOT EXISTS evaluation_enabled BOOLEAN NOT NULL DEFAULT true, + ADD COLUMN IF NOT EXISTS default_pass_threshold DECIMAL(3,2) DEFAULT 0.80, + ADD COLUMN IF NOT EXISTS default_max_retries INTEGER DEFAULT 3; + +CREATE INDEX IF NOT EXISTS idx_chains_directive_contract ON chains(directive_contract_id) WHERE directive_contract_id IS NOT NULL; + +-- Extend contracts table with chain directive support +ALTER TABLE contracts + ADD COLUMN IF NOT EXISTS spawned_chain_id UUID REFERENCES chains(id) ON DELETE SET NULL, + ADD COLUMN IF NOT EXISTS is_chain_directive BOOLEAN NOT NULL DEFAULT FALSE; + +CREATE INDEX IF NOT EXISTS idx_contracts_spawned_chain ON contracts(spawned_chain_id) WHERE spawned_chain_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_contracts_is_directive ON contracts(is_chain_directive) WHERE is_chain_directive = true; + +-- Extend chain_contracts table with evaluation tracking +ALTER TABLE chain_contracts + ADD COLUMN IF NOT EXISTS evaluation_status VARCHAR(50) NOT NULL DEFAULT 'pending', + ADD COLUMN IF NOT EXISTS evaluation_retry_count INTEGER NOT NULL DEFAULT 0, + ADD COLUMN IF NOT EXISTS max_evaluation_retries INTEGER NOT NULL DEFAULT 3, + ADD COLUMN IF NOT EXISTS last_evaluation_id UUID REFERENCES contract_evaluations(id) ON DELETE SET NULL, + ADD COLUMN IF NOT EXISTS rework_feedback TEXT, + ADD COLUMN IF NOT EXISTS rework_started_at TIMESTAMPTZ, + ADD COLUMN IF NOT EXISTS original_completion_at 
TIMESTAMPTZ; + +-- Extend chain_contract_definitions table with requirement traceability +ALTER TABLE chain_contract_definitions + ADD COLUMN IF NOT EXISTS requirement_ids TEXT[] DEFAULT '{}', + ADD COLUMN IF NOT EXISTS acceptance_criteria JSONB DEFAULT '[]', + ADD COLUMN IF NOT EXISTS evaluation_enabled BOOLEAN NOT NULL DEFAULT true, + ADD COLUMN IF NOT EXISTS pass_threshold DECIMAL(3,2) DEFAULT 0.80; + +-- ============================================================================= +-- Comments +-- ============================================================================= + +COMMENT ON TABLE chain_directives IS 'Formal directive documents that drive chain creation and contract evaluation'; +COMMENT ON COLUMN chain_directives.requirements IS 'Enumerated requirements with IDs for traceability'; +COMMENT ON COLUMN chain_directives.acceptance_criteria IS 'Testable criteria mapped to requirements'; +COMMENT ON COLUMN chain_directives.constraints IS 'Technical, business, time, or resource constraints'; +COMMENT ON COLUMN chain_directives.external_dependencies IS 'External APIs, services, or data dependencies'; + +COMMENT ON TABLE contract_evaluations IS 'LLM evaluation results after contract completion within a chain'; +COMMENT ON COLUMN contract_evaluations.criteria_results IS 'Per-criterion evaluation results with scores and feedback'; +COMMENT ON COLUMN contract_evaluations.directive_snapshot IS 'Snapshot of directive at evaluation time for audit'; + +COMMENT ON COLUMN chains.directive_contract_id IS 'Reference to the directive contract that created/orchestrates this chain'; +COMMENT ON COLUMN chains.evaluation_enabled IS 'Whether LLM evaluation is enabled after contract completion'; + +COMMENT ON COLUMN contracts.spawned_chain_id IS 'Reference to chain spawned by this directive contract'; +COMMENT ON COLUMN contracts.is_chain_directive IS 'Whether this contract is a chain directive orchestrator'; + +COMMENT ON COLUMN chain_contracts.evaluation_status IS 
'Evaluation state: pending, evaluating, passed, failed, rework, escalated'; +COMMENT ON COLUMN chain_contracts.evaluation_retry_count IS 'Number of evaluation retry attempts'; diff --git a/makima/src/bin/makima.rs b/makima/src/bin/makima.rs index 71272f1..f9c981f 100644 --- a/makima/src/bin/makima.rs +++ b/makima/src/bin/makima.rs @@ -962,8 +962,15 @@ async fn run_chain( if let Some(desc) = &chain.description { println!("Description: {}", desc); } - if let Some(repo) = &chain.repository_url { - println!("Repository: {}", repo); + if !chain.repositories.is_empty() { + println!("Repositories:"); + for repo in &chain.repositories { + if let Some(url) = &repo.repository_url { + println!(" - {} ({})", repo.name, url); + } else if let Some(path) = &repo.local_path { + println!(" - {} (local: {})", repo.name, path); + } + } } println!(); diff --git a/makima/src/daemon/skills/chain_directive.md b/makima/src/daemon/skills/chain_directive.md new file mode 100644 index 0000000..53ac96b --- /dev/null +++ b/makima/src/daemon/skills/chain_directive.md @@ -0,0 +1,224 @@ +--- +name: makima-chain-directive +description: Directive contract tools for orchestrating chains. Use when creating chains from goals, adding contracts to chains, evaluating completions, or managing chain structure. +--- + +# Chain Directive Contract Tools + +Directive contracts are special contracts that research, plan, create, and orchestrate chains. They use formal directives with requirements and acceptance criteria, and evaluate each contract completion before allowing the chain to progress. + +## Workflow Overview + +1. **Init**: Create a directive contract + empty chain from a goal +2. **Research**: Directive contract explores codebase, understands requirements +3. **Specify**: Write formal directive with requirements (REQ-001, etc.) and acceptance criteria +4. **Plan**: Design chain structure, add contracts, set dependencies +5. **Execute**: Finalize chain, start execution, evaluate completions +6. 
**Review**: All contracts complete, create final report + +## Creating a Chain from a Goal + +### Initialize directive-driven chain +``` +POST /api/v1/chains/init +{ + "goal": "Add OAuth2 authentication support", + "repositoryUrl": "https://github.com/org/repo", + "localPath": "/path/to/repo", + "phaseGuard": true +} +``` + +Returns: +- `chainId` - The created chain +- `directiveContractId` - The directive contract orchestrating the chain +- `supervisorTaskId` - Task ID for the directive contract supervisor + +## Chain Design Tools (for directive contracts) + +These tools are available when working on a directive contract: + +### create_chain_from_directive +Create a new chain linked to this directive contract. +```json +{ + "name": "oauth-implementation", + "description": "Chain for OAuth2 implementation" +} +``` + +### add_chain_contract +Add a contract definition to the chain. +```json +{ + "name": "auth-backend", + "description": "Implement authentication backend", + "contract_type": "implementation", + "depends_on": ["setup"], + "requirement_ids": ["REQ-001", "REQ-002"] +} +``` + +### set_chain_dependencies +Update dependency relationships. +```json +{ + "contract_name": "integration-tests", + "depends_on": ["auth-backend", "auth-frontend"] +} +``` + +### modify_chain_contract +Update a contract definition. +```json +{ + "name": "auth-backend", + "new_name": "authentication-service", + "description": "Updated description", + "add_requirement_ids": ["REQ-003"], + "remove_requirement_ids": ["REQ-001"] +} +``` + +### remove_chain_contract +Remove a contract definition (fails if others depend on it). +```json +{ + "name": "unused-contract" +} +``` + +### preview_chain_dag +Generate visual DAG preview of the chain structure. +Returns ASCII diagram and JSON nodes. + +### validate_chain_directive +Validate chain structure before finalizing. 
+Checks for: +- Empty chains +- Missing dependencies +- Circular dependencies +- Uncovered requirements + +### finalize_chain_directive +Lock the directive and optionally start chain execution. +```json +{ + "auto_start": true +} +``` + +## Orchestration Tools (during execution) + +### get_chain_status +Get current chain progress and contract statuses. +Returns completed/active/pending counts and contract details. + +### get_uncovered_requirements +List requirements not mapped to any contract. +Returns uncovered requirement IDs and coverage percentage. + +### evaluate_contract_completion +Evaluate a completed contract against the directive. +```json +{ + "contract_id": "uuid", + "passed": true, + "feedback": "All acceptance criteria met", + "rework_instructions": null +} +``` + +### request_rework +Reject completion and request rework. +```json +{ + "contract_id": "uuid", + "feedback": "Missing error handling for edge cases" +} +``` + +## Evaluation Flow + +When a contract completes and evaluation is enabled: + +1. Contract status changes to `completed` +2. Chain contract marked as `pending_evaluation` +3. Directive contract evaluates using `evaluate_contract_completion` +4. **Pass**: Chain progresses, downstream contracts created +5. **Fail**: Contract marked for rework, retry count incremented +6. 
After max retries (default 3), escalate to user + +## Directive Document Structure + +The directive contains: + +```json +{ + "requirements": [ + { + "id": "REQ-001", + "title": "User Authentication", + "description": "Users must be able to log in with email/password", + "priority": "must", + "category": "feature" + } + ], + "acceptanceCriteria": [ + { + "id": "AC-001", + "requirementIds": ["REQ-001"], + "description": "Login endpoint returns JWT on valid credentials", + "testable": true, + "verificationMethod": "automated" + } + ], + "constraints": [ + { + "id": "CON-001", + "type": "technical", + "description": "Must use existing PostgreSQL database", + "impact": "high" + } + ], + "externalDependencies": [ + { + "id": "EXT-001", + "name": "OAuth Provider API", + "type": "api", + "status": "available", + "requiredBy": ["REQ-001"] + } + ] +} +``` + +## Example Workflow + +```bash +# 1. Initialize a directive-driven chain +curl -X POST http://localhost:3000/api/v1/chains/init \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"goal": "Add user profile editing feature"}' + +# 2. Directive contract goes through phases: +# - Research: Explores codebase +# - Specify: Writes formal directive +# - Plan: Creates chain contracts using tools +# - Execute: Monitors and evaluates completions + +# 3. Monitor chain progress +curl http://localhost:3000/api/v1/chains/$CHAIN_ID \ + -H "Authorization: Bearer $TOKEN" + +# 4. 
View directive traceability +curl http://localhost:3000/api/v1/chains/$CHAIN_ID/directive/traceability \ + -H "Authorization: Bearer $TOKEN" +``` + +## Key Concepts + +- **Directive Contract**: The orchestrator that creates and manages the chain +- **Formal Directive**: Structured specification with traceable requirements +- **Continuous Evaluation**: LLM evaluates after every contract completion +- **Block & Rework**: Failed evaluations block progress until fixed +- **Dynamic Modification**: Chain structure can be modified during execution diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs index 30e1603..392d019 100644 --- a/makima/src/db/models.rs +++ b/makima/src/db/models.rs @@ -1449,6 +1449,13 @@ pub struct Contract { /// Chain ID if this contract is part of a chain (DAG of contracts) #[serde(skip_serializing_if = "Option::is_none")] pub chain_id: Option<Uuid>, + /// Reference to chain spawned by this directive contract + #[serde(skip_serializing_if = "Option::is_none")] + pub spawned_chain_id: Option<Uuid>, + /// Whether this contract is a chain directive orchestrator + #[serde(default)] + #[sqlx(default)] + pub is_chain_directive: bool, pub version: i32, pub created_at: DateTime<Utc>, pub updated_at: DateTime<Utc>, @@ -2652,12 +2659,28 @@ pub struct Chain { pub loop_current_iteration: Option<i32>, /// Progress check prompt/criteria for evaluating loop completion pub loop_progress_check: Option<String>, + /// Reference to the directive contract that created/orchestrates this chain + pub directive_contract_id: Option<Uuid>, + /// The directive document text (formal specification) + pub directive_document: Option<String>, + /// Whether LLM evaluation is enabled after contract completion + #[serde(default = "default_evaluation_enabled")] + #[sqlx(default)] + pub evaluation_enabled: bool, + /// Default pass threshold for evaluations (0.0-1.0) + pub default_pass_threshold: Option<f64>, + /// Default max retry attempts for evaluations + pub 
default_max_retries: Option<i32>, /// Version for optimistic locking pub version: i32, pub created_at: DateTime<Utc>, pub updated_at: DateTime<Utc>, } +fn default_evaluation_enabled() -> bool { + true +} + /// Chain repository record from the database #[derive(Debug, Clone, FromRow, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] @@ -2709,9 +2732,37 @@ pub struct ChainContract { pub editor_x: Option<f64>, /// Y position for GUI editor pub editor_y: Option<f64>, + /// Evaluation status: pending, evaluating, passed, failed, rework, escalated + #[serde(default = "default_evaluation_status")] + #[sqlx(default)] + pub evaluation_status: String, + /// Number of evaluation retry attempts + #[serde(default)] + #[sqlx(default)] + pub evaluation_retry_count: i32, + /// Maximum evaluation retry attempts (default: 3) + #[serde(default = "default_max_evaluation_retries")] + #[sqlx(default)] + pub max_evaluation_retries: i32, + /// Reference to the last evaluation result + pub last_evaluation_id: Option<Uuid>, + /// Rework feedback/instructions from failed evaluation + pub rework_feedback: Option<String>, + /// When rework was started + pub rework_started_at: Option<DateTime<Utc>>, + /// When contract originally completed (before rework) + pub original_completion_at: Option<DateTime<Utc>>, pub created_at: DateTime<Utc>, } +fn default_evaluation_status() -> String { + "pending".to_string() +} + +fn default_max_evaluation_retries() -> i32 { + 3 +} + /// Chain event for audit trail #[derive(Debug, Clone, FromRow, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] @@ -2765,6 +2816,15 @@ pub struct ChainContractDetail { pub order_index: i32, pub editor_x: Option<f64>, pub editor_y: Option<f64>, + /// Evaluation status: pending, passed, failed, rework + #[sqlx(default)] + pub evaluation_status: Option<String>, + /// Number of evaluation retries + #[sqlx(default)] + pub evaluation_retry_count: i32, + /// Maximum evaluation retry attempts + 
#[sqlx(default)] + pub max_evaluation_retries: i32, } /// DAG graph structure for visualization @@ -3058,6 +3118,19 @@ pub struct ChainContractDefinition { pub deliverables: Option<serde_json::Value>, /// Validation configuration for checkpoint contracts (JSON) pub validation: Option<serde_json::Value>, + /// Requirement IDs this contract addresses (for traceability) + #[sqlx(default)] + #[serde(default)] + pub requirement_ids: Vec<String>, + /// Acceptance criteria for this contract (JSON array) + #[serde(default)] + pub acceptance_criteria: Option<serde_json::Value>, + /// Whether LLM evaluation is enabled for this contract + #[serde(default = "default_evaluation_enabled")] + #[sqlx(default)] + pub evaluation_enabled: bool, + /// Pass threshold for evaluation (0.0-1.0) + pub pass_threshold: Option<f64>, /// Position in GUI editor pub editor_x: Option<f64>, pub editor_y: Option<f64>, @@ -3154,6 +3227,284 @@ pub struct ChainDefinitionGraphResponse { } // ============================================================================= +// Chain Directives (formal specification documents for directive-driven chains) +// ============================================================================= + +/// Chain directive - formal specification document that drives chain creation and evaluation +#[derive(Debug, Clone, FromRow, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ChainDirective { + pub id: Uuid, + pub chain_id: Uuid, + pub version: i32, + /// Requirements as JSON: [{ id, title, description, priority, category, parentId? 
}] + #[sqlx(json)] + pub requirements: serde_json::Value, + /// Acceptance criteria as JSON: [{ id, requirementIds[], description, testable, verificationMethod }] + #[sqlx(json)] + pub acceptance_criteria: serde_json::Value, + /// Constraints as JSON: [{ id, type, description, impact }] + #[sqlx(json)] + pub constraints: serde_json::Value, + /// External dependencies as JSON: [{ id, name, type, status, requiredBy[] }] + #[sqlx(json)] + pub external_dependencies: serde_json::Value, + /// Source type: 'manual', 'llm_generated', 'imported' + pub source_type: String, + pub created_at: DateTime<Utc>, + pub updated_at: DateTime<Utc>, +} + +/// Requirement in a directive +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DirectiveRequirement { + pub id: String, + pub title: String, + pub description: String, + /// Priority: 'must', 'should', 'could', 'wont' + pub priority: String, + /// Category: 'feature', 'infrastructure', 'testing', etc. 
+ pub category: Option<String>, + /// Parent requirement ID for hierarchical requirements + pub parent_id: Option<String>, +} + +/// Acceptance criterion in a directive +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DirectiveAcceptanceCriterion { + pub id: String, + /// Requirement IDs this criterion validates + pub requirement_ids: Vec<String>, + pub description: String, + pub testable: bool, + /// Verification method: 'automated', 'manual', 'review', 'llm' + pub verification_method: String, +} + +/// Constraint in a directive +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DirectiveConstraint { + pub id: String, + /// Type: 'technical', 'business', 'time', 'resource' + #[serde(rename = "type")] + pub constraint_type: String, + pub description: String, + /// Impact: 'high', 'medium', 'low' + pub impact: String, +} + +/// External dependency in a directive +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DirectiveExternalDependency { + pub id: String, + pub name: String, + /// Type: 'api', 'service', 'library', 'data' + #[serde(rename = "type")] + pub dependency_type: String, + /// Status: 'available', 'pending', 'blocked' + pub status: String, + /// Requirement IDs that need this dependency + pub required_by: Vec<String>, +} + +/// Request to create or update a chain directive +#[derive(Debug, Clone, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CreateChainDirectiveRequest { + pub requirements: Option<Vec<DirectiveRequirement>>, + pub acceptance_criteria: Option<Vec<DirectiveAcceptanceCriterion>>, + pub constraints: Option<Vec<DirectiveConstraint>>, + pub external_dependencies: Option<Vec<DirectiveExternalDependency>>, + pub source_type: Option<String>, +} + +/// Request to initialize a directive-driven chain +#[derive(Debug, Clone, Deserialize, ToSchema)] 
+#[serde(rename_all = "camelCase")] +pub struct InitChainRequest { + /// High-level goal/description for the directive contract + pub goal: String, + /// Repository URL for chain contracts + pub repository_url: Option<String>, + /// Local path for chain contracts + pub local_path: Option<String>, + /// Whether to enable phase guard (user approval between phases) + #[serde(default)] + pub phase_guard: bool, +} + +/// Response from initializing a directive-driven chain +#[derive(Debug, Clone, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct InitChainResponse { + pub chain_id: Uuid, + pub directive_contract_id: Uuid, + pub supervisor_task_id: Option<Uuid>, +} + +// ============================================================================= +// Contract Evaluations (LLM evaluation results for completed contracts) +// ============================================================================= + +/// Evaluation status for chain contracts +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum EvaluationStatus { + /// Not yet evaluated + Pending, + /// Currently being evaluated + Evaluating, + /// Evaluation passed + Passed, + /// Evaluation failed + Failed, + /// Contract is being reworked after failed evaluation + Rework, + /// Max retries exceeded, escalated to user + Escalated, + /// User approved despite partial failure + ApprovedWithIssues, +} + +impl std::fmt::Display for EvaluationStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Pending => write!(f, "pending"), + Self::Evaluating => write!(f, "evaluating"), + Self::Passed => write!(f, "passed"), + Self::Failed => write!(f, "failed"), + Self::Rework => write!(f, "rework"), + Self::Escalated => write!(f, "escalated"), + Self::ApprovedWithIssues => write!(f, "approved_with_issues"), + } + } +} + +impl std::str::FromStr for EvaluationStatus { + type Err = String; + + fn 
from_str(s: &str) -> Result<Self, Self::Err> { + match s.to_lowercase().as_str() { + "pending" => Ok(Self::Pending), + "evaluating" => Ok(Self::Evaluating), + "passed" => Ok(Self::Passed), + "failed" => Ok(Self::Failed), + "rework" => Ok(Self::Rework), + "escalated" => Ok(Self::Escalated), + "approved_with_issues" => Ok(Self::ApprovedWithIssues), + _ => Err(format!("Unknown evaluation status: {}", s)), + } + } +} + +/// Contract evaluation - LLM evaluation result after contract completion +#[derive(Debug, Clone, FromRow, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ContractEvaluation { + pub id: Uuid, + pub contract_id: Uuid, + pub chain_id: Option<Uuid>, + pub chain_contract_id: Option<Uuid>, + /// Evaluation attempt number (1-based) + pub evaluation_number: i32, + /// Model used for evaluation + pub evaluator_model: Option<String>, + /// Whether the evaluation passed + pub passed: bool, + /// Overall score (0.0-1.0) + pub overall_score: Option<f64>, + /// Per-criterion results as JSON + #[sqlx(json)] + pub criteria_results: serde_json::Value, + /// Summary feedback from the evaluator + pub summary_feedback: String, + /// Instructions for rework if evaluation failed + pub rework_instructions: Option<String>, + /// Snapshot of directive at evaluation time + pub directive_snapshot: Option<serde_json::Value>, + /// Snapshot of deliverables at evaluation time + pub deliverables_snapshot: Option<serde_json::Value>, + pub started_at: DateTime<Utc>, + pub completed_at: Option<DateTime<Utc>>, + pub created_at: DateTime<Utc>, +} + +/// Per-criterion evaluation result +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct EvaluationCriterionResult { + pub criterion_id: String, + pub criterion_text: String, + pub passed: bool, + /// Score (0.0-1.0) + pub score: f64, + pub feedback: String, + /// Evidence supporting the evaluation + pub evidence: Vec<String>, +} + +/// Request to create 
a contract evaluation +#[derive(Debug, Clone, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct CreateContractEvaluationRequest { + pub contract_id: Uuid, + pub chain_id: Option<Uuid>, + pub chain_contract_id: Option<Uuid>, + pub evaluator_model: Option<String>, + pub passed: bool, + pub overall_score: Option<f64>, + pub criteria_results: Vec<EvaluationCriterionResult>, + pub summary_feedback: String, + pub rework_instructions: Option<String>, +} + +/// Summary of contract evaluation for list views +#[derive(Debug, Clone, FromRow, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ContractEvaluationSummary { + pub id: Uuid, + pub contract_id: Uuid, + pub evaluation_number: i32, + pub passed: bool, + pub overall_score: Option<f64>, + pub summary_feedback: String, + pub created_at: DateTime<Utc>, +} + +/// Response listing evaluations for a chain or contract +#[derive(Debug, Clone, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ContractEvaluationsResponse { + pub evaluations: Vec<ContractEvaluationSummary>, + pub total: i64, +} + +/// Traceability matrix entry - maps requirements to contracts +#[derive(Debug, Clone, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct TraceabilityEntry { + pub requirement_id: String, + pub requirement_title: String, + pub contract_definition_ids: Vec<Uuid>, + pub contract_definition_names: Vec<String>, + pub acceptance_criteria_ids: Vec<String>, +} + +/// Response for directive traceability +#[derive(Debug, Clone, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DirectiveTraceabilityResponse { + pub chain_id: Uuid, + pub entries: Vec<TraceabilityEntry>, + /// Requirements not mapped to any contract + pub uncovered_requirements: Vec<String>, +} + +// ============================================================================= // Unit Tests // ============================================================================= 
diff --git a/makima/src/db/repository.rs b/makima/src/db/repository.rs index 2b595b5..9cb653f 100644 --- a/makima/src/db/repository.rs +++ b/makima/src/db/repository.rs @@ -8,18 +8,21 @@ use uuid::Uuid; use super::models::{ AddChainRepositoryRequest, AddContractDefinitionRequest, AddContractToChainRequest, Chain, ChainContract, ChainContractDefinition, ChainContractDetail, ChainDefinitionGraphNode, - ChainDefinitionGraphResponse, ChainEditorContract, ChainEditorData, ChainEditorDeliverable, - ChainEditorEdge, ChainEditorNode, ChainEditorTask, ChainEvent, ChainGraphEdge, ChainGraphNode, - ChainGraphResponse, ChainRepository, ChainSummary, ChainWithContracts, CheckpointPatch, - CheckpointPatchInfo, Contract, ContractChatConversation, ContractChatMessageRecord, - ContractEvent, ContractRepository, ContractSummary, ContractTypeTemplateRecord, - ConversationMessage, ConversationSnapshot, CreateChainRequest, CreateContractRequest, - CreateFileRequest, CreateTaskRequest, CreateTemplateRequest, Daemon, DaemonTaskAssignment, - DaemonWithCapacity, DeliverableDefinition, File, FileSummary, FileVersion, HistoryEvent, - HistoryQueryFilters, MeshChatConversation, MeshChatMessageRecord, PhaseChangeResult, - PhaseConfig, PhaseDefinition, SupervisorHeartbeatRecord, SupervisorState, Task, TaskCheckpoint, - TaskEvent, TaskSummary, UpdateChainRequest, UpdateContractDefinitionRequest, - UpdateContractRequest, UpdateFileRequest, UpdateTaskRequest, UpdateTemplateRequest, + ChainDefinitionGraphResponse, ChainDirective, ChainEditorContract, ChainEditorData, + ChainEditorDeliverable, ChainEditorEdge, ChainEditorNode, ChainEditorTask, ChainEvent, + ChainGraphEdge, ChainGraphNode, ChainGraphResponse, ChainRepository, ChainSummary, + ChainWithContracts, CheckpointPatch, CheckpointPatchInfo, Contract, ContractChatConversation, + ContractChatMessageRecord, ContractEvaluation, ContractEvaluationSummary, ContractEvent, + ContractRepository, ContractSummary, ContractTypeTemplateRecord, 
ConversationMessage, + ConversationSnapshot, CreateChainDirectiveRequest, CreateChainRequest, + CreateContractEvaluationRequest, CreateContractRequest, CreateFileRequest, CreateTaskRequest, + CreateTemplateRequest, Daemon, DaemonTaskAssignment, DaemonWithCapacity, DeliverableDefinition, + DirectiveTraceabilityResponse, EvaluationCriterionResult, File, FileSummary, FileVersion, + HistoryEvent, HistoryQueryFilters, InitChainRequest, InitChainResponse, MeshChatConversation, + MeshChatMessageRecord, PhaseChangeResult, PhaseConfig, PhaseDefinition, + SupervisorHeartbeatRecord, SupervisorState, Task, TaskCheckpoint, TaskEvent, TaskSummary, + TraceabilityEntry, UpdateChainRequest, UpdateContractDefinitionRequest, UpdateContractRequest, + UpdateFileRequest, UpdateTaskRequest, UpdateTemplateRequest, }; /// Repository error types. @@ -5156,7 +5159,10 @@ pub async fn list_chain_contracts( cc.depends_on, cc.order_index, cc.editor_x, - cc.editor_y + cc.editor_y, + cc.evaluation_status, + cc.evaluation_retry_count, + cc.max_evaluation_retries FROM chain_contracts cc JOIN contracts c ON c.id = cc.contract_id WHERE cc.chain_id = $1 @@ -6262,3 +6268,482 @@ async fn create_contract_from_definition( Ok(contract.id) } + +// ============================================================================= +// Chain Directives +// ============================================================================= + +/// Create a directive for a chain. 
+pub async fn create_chain_directive( + pool: &PgPool, + chain_id: Uuid, + req: CreateChainDirectiveRequest, +) -> Result<ChainDirective, sqlx::Error> { + let requirements = serde_json::to_value(&req.requirements.unwrap_or_default()) + .unwrap_or(serde_json::json!([])); + let acceptance_criteria = serde_json::to_value(&req.acceptance_criteria.unwrap_or_default()) + .unwrap_or(serde_json::json!([])); + let constraints = + serde_json::to_value(&req.constraints.unwrap_or_default()).unwrap_or(serde_json::json!([])); + let external_dependencies = + serde_json::to_value(&req.external_dependencies.unwrap_or_default()) + .unwrap_or(serde_json::json!([])); + let source_type = req.source_type.unwrap_or_else(|| "llm_generated".to_string()); + + sqlx::query_as::<_, ChainDirective>( + r#" + INSERT INTO chain_directives (chain_id, requirements, acceptance_criteria, constraints, external_dependencies, source_type) + VALUES ($1, $2, $3, $4, $5, $6) + RETURNING * + "#, + ) + .bind(chain_id) + .bind(&requirements) + .bind(&acceptance_criteria) + .bind(&constraints) + .bind(&external_dependencies) + .bind(&source_type) + .fetch_one(pool) + .await +} + +/// Get the directive for a chain. +pub async fn get_chain_directive( + pool: &PgPool, + chain_id: Uuid, +) -> Result<Option<ChainDirective>, sqlx::Error> { + sqlx::query_as::<_, ChainDirective>( + r#" + SELECT * + FROM chain_directives + WHERE chain_id = $1 + "#, + ) + .bind(chain_id) + .fetch_optional(pool) + .await +} + +/// Update a chain directive. 
+pub async fn update_chain_directive( + pool: &PgPool, + chain_id: Uuid, + req: CreateChainDirectiveRequest, +) -> Result<ChainDirective, sqlx::Error> { + let requirements = req + .requirements + .map(|r| serde_json::to_value(&r).unwrap_or(serde_json::json!([]))); + let acceptance_criteria = req + .acceptance_criteria + .map(|ac| serde_json::to_value(&ac).unwrap_or(serde_json::json!([]))); + let constraints = req + .constraints + .map(|c| serde_json::to_value(&c).unwrap_or(serde_json::json!([]))); + let external_dependencies = req + .external_dependencies + .map(|ed| serde_json::to_value(&ed).unwrap_or(serde_json::json!([]))); + + sqlx::query_as::<_, ChainDirective>( + r#" + UPDATE chain_directives SET + requirements = COALESCE($2, requirements), + acceptance_criteria = COALESCE($3, acceptance_criteria), + constraints = COALESCE($4, constraints), + external_dependencies = COALESCE($5, external_dependencies), + source_type = COALESCE($6, source_type), + version = version + 1, + updated_at = NOW() + WHERE chain_id = $1 + RETURNING * + "#, + ) + .bind(chain_id) + .bind(&requirements) + .bind(&acceptance_criteria) + .bind(&constraints) + .bind(&external_dependencies) + .bind(&req.source_type) + .fetch_one(pool) + .await +} + +/// Delete a chain directive. +pub async fn delete_chain_directive(pool: &PgPool, chain_id: Uuid) -> Result<bool, sqlx::Error> { + let result = sqlx::query("DELETE FROM chain_directives WHERE chain_id = $1") + .bind(chain_id) + .execute(pool) + .await?; + Ok(result.rows_affected() > 0) +} + +/// Get directive traceability (requirement -> contract mapping). 
+pub async fn get_directive_traceability( + pool: &PgPool, + chain_id: Uuid, +) -> Result<DirectiveTraceabilityResponse, sqlx::Error> { + // Get the directive + let directive = get_chain_directive(pool, chain_id).await?; + + // Get all contract definitions with their requirement mappings + let definitions = list_chain_contract_definitions(pool, chain_id).await?; + + // Parse requirements from directive + let requirements: Vec<super::models::DirectiveRequirement> = directive + .as_ref() + .and_then(|d| serde_json::from_value(d.requirements.clone()).ok()) + .unwrap_or_default(); + + // Build traceability entries + let mut entries: Vec<TraceabilityEntry> = Vec::new(); + let mut covered_requirements: std::collections::HashSet<String> = + std::collections::HashSet::new(); + + for req in &requirements { + let mut contract_def_ids: Vec<Uuid> = Vec::new(); + let mut contract_def_names: Vec<String> = Vec::new(); + + for def in &definitions { + if def.requirement_ids.contains(&req.id) { + contract_def_ids.push(def.id); + contract_def_names.push(def.name.clone()); + covered_requirements.insert(req.id.clone()); + } + } + + // Get acceptance criteria for this requirement + let acceptance_criteria: Vec<super::models::DirectiveAcceptanceCriterion> = directive + .as_ref() + .and_then(|d| serde_json::from_value(d.acceptance_criteria.clone()).ok()) + .unwrap_or_default(); + + let ac_ids: Vec<String> = acceptance_criteria + .iter() + .filter(|ac| ac.requirement_ids.contains(&req.id)) + .map(|ac| ac.id.clone()) + .collect(); + + entries.push(TraceabilityEntry { + requirement_id: req.id.clone(), + requirement_title: req.title.clone(), + contract_definition_ids: contract_def_ids, + contract_definition_names: contract_def_names, + acceptance_criteria_ids: ac_ids, + }); + } + + // Find uncovered requirements + let uncovered: Vec<String> = requirements + .iter() + .filter(|r| !covered_requirements.contains(&r.id)) + .map(|r| r.id.clone()) + .collect(); + + Ok(DirectiveTraceabilityResponse 
{ + chain_id, + entries, + uncovered_requirements: uncovered, + }) +} + +// ============================================================================= +// Contract Evaluations +// ============================================================================= + +/// Create a contract evaluation record. +pub async fn create_contract_evaluation( + pool: &PgPool, + req: CreateContractEvaluationRequest, +) -> Result<ContractEvaluation, sqlx::Error> { + let criteria_results = serde_json::to_value(&req.criteria_results).unwrap_or(serde_json::json!([])); + + sqlx::query_as::<_, ContractEvaluation>( + r#" + INSERT INTO contract_evaluations ( + contract_id, chain_id, chain_contract_id, + evaluator_model, passed, overall_score, + criteria_results, summary_feedback, rework_instructions, + completed_at + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, NOW()) + RETURNING * + "#, + ) + .bind(req.contract_id) + .bind(req.chain_id) + .bind(req.chain_contract_id) + .bind(&req.evaluator_model) + .bind(req.passed) + .bind(req.overall_score) + .bind(&criteria_results) + .bind(&req.summary_feedback) + .bind(&req.rework_instructions) + .fetch_one(pool) + .await +} + +/// Get a contract evaluation by ID. +pub async fn get_contract_evaluation( + pool: &PgPool, + id: Uuid, +) -> Result<Option<ContractEvaluation>, sqlx::Error> { + sqlx::query_as::<_, ContractEvaluation>( + r#" + SELECT * + FROM contract_evaluations + WHERE id = $1 + "#, + ) + .bind(id) + .fetch_optional(pool) + .await +} + +/// List evaluations for a contract. 
+pub async fn list_contract_evaluations( + pool: &PgPool, + contract_id: Uuid, +) -> Result<Vec<ContractEvaluationSummary>, sqlx::Error> { + sqlx::query_as::<_, ContractEvaluationSummary>( + r#" + SELECT id, contract_id, evaluation_number, passed, overall_score, summary_feedback, created_at + FROM contract_evaluations + WHERE contract_id = $1 + ORDER BY evaluation_number DESC + "#, + ) + .bind(contract_id) + .fetch_all(pool) + .await +} + +/// List evaluations for a chain. +pub async fn list_chain_evaluations( + pool: &PgPool, + chain_id: Uuid, +) -> Result<Vec<ContractEvaluationSummary>, sqlx::Error> { + sqlx::query_as::<_, ContractEvaluationSummary>( + r#" + SELECT id, contract_id, evaluation_number, passed, overall_score, summary_feedback, created_at + FROM contract_evaluations + WHERE chain_id = $1 + ORDER BY created_at DESC + "#, + ) + .bind(chain_id) + .fetch_all(pool) + .await +} + +/// Get the latest evaluation for a chain contract. +pub async fn get_latest_chain_contract_evaluation( + pool: &PgPool, + chain_contract_id: Uuid, +) -> Result<Option<ContractEvaluation>, sqlx::Error> { + sqlx::query_as::<_, ContractEvaluation>( + r#" + SELECT * + FROM contract_evaluations + WHERE chain_contract_id = $1 + ORDER BY evaluation_number DESC + LIMIT 1 + "#, + ) + .bind(chain_contract_id) + .fetch_optional(pool) + .await +} + +/// Get the next evaluation number for a chain contract. +pub async fn get_next_evaluation_number( + pool: &PgPool, + chain_contract_id: Uuid, +) -> Result<i32, sqlx::Error> { + let result: Option<(i32,)> = sqlx::query_as( + r#" + SELECT COALESCE(MAX(evaluation_number), 0) + 1 as next_number + FROM contract_evaluations + WHERE chain_contract_id = $1 + "#, + ) + .bind(chain_contract_id) + .fetch_optional(pool) + .await?; + + Ok(result.map(|(n,)| n).unwrap_or(1)) +} + +/// Update chain contract evaluation status. 
+pub async fn update_chain_contract_evaluation_status( + pool: &PgPool, + chain_contract_id: Uuid, + status: &str, + evaluation_id: Option<Uuid>, + rework_feedback: Option<&str>, +) -> Result<ChainContract, sqlx::Error> { + sqlx::query_as::<_, ChainContract>( + r#" + UPDATE chain_contracts SET + evaluation_status = $2, + last_evaluation_id = COALESCE($3, last_evaluation_id), + rework_feedback = COALESCE($4, rework_feedback), + evaluation_retry_count = CASE + WHEN $2 = 'rework' THEN evaluation_retry_count + 1 + ELSE evaluation_retry_count + END, + rework_started_at = CASE + WHEN $2 = 'rework' THEN NOW() + ELSE rework_started_at + END + WHERE id = $1 + RETURNING * + "#, + ) + .bind(chain_contract_id) + .bind(status) + .bind(evaluation_id) + .bind(rework_feedback) + .fetch_one(pool) + .await +} + +/// Mark a chain contract's original completion time (before rework). +pub async fn mark_chain_contract_original_completion( + pool: &PgPool, + chain_contract_id: Uuid, +) -> Result<(), sqlx::Error> { + sqlx::query( + r#" + UPDATE chain_contracts SET + original_completion_at = COALESCE(original_completion_at, NOW()) + WHERE id = $1 + "#, + ) + .bind(chain_contract_id) + .execute(pool) + .await?; + Ok(()) +} + +/// Get chain contract by contract ID. +pub async fn get_chain_contract_by_contract_id( + pool: &PgPool, + contract_id: Uuid, +) -> Result<Option<ChainContract>, sqlx::Error> { + sqlx::query_as::<_, ChainContract>( + r#" + SELECT * + FROM chain_contracts + WHERE contract_id = $1 + "#, + ) + .bind(contract_id) + .fetch_optional(pool) + .await +} + +// ============================================================================= +// Init Chain (Directive-Driven Chain Creation) +// ============================================================================= + +/// Initialize a directive-driven chain. +/// Creates a directive contract and an empty chain linked to it. 
+pub async fn init_chain_for_owner( + pool: &PgPool, + owner_id: Uuid, + req: InitChainRequest, +) -> Result<InitChainResponse, sqlx::Error> { + // Create the directive contract + // Note: "directive" contract type uses the "specification" phases by default + let contract_req = CreateContractRequest { + name: format!("Directive: {}", truncate_string(&req.goal, 50)), + description: Some(req.goal.clone()), + contract_type: Some("specification".to_string()), // Directive uses spec workflow + template_id: None, + initial_phase: Some("research".to_string()), + phase_guard: Some(req.phase_guard), + autonomous_loop: Some(false), + local_only: Some(false), + auto_merge_local: Some(false), + }; + + let contract = create_contract_for_owner(pool, owner_id, contract_req).await?; + + // Mark it as a chain directive + sqlx::query("UPDATE contracts SET is_chain_directive = true WHERE id = $1") + .bind(contract.id) + .execute(pool) + .await?; + + // Build repositories list from request + let repositories = match (req.repository_url.as_ref(), req.local_path.as_ref()) { + (Some(url), _) => Some(vec![AddChainRepositoryRequest { + name: "Primary".to_string(), + repository_url: Some(url.clone()), + local_path: None, + source_type: "remote".to_string(), + is_primary: true, + }]), + (None, Some(path)) => Some(vec![AddChainRepositoryRequest { + name: "Primary".to_string(), + repository_url: None, + local_path: Some(path.clone()), + source_type: "local".to_string(), + is_primary: true, + }]), + (None, None) => None, + }; + + // Create the chain with directive contract reference + let chain_req = CreateChainRequest { + name: truncate_string(&req.goal, 100), + description: Some(req.goal), + repositories, + loop_enabled: Some(false), + loop_max_iterations: None, + loop_progress_check: None, + contracts: None, + }; + + let chain = create_chain_for_owner(pool, owner_id, chain_req).await?; + + // Link directive contract to chain + sqlx::query( + r#" + UPDATE chains SET directive_contract_id = $2 
WHERE id = $1; + UPDATE contracts SET spawned_chain_id = $1 WHERE id = $2; + "#, + ) + .bind(chain.id) + .bind(contract.id) + .execute(pool) + .await?; + + // Create empty directive document + create_chain_directive( + pool, + chain.id, + CreateChainDirectiveRequest { + requirements: Some(vec![]), + acceptance_criteria: Some(vec![]), + constraints: Some(vec![]), + external_dependencies: Some(vec![]), + source_type: Some("llm_generated".to_string()), + }, + ) + .await?; + + Ok(InitChainResponse { + chain_id: chain.id, + directive_contract_id: contract.id, + supervisor_task_id: contract.supervisor_task_id, + }) +} + +/// Helper to truncate string to max length +fn truncate_string(s: &str, max_len: usize) -> String { + if s.len() <= max_len { + s.to_string() + } else { + format!("{}...", &s[..max_len - 3]) + } +} diff --git a/makima/src/llm/contract_evaluator.rs b/makima/src/llm/contract_evaluator.rs new file mode 100644 index 0000000..fcc4826 --- /dev/null +++ b/makima/src/llm/contract_evaluator.rs @@ -0,0 +1,555 @@ +//! Contract Evaluator - LLM-based evaluation of completed contracts against directive. +//! +//! This module provides functionality for: +//! - Gathering deliverables, files, and task outputs from completed contracts +//! - Building evaluation prompts using directive and acceptance criteria +//! - Calling LLM to evaluate work against requirements +//! 
//! - Parsing evaluation responses

use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use uuid::Uuid;

use crate::db::{
    models::{
        ChainContract, ChainDirective, Contract, ContractEvaluation, CreateContractEvaluationRequest,
        DirectiveAcceptanceCriterion, DirectiveRequirement, EvaluationCriterionResult,
    },
    repository,
};

use super::claude::{ClaudeClient, ClaudeModel, Message, MessageContent};
use super::tools::Tool;

/// Maximum number of bytes of a single file's content embedded in the evaluation prompt.
const MAX_PROMPT_FILE_BYTES: usize = 5000;

/// Result of contract evaluation
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ContractEvaluationResult {
    /// Whether the contract passed evaluation
    pub passed: bool,
    /// Overall score from 0.0 to 1.0
    pub overall_score: f64,
    /// Results for each acceptance criterion
    pub criteria_results: Vec<EvaluationCriterionResult>,
    /// Summary feedback from the evaluator
    pub summary_feedback: String,
    /// Instructions for rework if failed
    pub rework_instructions: Option<String>,
}

/// Context gathered for evaluation
#[derive(Debug, Clone)]
pub struct EvaluationContext {
    /// The contract being evaluated
    pub contract: Contract,
    /// The chain contract record
    pub chain_contract: ChainContract,
    /// The directive document
    pub directive: ChainDirective,
    /// Files associated with the contract
    pub files: Vec<FileContent>,
    /// Task outputs from the contract
    pub task_outputs: Vec<TaskOutput>,
    /// Deliverables marked as complete
    pub deliverables: Vec<DeliverableInfo>,
    /// Acceptance criteria specific to this contract
    pub acceptance_criteria: Vec<DirectiveAcceptanceCriterion>,
    /// Requirements mapped to this contract
    pub requirements: Vec<DirectiveRequirement>,
}

/// File content for evaluation
#[derive(Debug, Clone, Serialize)]
pub struct FileContent {
    pub path: String,
    pub description: Option<String>,
    pub content: String,
    pub is_deliverable: bool,
}

/// Task output for evaluation
#[derive(Debug, Clone, Serialize)]
pub struct TaskOutput {
    pub task_name: String,
    pub output_summary: String,
    pub exit_code: Option<i32>,
}

/// Deliverable info for evaluation
#[derive(Debug, Clone, Serialize)]
pub struct DeliverableInfo {
    pub name: String,
    pub status: String,
    pub file_path: Option<String>,
}

/// Error types for evaluation
#[derive(Debug, thiserror::Error)]
pub enum EvaluationError {
    #[error("Database error: {0}")]
    Database(#[from] sqlx::Error),

    #[error("Contract not found: {0}")]
    ContractNotFound(Uuid),

    #[error("Chain contract not found for contract: {0}")]
    ChainContractNotFound(Uuid),

    #[error("Directive not found for chain: {0}")]
    DirectiveNotFound(Uuid),

    #[error("LLM evaluation failed: {0}")]
    LlmError(String),

    #[error("Failed to parse evaluation response: {0}")]
    ParseError(String),
}

/// Contract evaluator for directive-driven evaluation
pub struct ContractEvaluator {
    pool: PgPool,
    claude_client: ClaudeClient,
    model: ClaudeModel,
    /// Minimum score required to pass (default 0.8)
    pass_threshold: f64,
}

impl ContractEvaluator {
    /// Create a new evaluator
    pub fn new(pool: PgPool, claude_client: ClaudeClient) -> Self {
        Self {
            pool,
            claude_client,
            model: ClaudeModel::Sonnet,
            pass_threshold: 0.8,
        }
    }

    /// Set the LLM model to use for evaluation
    pub fn with_model(mut self, model: ClaudeModel) -> Self {
        self.model = model;
        self
    }

    /// Set the pass threshold
    pub fn with_pass_threshold(mut self, threshold: f64) -> Self {
        self.pass_threshold = threshold;
        self
    }

    /// Evaluate a completed contract against the directive
    ///
    /// Gathers context from the database, builds the prompt, calls the LLM and parses
    /// its JSON answer. Nothing is persisted here; see [`ContractEvaluator::save_evaluation`].
    pub async fn evaluate_contract(
        &self,
        contract_id: Uuid,
        owner_id: Uuid,
    ) -> Result<ContractEvaluationResult, EvaluationError> {
        // Gather evaluation context
        let context = self.gather_context(contract_id, owner_id).await?;

        // Build evaluation prompt
        let prompt = self.build_evaluation_prompt(&context);

        // Call LLM for evaluation
        let response = self.call_llm_for_evaluation(&prompt).await?;

        // Parse the response
        let result = self.parse_evaluation_response(&response, &context)?;

        Ok(result)
    }

    /// Gather all context needed for evaluation
    ///
    /// The contract, its chain-contract record and the chain directive are mandatory and
    /// produce a dedicated error when missing. File and task listings are best-effort:
    /// a failure to load them is treated as an empty list.
    async fn gather_context(
        &self,
        contract_id: Uuid,
        owner_id: Uuid,
    ) -> Result<EvaluationContext, EvaluationError> {
        // Get contract
        let contract = repository::get_contract_for_owner(&self.pool, contract_id, owner_id)
            .await?
            .ok_or(EvaluationError::ContractNotFound(contract_id))?;

        // Get chain contract
        let chain_contract = repository::get_chain_contract_by_contract_id(&self.pool, contract_id)
            .await?
            .ok_or(EvaluationError::ChainContractNotFound(contract_id))?;

        // Get directive
        let directive = repository::get_chain_directive(&self.pool, chain_contract.chain_id)
            .await?
            .ok_or(EvaluationError::DirectiveNotFound(chain_contract.chain_id))?;

        // Get files directly from repository
        let contract_files = repository::list_files_in_contract(&self.pool, contract_id, owner_id)
            .await
            .unwrap_or_default();

        // Get tasks directly from repository
        let contract_tasks = repository::list_tasks_in_contract(&self.pool, contract_id, owner_id)
            .await
            .unwrap_or_default();

        // Build file contents from FileSummary
        // Note: FileSummary doesn't have content, so we use name and description
        let files: Vec<FileContent> = contract_files
            .iter()
            .map(|f| FileContent {
                path: f.repo_file_path.clone().unwrap_or_else(|| f.name.clone()),
                description: f.description.clone(),
                content: format!("[File: {} - content not loaded in summary view]", f.name),
                is_deliverable: false, // FileSummary doesn't track deliverable status
            })
            .collect();

        // Build task outputs from TaskSummary
        let task_outputs: Vec<TaskOutput> = contract_tasks
            .iter()
            .map(|t| TaskOutput {
                task_name: t.name.clone(),
                output_summary: t
                    .progress_summary
                    .clone()
                    .unwrap_or_else(|| format!("Status: {}", t.status)),
                exit_code: None,
            })
            .collect();

        // Build deliverables info from files marked as deliverables
        // Since FileSummary doesn't have deliverable info, we treat all files as potential deliverables
        let deliverables: Vec<DeliverableInfo> = contract_files
            .iter()
            .map(|f| DeliverableInfo {
                name: f.name.clone(),
                status: "complete".to_string(),
                file_path: f.repo_file_path.clone(),
            })
            .collect();

        // Parse requirements and acceptance criteria from directive
        let requirements: Vec<DirectiveRequirement> =
            serde_json::from_value(directive.requirements.clone()).unwrap_or_default();

        let all_criteria: Vec<DirectiveAcceptanceCriterion> =
            serde_json::from_value(directive.acceptance_criteria.clone()).unwrap_or_default();

        // Get contract definition to find mapped requirements
        // For now, use all acceptance criteria
        let acceptance_criteria = all_criteria;

        Ok(EvaluationContext {
            contract,
            chain_contract,
            directive,
            files,
            task_outputs,
            deliverables,
            acceptance_criteria,
            requirements,
        })
    }

    /// Build the evaluation prompt
    ///
    /// Sections with no content (requirements, criteria, deliverables, files, tasks) are
    /// omitted. Each acceptance criterion is listed with its id so the model can echo it
    /// back as `criterionId`. File content longer than [`MAX_PROMPT_FILE_BYTES`] is cut
    /// at a UTF-8 character boundary.
    fn build_evaluation_prompt(&self, context: &EvaluationContext) -> String {
        let mut prompt = String::new();

        prompt.push_str("# Contract Completion Evaluation\n\n");
        prompt.push_str("You are evaluating whether a contract has been completed successfully against its requirements.\n\n");

        // Contract info
        prompt.push_str("## Contract Information\n\n");
        prompt.push_str(&format!("**Name:** {}\n", context.contract.name));
        if let Some(ref desc) = context.contract.description {
            prompt.push_str(&format!("**Description:** {}\n", desc));
        }
        prompt.push_str(&format!("**Type:** {}\n", context.contract.contract_type));
        prompt.push_str(&format!("**Phase:** {}\n", context.contract.phase));
        prompt.push_str("\n");

        // Requirements
        if !context.requirements.is_empty() {
            prompt.push_str("## Requirements\n\n");
            for req in &context.requirements {
                prompt.push_str(&format!("- **{}** ({}): {}\n", req.id, req.priority, req.title));
                if !req.description.is_empty() {
                    prompt.push_str(&format!("  {}\n", req.description));
                }
            }
            prompt.push_str("\n");
        }

        // Acceptance criteria
        if !context.acceptance_criteria.is_empty() {
            prompt.push_str("## Acceptance Criteria\n\n");
            for (i, criterion) in context.acceptance_criteria.iter().enumerate() {
                // The id is shown so the response's `criterionId` can refer to it.
                prompt.push_str(&format!(
                    "{}. [{}] **{}**\n",
                    i + 1,
                    criterion.id,
                    criterion.description
                ));
                prompt.push_str(&format!("   - Testable: {}\n", criterion.testable));
                if !criterion.requirement_ids.is_empty() {
                    prompt.push_str(&format!(
                        "   - Covers: {}\n",
                        criterion.requirement_ids.join(", ")
                    ));
                }
            }
            prompt.push_str("\n");
        }

        // Deliverables
        if !context.deliverables.is_empty() {
            prompt.push_str("## Deliverables\n\n");
            for d in &context.deliverables {
                prompt.push_str(&format!("- {} ({})\n", d.name, d.status));
            }
            prompt.push_str("\n");
        }

        // Files
        if !context.files.is_empty() {
            prompt.push_str("## Files Created/Modified\n\n");
            for file in &context.files {
                prompt.push_str(&format!("### {}", file.path));
                if file.is_deliverable {
                    prompt.push_str(" [DELIVERABLE]");
                }
                prompt.push_str("\n");
                if let Some(ref desc) = file.description {
                    prompt.push_str(&format!("*{}*\n", desc));
                }
                // Truncate content if too long. Slicing at a fixed byte offset would panic
                // when that offset falls inside a multi-byte character.
                let content = if file.content.len() > MAX_PROMPT_FILE_BYTES {
                    format!(
                        "{}...\n[Content truncated - {} chars total]",
                        truncate_at_char_boundary(&file.content, MAX_PROMPT_FILE_BYTES),
                        file.content.len()
                    )
                } else {
                    file.content.clone()
                };
                prompt.push_str("```\n");
                prompt.push_str(&content);
                prompt.push_str("\n```\n\n");
            }
        }

        // Task outputs
        if !context.task_outputs.is_empty() {
            prompt.push_str("## Task Outputs\n\n");
            for task in &context.task_outputs {
                prompt.push_str(&format!("### {}\n", task.task_name));
                prompt.push_str(&format!("{}\n\n", task.output_summary));
            }
        }

        // Evaluation instructions
        prompt.push_str("## Evaluation Instructions\n\n");
        prompt.push_str("Please evaluate the completed work against the requirements and acceptance criteria.\n\n");
        prompt.push_str("For each acceptance criterion, determine if it has been met and provide a brief explanation.\n\n");
        prompt.push_str("Respond with a JSON object in the following format:\n\n");
        prompt.push_str("```json\n");
        prompt.push_str(r#"{
  "passed": true/false,
  "overallScore": 0.0-1.0,
  "criteriaResults": [
    {
      "criterionId": "criterion identifier or index",
      "met": true/false,
      "score": 0.0-1.0,
      "feedback": "explanation of why criterion was/wasn't met"
    }
  ],
  "summaryFeedback": "overall summary of the evaluation",
  "reworkInstructions": "if failed, specific instructions for what needs to be fixed (null if passed)"
}
"#);
        prompt.push_str("```\n\n");
        prompt.push_str(&format!("The pass threshold is {}. ", self.pass_threshold));
        prompt.push_str("A contract passes if the overall score is >= the threshold AND all critical criteria are met.\n");

        prompt
    }

    /// Call LLM for evaluation
    ///
    /// Sends the prompt as a single user message without tools and returns the text of
    /// the reply (an empty string when the reply carries no text).
    async fn call_llm_for_evaluation(&self, prompt: &str) -> Result<String, EvaluationError> {
        let messages = vec![Message {
            role: "user".to_string(),
            content: MessageContent::Text(prompt.to_string()),
        }];

        // Use chat_with_tools with empty tools array for simple chat
        let empty_tools: Vec<Tool> = vec![];
        let result = self
            .claude_client
            .chat_with_tools(messages, &empty_tools)
            .await
            .map_err(|e| EvaluationError::LlmError(e.to_string()))?;

        // ChatResult.content is already an Option<String>
        let text = result.content.unwrap_or_default();

        Ok(text)
    }

    /// Parse the LLM response into an evaluation result
    ///
    /// The final verdict requires both the model's own `passed` flag and an overall score
    /// at or above the configured threshold. Rework instructions are dropped when the
    /// contract passes. Each criterion result is labelled with the description of the
    /// matching acceptance criterion from `context` (matched by id, then by 1-based
    /// index); if nothing matches, the returned id is used as the text.
    fn parse_evaluation_response(
        &self,
        response: &str,
        context: &EvaluationContext,
    ) -> Result<ContractEvaluationResult, EvaluationError> {
        // Extract JSON from response (may be wrapped in markdown code blocks)
        let json_str = extract_json_from_response(response)?;

        // Parse the JSON
        let parsed: EvaluationResponseJson = serde_json::from_str(&json_str)
            .map_err(|e| EvaluationError::ParseError(format!("JSON parse error: {}", e)))?;

        // Convert to our result type
        let criteria_results: Vec<EvaluationCriterionResult> = parsed
            .criteria_results
            .into_iter()
            .map(|cr| EvaluationCriterionResult {
                criterion_text: resolve_criterion_text(
                    &cr.criterion_id,
                    &context.acceptance_criteria,
                ),
                criterion_id: cr.criterion_id,
                passed: cr.passed,
                score: cr.score,
                feedback: cr.feedback,
                evidence: vec![],
            })
            .collect();

        // Determine pass/fail based on threshold and results
        let passed = parsed.passed && parsed.overall_score >= self.pass_threshold;

        Ok(ContractEvaluationResult {
            passed,
            overall_score: parsed.overall_score,
            criteria_results,
            summary_feedback: parsed.summary_feedback,
            rework_instructions: if passed { None } else { parsed.rework_instructions },
        })
    }

    /// Save evaluation result to database
    ///
    /// Inserts the evaluation row, then sets the chain contract's evaluation status to
    /// `"passed"` or `"failed"` and points it at the new evaluation.
    pub async fn save_evaluation(
        &self,
        contract_id: Uuid,
        chain_id: Uuid,
        chain_contract_id: Uuid,
        result: &ContractEvaluationResult,
    ) -> Result<ContractEvaluation, EvaluationError> {
        let req = CreateContractEvaluationRequest {
            contract_id,
            chain_id: Some(chain_id),
            chain_contract_id: Some(chain_contract_id),
            evaluator_model: Some(format!("{:?}", self.model)),
            passed: result.passed,
            overall_score: Some(result.overall_score),
            criteria_results: result.criteria_results.clone(),
            summary_feedback: result.summary_feedback.clone(),
            rework_instructions: result.rework_instructions.clone(),
        };

        let evaluation = repository::create_contract_evaluation(&self.pool, req).await?;

        // Update chain contract status
        let status = if result.passed { "passed" } else { "failed" };
        repository::update_chain_contract_evaluation_status(
            &self.pool,
            chain_contract_id,
            status,
            Some(evaluation.id),
            result.rework_instructions.as_deref(),
        )
        .await?;

        Ok(evaluation)
    }
}

/// Returns the longest prefix of `text` that is at most `max_bytes` long and ends on a
/// UTF-8 character boundary.
fn truncate_at_char_boundary(text: &str, max_bytes: usize) -> &str {
    if text.len() <= max_bytes {
        return text;
    }
    // Index 0 is always a boundary, so the walk back terminates.
    let mut end = max_bytes;
    while !text.is_char_boundary(end) {
        end -= 1;
    }
    &text[..end]
}

/// Finds the human-readable text for a criterion id returned by the LLM.
///
/// Tries an exact id match first, then interprets the id as a 1-based index into
/// `criteria` (the numbering used in the prompt). Falls back to the id itself.
fn resolve_criterion_text(
    criterion_id: &str,
    criteria: &[DirectiveAcceptanceCriterion],
) -> String {
    let by_id = criteria.iter().find(|c| c.id.as_str() == criterion_id);
    let by_index = || {
        criterion_id
            .trim()
            .parse::<usize>()
            .ok()
            .and_then(|n| n.checked_sub(1))
            .and_then(|i| criteria.get(i))
    };

    by_id
        .or_else(by_index)
        .map(|c| c.description.to_string())
        .unwrap_or_else(|| criterion_id.to_string())
}

/// JSON structure for parsing LLM response
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct EvaluationResponseJson {
    passed: bool,
    overall_score: f64,
    criteria_results: Vec<CriterionResultJson>,
    summary_feedback: String,
    rework_instructions: Option<String>,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct CriterionResultJson {
    criterion_id: String,
    #[serde(alias = "met")]
    passed: bool,
    #[serde(default)]
    score: f64,
    feedback: String,
}

/// Extract JSON from a response that may contain markdown code blocks
///
/// Tried in order: a fenced block tagged `json`, any fenced block (skipping the rest of
/// the fence line), and finally the first balanced `{ ... }` object in the raw text.
/// The raw scan ignores braces that appear inside JSON string literals, so feedback text
/// such as `"missing }"` does not end the object early.
///
/// # Errors
///
/// Returns [`EvaluationError::ParseError`] when none of the strategies finds a candidate.
fn extract_json_from_response(response: &str) -> Result<String, EvaluationError> {
    // Try to find JSON in code blocks first
    if let Some(start) = response.find("```json") {
        let json_start = start + 7;
        if let Some(end) = response[json_start..].find("```") {
            return Ok(response[json_start..json_start + end].trim().to_string());
        }
    }

    // Try plain code blocks
    if let Some(start) = response.find("```") {
        let json_start = start + 3;
        // Skip any language identifier on the same line
        let actual_start = response[json_start..]
            .find('\n')
            .map(|i| json_start + i + 1)
            .unwrap_or(json_start);
        if let Some(end) = response[actual_start..].find("```") {
            return Ok(response[actual_start..actual_start + end].trim().to_string());
        }
    }

    // Try to find raw JSON (starts with {)
    if let Some(start) = response.find('{') {
        // Find the matching closing brace, tracking string literals and escapes so that
        // braces inside strings are not counted.
        let mut depth: usize = 0;
        let mut in_string = false;
        let mut escaped = false;
        for (offset, ch) in response[start..].char_indices() {
            if in_string {
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == '"' {
                    in_string = false;
                }
                continue;
            }
            match ch {
                '"' => in_string = true,
                '{' => depth += 1,
                '}' => {
                    // The scan starts on '{', so depth is at least 1 here.
                    depth -= 1;
                    if depth == 0 {
                        let end = start + offset + 1;
                        return Ok(response[start..end].to_string());
                    }
                }
                _ => {}
            }
        }
    }

    Err(EvaluationError::ParseError(
        "Could not find JSON in response".to_string(),
    ))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_json_from_code_block() {
        let response = r#"Here is the evaluation:

```json
{
  "passed": true,
  "overallScore": 0.85
}
```

Done."#;

        let json = extract_json_from_response(response).unwrap();
        assert!(json.contains("\"passed\": true"));
    }

    #[test]
    fn test_extract_json_raw() {
        let response = r#"The result is {"passed": false, "overallScore": 0.5}"#;
        let json = extract_json_from_response(response).unwrap();
        assert!(json.contains("\"passed\": false"));
    }

    #[test]
    fn test_extract_json_raw_with_braces_in_strings() {
        let response =
            r#"Result: {"passed": false, "summaryFeedback": "missing } in main.rs"} trailing"#;
        let json = extract_json_from_response(response).unwrap();
        assert_eq!(
            json,
            r#"{"passed": false, "summaryFeedback": "missing } in main.rs"}"#
        );
    }

    #[test]
    fn test_truncate_at_char_boundary_multibyte() {
        // 'é' is two bytes; cutting at byte 1 must fall back to the empty prefix.
        assert_eq!(truncate_at_char_boundary("éa", 1), "");
        assert_eq!(truncate_at_char_boundary("éa", 2), "é");
        assert_eq!(truncate_at_char_boundary("abc", 10), "abc");
    }
}
diff --git a/makima/src/llm/contract_tools.rs b/makima/src/llm/contract_tools.rs index 0f50132..7f7e849 100644 --- a/makima/src/llm/contract_tools.rs +++ b/makima/src/llm/contract_tools.rs @@ -460,6 +460,214 @@ pub static CONTRACT_TOOLS: once_cell::sync::Lazy<Vec<Tool>> = once_cell::sync::L "required": ["file_id"] }), }, + // ============================================================================= + // Chain Directive Tools (for directive contracts orchestrating chains) + // ============================================================================= + Tool { + name: "create_chain_from_directive".to_string(), + description: "Create a new chain that this directive contract will
orchestrate. The chain starts in 'pending' status and contract definitions can be added. Only available to directive contracts.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name for the chain" + }, + "description": { + "type": "string", + "description": "Description of what the chain accomplishes" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "add_chain_contract".to_string(), + description: "Add a contract definition to the chain being orchestrated. The contract will be created when its dependencies are met.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Contract name" + }, + "description": { + "type": "string", + "description": "What this contract accomplishes" + }, + "contract_type": { + "type": "string", + "enum": ["simple", "execute", "checkpoint"], + "description": "Contract type (default: simple)" + }, + "depends_on": { + "type": "array", + "items": { "type": "string" }, + "description": "Names of contracts this depends on" + }, + "requirement_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Requirement IDs this contract addresses (for traceability)" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "set_chain_dependencies".to_string(), + description: "Set which contracts depend on which other contracts in the chain.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "contract_name": { + "type": "string", + "description": "Name of contract that has dependencies" + }, + "depends_on": { + "type": "array", + "items": { "type": "string" }, + "description": "Names of contracts it depends on" + } + }, + "required": ["contract_name", "depends_on"] + }), + }, + Tool { + name: "modify_chain_contract".to_string(), + description: "Update a contract definition in the chain.".to_string(), + parameters: json!({ + "type": "object", + 
"properties": { + "name": { + "type": "string", + "description": "Name of the contract to modify" + }, + "new_name": { + "type": "string", + "description": "New name for the contract" + }, + "description": { + "type": "string", + "description": "New description" + }, + "add_requirement_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Requirement IDs to add" + }, + "remove_requirement_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Requirement IDs to remove" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "remove_chain_contract".to_string(), + description: "Remove a contract definition from the chain (only if not yet instantiated).".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the contract to remove" + } + }, + "required": ["name"] + }), + }, + Tool { + name: "preview_chain_dag".to_string(), + description: "Generate a visual preview of the chain DAG structure for review.".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "validate_chain_directive".to_string(), + description: "Validate the chain specification is complete and valid (no cycles, all dependencies exist, all requirements covered).".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "finalize_chain_directive".to_string(), + description: "Lock the directive and start chain execution. 
Call this after validation passes and user has approved (if phase_guard enabled).".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "auto_start": { + "type": "boolean", + "description": "Whether to immediately start the chain (default: true)" + } + } + }), + }, + Tool { + name: "get_chain_status".to_string(), + description: "Get current status of the chain being orchestrated, including contract statuses and progress.".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "get_uncovered_requirements".to_string(), + description: "List requirements from the directive that are not yet mapped to any contract.".to_string(), + parameters: json!({ + "type": "object", + "properties": {} + }), + }, + Tool { + name: "evaluate_contract_completion".to_string(), + description: "Evaluate whether a completed chain contract meets the directive requirements. Use this after a contract completes to assess if it satisfies acceptance criteria.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "contract_id": { + "type": "string", + "description": "ID of the completed contract to evaluate" + }, + "passed": { + "type": "boolean", + "description": "Whether the evaluation passed" + }, + "feedback": { + "type": "string", + "description": "Evaluation feedback and rationale" + }, + "rework_instructions": { + "type": "string", + "description": "Instructions for rework if evaluation failed" + } + }, + "required": ["contract_id", "passed", "feedback"] + }), + }, + Tool { + name: "request_rework".to_string(), + description: "Request rework on a completed contract that didn't meet requirements. 
This will block chain progression and notify the contract to address issues.".to_string(), + parameters: json!({ + "type": "object", + "properties": { + "contract_id": { + "type": "string", + "description": "ID of the contract needing rework" + }, + "feedback": { + "type": "string", + "description": "Detailed feedback on what needs to be fixed" + } + }, + "required": ["contract_id", "feedback"] + }), + }, ] }); @@ -546,6 +754,50 @@ pub enum ContractToolRequest { include_decisions: bool, include_action_items: bool, }, + + // Chain directive tools (for directive contracts) + CreateChainFromDirective { + name: String, + description: Option<String>, + }, + AddChainContract { + name: String, + description: Option<String>, + contract_type: Option<String>, + depends_on: Option<Vec<String>>, + requirement_ids: Option<Vec<String>>, + }, + SetChainDependencies { + contract_name: String, + depends_on: Vec<String>, + }, + ModifyChainContract { + name: String, + new_name: Option<String>, + description: Option<String>, + add_requirement_ids: Option<Vec<String>>, + remove_requirement_ids: Option<Vec<String>>, + }, + RemoveChainContract { + name: String, + }, + PreviewChainDag, + ValidateChainDirective, + FinalizeChainDirective { + auto_start: bool, + }, + GetChainStatus, + GetUncoveredRequirements, + EvaluateContractCompletion { + contract_id: Uuid, + passed: bool, + feedback: String, + rework_instructions: Option<String>, + }, + RequestRework { + contract_id: Uuid, + feedback: String, + }, } /// Task definition for chained task creation @@ -617,6 +869,20 @@ pub fn parse_contract_tool_call(call: &super::tools::ToolCall) -> ContractToolEx "analyze_transcript" => parse_analyze_transcript(call), "create_contract_from_transcript" => parse_create_contract_from_transcript(call), + // Chain directive tools + "create_chain_from_directive" => parse_create_chain_from_directive(call), + "add_chain_contract" => parse_add_chain_contract(call), + "set_chain_dependencies" => 
parse_set_chain_dependencies(call), + "modify_chain_contract" => parse_modify_chain_contract(call), + "remove_chain_contract" => parse_remove_chain_contract(call), + "preview_chain_dag" => parse_preview_chain_dag(), + "validate_chain_directive" => parse_validate_chain_directive(), + "finalize_chain_directive" => parse_finalize_chain_directive(call), + "get_chain_status" => parse_get_chain_status(), + "get_uncovered_requirements" => parse_get_uncovered_requirements(), + "evaluate_contract_completion" => parse_evaluate_contract_completion(call), + "request_rework" => parse_request_rework(call), + _ => ContractToolExecutionResult { success: false, message: format!("Unknown contract tool: {}", call.name), @@ -1206,6 +1472,229 @@ fn parse_create_contract_from_transcript(call: &super::tools::ToolCall) -> Contr } // ============================================================================= +// Chain Directive Tool Parsing +// ============================================================================= + +fn parse_create_chain_from_directive(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + let description = call.arguments.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()); + + ContractToolExecutionResult { + success: true, + message: "Creating chain from directive...".to_string(), + data: None, + request: Some(ContractToolRequest::CreateChainFromDirective { name, description }), + pending_questions: None, + } +} + +fn parse_add_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + + let description = call.arguments.get("description").and_then(|v| 
v.as_str()).map(|s| s.to_string()); + let contract_type = call.arguments.get("contract_type").and_then(|v| v.as_str()).map(|s| s.to_string()); + let depends_on = call.arguments.get("depends_on").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + let requirement_ids = call.arguments.get("requirement_ids").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + + ContractToolExecutionResult { + success: true, + message: format!("Adding contract '{}' to chain...", name), + data: None, + request: Some(ContractToolRequest::AddChainContract { + name, + description, + contract_type, + depends_on, + requirement_ids, + }), + pending_questions: None, + } +} + +fn parse_set_chain_dependencies(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let contract_name = call.arguments.get("contract_name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(contract_name) = contract_name else { + return error_result("Missing required parameter: contract_name"); + }; + + let depends_on = call.arguments.get("depends_on").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }).unwrap_or_default(); + + ContractToolExecutionResult { + success: true, + message: format!("Setting dependencies for '{}'...", contract_name), + data: None, + request: Some(ContractToolRequest::SetChainDependencies { contract_name, depends_on }), + pending_questions: None, + } +} + +fn parse_modify_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + + let new_name = call.arguments.get("new_name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let 
description = call.arguments.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()); + let add_requirement_ids = call.arguments.get("add_requirement_ids").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + let remove_requirement_ids = call.arguments.get("remove_requirement_ids").and_then(|v| { + v.as_array().map(|arr| { + arr.iter().filter_map(|item| item.as_str().map(|s| s.to_string())).collect() + }) + }); + + ContractToolExecutionResult { + success: true, + message: format!("Modifying contract '{}'...", name), + data: None, + request: Some(ContractToolRequest::ModifyChainContract { + name, + new_name, + description, + add_requirement_ids, + remove_requirement_ids, + }), + pending_questions: None, + } +} + +fn parse_remove_chain_contract(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let name = call.arguments.get("name").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(name) = name else { + return error_result("Missing required parameter: name"); + }; + + ContractToolExecutionResult { + success: true, + message: format!("Removing contract '{}'...", name), + data: None, + request: Some(ContractToolRequest::RemoveChainContract { name }), + pending_questions: None, + } +} + +fn parse_preview_chain_dag() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Generating chain DAG preview...".to_string(), + data: None, + request: Some(ContractToolRequest::PreviewChainDag), + pending_questions: None, + } +} + +fn parse_validate_chain_directive() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Validating chain directive...".to_string(), + data: None, + request: Some(ContractToolRequest::ValidateChainDirective), + pending_questions: None, + } +} + +fn parse_finalize_chain_directive(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let auto_start = 
call.arguments.get("auto_start").and_then(|v| v.as_bool()).unwrap_or(true); + + ContractToolExecutionResult { + success: true, + message: "Finalizing chain directive...".to_string(), + data: None, + request: Some(ContractToolRequest::FinalizeChainDirective { auto_start }), + pending_questions: None, + } +} + +fn parse_get_chain_status() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Getting chain status...".to_string(), + data: None, + request: Some(ContractToolRequest::GetChainStatus), + pending_questions: None, + } +} + +fn parse_get_uncovered_requirements() -> ContractToolExecutionResult { + ContractToolExecutionResult { + success: true, + message: "Getting uncovered requirements...".to_string(), + data: None, + request: Some(ContractToolRequest::GetUncoveredRequirements), + pending_questions: None, + } +} + +fn parse_evaluate_contract_completion(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let contract_id = parse_uuid_arg(call, "contract_id"); + let Some(contract_id) = contract_id else { + return error_result("Missing or invalid required parameter: contract_id"); + }; + + let passed = call.arguments.get("passed").and_then(|v| v.as_bool()).unwrap_or(false); + let feedback = call.arguments.get("feedback").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(feedback) = feedback else { + return error_result("Missing required parameter: feedback"); + }; + let rework_instructions = call.arguments.get("rework_instructions").and_then(|v| v.as_str()).map(|s| s.to_string()); + + ContractToolExecutionResult { + success: true, + message: format!("Evaluating contract completion (passed: {})...", passed), + data: None, + request: Some(ContractToolRequest::EvaluateContractCompletion { + contract_id, + passed, + feedback, + rework_instructions, + }), + pending_questions: None, + } +} + +fn parse_request_rework(call: &super::tools::ToolCall) -> ContractToolExecutionResult { + let contract_id = 
parse_uuid_arg(call, "contract_id"); + let Some(contract_id) = contract_id else { + return error_result("Missing or invalid required parameter: contract_id"); + }; + + let feedback = call.arguments.get("feedback").and_then(|v| v.as_str()).map(|s| s.to_string()); + let Some(feedback) = feedback else { + return error_result("Missing required parameter: feedback"); + }; + + ContractToolExecutionResult { + success: true, + message: "Requesting rework...".to_string(), + data: None, + request: Some(ContractToolRequest::RequestRework { contract_id, feedback }), + pending_questions: None, + } +} + +// ============================================================================= // Helper Functions // ============================================================================= diff --git a/makima/src/llm/mod.rs b/makima/src/llm/mod.rs index 4c84ced..702e1fd 100644 --- a/makima/src/llm/mod.rs +++ b/makima/src/llm/mod.rs @@ -1,6 +1,7 @@ //! LLM integration module for file editing via tool calling. 
pub mod claude; +pub mod contract_evaluator; pub mod contract_tools; pub mod discuss_tools; pub mod groq; @@ -44,6 +45,9 @@ pub use transcript_analyzer::{ ExtractedActionItem, SpeakerStats, format_transcript_for_analysis, calculate_speaker_stats, build_analysis_prompt, parse_analysis_response, }; +pub use contract_evaluator::{ + ContractEvaluator, ContractEvaluationResult, EvaluationContext, EvaluationError, +}; /// Available LLM providers and models #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] diff --git a/makima/src/server/handlers/chains.rs b/makima/src/server/handlers/chains.rs index 9b32495..b8716ca 100644 --- a/makima/src/server/handlers/chains.rs +++ b/makima/src/server/handlers/chains.rs @@ -16,7 +16,8 @@ use crate::db::models::{ AddChainRepositoryRequest, AddContractDefinitionRequest, ChainContractDefinition, ChainContractDetail, ChainDefinitionGraphResponse, ChainEditorData, ChainEvent, ChainGraphResponse, ChainRepository, ChainSummary, ChainWithContracts, CreateChainRequest, - StartChainRequest, StartChainResponse, UpdateChainRequest, UpdateContractDefinitionRequest, + InitChainRequest, InitChainResponse, StartChainRequest, StartChainResponse, UpdateChainRequest, + UpdateContractDefinitionRequest, }; use crate::db::repository::{self, RepositoryError}; use crate::server::auth::Authenticated; @@ -172,6 +173,65 @@ pub async fn create_chain( } } +/// Initialize a directive-driven chain. +/// +/// Creates a directive contract that will research, plan, create, and orchestrate +/// a chain of contracts to accomplish the given goal. The directive contract goes +/// through Research -> Specify -> Plan -> Execute -> Review phases. 
+/// +/// POST /api/v1/chains/init +#[utoipa::path( + post, + path = "/api/v1/chains/init", + request_body = InitChainRequest, + responses( + (status = 201, description = "Directive chain initialized", body = InitChainResponse), + (status = 400, description = "Invalid request", body = ApiError), + (status = 401, description = "Unauthorized", body = ApiError), + (status = 503, description = "Database not configured", body = ApiError), + (status = 500, description = "Internal server error", body = ApiError) + ), + security( + ("bearer_auth" = []), + ("api_key" = []) + ), + tag = "Chains" +)] +pub async fn init_chain( + State(state): State<SharedState>, + Authenticated(auth): Authenticated, + Json(req): Json<InitChainRequest>, +) -> impl IntoResponse { + let Some(ref pool) = state.db_pool else { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(ApiError::new("DB_UNAVAILABLE", "Database not configured")), + ) + .into_response(); + }; + + // Validate the request + if req.goal.trim().is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(ApiError::new("VALIDATION_ERROR", "Goal cannot be empty")), + ) + .into_response(); + } + + match repository::init_chain_for_owner(pool, auth.owner_id, req).await { + Ok(response) => (StatusCode::CREATED, Json(response)).into_response(), + Err(e) => { + tracing::error!("Failed to initialize directive chain: {}", e); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiError::new("DB_ERROR", e.to_string())), + ) + .into_response() + } + } +} + /// Get a chain by ID. 
/// /// GET /api/v1/chains/{id} diff --git a/makima/src/server/handlers/contract_chat.rs b/makima/src/server/handlers/contract_chat.rs index 2d54894..06b3a7c 100644 --- a/makima/src/server/handlers/contract_chat.rs +++ b/makima/src/server/handlers/contract_chat.rs @@ -15,7 +15,11 @@ use utoipa::ToSchema; use uuid::Uuid; use crate::db::{ - models::{ContractChatHistoryResponse, ContractWithRelations, CreateTaskRequest, UpdateFileRequest}, + models::{ + ContractChatHistoryResponse, ContractWithRelations, CreateTaskRequest, UpdateFileRequest, + AddContractDefinitionRequest, UpdateContractDefinitionRequest, CreateChainRequest, + CreateChainDirectiveRequest, CreateContractEvaluationRequest, + }, repository, }; use crate::llm::{ @@ -2762,6 +2766,1214 @@ async fn handle_contract_request( })), } } + + // Chain directive tools - for directive contracts to create and manage chains + ContractToolRequest::CreateChainFromDirective { name, description } => { + // First, get the current contract to verify it's a directive contract + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + // Check if contract already has a spawned chain + if contract.spawned_chain_id.is_some() { + return ContractRequestResult { + success: false, + message: "This contract already has a chain associated with it".to_string(), + data: Some(json!({ "existing_chain_id": contract.spawned_chain_id })), + }; + } + + // Create the chain + let chain_req = CreateChainRequest { + name: name.clone(), + description: description.clone(), + repositories: None, + loop_enabled: None, + loop_max_iterations: None, + loop_progress_check: None, + contracts: None, + }; + + let chain = match 
repository::create_chain_for_owner(pool, owner_id, chain_req).await { + Ok(c) => c, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to create chain: {}", e), + data: None, + }, + }; + + // Link the chain to this directive contract + if let Err(e) = sqlx::query( + r#" + UPDATE chains SET directive_contract_id = $2, evaluation_enabled = true WHERE id = $1; + UPDATE contracts SET spawned_chain_id = $1, is_chain_directive = true WHERE id = $2; + "#, + ) + .bind(chain.id) + .bind(contract_id) + .execute(pool) + .await { + return ContractRequestResult { + success: false, + message: format!("Failed to link chain to contract: {}", e), + data: None, + }; + } + + // Create empty directive for the chain + let directive_req = CreateChainDirectiveRequest { + requirements: Some(vec![]), + acceptance_criteria: Some(vec![]), + constraints: Some(vec![]), + external_dependencies: Some(vec![]), + source_type: Some("llm_generated".to_string()), + }; + + if let Err(e) = repository::create_chain_directive(pool, chain.id, directive_req).await { + return ContractRequestResult { + success: false, + message: format!("Failed to create directive: {}", e), + data: None, + }; + } + + ContractRequestResult { + success: true, + message: format!("Created chain '{}' linked to this directive contract", name), + data: Some(json!({ + "chain_id": chain.id, + "chain_name": name, + "description": description + })), + } + } + + ContractToolRequest::AddChainContract { name, description, contract_type, depends_on, requirement_ids } => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match 
contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain. Use create_chain_from_directive first.".to_string(), + data: None, + }, + }; + + // Check for duplicate names + let existing_defs = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + if existing_defs.iter().any(|d| d.name == name) { + return ContractRequestResult { + success: false, + message: format!("A contract definition with name '{}' already exists", name), + data: None, + }; + } + + // Create the contract definition + let def_req = AddContractDefinitionRequest { + name: name.clone(), + description, + contract_type: contract_type.unwrap_or_else(|| "implementation".to_string()), + initial_phase: Some("research".to_string()), + depends_on, + tasks: None, + deliverables: None, + validation: None, + editor_x: None, + editor_y: None, + }; + + let definition = match repository::create_chain_contract_definition(pool, chain_id, def_req).await { + Ok(d) => d, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to create contract definition: {}", e), + data: None, + }, + }; + + // Update requirement_ids if provided + if let Some(req_ids) = requirement_ids { + if !req_ids.is_empty() { + if let Err(e) = sqlx::query( + "UPDATE chain_contract_definitions SET requirement_ids = $2 WHERE id = $1" + ) + .bind(definition.id) + .bind(&req_ids) + .execute(pool) + .await { + tracing::warn!("Failed to set requirement_ids: {}", e); + } + } + } + + ContractRequestResult { + success: true, + message: format!("Added contract '{}' to chain", name), + data: Some(json!({ + "definition_id": definition.id, + "name": name, + "order_index": definition.order_index + })), + } + } + + 
ContractToolRequest::SetChainDependencies { contract_name, depends_on } => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Find the definition by name + let definitions = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + let definition = match definitions.iter().find(|d| d.name == contract_name) { + Some(d) => d, + None => return ContractRequestResult { + success: false, + message: format!("No contract definition named '{}' found", contract_name), + data: None, + }, + }; + + // Validate that all dependencies exist + for dep_name in &depends_on { + if !definitions.iter().any(|d| &d.name == dep_name) { + return ContractRequestResult { + success: false, + message: format!("Dependency '{}' does not exist", dep_name), + data: None, + }; + } + } + + // Check for circular dependencies (simple check) + if depends_on.contains(&contract_name) { + return ContractRequestResult { + success: false, + message: "A contract cannot depend on itself".to_string(), + data: None, + }; + } + + // Update dependencies + let update_req = UpdateContractDefinitionRequest { + name: None, + description: None, + contract_type: None, + initial_phase: None, + depends_on: Some(depends_on.clone()), + tasks: None, + deliverables: None, 
+ validation: None, + editor_x: None, + editor_y: None, + }; + + match repository::update_chain_contract_definition(pool, definition.id, update_req).await { + Ok(_) => ContractRequestResult { + success: true, + message: format!("Updated dependencies for '{}'", contract_name), + data: Some(json!({ + "contract_name": contract_name, + "depends_on": depends_on + })), + }, + Err(e) => ContractRequestResult { + success: false, + message: format!("Failed to update dependencies: {}", e), + data: None, + }, + } + } + + ContractToolRequest::ModifyChainContract { name, new_name, description, add_requirement_ids, remove_requirement_ids } => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Find the definition by name + let definitions = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + let definition = match definitions.iter().find(|d| d.name == name) { + Some(d) => d.clone(), + None => return ContractRequestResult { + success: false, + message: format!("No contract definition named '{}' found", name), + data: None, + }, + }; + + // Check if new name would conflict + if let Some(ref nn) = new_name { + if nn != &name && definitions.iter().any(|d| &d.name == nn) { + return ContractRequestResult { + success: false, + 
message: format!("A contract definition named '{}' already exists", nn), + data: None, + }; + } + } + + // Update the definition + let update_req = UpdateContractDefinitionRequest { + name: new_name.clone(), + description, + contract_type: None, + initial_phase: None, + depends_on: None, + tasks: None, + deliverables: None, + validation: None, + editor_x: None, + editor_y: None, + }; + + if let Err(e) = repository::update_chain_contract_definition(pool, definition.id, update_req).await { + return ContractRequestResult { + success: false, + message: format!("Failed to update definition: {}", e), + data: None, + }; + } + + // Handle requirement_ids modifications + let mut current_req_ids: Vec<String> = definition.requirement_ids.clone(); + if let Some(add_ids) = add_requirement_ids { + for id in add_ids { + if !current_req_ids.contains(&id) { + current_req_ids.push(id); + } + } + } + if let Some(remove_ids) = remove_requirement_ids { + current_req_ids.retain(|id| !remove_ids.contains(id)); + } + + if current_req_ids != definition.requirement_ids { + if let Err(e) = sqlx::query( + "UPDATE chain_contract_definitions SET requirement_ids = $2 WHERE id = $1" + ) + .bind(definition.id) + .bind(¤t_req_ids) + .execute(pool) + .await { + tracing::warn!("Failed to update requirement_ids: {}", e); + } + } + + ContractRequestResult { + success: true, + message: format!("Modified contract definition '{}'", new_name.as_ref().unwrap_or(&name)), + data: Some(json!({ + "definition_id": definition.id, + "name": new_name.as_ref().unwrap_or(&name), + "requirement_ids": current_req_ids + })), + } + } + + ContractToolRequest::RemoveChainContract { name } => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: 
false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Find the definition by name + let definitions = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + let definition = match definitions.iter().find(|d| d.name == name) { + Some(d) => d, + None => return ContractRequestResult { + success: false, + message: format!("No contract definition named '{}' found", name), + data: None, + }, + }; + + // Check if other definitions depend on this one + let dependents: Vec<&str> = definitions.iter() + .filter(|d| d.depends_on_names.contains(&name)) + .map(|d| d.name.as_str()) + .collect(); + + if !dependents.is_empty() { + return ContractRequestResult { + success: false, + message: format!("Cannot remove '{}': other contracts depend on it: {}", name, dependents.join(", ")), + data: None, + }; + } + + // Delete the definition + match repository::delete_chain_contract_definition(pool, definition.id).await { + Ok(true) => ContractRequestResult { + success: true, + message: format!("Removed contract definition '{}'", name), + data: Some(json!({ "removed": name })), + }, + Ok(false) => ContractRequestResult { + success: false, + message: format!("Failed to remove '{}': not found", name), + data: None, + }, + Err(e) => ContractRequestResult { + success: false, + message: format!("Failed to remove definition: {}", e), + data: None, + }, + } + } + + ContractToolRequest::PreviewChainDag => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => 
return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Get chain details and definitions + let chain = match repository::get_chain_for_owner(pool, chain_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Chain not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let definitions = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + // Build DAG representation + let nodes: Vec<serde_json::Value> = definitions.iter().map(|d| { + json!({ + "name": d.name, + "description": d.description, + "contract_type": d.contract_type, + "depends_on": d.depends_on_names, + "requirement_ids": d.requirement_ids + }) + }).collect(); + + // Build ASCII DAG representation + let mut ascii_dag = String::new(); + ascii_dag.push_str(&format!("Chain: {} ({})\n", chain.name, chain.status)); + ascii_dag.push_str(&format!("Contracts: {}\n\n", definitions.len())); + + // Find root nodes (no dependencies) + let roots: Vec<&str> = definitions.iter() + .filter(|d| d.depends_on_names.is_empty()) + .map(|d| d.name.as_str()) + .collect(); + + ascii_dag.push_str("Root contracts (no dependencies):\n"); + for root in &roots { + ascii_dag.push_str(&format!(" [{}]\n", root)); + } + + ascii_dag.push_str("\nDependency 
relationships:\n"); + for def in &definitions { + if !def.depends_on_names.is_empty() { + ascii_dag.push_str(&format!(" {} <- {}\n", def.name, def.depends_on_names.join(", "))); + } + } + + ContractRequestResult { + success: true, + message: format!("Chain DAG preview with {} contracts", definitions.len()), + data: Some(json!({ + "chain_id": chain_id, + "chain_name": chain.name, + "chain_status": chain.status, + "contract_count": definitions.len(), + "nodes": nodes, + "ascii_dag": ascii_dag + })), + } + } + + ContractToolRequest::ValidateChainDirective => { + // Validate the contract definitions of this contract's spawned chain, + // collecting hard failures in `errors` and advisory findings in `warnings`. + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + let definitions = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + let mut errors: Vec<String> = Vec::new(); + let mut warnings: Vec<String> = Vec::new(); + + // Check for empty chain + if definitions.is_empty() { + errors.push("Chain has no contract definitions".to_string()); + } + + // Check that every declared dependency names an existing contract definition + let def_names: std::collections::HashSet<String> = definitions.iter().map(|d| d.name.clone()).collect(); + for def in &definitions { + for dep in &def.depends_on_names { + if !def_names.contains(dep) { + errors.push(format!("'{}' depends on non-existent contract '{}'", def.name, dep)); + 
} + } + } + + // Simple cycle detection using DFS + // + // Walks `depends_on_names` edges depth-first starting at `name`. + // `visited` holds every name entered so far; `rec_stack` holds the names + // on the current DFS path. Returns `Some("a -> b")` for the first edge + // whose target `b` is already on the current path, otherwise `None`. + // A name with no matching definition is treated as having no dependencies. + fn has_cycle( + name: &str, + definitions: &[crate::db::models::ChainContractDefinition], + visited: &mut std::collections::HashSet<String>, + rec_stack: &mut std::collections::HashSet<String>, + ) -> Option<String> { + visited.insert(name.to_string()); + rec_stack.insert(name.to_string()); + + if let Some(def) = definitions.iter().find(|d| d.name == name) { + for dep in &def.depends_on_names { + if !visited.contains(dep) { + if let Some(cycle) = has_cycle(dep, definitions, visited, rec_stack) { + return Some(cycle); + } + } else if rec_stack.contains(dep) { + // Already visited AND still on the current path: back-edge, i.e. a cycle + return Some(format!("{} -> {}", name, dep)); + } + } + } + + // Finished exploring this name: take it off the current path + rec_stack.remove(name); + None + } + + // Start a DFS from every name not reached yet; `visited` is shared across + // starts, and only the first cycle found is reported (the loop breaks). + let mut visited = std::collections::HashSet::new(); + for def in &definitions { + if !visited.contains(&def.name) { + let mut rec_stack = std::collections::HashSet::new(); + if let Some(cycle) = has_cycle(&def.name, &definitions, &mut visited, &mut rec_stack) { + errors.push(format!("Circular dependency detected: {}", cycle)); + break; + } + } + } + + // Check for orphan contracts (no one depends on them and they're not root) + // These are reported as warnings only, not errors. + let roots: std::collections::HashSet<&str> = definitions.iter() + .filter(|d| d.depends_on_names.is_empty()) + .map(|d| d.name.as_str()) + .collect(); + + let depended_on: std::collections::HashSet<&str> = definitions.iter() + .flat_map(|d| d.depends_on_names.iter().map(|s| s.as_str())) + .collect(); + + for def in &definitions { + if !roots.contains(def.name.as_str()) && !depended_on.contains(def.name.as_str()) { + warnings.push(format!("'{}' has dependencies but nothing depends on it (orphan leaf)", def.name)); + } + } + + // Get directive to check requirement coverage + // (a failed lookup or a missing directive skips this check without reporting anything) + if let Ok(Some(directive)) = repository::get_chain_directive(pool, chain_id).await { + let requirements: Vec<crate::db::models::DirectiveRequirement> = + serde_json::from_value(directive.requirements.clone()).unwrap_or_default(); + + let covered: 
std::collections::HashSet<&str> = definitions.iter() + .flat_map(|d| d.requirement_ids.iter().map(|s| s.as_str())) + .collect(); + + for req in &requirements { + if !covered.contains(req.id.as_str()) { + warnings.push(format!("Requirement '{}' ({}) is not covered by any contract", req.id, req.title)); + } + } + } + + let is_valid = errors.is_empty(); + + ContractRequestResult { + success: is_valid, + message: if is_valid { + format!("Chain is valid with {} contracts", definitions.len()) + } else { + format!("Chain validation failed with {} errors", errors.len()) + }, + data: Some(json!({ + "valid": is_valid, + "contract_count": definitions.len(), + "errors": errors, + "warnings": warnings + })), + } + } + + ContractToolRequest::FinalizeChainDirective { auto_start } => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Get chain + let chain = match repository::get_chain_for_owner(pool, chain_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Chain not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + if chain.status != "pending" { + return ContractRequestResult { + success: false, + message: format!("Chain is already {} - cannot finalize", chain.status), + data: None, + }; + } + + // Update chain status + 
let new_status = if auto_start { "active" } else { "pending" }; + if let Err(e) = sqlx::query("UPDATE chains SET status = $2 WHERE id = $1") + .bind(chain_id) + .bind(new_status) + .execute(pool) + .await { + return ContractRequestResult { + success: false, + message: format!("Failed to update chain status: {}", e), + data: None, + }; + } + + // If auto_start, trigger chain progression to create root contracts + if auto_start { + match repository::progress_chain(pool, chain_id, owner_id).await { + Ok(result) => { + ContractRequestResult { + success: true, + message: format!("Chain finalized and started. Created {} root contracts.", result.contracts_created.len()), + data: Some(json!({ + "chain_id": chain_id, + "status": "active", + "contracts_created": result.contracts_created, + "chain_completed": result.chain_completed + })), + } + } + Err(e) => ContractRequestResult { + success: false, + message: format!("Chain finalized but failed to start: {}", e), + data: Some(json!({ "chain_id": chain_id, "status": "active" })), + }, + } + } else { + ContractRequestResult { + success: true, + message: "Chain finalized but not started. 
Call finalize_chain_directive with auto_start=true to start.".to_string(), + data: Some(json!({ + "chain_id": chain_id, + "status": "pending" + })), + } + } + } + + ContractToolRequest::GetChainStatus => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Get chain details + let chain = match repository::get_chain_for_owner(pool, chain_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Chain not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + // Get definitions + let definitions = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + // Get instantiated contracts + let chain_contracts = match repository::list_chain_contracts(pool, chain_id).await { + Ok(cc) => cc, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list chain contracts: {}", e), + data: None, + }, + }; + + // Build status map + let contract_statuses: Vec<serde_json::Value> = chain_contracts.iter().map(|cc| { + json!({ + "name": cc.contract_name, + "contract_id": cc.contract_id, + "status": cc.contract_status, + 
"phase": cc.contract_phase, + "evaluation_status": cc.evaluation_status, + "evaluation_retry_count": cc.evaluation_retry_count + }) + }).collect(); + + let completed = chain_contracts.iter().filter(|cc| cc.contract_status == "completed").count(); + let active = chain_contracts.iter().filter(|cc| cc.contract_status == "active").count(); + let pending = definitions.len() - chain_contracts.len(); + + ContractRequestResult { + success: true, + message: format!("Chain '{}': {} completed, {} active, {} pending", + chain.name, completed, active, pending), + data: Some(json!({ + "chain_id": chain_id, + "chain_name": chain.name, + "chain_status": chain.status, + "total_definitions": definitions.len(), + "instantiated": chain_contracts.len(), + "completed": completed, + "active": active, + "pending": pending, + "contracts": contract_statuses + })), + } + } + + ContractToolRequest::GetUncoveredRequirements => { + // Get the contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Get directive + let directive = match repository::get_chain_directive(pool, chain_id).await { + Ok(Some(d)) => d, + Ok(None) => return ContractRequestResult { + success: true, + message: "No directive found for this chain".to_string(), + data: Some(json!({ "uncovered": [], "total_requirements": 0 })), + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + // Get 
definitions + let definitions = match repository::list_chain_contract_definitions(pool, chain_id).await { + Ok(defs) => defs, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to list definitions: {}", e), + data: None, + }, + }; + + // Parse requirements + let requirements: Vec<crate::db::models::DirectiveRequirement> = + serde_json::from_value(directive.requirements.clone()).unwrap_or_default(); + + // Find covered requirement IDs + let covered: std::collections::HashSet<String> = definitions.iter() + .flat_map(|d| d.requirement_ids.iter().cloned()) + .collect(); + + // Find uncovered requirements + let uncovered: Vec<serde_json::Value> = requirements.iter() + .filter(|r| !covered.contains(&r.id)) + .map(|r| json!({ + "id": r.id, + "title": r.title, + "priority": r.priority + })) + .collect(); + + ContractRequestResult { + success: true, + message: format!("{} of {} requirements are uncovered", uncovered.len(), requirements.len()), + data: Some(json!({ + "uncovered": uncovered, + "uncovered_count": uncovered.len(), + "total_requirements": requirements.len(), + "coverage_percent": if requirements.is_empty() { 100.0 } else { + ((requirements.len() - uncovered.len()) as f64 / requirements.len() as f64 * 100.0).round() + } + })), + } + } + + ContractToolRequest::EvaluateContractCompletion { contract_id: target_contract_id, passed, feedback, rework_instructions } => { + // Get the directive contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This 
contract has no associated chain".to_string(), + data: None, + }, + }; + + // Verify the target contract is part of this chain + let chain_contract = match repository::get_chain_contract_by_contract_id(pool, target_contract_id).await { + Ok(Some(cc)) => cc, + Ok(None) => return ContractRequestResult { + success: false, + message: format!("Contract {} is not part of a chain", target_contract_id), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + if chain_contract.chain_id != chain_id { + return ContractRequestResult { + success: false, + message: "Contract is not part of this directive's chain".to_string(), + data: None, + }; + } + + // Create evaluation record + let eval_req = CreateContractEvaluationRequest { + contract_id: target_contract_id, + chain_id: Some(chain_id), + chain_contract_id: Some(chain_contract.id), + evaluator_model: Some("directive_contract".to_string()), + passed, + overall_score: if passed { Some(1.0) } else { Some(0.0) }, + criteria_results: vec![], + summary_feedback: feedback.clone(), + rework_instructions: rework_instructions.clone(), + }; + + let evaluation = match repository::create_contract_evaluation(pool, eval_req).await { + Ok(e) => e, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Failed to create evaluation: {}", e), + data: None, + }, + }; + + // Update chain contract evaluation status + let new_status = if passed { "passed" } else { "failed" }; + if let Err(e) = repository::update_chain_contract_evaluation_status( + pool, + chain_contract.id, + new_status, + Some(evaluation.id), + None, // No rework feedback for passed/failed status + ).await { + tracing::warn!("Failed to update chain contract evaluation status: {}", e); + } + + if passed { + // Progress the chain to create downstream contracts + match repository::progress_chain(pool, chain_id, owner_id).await { + Ok(result) => 
ContractRequestResult { + success: true, + message: format!("Evaluation passed. Created {} downstream contracts.", result.contracts_created.len()), + data: Some(json!({ + "evaluation_id": evaluation.id, + "passed": true, + "contracts_created": result.contracts_created, + "chain_completed": result.chain_completed + })), + }, + Err(e) => ContractRequestResult { + success: true, + message: format!("Evaluation passed but failed to progress chain: {}", e), + data: Some(json!({ + "evaluation_id": evaluation.id, + "passed": true + })), + }, + } + } else { + // Mark contract for rework and re-activate the underlying contract. + // Both updates are written as ONE statement (data-modifying CTE): + // sqlx::query with bind parameters is sent as a prepared statement, and + // PostgreSQL rejects prepared statements containing multiple commands. + if let Err(e) = sqlx::query( + r#" + WITH reworked AS ( + UPDATE chain_contracts SET evaluation_status = 'rework', rework_feedback = $2 WHERE id = $1 + RETURNING contract_id + ) + UPDATE contracts SET status = 'active' WHERE id IN (SELECT contract_id FROM reworked) + "# + ) + .bind(chain_contract.id) + .bind(&rework_instructions) + .execute(pool) + .await { + tracing::warn!("Failed to mark contract for rework: {}", e); + } + + ContractRequestResult { + success: true, + message: format!("Evaluation failed. 
Contract marked for rework."), + data: Some(json!({ + "evaluation_id": evaluation.id, + "passed": false, + "rework_instructions": rework_instructions, + "retry_count": chain_contract.evaluation_retry_count + 1 + })), + } + } + } + + ContractToolRequest::RequestRework { contract_id: target_contract_id, feedback } => { + // Get the directive contract's spawned chain + let contract = match repository::get_contract_for_owner(pool, contract_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => return ContractRequestResult { + success: false, + message: "Contract not found".to_string(), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + let chain_id = match contract.spawned_chain_id { + Some(id) => id, + None => return ContractRequestResult { + success: false, + message: "This contract has no associated chain".to_string(), + data: None, + }, + }; + + // Verify the target contract is part of this chain + let chain_contract = match repository::get_chain_contract_by_contract_id(pool, target_contract_id).await { + Ok(Some(cc)) => cc, + Ok(None) => return ContractRequestResult { + success: false, + message: format!("Contract {} is not part of a chain", target_contract_id), + data: None, + }, + Err(e) => return ContractRequestResult { + success: false, + message: format!("Database error: {}", e), + data: None, + }, + }; + + if chain_contract.chain_id != chain_id { + return ContractRequestResult { + success: false, + message: "Contract is not part of this directive's chain".to_string(), + data: None, + }; + } + + // Check retry count + let max_retries = chain_contract.max_evaluation_retries; + if chain_contract.evaluation_retry_count >= max_retries { + return ContractRequestResult { + success: false, + message: format!("Contract has exceeded max retries ({}/{}). 
Escalate to user.", + chain_contract.evaluation_retry_count, max_retries), + data: Some(json!({ + "retry_count": chain_contract.evaluation_retry_count, + "max_retries": max_retries, + "escalation_required": true + })), + }; + } + + // Mark contract for rework and increment retry count, then re-activate the + // underlying contract. Written as ONE statement (data-modifying CTE): + // sqlx::query with bind parameters is sent as a prepared statement, and + // PostgreSQL rejects prepared statements containing multiple commands. + if let Err(e) = sqlx::query( + r#" + WITH reworked AS ( + UPDATE chain_contracts + SET evaluation_status = 'rework', + rework_feedback = $2, + evaluation_retry_count = evaluation_retry_count + 1 + WHERE id = $1 + RETURNING contract_id + ) + UPDATE contracts SET status = 'active' WHERE id IN (SELECT contract_id FROM reworked) + "# + ) + .bind(chain_contract.id) + .bind(&feedback) + .execute(pool) + .await { + return ContractRequestResult { + success: false, + message: format!("Failed to request rework: {}", e), + data: None, + }; + } + + ContractRequestResult { + success: true, + message: format!("Rework requested for contract. Retry {}/{}", + chain_contract.evaluation_retry_count + 1, max_retries), + data: Some(json!({ + "contract_id": target_contract_id, + "retry_count": chain_contract.evaluation_retry_count + 1, + "max_retries": max_retries, + "feedback": feedback + })), + } + } } } diff --git a/makima/src/server/handlers/contracts.rs b/makima/src/server/handlers/contracts.rs index 54bae71..2b2fc26 100644 --- a/makima/src/server/handlers/contracts.rs +++ b/makima/src/server/handlers/contracts.rs @@ -575,11 +575,55 @@ pub async fn update_contract( }), ).await; - // If contract is part of a chain, progress the chain + // If contract is part of a chain, check evaluation requirements if let Some(chain_id) = contract.chain_id { let pool_clone = pool.clone(); let owner_id = auth.owner_id; + let contract_id = contract.id; tokio::spawn(async move { + // Check if chain has evaluation enabled + let chain = match repository::get_chain_for_owner(&pool_clone, chain_id, owner_id).await { + Ok(Some(c)) => c, + Ok(None) => { + tracing::warn!(chain_id = %chain_id, "Chain not found for progression"); + return; + } + Err(e) 
=> { + tracing::error!(chain_id = %chain_id, error = %e, "Failed to get chain"); + return; + } + }; + + // If evaluation is enabled, mark contract for evaluation + if chain.evaluation_enabled { + // Mark the chain_contract as pending evaluation + if let Ok(Some(chain_contract)) = repository::get_chain_contract_by_contract_id(&pool_clone, contract_id).await { + if let Err(e) = repository::update_chain_contract_evaluation_status( + &pool_clone, + chain_contract.id, + "pending_evaluation", + None, + None, + ).await { + tracing::error!( + chain_id = %chain_id, + contract_id = %contract_id, + error = %e, + "Failed to mark contract for evaluation" + ); + } else { + tracing::info!( + chain_id = %chain_id, + contract_id = %contract_id, + "Contract marked for evaluation - waiting for directive contract to evaluate" + ); + } + } + // Don't progress chain - directive contract will evaluate and progress + return; + } + + // If evaluation is disabled, progress chain directly match repository::progress_chain(&pool_clone, chain_id, owner_id).await { Ok(result) => { if !result.contracts_created.is_empty() { diff --git a/makima/src/server/mod.rs b/makima/src/server/mod.rs index f6d2eda..e5b55e7 100644 --- a/makima/src/server/mod.rs +++ b/makima/src/server/mod.rs @@ -219,6 +219,7 @@ pub fn make_router(state: SharedState) -> Router { "/chains", get(chains::list_chains).post(chains::create_chain), ) + .route("/chains/init", post(chains::init_chain)) .route( "/chains/{id}", get(chains::get_chain) |
