diff options
| author | soryu <soryu@soryu.co> | 2026-02-07 00:01:50 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2026-02-07 00:01:50 +0000 |
| commit | b8d563d45f14a2b1db1f684aa0a8dcd7e5b6ad56 (patch) | |
| tree | 95543fd150270018e384fbcf9d3df3dc45f052f6 /makima/src/orchestration | |
| parent | cececbf326e258211ceae7afce716a5d1e46014f (diff) | |
| download | soryu-b8d563d45f14a2b1db1f684aa0a8dcd7e5b6ad56.tar.gz soryu-b8d563d45f14a2b1db1f684aa0a8dcd7e5b6ad56.zip | |
Remove directives for reimplementation
Diffstat (limited to 'makima/src/orchestration')
| -rw-r--r-- | makima/src/orchestration/engine.rs | 1335 | ||||
| -rw-r--r-- | makima/src/orchestration/mod.rs | 26 | ||||
| -rw-r--r-- | makima/src/orchestration/planner.rs | 848 | ||||
| -rw-r--r-- | makima/src/orchestration/verifier.rs | 833 |
4 files changed, 0 insertions, 3042 deletions
diff --git a/makima/src/orchestration/engine.rs b/makima/src/orchestration/engine.rs deleted file mode 100644 index 9f7c3b1..0000000 --- a/makima/src/orchestration/engine.rs +++ /dev/null @@ -1,1335 +0,0 @@ -//! Directive orchestration engine. -//! -//! Manages the lifecycle of directives: -//! - Starts directives and generates initial chains -//! - Monitors step execution and triggers evaluations -//! - Handles rework, escalation, and chain regeneration -//! - Enforces circuit breakers (cost, time, rework limits) - -use std::collections::HashMap; - -use sqlx::PgPool; -use thiserror::Error; -use tokio::sync::broadcast; -use uuid::Uuid; - -use crate::db::models::{ - AddStepRequest, ChainStep, CreateContractRequest, CreateTaskRequest, Directive, - DirectiveEvent, UpdateStepRequest, -}; -use crate::db::repository::{self, RepositoryError}; - -use super::planner::{ChainPlanner, GeneratedChain, PlannerError}; -use super::verifier::{ - auto_detect_verifiers, CompositeEvaluator, ConfidenceLevel, EvaluationResult, - VerificationContext, -}; - -/// Error type for engine operations. -#[derive(Error, Debug)] -pub enum EngineError { - #[error("Database error: {0}")] - Database(#[from] sqlx::Error), - - #[error("Repository error: {0}")] - Repository(#[from] RepositoryError), - - #[error("Planner error: {0}")] - Planner(#[from] PlannerError), - - #[error("Directive not found: {0}")] - DirectiveNotFound(Uuid), - - #[error("Chain not found for directive: {0}")] - ChainNotFound(Uuid), - - #[error("Step not found: {0}")] - StepNotFound(Uuid), - - #[error("Invalid state transition: {from} -> {to}")] - InvalidStateTransition { from: String, to: String }, - - #[error("Circuit breaker triggered: {0}")] - CircuitBreaker(String), - - #[error("Directive is paused")] - DirectivePaused, - - #[error("Contract creation failed: {0}")] - ContractCreation(String), - - #[error("LLM error: {0}")] - LlmError(String), -} - -/// Event emitted by the engine for UI updates. -#[derive(Debug, Clone)] -pub enum EngineEvent { - /// Directive status changed - DirectiveStatusChanged { - directive_id: Uuid, - old_status: String, - new_status: String, - }, - /// Step status changed - StepStatusChanged { - directive_id: Uuid, - step_id: Uuid, - old_status: String, - new_status: String, - }, - /// Evaluation completed - EvaluationCompleted { - directive_id: Uuid, - step_id: Uuid, - passed: bool, - confidence_level: ConfidenceLevel, - }, - /// Approval required - ApprovalRequired { - directive_id: Uuid, - approval_id: Uuid, - approval_type: String, - }, - /// Chain regenerated - ChainRegenerated { - directive_id: Uuid, - old_chain_id: Uuid, - new_chain_id: Uuid, - }, -} - -/// Result from starting a directive, containing info needed for auto-start. -pub struct PlanningStartResult { - /// The planning task ID that needs to be started on a daemon - pub task_id: Uuid, - /// The owner ID for finding available daemons - pub owner_id: Uuid, - /// The planning task details needed for the SpawnTask command - pub task_name: String, - pub plan: String, - pub contract_id: Uuid, - pub repository_url: Option<String>, - pub base_branch: Option<String>, -} - -/// Main orchestration engine for directives. -pub struct DirectiveEngine { - pool: PgPool, - planner: ChainPlanner, - event_tx: Option<broadcast::Sender<EngineEvent>>, -} - -impl DirectiveEngine { - /// Create a new directive engine. - pub fn new(pool: PgPool) -> Self { - Self { - planner: ChainPlanner::new(pool.clone()), - pool, - event_tx: None, - } - } - - /// Set the event broadcast channel for UI updates. - pub fn with_event_channel(mut self, tx: broadcast::Sender<EngineEvent>) -> Self { - self.event_tx = Some(tx); - self - } - - /// Emit an event if channel is configured. - fn emit_event(&self, event: EngineEvent) { - if let Some(tx) = &self.event_tx { - let _ = tx.send(event); - } - } - - // ======================================================================== - // Directive Lifecycle - // ======================================================================== - - /// Start a directive: spawn a planning contract+task to generate the chain. - /// Returns a `PlanningStartResult` so the caller can auto-start the task on a daemon. - pub async fn start_directive(&self, directive_id: Uuid) -> Result<PlanningStartResult, EngineError> { - let directive = repository::get_directive(&self.pool, directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(directive_id))?; - - // Validate current state - if directive.status != "draft" && directive.status != "paused" { - return Err(EngineError::InvalidStateTransition { - from: directive.status, - to: "planning".to_string(), - }); - } - - // Update status to planning - repository::update_directive_status(&self.pool, directive_id, "planning").await?; - self.emit_directive_event( - directive_id, - "status_changed", - "info", - serde_json::json!({"old_status": directive.status, "new_status": "planning"}), - "system", - ) - .await?; - - // Create an empty chain for the planning task to populate - let chain_name = format!( - "{}-chain", - directive.title.to_lowercase().replace(' ', "-") - ); - let _db_chain = repository::create_directive_chain( - &self.pool, - directive_id, - &chain_name, - Some(&format!("Execution plan for: {}", directive.goal)), - None, // rationale - None, // planning_model - ) - .await?; - - // Create a planning contract (type "execute", no phase guard) - let contract = repository::create_contract_for_owner( - &self.pool, - directive.owner_id, - CreateContractRequest { - name: format!("{} - Planning", directive.title), - description: Some(format!( - "Planning contract for directive: {}", - directive.goal - )), - contract_type: Some("execute".to_string()), - template_id: None, - initial_phase: Some("execute".to_string()), - autonomous_loop: Some(true), - phase_guard: Some(false), - local_only: Some(false), - auto_merge_local: None, - }, - ) - .await - .map_err(|e| { - EngineError::ContractCreation(format!("Failed to create planning contract: {}", e)) - })?; - - // Build instructions for the planning task - let plan = self.build_planning_task_instructions(&directive); - - // Create the planning task - let task_name = format!("{} - Planning", directive.title); - let task = repository::create_task_for_owner( - &self.pool, - directive.owner_id, - CreateTaskRequest { - contract_id: Some(contract.id), - name: task_name.clone(), - description: Some(format!( - "Plan the execution chain for directive: {}", - directive.goal - )), - plan: plan.clone(), - parent_task_id: None, - is_supervisor: true, - priority: 5, - repository_url: directive.repository_url.clone(), - base_branch: directive.base_branch.clone(), - target_branch: None, - merge_mode: None, - target_repo_path: None, - completion_action: Some("none".to_string()), - continue_from_task_id: None, - copy_files: None, - checkpoint_sha: None, - branched_from_task_id: None, - conversation_history: None, - supervisor_worktree_task_id: None, - }, - ) - .await - .map_err(|e| { - EngineError::ContractCreation(format!("Failed to create planning task: {}", e)) - })?; - - // Link the supervisor task to the contract - if let Err(e) = repository::update_contract_supervisor( - &self.pool, - contract.id, - task.id, - ) - .await - { - tracing::warn!( - contract_id = %contract.id, - task_id = %task.id, - error = %e, - "Failed to link supervisor task to planning contract" - ); - } - - // Link the planning contract to the directive - repository::set_directive_orchestrator_contract( - &self.pool, - directive_id, - contract.id, - ) - .await?; - - self.emit_directive_event( - directive_id, - "planning_started", - "info", - serde_json::json!({ - "contract_id": contract.id, - "task_id": task.id, - "message": "Planning task spawned, waiting for chain generation", - }), - "system", - ) - .await?; - - Ok(PlanningStartResult { - task_id: task.id, - owner_id: directive.owner_id, - task_name, - plan, - contract_id: contract.id, - repository_url: directive.repository_url.clone(), - base_branch: directive.base_branch.clone(), - }) - } - - /// Pause a directive. - pub async fn pause_directive(&self, directive_id: Uuid) -> Result<(), EngineError> { - let directive = repository::get_directive(&self.pool, directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(directive_id))?; - - if directive.status != "active" { - return Err(EngineError::InvalidStateTransition { - from: directive.status, - to: "paused".to_string(), - }); - } - - repository::update_directive_status(&self.pool, directive_id, "paused").await?; - self.emit_event(EngineEvent::DirectiveStatusChanged { - directive_id, - old_status: "active".to_string(), - new_status: "paused".to_string(), - }); - - Ok(()) - } - - /// Resume a paused directive. - pub async fn resume_directive(&self, directive_id: Uuid) -> Result<(), EngineError> { - let directive = repository::get_directive(&self.pool, directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(directive_id))?; - - if directive.status != "paused" { - return Err(EngineError::InvalidStateTransition { - from: directive.status, - to: "active".to_string(), - }); - } - - repository::update_directive_status(&self.pool, directive_id, "active").await?; - self.emit_event(EngineEvent::DirectiveStatusChanged { - directive_id, - old_status: "paused".to_string(), - new_status: "active".to_string(), - }); - - // Continue execution - self.advance_chain(directive_id).await?; - - Ok(()) - } - - /// Stop a directive (cannot be resumed). - pub async fn stop_directive(&self, directive_id: Uuid) -> Result<(), EngineError> { - let directive = repository::get_directive(&self.pool, directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(directive_id))?; - - if directive.status == "completed" || directive.status == "failed" { - return Err(EngineError::InvalidStateTransition { - from: directive.status, - to: "failed".to_string(), - }); - } - - repository::update_directive_status(&self.pool, directive_id, "failed").await?; - self.emit_event(EngineEvent::DirectiveStatusChanged { - directive_id, - old_status: directive.status, - new_status: "failed".to_string(), - }); - - Ok(()) - } - - // ======================================================================== - // Chain Management - // ======================================================================== - - /// Build a default chain as a fallback. - fn build_default_chain(&self, directive: &Directive) -> GeneratedChain { - GeneratedChain { - name: format!( - "{}-chain", - directive.title.to_lowercase().replace(' ', "-") - ), - description: format!("Execution plan for: {}", directive.goal), - steps: vec![ - super::planner::GeneratedStep { - name: "research".to_string(), - step_type: "research".to_string(), - description: format!( - "Research and understand the requirements for: {}", - directive.goal - ), - depends_on: vec![], - requirement_ids: vec![], - contract_template: None, - }, - super::planner::GeneratedStep { - name: "implement".to_string(), - step_type: "implement".to_string(), - description: format!("Implement the solution for: {}", directive.goal), - depends_on: vec!["research".to_string()], - requirement_ids: vec![], - contract_template: None, - }, - super::planner::GeneratedStep { - name: "test".to_string(), - step_type: "test".to_string(), - description: "Test and verify the implementation".to_string(), - depends_on: vec!["implement".to_string()], - requirement_ids: vec![], - contract_template: None, - }, - ], - } - } - - /// Create database steps from a generated chain. - async fn create_steps_from_chain( - &self, - chain_id: &Uuid, - chain: &GeneratedChain, - ) -> Result<(), EngineError> { - // First pass: create all steps and build name-to-id map - let mut step_id_map: HashMap<String, Uuid> = HashMap::new(); - - // Get editor positions - let positions = self.planner.compute_editor_positions(chain); - - for step in &chain.steps { - let (editor_x, editor_y) = positions - .get(&step.name) - .copied() - .unwrap_or((100.0, 100.0)); - - let task_plan = step - .contract_template - .as_ref() - .and_then(|t| t.tasks.first()) - .map(|t| t.plan.clone()) - .or_else(|| Some(step.description.clone())); - - let request = AddStepRequest { - name: step.name.clone(), - description: Some(step.description.clone()), - step_type: Some(step.step_type.clone()), - contract_type: step.contract_template.as_ref().map(|t| t.contract_type.clone()), - initial_phase: Some("plan".to_string()), - task_plan, - phases: step.contract_template.as_ref().map(|t| t.phases.clone()), - depends_on: None, // Will update in second pass - parallel_group: None, - requirement_ids: Some(step.requirement_ids.clone()), - acceptance_criteria_ids: None, - verifier_config: None, - editor_x: Some(editor_x), - editor_y: Some(editor_y), - }; - - let db_step = repository::create_chain_step(&self.pool, *chain_id, request).await?; - step_id_map.insert(step.name.clone(), db_step.id); - } - - // Second pass: update dependencies - for step in &chain.steps { - if step.depends_on.is_empty() { - continue; - } - - let step_id = step_id_map.get(&step.name).unwrap(); - let dep_ids: Vec<Uuid> = step - .depends_on - .iter() - .filter_map(|name| step_id_map.get(name)) - .copied() - .collect(); - - // Update step with proper dependencies - let update = UpdateStepRequest { - name: None, - description: None, - task_plan: None, - depends_on: Some(dep_ids), - requirement_ids: None, - acceptance_criteria_ids: None, - verifier_config: None, - editor_x: None, - editor_y: None, - }; - - repository::update_chain_step(&self.pool, *step_id, update).await?; - } - - Ok(()) - } - - /// Regenerate chain while preserving completed steps. - pub async fn regenerate_chain( - &self, - directive_id: Uuid, - reason: &str, - ) -> Result<Uuid, EngineError> { - let directive = repository::get_directive(&self.pool, directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(directive_id))?; - - let current_chain = repository::get_current_chain(&self.pool, directive_id) - .await? - .ok_or(EngineError::ChainNotFound(directive_id))?; - - // Use default chain for regeneration - // (planning contract handles initial generation; regeneration uses fallback) - let new_chain = self.build_default_chain(&directive); - - // Supersede old chain - repository::supersede_chain(&self.pool, current_chain.id).await?; - - // Create new chain - let db_chain = repository::create_directive_chain( - &self.pool, - directive_id, - &new_chain.name, - Some(&new_chain.description), - Some(reason), // rationale - None, // planning_model - ) - .await?; - - // Create steps - self.create_steps_from_chain(&db_chain.id, &new_chain).await?; - - self.emit_event(EngineEvent::ChainRegenerated { - directive_id, - old_chain_id: current_chain.id, - new_chain_id: db_chain.id, - }); - - // Continue execution - self.advance_chain(directive_id).await?; - - Ok(db_chain.id) - } - - // ======================================================================== - // Step Execution - // ======================================================================== - - /// Advance chain execution: find ready steps and start them. - pub async fn advance_chain(&self, directive_id: Uuid) -> Result<(), EngineError> { - let directive = repository::get_directive(&self.pool, directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(directive_id))?; - - // Check if directive is active - if directive.status == "paused" { - return Err(EngineError::DirectivePaused); - } - if directive.status != "active" { - return Ok(()); // Not an error, just nothing to do - } - - // Check circuit breakers - self.check_circuit_breakers(&directive).await?; - - // Get current chain - let chain = repository::get_current_chain(&self.pool, directive_id) - .await? - .ok_or(EngineError::ChainNotFound(directive_id))?; - - // Find ready steps (dependencies met, status=pending) - let ready_steps = repository::find_ready_steps(&self.pool, chain.id).await?; - - // Start each ready step - for step in ready_steps { - self.start_step(&directive, &step).await?; - } - - // Check if chain is complete - let all_steps = repository::list_chain_steps(&self.pool, chain.id).await?; - let all_passed = all_steps.iter().all(|s| s.status == "passed" || s.status == "skipped"); - let any_blocked = all_steps.iter().any(|s| s.status == "blocked" || s.status == "failed"); - - if all_passed && !all_steps.is_empty() { - // Complete the directive - self.complete_directive(directive_id).await?; - } else if any_blocked { - // Check if we should regenerate or fail - let failed_count = all_steps.iter().filter(|s| s.status == "failed").count(); - if failed_count > 3 { - // Too many failures, fail the directive - repository::update_directive_status(&self.pool, directive_id, "failed").await?; - } - } - - Ok(()) - } - - /// Start a step by creating its contract and supervisor task. - async fn start_step(&self, directive: &Directive, step: &ChainStep) -> Result<(), EngineError> { - // Update step status to ready - repository::update_step_status(&self.pool, step.id, "ready").await?; - self.emit_event(EngineEvent::StepStatusChanged { - directive_id: directive.id, - step_id: step.id, - old_status: "pending".to_string(), - new_status: "ready".to_string(), - }); - - // Get contract details from step template - let (name, description, contract_type, initial_phase) = - self.get_contract_details(directive, step); - - // Create contract for this step - let contract = repository::create_contract_for_owner( - &self.pool, - directive.owner_id, - CreateContractRequest { - name: name.clone(), - description: description.clone(), - contract_type: Some(contract_type), - template_id: None, - initial_phase: Some(initial_phase), - autonomous_loop: Some(directive.autonomy_level == "full_auto"), - phase_guard: Some(true), - local_only: Some(false), - auto_merge_local: None, - }, - ) - .await - .map_err(|e| EngineError::ContractCreation(format!("Failed to create contract: {}", e)))?; - - // Build task plan from step description and task_plan - let task_plan = step - .task_plan - .clone() - .unwrap_or_else(|| { - format!( - "## Step: {}\n\n{}\n\n## Directive Goal\n{}", - step.name, - description.as_deref().unwrap_or("Complete this step."), - directive.goal, - ) - }); - - // Create supervisor task linked to the contract - let task = repository::create_task_for_owner( - &self.pool, - directive.owner_id, - CreateTaskRequest { - contract_id: Some(contract.id), - name: name.clone(), - description: description.clone(), - plan: task_plan, - parent_task_id: None, - is_supervisor: true, - priority: 5, - repository_url: directive.repository_url.clone(), - base_branch: directive.base_branch.clone(), - target_branch: None, - merge_mode: Some("pr".to_string()), - target_repo_path: None, - completion_action: Some("pr".to_string()), - continue_from_task_id: None, - copy_files: None, - checkpoint_sha: None, - branched_from_task_id: None, - conversation_history: None, - supervisor_worktree_task_id: None, - }, - ) - .await - .map_err(|e| EngineError::ContractCreation(format!("Failed to create task: {}", e)))?; - - // Link contract and task to step - repository::update_step_contract(&self.pool, step.id, contract.id, Some(task.id)).await?; - - // Update step status to running - repository::update_step_status(&self.pool, step.id, "running").await?; - self.emit_event(EngineEvent::StepStatusChanged { - directive_id: directive.id, - step_id: step.id, - old_status: "ready".to_string(), - new_status: "running".to_string(), - }); - - self.emit_directive_event( - directive.id, - "step_started", - "info", - serde_json::json!({ - "step_id": step.id, - "step_name": step.name, - "contract_id": contract.id, - "task_id": task.id, - }), - "system", - ) - .await?; - - Ok(()) - } - - /// Build contract details from a step. - /// Returns (name, description, contract_type, initial_phase) - fn get_contract_details( - &self, - directive: &Directive, - step: &ChainStep, - ) -> (String, Option<String>, String, String) { - let name = format!("{} - {}", directive.title, step.name); - let description = step.description.clone(); - let contract_type = step.contract_type.clone(); - let initial_phase = step.initial_phase.clone().unwrap_or_else(|| "plan".to_string()); - - (name, description, contract_type, initial_phase) - } - - // ======================================================================== - // Evaluation - // ======================================================================== - - /// Handle contract completion: evaluate the step. - pub async fn on_contract_completed( - &self, - contract_id: Uuid, - ) -> Result<(), EngineError> { - // Find the step for this contract - let step = repository::get_step_by_contract_id(&self.pool, contract_id) - .await? - .ok_or(EngineError::StepNotFound(contract_id))?; - - // Get directive - let chain = repository::get_directive_chain(&self.pool, step.chain_id) - .await? - .ok_or(EngineError::ChainNotFound(step.chain_id))?; - - let directive = repository::get_directive(&self.pool, chain.directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(chain.directive_id))?; - - // Update step status to evaluating - repository::update_step_status(&self.pool, step.id, "evaluating").await?; - self.emit_event(EngineEvent::StepStatusChanged { - directive_id: directive.id, - step_id: step.id, - old_status: "running".to_string(), - new_status: "evaluating".to_string(), - }); - - // Run evaluation - let result = self.evaluate_step(&directive, &step).await?; - - // Record evaluation - let programmatic_results = result - .verifier_results - .iter() - .filter(|r| r.verifier_type != super::verifier::VerifierType::Llm) - .map(|r| serde_json::to_value(r).unwrap_or(serde_json::Value::Null)) - .collect::<Vec<_>>(); - - let llm_results = result - .verifier_results - .iter() - .filter(|r| r.verifier_type == super::verifier::VerifierType::Llm) - .map(|r| serde_json::to_value(r).unwrap_or(serde_json::Value::Null)) - .collect::<Vec<_>>(); - - // Get chain_id from step - let chain_id = step.chain_id; - - let _evaluation = repository::create_directive_evaluation( - &self.pool, - directive.id, - Some(chain_id), - Some(step.id), - step.contract_id, - "composite", - Some("orchestration_engine"), - result.passed, - Some(result.composite_score), - Some(result.confidence_level.as_str()), - serde_json::Value::Array(programmatic_results), - serde_json::Value::Array(llm_results), - serde_json::Value::Null, // criteria_results - &result.summary, - result.rework_instructions.as_deref(), - ) - .await?; - - // Update step based on result - let new_status = match result.confidence_level { - ConfidenceLevel::Green => "passed", - ConfidenceLevel::Yellow => { - // Check autonomy level - if directive.autonomy_level == "full_auto" { - "passed" // Accept yellow in full auto mode - } else { - // Create approval request - self.request_approval( - &directive, - &step, - "step_review", - &format!( - "Step '{}' completed with yellow confidence ({:.0}%). Review required.", - step.name, - result.composite_score * 100.0 - ), - ) - .await?; - "evaluating" // Wait for approval - } - } - ConfidenceLevel::Red => { - // Initiate rework - self.initiate_rework(&directive, &step, &result).await?; - "rework" - } - }; - - repository::update_step_status(&self.pool, step.id, new_status).await?; - repository::update_step_confidence( - &self.pool, - step.id, - result.composite_score, - result.confidence_level.as_str(), - result.id, - ) - .await?; - - self.emit_event(EngineEvent::EvaluationCompleted { - directive_id: directive.id, - step_id: step.id, - passed: result.passed, - confidence_level: result.confidence_level, - }); - - // If passed, continue chain execution - if new_status == "passed" { - self.advance_chain(directive.id).await?; - } - - Ok(()) - } - - /// Evaluate a step using tiered verification. - async fn evaluate_step( - &self, - directive: &Directive, - step: &ChainStep, - ) -> Result<EvaluationResult, EngineError> { - // Get repository path - let repo_path = directive - .local_path - .as_ref() - .map(std::path::PathBuf::from) - .unwrap_or_else(|| std::path::PathBuf::from(".")); - - // Auto-detect verifiers - let verifiers = auto_detect_verifiers(&repo_path).await; - - // Build verification context - let context = VerificationContext { - step_id: step.id, - contract_id: step.contract_id, - modified_files: vec![], // TODO: Get from contract/git - step_description: step.description.clone().unwrap_or_default(), - acceptance_criteria: vec![], // TODO: Get from directive - directive_context: directive.goal.clone(), - }; - - // Run composite evaluation - let evaluator = CompositeEvaluator::new(verifiers) - .with_thresholds( - directive.confidence_threshold_green, - directive.confidence_threshold_yellow, - ); - - Ok(evaluator.evaluate(&repo_path, &context).await) - } - - /// Initiate rework for a failed step. - async fn initiate_rework( - &self, - directive: &Directive, - step: &ChainStep, - result: &EvaluationResult, - ) -> Result<(), EngineError> { - // Increment rework count - let updated_step = repository::increment_step_rework_count(&self.pool, step.id).await?; - - // Check rework limit - let max_rework = directive.max_rework_cycles.unwrap_or(3); - if updated_step.rework_count >= max_rework { - // Too many rework attempts, mark as blocked - repository::update_step_status(&self.pool, step.id, "blocked").await?; - self.emit_directive_event( - directive.id, - "step_blocked", - "warning", - serde_json::json!({ - "step_id": step.id, - "step_name": step.name, - "reason": "Max rework attempts reached", - }), - "system", - ) - .await?; - return Ok(()); - } - - // Log rework event - self.emit_directive_event( - directive.id, - "step_rework", - "info", - serde_json::json!({ - "step_id": step.id, - "step_name": step.name, - "rework_count": updated_step.rework_count, - "instructions": result.rework_instructions, - }), - "system", - ) - .await?; - - // TODO: Send rework instructions to supervisor task - // This would involve: - // 1. Reset contract phase to 'plan' - // 2. Send message to supervisor with rework instructions - // 3. Update step status to 'running' - - Ok(()) - } - - /// Request human approval for a step. - async fn request_approval( - &self, - directive: &Directive, - step: &ChainStep, - approval_type: &str, - description: &str, - ) -> Result<Uuid, EngineError> { - let context = serde_json::json!({ - "step_id": step.id, - "step_name": step.name, - "confidence_score": step.confidence_score, - }); - - let approval = repository::create_approval_request( - &self.pool, - directive.id, - Some(step.id), - approval_type, - description, - Some(context), - "medium", - None, // expires_at - ) - .await?; - - self.emit_event(EngineEvent::ApprovalRequired { - directive_id: directive.id, - approval_id: approval.id, - approval_type: approval_type.to_string(), - }); - - Ok(approval.id) - } - - /// Handle approval resolution. - pub async fn on_approval_resolved( - &self, - approval_id: Uuid, - approved: bool, - responded_by: Uuid, - ) -> Result<(), EngineError> { - let status = if approved { "approved" } else { "denied" }; - let approval = repository::resolve_approval( - &self.pool, - approval_id, - status, - None, - responded_by, - ) - .await?; - - if let Some(step_id) = approval.step_id { - let step = repository::get_chain_step(&self.pool, step_id) - .await? - .ok_or(EngineError::StepNotFound(step_id))?; - - let chain = repository::get_directive_chain(&self.pool, step.chain_id) - .await? - .ok_or(EngineError::ChainNotFound(step.chain_id))?; - - if approved { - // Mark step as passed and continue - repository::update_step_status(&self.pool, step_id, "passed").await?; - self.advance_chain(chain.directive_id).await?; - } else { - // Mark step as failed/blocked - repository::update_step_status(&self.pool, step_id, "blocked").await?; - } - } - - Ok(()) - } - - // ======================================================================== - // Planning - // ======================================================================== - - /// Build the task instructions for the planning task. - fn build_planning_task_instructions(&self, directive: &Directive) -> String { - let requirements: Vec<String> = directive - .requirements - .as_array() - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_object()) - .map(|obj| { - let id = obj.get("id").and_then(|v| v.as_str()).unwrap_or("?"); - let desc = obj - .get("description") - .and_then(|v| v.as_str()) - .unwrap_or("?"); - format!("- {}: {}", id, desc) - }) - .collect() - }) - .unwrap_or_default(); - - let criteria: Vec<String> = directive - .acceptance_criteria - .as_array() - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_object()) - .map(|obj| { - let id = obj.get("id").and_then(|v| v.as_str()).unwrap_or("?"); - let criterion = obj - .get("criterion") - .and_then(|v| v.as_str()) - .unwrap_or("?"); - format!("- {}: {}", id, criterion) - }) - .collect() - }) - .unwrap_or_default(); - - let constraints: Vec<String> = directive - .constraints - .as_array() - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str()) - .map(|s| format!("- {}", s)) - .collect() - }) - .unwrap_or_default(); - - let repo_info = directive - .repository_url - .as_deref() - .unwrap_or("(not specified)"); - - format!( - r#"You are planning an execution chain for a directive. - -## Directive: {title} -## Goal -{goal} - -## Requirements -{requirements} - -## Acceptance Criteria -{criteria} - -## Constraints -{constraints} - -## Repository: {repo} - -## Your Task - -Analyze the repository and create a chain of execution steps. -For each step, add it via the API: - -```bash -curl -s -X POST "$MAKIMA_URL/api/v1/directives/{directive_id}/chain/steps" \ - -H "Authorization: Bearer $MAKIMA_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{{ - "name": "step-name", - "description": "What this step accomplishes", - "step_type": "implement", - "depends_on": [], - "contract_type": "execute", - "initial_phase": "execute", - "task_plan": "Detailed instructions for the step executor" - }}' -``` - -### Step types -Use these step types: research, design, implement, test, review, document - -### Dependencies -Each step can depend on other steps by name. Use the `depends_on` field with an array of step names. -Steps with no dependencies will run in parallel. - -### Guidelines -1. Break the work into logical, independently executable steps -2. Each step should be completable by a single Claude Code session -3. Use dependencies to enforce ordering where needed -4. Include a "test" or "verify" step at the end -5. Keep step names in kebab-case -6. The `task_plan` field should contain detailed instructions for the agent that will execute the step - -When you have added all steps, your task is complete."#, - title = directive.title, - goal = directive.goal, - requirements = if requirements.is_empty() { - "(none)".to_string() - } else { - requirements.join("\n") - }, - criteria = if criteria.is_empty() { - "(none)".to_string() - } else { - criteria.join("\n") - }, - constraints = if constraints.is_empty() { - "(none)".to_string() - } else { - constraints.join("\n") - }, - repo = repo_info, - directive_id = directive.id, - ) - } - - /// Handle planning task completion. - pub async fn on_planning_complete( - &self, - directive_id: Uuid, - success: bool, - ) -> Result<(), EngineError> { - let directive = repository::get_directive(&self.pool, directive_id) - .await? - .ok_or(EngineError::DirectiveNotFound(directive_id))?; - - // Only process if directive is still in planning state - if directive.status != "planning" { - tracing::warn!( - "Directive {} is in state '{}', not 'planning'. Skipping planning completion.", - directive_id, - directive.status - ); - return Ok(()); - } - - // Get current chain - let chain = repository::get_current_chain(&self.pool, directive_id) - .await? - .ok_or(EngineError::ChainNotFound(directive_id))?; - - // Check if chain has steps - let steps = repository::list_chain_steps(&self.pool, chain.id).await?; - - if success && !steps.is_empty() { - tracing::info!( - "Planning completed successfully for directive {} with {} steps", - directive_id, - steps.len() - ); - } else { - // Fall back to default chain - let reason = if !success { - "Planning task failed" - } else { - "Planning task produced no steps" - }; - tracing::warn!( - "{} for directive {}, using default chain", - reason, - directive_id - ); - - let default_chain = self.build_default_chain(&directive); - self.create_steps_from_chain(&chain.id, &default_chain) - .await?; - } - - // Activate the directive - repository::update_directive_status(&self.pool, directive_id, "active").await?; - self.emit_event(EngineEvent::DirectiveStatusChanged { - directive_id, - old_status: "planning".to_string(), - new_status: "active".to_string(), - }); - - self.emit_directive_event( - directive_id, - "planning_completed", - "info", - serde_json::json!({"success": success}), - "system", - ) - .await?; - - // Start ready steps - self.advance_chain(directive_id).await?; - - Ok(()) - } - - // ======================================================================== - // Circuit Breakers - // ======================================================================== - - /// Check circuit breakers for a directive. - async fn check_circuit_breakers(&self, directive: &Directive) -> Result<(), EngineError> { - // Check cost limit - if let Some(max_cost) = directive.max_total_cost_usd { - let current_cost = directive.total_cost_usd; - if current_cost >= max_cost { - return Err(EngineError::CircuitBreaker(format!( - "Cost limit exceeded: ${:.2} >= ${:.2}", - current_cost, max_cost - ))); - } - } - - // Check time limit (stored in minutes) - if let Some(max_minutes) = directive.max_wall_time_minutes { - if let Some(started_at) = directive.started_at { - let elapsed = chrono::Utc::now().signed_duration_since(started_at); - let elapsed_minutes = elapsed.num_minutes(); - if elapsed_minutes >= max_minutes as i64 { - return Err(EngineError::CircuitBreaker(format!( - "Time limit exceeded: {} min >= {} min", - elapsed_minutes, max_minutes - ))); - } - } - } - - // Check chain generation limit - if let Some(max_gen) = directive.max_chain_regenerations { - let current_gen = directive.chain_generation_count; - if current_gen >= max_gen { - return Err(EngineError::CircuitBreaker(format!( - "Chain generation limit exceeded: {} >= {}", - current_gen, max_gen - ))); - } - } - - Ok(()) - } - - // ======================================================================== - // Completion - // ======================================================================== - - /// Complete a directive after all steps pass. - async fn complete_directive(&self, directive_id: Uuid) -> Result<(), EngineError> { - // Run final evaluation (optional) - // TODO: LLM evaluation of overall directive completion - - // Update directive status - repository::update_directive_status(&self.pool, directive_id, "completed").await?; - - self.emit_event(EngineEvent::DirectiveStatusChanged { - directive_id, - old_status: "active".to_string(), - new_status: "completed".to_string(), - }); - - self.emit_directive_event( - directive_id, - "directive_completed", - "info", - serde_json::json!({}), - "system", - ) - .await?; - - Ok(()) - } - - // ======================================================================== - // Event Logging - // ======================================================================== - - /// Emit a directive event to the database. - async fn emit_directive_event( - &self, - directive_id: Uuid, - event_type: &str, - severity: &str, - event_data: serde_json::Value, - actor_type: &str, - ) -> Result<DirectiveEvent, EngineError> { - Ok(repository::emit_directive_event( - &self.pool, - directive_id, - None, // chain_id - None, // step_id - event_type, - severity, - Some(event_data), - actor_type, - None, // actor_id - ) - .await?) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_confidence_level_decision() { - // Green confidence should pass in all modes - assert_eq!(ConfidenceLevel::Green.as_str(), "green"); - - // Yellow confidence behavior depends on autonomy level - assert_eq!(ConfidenceLevel::Yellow.as_str(), "yellow"); - - // Red confidence should always trigger rework - assert_eq!(ConfidenceLevel::Red.as_str(), "red"); - } -} diff --git a/makima/src/orchestration/mod.rs b/makima/src/orchestration/mod.rs deleted file mode 100644 index 8fca5ba..0000000 --- a/makima/src/orchestration/mod.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! Orchestration engine for directive-driven autonomous execution. -//! -//! This module provides the core orchestration capabilities: -//! - [`DirectiveEngine`]: Main orchestration loop that manages directive lifecycle -//! - [`ChainPlanner`]: LLM-based chain generation from directive goals -//! - [`Verifier`]: Pluggable verification system for step validation -//! -//! # Architecture -//! -//! The orchestration system follows a directive-first approach: -//! 1. Directives define goals, requirements, and acceptance criteria -//! 2. Chains are generated execution plans (DAGs of steps) -//! 3. Steps map to contracts that are created and monitored -//! 4. Tiered verification (programmatic first, then LLM) determines confidence -//! 5. Confidence scoring (green/yellow/red) drives autonomy decisions - -mod engine; -mod planner; -mod verifier; - -pub use engine::{DirectiveEngine, EngineError, PlanningStartResult}; -pub use planner::{ChainPlanner, GeneratedSpec, PlannerError}; -pub use verifier::{ - auto_detect_verifiers, CompositeEvaluator, ConfidenceLevel, EvaluationResult, Verifier, - VerifierError, VerifierInfo, VerifierResult, VerifierType, -}; diff --git a/makima/src/orchestration/planner.rs b/makima/src/orchestration/planner.rs deleted file mode 100644 index aec2e48..0000000 --- a/makima/src/orchestration/planner.rs +++ /dev/null @@ -1,848 +0,0 @@ -//! Chain planner for LLM-based execution plan generation. -//! -//! Generates chains (DAGs of steps) from directive goals and requirements. -//! Supports both initial plan generation and replanning while preserving -//! completed work. - -use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet}; -use thiserror::Error; -use uuid::Uuid; - -use crate::db::models::{AddStepRequest, ChainStep, Directive}; - -/// Error type for planner operations. -#[derive(Error, Debug)] -pub enum PlannerError { - #[error("Cycle detected in DAG: {0}")] - CycleDetected(String), - - #[error("Invalid dependency: step '{step}' depends on unknown step '{dependency}'")] - InvalidDependency { step: String, dependency: String }, - - #[error("LLM generation failed: {0}")] - LlmError(String), - - #[error("Requirement not covered: {0}")] - RequirementNotCovered(String), - - #[error("Invalid plan: {0}")] - InvalidPlan(String), - - #[error("Empty plan generated")] - EmptyPlan, -} - -/// Generated step from LLM planning. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GeneratedStep { - /// Unique name within the chain - pub name: String, - /// Type of step (e.g., "research", "implement", "test", "review") - pub step_type: String, - /// Description of what this step accomplishes - pub description: String, - /// Names of steps this depends on - pub depends_on: Vec<String>, - /// IDs of requirements this step addresses - pub requirement_ids: Vec<String>, - /// Contract template fields - pub contract_template: Option<ContractTemplate>, -} - -/// Template for contract creation from step. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ContractTemplate { - /// Contract name - pub name: String, - /// Contract description - pub description: String, - /// Contract type (e.g., "simple", "agentic") - pub contract_type: String, - /// Phases for the contract - pub phases: Vec<String>, - /// Tasks within the contract - pub tasks: Vec<TaskTemplate>, - /// Deliverables expected - pub deliverables: Vec<DeliverableTemplate>, -} - -/// Template for task within contract. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct TaskTemplate { - pub name: String, - pub plan: String, -} - -/// Template for deliverable. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DeliverableTemplate { - pub id: String, - pub name: String, - pub priority: String, -} - -/// Generated chain from planning. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GeneratedChain { - /// Name for the chain - pub name: String, - /// Description of the execution plan - pub description: String, - /// Steps in the chain - pub steps: Vec<GeneratedStep>, -} - -/// Generated specification from LLM. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GeneratedSpec { - /// Generated title (if improved from goal) - pub title: Option<String>, - /// Structured requirements - pub requirements: serde_json::Value, - /// Structured acceptance criteria - pub acceptance_criteria: serde_json::Value, - /// Constraints extracted from goal - pub constraints: Option<serde_json::Value>, -} - -/// Chain planner for LLM-based plan generation. -pub struct ChainPlanner { - /// Default step types to suggest (reserved for future use) - #[allow(dead_code)] - default_step_types: Vec<String>, - /// Database pool for persistence - #[allow(dead_code)] - pool: Option<sqlx::PgPool>, -} - -impl Default for ChainPlanner { - fn default() -> Self { - Self::without_pool() - } -} - -impl ChainPlanner { - /// Create a new chain planner without database pool. - pub fn without_pool() -> Self { - Self { - default_step_types: vec![ - "research".to_string(), - "design".to_string(), - "implement".to_string(), - "test".to_string(), - "review".to_string(), - "document".to_string(), - ], - pool: None, - } - } - - /// Create a new chain planner (backwards compatible). - pub fn new(pool: sqlx::PgPool) -> Self { - Self { - default_step_types: vec![ - "research".to_string(), - "design".to_string(), - "implement".to_string(), - "test".to_string(), - "review".to_string(), - "document".to_string(), - ], - pool: Some(pool), - } - } - - /// Generate a specification from a directive's goal. - /// - /// Analyzes the goal text to produce structured requirements and - /// acceptance criteria. In production, this would call an LLM for - /// richer spec generation. - pub async fn generate_spec( - &self, - directive: &Directive, - ) -> Result<GeneratedSpec, PlannerError> { - // Build a prompt for spec generation - let prompt = format!( - r#"Analyze this goal and generate structured requirements and acceptance criteria. - -Goal: {} - -Generate a JSON response with: -- title: A concise title -- requirements: Array of {{id, title, description, priority, category}} -- acceptance_criteria: Array of {{id, requirementIds, description, testable, verificationMethod}} -- constraints: Array of constraint strings"#, - directive.goal - ); - - // For now, generate a basic spec from the goal text. - // When LLM integration is available, this will call the LLM with the prompt. - let _prompt = prompt; // Will be used when LLM is wired up - - let title = generate_title_from_goal(&directive.goal); - - let requirements = serde_json::json!([ - { - "id": "REQ-001", - "title": title, - "description": directive.goal, - "priority": "required", - "category": "core" - } - ]); - - let acceptance_criteria = serde_json::json!([ - { - "id": "AC-001", - "requirementIds": ["REQ-001"], - "description": format!("Goal is achieved: {}", directive.goal), - "testable": true, - "verificationMethod": "manual" - } - ]); - - Ok(GeneratedSpec { - title: Some(title), - requirements, - acceptance_criteria, - constraints: None, - }) - } - - /// Build a planning prompt for the LLM. - pub fn build_planning_prompt(&self, directive: &Directive) -> String { - let requirements: Vec<String> = directive - .requirements - .as_array() - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_object()) - .map(|obj| { - let id = obj.get("id").and_then(|v| v.as_str()).unwrap_or("?"); - let desc = obj - .get("description") - .and_then(|v| v.as_str()) - .unwrap_or("?"); - format!("- {}: {}", id, desc) - }) - .collect() - }) - .unwrap_or_default(); - - let criteria: Vec<String> = directive - .acceptance_criteria - .as_array() - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_object()) - .map(|obj| { - let id = obj.get("id").and_then(|v| v.as_str()).unwrap_or("?"); - let criterion = obj - .get("criterion") - .and_then(|v| v.as_str()) - .unwrap_or("?"); - format!("- {}: {}", id, criterion) - }) - .collect() - }) - .unwrap_or_default(); - - let constraints: Vec<String> = directive - .constraints - .as_array() - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str()) - .map(|s| format!("- {}", s)) - .collect() - }) - .unwrap_or_default(); - - format!( - r#"You are a software architect planning an execution chain for a coding task. - -## Directive Goal -{goal} - -## Requirements -{requirements} - -## Acceptance Criteria -{criteria} - -## Constraints -{constraints} - -## Instructions - -Create an execution plan as a chain of steps. Each step should: -1. Have a unique, descriptive name (kebab-case) -2. Specify its type (research, design, implement, test, review, document) -3. Declare dependencies on prior steps (if any) -4. Map to specific requirement IDs it addresses -5. Include a contract template with tasks and deliverables - -The chain should form a valid DAG (no cycles). Steps can run in parallel if they don't depend on each other. - -Respond with a JSON object in this format: -```json -{{ - "name": "chain-name", - "description": "Brief description of the plan", - "steps": [ - {{ - "name": "step-name", - "step_type": "implement", - "description": "What this step does", - "depends_on": ["prior-step-name"], - "requirement_ids": ["REQ-001"], - "contract_template": {{ - "name": "Contract Name", - "description": "Contract description", - "contract_type": "simple", - "phases": ["plan", "execute"], - "tasks": [ - {{"name": "Task 1", "plan": "Detailed plan for this task"}} - ], - "deliverables": [ - {{"id": "del-1", "name": "Deliverable 1", "priority": "required"}} - ] - }} - }} - ] -}} -``` - -Generate the optimal execution plan now."#, - goal = directive.goal, - requirements = requirements.join("\n"), - criteria = criteria.join("\n"), - constraints = constraints.join("\n"), - ) - } - - /// Parse LLM response into a generated chain. - pub fn parse_plan_response(&self, response: &str) -> Result<GeneratedChain, PlannerError> { - // Extract JSON from response (may be wrapped in markdown code blocks) - let json_str = extract_json_from_response(response)?; - - let chain: GeneratedChain = serde_json::from_str(&json_str) - .map_err(|e| PlannerError::InvalidPlan(format!("JSON parse error: {}", e)))?; - - if chain.steps.is_empty() { - return Err(PlannerError::EmptyPlan); - } - - // Validate the chain - self.validate_chain(&chain)?; - - Ok(chain) - } - - /// Validate a generated chain. - pub fn validate_chain(&self, chain: &GeneratedChain) -> Result<(), PlannerError> { - // Build step name set - let step_names: HashSet<&str> = chain.steps.iter().map(|s| s.name.as_str()).collect(); - - // Check for duplicate names - if step_names.len() != chain.steps.len() { - return Err(PlannerError::InvalidPlan( - "Duplicate step names detected".to_string(), - )); - } - - // Validate dependencies exist - for step in &chain.steps { - for dep in &step.depends_on { - if !step_names.contains(dep.as_str()) { - return Err(PlannerError::InvalidDependency { - step: step.name.clone(), - dependency: dep.clone(), - }); - } - } - } - - // Check for cycles using DFS - self.detect_cycles(chain)?; - - Ok(()) - } - - /// Detect cycles in the chain DAG using DFS. - fn detect_cycles(&self, chain: &GeneratedChain) -> Result<(), PlannerError> { - let mut visited = HashSet::new(); - let mut rec_stack = HashSet::new(); - - // Build adjacency map - let adj: HashMap<&str, Vec<&str>> = chain - .steps - .iter() - .map(|s| (s.name.as_str(), s.depends_on.iter().map(|d| d.as_str()).collect())) - .collect(); - - for step in &chain.steps { - if !visited.contains(step.name.as_str()) { - if self.has_cycle(&step.name, &adj, &mut visited, &mut rec_stack) { - return Err(PlannerError::CycleDetected(step.name.clone())); - } - } - } - - Ok(()) - } - - fn has_cycle<'a>( - &self, - node: &'a str, - adj: &HashMap<&'a str, Vec<&'a str>>, - visited: &mut HashSet<&'a str>, - rec_stack: &mut HashSet<&'a str>, - ) -> bool { - visited.insert(node); - rec_stack.insert(node); - - if let Some(deps) = adj.get(node) { - for &dep in deps { - if !visited.contains(dep) { - if self.has_cycle(dep, adj, visited, rec_stack) { - return true; - } - } else if rec_stack.contains(dep) { - return true; - } - } - } - - rec_stack.remove(node); - false - } - - /// Check that all requirements are covered by at least one step. - pub fn check_requirement_coverage( - &self, - chain: &GeneratedChain, - directive: &Directive, - ) -> Result<(), PlannerError> { - let required_ids: HashSet<String> = directive - .requirements - .as_array() - .map(|arr| { - arr.iter() - .filter_map(|v| v.get("id").and_then(|id| id.as_str())) - .map(|s| s.to_string()) - .collect() - }) - .unwrap_or_default(); - - let covered_ids: HashSet<String> = chain - .steps - .iter() - .flat_map(|s| s.requirement_ids.clone()) - .collect(); - - for req_id in required_ids { - if !covered_ids.contains(&req_id) { - return Err(PlannerError::RequirementNotCovered(req_id)); - } - } - - Ok(()) - } - - /// Get topological order of steps. - pub fn topological_sort<'a>( - &self, - chain: &'a GeneratedChain, - ) -> Result<Vec<&'a str>, PlannerError> { - let mut in_degree: HashMap<&str, usize> = HashMap::new(); - let mut adj: HashMap<&str, Vec<&str>> = HashMap::new(); - - // Initialize - for step in &chain.steps { - in_degree.entry(step.name.as_str()).or_insert(0); - adj.entry(step.name.as_str()).or_insert_with(Vec::new); - } - - // Build graph (reversed - edges from dependency to dependent) - for step in &chain.steps { - for dep in &step.depends_on { - adj.entry(dep.as_str()) - .or_insert_with(Vec::new) - .push(step.name.as_str()); - *in_degree.entry(step.name.as_str()).or_insert(0) += 1; - } - } - - // Kahn's algorithm - let mut queue: Vec<&str> = in_degree - .iter() - .filter(|&(_, deg)| *deg == 0) - .map(|(&name, _)| name) - .collect(); - - let mut result = Vec::new(); - - while let Some(node) = queue.pop() { - result.push(node); - - if let Some(neighbors) = adj.get(node) { - for &neighbor in neighbors { - let deg = in_degree.get_mut(neighbor).unwrap(); - *deg -= 1; - if *deg == 0 { - queue.push(neighbor); - } - } - } - } - - if result.len() != chain.steps.len() { - return Err(PlannerError::CycleDetected( - "Cycle detected during topological sort".to_string(), - )); - } - - Ok(result) - } - - /// Convert generated steps to AddStepRequest for database insertion. - pub fn steps_to_requests( - &self, - chain: &GeneratedChain, - step_id_map: &HashMap<String, Uuid>, - ) -> Vec<AddStepRequest> { - chain - .steps - .iter() - .map(|step| { - let depends_on: Vec<Uuid> = step - .depends_on - .iter() - .filter_map(|name| step_id_map.get(name)) - .copied() - .collect(); - - let task_plan = step - .contract_template - .as_ref() - .and_then(|t| t.tasks.first()) - .map(|t| t.plan.clone()); - - AddStepRequest { - name: step.name.clone(), - description: Some(step.description.clone()), - step_type: Some(step.step_type.clone()), - contract_type: step.contract_template.as_ref().map(|t| t.contract_type.clone()), - initial_phase: Some("plan".to_string()), - task_plan, - phases: step.contract_template.as_ref().map(|t| t.phases.clone()), - depends_on: Some(depends_on), - parallel_group: None, - requirement_ids: Some(step.requirement_ids.clone()), - acceptance_criteria_ids: None, - verifier_config: None, - editor_x: None, - editor_y: None, - } - }) - .collect() - } - - /// Compute editor positions for steps based on DAG layout. - pub fn compute_editor_positions( - &self, - chain: &GeneratedChain, - ) -> HashMap<String, (f64, f64)> { - let depths = self.get_step_depths(chain); - let mut positions: HashMap<String, (f64, f64)> = HashMap::new(); - - // Group by depth - let mut by_depth: HashMap<usize, Vec<&str>> = HashMap::new(); - for step in &chain.steps { - let depth = depths.get(&step.name).copied().unwrap_or(0); - by_depth.entry(depth).or_default().push(&step.name); - } - - // Compute positions: x based on depth, y based on index within depth - let x_spacing = 250.0; - let y_spacing = 150.0; - - for (depth, steps) in &by_depth { - let x = (*depth as f64) * x_spacing + 100.0; - for (i, name) in steps.iter().enumerate() { - let y = (i as f64) * y_spacing + 100.0; - positions.insert(name.to_string(), (x, y)); - } - } - - positions - } - - /// Get depth of each step in the DAG. - fn get_step_depths(&self, chain: &GeneratedChain) -> HashMap<String, usize> { - let mut depths: HashMap<String, usize> = HashMap::new(); - - // Build dependency map - let deps: HashMap<String, Vec<String>> = chain - .steps - .iter() - .map(|s| (s.name.clone(), s.depends_on.clone())) - .collect(); - - fn compute_depth( - name: &str, - deps: &HashMap<String, Vec<String>>, - depths: &mut HashMap<String, usize>, - ) -> usize { - if let Some(&d) = depths.get(name) { - return d; - } - - let depth = deps - .get(name) - .map(|dep_list| { - dep_list - .iter() - .map(|d| compute_depth(d, deps, depths) + 1) - .max() - .unwrap_or(0) - }) - .unwrap_or(0); - - depths.insert(name.to_string(), depth); - depth - } - - for step in &chain.steps { - compute_depth(&step.name, &deps, &mut depths); - } - - depths - } - - /// Build a replanning prompt that preserves completed steps. - pub fn build_replan_prompt( - &self, - directive: &Directive, - completed_steps: &[ChainStep], - failed_step: Option<&ChainStep>, - reason: &str, - ) -> String { - let completed_summary: Vec<String> = completed_steps - .iter() - .map(|s| format!("- {} ({}): completed", s.name, s.step_type)) - .collect(); - - let failed_summary = failed_step - .map(|s| format!("Failed step: {} - {}", s.name, s.description.as_deref().unwrap_or(""))) - .unwrap_or_default(); - - format!( - r#"You are a software architect replanning an execution chain. - -## Original Goal -{goal} - -## Completed Steps (preserve these) -{completed} - -## Failure Information -{failed} -Reason: {reason} - -## Instructions -Generate a new execution plan that: -1. Preserves all completed work -2. Addresses the failure -3. Continues toward the original goal - -Use the same JSON format as before. Do not include already completed steps."#, - goal = directive.goal, - completed = completed_summary.join("\n"), - failed = failed_summary, - reason = reason, - ) - } -} - -/// Generate a concise title from a goal string. -fn generate_title_from_goal(goal: &str) -> String { - // Take the first sentence or first 80 chars - let title = if let Some(pos) = goal.find('.') { - if pos < 100 { - &goal[..pos] - } else { - &goal[..80.min(goal.len())] - } - } else if goal.len() > 80 { - &goal[..80] - } else { - goal - }; - title.trim().to_string() -} - -/// Extract JSON from LLM response (handles markdown code blocks). -fn extract_json_from_response(response: &str) -> Result<String, PlannerError> { - // Try to find JSON in code block - if let Some(start) = response.find("```json") { - let json_start = start + 7; - if let Some(end) = response[json_start..].find("```") { - return Ok(response[json_start..json_start + end].trim().to_string()); - } - } - - // Try to find JSON in generic code block - if let Some(start) = response.find("```") { - let block_start = start + 3; - // Skip language identifier if present - let json_start = response[block_start..] - .find('\n') - .map(|i| block_start + i + 1) - .unwrap_or(block_start); - if let Some(end) = response[json_start..].find("```") { - return Ok(response[json_start..json_start + end].trim().to_string()); - } - } - - // Try to parse the whole thing as JSON - if response.trim().starts_with('{') { - return Ok(response.trim().to_string()); - } - - Err(PlannerError::InvalidPlan( - "Could not extract JSON from response".to_string(), - )) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn make_test_chain() -> GeneratedChain { - GeneratedChain { - name: "test-chain".to_string(), - description: "Test chain".to_string(), - steps: vec![ - GeneratedStep { - name: "step-a".to_string(), - step_type: "research".to_string(), - description: "Research step".to_string(), - depends_on: vec![], - requirement_ids: vec!["REQ-001".to_string()], - contract_template: None, - }, - GeneratedStep { - name: "step-b".to_string(), - step_type: "implement".to_string(), - description: "Implementation step".to_string(), - depends_on: vec!["step-a".to_string()], - requirement_ids: vec!["REQ-002".to_string()], - contract_template: None, - }, - GeneratedStep { - name: "step-c".to_string(), - step_type: "test".to_string(), - description: "Test step".to_string(), - depends_on: vec!["step-b".to_string()], - requirement_ids: vec!["REQ-001".to_string()], - contract_template: None, - }, - ], - } - } - - #[test] - fn test_validate_chain_valid() { - let planner = ChainPlanner::without_pool(); - let chain = make_test_chain(); - assert!(planner.validate_chain(&chain).is_ok()); - } - - #[test] - fn test_validate_chain_invalid_dependency() { - let planner = ChainPlanner::without_pool(); - let mut chain = make_test_chain(); - chain.steps[1].depends_on = vec!["nonexistent".to_string()]; - - let result = planner.validate_chain(&chain); - assert!(matches!(result, Err(PlannerError::InvalidDependency { .. }))); - } - - #[test] - fn test_validate_chain_cycle() { - let planner = ChainPlanner::without_pool(); - let chain = GeneratedChain { - name: "cyclic".to_string(), - description: "Has cycle".to_string(), - steps: vec![ - GeneratedStep { - name: "a".to_string(), - step_type: "research".to_string(), - description: "A".to_string(), - depends_on: vec!["c".to_string()], - requirement_ids: vec![], - contract_template: None, - }, - GeneratedStep { - name: "b".to_string(), - step_type: "implement".to_string(), - description: "B".to_string(), - depends_on: vec!["a".to_string()], - requirement_ids: vec![], - contract_template: None, - }, - GeneratedStep { - name: "c".to_string(), - step_type: "test".to_string(), - description: "C".to_string(), - depends_on: vec!["b".to_string()], - requirement_ids: vec![], - contract_template: None, - }, - ], - }; - - let result = planner.validate_chain(&chain); - assert!(matches!(result, Err(PlannerError::CycleDetected(_)))); - } - - #[test] - fn test_topological_sort() { - let planner = ChainPlanner::without_pool(); - let chain = make_test_chain(); - let order = planner.topological_sort(&chain).unwrap(); - - // step-a must come before step-b, step-b before step-c - let pos_a = order.iter().position(|&n| n == "step-a").unwrap(); - let pos_b = order.iter().position(|&n| n == "step-b").unwrap(); - let pos_c = order.iter().position(|&n| n == "step-c").unwrap(); - - assert!(pos_a < pos_b); - assert!(pos_b < pos_c); - } - - #[test] - fn test_extract_json_from_code_block() { - let response = r#" -Here's the plan: - -```json -{"name": "test"} -``` - -That's it! -"#; - let json = extract_json_from_response(response).unwrap(); - assert_eq!(json, r#"{"name": "test"}"#); - } - - #[test] - fn test_extract_json_raw() { - let response = r#"{"name": "test"}"#; - let json = extract_json_from_response(response).unwrap(); - assert_eq!(json, r#"{"name": "test"}"#); - } -} diff --git a/makima/src/orchestration/verifier.rs b/makima/src/orchestration/verifier.rs deleted file mode 100644 index bc29e47..0000000 --- a/makima/src/orchestration/verifier.rs +++ /dev/null @@ -1,833 +0,0 @@ -//! Verification system for directive step evaluation. -//! -//! Provides tiered verification: programmatic verifiers run first, -//! then LLM evaluation if programmatic checks pass. Composite scoring -//! combines results with configurable weights. - -use async_trait::async_trait; -use serde::{Deserialize, Serialize}; -use serde_json::Value as JsonValue; -use std::path::Path; -use thiserror::Error; -use uuid::Uuid; - -/// Confidence level based on composite score and thresholds. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ConfidenceLevel { - /// High confidence (score >= green threshold) - Green, - /// Medium confidence (score >= yellow threshold but < green) - Yellow, - /// Low confidence (score < yellow threshold) - Red, -} - -impl ConfidenceLevel { - /// Compute confidence level from score and thresholds. - pub fn from_score(score: f64, green_threshold: f64, yellow_threshold: f64) -> Self { - if score >= green_threshold { - ConfidenceLevel::Green - } else if score >= yellow_threshold { - ConfidenceLevel::Yellow - } else { - ConfidenceLevel::Red - } - } - - /// Convert to string for database storage. - pub fn as_str(&self) -> &'static str { - match self { - ConfidenceLevel::Green => "green", - ConfidenceLevel::Yellow => "yellow", - ConfidenceLevel::Red => "red", - } - } -} - -impl std::fmt::Display for ConfidenceLevel { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -/// Type of verifier for categorization. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum VerifierType { - /// Run test suite (npm test, cargo test, pytest, etc.) - TestRunner, - /// Run linter (eslint, clippy, ruff, etc.) - Linter, - /// Run type checker (tsc, mypy, etc.) - TypeChecker, - /// Run build command (npm build, cargo build, etc.) - Build, - /// Custom command verifier - Custom, - /// LLM-based semantic evaluation - Llm, -} - -impl VerifierType { - pub fn as_str(&self) -> &'static str { - match self { - VerifierType::TestRunner => "test_runner", - VerifierType::Linter => "linter", - VerifierType::TypeChecker => "type_checker", - VerifierType::Build => "build", - VerifierType::Custom => "custom", - VerifierType::Llm => "llm", - } - } -} - -/// Result of a single verifier run. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VerifierResult { - /// Name of the verifier - pub name: String, - /// Type of verifier - pub verifier_type: VerifierType, - /// Whether the verification passed - pub passed: bool, - /// Score from 0.0 to 1.0 (1.0 = perfect, 0.0 = complete failure) - pub score: f64, - /// Weight for composite scoring (default 1.0 for programmatic, 2.0 for LLM) - pub weight: f64, - /// Whether this verifier is required (failure = automatic red confidence) - pub required: bool, - /// Human-readable output/feedback - pub output: String, - /// Structured details (test counts, lint errors, etc.) - pub details: Option<JsonValue>, - /// Execution time in milliseconds - pub duration_ms: u64, -} - -impl VerifierResult { - /// Create a passed result with full score. - pub fn passed(name: String, verifier_type: VerifierType, output: String) -> Self { - Self { - name, - verifier_type, - passed: true, - score: 1.0, - weight: 1.0, - required: false, - output, - details: None, - duration_ms: 0, - } - } - - /// Create a failed result with zero score. - pub fn failed(name: String, verifier_type: VerifierType, output: String) -> Self { - Self { - name, - verifier_type, - passed: false, - score: 0.0, - weight: 1.0, - required: false, - output, - details: None, - duration_ms: 0, - } - } - - /// Set the weight for this result. - pub fn with_weight(mut self, weight: f64) -> Self { - self.weight = weight; - self - } - - /// Mark this verifier as required. - pub fn as_required(mut self) -> Self { - self.required = true; - self - } - - /// Set the score explicitly. - pub fn with_score(mut self, score: f64) -> Self { - self.score = score.clamp(0.0, 1.0); - self - } - - /// Set structured details. - pub fn with_details(mut self, details: JsonValue) -> Self { - self.details = Some(details); - self - } - - /// Set execution duration. - pub fn with_duration(mut self, duration_ms: u64) -> Self { - self.duration_ms = duration_ms; - self - } -} - -/// Composite evaluation result combining multiple verifier results. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct EvaluationResult { - /// Unique ID for this evaluation - pub id: Uuid, - /// Step ID being evaluated - pub step_id: Uuid, - /// Whether all required verifiers passed - pub passed: bool, - /// Weighted composite score (0.0-1.0) - pub composite_score: f64, - /// Confidence level derived from score - pub confidence_level: ConfidenceLevel, - /// Individual verifier results - pub verifier_results: Vec<VerifierResult>, - /// Summary feedback for the step - pub summary: String, - /// Rework instructions if failed - pub rework_instructions: Option<String>, - /// Total evaluation duration in milliseconds - pub total_duration_ms: u64, -} - -impl EvaluationResult { - /// Create a new evaluation result from verifier results. - pub fn from_verifiers( - step_id: Uuid, - results: Vec<VerifierResult>, - green_threshold: f64, - yellow_threshold: f64, - ) -> Self { - let id = Uuid::new_v4(); - - // Check if any required verifier failed - let any_required_failed = results.iter().any(|r| r.required && !r.passed); - - // Calculate weighted composite score - let (total_weighted_score, total_weight) = - results - .iter() - .fold((0.0, 0.0), |(score_acc, weight_acc), r| { - (score_acc + r.score * r.weight, weight_acc + r.weight) - }); - - let composite_score = if total_weight > 0.0 { - total_weighted_score / total_weight - } else { - 0.0 - }; - - // Override confidence to red if any required verifier failed - let confidence_level = if any_required_failed { - ConfidenceLevel::Red - } else { - ConfidenceLevel::from_score(composite_score, green_threshold, yellow_threshold) - }; - - let passed = !any_required_failed && confidence_level != ConfidenceLevel::Red; - - // Generate summary - let passed_count = results.iter().filter(|r| r.passed).count(); - let total_count = results.len(); - let summary = format!( - "{}/{} verifiers passed, composite score: {:.2}, confidence: {}", - passed_count, total_count, composite_score, confidence_level - ); - - // Generate rework instructions if failed - let rework_instructions = if !passed { - let failed_verifiers: Vec<&str> = results - .iter() - .filter(|r| !r.passed) - .map(|r| r.name.as_str()) - .collect(); - Some(format!( - "Fix issues identified by: {}", - failed_verifiers.join(", ") - )) - } else { - None - }; - - let total_duration_ms = results.iter().map(|r| r.duration_ms).sum(); - - Self { - id, - step_id, - passed, - composite_score, - confidence_level, - verifier_results: results, - summary, - rework_instructions, - total_duration_ms, - } - } -} - -/// Error type for verification operations. -#[derive(Error, Debug)] -pub enum VerifierError { - #[error("Command execution failed: {0}")] - CommandFailed(String), - - #[error("Command timed out after {0}ms")] - Timeout(u64), - - #[error("Working directory not found: {0}")] - WorkingDirectoryNotFound(String), - - #[error("Verifier not configured: {0}")] - NotConfigured(String), - - #[error("Parse error: {0}")] - ParseError(String), - - #[error("LLM error: {0}")] - LlmError(String), - - #[error("IO error: {0}")] - Io(#[from] std::io::Error), -} - -/// Information about a verifier for serialization and database storage. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VerifierInfo { - pub name: String, - pub verifier_type: String, - pub command: String, - pub working_directory: Option<String>, - pub detect_files: Vec<String>, - pub weight: f64, - pub required: bool, -} - -/// Verifier trait for pluggable verification implementations. -#[async_trait] -pub trait Verifier: Send + Sync { - /// Get the name of this verifier. - fn name(&self) -> &str; - - /// Get the type of this verifier. - fn verifier_type(&self) -> VerifierType; - - /// Get serializable info about this verifier. - fn info(&self) -> VerifierInfo; - - /// Check if this verifier is applicable to the given repository. - async fn is_applicable(&self, repo_path: &Path) -> bool; - - /// Run verification and return result. - async fn verify(&self, repo_path: &Path, context: &VerificationContext) - -> Result<VerifierResult, VerifierError>; -} - -/// Context provided to verifiers during execution. -#[derive(Debug, Clone)] -pub struct VerificationContext { - /// Step ID being verified - pub step_id: Uuid, - /// Contract ID if step has been instantiated - pub contract_id: Option<Uuid>, - /// Files that were modified in this step - pub modified_files: Vec<String>, - /// Step description for LLM context - pub step_description: String, - /// Acceptance criteria for LLM evaluation - pub acceptance_criteria: Vec<String>, - /// Additional context from directive - pub directive_context: String, -} - -/// Command-based verifier for running shell commands. -pub struct CommandVerifier { - name: String, - verifier_type: VerifierType, - command: String, - #[allow(dead_code)] - working_dir: Option<String>, - #[allow(dead_code)] - timeout_ms: u64, - required: bool, - /// Files/patterns that indicate this verifier is applicable - applicable_patterns: Vec<String>, -} - -impl CommandVerifier { - /// Create a new command verifier. - pub fn new( - name: impl Into<String>, - verifier_type: VerifierType, - command: impl Into<String>, - ) -> Self { - Self { - name: name.into(), - verifier_type, - command: command.into(), - working_dir: None, - timeout_ms: 300_000, // 5 minute default - required: false, - applicable_patterns: Vec::new(), - } - } - - /// Set the working directory. - #[allow(dead_code)] - pub fn with_working_dir(mut self, dir: impl Into<String>) -> Self { - self.working_dir = Some(dir.into()); - self - } - - /// Set the timeout in milliseconds. - #[allow(dead_code)] - pub fn with_timeout(mut self, timeout_ms: u64) -> Self { - self.timeout_ms = timeout_ms; - self - } - - /// Mark as required verifier. - pub fn as_required(mut self) -> Self { - self.required = true; - self - } - - /// Add applicability patterns (files that must exist). - pub fn with_patterns(mut self, patterns: Vec<String>) -> Self { - self.applicable_patterns = patterns; - self - } -} - -#[async_trait] -impl Verifier for CommandVerifier { - fn name(&self) -> &str { - &self.name - } - - fn verifier_type(&self) -> VerifierType { - self.verifier_type.clone() - } - - fn info(&self) -> VerifierInfo { - VerifierInfo { - name: self.name.clone(), - verifier_type: self.verifier_type.as_str().to_string(), - command: self.command.clone(), - working_directory: self.working_dir.clone(), - detect_files: self.applicable_patterns.clone(), - weight: 1.0, - required: self.required, - } - } - - async fn is_applicable(&self, repo_path: &Path) -> bool { - if self.applicable_patterns.is_empty() { - return true; - } - - for pattern in &self.applicable_patterns { - let check_path = repo_path.join(pattern); - if check_path.exists() { - return true; - } - } - false - } - - async fn verify( - &self, - repo_path: &Path, - _context: &VerificationContext, - ) -> Result<VerifierResult, VerifierError> { - let start = std::time::Instant::now(); - - let work_dir = self - .working_dir - .as_ref() - .map(|d| repo_path.join(d)) - .unwrap_or_else(|| repo_path.to_path_buf()); - - if !work_dir.exists() { - return Err(VerifierError::WorkingDirectoryNotFound( - work_dir.display().to_string(), - )); - } - - // Parse command into program and args - let parts: Vec<&str> = self.command.split_whitespace().collect(); - if parts.is_empty() { - return Err(VerifierError::CommandFailed( - "Empty command".to_string(), - )); - } - - let program = parts[0]; - let args = &parts[1..]; - - // Execute command - let output = tokio::process::Command::new(program) - .args(args) - .current_dir(&work_dir) - .output() - .await?; - - let duration_ms = start.elapsed().as_millis() as u64; - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - let combined_output = format!("{}\n{}", stdout, stderr); - - let passed = output.status.success(); - let score = if passed { 1.0 } else { 0.0 }; - - let mut result = VerifierResult { - name: self.name.clone(), - verifier_type: self.verifier_type.clone(), - passed, - score, - weight: 1.0, - required: self.required, - output: combined_output, - details: Some(serde_json::json!({ - "exit_code": output.status.code(), - "command": self.command, - "working_dir": work_dir.display().to_string(), - })), - duration_ms, - }; - - // Try to extract more detailed scoring from output - result = self.enhance_result(result, &stdout); - - Ok(result) - } -} - -impl CommandVerifier { - /// Enhance result with parsed details from output. - fn enhance_result(&self, mut result: VerifierResult, stdout: &str) -> VerifierResult { - match self.verifier_type { - VerifierType::TestRunner => { - // Try to parse test counts from common formats - if let Some((passed, failed, total)) = parse_test_output(stdout) { - result.details = Some(serde_json::json!({ - "tests_passed": passed, - "tests_failed": failed, - "tests_total": total, - "command": self.command, - })); - if total > 0 { - result.score = passed as f64 / total as f64; - } - } - } - VerifierType::Linter => { - // Try to parse lint error counts - if let Some(error_count) = parse_lint_output(stdout) { - result.details = Some(serde_json::json!({ - "errors": error_count, - "command": self.command, - })); - // Score decreases with more errors (up to 10 errors = 0) - result.score = (1.0 - (error_count as f64 / 10.0)).max(0.0); - } - } - _ => {} - } - result - } -} - -/// Parse test output for common formats (Jest, pytest, cargo test). -fn parse_test_output(output: &str) -> Option<(u32, u32, u32)> { - // Jest format: "Tests: X passed, Y failed, Z total" - if let Some(caps) = regex::Regex::new(r"Tests:\s*(\d+)\s*passed,\s*(\d+)\s*failed,\s*(\d+)\s*total") - .ok()? - .captures(output) - { - let passed: u32 = caps.get(1)?.as_str().parse().ok()?; - let failed: u32 = caps.get(2)?.as_str().parse().ok()?; - let total: u32 = caps.get(3)?.as_str().parse().ok()?; - return Some((passed, failed, total)); - } - - // pytest format: "X passed, Y failed" - if let Some(caps) = regex::Regex::new(r"(\d+)\s*passed(?:,\s*(\d+)\s*failed)?") - .ok()? - .captures(output) - { - let passed: u32 = caps.get(1)?.as_str().parse().ok()?; - let failed: u32 = caps.get(2).map(|m| m.as_str().parse().ok()).flatten().unwrap_or(0); - let total = passed + failed; - return Some((passed, failed, total)); - } - - // cargo test format: "test result: ok. X passed; Y failed;" - if let Some(caps) = regex::Regex::new(r"test result:.*?(\d+)\s*passed;\s*(\d+)\s*failed") - .ok()? - .captures(output) - { - let passed: u32 = caps.get(1)?.as_str().parse().ok()?; - let failed: u32 = caps.get(2)?.as_str().parse().ok()?; - let total = passed + failed; - return Some((passed, failed, total)); - } - - None -} - -/// Parse lint output for error counts. -fn parse_lint_output(output: &str) -> Option<u32> { - // ESLint format: "X problems (Y errors, Z warnings)" - if let Some(caps) = regex::Regex::new(r"(\d+)\s*problems?\s*\((\d+)\s*errors?") - .ok()? - .captures(output) - { - return caps.get(2)?.as_str().parse().ok(); - } - - // Clippy format: "warning: X warnings emitted" - if let Some(caps) = regex::Regex::new(r"warning:\s*(\d+)\s*warnings?\s*emitted") - .ok()? - .captures(output) - { - return caps.get(1)?.as_str().parse().ok(); - } - - None -} - -/// Auto-detect applicable verifiers for a repository. -pub async fn auto_detect_verifiers(repo_path: &Path) -> Vec<Box<dyn Verifier>> { - let mut verifiers: Vec<Box<dyn Verifier>> = Vec::new(); - - // Check for package.json (Node.js) - let package_json = repo_path.join("package.json"); - if package_json.exists() { - if let Ok(content) = tokio::fs::read_to_string(&package_json).await { - if let Ok(pkg) = serde_json::from_str::<serde_json::Value>(&content) { - if let Some(scripts) = pkg.get("scripts").and_then(|s| s.as_object()) { - // Test runner - if scripts.contains_key("test") { - verifiers.push(Box::new( - CommandVerifier::new("npm-test", VerifierType::TestRunner, "npm test") - .with_patterns(vec!["package.json".to_string()]) - .as_required(), - )); - } - - // Linter - if scripts.contains_key("lint") { - verifiers.push(Box::new( - CommandVerifier::new("npm-lint", VerifierType::Linter, "npm run lint") - .with_patterns(vec!["package.json".to_string()]), - )); - } - - // Build - if scripts.contains_key("build") { - verifiers.push(Box::new( - CommandVerifier::new("npm-build", VerifierType::Build, "npm run build") - .with_patterns(vec!["package.json".to_string()]) - .as_required(), - )); - } - - // Type check (for TypeScript projects) - if scripts.contains_key("typecheck") || scripts.contains_key("type-check") { - let cmd = if scripts.contains_key("typecheck") { - "npm run typecheck" - } else { - "npm run type-check" - }; - verifiers.push(Box::new( - CommandVerifier::new("npm-typecheck", VerifierType::TypeChecker, cmd) - .with_patterns(vec!["tsconfig.json".to_string()]), - )); - } - } - } - } - } - - // Check for Cargo.toml (Rust) - let cargo_toml = repo_path.join("Cargo.toml"); - if cargo_toml.exists() { - verifiers.push(Box::new( - CommandVerifier::new("cargo-test", VerifierType::TestRunner, "cargo test") - .with_patterns(vec!["Cargo.toml".to_string()]) - .as_required(), - )); - - verifiers.push(Box::new( - CommandVerifier::new("cargo-clippy", VerifierType::Linter, "cargo clippy -- -D warnings") - .with_patterns(vec!["Cargo.toml".to_string()]), - )); - - verifiers.push(Box::new( - CommandVerifier::new("cargo-build", VerifierType::Build, "cargo build") - .with_patterns(vec!["Cargo.toml".to_string()]) - .as_required(), - )); - } - - // Check for pyproject.toml or setup.py (Python) - let pyproject = repo_path.join("pyproject.toml"); - let setup_py = repo_path.join("setup.py"); - if pyproject.exists() || setup_py.exists() { - verifiers.push(Box::new( - CommandVerifier::new("pytest", VerifierType::TestRunner, "pytest") - .with_patterns(vec![ - "pyproject.toml".to_string(), - "setup.py".to_string(), - ]) - .as_required(), - )); - - verifiers.push(Box::new( - CommandVerifier::new("ruff", VerifierType::Linter, "ruff check .") - .with_patterns(vec!["pyproject.toml".to_string()]), - )); - } - - verifiers -} - -/// Composite evaluator that runs multiple verifiers and combines results. -pub struct CompositeEvaluator { - verifiers: Vec<Box<dyn Verifier>>, - green_threshold: f64, - yellow_threshold: f64, -} - -impl CompositeEvaluator { - /// Create a new composite evaluator with default thresholds. - pub fn new(verifiers: Vec<Box<dyn Verifier>>) -> Self { - Self { - verifiers, - green_threshold: 0.8, - yellow_threshold: 0.5, - } - } - - /// Set confidence thresholds. - pub fn with_thresholds(mut self, green: f64, yellow: f64) -> Self { - self.green_threshold = green; - self.yellow_threshold = yellow; - self - } - - /// Add a verifier. - pub fn add_verifier(mut self, verifier: Box<dyn Verifier>) -> Self { - self.verifiers.push(verifier); - self - } - - /// Run all applicable verifiers and return composite result. - pub async fn evaluate( - &self, - repo_path: &Path, - context: &VerificationContext, - ) -> EvaluationResult { - let mut results = Vec::new(); - - for verifier in &self.verifiers { - if !verifier.is_applicable(repo_path).await { - continue; - } - - match verifier.verify(repo_path, context).await { - Ok(result) => results.push(result), - Err(e) => { - // Convert error to failed result - results.push(VerifierResult::failed( - verifier.name().to_string(), - verifier.verifier_type(), - format!("Verifier error: {}", e), - )); - } - } - } - - EvaluationResult::from_verifiers( - context.step_id, - results, - self.green_threshold, - self.yellow_threshold, - ) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_confidence_level_from_score() { - assert_eq!( - ConfidenceLevel::from_score(0.9, 0.8, 0.5), - ConfidenceLevel::Green - ); - assert_eq!( - ConfidenceLevel::from_score(0.8, 0.8, 0.5), - ConfidenceLevel::Green - ); - assert_eq!( - ConfidenceLevel::from_score(0.6, 0.8, 0.5), - ConfidenceLevel::Yellow - ); - assert_eq!( - ConfidenceLevel::from_score(0.5, 0.8, 0.5), - ConfidenceLevel::Yellow - ); - assert_eq!( - ConfidenceLevel::from_score(0.4, 0.8, 0.5), - ConfidenceLevel::Red - ); - } - - #[test] - fn test_evaluation_result_composite_score() { - let results = vec![ - VerifierResult::passed("test1".into(), VerifierType::TestRunner, "OK".into()) - .with_weight(1.0), - VerifierResult::failed("test2".into(), VerifierType::Linter, "Failed".into()) - .with_weight(1.0), - ]; - - let eval = EvaluationResult::from_verifiers(Uuid::new_v4(), results, 0.8, 0.5); - assert!((eval.composite_score - 0.5).abs() < 0.001); - assert_eq!(eval.confidence_level, ConfidenceLevel::Yellow); - } - - #[test] - fn test_required_verifier_override() { - let results = vec![ - VerifierResult::passed("test1".into(), VerifierType::TestRunner, "OK".into()), - VerifierResult::failed("build".into(), VerifierType::Build, "Failed".into()) - .as_required(), - ]; - - let eval = EvaluationResult::from_verifiers(Uuid::new_v4(), results, 0.8, 0.5); - // Even though composite score is 0.5, required failure overrides to red - assert_eq!(eval.confidence_level, ConfidenceLevel::Red); - assert!(!eval.passed); - } - - #[test] - fn test_parse_test_output_jest() { - let output = "Tests: 10 passed, 2 failed, 12 total"; - let (passed, failed, total) = parse_test_output(output).unwrap(); - assert_eq!(passed, 10); - assert_eq!(failed, 2); - assert_eq!(total, 12); - } - - #[test] - fn test_parse_test_output_cargo() { - let output = "test result: ok. 25 passed; 0 failed;"; - let (passed, failed, total) = parse_test_output(output).unwrap(); - assert_eq!(passed, 25); - assert_eq!(failed, 0); - assert_eq!(total, 25); - } -} |
