1 files changed, 806 insertions, 0 deletions
diff --git a/makima/src/orchestration/verifier.rs b/makima/src/orchestration/verifier.rs
new file mode 100644
index 0000000..e98da50
--- /dev/null
+++ b/makima/src/orchestration/verifier.rs
@@ -0,0 +1,806 @@
+//! Verification system for directive step evaluation.
+//!
+//! Provides tiered verification: programmatic verifiers run first,
+//! then LLM evaluation if programmatic checks pass. Composite scoring
+//! combines results with configurable weights.
+
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+use serde_json::Value as JsonValue;
+use std::path::Path;
+use thiserror::Error;
+use uuid::Uuid;
+
+/// Confidence level based on composite score and thresholds.
+///
+/// Produced from a numeric score by [`ConfidenceLevel::from_score`]. Because of
+/// the `snake_case` serde renaming below, the variants serialize as
+/// `"green"`, `"yellow"` and `"red"`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ConfidenceLevel {
+    /// High confidence (score >= green threshold)
+    Green,
+    /// Medium confidence (score >= yellow threshold but < green)
+    Yellow,
+    /// Low confidence (score < yellow threshold)
+    Red,
+}
+
+impl ConfidenceLevel {
+    /// Classify `score` against the two thresholds.
+    ///
+    /// The green threshold is tested first, then the yellow one; anything
+    /// that meets neither is `Red`. Both comparisons are inclusive (`>=`).
+    /// A NaN score fails both comparisons and therefore yields `Red`.
+    pub fn from_score(score: f64, green_threshold: f64, yellow_threshold: f64) -> Self {
+        match score {
+            s if s >= green_threshold => Self::Green,
+            s if s >= yellow_threshold => Self::Yellow,
+            _ => Self::Red,
+        }
+    }
+
+    /// Lowercase name of the level, intended for database storage.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Green => "green",
+            Self::Yellow => "yellow",
+            Self::Red => "red",
+        }
+    }
+}
+
+/// Formats the level using the same lowercase text as [`ConfidenceLevel::as_str`].
+impl std::fmt::Display for ConfidenceLevel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Written verbatim: width/padding flags on the outer formatter are not applied.
+        f.write_str(self.as_str())
+    }
+}
+
+/// Type of verifier for categorization.
+///
+/// Serialized by serde in `snake_case`; [`VerifierType::as_str`] returns the
+/// same spellings. The type does not derive `Copy`, so it is cloned where an
+/// owned value is needed.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum VerifierType {
+    /// Run test suite (npm test, cargo test, pytest, etc.)
+    TestRunner,
+    /// Run linter (eslint, clippy, ruff, etc.)
+    Linter,
+    /// Run type checker (tsc, mypy, etc.)
+    TypeChecker,
+    /// Run build command (npm build, cargo build, etc.)
+    Build,
+    /// Custom command verifier
+    Custom,
+    /// LLM-based semantic evaluation
+    Llm,
+}
+
+impl VerifierType {
+    /// Stable `snake_case` identifier for this verifier category.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::TestRunner => "test_runner",
+            Self::Linter => "linter",
+            Self::TypeChecker => "type_checker",
+            Self::Build => "build",
+            Self::Custom => "custom",
+            Self::Llm => "llm",
+        }
+    }
+}
+
+/// Result of a single verifier run.
+///
+/// Built either through the `passed` / `failed` constructors plus the
+/// `with_*` builder methods, or as a struct literal (as `CommandVerifier`
+/// does).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VerifierResult {
+    /// Name of the verifier
+    pub name: String,
+    /// Type of verifier
+    pub verifier_type: VerifierType,
+    /// Whether the verification passed
+    pub passed: bool,
+    /// Score from 0.0 to 1.0 (1.0 = perfect, 0.0 = complete failure).
+    /// Only `with_score` clamps to this range; direct field writes do not.
+    pub score: f64,
+    /// Weight for composite scoring. The constructors in this file set 1.0;
+    /// NOTE(review): a 2.0 default for LLM verifiers is intended but is not
+    /// applied anywhere in this file — confirm where it is set.
+    pub weight: f64,
+    /// Whether this verifier is required (failure = automatic red confidence)
+    pub required: bool,
+    /// Human-readable output/feedback
+    pub output: String,
+    /// Structured details (test counts, lint errors, etc.)
+    pub details: Option<JsonValue>,
+    /// Execution time in milliseconds
+    pub duration_ms: u64,
+}
+
+impl VerifierResult {
+    /// Build a passing result: score 1.0, weight 1.0, not required, no
+    /// details and a zero duration.
+    pub fn passed(name: String, verifier_type: VerifierType, output: String) -> Self {
+        Self {
+            name,
+            verifier_type,
+            passed: true,
+            score: 1.0,
+            weight: 1.0,
+            required: false,
+            output,
+            details: None,
+            duration_ms: 0,
+        }
+    }
+
+    /// Build a failing result: identical defaults to [`Self::passed`] except
+    /// that `passed` is false and the score is 0.0.
+    pub fn failed(name: String, verifier_type: VerifierType, output: String) -> Self {
+        Self {
+            passed: false,
+            score: 0.0,
+            ..Self::passed(name, verifier_type, output)
+        }
+    }
+
+    /// Replace the composite-scoring weight.
+    pub fn with_weight(self, weight: f64) -> Self {
+        Self { weight, ..self }
+    }
+
+    /// Flag the result as coming from a required verifier.
+    pub fn as_required(self) -> Self {
+        Self {
+            required: true,
+            ..self
+        }
+    }
+
+    /// Replace the score, clamped into `0.0..=1.0`.
+    pub fn with_score(self, score: f64) -> Self {
+        Self {
+            score: score.clamp(0.0, 1.0),
+            ..self
+        }
+    }
+
+    /// Attach structured details, replacing any previous value.
+    pub fn with_details(self, details: JsonValue) -> Self {
+        Self {
+            details: Some(details),
+            ..self
+        }
+    }
+
+    /// Record how long the verifier ran, in milliseconds.
+    pub fn with_duration(self, duration_ms: u64) -> Self {
+        Self {
+            duration_ms,
+            ..self
+        }
+    }
+}
+
+/// Composite evaluation result combining multiple verifier results.
+///
+/// Instances are computed by `EvaluationResult::from_verifiers`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EvaluationResult {
+    /// Unique ID for this evaluation
+    pub id: Uuid,
+    /// Step ID being evaluated
+    pub step_id: Uuid,
+    /// True only when no required verifier failed AND the confidence level
+    /// is not red
+    pub passed: bool,
+    /// Weighted composite score (0.0-1.0): the weight-averaged verifier
+    /// score, or 0.0 when the total weight is not positive
+    pub composite_score: f64,
+    /// Confidence level derived from score (forced to red when a required
+    /// verifier failed)
+    pub confidence_level: ConfidenceLevel,
+    /// Individual verifier results
+    pub verifier_results: Vec<VerifierResult>,
+    /// Summary feedback for the step
+    pub summary: String,
+    /// Rework instructions if failed (`None` when `passed` is true)
+    pub rework_instructions: Option<String>,
+    /// Total evaluation duration in milliseconds (sum of verifier durations)
+    pub total_duration_ms: u64,
+}
+
+impl EvaluationResult {
+    /// Build an evaluation for `step_id` from individual verifier results.
+    ///
+    /// * The composite score is the weight-averaged verifier score, or 0.0
+    ///   when the total weight is not positive (including an empty `results`).
+    /// * The confidence level comes from [`ConfidenceLevel::from_score`],
+    ///   except that any failed *required* verifier forces `Red`.
+    /// * `passed` is true only when no required verifier failed and the
+    ///   confidence level is not `Red`.
+    /// * Rework instructions are produced only when `passed` is false.
+    ///
+    /// A fresh random `id` is generated for every call.
+    pub fn from_verifiers(
+        step_id: Uuid,
+        results: Vec<VerifierResult>,
+        green_threshold: f64,
+        yellow_threshold: f64,
+    ) -> Self {
+        let id = Uuid::new_v4();
+
+        // A failed required verifier vetoes the whole evaluation.
+        let any_required_failed = results.iter().any(|r| r.required && !r.passed);
+
+        // Weighted mean of the individual scores.
+        let (total_weighted_score, total_weight) = results.iter().fold(
+            (0.0_f64, 0.0_f64),
+            |(score_acc, weight_acc), r| (score_acc + r.score * r.weight, weight_acc + r.weight),
+        );
+        let composite_score = if total_weight > 0.0 {
+            total_weighted_score / total_weight
+        } else {
+            0.0
+        };
+
+        let confidence_level = if any_required_failed {
+            ConfidenceLevel::Red
+        } else {
+            ConfidenceLevel::from_score(composite_score, green_threshold, yellow_threshold)
+        };
+
+        let passed = !any_required_failed && confidence_level != ConfidenceLevel::Red;
+
+        let passed_count = results.iter().filter(|r| r.passed).count();
+        let total_count = results.len();
+        let summary = format!(
+            "{}/{} verifiers passed, composite score: {:.2}, confidence: {}",
+            passed_count, total_count, composite_score, confidence_level
+        );
+
+        let rework_instructions = if passed {
+            None
+        } else {
+            Some(Self::rework_message(&results))
+        };
+
+        // Saturate instead of overflowing on absurdly large durations.
+        let total_duration_ms = results
+            .iter()
+            .fold(0_u64, |acc, r| acc.saturating_add(r.duration_ms));
+
+        Self {
+            id,
+            step_id,
+            passed,
+            composite_score,
+            confidence_level,
+            verifier_results: results,
+            summary,
+            rework_instructions,
+            total_duration_ms,
+        }
+    }
+
+    /// Compose the rework hint for a failed evaluation.
+    ///
+    /// Failed verifiers are named first. An evaluation can also fail without
+    /// any verifier having `passed == false` (every verifier passed but with
+    /// low scores, or there were no results at all); those cases get their
+    /// own wording rather than an empty list of names.
+    fn rework_message(results: &[VerifierResult]) -> String {
+        fn names<'a>(
+            results: &'a [VerifierResult],
+            keep: impl Fn(&VerifierResult) -> bool,
+        ) -> Vec<&'a str> {
+            results
+                .iter()
+                .filter(|r| keep(r))
+                .map(|r| r.name.as_str())
+                .collect()
+        }
+
+        let failed = names(results, |r| !r.passed);
+        if !failed.is_empty() {
+            return format!("Fix issues identified by: {}", failed.join(", "));
+        }
+
+        let low_scoring = names(results, |r| r.score < 1.0);
+        if !low_scoring.is_empty() {
+            return format!(
+                "Improve low-scoring results from: {}",
+                low_scoring.join(", ")
+            );
+        }
+
+        "No verifier produced a usable score; check that verifiers are configured for this step"
+            .to_string()
+    }
+}
+
+/// Error type for verification operations.
+///
+/// The `#[error]` attributes define the `Display` text; `Io` additionally
+/// gets a `From<std::io::Error>` conversion so `?` works on I/O results.
+#[derive(Error, Debug)]
+pub enum VerifierError {
+    /// A command could not be run; the payload is a free-form description
+    /// (for example `"Empty command"` from `CommandVerifier::verify`).
+    #[error("Command execution failed: {0}")]
+    CommandFailed(String),
+
+    /// A command exceeded its time limit; the payload is the limit in
+    /// milliseconds.
+    #[error("Command timed out after {0}ms")]
+    Timeout(u64),
+
+    /// The resolved working directory does not exist; the payload is the
+    /// path that was checked.
+    #[error("Working directory not found: {0}")]
+    WorkingDirectoryNotFound(String),
+
+    /// A verifier was requested without the configuration it needs.
+    #[error("Verifier not configured: {0}")]
+    NotConfigured(String),
+
+    /// Verifier output could not be parsed.
+    #[error("Parse error: {0}")]
+    ParseError(String),
+
+    /// An LLM-based verifier failed.
+    #[error("LLM error: {0}")]
+    LlmError(String),
+
+    /// Underlying I/O failure (for example, a process that could not be spawned).
+    #[error("IO error: {0}")]
+    Io(#[from] std::io::Error),
+}
+
+/// Verifier trait for pluggable verification implementations.
+///
+/// Implementors must be `Send + Sync`; the evaluator stores them as
+/// `Box<dyn Verifier>`. The async methods are provided via `async_trait`.
+#[async_trait]
+pub trait Verifier: Send + Sync {
+    /// Get the name of this verifier.
+    fn name(&self) -> &str;
+
+    /// Get the type of this verifier.
+    fn verifier_type(&self) -> VerifierType;
+
+    /// Check if this verifier is applicable to the given repository.
+    ///
+    /// `CompositeEvaluator::evaluate` skips verifiers that return `false`.
+    async fn is_applicable(&self, repo_path: &Path) -> bool;
+
+    /// Run verification and return result.
+    ///
+    /// An `Err` means the verifier itself could not run; a verification that
+    /// ran and found problems is reported as `Ok` with `passed == false`.
+    async fn verify(&self, repo_path: &Path, context: &VerificationContext)
+        -> Result<VerifierResult, VerifierError>;
+}
+
+/// Context provided to verifiers during execution.
+///
+/// Within this file only `step_id` is read (by `CompositeEvaluator::evaluate`);
+/// `CommandVerifier` ignores the context entirely. The remaining fields are
+/// available to other `Verifier` implementations.
+#[derive(Debug, Clone)]
+pub struct VerificationContext {
+    /// Step ID being verified
+    pub step_id: Uuid,
+    /// Contract ID if step has been instantiated
+    pub contract_id: Option<Uuid>,
+    /// Files that were modified in this step
+    pub modified_files: Vec<String>,
+    /// Step description for LLM context
+    pub step_description: String,
+    /// Acceptance criteria for LLM evaluation
+    pub acceptance_criteria: Vec<String>,
+    /// Additional context from directive
+    pub directive_context: String,
+}
+
+/// Command-based verifier for running shell commands.
+///
+/// The command string is split on whitespace into a program and its
+/// arguments and executed directly (no shell is involved), so quoting and
+/// shell operators are not interpreted.
+pub struct CommandVerifier {
+    /// Name reported in results
+    name: String,
+    /// Category; also selects how the output is parsed for extra details
+    verifier_type: VerifierType,
+    /// Command line, whitespace-separated
+    command: String,
+    /// Optional directory, joined onto the repository path, to run in
+    #[allow(dead_code)]
+    working_dir: Option<String>,
+    /// Timeout in milliseconds (`new` sets 300_000, i.e. five minutes)
+    #[allow(dead_code)]
+    timeout_ms: u64,
+    /// Copied into each result's `required` flag
+    required: bool,
+    /// Files/patterns that indicate this verifier is applicable
+    /// (empty = always applicable; otherwise any one existing path suffices)
+    applicable_patterns: Vec<String>,
+}
+
+impl CommandVerifier {
+    /// Construct a verifier that runs `command`.
+    ///
+    /// Defaults: no working-directory override, a five-minute timeout, not
+    /// required, and no applicability patterns (always applicable).
+    pub fn new(
+        name: impl Into<String>,
+        verifier_type: VerifierType,
+        command: impl Into<String>,
+    ) -> Self {
+        let five_minutes_ms: u64 = 300_000;
+        Self {
+            name: name.into(),
+            verifier_type,
+            command: command.into(),
+            working_dir: None,
+            timeout_ms: five_minutes_ms,
+            required: false,
+            applicable_patterns: Vec::new(),
+        }
+    }
+
+    /// Run the command in `dir`, resolved against the repository path.
+    #[allow(dead_code)]
+    pub fn with_working_dir(self, dir: impl Into<String>) -> Self {
+        Self {
+            working_dir: Some(dir.into()),
+            ..self
+        }
+    }
+
+    /// Override the timeout, in milliseconds.
+    #[allow(dead_code)]
+    pub fn with_timeout(self, timeout_ms: u64) -> Self {
+        Self { timeout_ms, ..self }
+    }
+
+    /// Flag results of this verifier as required.
+    pub fn as_required(self) -> Self {
+        Self {
+            required: true,
+            ..self
+        }
+    }
+
+    /// Replace the applicability patterns: paths relative to the repository
+    /// root, at least one of which must exist for the verifier to apply.
+    pub fn with_patterns(self, patterns: Vec<String>) -> Self {
+        Self {
+            applicable_patterns: patterns,
+            ..self
+        }
+    }
+}
+
+#[async_trait]
+impl Verifier for CommandVerifier {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    fn verifier_type(&self) -> VerifierType {
+        self.verifier_type.clone()
+    }
+
+    /// Applicable when no patterns are configured, or when at least one
+    /// configured path exists under `repo_path`.
+    async fn is_applicable(&self, repo_path: &Path) -> bool {
+        self.applicable_patterns.is_empty()
+            || self
+                .applicable_patterns
+                .iter()
+                .any(|pattern| repo_path.join(pattern).exists())
+    }
+
+    /// Run the configured command and turn its exit status into a result.
+    ///
+    /// The command is split on whitespace and executed directly (no shell),
+    /// in `working_dir` resolved against `repo_path`, or in `repo_path`
+    /// itself. The run is limited to `timeout_ms`; on expiry the child is
+    /// killed and `VerifierError::Timeout` is returned. Stdout and stderr are
+    /// captured, joined with a newline, stored as the result's `output`, and
+    /// also handed to `enhance_result` for parsing — several tools (cargo
+    /// diagnostics, Jest summaries) report on stderr rather than stdout.
+    ///
+    /// # Errors
+    ///
+    /// * `WorkingDirectoryNotFound` — the resolved directory does not exist.
+    /// * `CommandFailed` — the command string is empty or whitespace only.
+    /// * `Timeout` — the command ran longer than `timeout_ms`.
+    /// * `Io` — the process could not be spawned or its output not collected.
+    ///
+    /// NOTE(review): `tokio::time::timeout` needs a runtime with the time
+    /// driver enabled — confirm the application runtime enables it.
+    async fn verify(
+        &self,
+        repo_path: &Path,
+        _context: &VerificationContext,
+    ) -> Result<VerifierResult, VerifierError> {
+        let start = std::time::Instant::now();
+
+        let work_dir = match &self.working_dir {
+            Some(dir) => repo_path.join(dir),
+            None => repo_path.to_path_buf(),
+        };
+        if !work_dir.exists() {
+            return Err(VerifierError::WorkingDirectoryNotFound(
+                work_dir.display().to_string(),
+            ));
+        }
+
+        // First token is the program, the remainder are its arguments.
+        let mut parts = self.command.split_whitespace();
+        let program = match parts.next() {
+            Some(program) => program,
+            None => {
+                return Err(VerifierError::CommandFailed(
+                    "Empty command".to_string(),
+                ));
+            }
+        };
+
+        let mut command = tokio::process::Command::new(program);
+        command
+            .args(parts)
+            .current_dir(&work_dir)
+            // Make sure an abandoned (timed-out) child does not keep running.
+            .kill_on_drop(true);
+
+        let limit = std::time::Duration::from_millis(self.timeout_ms);
+        let output = match tokio::time::timeout(limit, command.output()).await {
+            Ok(finished) => finished?,
+            Err(_elapsed) => return Err(VerifierError::Timeout(self.timeout_ms)),
+        };
+
+        let duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        let combined_output = format!("{}\n{}", stdout, stderr);
+
+        let passed = output.status.success();
+        let score = if passed { 1.0 } else { 0.0 };
+
+        let result = VerifierResult {
+            name: self.name.clone(),
+            verifier_type: self.verifier_type.clone(),
+            passed,
+            score,
+            weight: 1.0,
+            required: self.required,
+            output: combined_output.clone(),
+            details: Some(serde_json::json!({
+                "exit_code": output.status.code(),
+                "command": self.command,
+                "working_dir": work_dir.display().to_string(),
+            })),
+            duration_ms,
+        };
+
+        // Refine score/details from whatever the tool printed on either stream.
+        Ok(self.enhance_result(result, &combined_output))
+    }
+}
+
+impl CommandVerifier {
+    /// Enhance result with parsed details from output.
+    ///
+    /// `stdout` is the tool output to parse. For test runners the parsed
+    /// pass/fail counts replace the score with `passed / total`; for linters
+    /// the score drops by 0.1 per reported error, reaching 0.0 at ten.
+    /// Parsed fields are merged into the existing `details` object, so
+    /// fields already present (such as the exit code) are kept.
+    ///
+    /// A command that exited unsuccessfully is never upgraded to a perfect
+    /// score: if the parsed figures would give 1.0, the existing score is
+    /// left untouched.
+    fn enhance_result(&self, mut result: VerifierResult, stdout: &str) -> VerifierResult {
+        match self.verifier_type {
+            VerifierType::TestRunner => {
+                if let Some((passed, failed, total)) = parse_test_output(stdout) {
+                    Self::merge_details(
+                        &mut result,
+                        serde_json::json!({
+                            "tests_passed": passed,
+                            "tests_failed": failed,
+                            "tests_total": total,
+                            "command": self.command,
+                        }),
+                    );
+                    if total > 0 {
+                        let ratio = (f64::from(passed) / f64::from(total)).clamp(0.0, 1.0);
+                        if result.passed || ratio < 1.0 {
+                            result.score = ratio;
+                        }
+                    }
+                }
+            }
+            VerifierType::Linter => {
+                if let Some(error_count) = parse_lint_output(stdout) {
+                    Self::merge_details(
+                        &mut result,
+                        serde_json::json!({
+                            "errors": error_count,
+                            "command": self.command,
+                        }),
+                    );
+                    // Score decreases with more errors (up to 10 errors = 0)
+                    let lint_score = (1.0 - (f64::from(error_count) / 10.0)).max(0.0);
+                    if result.passed || lint_score < 1.0 {
+                        result.score = lint_score;
+                    }
+                }
+            }
+            // No output parsing is defined for the remaining categories.
+            VerifierType::TypeChecker
+            | VerifierType::Build
+            | VerifierType::Custom
+            | VerifierType::Llm => {}
+        }
+        result
+    }
+
+    /// Merge `extra` into `result.details`.
+    ///
+    /// When both are JSON objects the keys of `extra` are added to (and on
+    /// conflict override) the existing ones; otherwise `extra` replaces the
+    /// previous details.
+    fn merge_details(result: &mut VerifierResult, extra: JsonValue) {
+        match (&mut result.details, extra) {
+            (Some(JsonValue::Object(existing)), JsonValue::Object(fields)) => {
+                existing.extend(fields);
+            }
+            (slot, extra) => *slot = Some(extra),
+        }
+    }
+}
+
+/// Parse test output for common formats (Jest, cargo test, pytest).
+///
+/// Returns `(passed, failed, total)`, or `None` when no known summary is
+/// found. Formats are tried in this order:
+///
+/// 1. Jest — a `Tests: ... N total` line. The `passed` / `failed` counts are
+///    read by label, so both `10 passed, 2 failed, 12 total` and Jest's
+///    usual `2 failed, 10 passed, 12 total` are understood; a missing count
+///    is treated as 0.
+/// 2. cargo test — every `test result: ... X passed; Y failed` line; counts
+///    are summed because cargo prints one line per test binary.
+/// 3. pytest-style — the last line containing `N passed` and/or `N failed`,
+///    in either order (pytest prints `1 failed, 2 passed in 0.1s`).
+///
+/// cargo is checked before the generic pattern because `N passed` alone
+/// would otherwise match cargo output and report zero failures.
+fn parse_test_output(output: &str) -> Option<(u32, u32, u32)> {
+    let passed_re = regex::Regex::new(r"(\d+)\s*passed").ok()?;
+    let failed_re = regex::Regex::new(r"(\d+)\s*failed").ok()?;
+    let first_count = |re: &regex::Regex, text: &str| -> Option<u32> {
+        re.captures(text)?.get(1)?.as_str().parse().ok()
+    };
+
+    // Jest: the summary line is identified by its "Tests:" prefix and
+    // trailing total.
+    let jest_re = regex::Regex::new(r"Tests:([^\n]*?(\d+)\s*total)").ok()?;
+    if let Some(caps) = jest_re.captures(output) {
+        let segment = caps.get(1)?.as_str();
+        let total: u32 = caps.get(2)?.as_str().parse().ok()?;
+        let passed = first_count(&passed_re, segment).unwrap_or(0);
+        let failed = first_count(&failed_re, segment).unwrap_or(0);
+        return Some((passed, failed, total));
+    }
+
+    // cargo test: "test result: ok. X passed; Y failed;" once per test binary.
+    let cargo_re =
+        regex::Regex::new(r"test result:.*?(\d+)\s*passed;\s*(\d+)\s*failed").ok()?;
+    let mut cargo_counts: Option<(u32, u32)> = None;
+    for caps in cargo_re.captures_iter(output) {
+        let passed: u32 = caps.get(1)?.as_str().parse().ok()?;
+        let failed: u32 = caps.get(2)?.as_str().parse().ok()?;
+        let (passed_so_far, failed_so_far) = cargo_counts.unwrap_or((0, 0));
+        cargo_counts = Some((
+            passed_so_far.saturating_add(passed),
+            failed_so_far.saturating_add(failed),
+        ));
+    }
+    if let Some((passed, failed)) = cargo_counts {
+        return Some((passed, failed, passed.saturating_add(failed)));
+    }
+
+    // pytest-style: summaries come last, so scan from the end of the output.
+    output.lines().rev().find_map(|line| {
+        let passed = first_count(&passed_re, line);
+        let failed = first_count(&failed_re, line);
+        if passed.is_none() && failed.is_none() {
+            return None;
+        }
+        let passed = passed.unwrap_or(0);
+        let failed = failed.unwrap_or(0);
+        Some((passed, failed, passed.saturating_add(failed)))
+    })
+}
+
+/// Parse lint output for error counts.
+///
+/// Returns the number of reported problems, or `None` when no known summary
+/// is present. Formats are tried in this order:
+///
+/// 1. ESLint — `X problems (Y errors, Z warnings)`; yields `Y`.
+/// 2. Legacy rustc/clippy — `warning: X warnings emitted`.
+/// 3. rustc errors — `... due to N previous errors` (what clippy prints when
+///    run with `-D warnings`, since lints are then errors).
+/// 4. Current cargo — `` warning: `crate` (lib) generated N warnings ``,
+///    summed over all such lines.
+/// 5. ruff — `Found N errors`.
+fn parse_lint_output(output: &str) -> Option<u32> {
+    // ESLint format: "X problems (Y errors, Z warnings)"
+    if let Some(caps) = regex::Regex::new(r"(\d+)\s*problems?\s*\((\d+)\s*errors?")
+        .ok()?
+        .captures(output)
+    {
+        return caps.get(2)?.as_str().parse().ok();
+    }
+
+    // Legacy clippy/rustc format: "warning: X warnings emitted"
+    if let Some(caps) = regex::Regex::new(r"warning:\s*(\d+)\s*warnings?\s*emitted")
+        .ok()?
+        .captures(output)
+    {
+        return caps.get(1)?.as_str().parse().ok();
+    }
+
+    // rustc error summary: "could not compile ... due to N previous errors"
+    if let Some(caps) = regex::Regex::new(r"due to\s+(\d+)\s+previous errors?")
+        .ok()?
+        .captures(output)
+    {
+        return caps.get(1)?.as_str().parse().ok();
+    }
+
+    // Current cargo format, one line per compiled target.
+    let generated_re = regex::Regex::new(r"generated\s+(\d+)\s+warnings?").ok()?;
+    let mut warnings: Option<u32> = None;
+    for caps in generated_re.captures_iter(output) {
+        let count: u32 = caps.get(1)?.as_str().parse().ok()?;
+        warnings = Some(warnings.unwrap_or(0).saturating_add(count));
+    }
+    if warnings.is_some() {
+        return warnings;
+    }
+
+    // ruff format: "Found N errors."
+    if let Some(caps) = regex::Regex::new(r"Found\s+(\d+)\s+errors?")
+        .ok()?
+        .captures(output)
+    {
+        return caps.get(1)?.as_str().parse().ok();
+    }
+
+    None
+}
+
+/// Inspect `repo_path` for well-known project files and return the matching
+/// command verifiers.
+///
+/// * `package.json` — one verifier per recognised npm script: `test`
+///   (required), `lint`, `build` (required) and `typecheck` / `type-check`.
+///   A manifest that cannot be read or parsed contributes nothing.
+/// * `Cargo.toml` — `cargo test` (required), `cargo clippy` and
+///   `cargo build` (required).
+/// * `pyproject.toml` or `setup.py` — `pytest` (required) and `ruff`.
+///
+/// Verifiers are returned in the order listed above.
+pub async fn auto_detect_verifiers(repo_path: &Path) -> Vec<Box<dyn Verifier>> {
+    /// Turn a list of literal file names into owned applicability patterns.
+    fn owned(files: &[&str]) -> Vec<String> {
+        files.iter().map(|file| file.to_string()).collect()
+    }
+
+    let mut detected: Vec<Box<dyn Verifier>> = Vec::new();
+
+    // --- Node.js -----------------------------------------------------------
+    let manifest_path = repo_path.join("package.json");
+    let manifest: Option<serde_json::Value> = if manifest_path.exists() {
+        match tokio::fs::read_to_string(&manifest_path).await {
+            Ok(text) => serde_json::from_str(&text).ok(),
+            Err(_) => None,
+        }
+    } else {
+        None
+    };
+    let npm_scripts = manifest
+        .as_ref()
+        .and_then(|pkg| pkg.get("scripts"))
+        .and_then(|scripts| scripts.as_object());
+
+    if let Some(scripts) = npm_scripts {
+        if scripts.contains_key("test") {
+            let verifier = CommandVerifier::new("npm-test", VerifierType::TestRunner, "npm test")
+                .with_patterns(owned(&["package.json"]))
+                .as_required();
+            detected.push(Box::new(verifier));
+        }
+
+        if scripts.contains_key("lint") {
+            let verifier = CommandVerifier::new("npm-lint", VerifierType::Linter, "npm run lint")
+                .with_patterns(owned(&["package.json"]));
+            detected.push(Box::new(verifier));
+        }
+
+        if scripts.contains_key("build") {
+            let verifier = CommandVerifier::new("npm-build", VerifierType::Build, "npm run build")
+                .with_patterns(owned(&["package.json"]))
+                .as_required();
+            detected.push(Box::new(verifier));
+        }
+
+        // "typecheck" takes precedence when both spellings are present.
+        let typecheck_cmd = if scripts.contains_key("typecheck") {
+            Some("npm run typecheck")
+        } else if scripts.contains_key("type-check") {
+            Some("npm run type-check")
+        } else {
+            None
+        };
+        if let Some(cmd) = typecheck_cmd {
+            let verifier = CommandVerifier::new("npm-typecheck", VerifierType::TypeChecker, cmd)
+                .with_patterns(owned(&["tsconfig.json"]));
+            detected.push(Box::new(verifier));
+        }
+    }
+
+    // --- Rust --------------------------------------------------------------
+    if repo_path.join("Cargo.toml").exists() {
+        let cargo_test = CommandVerifier::new("cargo-test", VerifierType::TestRunner, "cargo test")
+            .with_patterns(owned(&["Cargo.toml"]))
+            .as_required();
+        let cargo_clippy = CommandVerifier::new(
+            "cargo-clippy",
+            VerifierType::Linter,
+            "cargo clippy -- -D warnings",
+        )
+        .with_patterns(owned(&["Cargo.toml"]));
+        let cargo_build = CommandVerifier::new("cargo-build", VerifierType::Build, "cargo build")
+            .with_patterns(owned(&["Cargo.toml"]))
+            .as_required();
+
+        detected.push(Box::new(cargo_test));
+        detected.push(Box::new(cargo_clippy));
+        detected.push(Box::new(cargo_build));
+    }
+
+    // --- Python ------------------------------------------------------------
+    let is_python_project =
+        repo_path.join("pyproject.toml").exists() || repo_path.join("setup.py").exists();
+    if is_python_project {
+        let pytest = CommandVerifier::new("pytest", VerifierType::TestRunner, "pytest")
+            .with_patterns(owned(&["pyproject.toml", "setup.py"]))
+            .as_required();
+        let ruff = CommandVerifier::new("ruff", VerifierType::Linter, "ruff check .")
+            .with_patterns(owned(&["pyproject.toml"]));
+
+        detected.push(Box::new(pytest));
+        detected.push(Box::new(ruff));
+    }
+
+    detected
+}
+
+/// Composite evaluator that runs multiple verifiers and combines results.
+///
+/// Verifiers run one after another in the order they are stored.
+pub struct CompositeEvaluator {
+    /// Verifiers to run, in order
+    verifiers: Vec<Box<dyn Verifier>>,
+    /// Minimum composite score for green confidence (`new` sets 0.8)
+    green_threshold: f64,
+    /// Minimum composite score for yellow confidence (`new` sets 0.5)
+    yellow_threshold: f64,
+}
+
+impl CompositeEvaluator {
+    /// Wrap `verifiers` with the default thresholds: green at 0.8, yellow at 0.5.
+    pub fn new(verifiers: Vec<Box<dyn Verifier>>) -> Self {
+        let (green_threshold, yellow_threshold) = (0.8, 0.5);
+        Self {
+            verifiers,
+            green_threshold,
+            yellow_threshold,
+        }
+    }
+
+    /// Override both confidence thresholds. The values are stored as given;
+    /// no ordering or range check is performed.
+    pub fn with_thresholds(self, green: f64, yellow: f64) -> Self {
+        Self {
+            green_threshold: green,
+            yellow_threshold: yellow,
+            ..self
+        }
+    }
+
+    /// Append one more verifier; it runs after those already registered.
+    pub fn add_verifier(mut self, verifier: Box<dyn Verifier>) -> Self {
+        self.verifiers.push(verifier);
+        self
+    }
+
+    /// Run every applicable verifier in order and fold the outcomes into one
+    /// [`EvaluationResult`] for `context.step_id`.
+    ///
+    /// Verifiers whose `is_applicable` returns false are skipped. A verifier
+    /// that returns an error is recorded as a failed result whose output is
+    /// the error text.
+    ///
+    /// NOTE(review): that substitute result is built with
+    /// `VerifierResult::failed`, so it is never marked `required`, even if
+    /// the verifier would have flagged its own results as required.
+    pub async fn evaluate(
+        &self,
+        repo_path: &Path,
+        context: &VerificationContext,
+    ) -> EvaluationResult {
+        let mut collected = Vec::new();
+
+        for verifier in self.verifiers.iter() {
+            if verifier.is_applicable(repo_path).await {
+                let outcome = verifier
+                    .verify(repo_path, context)
+                    .await
+                    .unwrap_or_else(|err| {
+                        VerifierResult::failed(
+                            verifier.name().to_string(),
+                            verifier.verifier_type(),
+                            format!("Verifier error: {}", err),
+                        )
+                    });
+                collected.push(outcome);
+            }
+        }
+
+        EvaluationResult::from_verifiers(
+            context.step_id,
+            collected,
+            self.green_threshold,
+            self.yellow_threshold,
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Thresholds shared by the tests below (the evaluator's defaults).
+    const GREEN: f64 = 0.8;
+    const YELLOW: f64 = 0.5;
+
+    /// Both thresholds are inclusive lower bounds.
+    #[test]
+    fn test_confidence_level_from_score() {
+        let cases = [
+            (0.9, ConfidenceLevel::Green),
+            (0.8, ConfidenceLevel::Green),
+            (0.6, ConfidenceLevel::Yellow),
+            (0.5, ConfidenceLevel::Yellow),
+            (0.4, ConfidenceLevel::Red),
+        ];
+
+        for &(score, expected) in cases.iter() {
+            assert_eq!(ConfidenceLevel::from_score(score, GREEN, YELLOW), expected);
+        }
+    }
+
+    /// One pass and one failure of equal weight average to 0.5 (yellow).
+    #[test]
+    fn test_evaluation_result_composite_score() {
+        let ok = VerifierResult::passed("test1".into(), VerifierType::TestRunner, "OK".into())
+            .with_weight(1.0);
+        let bad = VerifierResult::failed("test2".into(), VerifierType::Linter, "Failed".into())
+            .with_weight(1.0);
+
+        let eval = EvaluationResult::from_verifiers(Uuid::new_v4(), vec![ok, bad], GREEN, YELLOW);
+
+        assert!((eval.composite_score - 0.5).abs() < 0.001);
+        assert_eq!(eval.confidence_level, ConfidenceLevel::Yellow);
+    }
+
+    /// A failed required verifier forces red regardless of the score.
+    #[test]
+    fn test_required_verifier_override() {
+        let ok = VerifierResult::passed("test1".into(), VerifierType::TestRunner, "OK".into());
+        let required_failure =
+            VerifierResult::failed("build".into(), VerifierType::Build, "Failed".into())
+                .as_required();
+
+        let eval = EvaluationResult::from_verifiers(
+            Uuid::new_v4(),
+            vec![ok, required_failure],
+            GREEN,
+            YELLOW,
+        );
+
+        // The composite score alone (0.5) would be yellow.
+        assert_eq!(eval.confidence_level, ConfidenceLevel::Red);
+        assert!(!eval.passed);
+    }
+
+    #[test]
+    fn test_parse_test_output_jest() {
+        let parsed = parse_test_output("Tests: 10 passed, 2 failed, 12 total");
+        let (passed, failed, total) = parsed.unwrap();
+        assert_eq!(passed, 10);
+        assert_eq!(failed, 2);
+        assert_eq!(total, 12);
+    }
+
+    #[test]
+    fn test_parse_test_output_cargo() {
+        let parsed = parse_test_output("test result: ok. 25 passed; 0 failed;");
+        let (passed, failed, total) = parsed.unwrap();
+        assert_eq!(passed, 25);
+        assert_eq!(failed, 0);
+        assert_eq!(total, 25);
+    }
+}