summaryrefslogtreecommitdiff
path: root/makima/src/orchestration/verifier.rs
diff options
context:
space:
mode:
Diffstat (limited to 'makima/src/orchestration/verifier.rs')
-rw-r--r--makima/src/orchestration/verifier.rs806
1 files changed, 806 insertions, 0 deletions
diff --git a/makima/src/orchestration/verifier.rs b/makima/src/orchestration/verifier.rs
new file mode 100644
index 0000000..e98da50
--- /dev/null
+++ b/makima/src/orchestration/verifier.rs
@@ -0,0 +1,806 @@
+//! Verification system for directive step evaluation.
+//!
+//! Provides tiered verification: programmatic verifiers run first,
+//! then LLM evaluation if programmatic checks pass. Composite scoring
+//! combines results with configurable weights.
+
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+use serde_json::Value as JsonValue;
+use std::path::Path;
+use thiserror::Error;
+use uuid::Uuid;
+
+/// Confidence level based on composite score and thresholds.
+///
+/// The mapping from a score to a level lives in `ConfidenceLevel::from_score`.
+/// With `rename_all = "snake_case"` the variants serialize as `"green"`,
+/// `"yellow"` and `"red"`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ConfidenceLevel {
+ /// High confidence (score >= green threshold)
+ Green,
+ /// Medium confidence (score >= yellow threshold but < green)
+ Yellow,
+ /// Low confidence (score < yellow threshold)
+ Red,
+}
+
+impl ConfidenceLevel {
+    /// Classify `score` against the two thresholds.
+    ///
+    /// Returns `Green` when `score >= green_threshold`, otherwise `Yellow` when
+    /// `score >= yellow_threshold`, otherwise `Red`. The green comparison is
+    /// made first, so it wins if the thresholds are supplied out of order.
+    pub fn from_score(score: f64, green_threshold: f64, yellow_threshold: f64) -> Self {
+        if score >= green_threshold {
+            return Self::Green;
+        }
+        if score >= yellow_threshold {
+            return Self::Yellow;
+        }
+        Self::Red
+    }
+
+    /// Lowercase name of the level, intended for database storage.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Green => "green",
+            Self::Yellow => "yellow",
+            Self::Red => "red",
+        }
+    }
+}
+
+impl std::fmt::Display for ConfidenceLevel {
+    /// Writes the same lowercase name that `as_str` returns.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+/// Type of verifier for categorization.
+///
+/// With `rename_all = "snake_case"` the variants serialize as `"test_runner"`,
+/// `"linter"`, `"type_checker"`, `"build"`, `"custom"` and `"llm"`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum VerifierType {
+ /// Run test suite (npm test, cargo test, pytest, etc.)
+ TestRunner,
+ /// Run linter (eslint, clippy, ruff, etc.)
+ Linter,
+ /// Run type checker (tsc, mypy, etc.)
+ TypeChecker,
+ /// Run build command (npm build, cargo build, etc.)
+ Build,
+ /// Custom command verifier
+ Custom,
+ /// LLM-based semantic evaluation
+ Llm,
+}
+
+impl VerifierType {
+    /// Snake-case name of the verifier category.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::TestRunner => "test_runner",
+            Self::Linter => "linter",
+            Self::TypeChecker => "type_checker",
+            Self::Build => "build",
+            Self::Custom => "custom",
+            Self::Llm => "llm",
+        }
+    }
+}
+
+/// Result of a single verifier run.
+///
+/// Built either directly or through the `passed` / `failed` constructors and
+/// the `with_*` builder methods.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VerifierResult {
+ /// Name of the verifier
+ pub name: String,
+ /// Type of verifier
+ pub verifier_type: VerifierType,
+ /// Whether the verification passed
+ pub passed: bool,
+ /// Score from 0.0 to 1.0 (1.0 = perfect, 0.0 = complete failure)
+ pub score: f64,
+ /// Weight for composite scoring. The constructors in this file use 1.0;
+ /// NOTE(review): the original comment mentions 2.0 for LLM verifiers, which
+ /// is not set anywhere in this file - confirm against the LLM verifier.
+ pub weight: f64,
+ /// Whether this verifier is required (failure = automatic red confidence)
+ pub required: bool,
+ /// Human-readable output/feedback
+ pub output: String,
+ /// Structured details (test counts, lint errors, etc.)
+ pub details: Option<JsonValue>,
+ /// Execution time in milliseconds
+ pub duration_ms: u64,
+}
+
+impl VerifierResult {
+    /// Shared constructor: a result with weight 1.0, not required, no details,
+    /// zero duration, and a score of 1.0 or 0.0 depending on `passed`.
+    fn with_outcome(
+        name: String,
+        verifier_type: VerifierType,
+        output: String,
+        passed: bool,
+    ) -> Self {
+        Self {
+            name,
+            verifier_type,
+            passed,
+            score: if passed { 1.0 } else { 0.0 },
+            weight: 1.0,
+            required: false,
+            output,
+            details: None,
+            duration_ms: 0,
+        }
+    }
+
+    /// Build a passing result with the maximum score (1.0).
+    pub fn passed(name: String, verifier_type: VerifierType, output: String) -> Self {
+        Self::with_outcome(name, verifier_type, output, true)
+    }
+
+    /// Build a failing result with the minimum score (0.0).
+    pub fn failed(name: String, verifier_type: VerifierType, output: String) -> Self {
+        Self::with_outcome(name, verifier_type, output, false)
+    }
+
+    /// Replace the weight used in composite scoring.
+    pub fn with_weight(self, weight: f64) -> Self {
+        Self { weight, ..self }
+    }
+
+    /// Flag the result as coming from a required verifier.
+    pub fn as_required(self) -> Self {
+        Self {
+            required: true,
+            ..self
+        }
+    }
+
+    /// Replace the score; the value is clamped into `0.0..=1.0`.
+    pub fn with_score(self, score: f64) -> Self {
+        Self {
+            score: score.clamp(0.0, 1.0),
+            ..self
+        }
+    }
+
+    /// Attach structured details, replacing any that were already set.
+    pub fn with_details(self, details: JsonValue) -> Self {
+        Self {
+            details: Some(details),
+            ..self
+        }
+    }
+
+    /// Record how long the verifier ran, in milliseconds.
+    pub fn with_duration(self, duration_ms: u64) -> Self {
+        Self {
+            duration_ms,
+            ..self
+        }
+    }
+}
+
+/// Composite evaluation result combining multiple verifier results.
+///
+/// Values are produced by `EvaluationResult::from_verifiers`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EvaluationResult {
+ /// Unique ID for this evaluation
+ pub id: Uuid,
+ /// Step ID being evaluated
+ pub step_id: Uuid,
+ /// True when no required verifier failed and the confidence level is not red
+ pub passed: bool,
+ /// Weighted composite score (0.0-1.0)
+ pub composite_score: f64,
+ /// Confidence level derived from score (forced to red when a required verifier failed)
+ pub confidence_level: ConfidenceLevel,
+ /// Individual verifier results
+ pub verifier_results: Vec<VerifierResult>,
+ /// Summary feedback for the step
+ pub summary: String,
+ /// Rework instructions if failed
+ pub rework_instructions: Option<String>,
+ /// Total evaluation duration in milliseconds (sum of the verifier durations)
+ pub total_duration_ms: u64,
+}
+
+impl EvaluationResult {
+    /// Combine individual verifier results into one evaluation for `step_id`.
+    ///
+    /// * The composite score is the weight-averaged score of `results`.
+    ///   Entries whose weight is not a finite positive number are left out of
+    ///   the average, a NaN score counts as 0.0 and every score is clamped to
+    ///   `0.0..=1.0`, so the composite always lies in that range. With no
+    ///   usable weight (including an empty `results`) the composite is 0.0.
+    /// * The confidence level comes from `ConfidenceLevel::from_score` using
+    ///   `green_threshold` / `yellow_threshold`, except that any failed
+    ///   `required` verifier forces `Red`.
+    /// * `passed` is true only when no required verifier failed and the
+    ///   confidence level is not `Red`.
+    /// * `rework_instructions` is `Some` exactly when `passed` is false. It
+    ///   names the failed verifiers, or explains why the evaluation failed when
+    ///   there is no failed verifier to name.
+    /// * `total_duration_ms` is the sum of the individual durations.
+    pub fn from_verifiers(
+        step_id: Uuid,
+        results: Vec<VerifierResult>,
+        green_threshold: f64,
+        yellow_threshold: f64,
+    ) -> Self {
+        let id = Uuid::new_v4();
+
+        // A failed required verifier vetoes the whole evaluation.
+        let any_required_failed = results.iter().any(|r| r.required && !r.passed);
+
+        // Weighted average over the results that carry a usable weight. Bad
+        // weights are skipped so they cannot flip the sign of the average or
+        // turn it into NaN.
+        let (total_weighted_score, total_weight) = results
+            .iter()
+            .filter(|r| r.weight.is_finite() && r.weight > 0.0)
+            .fold((0.0_f64, 0.0_f64), |(score_acc, weight_acc), r| {
+                let score = if r.score.is_nan() {
+                    0.0
+                } else {
+                    r.score.clamp(0.0, 1.0)
+                };
+                (score_acc + score * r.weight, weight_acc + r.weight)
+            });
+
+        let composite_score = if total_weight > 0.0 {
+            total_weighted_score / total_weight
+        } else {
+            0.0
+        };
+
+        let confidence_level = if any_required_failed {
+            ConfidenceLevel::Red
+        } else {
+            ConfidenceLevel::from_score(composite_score, green_threshold, yellow_threshold)
+        };
+
+        let passed = !any_required_failed && confidence_level != ConfidenceLevel::Red;
+
+        let passed_count = results.iter().filter(|r| r.passed).count();
+        let total_count = results.len();
+        let summary = format!(
+            "{}/{} verifiers passed, composite score: {:.2}, confidence: {}",
+            passed_count, total_count, composite_score, confidence_level
+        );
+
+        let rework_instructions = if passed {
+            None
+        } else {
+            let failed_verifiers: Vec<&str> = results
+                .iter()
+                .filter(|r| !r.passed)
+                .map(|r| r.name.as_str())
+                .collect();
+            let instructions = if results.is_empty() {
+                // Nothing ran, so there is nothing to point the author at.
+                "No verifiers produced a result, so the step could not be verified".to_string()
+            } else if failed_verifiers.is_empty() {
+                // Every verifier passed but partial scores kept the composite low.
+                format!(
+                    "No verifier failed outright, but the composite score {:.2} is below the passing threshold",
+                    composite_score
+                )
+            } else {
+                format!(
+                    "Fix issues identified by: {}",
+                    failed_verifiers.join(", ")
+                )
+            };
+            Some(instructions)
+        };
+
+        let total_duration_ms = results.iter().map(|r| r.duration_ms).sum();
+
+        Self {
+            id,
+            step_id,
+            passed,
+            composite_score,
+            confidence_level,
+            verifier_results: results,
+            summary,
+            rework_instructions,
+            total_duration_ms,
+        }
+    }
+}
+
+/// Error type for verification operations.
+///
+/// `Display` messages come from the `#[error(...)]` attributes below.
+#[derive(Error, Debug)]
+pub enum VerifierError {
+ /// A command could not be run; the payload describes why.
+ #[error("Command execution failed: {0}")]
+ CommandFailed(String),
+
+ /// A command exceeded its time limit; the payload is a duration in milliseconds.
+ #[error("Command timed out after {0}ms")]
+ Timeout(u64),
+
+ /// The directory the command should run in does not exist; the payload is its path.
+ #[error("Working directory not found: {0}")]
+ WorkingDirectoryNotFound(String),
+
+ /// A verifier was requested that has not been configured.
+ #[error("Verifier not configured: {0}")]
+ NotConfigured(String),
+
+ /// Verifier output could not be parsed.
+ #[error("Parse error: {0}")]
+ ParseError(String),
+
+ /// An LLM-based verifier reported an error.
+ #[error("LLM error: {0}")]
+ LlmError(String),
+
+ /// Underlying I/O failure; `#[from]` lets `?` convert `std::io::Error` into this variant.
+ #[error("IO error: {0}")]
+ Io(#[from] std::io::Error),
+}
+
+/// Verifier trait for pluggable verification implementations.
+///
+/// Implementations must be `Send + Sync`; `CompositeEvaluator` stores them as
+/// `Box<dyn Verifier>`.
+#[async_trait]
+pub trait Verifier: Send + Sync {
+ /// Get the name of this verifier.
+ fn name(&self) -> &str;
+
+ /// Get the type of this verifier.
+ fn verifier_type(&self) -> VerifierType;
+
+ /// Check if this verifier is applicable to the given repository.
+ /// `CompositeEvaluator::evaluate` skips verifiers that return `false`.
+ async fn is_applicable(&self, repo_path: &Path) -> bool;
+
+ /// Run verification and return result.
+ /// An `Err` is turned into a failed result by `CompositeEvaluator::evaluate`.
+ async fn verify(&self, repo_path: &Path, context: &VerificationContext)
+ -> Result<VerifierResult, VerifierError>;
+}
+
+/// Context provided to verifiers during execution.
+///
+/// `step_id` is copied into the resulting `EvaluationResult`; the command
+/// verifier in this file does not read any of the other fields.
+#[derive(Debug, Clone)]
+pub struct VerificationContext {
+ /// Step ID being verified
+ pub step_id: Uuid,
+ /// Contract ID if step has been instantiated
+ pub contract_id: Option<Uuid>,
+ /// Files that were modified in this step
+ pub modified_files: Vec<String>,
+ /// Step description for LLM context
+ pub step_description: String,
+ /// Acceptance criteria for LLM evaluation
+ pub acceptance_criteria: Vec<String>,
+ /// Additional context from directive
+ pub directive_context: String,
+}
+
+/// Command-based verifier for running shell commands.
+///
+/// Construct with `CommandVerifier::new` and refine with the builder methods.
+pub struct CommandVerifier {
+    /// Name reported in results.
+    name: String,
+    /// Category reported in results; also selects how output is parsed.
+    verifier_type: VerifierType,
+    /// Command line; it is split on whitespace into a program and arguments.
+    command: String,
+    /// Optional directory, joined onto the repository path, to run the command in.
+    working_dir: Option<String>,
+    /// Time limit for the command in milliseconds.
+    // `dead_code` stays allowed so this field never warns, whether or not the
+    // execution path reads it.
+    #[allow(dead_code)]
+    timeout_ms: u64,
+    /// Whether a failure of this verifier forces red confidence.
+    required: bool,
+    /// Files/patterns that indicate this verifier is applicable
+    applicable_patterns: Vec<String>,
+}
+
+impl CommandVerifier {
+    /// Build a verifier that runs `command` and reports under `name`.
+    ///
+    /// Defaults: no working directory override, a 5 minute timeout, not
+    /// required, and no applicability patterns.
+    pub fn new(
+        name: impl Into<String>,
+        verifier_type: VerifierType,
+        command: impl Into<String>,
+    ) -> Self {
+        let name = name.into();
+        let command = command.into();
+        Self {
+            name,
+            verifier_type,
+            command,
+            working_dir: None,
+            timeout_ms: 300_000, // 5 minute default
+            required: false,
+            applicable_patterns: Vec::new(),
+        }
+    }
+
+    /// Run the command in `dir` instead of the repository path itself.
+    #[allow(dead_code)]
+    pub fn with_working_dir(self, dir: impl Into<String>) -> Self {
+        Self {
+            working_dir: Some(dir.into()),
+            ..self
+        }
+    }
+
+    /// Override the timeout, given in milliseconds.
+    #[allow(dead_code)]
+    pub fn with_timeout(self, timeout_ms: u64) -> Self {
+        Self { timeout_ms, ..self }
+    }
+
+    /// Flag this verifier as required.
+    pub fn as_required(self) -> Self {
+        Self {
+            required: true,
+            ..self
+        }
+    }
+
+    /// Replace the applicability patterns; the verifier applies when at least
+    /// one of these paths exists in the repository.
+    pub fn with_patterns(self, patterns: Vec<String>) -> Self {
+        Self {
+            applicable_patterns: patterns,
+            ..self
+        }
+    }
+}
+
+#[async_trait]
+impl Verifier for CommandVerifier {
+    /// Name given at construction.
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Category given at construction.
+    fn verifier_type(&self) -> VerifierType {
+        self.verifier_type.clone()
+    }
+
+    /// Applicable when no patterns are configured, or when at least one
+    /// configured path exists under `repo_path`.
+    async fn is_applicable(&self, repo_path: &Path) -> bool {
+        self.applicable_patterns.is_empty()
+            || self
+                .applicable_patterns
+                .iter()
+                .any(|pattern| repo_path.join(pattern).exists())
+    }
+
+    /// Run the configured command and turn its outcome into a result.
+    ///
+    /// The command string is split on whitespace (no shell, no quoting), run
+    /// in the working directory, and must finish within `timeout_ms`. The
+    /// result passes when the exit status is success; its output is stdout
+    /// and stderr joined by a newline.
+    ///
+    /// # Errors
+    ///
+    /// * `WorkingDirectoryNotFound` if the working directory does not exist.
+    /// * `CommandFailed` if the command string is empty.
+    /// * `Timeout` if the command does not finish within `timeout_ms`.
+    /// * `Io` if the process cannot be spawned or awaited.
+    async fn verify(
+        &self,
+        repo_path: &Path,
+        _context: &VerificationContext,
+    ) -> Result<VerifierResult, VerifierError> {
+        let start = std::time::Instant::now();
+
+        let work_dir = match &self.working_dir {
+            Some(dir) => repo_path.join(dir),
+            None => repo_path.to_path_buf(),
+        };
+
+        if !work_dir.exists() {
+            return Err(VerifierError::WorkingDirectoryNotFound(
+                work_dir.display().to_string(),
+            ));
+        }
+
+        // Split the command into program and arguments.
+        let mut parts = self.command.split_whitespace();
+        let program = parts
+            .next()
+            .ok_or_else(|| VerifierError::CommandFailed("Empty command".to_string()))?;
+
+        let mut command = tokio::process::Command::new(program);
+        command
+            .args(parts)
+            .current_dir(&work_dir)
+            // Kill the child if the timeout below drops the pending future.
+            // NOTE(review): processes the command itself spawns may survive.
+            .kill_on_drop(true);
+
+        // Enforce the configured time limit instead of waiting forever.
+        let limit = std::time::Duration::from_millis(self.timeout_ms);
+        let output = match tokio::time::timeout(limit, command.output()).await {
+            Ok(finished) => finished?,
+            Err(_elapsed) => return Err(VerifierError::Timeout(self.timeout_ms)),
+        };
+
+        let duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        let combined_output = format!("{}\n{}", stdout, stderr);
+
+        let passed = output.status.success();
+        let score = if passed { 1.0 } else { 0.0 };
+
+        let result = VerifierResult {
+            name: self.name.clone(),
+            verifier_type: self.verifier_type.clone(),
+            passed,
+            score,
+            weight: 1.0,
+            required: self.required,
+            output: combined_output.clone(),
+            details: Some(serde_json::json!({
+                "exit_code": output.status.code(),
+                "command": self.command,
+                "working_dir": work_dir.display().to_string(),
+            })),
+            duration_ms,
+        };
+
+        // Refine the score from the tool's own summary. Both streams are
+        // searched because several tools print their summary on stderr
+        // (cargo diagnostics, Jest results).
+        Ok(self.enhance_result(result, &combined_output))
+    }
+}
+
+impl CommandVerifier {
+    /// Add `fields` to `details`, keeping entries that are already there
+    /// (same-named keys are overwritten).
+    fn merge_details(
+        details: &mut Option<JsonValue>,
+        fields: serde_json::Map<String, JsonValue>,
+    ) {
+        match details {
+            Some(JsonValue::Object(existing)) => existing.extend(fields),
+            _ => *details = Some(JsonValue::Object(fields)),
+        }
+    }
+
+    /// Enhance result with parsed details from output.
+    ///
+    /// For test runners the parsed pass/fail/total counts are added to
+    /// `details` and, when at least one test was counted, the score becomes
+    /// the fraction of tests that passed. For linters the parsed problem count
+    /// is added and the score drops by 0.1 per problem, reaching 0.0 at ten.
+    /// Other verifier types, and output that cannot be parsed, leave `result`
+    /// untouched. Existing `details` entries such as the exit code are kept.
+    fn enhance_result(&self, mut result: VerifierResult, stdout: &str) -> VerifierResult {
+        match self.verifier_type {
+            VerifierType::TestRunner => {
+                if let Some((passed, failed, total)) = parse_test_output(stdout) {
+                    let mut fields = serde_json::Map::new();
+                    fields.insert("tests_passed".to_string(), JsonValue::from(passed));
+                    fields.insert("tests_failed".to_string(), JsonValue::from(failed));
+                    fields.insert("tests_total".to_string(), JsonValue::from(total));
+                    fields.insert(
+                        "command".to_string(),
+                        JsonValue::from(self.command.as_str()),
+                    );
+                    Self::merge_details(&mut result.details, fields);
+                    if total > 0 {
+                        // Guard against a reported total smaller than the pass count.
+                        result.score = (f64::from(passed) / f64::from(total)).min(1.0);
+                    }
+                }
+            }
+            VerifierType::Linter => {
+                if let Some(error_count) = parse_lint_output(stdout) {
+                    let mut fields = serde_json::Map::new();
+                    fields.insert("errors".to_string(), JsonValue::from(error_count));
+                    fields.insert(
+                        "command".to_string(),
+                        JsonValue::from(self.command.as_str()),
+                    );
+                    Self::merge_details(&mut result.details, fields);
+                    // Score decreases with more errors (up to 10 errors = 0)
+                    result.score = (1.0 - (f64::from(error_count) / 10.0)).max(0.0);
+                }
+            }
+            // No output format is parsed for these categories.
+            VerifierType::TypeChecker
+            | VerifierType::Build
+            | VerifierType::Custom
+            | VerifierType::Llm => {}
+        }
+        result
+    }
+}
+
+/// Parse test output for common formats (Jest, cargo test, pytest).
+///
+/// Returns `(passed, failed, total)`, or `None` when no known summary is found.
+///
+/// Formats are tried from most to least specific so that a generic
+/// "N passed" match cannot shadow a richer summary:
+///
+/// 1. Jest: a line containing `Tests:` with `N passed`, `N failed` and
+///    `N total` in any order (Jest prints failures first and omits zero counts).
+/// 2. cargo test: every `test result: ... N passed; M failed` line, summed,
+///    because cargo prints one such line per test binary.
+/// 3. pytest-style: the last line mentioning `N passed` and/or `N failed`, in
+///    either order.
+fn parse_test_output(output: &str) -> Option<(u32, u32, u32)> {
+    let passed_re = regex::Regex::new(r"(\d+)\s*passed").ok()?;
+    let failed_re = regex::Regex::new(r"(\d+)\s*failed").ok()?;
+    let total_re = regex::Regex::new(r"(\d+)\s*total").ok()?;
+    let cargo_re =
+        regex::Regex::new(r"test result:.*?(\d+)\s*passed;\s*(\d+)\s*failed").ok()?;
+
+    // First number captured by `re` in `text`, if any.
+    let count = |re: &regex::Regex, text: &str| -> Option<u32> {
+        re.captures(text)?.get(1)?.as_str().parse().ok()
+    };
+
+    // Jest format: "Tests: Y failed, X passed, Z total"
+    if let Some(line) = output.lines().find(|line| line.contains("Tests:")) {
+        let passed = count(&passed_re, line);
+        let failed = count(&failed_re, line);
+        if passed.is_some() || failed.is_some() {
+            let passed = passed.unwrap_or(0);
+            let failed = failed.unwrap_or(0);
+            let total = count(&total_re, line).unwrap_or_else(|| passed.saturating_add(failed));
+            return Some((passed, failed, total));
+        }
+    }
+
+    // cargo test format: "test result: ok. X passed; Y failed;"
+    let mut cargo_counts: Option<(u32, u32)> = None;
+    for caps in cargo_re.captures_iter(output) {
+        let passed: u32 = caps.get(1)?.as_str().parse().ok()?;
+        let failed: u32 = caps.get(2)?.as_str().parse().ok()?;
+        let (passed_so_far, failed_so_far) = cargo_counts.unwrap_or((0, 0));
+        cargo_counts = Some((
+            passed_so_far.saturating_add(passed),
+            failed_so_far.saturating_add(failed),
+        ));
+    }
+    if let Some((passed, failed)) = cargo_counts {
+        return Some((passed, failed, passed.saturating_add(failed)));
+    }
+
+    // pytest format: "Y failed, X passed in 0.12s" (the summary is the last
+    // matching line, so search from the end).
+    output.lines().rev().find_map(|line| {
+        let passed = count(&passed_re, line);
+        let failed = count(&failed_re, line);
+        if passed.is_none() && failed.is_none() {
+            return None;
+        }
+        let passed = passed.unwrap_or(0);
+        let failed = failed.unwrap_or(0);
+        Some((passed, failed, passed.saturating_add(failed)))
+    })
+}
+
+/// Parse lint output for error counts.
+///
+/// Returns the number of problems reported, or `None` when no known summary
+/// is found. Formats are tried in this order:
+///
+/// 1. ESLint: `X problems (Y errors, Z warnings)` yields `Y`.
+/// 2. rustc: `warning: X warnings emitted` yields `X`.
+/// 3. cargo summaries: `generated X warnings` and
+///    `could not compile ... due to X previous errors`, summed over every
+///    crate that reports one.
+fn parse_lint_output(output: &str) -> Option<u32> {
+    // ESLint format: "X problems (Y errors, Z warnings)"
+    if let Some(caps) = regex::Regex::new(r"(\d+)\s*problems?\s*\((\d+)\s*errors?")
+        .ok()?
+        .captures(output)
+    {
+        return caps.get(2)?.as_str().parse().ok();
+    }
+
+    // rustc format: "warning: X warnings emitted"
+    if let Some(caps) = regex::Regex::new(r"warning:\s*(\d+)\s*warnings?\s*emitted")
+        .ok()?
+        .captures(output)
+    {
+        return caps.get(1)?.as_str().parse().ok();
+    }
+
+    // cargo per-crate summaries; one line per crate, so add them up.
+    let cargo_patterns = [
+        r"generated\s+(\d+)\s+warnings?",
+        r"could not compile.*?due to\s+(\d+)\s+previous errors?",
+    ];
+    let mut problems: Option<u32> = None;
+    for pattern in cargo_patterns.iter() {
+        let summary_re = regex::Regex::new(pattern).ok()?;
+        for caps in summary_re.captures_iter(output) {
+            let reported: u32 = caps.get(1)?.as_str().parse().ok()?;
+            problems = Some(problems.unwrap_or(0).saturating_add(reported));
+        }
+    }
+
+    problems
+}
+
+/// Build the default set of verifiers for the project files found in `repo_path`.
+///
+/// * `package.json` with a `scripts` object: `npm-test` (required), `npm-lint`,
+///   `npm-build` (required) and `npm-typecheck`, each only when the matching
+///   script is declared.
+/// * `Cargo.toml`: `cargo-test` (required), `cargo-clippy`, `cargo-build` (required).
+/// * `pyproject.toml` or `setup.py`: `pytest` (required) and `ruff`.
+///
+/// An unreadable or malformed `package.json` contributes no verifiers.
+pub async fn auto_detect_verifiers(repo_path: &Path) -> Vec<Box<dyn Verifier>> {
+    /// Owned copies of the given pattern names.
+    fn patterns(names: &[&str]) -> Vec<String> {
+        names.iter().map(|name| name.to_string()).collect()
+    }
+
+    let mut detected: Vec<Box<dyn Verifier>> = Vec::new();
+
+    // Node.js: driven by the scripts declared in package.json.
+    let package_json = repo_path.join("package.json");
+    if package_json.exists() {
+        let scripts = tokio::fs::read_to_string(&package_json)
+            .await
+            .ok()
+            .and_then(|content| serde_json::from_str::<serde_json::Value>(&content).ok())
+            .and_then(|pkg| pkg.get("scripts").and_then(|s| s.as_object()).cloned());
+
+        if let Some(scripts) = scripts {
+            if scripts.contains_key("test") {
+                let verifier =
+                    CommandVerifier::new("npm-test", VerifierType::TestRunner, "npm test")
+                        .with_patterns(patterns(&["package.json"]))
+                        .as_required();
+                detected.push(Box::new(verifier));
+            }
+
+            if scripts.contains_key("lint") {
+                let verifier =
+                    CommandVerifier::new("npm-lint", VerifierType::Linter, "npm run lint")
+                        .with_patterns(patterns(&["package.json"]));
+                detected.push(Box::new(verifier));
+            }
+
+            if scripts.contains_key("build") {
+                let verifier =
+                    CommandVerifier::new("npm-build", VerifierType::Build, "npm run build")
+                        .with_patterns(patterns(&["package.json"]))
+                        .as_required();
+                detected.push(Box::new(verifier));
+            }
+
+            // `typecheck` takes precedence over `type-check` when both exist.
+            let typecheck_cmd = if scripts.contains_key("typecheck") {
+                Some("npm run typecheck")
+            } else if scripts.contains_key("type-check") {
+                Some("npm run type-check")
+            } else {
+                None
+            };
+            if let Some(cmd) = typecheck_cmd {
+                let verifier =
+                    CommandVerifier::new("npm-typecheck", VerifierType::TypeChecker, cmd)
+                        .with_patterns(patterns(&["tsconfig.json"]));
+                detected.push(Box::new(verifier));
+            }
+        }
+    }
+
+    // Rust: presence of Cargo.toml is enough.
+    if repo_path.join("Cargo.toml").exists() {
+        let test = CommandVerifier::new("cargo-test", VerifierType::TestRunner, "cargo test")
+            .with_patterns(patterns(&["Cargo.toml"]))
+            .as_required();
+        let clippy = CommandVerifier::new(
+            "cargo-clippy",
+            VerifierType::Linter,
+            "cargo clippy -- -D warnings",
+        )
+        .with_patterns(patterns(&["Cargo.toml"]));
+        let build = CommandVerifier::new("cargo-build", VerifierType::Build, "cargo build")
+            .with_patterns(patterns(&["Cargo.toml"]))
+            .as_required();
+
+        detected.push(Box::new(test));
+        detected.push(Box::new(clippy));
+        detected.push(Box::new(build));
+    }
+
+    // Python: either pyproject.toml or setup.py marks the project.
+    let is_python =
+        repo_path.join("pyproject.toml").exists() || repo_path.join("setup.py").exists();
+    if is_python {
+        let pytest = CommandVerifier::new("pytest", VerifierType::TestRunner, "pytest")
+            .with_patterns(patterns(&["pyproject.toml", "setup.py"]))
+            .as_required();
+        let ruff = CommandVerifier::new("ruff", VerifierType::Linter, "ruff check .")
+            .with_patterns(patterns(&["pyproject.toml"]));
+
+        detected.push(Box::new(pytest));
+        detected.push(Box::new(ruff));
+    }
+
+    detected
+}
+
+/// Composite evaluator that runs multiple verifiers and combines results.
+pub struct CompositeEvaluator {
+ /// Verifiers to run, in order.
+ verifiers: Vec<Box<dyn Verifier>>,
+ /// Minimum composite score for green confidence (0.8 from `new`).
+ green_threshold: f64,
+ /// Minimum composite score for yellow confidence (0.5 from `new`).
+ yellow_threshold: f64,
+}
+
+impl CompositeEvaluator {
+    /// Wrap `verifiers` with the default thresholds: green at 0.8, yellow at 0.5.
+    pub fn new(verifiers: Vec<Box<dyn Verifier>>) -> Self {
+        Self {
+            verifiers,
+            green_threshold: 0.8,
+            yellow_threshold: 0.5,
+        }
+    }
+
+    /// Replace both confidence thresholds. The values are stored as given.
+    pub fn with_thresholds(self, green: f64, yellow: f64) -> Self {
+        Self {
+            green_threshold: green,
+            yellow_threshold: yellow,
+            ..self
+        }
+    }
+
+    /// Append one more verifier; it runs after those already registered.
+    pub fn add_verifier(mut self, verifier: Box<dyn Verifier>) -> Self {
+        self.verifiers.push(verifier);
+        self
+    }
+
+    /// Run every applicable verifier in order and fold the outcomes into one
+    /// `EvaluationResult` for `context.step_id`.
+    ///
+    /// Verifiers whose `is_applicable` returns `false` are skipped. A verifier
+    /// that returns an error is recorded through `VerifierResult::failed`, so
+    /// it counts as a failure with weight 1.0 and is not marked required.
+    pub async fn evaluate(
+        &self,
+        repo_path: &Path,
+        context: &VerificationContext,
+    ) -> EvaluationResult {
+        let mut outcomes = Vec::new();
+
+        for verifier in self.verifiers.iter() {
+            if verifier.is_applicable(repo_path).await {
+                let outcome = verifier
+                    .verify(repo_path, context)
+                    .await
+                    .unwrap_or_else(|e| {
+                        VerifierResult::failed(
+                            verifier.name().to_string(),
+                            verifier.verifier_type(),
+                            format!("Verifier error: {}", e),
+                        )
+                    });
+                outcomes.push(outcome);
+            }
+        }
+
+        EvaluationResult::from_verifiers(
+            context.step_id,
+            outcomes,
+            self.green_threshold,
+            self.yellow_threshold,
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Thresholds shared by the scoring tests.
+    const GREEN: f64 = 0.8;
+    const YELLOW: f64 = 0.5;
+
+    /// Scores on and around both thresholds map to the expected level.
+    #[test]
+    fn test_confidence_level_from_score() {
+        let cases = [
+            (0.9, ConfidenceLevel::Green),
+            (0.8, ConfidenceLevel::Green),
+            (0.6, ConfidenceLevel::Yellow),
+            (0.5, ConfidenceLevel::Yellow),
+            (0.4, ConfidenceLevel::Red),
+        ];
+        for (score, expected) in cases.iter() {
+            assert_eq!(ConfidenceLevel::from_score(*score, GREEN, YELLOW), *expected);
+        }
+    }
+
+    /// One pass and one failure of equal weight average to 0.5 (yellow).
+    #[test]
+    fn test_evaluation_result_composite_score() {
+        let pass = VerifierResult::passed("test1".into(), VerifierType::TestRunner, "OK".into())
+            .with_weight(1.0);
+        let fail = VerifierResult::failed("test2".into(), VerifierType::Linter, "Failed".into())
+            .with_weight(1.0);
+
+        let eval = EvaluationResult::from_verifiers(Uuid::new_v4(), vec![pass, fail], GREEN, YELLOW);
+
+        assert!((eval.composite_score - 0.5).abs() < 0.001);
+        assert_eq!(eval.confidence_level, ConfidenceLevel::Yellow);
+    }
+
+    /// A failed required verifier forces red even when the score alone would not.
+    #[test]
+    fn test_required_verifier_override() {
+        let pass = VerifierResult::passed("test1".into(), VerifierType::TestRunner, "OK".into());
+        let required_fail =
+            VerifierResult::failed("build".into(), VerifierType::Build, "Failed".into())
+                .as_required();
+
+        let eval = EvaluationResult::from_verifiers(
+            Uuid::new_v4(),
+            vec![pass, required_fail],
+            GREEN,
+            YELLOW,
+        );
+
+        // Even though composite score is 0.5, required failure overrides to red
+        assert_eq!(eval.confidence_level, ConfidenceLevel::Red);
+        assert!(!eval.passed);
+    }
+
+    /// Jest-style summary yields passed, failed and total counts.
+    #[test]
+    fn test_parse_test_output_jest() {
+        let parsed = parse_test_output("Tests: 10 passed, 2 failed, 12 total").unwrap();
+        assert_eq!(parsed.0, 10);
+        assert_eq!(parsed.1, 2);
+        assert_eq!(parsed.2, 12);
+    }
+
+    /// cargo-style summary yields counts with total = passed + failed.
+    #[test]
+    fn test_parse_test_output_cargo() {
+        let parsed = parse_test_output("test result: ok. 25 passed; 0 failed;").unwrap();
+        assert_eq!(parsed.0, 25);
+        assert_eq!(parsed.1, 0);
+        assert_eq!(parsed.2, 25);
+    }
+}