makima/src/llm/contract_evaluator.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

//! Contract Evaluator - LLM-based evaluation of completed contracts against their directive.
//!
//! This module will be reimplemented as part of the directive verification engine.
//! See the orchestration module for the new evaluation system.
//!
//! The new evaluation system will provide:
//! - Tiered verification (programmatic verifiers first, then LLM evaluation)
//! - Composite confidence scoring (weighted combination of results)
//! - Pluggable verifier interface (test runner, linter, build, type checker)
//! - Proper integration with the directive chain steps

use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use uuid::Uuid;

// use crate::db::models::{Contract, DirectiveAcceptanceCriterion, DirectiveRequirement};

/// Result of contract evaluation.
///
/// This is the success type of [`ContractEvaluator::evaluate_contract`].
/// Field names are serialized and deserialized in camelCase
/// (for example `overall_score` becomes `overallScore`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ContractEvaluationResult {
    /// Whether the contract passed evaluation
    pub passed: bool,
    /// Overall score from 0.0 to 1.0.
    /// The range is a documented convention; the `f64` type does not enforce it.
    pub overall_score: f64,
    /// Results for each acceptance criterion
    pub criteria_results: Vec<EvaluationCriterionResultLegacy>,
    /// Summary feedback from the evaluator
    pub summary_feedback: String,
    /// Instructions for rework if failed; `None` when no instructions are provided
    pub rework_instructions: Option<String>,
}

/// Per-criterion evaluation result (legacy - kept for compatibility).
///
/// One entry of [`ContractEvaluationResult::criteria_results`]. Field names are
/// serialized and deserialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EvaluationCriterionResultLegacy {
    /// Identifier of the criterion this result refers to.
    /// NOTE(review): stored as a plain string; the ID format is not visible here - confirm.
    pub criterion_id: String,
    /// Text of the criterion (presumably the human-readable statement - confirm against producers).
    pub criterion_text: String,
    /// Whether this criterion passed
    pub passed: bool,
    /// Score (0.0-1.0)
    pub score: f64,
    /// Feedback for this criterion
    pub feedback: String,
    /// Evidence supporting the evaluation
    pub evidence: Vec<String>,
}

/// File content for evaluation context: a file path paired with its text.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so values can be
/// compared directly (for example in `assert_eq!` or when de-duplicating).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileContent {
    /// Path of the file.
    /// NOTE(review): whether this is absolute or repository-relative is not visible here - confirm.
    pub path: String,
    /// Content of the file, held as an owned string.
    pub content: String,
}

/// Contract evaluator for LLM-based assessment.
///
/// NOTE: This is a stub implementation. The full evaluation system will be
/// implemented as part of the orchestration/verifier module.
pub struct ContractEvaluator {
    /// Database connection pool supplied at construction.
    /// It is stored but not read by any method in this file; the leading
    /// underscore keeps the compiler from warning about the unused field.
    _pool: PgPool,
}

impl ContractEvaluator {
    /// Create a new contract evaluator.
    pub fn new(pool: PgPool) -> Self {
        Self { _pool: pool }
    }

    /// Evaluate a contract - stub implementation.
    ///
    /// This will be reimplemented in the orchestration module with:
    /// - Programmatic verification (tests, lint, build)
    /// - LLM evaluation
    /// - Composite scoring
    pub async fn evaluate_contract(
        &self,
        _contract_id: Uuid,
    ) -> Result<ContractEvaluationResult, ContractEvaluatorError> {
        // TODO: Implement using the new directive evaluation system
        Err(ContractEvaluatorError::NotImplemented(
            "Contract evaluator will be reimplemented with directive system".to_string(),
        ))
    }
}

/// Error types for contract evaluation.
///
/// `Display` messages come from the `#[error(...)]` attributes on each variant.
#[derive(Debug, thiserror::Error)]
pub enum ContractEvaluatorError {
    /// A database operation failed. `#[from]` generates `From<sqlx::Error>`,
    /// so `?` converts `sqlx::Error` values into this variant.
    #[error("Database error: {0}")]
    Database(#[from] sqlx::Error),

    /// An LLM-related failure, carrying a message string.
    /// NOTE(review): nothing in this file constructs this variant - confirm producers.
    #[error("LLM error: {0}")]
    Llm(String),

    /// The requested operation is not implemented; this is what the stub
    /// `ContractEvaluator::evaluate_contract` returns.
    #[error("Not implemented: {0}")]
    NotImplemented(String),
}