diff options
| author | soryu <soryu@soryu.co> | 2026-02-07 18:27:54 +0000 |
|---|---|---|
| committer | soryu <soryu@soryu.co> | 2026-02-07 18:27:54 +0000 |
| commit | 97e21c8296ec5f91912d56980ebf3b18a1ca3507 (patch) | |
| tree | 3650e2eb62ab5b387006563ce64139aa7688da5f | |
| parent | 8f757f561eeb397aaea70d7c10d41445cc5e50b5 (diff) | |
| download | soryu-97e21c8296ec5f91912d56980ebf3b18a1ca3507.tar.gz soryu-97e21c8296ec5f91912d56980ebf3b18a1ca3507.zip | |
Add directive monitor contracts
| -rw-r--r-- | makima/frontend/src/components/directives/DirectiveContractsTab.tsx | 131 | ||||
| -rw-r--r-- | makima/frontend/src/components/directives/DirectiveDetail.tsx | 393 | ||||
| -rw-r--r-- | makima/frontend/src/components/directives/StepDiagram.tsx | 152 | ||||
| -rw-r--r-- | makima/frontend/src/lib/api.ts | 14 | ||||
| -rw-r--r-- | makima/migrations/20260208000000_add_monitoring_contract_id.sql | 2 | ||||
| -rw-r--r-- | makima/src/bin/makima.rs | 14 | ||||
| -rw-r--r-- | makima/src/daemon/api/directive.rs | 26 | ||||
| -rw-r--r-- | makima/src/daemon/cli/directive.rs | 20 | ||||
| -rw-r--r-- | makima/src/daemon/cli/mod.rs | 6 | ||||
| -rw-r--r-- | makima/src/daemon/skills/directive.md | 20 | ||||
| -rw-r--r-- | makima/src/db/models.rs | 86 | ||||
| -rw-r--r-- | makima/src/db/repository.rs | 214 | ||||
| -rw-r--r-- | makima/src/orchestration/directive.rs | 562 | ||||
| -rw-r--r-- | makima/src/server/handlers/directives.rs | 223 | ||||
| -rw-r--r-- | makima/src/server/mod.rs | 2 | ||||
| -rw-r--r-- | makima/src/server/openapi.rs | 18 |
16 files changed, 1739 insertions, 144 deletions
diff --git a/makima/frontend/src/components/directives/DirectiveContractsTab.tsx b/makima/frontend/src/components/directives/DirectiveContractsTab.tsx new file mode 100644 index 0000000..59ebfc8 --- /dev/null +++ b/makima/frontend/src/components/directives/DirectiveContractsTab.tsx @@ -0,0 +1,131 @@ +import { useNavigate } from "react-router"; +import type { + DirectiveWithChains, + StepContractSummary, + ContractPhase, +} from "../../lib/api"; +import { PhaseProgressBarCompact } from "../contracts/PhaseProgressBar"; + +interface DirectiveContractsTabProps { + directive: DirectiveWithChains; +} + +const statusColors: Record<string, string> = { + active: "text-green-400", + completed: "text-blue-400", + archived: "text-[#555]", +}; + +function ContractCard({ + summary, + label, +}: { + summary: StepContractSummary; + label: string; +}) { + const navigate = useNavigate(); + + const progressPct = + summary.taskCount > 0 + ? Math.round((summary.tasksDone / summary.taskCount) * 100) + : 0; + + return ( + <div + className="border border-dashed border-[rgba(117,170,252,0.25)] bg-[rgba(117,170,252,0.03)] p-3 cursor-pointer hover:bg-[rgba(117,170,252,0.06)] transition-colors" + onClick={() => navigate(`/contracts/${summary.id}`)} + > + <div className="flex items-center justify-between mb-1.5"> + <div className="flex items-center gap-2 min-w-0"> + <span className="font-mono text-[11px] text-[#dbe7ff] truncate"> + {summary.name} + </span> + <span className="font-mono text-[9px] text-[#7788aa] uppercase shrink-0"> + {summary.contractType} + </span> + </div> + <div className="flex items-center gap-2 shrink-0"> + <span + className={`font-mono text-[9px] uppercase ${statusColors[summary.status] || "text-[#888]"}`} + > + {summary.status} + </span> + <span className="font-mono text-[9px] text-[#75aafc]">→</span> + </div> + </div> + + <div className="flex items-center gap-2 mb-1.5"> + <span className="font-mono text-[9px] text-[#7788aa] uppercase shrink-0"> + {label} + </span> + <PhaseProgressBarCompact + currentPhase={summary.phase as ContractPhase} + /> + </div> + + {/* Task progress bar */} + <div className="flex items-center gap-2"> + <div className="flex-1 h-1 bg-[rgba(117,170,252,0.1)] rounded-full overflow-hidden"> + <div + className="h-full bg-[#3f6fb3] rounded-full transition-all" + style={{ width: `${progressPct}%` }} + /> + </div> + <span className="font-mono text-[9px] text-[#7788aa] shrink-0"> + {summary.tasksDone}/{summary.taskCount} tasks + </span> + </div> + </div> + ); +} + +export function DirectiveContractsTab({ + directive, +}: DirectiveContractsTabProps) { + // Collect all contract summaries + const contracts: { summary: StepContractSummary; label: string }[] = []; + + if (directive.orchestratorContractSummary) { + contracts.push({ + summary: directive.orchestratorContractSummary, + label: "Planning", + }); + } + + for (const chain of directive.chains) { + for (const step of chain.steps) { + if (step.contractSummary) { + contracts.push({ + summary: step.contractSummary, + label: step.name, + }); + } + } + } + + if (contracts.length === 0) { + return ( + <div className="text-center py-8"> + <p className="font-mono text-xs text-[#7788aa]"> + {directive.status === "draft" + ? "No contracts yet. Start the directive to begin planning." + : directive.status === "planning" + ? "Planning in progress... contracts will appear when steps are created." + : "No contracts associated with this directive."} + </p> + </div> + ); + } + + return ( + <div className="space-y-2"> + {contracts.map((c) => ( + <ContractCard + key={c.summary.id} + summary={c.summary} + label={c.label} + /> + ))} + </div> + ); +} diff --git a/makima/frontend/src/components/directives/DirectiveDetail.tsx b/makima/frontend/src/components/directives/DirectiveDetail.tsx index 094cdf2..95dc7cc 100644 --- a/makima/frontend/src/components/directives/DirectiveDetail.tsx +++ b/makima/frontend/src/components/directives/DirectiveDetail.tsx @@ -1,12 +1,16 @@ -import { useEffect, useRef } from "react"; +import { useState, useEffect, useRef } from "react"; import { useNavigate } from "react-router"; import type { DirectiveWithChains, DirectiveStatus, ChainWithSteps, ChainStep, + ContractPhase, } from "../../lib/api"; import { getDirective } from "../../lib/api"; +import { PhaseProgressBarCompact } from "../contracts/PhaseProgressBar"; +import { StepDiagram } from "./StepDiagram"; +import { DirectiveContractsTab } from "./DirectiveContractsTab"; interface DirectiveDetailProps { directive: DirectiveWithChains; @@ -16,6 +20,8 @@ interface DirectiveDetailProps { onRefresh?: (updated: DirectiveWithChains) => void; } +type Tab = "overview" | "chain" | "contracts"; + const statusColors: Record<DirectiveStatus, string> = { draft: "text-[#888]", planning: "text-yellow-400", @@ -36,17 +42,18 @@ const stepStatusColors: Record<string, string> = { const stepStatusIcons: Record<string, string> = { pending: "\u25CB", // β running: "\u25D4", // β - passed: "\u25CF", // β - failed: "\u2715", // β + passed: "\u25CF", // β + failed: "\u2715", // β }; function StepRow({ step }: { step: ChainStep }) { const navigate = useNavigate(); const color = stepStatusColors[step.status] || "text-[#888]"; const icon = stepStatusIcons[step.status] || "\u25CB"; + const summary = step.contractSummary; return ( - <div className="flex items-start gap-2 py-1 px-2 hover:bg-[rgba(117,170,252,0.05)]"> + <div className="flex items-start gap-2 py-1.5 px-2 hover:bg-[rgba(117,170,252,0.05)]"> <span className={`font-mono text-[11px] ${color} mt-px`}>{icon}</span> <div className="flex-1 min-w-0"> <div className="flex items-center gap-2"> @@ -57,21 +64,41 @@ function StepRow({ step }: { step: ChainStep }) { {step.status} </span> </div> + {summary && ( + <div className="flex items-center gap-2 mt-0.5"> + <PhaseProgressBarCompact + currentPhase={summary.phase as ContractPhase} + /> + <span className="font-mono text-[9px] text-[#7788aa]"> + {summary.tasksDone}/{summary.taskCount} tasks + </span> + {step.contractId && ( + <button + onClick={(e) => { + e.stopPropagation(); + navigate(`/contracts/${step.contractId}`); + }} + className="font-mono text-[9px] text-[#75aafc] hover:text-white transition-colors" + > + contract → + </button> + )} + </div> + )} + {!summary && step.contractId && ( + <button + onClick={() => navigate(`/contracts/${step.contractId}`)} + className="font-mono text-[9px] text-[#75aafc] hover:text-white transition-colors mt-0.5" + > + contract → + </button> + )} {step.description && ( - <p className="font-mono text-[10px] text-[#7788aa] truncate"> + <p className="font-mono text-[10px] text-[#7788aa] truncate mt-0.5"> {step.description} </p> )} </div> - {step.contractId && ( - <button - onClick={() => navigate(`/contracts/${step.contractId}`)} - className="font-mono text-[9px] text-[#75aafc] hover:text-white transition-colors shrink-0" - title="View contract" - > - contract → - </button> - )} </div> ); } @@ -84,7 +111,9 @@ function ChainCard({ chainWithSteps }: { chainWithSteps: ChainWithSteps }) { <div className="border border-dashed border-[rgba(117,170,252,0.25)] bg-[rgba(117,170,252,0.03)]"> <div className="p-3"> <div className="flex items-center justify-between mb-1"> - <span className="font-mono text-xs text-[#dbe7ff]">{chain.name}</span> + <span className="font-mono text-xs text-[#dbe7ff]"> + {chain.name} + </span> <span className="font-mono text-[10px] text-[#7788aa] uppercase"> gen {chain.generation} · {chain.status} </span> @@ -102,7 +131,9 @@ function ChainCard({ chainWithSteps }: { chainWithSteps: ChainWithSteps }) { <span className="text-red-400">{chain.failedSteps} failed</span> )} {chain.currentConfidence != null && ( - <span>confidence: {(chain.currentConfidence * 100).toFixed(0)}%</span> + <span> + confidence: {(chain.currentConfidence * 100).toFixed(0)}% + </span> )} </div> </div> @@ -151,6 +182,7 @@ export function DirectiveDetail({ onRefresh, }: DirectiveDetailProps) { const navigate = useNavigate(); + const [activeTab, setActiveTab] = useState<Tab>("overview"); // Auto-poll when directive is in an active state const isLive = @@ -185,6 +217,31 @@ export function DirectiveDetail({ }; }, [isLive, directive.id, onRefresh]); + // Count total steps and completed steps across all chains + const totalSteps = directive.chains.reduce( + (sum, c) => sum + c.totalSteps, + 0 + ); + const completedSteps = directive.chains.reduce( + (sum, c) => sum + c.completedSteps, + 0 + ); + + // Count contracts + const contractCount = + (directive.orchestratorContractSummary ? 1 : 0) + + directive.chains.reduce( + (sum, c) => + sum + c.steps.filter((s) => s.contractSummary != null).length, + 0 + ); + + const tabs: { key: Tab; label: string; count?: number }[] = [ + { key: "overview", label: "Overview" }, + { key: "chain", label: "Chain", count: totalSteps }, + { key: "contracts", label: "Contracts", count: contractCount }, + ]; + return ( <div className="panel h-full flex flex-col"> {/* Header */} @@ -235,109 +292,235 @@ export function DirectiveDetail({ </h2> </div> - {/* Content */} - <div className="flex-1 overflow-y-auto p-4 space-y-4"> - {/* Orchestrator contract link */} - {directive.orchestratorContractId && ( - <div className="flex items-center gap-2 p-2 border border-dashed border-[rgba(117,170,252,0.2)] bg-[rgba(117,170,252,0.03)]"> - <span className="font-mono text-[10px] text-[#7788aa] uppercase"> - Planning Contract - </span> - <button - onClick={() => - navigate(`/contracts/${directive.orchestratorContractId}`) + {/* Tabs */} + <div className="flex border-b border-[rgba(117,170,252,0.2)]"> + {tabs.map((tab) => ( + <button + key={tab.key} + onClick={() => setActiveTab(tab.key)} + className={` + px-4 py-2 font-mono text-xs uppercase tracking-wider transition-colors + ${ + activeTab === tab.key + ? "text-[#dbe7ff] border-b-2 border-[#75aafc]" + : "text-[#555] hover:text-[#9bc3ff]" } - className="font-mono text-[11px] text-[#75aafc] hover:text-white transition-colors" - > - {directive.orchestratorContractId.slice(0, 8)}... → - </button> - {directive.status === "planning" && ( - <span className="font-mono text-[9px] text-yellow-400 animate-pulse"> - planning in progress + `} + > + {tab.label} + {tab.count != null && tab.count > 0 && ( + <span className="ml-1 text-[10px] text-[#7788aa]"> + ({tab.count}) </span> )} - </div> - )} + </button> + ))} + </div> - {/* Goal */} - <div> - <h4 className="font-mono text-[10px] text-[#75aafc] uppercase tracking-wider mb-1"> - Goal - </h4> - <p className="font-mono text-xs text-[#9bb8d8] whitespace-pre-wrap"> - {directive.goal} - </p> - </div> + {/* Tab content */} + <div className="flex-1 overflow-y-auto p-4"> + {activeTab === "overview" && ( + <div className="space-y-4"> + {/* Orchestrator contract link */} + {directive.orchestratorContractId && ( + <div className="flex items-center gap-2 p-2 border border-dashed border-[rgba(117,170,252,0.2)] bg-[rgba(117,170,252,0.03)]"> + <span className="font-mono text-[10px] text-[#7788aa] uppercase"> + Planning Contract + </span> + {directive.orchestratorContractSummary && ( + <PhaseProgressBarCompact + currentPhase={ + directive.orchestratorContractSummary + .phase as ContractPhase + } + /> + )} + <button + onClick={() => + navigate( + `/contracts/${directive.orchestratorContractId}` + ) + } + className="font-mono text-[11px] text-[#75aafc] hover:text-white transition-colors" + > + {directive.orchestratorContractSummary?.name || + directive.orchestratorContractId.slice(0, 8) + "..."}{" "} + → + </button> + {directive.status === "planning" && ( + <span className="font-mono text-[9px] text-yellow-400 animate-pulse"> + planning in progress + </span> + )} + </div> + )} - {/* Config */} - <div className="grid grid-cols-2 gap-2"> - <div> - <span className="font-mono text-[10px] text-[#7788aa] uppercase"> - Autonomy - </span> - <div className="font-mono text-xs text-[#dbe7ff]"> - {directive.autonomyLevel} + {/* Goal */} + <div> + <h4 className="font-mono text-[10px] text-[#75aafc] uppercase tracking-wider mb-1"> + Goal + </h4> + <p className="font-mono text-xs text-[#9bb8d8] whitespace-pre-wrap"> + {directive.goal} + </p> </div> - </div> - <div> - <span className="font-mono text-[10px] text-[#7788aa] uppercase"> - Chains - </span> - <div className="font-mono text-xs text-[#dbe7ff]"> - {directive.chainGenerationCount} generated + + {/* Config grid */} + <div className="grid grid-cols-3 gap-2"> + <div> + <span className="font-mono text-[10px] text-[#7788aa] uppercase"> + Autonomy + </span> + <div className="font-mono text-xs text-[#dbe7ff]"> + {directive.autonomyLevel} + </div> + </div> + <div> + <span className="font-mono text-[10px] text-[#7788aa] uppercase"> + Chains + </span> + <div className="font-mono text-xs text-[#dbe7ff]"> + {directive.chainGenerationCount} generated + </div> + </div> + <div> + <span className="font-mono text-[10px] text-[#7788aa] uppercase"> + Cost + </span> + <div className="font-mono text-xs text-[#dbe7ff]"> + ${directive.totalCostUsd.toFixed(2)} + </div> + </div> + {directive.repositoryUrl && ( + <div className="col-span-3"> + <span className="font-mono text-[10px] text-[#7788aa] uppercase"> + Repository + </span> + <div className="font-mono text-xs text-[#dbe7ff] truncate"> + {directive.repositoryUrl} + </div> + </div> + )} </div> - </div> - <div> - <span className="font-mono text-[10px] text-[#7788aa] uppercase"> - Cost - </span> - <div className="font-mono text-xs text-[#dbe7ff]"> - ${directive.totalCostUsd.toFixed(2)} + + {/* Stat cards */} + <div className="grid grid-cols-3 gap-2"> + <div className="border border-dashed border-[rgba(117,170,252,0.2)] p-2 text-center"> + <div className="font-mono text-lg text-[#dbe7ff]"> + {totalSteps} + </div> + <div className="font-mono text-[9px] text-[#7788aa] uppercase"> + Total Steps + </div> + </div> + <div className="border border-dashed border-[rgba(117,170,252,0.2)] p-2 text-center"> + <div className="font-mono text-lg text-green-400"> + {completedSteps} + </div> + <div className="font-mono text-[9px] text-[#7788aa] uppercase"> + Completed + </div> + </div> + <div className="border border-dashed border-[rgba(117,170,252,0.2)] p-2 text-center"> + <div className="font-mono text-lg text-[#dbe7ff]"> + ${directive.totalCostUsd.toFixed(2)} + </div> + <div className="font-mono text-[9px] text-[#7788aa] uppercase"> + Cost + </div> + </div> </div> - </div> - {directive.repositoryUrl && ( + + {/* Structured sections */} + <JsonSection label="Requirements" data={directive.requirements} /> + <JsonSection + label="Acceptance Criteria" + data={directive.acceptanceCriteria} + /> + <JsonSection label="Constraints" data={directive.constraints} /> + <JsonSection + label="External Dependencies" + data={directive.externalDependencies} + /> + + {/* Metadata */} <div> - <span className="font-mono text-[10px] text-[#7788aa] uppercase"> - Repository - </span> - <div className="font-mono text-xs text-[#dbe7ff] truncate"> - {directive.repositoryUrl} + <h4 className="font-mono text-[10px] text-[#75aafc] uppercase tracking-wider mb-1"> + Metadata + </h4> + <div className="grid grid-cols-2 gap-1 font-mono text-[10px]"> + <span className="text-[#7788aa]">Created</span> + <span className="text-[#9bb8d8]"> + {new Date(directive.createdAt).toLocaleString()} + </span> + <span className="text-[#7788aa]">Updated</span> + <span className="text-[#9bb8d8]"> + {new Date(directive.updatedAt).toLocaleString()} + </span> + {directive.startedAt && ( + <> + <span className="text-[#7788aa]">Started</span> + <span className="text-[#9bb8d8]"> + {new Date(directive.startedAt).toLocaleString()} + </span> + </> + )} + {directive.completedAt && ( + <> + <span className="text-[#7788aa]">Completed</span> + <span className="text-[#9bb8d8]"> + {new Date(directive.completedAt).toLocaleString()} + </span> + </> + )} + <span className="text-[#7788aa]">Version</span> + <span className="text-[#9bb8d8]">{directive.version}</span> </div> </div> - )} - </div> + </div> + )} - {/* Structured sections */} - <JsonSection label="Requirements" data={directive.requirements} /> - <JsonSection - label="Acceptance Criteria" - data={directive.acceptanceCriteria} - /> - <JsonSection label="Constraints" data={directive.constraints} /> - <JsonSection - label="External Dependencies" - data={directive.externalDependencies} - /> + {activeTab === "chain" && ( + <div className="space-y-4"> + {/* Step diagram */} + {directive.chains.length > 0 && ( + <div> + <h4 className="font-mono text-[10px] text-[#75aafc] uppercase tracking-wider mb-2"> + Step Dependencies + </h4> + <StepDiagram + steps={directive.chains.flatMap((c) => c.steps)} + /> + </div> + )} - {/* Chains */} - <div> - <h4 className="font-mono text-[10px] text-[#75aafc] uppercase tracking-wider mb-2"> - Chains ({directive.chains.length}) - </h4> - {directive.chains.length === 0 ? ( - <p className="font-mono text-xs text-[#7788aa]"> - {directive.status === "planning" - ? "Planning in progress... chains will appear when the planner completes." - : "No chains yet. Chains are created during planning."} - </p> - ) : ( - <div className="space-y-2"> - {directive.chains.map((cws) => ( - <ChainCard key={cws.id} chainWithSteps={cws} /> - ))} + {/* Chain cards */} + <div> + <h4 className="font-mono text-[10px] text-[#75aafc] uppercase tracking-wider mb-2"> + Chains ({directive.chains.length}) + </h4> + {directive.chains.length === 0 ? ( + <p className="font-mono text-xs text-[#7788aa]"> + {directive.status === "planning" + ? "Planning in progress... chains will appear when the planner completes." + : directive.status === "draft" + ? "No chains yet. Start the directive to begin planning." + : "No chains created for this directive."} + </p> + ) : ( + <div className="space-y-2"> + {directive.chains.map((cws) => ( + <ChainCard key={cws.id} chainWithSteps={cws} /> + ))} + </div> + )} </div> - )} - </div> + </div> + )} + + {activeTab === "contracts" && ( + <DirectiveContractsTab directive={directive} /> + )} </div> </div> ); diff --git a/makima/frontend/src/components/directives/StepDiagram.tsx b/makima/frontend/src/components/directives/StepDiagram.tsx new file mode 100644 index 0000000..5c65ae1 --- /dev/null +++ b/makima/frontend/src/components/directives/StepDiagram.tsx @@ -0,0 +1,152 @@ +import { useNavigate } from "react-router"; +import type { ChainStep, ContractPhase } from "../../lib/api"; +import { PhaseProgressBarCompact } from "../contracts/PhaseProgressBar"; + +interface StepDiagramProps { + steps: ChainStep[]; +} + +const statusBorderColors: Record<string, string> = { + pending: "border-[#555]", + running: "border-yellow-400", + passed: "border-green-400", + failed: "border-red-400", +}; + +const statusDotColors: Record<string, string> = { + pending: "bg-[#555]", + running: "bg-yellow-400", + passed: "bg-green-400", + failed: "bg-red-400", +}; + +/** + * Assign depth to each step via topological sort. + * Steps with no dependsOn = depth 0. Steps depending only on depth-0 = depth 1. Etc. + */ +function assignDepths(steps: ChainStep[]): Map<string, number> { + const depths = new Map<string, number>(); + const stepMap = new Map(steps.map((s) => [s.id, s])); + + function getDepth(id: string): number { + if (depths.has(id)) return depths.get(id)!; + const step = stepMap.get(id); + if (!step || !step.dependsOn || step.dependsOn.length === 0) { + depths.set(id, 0); + return 0; + } + const maxParent = Math.max( + ...step.dependsOn.map((depId) => getDepth(depId)) + ); + const d = maxParent + 1; + depths.set(id, d); + return d; + } + + for (const step of steps) { + getDepth(step.id); + } + + return depths; +} + +export function StepDiagram({ steps }: StepDiagramProps) { + const navigate = useNavigate(); + + if (steps.length === 0) { + return ( + <p className="font-mono text-xs text-[#7788aa]">No steps to display.</p> + ); + } + + const depths = assignDepths(steps); + const maxDepth = Math.max(...Array.from(depths.values())); + + // Group steps by depth + const levels: ChainStep[][] = []; + for (let d = 0; d <= maxDepth; d++) { + levels.push( + steps + .filter((s) => depths.get(s.id) === d) + .sort((a, b) => a.orderIndex - b.orderIndex) + ); + } + + // Build position map for connectors + const stepPositions = new Map<string, { level: number; index: number }>(); + levels.forEach((level, li) => { + level.forEach((step, si) => { + stepPositions.set(step.id, { level: li, index: si }); + }); + }); + + return ( + <div className="space-y-3"> + {levels.map((level, li) => ( + <div key={li} className="flex items-start gap-2 flex-wrap"> + {li > 0 && ( + <div className="w-full flex justify-center mb-1"> + <div className="w-px h-3 bg-[rgba(117,170,252,0.3)]" /> + </div> + )} + {level.map((step) => { + const borderColor = + statusBorderColors[step.status] || "border-[#555]"; + const dotColor = statusDotColors[step.status] || "bg-[#555]"; + const summary = step.contractSummary; + const hasContract = !!step.contractId; + + return ( + <div + key={step.id} + className={` + border ${borderColor} bg-[rgba(0,0,0,0.2)] p-2 min-w-[180px] max-w-[220px] + ${hasContract ? "cursor-pointer hover:bg-[rgba(117,170,252,0.05)]" : ""} + transition-colors + `} + onClick={() => { + if (hasContract) navigate(`/contracts/${step.contractId}`); + }} + title={hasContract ? "View contract" : undefined} + > + <div className="flex items-center gap-1.5 mb-1"> + <div className={`w-1.5 h-1.5 rounded-full ${dotColor}`} /> + <span className="font-mono text-[11px] text-[#dbe7ff] truncate flex-1"> + {step.name} + </span> + {hasContract && ( + <span className="font-mono text-[9px] text-[#75aafc] shrink-0"> + → + </span> + )} + </div> + {summary && ( + <> + <div className="mb-1"> + <PhaseProgressBarCompact + currentPhase={summary.phase as ContractPhase} + /> + </div> + <div className="font-mono text-[9px] text-[#7788aa]"> + {summary.tasksDone}/{summary.taskCount} tasks + {summary.tasksRunning > 0 && ( + <span className="text-yellow-400 ml-1"> + {summary.tasksRunning} running + </span> + )} + {summary.tasksFailed > 0 && ( + <span className="text-red-400 ml-1"> + {summary.tasksFailed} failed + </span> + )} + </div> + </> + )} + </div> + ); + })} + </div> + ))} + </div> + ); +} diff --git a/makima/frontend/src/lib/api.ts b/makima/frontend/src/lib/api.ts index ccc7156..9782a07 100644 --- a/makima/frontend/src/lib/api.ts +++ b/makima/frontend/src/lib/api.ts @@ -3075,6 +3075,18 @@ export interface DirectiveChain { updatedAt: string; } +export interface StepContractSummary { + id: string; + name: string; + contractType: string; + phase: string; + status: string; + taskCount: number; + tasksDone: number; + tasksRunning: number; + tasksFailed: number; +} + export interface ChainStep { id: string; chainId: string; @@ -3092,6 +3104,7 @@ export interface ChainStep { startedAt: string | null; completedAt: string | null; createdAt: string; + contractSummary: StepContractSummary | null; } export interface ChainWithSteps extends DirectiveChain { @@ -3099,6 +3112,7 @@ export interface ChainWithSteps extends DirectiveChain { } export interface DirectiveWithChains extends Directive { + orchestratorContractSummary: StepContractSummary | null; chains: ChainWithSteps[]; } diff --git a/makima/migrations/20260208000000_add_monitoring_contract_id.sql b/makima/migrations/20260208000000_add_monitoring_contract_id.sql new file mode 100644 index 0000000..65a4a0d --- /dev/null +++ b/makima/migrations/20260208000000_add_monitoring_contract_id.sql @@ -0,0 +1,2 @@ +ALTER TABLE chain_steps ADD COLUMN monitoring_contract_id UUID REFERENCES contracts(id) ON DELETE SET NULL; +ALTER TABLE chain_steps ADD COLUMN monitoring_task_id UUID; diff --git a/makima/src/bin/makima.rs b/makima/src/bin/makima.rs index 9d7f847..92fdae6 100644 --- a/makima/src/bin/makima.rs +++ b/makima/src/bin/makima.rs @@ -773,6 +773,20 @@ async fn run_directive( let result = client.directive_start(args.directive_id).await?; println!("{}", serde_json::to_string(&result.0)?); } + DirectiveCommand::Evaluate(args) => { + let client = ApiClient::new(args.common.api_url, args.common.api_key)?; + let result = client + .directive_evaluate_step(args.common.directive_id, args.step_id) + .await?; + println!("{}", serde_json::to_string(&result.0)?); + } + DirectiveCommand::Evaluations(args) => { + let client = ApiClient::new(args.common.api_url, args.common.api_key)?; + let result = client + .directive_evaluations(args.common.directive_id, args.step_id) + .await?; + println!("{}", serde_json::to_string(&result.0)?); + } } Ok(()) diff --git a/makima/src/daemon/api/directive.rs b/makima/src/daemon/api/directive.rs index 42f6f45..3589e78 100644 --- a/makima/src/daemon/api/directive.rs +++ b/makima/src/daemon/api/directive.rs @@ -57,4 +57,30 @@ impl ApiClient { self.post_empty(&format!("/api/v1/directives/{}/start", directive_id)) .await } + + /// Trigger a manual evaluation for a step. + pub async fn directive_evaluate_step( + &self, + directive_id: Uuid, + step_id: Uuid, + ) -> Result<JsonValue, ApiError> { + self.post_empty(&format!( + "/api/v1/directives/{}/steps/{}/evaluate", + directive_id, step_id + )) + .await + } + + /// List evaluations for a step. + pub async fn directive_evaluations( + &self, + directive_id: Uuid, + step_id: Uuid, + ) -> Result<JsonValue, ApiError> { + self.get(&format!( + "/api/v1/directives/{}/steps/{}/evaluations", + directive_id, step_id + )) + .await + } } diff --git a/makima/src/daemon/cli/directive.rs b/makima/src/daemon/cli/directive.rs index 5ce88c5..4c29c14 100644 --- a/makima/src/daemon/cli/directive.rs +++ b/makima/src/daemon/cli/directive.rs @@ -38,3 +38,23 @@ pub struct UpdateStatusArgs { /// New status (draft, planning, active, paused, completed, archived, failed) pub status: String, } + +/// Arguments for evaluate command (trigger manual evaluation). +#[derive(Args, Debug)] +pub struct EvaluateArgs { + #[command(flatten)] + pub common: DirectiveArgs, + + /// Step ID to evaluate + pub step_id: Uuid, +} + +/// Arguments for evaluations command (list evaluation history). +#[derive(Args, Debug)] +pub struct EvaluationsArgs { + #[command(flatten)] + pub common: DirectiveArgs, + + /// Step ID to list evaluations for + pub step_id: Uuid, +} diff --git a/makima/src/daemon/cli/mod.rs b/makima/src/daemon/cli/mod.rs index b07ab5a..c9a8c6f 100644 --- a/makima/src/daemon/cli/mod.rs +++ b/makima/src/daemon/cli/mod.rs @@ -225,6 +225,12 @@ pub enum DirectiveCommand { /// Start a directive (create planning contract and begin orchestration) Start(DirectiveArgs), + + /// Trigger a manual evaluation for a step + Evaluate(directive::EvaluateArgs), + + /// List evaluation history for a step + Evaluations(directive::EvaluationsArgs), } impl Cli { diff --git a/makima/src/daemon/skills/directive.md b/makima/src/daemon/skills/directive.md index cdfdaa2..0d1e9d6 100644 --- a/makima/src/daemon/skills/directive.md +++ b/makima/src/daemon/skills/directive.md @@ -47,6 +47,20 @@ makima directive update-status <status> ``` Updates the directive status. Valid statuses: `draft`, `planning`, `active`, `paused`, `completed`, `archived`, `failed`. +## Evaluation + +### Trigger manual evaluation for a step +```bash +makima directive evaluate <step_id> +``` +Triggers a monitoring evaluation for the specified step. The step must have been executed (have a contract). Sets the step to "evaluating" and dispatches a monitoring contract. + +### List evaluations for a step +```bash +makima directive evaluations <step_id> +``` +Returns the evaluation history for a step, ordered by evaluation number. + ## Output Format All commands output JSON to stdout. @@ -63,6 +77,12 @@ makima directive chains # Get details of a specific chain makima directive chain <chain_id> +# Trigger manual evaluation of a step +makima directive evaluate <step_id> + +# Check evaluation history +makima directive evaluations <step_id> + # Update status to active makima directive update-status active ``` diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs index bc90942..eff2df0 100644 --- a/makima/src/db/models.rs +++ b/makima/src/db/models.rs @@ -2849,12 +2849,37 @@ pub struct UpdateDirectiveRequest { pub version: Option<i32>, } +/// Lightweight contract summary attached to a chain step. +#[derive(Debug, FromRow, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct StepContractSummary { + pub id: Uuid, + pub name: String, + pub contract_type: String, + pub phase: String, + pub status: String, + pub task_count: i64, + pub tasks_done: i64, + pub tasks_running: i64, + pub tasks_failed: i64, +} + +/// Chain step enriched with optional contract summary. +#[derive(Debug, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ChainStepWithContract { + #[serde(flatten)] + pub step: ChainStep, + pub contract_summary: Option<StepContractSummary>, +} + /// Directive with its chains and steps for detail view. #[derive(Debug, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct DirectiveWithChains { #[serde(flatten)] pub directive: Directive, + pub orchestrator_contract_summary: Option<StepContractSummary>, pub chains: Vec<ChainWithSteps>, } @@ -2903,6 +2928,8 @@ pub struct ChainStep { pub status: String, pub contract_id: Option<Uuid>, pub supervisor_task_id: Option<Uuid>, + pub monitoring_contract_id: Option<Uuid>, + pub monitoring_task_id: Option<Uuid>, pub confidence_score: Option<f64>, pub confidence_level: Option<String>, pub evaluation_count: i32, @@ -2922,5 +2949,62 @@ pub struct ChainStep { pub struct ChainWithSteps { #[serde(flatten)] pub chain: DirectiveChain, - pub steps: Vec<ChainStep>, + pub steps: Vec<ChainStepWithContract>, +} + +/// Full row from directive_evaluations table. +#[derive(Debug, Clone, FromRow, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DirectiveEvaluation { + pub id: Uuid, + pub directive_id: Uuid, + pub chain_id: Option<Uuid>, + pub step_id: Option<Uuid>, + pub contract_id: Option<Uuid>, + pub evaluation_type: String, + pub evaluation_number: i32, + pub evaluator: Option<String>, + pub passed: bool, + pub overall_score: Option<f64>, + pub confidence_level: Option<String>, + #[sqlx(json)] + pub programmatic_results: serde_json::Value, + #[sqlx(json)] + pub llm_results: serde_json::Value, + #[sqlx(json)] + pub criteria_results: serde_json::Value, + pub summary_feedback: String, + pub rework_instructions: Option<String>, + #[sqlx(json)] + pub directive_snapshot: Option<serde_json::Value>, + #[sqlx(json)] + pub deliverables_snapshot: Option<serde_json::Value>, + pub started_at: DateTime<Utc>, + pub completed_at: Option<DateTime<Utc>>, + pub created_at: DateTime<Utc>, +} + +/// Full row from directive_events table. +#[derive(Debug, Clone, FromRow, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct DirectiveEvent { + pub id: Uuid, + pub directive_id: Uuid, + pub chain_id: Option<Uuid>, + pub step_id: Option<Uuid>, + pub event_type: String, + pub severity: String, + #[sqlx(json)] + pub event_data: Option<serde_json::Value>, + pub actor_type: String, + pub actor_id: Option<Uuid>, + pub created_at: DateTime<Utc>, +} + +/// Response for evaluation list endpoint. +#[derive(Debug, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct EvaluationListResponse { + pub evaluations: Vec<DirectiveEvaluation>, + pub total: i64, } diff --git a/makima/src/db/repository.rs b/makima/src/db/repository.rs index e072eb8..d50ef61 100644 --- a/makima/src/db/repository.rs +++ b/makima/src/db/repository.rs @@ -11,12 +11,13 @@ use super::models::{ ContractTypeTemplateRecord, ConversationMessage, ConversationSnapshot, CreateContractRequest, CreateDirectiveRequest, CreateFileRequest, CreateTaskRequest, CreateTemplateRequest, Daemon, DaemonTaskAssignment, DaemonWithCapacity, - DeliverableDefinition, Directive, DirectiveChain, DirectiveSummary, + DeliverableDefinition, Directive, DirectiveChain, DirectiveEvaluation, DirectiveEvent, + DirectiveSummary, File, FileSummary, FileVersion, HistoryEvent, HistoryQueryFilters, MeshChatConversation, MeshChatMessageRecord, PhaseChangeResult, PhaseConfig, - PhaseDefinition, SupervisorHeartbeatRecord, SupervisorState, Task, TaskCheckpoint, - TaskEvent, TaskSummary, UpdateContractRequest, UpdateDirectiveRequest, - UpdateFileRequest, UpdateTaskRequest, UpdateTemplateRequest, + PhaseDefinition, StepContractSummary, SupervisorHeartbeatRecord, SupervisorState, + Task, TaskCheckpoint, TaskEvent, TaskSummary, UpdateContractRequest, + UpdateDirectiveRequest, UpdateFileRequest, UpdateTaskRequest, UpdateTemplateRequest, }; /// Repository error types. @@ -5184,6 +5185,29 @@ pub async fn list_steps_for_chain( .await } +/// Batch-fetch lightweight contract summaries for a set of contract IDs. +pub async fn get_contract_summaries_batch( + pool: &PgPool, + contract_ids: &[Uuid], +) -> Result<Vec<StepContractSummary>, sqlx::Error> { + sqlx::query_as::<_, StepContractSummary>( + r#" + SELECT c.id, c.name, c.contract_type, c.phase, c.status, + COUNT(t.id) as task_count, + COUNT(t.id) FILTER (WHERE t.status IN ('done','merged')) as tasks_done, + COUNT(t.id) FILTER (WHERE t.status IN ('running','initializing','starting')) as tasks_running, + COUNT(t.id) FILTER (WHERE t.status = 'failed') as tasks_failed + FROM contracts c + LEFT JOIN tasks t ON t.contract_id = c.id + WHERE c.id = ANY($1) + GROUP BY c.id, c.name, c.contract_type, c.phase, c.status + "#, + ) + .bind(contract_ids) + .fetch_all(pool) + .await +} + // ββ Directive orchestration functions βββββββββββββββββββββββββββββββββββββββ /// Update directive status with automatic timestamp management. @@ -5479,3 +5503,185 @@ pub async fn update_chain_status( .fetch_optional(pool) .await } + +// ββ Directive monitoring / evaluation functions βββββββββββββββββββββββββββββ + +/// Create a directive evaluation record. evaluation_number is auto-incremented per step. +pub async fn create_directive_evaluation( + pool: &PgPool, + directive_id: Uuid, + chain_id: Uuid, + step_id: Uuid, + contract_id: Uuid, + evaluation_type: &str, + evaluator: Option<&str>, + passed: bool, + overall_score: Option<f64>, + confidence_level: Option<&str>, + criteria_results: &serde_json::Value, + summary_feedback: &str, + rework_instructions: Option<&str>, +) -> Result<DirectiveEvaluation, sqlx::Error> { + sqlx::query_as::<_, DirectiveEvaluation>( + r#" + INSERT INTO directive_evaluations ( + directive_id, chain_id, step_id, contract_id, + evaluation_type, evaluation_number, evaluator, + passed, overall_score, confidence_level, + criteria_results, summary_feedback, rework_instructions, + completed_at + ) + VALUES ( + $1, $2, $3, $4, + $5, COALESCE((SELECT MAX(evaluation_number) FROM directive_evaluations WHERE step_id = $3), 0) + 1, $6, + $7, $8, $9, + $10, $11, $12, + NOW() + ) + RETURNING * + "#, + ) + .bind(directive_id) + .bind(chain_id) + .bind(step_id) + .bind(contract_id) + .bind(evaluation_type) + .bind(evaluator) + .bind(passed) + .bind(overall_score) + .bind(confidence_level) + .bind(criteria_results) + .bind(summary_feedback) + .bind(rework_instructions) + .fetch_one(pool) + .await +} + +/// List evaluations for a step, ordered by evaluation_number. +pub async fn list_evaluations_for_step( + pool: &PgPool, + step_id: Uuid, +) -> Result<Vec<DirectiveEvaluation>, sqlx::Error> { + sqlx::query_as::<_, DirectiveEvaluation>( + r#" + SELECT * FROM directive_evaluations + WHERE step_id = $1 + ORDER BY evaluation_number ASC + "#, + ) + .bind(step_id) + .fetch_all(pool) + .await +} + +/// Create a directive event. +pub async fn create_directive_event( + pool: &PgPool, + directive_id: Uuid, + chain_id: Option<Uuid>, + step_id: Option<Uuid>, + event_type: &str, + severity: &str, + event_data: Option<&serde_json::Value>, + actor_type: &str, + actor_id: Option<Uuid>, +) -> Result<DirectiveEvent, sqlx::Error> { + sqlx::query_as::<_, DirectiveEvent>( + r#" + INSERT INTO directive_events (directive_id, chain_id, step_id, event_type, severity, event_data, actor_type, actor_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + RETURNING * + "#, + ) + .bind(directive_id) + .bind(chain_id) + .bind(step_id) + .bind(event_type) + .bind(severity) + .bind(event_data) + .bind(actor_type) + .bind(actor_id) + .fetch_one(pool) + .await +} + +/// Update step evaluation fields after an evaluation completes. +pub async fn update_step_evaluation_fields( + pool: &PgPool, + step_id: Uuid, + confidence_score: Option<f64>, + confidence_level: Option<&str>, + last_evaluation_id: Uuid, +) -> Result<Option<ChainStep>, sqlx::Error> { + sqlx::query_as::<_, ChainStep>( + r#" + UPDATE chain_steps + SET confidence_score = $2, + confidence_level = $3, + evaluation_count = evaluation_count + 1, + last_evaluation_id = $4 + WHERE id = $1 + RETURNING * + "#, + ) + .bind(step_id) + .bind(confidence_score) + .bind(confidence_level) + .bind(last_evaluation_id) + .fetch_optional(pool) + .await +} + +/// Update step monitoring contract/task references. +pub async fn update_step_monitoring_contract( + pool: &PgPool, + step_id: Uuid, + monitoring_contract_id: Uuid, + monitoring_task_id: Uuid, +) -> Result<Option<ChainStep>, sqlx::Error> { + sqlx::query_as::<_, ChainStep>( + r#" + UPDATE chain_steps + SET monitoring_contract_id = $2, + monitoring_task_id = $3 + WHERE id = $1 + RETURNING * + "#, + ) + .bind(step_id) + .bind(monitoring_contract_id) + .bind(monitoring_task_id) + .fetch_optional(pool) + .await +} + +/// Increment step rework_count. +pub async fn increment_step_rework_count( + pool: &PgPool, + step_id: Uuid, +) -> Result<Option<ChainStep>, sqlx::Error> { + sqlx::query_as::<_, ChainStep>( + r#" + UPDATE chain_steps + SET rework_count = rework_count + 1 + WHERE id = $1 + RETURNING * + "#, + ) + .bind(step_id) + .fetch_optional(pool) + .await +} + +/// Get a chain step by its monitoring contract ID. +pub async fn get_step_by_monitoring_contract_id( + pool: &PgPool, + contract_id: Uuid, +) -> Result<Option<ChainStep>, sqlx::Error> { + sqlx::query_as::<_, ChainStep>( + r#"SELECT * FROM chain_steps WHERE monitoring_contract_id = $1"#, + ) + .bind(contract_id) + .fetch_optional(pool) + .await +} diff --git a/makima/src/orchestration/directive.rs b/makima/src/orchestration/directive.rs index d17deeb..e779c18 100644 --- a/makima/src/orchestration/directive.rs +++ b/makima/src/orchestration/directive.rs @@ -4,8 +4,9 @@ use serde::Deserialize; use sqlx::PgPool; use uuid::Uuid; +use serde::Serialize; use crate::db::models::{ - CreateContractRequest, CreateTaskRequest, Directive, Task, UpdateContractRequest, + ChainStep, CreateContractRequest, CreateTaskRequest, Directive, Task, UpdateContractRequest, }; use crate::db::repository; use crate::server::state::SharedState; @@ -26,6 +27,20 @@ struct ChainPlan { steps: Vec<ChainPlanStep>, } +/// Result written by the monitoring supervisor after evaluating a step. +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct MonitoringResult { + passed: bool, + overall_score: Option<f64>, + confidence_level: Option<String>, + #[serde(default)] + criteria_results: serde_json::Value, + #[serde(default)] + summary_feedback: String, + rework_instructions: Option<String>, +} + /// Initialize a directive: create a planning contract and transition to "planning". pub async fn init_directive( pool: &PgPool, @@ -195,8 +210,18 @@ pub async fn on_task_completed( on_planning_completed(pool, state, &directive, task, owner_id).await?; } } else if contract.directive_id.is_some() { - // This is a step contract completion - on_step_completed(pool, state, &contract, task, owner_id).await?; + // Check if this is a monitoring contract completion + let monitoring_step = + repository::get_step_by_monitoring_contract_id(pool, contract_id) + .await + .map_err(|e| format!("Failed to check monitoring contract: {}", e))?; + + if let Some(step) = monitoring_step { + on_monitoring_completed(pool, state, &contract, &step, task, owner_id).await?; + } else { + // This is a step contract completion + on_step_completed(pool, state, &contract, task, owner_id).await?; + } } Ok(()) @@ -403,32 +428,54 @@ async fn on_step_completed( return Ok(()); }; - // Update step status based on task outcome - let new_status = if task.status == "done" { - "passed" - } else { - "failed" - }; - - repository::update_step_status(pool, step.id, new_status) - .await - .map_err(|e| format!("Failed to update step status: {}", e))?; - - tracing::info!( - directive_id = %directive_id, - step_id = %step.id, - step_name = %step.name, - new_status = new_status, - "Step completed" - ); - - // Get the directive and advance + // Get the directive for threshold info let directive = repository::get_directive(pool, directive_id) .await .map_err(|e| format!("Failed to get directive: {}", e))? .ok_or("Directive not found")?; - advance_chain(pool, state, &directive, owner_id).await + if task.status == "done" { + // Step task succeeded β dispatch monitoring evaluation + repository::update_step_status(pool, step.id, "evaluating") + .await + .map_err(|e| format!("Failed to update step status: {}", e))?; + + let _ = repository::create_directive_event( + pool, + directive.id, + directive.current_chain_id, + Some(step.id), + "step_evaluating", + "info", + None, + "system", + None, + ) + .await; + + tracing::info!( + directive_id = %directive_id, + step_id = %step.id, + step_name = %step.name, + "Step task done, dispatching monitoring evaluation" + ); + + dispatch_monitoring(pool, &directive, &step, contract, owner_id).await + } else { + // Step task failed β mark step failed and advance + repository::update_step_status(pool, step.id, "failed") + .await + .map_err(|e| format!("Failed to update step status: {}", e))?; + + tracing::info!( + directive_id = %directive_id, + step_id = %step.id, + step_name = %step.name, + "Step failed" + ); + + advance_chain(pool, state, &directive, owner_id).await + } } /// Check chain progress and dispatch ready steps or mark directive complete. @@ -734,3 +781,470 @@ fn extract_plan_json(body: &[crate::db::models::BodyElement]) -> Option<String> None } + +/// Dispatch a monitoring contract to evaluate a completed step. +async fn dispatch_monitoring( + pool: &PgPool, + directive: &Directive, + step: &ChainStep, + step_contract: &crate::db::models::Contract, + owner_id: Uuid, +) -> Result<(), String> { + // Create monitoring contract + let contract = repository::create_contract_for_owner( + pool, + owner_id, + CreateContractRequest { + name: format!("{} - Monitor", step.name), + description: Some(format!("Monitoring evaluation for step: {}", step.name)), + contract_type: Some("monitoring".to_string()), + template_id: None, + initial_phase: Some("plan".to_string()), + autonomous_loop: Some(true), + phase_guard: None, + local_only: Some(true), + auto_merge_local: None, + }, + ) + .await + .map_err(|e| format!("Failed to create monitoring contract: {}", e))?; + + // Mark contract as directive-related (not orchestrator) + repository::set_contract_directive_fields(pool, contract.id, Some(directive.id), false) + .await + .map_err(|e| format!("Failed to set monitoring contract directive fields: {}", e))?; + + // Build evaluation prompt + let prompt = build_monitoring_prompt(directive, step, step_contract); + + // Create supervisor task + let supervisor_task = repository::create_task_for_owner( + pool, + owner_id, + CreateTaskRequest { + contract_id: Some(contract.id), + name: format!("{} - Evaluator", step.name), + description: Some("Evaluate step output against directive criteria".to_string()), + plan: prompt, + parent_task_id: None, + is_supervisor: true, + priority: 8, + repository_url: directive.repository_url.clone(), + base_branch: directive.base_branch.clone(), + target_branch: None, + merge_mode: None, + target_repo_path: directive.local_path.clone(), + completion_action: None, + continue_from_task_id: None, + copy_files: None, + checkpoint_sha: None, + branched_from_task_id: None, + conversation_history: None, + supervisor_worktree_task_id: None, + }, + ) + .await + .map_err(|e| format!("Failed to create monitoring supervisor task: {}", e))?; + + // Link supervisor to contract + repository::update_contract_for_owner( + pool, + contract.id, + owner_id, + UpdateContractRequest { + supervisor_task_id: Some(supervisor_task.id), + ..Default::default() + }, + ) + .await + .map_err(|e| match e { + crate::db::repository::RepositoryError::Database(e) => { + format!("Failed to link supervisor to monitoring contract: {}", e) + } + other => format!("Failed to link supervisor to monitoring contract: {:?}", other), + })?; + + // Link step to monitoring contract/task + repository::update_step_monitoring_contract(pool, step.id, contract.id, supervisor_task.id) + .await + .map_err(|e| format!("Failed to update step monitoring contract link: {}", e))?; + + // Copy repo config from directive to monitoring contract + if let Some(ref repo_url) = directive.repository_url { + let _ = repository::add_remote_repository( + pool, + contract.id, + "directive-repo", + repo_url, + true, + ) + .await; + } else if let Some(ref local_path) = directive.local_path { + let _ = repository::add_local_repository( + pool, + contract.id, + "directive-repo", + local_path, + true, + ) + .await; + } + + tracing::info!( + directive_id = %directive.id, + step_id = %step.id, + step_name = %step.name, + monitoring_contract_id = %contract.id, + monitoring_task_id = %supervisor_task.id, + "Monitoring evaluation dispatched" + ); + + Ok(()) +} + +/// Build the monitoring supervisor prompt. +fn build_monitoring_prompt( + directive: &Directive, + step: &ChainStep, + step_contract: &crate::db::models::Contract, +) -> String { + format!( + r#"You are evaluating the output of a completed step in a directive chain. + +DIRECTIVE: {title} +GOAL: {goal} +REQUIREMENTS: {requirements} +ACCEPTANCE CRITERIA: {acceptance_criteria} +CONSTRAINTS: {constraints} + +STEP: {step_name} +STEP DESCRIPTION: {step_description} +STEP TASK PLAN: {task_plan} +STEP CONTRACT ID: {step_contract_id} + +CONFIDENCE THRESHOLDS: +- Green (pass): >= {threshold_green} +- Yellow (marginal): >= {threshold_yellow} +- Red (fail): < {threshold_yellow} + +Your job: +1. Read the step contract's files to understand what was delivered: + makima contract files --contract-id {step_contract_id} + makima contract file <file_id> --contract-id {step_contract_id} + +2. Evaluate whether the step's output meets the directive's requirements and the step's specific task plan. + +3. Write your evaluation result as a JSON file named "evaluation-result" to this contract: + makima contract create-file "evaluation-result" < evaluation.json + +The JSON format: +{{ + "passed": true/false, + "overallScore": 0.0-1.0, + "confidenceLevel": "green" | "yellow" | "red", + "criteriaResults": [ + {{ + "criterion": "Description of what was checked", + "passed": true/false, + "score": 0.0-1.0, + "evidence": "Evidence supporting the assessment" + }} + ], + "summaryFeedback": "Brief summary of the evaluation", + "reworkInstructions": "If failed, specific instructions for rework (null if passed)" +}} + +Scoring guidelines: +- Score >= {threshold_green}: confidenceLevel = "green", passed = true +- Score >= {threshold_yellow} and < {threshold_green}: confidenceLevel = "yellow", use judgment on passed +- Score < {threshold_yellow}: confidenceLevel = "red", passed = false +- Be specific in reworkInstructions if the step fails β the step will be re-executed with these instructions. + +After writing the evaluation file, mark the contract as complete: + makima supervisor complete"#, + title = directive.title, + goal = directive.goal, + requirements = serde_json::to_string_pretty(&directive.requirements).unwrap_or_default(), + acceptance_criteria = serde_json::to_string_pretty(&directive.acceptance_criteria).unwrap_or_default(), + constraints = serde_json::to_string_pretty(&directive.constraints).unwrap_or_default(), + step_name = step.name, + step_description = step.description.as_deref().unwrap_or("N/A"), + task_plan = step.task_plan.as_deref().unwrap_or("N/A"), + step_contract_id = step_contract.id, + threshold_green = directive.confidence_threshold_green, + threshold_yellow = directive.confidence_threshold_yellow, + ) +} + +/// Handle monitoring contract task completion β parse evaluation and decide step outcome. +async fn on_monitoring_completed( + pool: &PgPool, + state: &SharedState, + contract: &crate::db::models::Contract, + step: &ChainStep, + task: &Task, + owner_id: Uuid, +) -> Result<(), String> { + // Only process supervisor task completions + if !task.is_supervisor { + return Ok(()); + } + + let Some(directive_id) = contract.directive_id else { + return Ok(()); + }; + + let directive = repository::get_directive(pool, directive_id) + .await + .map_err(|e| format!("Failed to get directive: {}", e))? + .ok_or("Directive not found")?; + + // If monitoring task itself failed, fail-open: mark step as passed + if task.status == "failed" { + tracing::warn!( + directive_id = %directive_id, + step_id = %step.id, + "Monitoring task failed, fail-open: marking step as passed" + ); + + repository::update_step_status(pool, step.id, "passed") + .await + .map_err(|e| format!("Failed to update step status: {}", e))?; + + let _ = repository::create_directive_event( + pool, + directive_id, + directive.current_chain_id, + Some(step.id), + "monitoring_failed_open", + "warn", + None, + "system", + None, + ) + .await; + + return advance_chain(pool, state, &directive, owner_id).await; + } + + if task.status != "done" { + return Ok(()); + } + + // Read evaluation result from monitoring contract files + let files = repository::list_files_in_contract(pool, contract.id, owner_id) + .await + .map_err(|e| format!("Failed to list monitoring contract files: {}", e))?; + + let eval_file = files.iter().find(|f| { + let name_lower = f.name.to_lowercase(); + name_lower.contains("evaluation") || name_lower.contains("eval") + }); + + let eval_file = eval_file.or_else(|| files.first()); + + let monitoring_result = if let Some(eval_file) = eval_file { + let full_file = repository::get_file(pool, eval_file.id) + .await + .map_err(|e| format!("Failed to get evaluation file: {}", e))?; + + if let Some(full_file) = full_file { + let json_str = extract_plan_json(&full_file.body); + json_str.and_then(|s| serde_json::from_str::<MonitoringResult>(&s).ok()) + } else { + None + } + } else { + None + }; + + // If we couldn't parse the result, fail-open + let Some(result) = monitoring_result else { + tracing::warn!( + directive_id = %directive_id, + step_id = %step.id, + "Could not parse monitoring result, fail-open: marking step as passed" + ); + + repository::update_step_status(pool, step.id, "passed") + .await + .map_err(|e| format!("Failed to update step status: {}", e))?; + + let _ = repository::create_directive_event( + pool, + directive_id, + directive.current_chain_id, + Some(step.id), + "monitoring_parse_failed_open", + "warn", + None, + "system", + None, + ) + .await; + + return advance_chain(pool, state, &directive, owner_id).await; + }; + + // Create evaluation record + let chain_id = directive.current_chain_id.unwrap_or(step.chain_id); + let evaluation = repository::create_directive_evaluation( + pool, + directive_id, + chain_id, + step.id, + contract.id, + "monitoring", + Some("automated"), + result.passed, + result.overall_score, + result.confidence_level.as_deref(), + &result.criteria_results, + &result.summary_feedback, + result.rework_instructions.as_deref(), + ) + .await + .map_err(|e| format!("Failed to create directive evaluation: {}", e))?; + + // Update step evaluation fields + repository::update_step_evaluation_fields( + pool, + step.id, + result.overall_score, + result.confidence_level.as_deref(), + evaluation.id, + ) + .await + .map_err(|e| format!("Failed to update step evaluation fields: {}", e))?; + + // Create event + let event_data = serde_json::json!({ + "passed": result.passed, + "overallScore": result.overall_score, + "confidenceLevel": result.confidence_level, + "summaryFeedback": result.summary_feedback, + }); + let _ = repository::create_directive_event( + pool, + directive_id, + Some(chain_id), + Some(step.id), + if result.passed { "step_evaluation_passed" } else { "step_evaluation_failed" }, + "info", + Some(&event_data), + "system", + None, + ) + .await; + + if result.passed { + // Evaluation passed β mark step as passed + tracing::info!( + directive_id = %directive_id, + step_id = %step.id, + step_name = %step.name, + score = ?result.overall_score, + "Step evaluation passed" + ); + + repository::update_step_status(pool, step.id, "passed") + .await + .map_err(|e| format!("Failed to update step status: {}", e))?; + + advance_chain(pool, state, &directive, owner_id).await + } else { + // Evaluation failed β check rework budget + let max_rework = directive.max_rework_cycles.unwrap_or(3); + if step.rework_count >= max_rework { + tracing::warn!( + directive_id = %directive_id, + step_id = %step.id, + step_name = %step.name, + rework_count = step.rework_count, + max_rework = max_rework, + "Step evaluation failed, max rework cycles exceeded" + ); + + repository::update_step_status(pool, step.id, "failed") + .await + .map_err(|e| format!("Failed to update step status: {}", e))?; + + advance_chain(pool, state, &directive, owner_id).await + } else { + tracing::info!( + directive_id = %directive_id, + step_id = %step.id, + step_name = %step.name, + rework_count = step.rework_count, + "Step evaluation failed, scheduling rework" + ); + + repository::increment_step_rework_count(pool, step.id) + .await + .map_err(|e| format!("Failed to increment rework count: {}", e))?; + + // Set step back to pending so advance_chain re-dispatches it + repository::update_step_status(pool, step.id, "pending") + .await + .map_err(|e| format!("Failed to update step status: {}", e))?; + + advance_chain(pool, state, &directive, owner_id).await + } + } +} + +/// Trigger a manual evaluation for a step. Public for use by handlers. +pub async fn trigger_manual_evaluation( + pool: &PgPool, + _state: &SharedState, + owner_id: Uuid, + directive_id: Uuid, + step_id: Uuid, +) -> Result<ChainStep, String> { + let directive = repository::get_directive_for_owner(pool, directive_id, owner_id) + .await + .map_err(|e| format!("Failed to get directive: {}", e))? + .ok_or("Directive not found")?; + + // Get the step β find via chain steps + let chain_id = directive.current_chain_id.ok_or("Directive has no active chain")?; + let steps = repository::list_steps_for_chain(pool, chain_id) + .await + .map_err(|e| format!("Failed to list steps: {}", e))?; + + let step = steps + .into_iter() + .find(|s| s.id == step_id) + .ok_or("Step not found in current chain")?; + + // Step must have a contract_id (must have been executed) + let contract_id = step.contract_id.ok_or("Step has no contract β it hasn't been executed yet")?; + + let contract = repository::get_contract_for_owner(pool, contract_id, owner_id) + .await + .map_err(|e| format!("Failed to get step contract: {}", e))? + .ok_or("Step contract not found")?; + + // Set step to evaluating + let updated_step = repository::update_step_status(pool, step.id, "evaluating") + .await + .map_err(|e| format!("Failed to update step status: {}", e))? + .ok_or("Step not found after status update")?; + + let _ = repository::create_directive_event( + pool, + directive.id, + directive.current_chain_id, + Some(step.id), + "manual_evaluation_triggered", + "info", + None, + "user", + None, + ) + .await; + + dispatch_monitoring(pool, &directive, &step, &contract, owner_id).await?; + + Ok(updated_step) +} diff --git a/makima/src/server/handlers/directives.rs b/makima/src/server/handlers/directives.rs index a877c6b..65f32d5 100644 --- a/makima/src/server/handlers/directives.rs +++ b/makima/src/server/handlers/directives.rs @@ -8,9 +8,12 @@ use axum::{ }; use uuid::Uuid; +use std::collections::HashMap; + use crate::db::models::{ - ChainStep, ChainWithSteps, CreateDirectiveRequest, Directive, DirectiveChain, - DirectiveListResponse, DirectiveWithChains, UpdateDirectiveRequest, + ChainStep, ChainStepWithContract, ChainWithSteps, CreateDirectiveRequest, Directive, + DirectiveChain, DirectiveListResponse, DirectiveWithChains, EvaluationListResponse, + StepContractSummary, UpdateDirectiveRequest, }; use crate::db::repository::{self, RepositoryError}; use crate::orchestration; @@ -123,8 +126,8 @@ pub async fn get_directive( }; // Build chains with steps - let mut chains_with_steps = Vec::new(); - for chain in chains { + let mut all_steps_by_chain = Vec::new(); + for chain in &chains { let steps = match repository::list_steps_for_chain(pool, chain.id).await { Ok(s) => s, Err(e) => { @@ -132,11 +135,61 @@ pub async fn get_directive( Vec::new() } }; - chains_with_steps.push(ChainWithSteps { chain, steps }); + all_steps_by_chain.push(steps); + } + + // Collect all contract IDs (from steps + orchestrator) + let mut contract_ids: Vec<Uuid> = all_steps_by_chain + .iter() + .flat_map(|steps| steps.iter().filter_map(|s| s.contract_id)) + .collect(); + if let Some(orch_id) = directive.orchestrator_contract_id { + contract_ids.push(orch_id); } + // Batch fetch contract summaries + let mut summary_map: HashMap<Uuid, StepContractSummary> = if contract_ids.is_empty() { + HashMap::new() + } else { + match repository::get_contract_summaries_batch(pool, &contract_ids).await { + Ok(summaries) => summaries.into_iter().map(|s| (s.id, s)).collect(), + Err(e) => { + tracing::warn!("Failed to fetch contract summaries: {}", e); + HashMap::new() + } + } + }; + + // Build enriched chains + let chains_with_steps: Vec<ChainWithSteps> = chains + .into_iter() + .zip(all_steps_by_chain.into_iter()) + .map(|(chain, steps)| { + let enriched_steps = steps + .into_iter() + .map(|step| { + let contract_summary = + step.contract_id.and_then(|id| summary_map.remove(&id)); + ChainStepWithContract { + step, + contract_summary, + } + }) + .collect(); + ChainWithSteps { + chain, + steps: enriched_steps, + } + }) + .collect(); + + let orchestrator_contract_summary = directive + .orchestrator_contract_id + .and_then(|id| summary_map.remove(&id)); + Json(DirectiveWithChains { directive, + orchestrator_contract_summary, chains: chains_with_steps, }) .into_response() @@ -454,7 +507,37 @@ pub async fn get_chain( } }; - Json(ChainWithSteps { chain, steps }).into_response() + // Collect contract IDs from steps + let contract_ids: Vec<Uuid> = steps.iter().filter_map(|s| s.contract_id).collect(); + + let mut summary_map: HashMap<Uuid, StepContractSummary> = if contract_ids.is_empty() { + HashMap::new() + } else { + match repository::get_contract_summaries_batch(pool, &contract_ids).await { + Ok(summaries) => summaries.into_iter().map(|s| (s.id, s)).collect(), + Err(e) => { + tracing::warn!("Failed to fetch contract summaries: {}", e); + HashMap::new() + } + } + }; + + let enriched_steps = steps + .into_iter() + .map(|step| { + let contract_summary = step.contract_id.and_then(|id| summary_map.remove(&id)); + ChainStepWithContract { + step, + contract_summary, + } + }) + .collect(); + + Json(ChainWithSteps { + chain, + steps: enriched_steps, + }) + .into_response() } /// Start a directive: create a planning contract and begin orchestration. @@ -513,3 +596,131 @@ pub async fn start_directive( } } } + +/// Trigger a manual evaluation for a step. +#[utoipa::path( + post, + path = "/api/v1/directives/{id}/steps/{step_id}/evaluate", + params( + ("id" = Uuid, Path, description = "Directive ID"), + ("step_id" = Uuid, Path, description = "Step ID") + ), + responses( + (status = 200, description = "Evaluation triggered", body = ChainStep), + (status = 400, description = "Step cannot be evaluated", body = ApiError), + (status = 401, description = "Unauthorized", body = ApiError), + (status = 404, description = "Not found", body = ApiError), + (status = 503, description = "Database not configured", body = ApiError), + (status = 500, description = "Internal server error", body = ApiError), + ), + security( + ("bearer_auth" = []), + ("api_key" = []) + ), + tag = "Directives" +)] +pub async fn evaluate_step( + State(state): State<SharedState>, + Authenticated(auth): Authenticated, + Path((id, step_id)): Path<(Uuid, Uuid)>, +) -> impl IntoResponse { + let Some(ref pool) = state.db_pool else { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(ApiError::new("DB_UNAVAILABLE", "Database not configured")), + ) + .into_response(); + }; + + match orchestration::directive::trigger_manual_evaluation(pool, &state, auth.owner_id, id, step_id).await { + Ok(step) => Json(step).into_response(), + Err(e) if e.contains("not found") || e.contains("Not found") => ( + StatusCode::NOT_FOUND, + Json(ApiError::new("NOT_FOUND", e)), + ) + .into_response(), + Err(e) if e.contains("hasn't been executed") || e.contains("no active chain") => ( + StatusCode::BAD_REQUEST, + Json(ApiError::new("INVALID_STATE", e)), + ) + .into_response(), + Err(e) => { + tracing::error!("Failed to trigger evaluation for step {}: {}", step_id, e); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiError::new("EVALUATION_FAILED", e)), + ) + .into_response() + } + } +} + +/// List evaluations for a step. +#[utoipa::path( + get, + path = "/api/v1/directives/{id}/steps/{step_id}/evaluations", + params( + ("id" = Uuid, Path, description = "Directive ID"), + ("step_id" = Uuid, Path, description = "Step ID") + ), + responses( + (status = 200, description = "List of evaluations", body = EvaluationListResponse), + (status = 401, description = "Unauthorized", body = ApiError), + (status = 404, description = "Not found", body = ApiError), + (status = 503, description = "Database not configured", body = ApiError), + (status = 500, description = "Internal server error", body = ApiError), + ), + security( + ("bearer_auth" = []), + ("api_key" = []) + ), + tag = "Directives" +)] +pub async fn list_evaluations( + State(state): State<SharedState>, + Authenticated(auth): Authenticated, + Path((id, step_id)): Path<(Uuid, Uuid)>, +) -> impl IntoResponse { + let Some(ref pool) = state.db_pool else { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(ApiError::new("DB_UNAVAILABLE", "Database not configured")), + ) + .into_response(); + }; + + // Verify directive exists and belongs to owner + match repository::get_directive_for_owner(pool, id, auth.owner_id).await { + Ok(Some(_)) => {} + Ok(None) => { + return ( + StatusCode::NOT_FOUND, + Json(ApiError::new("NOT_FOUND", "Directive not found")), + ) + .into_response(); + } + Err(e) => { + tracing::error!("Failed to get directive {}: {}", id, e); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiError::new("DB_ERROR", e.to_string())), + ) + .into_response(); + } + } + + match repository::list_evaluations_for_step(pool, step_id).await { + Ok(evaluations) => { + let total = evaluations.len() as i64; + Json(EvaluationListResponse { evaluations, total }).into_response() + } + Err(e) => { + tracing::error!("Failed to list evaluations for step {}: {}", step_id, e); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ApiError::new("DB_ERROR", e.to_string())), + ) + .into_response() + } + } +} diff --git a/makima/src/server/mod.rs b/makima/src/server/mod.rs index 1a59e12..c8242ae 100644 --- a/makima/src/server/mod.rs +++ b/makima/src/server/mod.rs @@ -184,6 +184,8 @@ pub fn make_router(state: SharedState) -> Router { .route("/directives/{id}/start", post(directives::start_directive)) .route("/directives/{id}/chains", get(directives::list_chains)) .route("/directives/{id}/chains/{chain_id}", get(directives::get_chain)) + .route("/directives/{id}/steps/{step_id}/evaluate", post(directives::evaluate_step)) + .route("/directives/{id}/steps/{step_id}/evaluations", get(directives::list_evaluations)) // Contract supervisor resume endpoints .route("/contracts/{id}/supervisor/resume", post(mesh_supervisor::resume_supervisor)) .route("/contracts/{id}/supervisor/conversation/rewind", post(mesh_supervisor::rewind_conversation)) diff --git a/makima/src/server/openapi.rs b/makima/src/server/openapi.rs index 96c19e0..e680c07 100644 --- a/makima/src/server/openapi.rs +++ b/makima/src/server/openapi.rs @@ -4,17 +4,20 @@ use utoipa::OpenApi; use crate::db::models::{ AddLocalRepositoryRequest, AddRemoteRepositoryRequest, BranchInfo, BranchListResponse, - BranchTaskRequest, BranchTaskResponse, ChainStep, ChainWithSteps, ChangePhaseRequest, + BranchTaskRequest, BranchTaskResponse, ChainStep, ChainStepWithContract, ChainWithSteps, + ChangePhaseRequest, Contract, ContractChatHistoryResponse, ContractChatMessageRecord, ContractEvent, ContractListResponse, ContractRepository, ContractSummary, ContractWithRelations, CreateContractRequest, CreateDirectiveRequest, CreateFileRequest, CreateManagedRepositoryRequest, CreateTaskRequest, Daemon, DaemonDirectoriesResponse, - DaemonDirectory, DaemonListResponse, Directive, DirectiveChain, DirectiveListResponse, - DirectiveSummary, DirectiveWithChains, File, FileListResponse, FileSummary, + DaemonDirectory, DaemonListResponse, Directive, DirectiveChain, DirectiveEvaluation, + DirectiveEvent, DirectiveListResponse, DirectiveSummary, DirectiveWithChains, + EvaluationListResponse, File, FileListResponse, FileSummary, MergeCommitRequest, MergeCompleteCheckResponse, MergeResolveRequest, MergeResultResponse, MergeSkipRequest, MergeStartRequest, MergeStatusResponse, MeshChatConversation, MeshChatHistoryResponse, MeshChatMessageRecord, RepositoryHistoryEntry, - RepositoryHistoryListResponse, RepositorySuggestionsQuery, SendMessageRequest, Task, + RepositoryHistoryListResponse, RepositorySuggestionsQuery, SendMessageRequest, + StepContractSummary, Task, TaskEventListResponse, TaskListResponse, TaskSummary, TaskWithSubtasks, TranscriptEntry, UpdateContractRequest, UpdateDirectiveRequest, UpdateFileRequest, UpdateTaskRequest, }; @@ -114,6 +117,8 @@ use crate::server::messages::{ApiError, AudioEncoding, StartMessage, StopMessage directives::start_directive, directives::list_chains, directives::get_chain, + directives::evaluate_step, + directives::list_evaluations, ), components( schemas( @@ -205,9 +210,14 @@ use crate::server::messages::{ApiError, AudioEncoding, StartMessage, StopMessage DirectiveWithChains, DirectiveChain, ChainStep, + ChainStepWithContract, ChainWithSteps, + StepContractSummary, CreateDirectiveRequest, UpdateDirectiveRequest, + DirectiveEvaluation, + DirectiveEvent, + EvaluationListResponse, ) ), tags( |
