From 88a4f15ce1310f8ee8693835be14aa5280233f17 Mon Sep 17 00:00:00 2001
From: soryu <soryu@soryu.co>
Date: Thu, 5 Feb 2026 23:42:48 +0000
Subject: Add directive-first chain system redesign

Redesigns the chain system with a directive-first architecture where
Directive is the top-level entity (the "why/what") and Chains are
generated execution plans (the "how") that can be dynamically modified.

Backend:
- Add database migration for directive system tables
- Add Directive, DirectiveChain, ChainStep, DirectiveEvent models
- Add DirectiveVerifier and DirectiveApproval models
- Add orchestration module with engine, planner, and verifier
- Add comprehensive API handlers for directives
- Add daemon CLI commands for directive management
- Add directive skill documentation
- Integrate contract completion with directive engine
- Add SSE endpoint for real-time directive events

Frontend:
- Add directives route with split-view layout
- Add 6-tab detail view (Overview, Chain, Events, Evaluations, Approvals, Verifiers)
- Add React Flow DAG visualization for chain steps
- Add SSE subscription hook for real-time event updates
- Add useDirectives and useDirectiveEventSubscription hooks
- Add directive types and API functions

Fixes:
- Fix test failures in ws/protocol, task_output, completion_gate, patch
- Fix word boundary matching in looks_like_task()
- Fix parse_last() to find actual last completion gate
- Fix create_export_patch when merge-base equals HEAD
- Clean up clippy warnings in new code

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 makima/src/daemon/skills/directive.md | 303 ++++++++++++++++++++++++++++++++++
 makima/src/daemon/skills/mod.rs       |   6 +-
 2 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 makima/src/daemon/skills/directive.md

(limited to 'makima/src/daemon/skills')

diff --git a/makima/src/daemon/skills/directive.md b/makima/src/daemon/skills/directive.md
new file mode 100644
index 0000000..97e8e20
--- /dev/null
+++ b/makima/src/daemon/skills/directive.md
@@ -0,0 +1,303 @@
+---
+name: makima-directive
+description: Directive orchestration tools for autonomous goal-driven execution. Use when working with directives, chains, steps, verifiers, and approvals.
+---
+
+# Directive Orchestration Tools
+
+Directives are top-level goals that drive autonomous execution with configurable guardrails. Each directive generates a chain of steps; each step spawns a contract with a supervisor, and the step's result is verified by programmatic checks and LLM evaluation.
+
+## Architecture
+
+```
+Directive (goal + requirements + acceptance criteria)
+  |
+  +-- Chain (generated DAG execution plan)
+  |     +-- Step 1 (pending -> ready -> running -> evaluating -> passed)
+  |     |     +-- Contract (spawned when step reaches 'ready')
+  |     |           +-- Supervisor Task
+  |     +-- Step 2 (depends_on: [Step 1])
+  |     +-- Step 3 (depends_on: [Step 1], parallel with Step 2)
+  |
+  +-- Verifiers (test runner, linter, build, type checker)
+  +-- Evaluations (programmatic + LLM composite scores)
+  +-- Events (audit stream)
+  +-- Approvals (human-in-the-loop gates)
+```
+
+## Status Flow
+
+### Directive Status
+- `draft` - Created but not started
+- `planning` - Generating chain from requirements
+- `active` - Executing steps
+- `paused` - Temporarily stopped
+- `completed` - All steps passed
+- `archived` - No longer active
+- `failed` - Execution failed
+
+### Step Status
+- `pending` - Waiting for dependencies
+- `ready` - Dependencies met, ready to start
+- `running` - Contract executing
+- `evaluating` - Running verifiers
+- `passed` - Evaluation succeeded
+- `failed` - Evaluation failed and the rework limit was exceeded
+- `rework` - Sent back for corrections
+- `skipped` - Manually skipped
+- `blocked` - Blocked by failed dependency
+
+## Autonomy Levels
+
+- `full_auto` - No approval gates, automatic progression
+- `guardrails` - Request approval for yellow/red confidence scores
+- `manual` - Request approval for all step completions
+
+## Confidence Scoring
+
+Each step evaluation produces a composite confidence score:
+
+1. **Programmatic verifiers** run first (tests, lint, build)
+   - Weight: 1.0 each
+   - If any required verifier fails: automatic RED
+
+2. **LLM evaluation** runs second
+   - Weight: 2.0
+   - Evaluates against acceptance criteria
+
+3. **Composite score** computed as the weighted average of the scores above
+   - GREEN: >= green threshold (default 0.8)
+   - YELLOW: >= yellow threshold (default 0.5) but below the green threshold
+   - RED: below the yellow threshold
+
+## CLI Commands
+
+```bash
+# Create a new directive
+makima directive create --goal "Add OAuth2 authentication" --repository https://github.com/org/repo
+
+# List directives
+makima directive list [--status active]
+
+# Get directive status with progress
+makima directive status <directive-id>
+
+# Start execution (generates chain and begins)
+makima directive start <directive-id>
+
+# View chain steps
+makima directive steps <directive-id>
+
+# View DAG visualization
+makima directive graph <directive-id> --with-status
+
+# View recent events
+makima directive events <directive-id> --limit 20
+
+# Approve a pending request
+makima directive approve <directive-id> <approval-id> [--response "Looks good"]
+
+# Deny a pending request
+makima directive deny <directive-id> <approval-id> [--reason "Need more testing"]
+
+# Lifecycle commands
+makima directive pause <directive-id>
+makima directive resume <directive-id>
+makima directive stop <directive-id>
+makima directive archive <directive-id>
+```
+
+## API Endpoints
+
+### Directive CRUD
+```
+POST   /api/v1/directives                    # Create from goal
+GET    /api/v1/directives                    # List
+GET    /api/v1/directives/:id                # Get with progress
+PUT    /api/v1/directives/:id                # Update
+DELETE /api/v1/directives/:id                # Archive
+```
+
+### Lifecycle
+```
+POST   /api/v1/directives/:id/start          # Plan + execute
+POST   /api/v1/directives/:id/pause          # Pause
+POST   /api/v1/directives/:id/resume         # Resume
+POST   /api/v1/directives/:id/stop           # Stop
+```
+
+### Chain & Steps
+```
+GET    /api/v1/directives/:id/chain          # Current chain + steps
+GET    /api/v1/directives/:id/chain/graph    # DAG for visualization
+POST   /api/v1/directives/:id/chain/replan   # Force regeneration
+POST   /api/v1/directives/:id/chain/steps    # Add step
+PUT    /api/v1/directives/:id/chain/steps/:sid   # Modify step
+DELETE /api/v1/directives/:id/chain/steps/:sid   # Remove step
+```
+
+### Step Operations
+```
+GET    /api/v1/directives/:id/steps/:sid     # Step detail
+POST   /api/v1/directives/:id/steps/:sid/evaluate  # Force re-evaluation
+POST   /api/v1/directives/:id/steps/:sid/skip      # Skip step
+POST   /api/v1/directives/:id/steps/:sid/rework    # Manual rework
+```
+
+### Monitoring
+```
+GET    /api/v1/directives/:id/evaluations    # List evaluations
+GET    /api/v1/directives/:id/events         # Event log (polling)
+GET    /api/v1/directives/:id/events/stream  # Event stream (SSE)
+```
+
+### Verifiers
+```
+GET    /api/v1/directives/:id/verifiers             # List verifiers
+POST   /api/v1/directives/:id/verifiers             # Add verifier
+PUT    /api/v1/directives/:id/verifiers/:vid        # Update verifier
+POST   /api/v1/directives/:id/verifiers/auto-detect # Auto-detect
+```
+
+### Approvals
+```
+GET    /api/v1/directives/:id/approvals              # Pending approvals
+POST   /api/v1/directives/:id/approvals/:aid/approve # Approve
+POST   /api/v1/directives/:id/approvals/:aid/deny    # Deny
+```
+
+## Creating a Directive
+
+### Request
+```json
+POST /api/v1/directives
+{
+  "goal": "Implement user authentication with OAuth2",
+  "repositoryUrl": "https://github.com/org/repo",
+  "autonomyLevel": "guardrails",
+  "confidenceThresholdGreen": 0.8,
+  "confidenceThresholdYellow": 0.5,
+  "maxReworkCycles": 3,
+  "maxTotalCostUsd": 100.0,
+  "maxWallTimeMinutes": 480
+}
+```
+
+### Response
+```json
+{
+  "id": "uuid",
+  "title": "Implement user authentication with OAuth2",
+  "goal": "Implement user authentication with OAuth2",
+  "status": "draft",
+  "autonomyLevel": "guardrails",
+  "createdAt": "2026-02-05T12:00:00Z"
+}
+```
+
+## Starting a Directive
+
+When you start a directive:
+1. The system generates requirements from the goal
+2. The chain planner creates a DAG of steps
+3. Root steps (no dependencies) transition to `ready`
+4. Contracts with supervisors are spawned for ready steps
+5. Verifiers are auto-detected from the repository
+
+## Evaluation Flow
+
+When a contract completes:
+
+1. Step transitions to `evaluating`
+2. **Programmatic verifiers** run (tests, lint, build)
+   - Each produces pass/fail + output
+3. **LLM evaluation** runs
+   - Reviews code against acceptance criteria
+   - Provides feedback and score
+4. **Composite score** computed
+5. Based on the confidence level and the directive's autonomy level:
+   - GREEN: Step passes, downstream steps unblock
+   - YELLOW (guardrails): Request approval
+   - RED: Initiate rework or request approval
+
+## Rework Flow
+
+When a step needs rework:
+
+1. The contract phase is reset to editing
+2. The supervisor receives rework instructions
+3. The rework count is incremented
+4. If the rework count exceeds `maxReworkCycles`: escalate or fail
+
+## Event Types
+
+Events are logged for audit and monitoring:
+
+- `directive_created`, `directive_started`, `directive_paused`, `directive_completed`
+- `chain_generated`, `chain_regenerated`
+- `step_ready`, `step_started`, `step_evaluating`, `step_passed`, `step_failed`
+- `rework_initiated`, `rework_completed`
+- `approval_requested`, `approval_granted`, `approval_denied`
+- `verifier_run`, `evaluation_completed`
+- `circuit_breaker_triggered`
+
+## Verifier Configuration
+
+Verifiers can be auto-detected or manually configured:
+
+```json
+POST /api/v1/directives/:id/verifiers
+{
+  "name": "Test Runner",
+  "verifierType": "test_runner",
+  "command": "npm test",
+  "workingDirectory": ".",
+  "timeoutSeconds": 300,
+  "weight": 1.0,
+  "required": true,
+  "enabled": true
+}
+```
+
+### Auto-Detection
+
+The system detects verifiers from:
+- `package.json` - npm test, npm run lint, npm run build
+- `Cargo.toml` - cargo test, cargo clippy, cargo build
+- `pyproject.toml` - pytest, ruff, mypy
+
+## Circuit Breakers
+
+Directives have built-in circuit breakers:
+
+- `maxTotalCostUsd` - Stop if cumulative cost exceeds limit
+- `maxWallTimeMinutes` - Stop if elapsed time exceeds limit
+- `maxReworkCycles` - Fail step after N rework attempts
+- `maxChainRegenerations` - Fail if chain regenerated too many times
+
+## Example Workflow
+
+```bash
+# 1. Create a directive
+makima directive create \
+  --goal "Add dark mode to the application" \
+  --repository https://github.com/myorg/myapp \
+  --autonomy guardrails
+
+# Returns directive ID: 123e4567-e89b-12d3-a456-426614174000
+
+# 2. Start execution
+makima directive start 123e4567-e89b-12d3-a456-426614174000
+
+# 3. Monitor progress
+makima directive status 123e4567-e89b-12d3-a456-426614174000
+
+# 4. View the execution graph
+makima directive graph 123e4567-e89b-12d3-a456-426614174000 --with-status
+
+# 5. Watch events
+makima directive events 123e4567-e89b-12d3-a456-426614174000
+
+# 6. If approval needed, approve or deny
+makima directive approve 123e4567-e89b-12d3-a456-426614174000 <approval-id>
+```
diff --git a/makima/src/daemon/skills/mod.rs b/makima/src/daemon/skills/mod.rs
index 3b0c0dc..dafa9ec 100644
--- a/makima/src/daemon/skills/mod.rs
+++ b/makima/src/daemon/skills/mod.rs
@@ -9,12 +9,16 @@ pub const SUPERVISOR_SKILL: &str = include_str!("supervisor.md");
 /// Contract skill content - task-contract interaction commands
 pub const CONTRACT_SKILL: &str = include_str!("contract.md");
 
-/// Chain skill content - multi-contract orchestration commands
+/// Chain skill content - multi-contract orchestration commands (legacy)
 pub const CHAIN_SKILL: &str = include_str!("chain.md");
 
+/// Directive skill content - autonomous goal-driven orchestration
+pub const DIRECTIVE_SKILL: &str = include_str!("directive.md");
+
 /// All skills as (name, content) pairs for installation
 pub const ALL_SKILLS: &[(&str, &str)] = &[
     ("makima-supervisor", SUPERVISOR_SKILL),
     ("makima-contract", CONTRACT_SKILL),
     ("makima-chain", CHAIN_SKILL),
+    ("makima-directive", DIRECTIVE_SKILL),
 ];
-- 
cgit v1.2.3