From 88a4f15ce1310f8ee8693835be14aa5280233f17 Mon Sep 17 00:00:00 2001 From: soryu Date: Thu, 5 Feb 2026 23:42:48 +0000 Subject: Add directive-first chain system redesign Redesigns the chain system with a directive-first architecture where Directive is the top-level entity (the "why/what") and Chains are generated execution plans (the "how") that can be dynamically modified. Backend: - Add database migration for directive system tables - Add Directive, DirectiveChain, ChainStep, DirectiveEvent models - Add DirectiveVerifier and DirectiveApproval models - Add orchestration module with engine, planner, and verifier - Add comprehensive API handlers for directives - Add daemon CLI commands for directive management - Add directive skill documentation - Integrate contract completion with directive engine - Add SSE endpoint for real-time directive events Frontend: - Add directives route with split-view layout - Add 6-tab detail view (Overview, Chain, Events, Evaluations, Approvals, Verifiers) - Add React Flow DAG visualization for chain steps - Add SSE subscription hook for real-time event updates - Add useDirectives and useDirectiveEventSubscription hooks - Add directive types and API functions Fixes: - Fix test failures in ws/protocol, task_output, completion_gate, patch - Fix word boundary matching in looks_like_task() - Fix parse_last() to find actual last completion gate - Fix create_export_patch when merge-base equals HEAD - Clean up clippy warnings in new code Co-Authored-By: Claude Opus 4.5 --- makima/src/daemon/skills/directive.md | 303 ++++++++++++++++++++++++++++++++++ makima/src/daemon/skills/mod.rs | 6 +- 2 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 makima/src/daemon/skills/directive.md (limited to 'makima/src/daemon/skills') diff --git a/makima/src/daemon/skills/directive.md b/makima/src/daemon/skills/directive.md new file mode 100644 index 0000000..97e8e20 --- /dev/null +++ b/makima/src/daemon/skills/directive.md @@ -0,0 +1,303 @@ 
+--- +name: makima-directive +description: Directive orchestration tools for autonomous goal-driven execution. Use when working with directives, chains, steps, verifiers, and approvals. +--- + +# Directive Orchestration Tools + +Directives are top-level goals that drive autonomous execution with configurable guardrails. Each directive generates a chain of steps that spawn contracts with supervisors, verified by programmatic checks and LLM evaluation. + +## Architecture + +``` +Directive (goal + requirements + acceptance criteria) + | + +-- Chain (generated DAG execution plan) + | +-- Step 1 (pending -> ready -> running -> evaluating -> passed) + | | +-- Contract (spawned when step reaches 'ready') + | | +-- Supervisor Task + | +-- Step 2 (depends_on: [Step 1]) + | +-- Step 3 (depends_on: [Step 1], parallel with Step 2) + | + +-- Verifiers (test runner, linter, build, type checker) + +-- Evaluations (programmatic + LLM composite scores) + +-- Events (audit stream) + +-- Approvals (human-in-the-loop gates) +``` + +## Status Flow + +### Directive Status +- `draft` - Created but not started +- `planning` - Generating chain from requirements +- `active` - Executing steps +- `paused` - Temporarily stopped +- `completed` - All steps passed +- `archived` - No longer active +- `failed` - Execution failed + +### Step Status +- `pending` - Waiting for dependencies +- `ready` - Dependencies met, ready to start +- `running` - Contract executing +- `evaluating` - Running verifiers +- `passed` - Evaluation succeeded +- `failed` - Evaluation failed, exceeded retries +- `rework` - Sent back for corrections +- `skipped` - Manually skipped +- `blocked` - Blocked by failed dependency + +## Autonomy Levels + +- `full_auto` - No approval gates, automatic progression +- `guardrails` - Request approval for yellow/red confidence scores +- `manual` - Request approval for all step completions + +## Confidence Scoring + +Each step evaluation produces a composite confidence score: + +1. 
**Programmatic verifiers** run first (tests, lint, build) + - Weight: 1.0 each + - If any required verifier fails: automatic RED + +2. **LLM evaluation** runs second + - Weight: 2.0 + - Evaluates against acceptance criteria + +3. **Composite score** computed from weighted average + - GREEN: >= green threshold (default 0.8) + - YELLOW: >= yellow threshold (default 0.5) and below the green threshold + - RED: below yellow threshold + +## CLI Commands + +```bash +# Create a new directive +makima directive create --goal "Add OAuth2 authentication" --repository https://github.com/org/repo + +# List directives +makima directive list [--status active] + +# Get directive status with progress +makima directive status <directive-id> + +# Start execution (generates chain and begins) +makima directive start <directive-id> + +# View chain steps +makima directive steps <directive-id> + +# View DAG visualization +makima directive graph <directive-id> --with-status + +# View recent events +makima directive events <directive-id> --limit 20 + +# Approve a pending request +makima directive approve <directive-id> [--response "Looks good"] + +# Deny a pending request +makima directive deny <directive-id> [--reason "Need more testing"] + +# Lifecycle commands +makima directive pause <directive-id> +makima directive resume <directive-id> +makima directive stop <directive-id> +makima directive archive <directive-id> +``` + +## API Endpoints + +### Directive CRUD +``` +POST /api/v1/directives # Create from goal +GET /api/v1/directives # List +GET /api/v1/directives/:id # Get with progress +PUT /api/v1/directives/:id # Update +DELETE /api/v1/directives/:id # Archive +``` + +### Lifecycle +``` +POST /api/v1/directives/:id/start # Plan + execute +POST /api/v1/directives/:id/pause # Pause +POST /api/v1/directives/:id/resume # Resume +POST /api/v1/directives/:id/stop # Stop +``` + +### Chain & Steps +``` +GET /api/v1/directives/:id/chain # Current chain + steps +GET /api/v1/directives/:id/chain/graph # DAG for visualization +POST /api/v1/directives/:id/chain/replan # Force regeneration +POST /api/v1/directives/:id/chain/steps # Add step +PUT /api/v1/directives/:id/chain/steps/:sid 
# Modify step +DELETE /api/v1/directives/:id/chain/steps/:sid # Remove step +``` + +### Step Operations +``` +GET /api/v1/directives/:id/steps/:sid # Step detail +POST /api/v1/directives/:id/steps/:sid/evaluate # Force re-evaluation +POST /api/v1/directives/:id/steps/:sid/skip # Skip step +POST /api/v1/directives/:id/steps/:sid/rework # Manual rework +``` + +### Monitoring +``` +GET /api/v1/directives/:id/evaluations # List evaluations +GET /api/v1/directives/:id/events # Event log (polling) +GET /api/v1/directives/:id/events/stream # Event stream (SSE) +``` + +### Verifiers +``` +GET /api/v1/directives/:id/verifiers # List verifiers +POST /api/v1/directives/:id/verifiers # Add verifier +PUT /api/v1/directives/:id/verifiers/:vid # Update verifier +POST /api/v1/directives/:id/verifiers/auto-detect # Auto-detect +``` + +### Approvals +``` +GET /api/v1/directives/:id/approvals # Pending approvals +POST /api/v1/directives/:id/approvals/:aid/approve # Approve +POST /api/v1/directives/:id/approvals/:aid/deny # Deny +``` + +## Creating a Directive + +### Request +```json +POST /api/v1/directives +{ + "goal": "Implement user authentication with OAuth2", + "repositoryUrl": "https://github.com/org/repo", + "autonomyLevel": "guardrails", + "confidenceThresholdGreen": 0.8, + "confidenceThresholdYellow": 0.5, + "maxReworkCycles": 3, + "maxTotalCostUsd": 100.0, + "maxWallTimeMinutes": 480 +} +``` + +### Response +```json +{ + "id": "uuid", + "title": "Implement user authentication with OAuth2", + "goal": "Implement user authentication with OAuth2", + "status": "draft", + "autonomyLevel": "guardrails", + "createdAt": "2026-02-05T12:00:00Z" +} +``` + +## Starting a Directive + +When you start a directive: +1. System generates requirements from the goal +2. Chain planner creates a DAG of steps +3. Root steps (no dependencies) transition to `ready` +4. Contracts spawn for ready steps with supervisors +5. 
Verifiers are auto-detected from the repository + +## Evaluation Flow + +When a contract completes: + +1. Step transitions to `evaluating` +2. **Programmatic verifiers** run (tests, lint, build) + - Each produces pass/fail + output +3. **LLM evaluation** runs + - Reviews code against acceptance criteria + - Provides feedback and score +4. **Composite score** is computed +5. Based on confidence level and autonomy: + - GREEN: Step passes, downstream unblocks + - YELLOW (guardrails): Request approval + - RED: Initiate rework or request approval + +## Rework Flow + +When a step needs rework: + +1. Contract phase is reset to editing +2. Supervisor receives rework instructions +3. Rework count is incremented +4. If the rework limit (`maxReworkCycles`) is exceeded: escalate or fail + +## Event Types + +Events are logged for audit and monitoring: + +- `directive_created`, `directive_started`, `directive_paused`, `directive_completed` +- `chain_generated`, `chain_regenerated` +- `step_ready`, `step_started`, `step_evaluating`, `step_passed`, `step_failed` +- `rework_initiated`, `rework_completed` +- `approval_requested`, `approval_granted`, `approval_denied` +- `verifier_run`, `evaluation_completed` +- `circuit_breaker_triggered` + +## Verifier Configuration + +Verifiers can be auto-detected or manually configured: + +```json +POST /api/v1/directives/:id/verifiers +{ + "name": "Test Runner", + "verifierType": "test_runner", + "command": "npm test", + "workingDirectory": ".", + "timeoutSeconds": 300, + "weight": 1.0, + "required": true, + "enabled": true +} +``` + +### Auto-Detection + +The system detects verifiers from: +- `package.json` - npm test, npm run lint, npm run build +- `Cargo.toml` - cargo test, cargo clippy, cargo build +- `pyproject.toml` - pytest, ruff, mypy + +## Circuit Breakers + +Directives have built-in circuit breakers: + +- `maxTotalCostUsd` - Stop if cumulative cost exceeds limit +- `maxWallTimeMinutes` - Stop if elapsed time exceeds limit +- `maxReworkCycles` - Fail step after N rework attempts +- 
`maxChainRegenerations` - Fail if chain regenerated too many times + +## Example Workflow + +```bash +# 1. Create a directive +makima directive create \ + --goal "Add dark mode to the application" \ + --repository https://github.com/myorg/myapp \ + --autonomy guardrails + +# Returns directive ID: 123e4567-e89b-12d3-a456-426614174000 + +# 2. Start execution +makima directive start 123e4567-e89b-12d3-a456-426614174000 + +# 3. Monitor progress +makima directive status 123e4567-e89b-12d3-a456-426614174000 + +# 4. View the execution graph +makima directive graph 123e4567-e89b-12d3-a456-426614174000 --with-status + +# 5. Watch events +makima directive events 123e4567-e89b-12d3-a456-426614174000 + +# 6. If approval needed, approve or deny +makima directive approve 123e4567-e89b-12d3-a456-426614174000 +``` diff --git a/makima/src/daemon/skills/mod.rs b/makima/src/daemon/skills/mod.rs index 3b0c0dc..dafa9ec 100644 --- a/makima/src/daemon/skills/mod.rs +++ b/makima/src/daemon/skills/mod.rs @@ -9,12 +9,16 @@ pub const SUPERVISOR_SKILL: &str = include_str!("supervisor.md"); /// Contract skill content - task-contract interaction commands pub const CONTRACT_SKILL: &str = include_str!("contract.md"); -/// Chain skill content - multi-contract orchestration commands +/// Chain skill content - multi-contract orchestration commands (legacy) pub const CHAIN_SKILL: &str = include_str!("chain.md"); +/// Directive skill content - autonomous goal-driven orchestration +pub const DIRECTIVE_SKILL: &str = include_str!("directive.md"); + /// All skills as (name, content) pairs for installation pub const ALL_SKILLS: &[(&str, &str)] = &[ ("makima-supervisor", SUPERVISOR_SKILL), ("makima-contract", CONTRACT_SKILL), ("makima-chain", CHAIN_SKILL), + ("makima-directive", DIRECTIVE_SKILL), ]; -- cgit v1.2.3