From 96ad3af6051af69e2e8b34b35e8b40926bdd13a1 Mon Sep 17 00:00:00 2001
From: soryu
Date: Sun, 1 Feb 2026 00:42:53 +0000
Subject: feat: Implement Phase 3 Tasks 3.3 and 3.4 - Supervisor State Persistence and Restoration

Task 3.3: Supervisor State Persistence
- Add migration 20260201000001_enhanced_supervisor_state.sql with new columns:
  - state (supervisor state enum)
  - current_activity (description)
  - progress (0-100)
  - error_message (for failed states)
  - spawned_task_ids (tasks created by supervisor)
  - pending_questions (questions awaiting user response)
  - restoration_count, last_restored_at, restoration_source (restoration tracking)
- Update SupervisorState model with new fields
- Add PendingQuestion struct for tracking unanswered questions
- Add SupervisorRestorationContext for returning restoration info
- Add StateValidationResult and StateRecoveryAction for state validation

State persistence functions in repository.rs:
- update_supervisor_detailed_state() - Update state, activity, progress
- add_supervisor_spawned_task() - Track spawned tasks
- add_supervisor_pending_question() - Track pending questions
- remove_supervisor_pending_question() - Clear answered questions
- save_supervisor_state_full() - Full state save (UPSERT)
- mark_supervisor_restored() - Increment restoration count
- get_supervisors_with_pending_questions() - Find supervisors with pending questions
- get_supervisor_state_for_restoration() - Load state for restoration
- validate_spawned_tasks() - Validate task consistency
- update_supervisor_phase() - Update on phase change
- update_supervisor_heartbeat_state() - Lightweight heartbeat update

State save points:
- On task spawn (save_state_on_task_spawn)
- On question asked (save_state_on_question_asked)
- On question answered (clear_pending_question)
- On phase change (save_state_on_phase_change)
- On heartbeat (update_supervisor_heartbeat_state)

Task 3.4: Supervisor Restoration Protocol
- Add restoration detection when supervisor starts with existing state
- Implement validate_supervisor_state() for state consistency checks
- Implement restore_supervisor() with validation and context generation
- Add redeliver_pending_questions() for re-delivering questions after crash
- Add generate_restoration_context_message() for Claude context injection
- Update resume_supervisor endpoint to return RestorationInfo
- Re-deliver pending questions when supervisor resumes

Restoration flow:
1. Daemon restarts or task reassigned
2. Load supervisor state from supervisor_states
3. If NOT FOUND: Start fresh, log warning
4. If FOUND: Validate state consistency
5. If INVALID: Start from last checkpoint
6. If VALID: Restore conversation history
7. Check for pending questions - re-deliver to user
8. Check for waiting tasks - resume waiting state
9. Send restoration context to Claude
10. Resume execution from last state

Co-Authored-By: Claude Opus 4.5
---
 .../20260201000001_enhanced_supervisor_state.sql | 56 ++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 makima/migrations/20260201000001_enhanced_supervisor_state.sql

(limited to 'makima/migrations')

diff --git a/makima/migrations/20260201000001_enhanced_supervisor_state.sql b/makima/migrations/20260201000001_enhanced_supervisor_state.sql
new file mode 100644
index 0000000..5411b73
--- /dev/null
+++ b/makima/migrations/20260201000001_enhanced_supervisor_state.sql
@@ -0,0 +1,56 @@
+-- Enhanced supervisor state persistence for restoration after crashes.
+-- Adds additional fields to supervisor_states to track detailed state for recovery.
-- Extend supervisor_states with the columns used to restore a supervisor.
-- All columns are added by a single ALTER TABLE; every ADD COLUMN carries
-- IF NOT EXISTS, so a column that is already present is skipped instead of
-- raising an error.
ALTER TABLE supervisor_states
    -- Lifecycle state. Mirrors SupervisorStateEnum: initializing, idle, working,
    -- waiting_for_user, waiting_for_tasks, blocked, completed, failed, interrupted.
    ADD COLUMN IF NOT EXISTS state VARCHAR(50) NOT NULL DEFAULT 'initializing',

    -- Free-text description of the current activity, for monitoring.
    ADD COLUMN IF NOT EXISTS current_activity TEXT,

    -- Progress percentage, constrained to 0-100. The column is nullable, and a
    -- NULL value passes the CHECK.
    ADD COLUMN IF NOT EXISTS progress INTEGER DEFAULT 0
        CHECK (progress BETWEEN 0 AND 100),

    -- Error text recorded for failed states.
    ADD COLUMN IF NOT EXISTS error_message TEXT,

    -- IDs of the tasks this supervisor has created; defaults to an empty array.
    ADD COLUMN IF NOT EXISTS spawned_task_ids UUID[] DEFAULT ARRAY[]::UUID[],

    -- Questions still waiting for a user response; defaults to an empty JSON array.
    ADD COLUMN IF NOT EXISTS pending_questions JSONB DEFAULT '[]',

    -- Restoration metadata: how many times, when, and from which source.
    ADD COLUMN IF NOT EXISTS restoration_count INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS last_restored_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS restoration_source VARCHAR(50);

-- Catalog descriptions for the new columns.
COMMENT ON COLUMN supervisor_states.state
    IS 'Current supervisor state: initializing, idle, working, waiting_for_user, waiting_for_tasks, blocked, completed, failed, interrupted';
COMMENT ON COLUMN supervisor_states.current_activity
    IS 'Human-readable description of current activity';
COMMENT ON COLUMN supervisor_states.progress
    IS 'Progress percentage (0-100)';
COMMENT ON COLUMN supervisor_states.error_message
    IS 'Error message when state is failed or blocked';
COMMENT ON COLUMN supervisor_states.spawned_task_ids
    IS 'Array of task UUIDs spawned by this supervisor';
COMMENT ON COLUMN supervisor_states.pending_questions
    IS 'Array of questions awaiting user response: [{id, question, choices, context, asked_at}]';
COMMENT ON COLUMN supervisor_states.restoration_count
    IS 'Number of times this supervisor has been restored after interruption';
COMMENT ON COLUMN supervisor_states.last_restored_at
    IS 'Timestamp of last restoration';
COMMENT ON COLUMN supervisor_states.restoration_source
    IS 'Source of last restoration: daemon_restart, task_reassignment, manual';

-- Lookup by state, e.g. to find blocked or failed supervisors.
CREATE INDEX IF NOT EXISTS idx_supervisor_states_state
    ON supervisor_states (state);

-- Partial GIN index restricted to rows whose pending_questions is not the empty
-- JSON array. Rows where pending_questions is NULL are excluded as well, because
-- the predicate does not evaluate to true for them.
-- NOTE(review): confirm the lookup query's WHERE clause matches this predicate so
-- the planner can consider the index.
CREATE INDEX IF NOT EXISTS idx_supervisor_states_pending_questions
    ON supervisor_states USING GIN (pending_questions)
    WHERE pending_questions <> '[]'::jsonb;

-- cgit v1.2.3