summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--makima/src/db/models.rs2
-rw-r--r--makima/src/server/handlers/mesh_daemon.rs51
-rw-r--r--makima/src/server/handlers/mesh_supervisor.rs82
3 files changed, 111 insertions, 24 deletions
diff --git a/makima/src/db/models.rs b/makima/src/db/models.rs
index a7d2cda..ca6409c 100644
--- a/makima/src/db/models.rs
+++ b/makima/src/db/models.rs
@@ -2155,7 +2155,7 @@ pub enum SupervisorSavePoint {
/// Supervisor restoration context
/// Contains all information needed to restore a supervisor after crash
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
+#[derive(Debug, Clone, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct SupervisorRestorationContext {
/// The restored supervisor state
diff --git a/makima/src/server/handlers/mesh_daemon.rs b/makima/src/server/handlers/mesh_daemon.rs
index 4c6a045..b7398a8 100644
--- a/makima/src/server/handlers/mesh_daemon.rs
+++ b/makima/src/server/handlers/mesh_daemon.rs
@@ -536,6 +536,57 @@ pub enum DaemonMessage {
#[serde(rename = "baseSha")]
base_sha: String,
},
+ /// Supervisor state update for crash recovery.
+ /// Sent periodically or at key save points to persist state.
+ SupervisorStateUpdate {
+ /// Task ID of the supervisor.
+ #[serde(rename = "taskId")]
+ task_id: Uuid,
+ /// Contract ID.
+ #[serde(rename = "contractId")]
+ contract_id: Uuid,
+ /// Save point type that triggered this update.
+ #[serde(rename = "savePoint")]
+ save_point: String,
+ /// Current supervisor activity state.
+ state: Option<String>,
+ /// Human-readable current activity.
+ #[serde(rename = "currentActivity")]
+ current_activity: Option<String>,
+ /// Progress percentage (0-100).
+ progress: Option<i32>,
+ /// Last LLM response for context restoration.
+ #[serde(rename = "lastLlmResponse")]
+ last_llm_response: Option<String>,
+ /// Task that was just spawned (if save_point is "task_spawn").
+ #[serde(rename = "spawnedTaskId")]
+ spawned_task_id: Option<Uuid>,
+ /// Question ID (if save_point is "question_asked").
+ #[serde(rename = "questionId")]
+ question_id: Option<Uuid>,
+ /// Error message (if state is "error").
+ #[serde(rename = "errorMessage")]
+ error_message: Option<String>,
+ /// Updated conversation history (sent on llm_response save points).
+ #[serde(rename = "conversationHistory")]
+ conversation_history: Option<serde_json::Value>,
+ },
+ /// Supervisor heartbeat for lightweight state updates.
+ SupervisorHeartbeat {
+ /// Task ID of the supervisor.
+ #[serde(rename = "taskId")]
+ task_id: Uuid,
+ /// Contract ID.
+ #[serde(rename = "contractId")]
+ contract_id: Uuid,
+ /// Current state (optional).
+ state: Option<String>,
+ /// Current activity description (optional).
+ #[serde(rename = "currentActivity")]
+ current_activity: Option<String>,
+ /// Progress percentage (optional).
+ progress: Option<i32>,
+ },
}
/// Validated daemon authentication result.
diff --git a/makima/src/server/handlers/mesh_supervisor.rs b/makima/src/server/handlers/mesh_supervisor.rs
index b33c1c9..0e4b18b 100644
--- a/makima/src/server/handlers/mesh_supervisor.rs
+++ b/makima/src/server/handlers/mesh_supervisor.rs
@@ -2988,9 +2988,7 @@ pub async fn get_restoration_context(
}),
).into_response()
}
- repository::StateValidationResult::Valid(supervisor_state) |
- repository::StateValidationResult::PartiallyValid { state: supervisor_state, .. } |
- repository::StateValidationResult::PhaseStale { state: supervisor_state, .. } => {
+ repository::StateValidationResult::Valid(supervisor_state) => {
// Get full restoration context
let (pending_tasks, pending_question) = get_restoration_details(
pool,
@@ -2999,35 +2997,73 @@ pub async fn get_restoration_context(
auth_info.owner_id,
).await;
- let restoration_type = match validation {
- repository::StateValidationResult::Valid(_) => "full_restore",
- repository::StateValidationResult::PartiallyValid { .. } => "partial_restore",
- repository::StateValidationResult::PhaseStale { .. } => "checkpoint_restore",
- _ => "unknown",
- };
+ let restoration_count = supervisor_state.restoration_count.unwrap_or(0);
+ let last_llm_response = supervisor_state.last_llm_response.clone();
+ let message = format!("Supervisor state found. Last activity: {}. Restoring from {} phase.",
+ supervisor_state.last_activity.format("%Y-%m-%d %H:%M:%S UTC"),
+ supervisor_state.phase);
- let message = match validation {
- repository::StateValidationResult::Valid(_) =>
- format!("Supervisor state found. Last activity: {}. Restoring from {} phase.",
- supervisor_state.last_activity.format("%Y-%m-%d %H:%M:%S UTC"),
- supervisor_state.phase),
- repository::StateValidationResult::PartiallyValid { invalid_task_ids, .. } =>
- format!("Partial state found. {} task(s) no longer exist. Restoring with available context.",
- invalid_task_ids.len()),
- repository::StateValidationResult::PhaseStale { current_phase, .. } =>
- format!("State found but phase changed from {} to {}. Restoring with updated phase.",
- supervisor_state.phase, current_phase),
- _ => "Unknown restoration type".to_string(),
- };
+ (
+ StatusCode::OK,
+ Json(RestorationContextResponse {
+ found: true,
+ restoration_type: "full_restore".to_string(),
+ message,
+ state: Some(supervisor_state),
+ pending_tasks,
+ pending_question,
+ last_llm_response,
+ restoration_count,
+ }),
+ ).into_response()
+ }
+ repository::StateValidationResult::PartiallyValid { state: supervisor_state, invalid_task_ids } => {
+ // Get full restoration context
+ let (pending_tasks, pending_question) = get_restoration_details(
+ pool,
+ &state,
+ &supervisor_state,
+ auth_info.owner_id,
+ ).await;
+
+ let restoration_count = supervisor_state.restoration_count.unwrap_or(0);
+ let last_llm_response = supervisor_state.last_llm_response.clone();
+ let message = format!("Partial state found. {} task(s) no longer exist. Restoring with available context.",
+ invalid_task_ids.len());
+
+ (
+ StatusCode::OK,
+ Json(RestorationContextResponse {
+ found: true,
+ restoration_type: "partial_restore".to_string(),
+ message,
+ state: Some(supervisor_state),
+ pending_tasks,
+ pending_question,
+ last_llm_response,
+ restoration_count,
+ }),
+ ).into_response()
+ }
+ repository::StateValidationResult::PhaseStale { state: supervisor_state, current_phase } => {
+ // Get full restoration context
+ let (pending_tasks, pending_question) = get_restoration_details(
+ pool,
+ &state,
+ &supervisor_state,
+ auth_info.owner_id,
+ ).await;
let restoration_count = supervisor_state.restoration_count.unwrap_or(0);
let last_llm_response = supervisor_state.last_llm_response.clone();
+ let message = format!("State found but phase changed from {} to {}. Restoring with updated phase.",
+ supervisor_state.phase, current_phase);
(
StatusCode::OK,
Json(RestorationContextResponse {
found: true,
- restoration_type: restoration_type.to_string(),
+ restoration_type: "checkpoint_restore".to_string(),
message,
state: Some(supervisor_state),
pending_tasks,