//! Chat endpoint for LLM-powered file editing. use axum::{ extract::{Path, State}, http::StatusCode, response::IntoResponse, Json, }; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; use uuid::Uuid; use crate::db::{models::BodyElement, repository::{self, RepositoryError}}; use crate::llm::{ claude::{self, ClaudeClient, ClaudeError, ClaudeModel}, execute_tool_call, groq::{GroqClient, GroqError, Message, ToolCallResponse}, LlmModel, ToolCall, ToolResult, UserQuestion, VersionToolRequest, AVAILABLE_TOOLS, }; use crate::server::state::{FileUpdateNotification, SharedState}; /// Maximum number of tool-calling rounds to prevent infinite loops const MAX_TOOL_ROUNDS: usize = 20; /// Context limits for different models (in tokens) /// Claude models have 200K context, Groq models vary const CLAUDE_CONTEXT_LIMIT: usize = 200_000; const GROQ_CONTEXT_LIMIT: usize = 32_000; /// Threshold for triggering context compaction (90% of limit) const CONTEXT_COMPACTION_THRESHOLD: f32 = 0.90; /// Approximate characters per token (rough estimate for English text) const CHARS_PER_TOKEN: usize = 4; #[derive(Debug, Clone, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct ChatHistoryMessage { /// Role: "user" or "assistant" pub role: String, /// Message content pub content: String, } #[derive(Debug, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct ChatRequest { /// The user's message/instruction pub message: String, /// Optional model selection: "claude-sonnet" (default), "claude-opus", or "groq" #[serde(default)] pub model: Option, /// Optional conversation history for context continuity #[serde(default)] pub history: Option>, /// Optional focused element index (for targeted editing) #[serde(default)] pub focused_element_index: Option, } #[derive(Debug, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct ChatResponse { /// The LLM's response message pub response: String, /// Tool calls that were executed pub tool_calls: Vec, /// Updated file body after tool execution pub updated_body: Vec, /// Updated summary (if changed) pub updated_summary: Option, /// Questions pending user answers (pauses conversation) #[serde(skip_serializing_if = "Option::is_none")] pub pending_questions: Option>, } #[derive(Debug, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct ToolCallInfo { pub name: String, pub result: ToolResult, } /// Enum to hold LLM clients enum LlmClient { Groq(GroqClient), Claude(ClaudeClient), } /// Unified result from LLM call struct LlmResult { content: Option, tool_calls: Vec, raw_tool_calls: Vec, finish_reason: String, } /// Chat with a file using LLM tool calling #[utoipa::path( post, path = "/api/v1/files/{id}/chat", request_body = ChatRequest, responses( (status = 200, description = "Chat completed successfully", body = ChatResponse), (status = 404, description = "File not found"), (status = 500, description = "Internal server error") ), params( ("id" = Uuid, Path, description = "File ID") ), tag = "chat" )] pub async fn chat_handler( State(state): State, Path(id): Path, Json(request): Json, ) -> impl IntoResponse { // Check if database is configured let Some(ref pool) = state.db_pool else { return ( StatusCode::SERVICE_UNAVAILABLE, Json(serde_json::json!({ "error": "Database not configured" })), ) .into_response(); }; // Get the file let file = match repository::get_file(pool, id).await { Ok(Some(file)) => file, Ok(None) => { return ( StatusCode::NOT_FOUND, Json(serde_json::json!({ "error": "File not found" })), ) .into_response(); } Err(e) => { tracing::error!("Database error: {}", e); return ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": format!("Database error: {}", e) })), ) .into_response(); } }; // Parse model selection (default to Claude Sonnet) let model = request .model .as_ref() .and_then(|m| LlmModel::from_str(m)) .unwrap_or_default(); tracing::info!("Using LLM model: {:?}", model); // Initialize the appropriate LLM client let llm_client = match model { LlmModel::ClaudeSonnet => { match ClaudeClient::from_env(ClaudeModel::Sonnet) { Ok(client) => LlmClient::Claude(client), Err(ClaudeError::MissingApiKey) => { return ( StatusCode::SERVICE_UNAVAILABLE, Json(serde_json::json!({ "error": "ANTHROPIC_API_KEY not configured" })), ) .into_response(); } Err(e) => { return ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": format!("Claude client error: {}", e) })), ) .into_response(); } } } LlmModel::ClaudeOpus => { match ClaudeClient::from_env(ClaudeModel::Opus) { Ok(client) => LlmClient::Claude(client), Err(ClaudeError::MissingApiKey) => { return ( StatusCode::SERVICE_UNAVAILABLE, Json(serde_json::json!({ "error": "ANTHROPIC_API_KEY not configured" })), ) .into_response(); } Err(e) => { return ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": format!("Claude client error: {}", e) })), ) .into_response(); } } } LlmModel::GroqKimi => { match GroqClient::from_env() { Ok(client) => LlmClient::Groq(client), Err(GroqError::MissingApiKey) => { return ( StatusCode::SERVICE_UNAVAILABLE, Json(serde_json::json!({ "error": "GROQ_API_KEY not configured" })), ) .into_response(); } Err(e) => { return ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": format!("Groq client error: {}", e) })), ) .into_response(); } } } }; // Build context about the file let file_context = build_file_context(&file); // Build focused element context if specified let focused_context = build_focused_element_context(&file.body, request.focused_element_index); // Build agentic system prompt let system_prompt = format!( r#"You are an intelligent document editing agent. You help users view, analyze, and modify document files. ## Your Capabilities You have access to tools for: - **Viewing content**: view_body (see all elements), read_element (inspect specific element), view_transcript (read full transcript) - **Adding content**: add_heading, add_paragraph, add_code, add_list, add_chart - **Modifying content**: update_element, remove_element, reorder_elements, clear_body - **Document metadata**: set_summary - **Data processing**: parse_csv (convert CSV to JSON), jq (transform JSON data) - **Version history**: list_versions, read_version, restore_version - **Templates**: suggest_templates (get phase-appropriate templates), apply_template (apply a template structure) ## Agentic Behavior Guidelines ### 1. Analyze Before Acting - For complex requests, first gather information using view_body, view_transcript, or read_element - Understand the current state of the document before making changes - For simple, direct requests (e.g., "add a heading called X"), you can act immediately without prior inspection ### 2. Plan Multi-Step Operations - Break complex tasks into logical steps - For data visualization: parse_csv → (optionally jq to transform) → add_chart - For restructuring: view_body → understand structure → make targeted changes ### 3. Handle Errors Gracefully - If a tool call fails, analyze the error message - Try an alternative approach or different parameters - Don't repeat the exact same failing call ### 4. Know When to Stop - Stop when you've completed the user's request - Stop when you've provided the requested information - Provide a clear summary of what you did in your final response ### 5. Be Efficient - Don't over-analyze simple requests - Use the minimum number of tool calls needed - Combine operations when possible ## Current Document Context {file_context} {focused_context} ## Important Notes - Body element indices are 0-based - When updating elements, provide ALL required fields for that element type - The transcript is read-only (you cannot modify it, only read it) - Changes are saved automatically after tool execution"#, file_context = file_context, focused_context = focused_context ); // Build initial messages (Groq/OpenAI format - will be converted for Claude) let mut messages = vec![ Message { role: "system".to_string(), content: Some(system_prompt), tool_calls: None, tool_call_id: None, }, ]; // Add conversation history if provided (for context continuity) if let Some(history) = &request.history { for hist_msg in history { messages.push(Message { role: hist_msg.role.clone(), content: Some(hist_msg.content.clone()), tool_calls: None, tool_call_id: None, }); } tracing::info!( history_messages = history.len(), "Loaded conversation history" ); } // Add current user message messages.push(Message { role: "user".to_string(), content: Some(request.message.clone()), tool_calls: None, tool_call_id: None, }); // State for tracking changes let mut current_body = file.body.clone(); let mut current_summary = file.summary.clone(); let mut all_tool_call_infos: Vec = Vec::new(); let mut final_response: Option = None; // Track if a version restore already happened (to avoid double-saving) let mut version_restored = false; // Track if there were modifications after a restore let mut has_changes_after_restore = false; // Track consecutive failures for agentic retry logic let mut consecutive_failures = 0; const MAX_CONSECUTIVE_FAILURES: usize = 3; // Track pending user questions (pauses the conversation) let mut pending_questions: Option> = None; // Multi-turn agentic tool calling loop for round in 0..MAX_TOOL_ROUNDS { tracing::info!( round = round, body_elements = current_body.len(), total_tool_calls = all_tool_call_infos.len(), "Agentic loop iteration" ); // Check if we've hit too many consecutive failures if consecutive_failures >= MAX_CONSECUTIVE_FAILURES { tracing::warn!("Breaking loop due to {} consecutive failures", consecutive_failures); final_response = Some(format!( "I encountered multiple consecutive errors and stopped to avoid an infinite loop. \ Please try rephrasing your request or check if the document state is as expected." )); break; } // Check context usage and compact if nearing limit if is_context_near_limit(&messages, &model) { let estimated_tokens = estimate_total_tokens(&messages); tracing::warn!( estimated_tokens = estimated_tokens, round = round, "Context nearing limit, compacting conversation" ); compact_conversation(&mut messages, &all_tool_call_infos); // Log the new token count let new_tokens = estimate_total_tokens(&messages); tracing::info!( tokens_before = estimated_tokens, tokens_after = new_tokens, tokens_saved = estimated_tokens - new_tokens, "Conversation compacted" ); } // Call the appropriate LLM API let result = match &llm_client { LlmClient::Groq(groq) => { match groq.chat_with_tools(messages.clone(), &AVAILABLE_TOOLS).await { Ok(r) => LlmResult { content: r.content, tool_calls: r.tool_calls, raw_tool_calls: r.raw_tool_calls, finish_reason: r.finish_reason, }, Err(e) => { tracing::error!("Groq API error: {}", e); return ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": format!("LLM API error: {}", e) })), ) .into_response(); } } } LlmClient::Claude(claude_client) => { // Convert messages to Claude format let claude_messages = claude::groq_messages_to_claude(&messages); match claude_client.chat_with_tools(claude_messages, &AVAILABLE_TOOLS).await { Ok(r) => { // Convert Claude tool uses to Groq-style ToolCallResponse for consistency let raw_tool_calls: Vec = r .tool_calls .iter() .map(|tc| ToolCallResponse { id: tc.id.clone(), call_type: "function".to_string(), function: crate::llm::groq::FunctionCall { name: tc.name.clone(), arguments: tc.arguments.to_string(), }, }) .collect(); LlmResult { content: r.content, tool_calls: r.tool_calls, raw_tool_calls, finish_reason: r.stop_reason, } } Err(e) => { tracing::error!("Claude API error: {}", e); return ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": format!("LLM API error: {}", e) })), ) .into_response(); } } } }; // Check if there are tool calls to execute if result.tool_calls.is_empty() { // No more tool calls - capture the final response and exit loop final_response = result.content; break; } // Add assistant message with tool calls to conversation messages.push(Message { role: "assistant".to_string(), content: result.content.clone(), tool_calls: Some(result.raw_tool_calls.clone()), tool_call_id: None, }); // Execute each tool call and add results to conversation for (i, tool_call) in result.tool_calls.iter().enumerate() { tracing::info!( tool = %tool_call.name, round = round, "Executing tool call" ); let mut execution_result = execute_tool_call(tool_call, ¤t_body, current_summary.as_deref(), &file.transcript); // Handle version tool requests that need async database access if let Some(version_request) = &execution_result.version_request { let version_result = handle_version_request( pool, id, version_request, ¤t_body, current_summary.as_deref(), file.version, ) .await; // Update execution result with actual version operation result execution_result.result = version_result.result; execution_result.parsed_data = version_result.data; // Apply state changes from restore operation if let Some(new_body) = version_result.new_body { current_body = new_body; // Mark that a restore happened - file was already saved version_restored = true; } if let Some(new_summary) = version_result.new_summary { current_summary = Some(new_summary); } } // Apply state changes from regular tools if let Some(new_body) = execution_result.new_body { current_body = new_body; // If this is a regular tool (not a version operation), track it if execution_result.version_request.is_none() && version_restored { has_changes_after_restore = true; } } if let Some(new_summary) = execution_result.new_summary { current_summary = Some(new_summary); if execution_result.version_request.is_none() && version_restored { has_changes_after_restore = true; } } // Track consecutive failures for agentic behavior if execution_result.result.success { consecutive_failures = 0; } else { consecutive_failures += 1; tracing::warn!( tool = %tool_call.name, consecutive_failures = consecutive_failures, "Tool call failed" ); } // Check for pending user questions (pauses the conversation) if let Some(questions) = execution_result.pending_questions { tracing::info!( question_count = questions.len(), "LLM requesting user input, pausing conversation" ); pending_questions = Some(questions); // Track this tool call before breaking all_tool_call_infos.push(ToolCallInfo { name: tool_call.name.clone(), result: execution_result.result, }); break; // Exit inner loop } // Build tool result message content with enhanced context for agentic reasoning let result_content = if let Some(parsed_data) = &execution_result.parsed_data { // Include parsed data in the result for the LLM to use serde_json::json!({ "success": execution_result.result.success, "message": execution_result.result.message, "data": parsed_data }) .to_string() } else if !execution_result.result.success { // On failure, include hints for the LLM let hint = if consecutive_failures >= MAX_CONSECUTIVE_FAILURES { " [HINT: Multiple consecutive failures detected. Consider a different approach or verify your parameters.]" } else { "" }; serde_json::json!({ "success": false, "message": format!("{}{}", execution_result.result.message, hint), "currentBodyElementCount": current_body.len() }) .to_string() } else { serde_json::json!({ "success": execution_result.result.success, "message": execution_result.result.message }) .to_string() }; // Add tool result message // Use the appropriate ID format for each provider let tool_call_id = match &llm_client { LlmClient::Groq(_) => result.raw_tool_calls[i].id.clone(), LlmClient::Claude(_) => tool_call.id.clone(), }; messages.push(Message { role: "tool".to_string(), content: Some(result_content), tool_calls: None, tool_call_id: Some(tool_call_id), }); // Track for response all_tool_call_infos.push(ToolCallInfo { name: tool_call.name.clone(), result: execution_result.result, }); } // If user questions are pending, pause the conversation if pending_questions.is_some() { final_response = result.content; break; } // If finish reason indicates completion, exit loop let finish_lower = result.finish_reason.to_lowercase(); if finish_lower == "stop" || finish_lower == "end_turn" { final_response = result.content; break; } } // Save changes to database if any tools were executed // Skip if a version restore already happened (file was already saved during restore) // UNLESS there were additional modifications after the restore if !all_tool_call_infos.is_empty() && (!version_restored || has_changes_after_restore) { let update_req = crate::db::models::UpdateFileRequest { name: None, description: None, transcript: None, summary: current_summary.clone(), body: Some(current_body.clone()), version: None, // Internal update, skip version check repo_file_path: None, }; match repository::update_file(pool, id, update_req).await { Ok(Some(updated_file)) => { // Broadcast update notification for LLM changes let mut updated_fields = vec!["body".to_string()]; if current_summary.is_some() { updated_fields.push("summary".to_string()); } state.broadcast_file_update(FileUpdateNotification { file_id: id, version: updated_file.version, updated_fields, updated_by: "llm".to_string(), }); } Ok(None) => { // File was deleted during processing return ( StatusCode::NOT_FOUND, Json(serde_json::json!({ "error": "File not found" })), ) .into_response(); } Err(e) => { tracing::error!("Failed to save file changes: {}", e); return ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": format!("Failed to save changes: {}", e) })), ) .into_response(); } } } // Build response let response_text = final_response.unwrap_or_else(|| { if all_tool_call_infos.is_empty() { "I couldn't understand your request. Please try rephrasing.".to_string() } else { format!( "Done! Executed {} tool{}.", all_tool_call_infos.len(), if all_tool_call_infos.len() == 1 { "" } else { "s" } ) } }); ( StatusCode::OK, Json(ChatResponse { response: response_text, tool_calls: all_tool_call_infos, updated_body: current_body, updated_summary: current_summary, pending_questions, }), ) .into_response() } fn build_file_context(file: &crate::db::models::File) -> String { let mut context = format!("File: {}\n", file.name); if let Some(ref desc) = file.description { context.push_str(&format!("Description: {}\n", desc)); } if let Some(ref summary) = file.summary { context.push_str(&format!("Summary: {}\n", summary)); } // Include contract phase context if file belongs to a contract if let Some(ref phase) = file.contract_phase { context.push_str(&format!("\n## Contract Context\n")); context.push_str(&format!("This file belongs to a contract in the '{}' phase.\n", phase)); context.push_str("You can use 'suggest_templates' to get phase-appropriate templates, "); context.push_str("or 'apply_template' to apply a template structure.\n"); context.push_str(&format!( "Templates for '{}' phase include: {}\n", phase, match phase.as_str() { "research" => "research-notes, competitor-analysis, user-research", "specify" => "requirements, user-stories, acceptance-criteria", "plan" => "architecture, technical-design, task-breakdown", "execute" => "dev-notes, test-plan, implementation-log", "review" => "review-checklist, release-notes, retrospective", _ => "(use suggest_templates to see available)", } )); } context.push_str(&format!("\nTranscript entries: {}\n", file.transcript.len())); context.push_str(&format!("Body elements: {}\n", file.body.len())); // Add body overview if !file.body.is_empty() { context.push_str("\nCurrent body elements:\n"); for (i, element) in file.body.iter().enumerate() { let desc = match element { BodyElement::Heading { level, text } => format!("H{}: {}", level, text), BodyElement::Paragraph { text } => { let preview: String = text.chars().take(50).collect(); if text.chars().count() > 50 { format!("Paragraph: {}...", preview) } else { format!("Paragraph: {}", preview) } } BodyElement::Code { language, content } => { let lang = language.as_deref().unwrap_or("plain"); let preview: String = content.chars().take(50).collect(); if content.chars().count() > 50 { format!("Code ({}): {}...", lang, preview) } else { format!("Code ({}): {}", lang, preview) } } BodyElement::List { ordered, items } => { let list_type = if *ordered { "ordered" } else { "unordered" }; format!("List ({}): {} items", list_type, items.len()) } BodyElement::Chart { chart_type, title, .. } => { format!( "Chart ({:?}){}", chart_type, title.as_ref().map(|t| format!(": {}", t)).unwrap_or_default() ) } BodyElement::Image { alt, .. } => { format!("Image{}", alt.as_ref().map(|a| format!(": {}", a)).unwrap_or_default()) } BodyElement::Markdown { content } => { let preview: String = content.chars().take(50).collect(); if content.chars().count() > 50 { format!("Markdown: {}...", preview) } else { format!("Markdown: {}", preview) } } }; context.push_str(&format!(" [{}] {}\n", i, desc)); } } // Add transcript preview if available if !file.transcript.is_empty() { context.push_str("\nTranscript preview (first 5 entries):\n"); for entry in file.transcript.iter().take(5) { context.push_str(&format!(" - {}: {}\n", entry.speaker, entry.text)); } if file.transcript.len() > 5 { context.push_str(&format!(" ... and {} more entries\n", file.transcript.len() - 5)); } } context } /// Build context for a focused element fn build_focused_element_context(body: &[BodyElement], focused_index: Option) -> String { let Some(index) = focused_index else { return String::new(); }; let Some(element) = body.get(index) else { return format!( "\n## Focused Element\nNote: User focused on element [{}] but it doesn't exist (document has {} elements).\n", index, body.len() ); }; let (element_type, full_content) = match element { BodyElement::Heading { level, text } => { (format!("Heading (level {})", level), text.clone()) } BodyElement::Paragraph { text } => { ("Paragraph".to_string(), text.clone()) } BodyElement::Code { language, content } => { let lang = language.as_deref().unwrap_or("plain"); (format!("Code ({})", lang), content.clone()) } BodyElement::List { ordered, items } => { let list_type = if *ordered { "Ordered list" } else { "Unordered list" }; let content = items.iter() .enumerate() .map(|(i, item)| format!("{}. {}", i + 1, item)) .collect::>() .join("\n"); (list_type.to_string(), content) } BodyElement::Chart { chart_type, title, .. } => { let title_str = title.as_deref().unwrap_or("untitled"); (format!("Chart ({:?})", chart_type), title_str.to_string()) } BodyElement::Image { alt, caption, .. } => { let desc = alt.as_deref().or(caption.as_deref()).unwrap_or("no description"); ("Image".to_string(), desc.to_string()) } BodyElement::Markdown { content } => { ("Markdown".to_string(), content.clone()) } }; format!( r#" ## Focused Element The user is focusing on element [{}]: {} Full content of focused element: --- {} --- When the user's request is ambiguous about which element to modify, prioritize this focused element. "#, index, element_type, full_content ) } /// Result of handling a version tool request struct VersionRequestResult { result: ToolResult, data: Option, new_body: Option>, new_summary: Option, } /// Handle version tool requests that require async database access async fn handle_version_request( pool: &sqlx::PgPool, file_id: Uuid, request: &VersionToolRequest, _current_body: &[BodyElement], _current_summary: Option<&str>, current_version: i32, ) -> VersionRequestResult { match request { VersionToolRequest::ListVersions => { match repository::list_file_versions(pool, file_id).await { Ok(versions) => { let version_data: Vec = versions .iter() .map(|v| { serde_json::json!({ "version": v.version, "source": v.source, "createdAt": v.created_at.to_rfc3339(), "changeDescription": v.change_description, }) }) .collect(); VersionRequestResult { result: ToolResult { success: true, message: format!("Found {} versions. Current version is {}.", versions.len(), current_version), }, data: Some(serde_json::json!({ "currentVersion": current_version, "versions": version_data, })), new_body: None, new_summary: None, } } Err(e) => VersionRequestResult { result: ToolResult { success: false, message: format!("Failed to list versions: {}", e), }, data: None, new_body: None, new_summary: None, }, } } VersionToolRequest::ReadVersion { version } => { match repository::get_file_version(pool, file_id, *version).await { Ok(Some(ver)) => { // Convert body elements to a readable format let body_preview: Vec = ver .body .iter() .enumerate() .map(|(i, element)| { let desc = match element { BodyElement::Heading { level, text } => format!("H{}: {}", level, text), BodyElement::Paragraph { text } => { let preview: String = text.chars().take(100).collect(); if text.chars().count() > 100 { format!("Paragraph: {}...", preview) } else { format!("Paragraph: {}", preview) } } BodyElement::Code { language, content } => { let lang = language.as_deref().unwrap_or("plain"); let preview: String = content.chars().take(100).collect(); if content.chars().count() > 100 { format!("Code ({}): {}...", lang, preview) } else { format!("Code ({}): {}", lang, preview) } } BodyElement::List { ordered, items } => { let list_type = if *ordered { "ordered" } else { "unordered" }; format!("List ({}): {} items", list_type, items.len()) } BodyElement::Chart { chart_type, title, .. } => { format!( "Chart ({:?}){}", chart_type, title.as_ref().map(|t| format!(": {}", t)).unwrap_or_default() ) } BodyElement::Image { alt, .. } => { format!("Image{}", alt.as_ref().map(|a| format!(": {}", a)).unwrap_or_default()) } BodyElement::Markdown { content } => { let preview: String = content.chars().take(100).collect(); if content.chars().count() > 100 { format!("Markdown: {}...", preview) } else { format!("Markdown: {}", preview) } } }; format!("[{}] {}", i, desc) }) .collect(); VersionRequestResult { result: ToolResult { success: true, message: format!( "Version {} from {} (source: {}). {} body elements.", ver.version, ver.created_at.format("%Y-%m-%d %H:%M"), ver.source, ver.body.len() ), }, data: Some(serde_json::json!({ "version": ver.version, "source": ver.source, "createdAt": ver.created_at.to_rfc3339(), "summary": ver.summary, "bodyPreview": body_preview, "changeDescription": ver.change_description, })), new_body: None, new_summary: None, } } Ok(None) => VersionRequestResult { result: ToolResult { success: false, message: format!("Version {} not found", version), }, data: None, new_body: None, new_summary: None, }, Err(e) => VersionRequestResult { result: ToolResult { success: false, message: format!("Failed to read version: {}", e), }, data: None, new_body: None, new_summary: None, }, } } VersionToolRequest::RestoreVersion { target_version, reason } => { // Set change description if provided if let Some(reason) = reason { let _ = repository::set_change_description(pool, reason).await; } match repository::restore_file_version(pool, file_id, *target_version, current_version).await { Ok(Some(restored_file)) => { VersionRequestResult { result: ToolResult { success: true, message: format!( "Restored to version {}. New version is {}.", target_version, restored_file.version ), }, data: Some(serde_json::json!({ "previousVersion": current_version, "restoredFromVersion": target_version, "newVersion": restored_file.version, })), new_body: Some(restored_file.body), new_summary: restored_file.summary, } } Ok(None) => VersionRequestResult { result: ToolResult { success: false, message: format!("Version {} not found", target_version), }, data: None, new_body: None, new_summary: None, }, Err(RepositoryError::VersionConflict { expected, actual }) => { VersionRequestResult { result: ToolResult { success: false, message: format!( "Version conflict: expected {}, actual {}. Document was modified.", expected, actual ), }, data: None, new_body: None, new_summary: None, } } Err(e) => VersionRequestResult { result: ToolResult { success: false, message: format!("Failed to restore version: {}", e), }, data: None, new_body: None, new_summary: None, }, } } } } /// Estimate the token count of a message fn estimate_message_tokens(message: &Message) -> usize { let mut chars = 0; // Count content characters if let Some(ref content) = message.content { chars += content.len(); } // Count tool call characters (rough estimate) if let Some(ref tool_calls) = message.tool_calls { for tc in tool_calls { chars += tc.function.name.len(); chars += tc.function.arguments.len(); } } // Count tool call ID if let Some(ref id) = message.tool_call_id { chars += id.len(); } // Add overhead for role and structure chars += message.role.len() + 20; // Convert to tokens chars / CHARS_PER_TOKEN } /// Estimate total token count of all messages fn estimate_total_tokens(messages: &[Message]) -> usize { messages.iter().map(estimate_message_tokens).sum() } /// Check if context is nearing the limit fn is_context_near_limit(messages: &[Message], model: &LlmModel) -> bool { let estimated_tokens = estimate_total_tokens(messages); let limit = match model { LlmModel::ClaudeSonnet | LlmModel::ClaudeOpus => CLAUDE_CONTEXT_LIMIT, LlmModel::GroqKimi => GROQ_CONTEXT_LIMIT, }; let threshold = (limit as f32 * CONTEXT_COMPACTION_THRESHOLD) as usize; estimated_tokens >= threshold } /// Compact the conversation by summarizing older messages /// Keeps: system message, last N user/assistant exchanges, and a summary of older content fn compact_conversation(messages: &mut Vec, tool_call_history: &[ToolCallInfo]) { // Keep at least system message + 4 recent messages (2 exchanges) const MIN_MESSAGES_TO_KEEP: usize = 5; if messages.len() <= MIN_MESSAGES_TO_KEEP { return; } // Extract system message (always first) let system_message = messages.remove(0); // Calculate how many messages to summarize // Keep the last ~1/3 of messages for recent context let messages_to_keep = std::cmp::max(4, messages.len() / 3); let messages_to_summarize = messages.len() - messages_to_keep; if messages_to_summarize < 2 { // Not enough to summarize, just put system message back messages.insert(0, system_message); return; } // Extract messages to summarize let old_messages: Vec = messages.drain(..messages_to_summarize).collect(); // Build summary of old messages let mut summary_parts: Vec = Vec::new(); // Summarize user requests let user_requests: Vec<&str> = old_messages .iter() .filter(|m| m.role == "user") .filter_map(|m| m.content.as_deref()) .collect(); if !user_requests.is_empty() { summary_parts.push(format!( "Previous user requests: {}", user_requests.join("; ") )); } // Summarize tool calls executed so far if !tool_call_history.is_empty() { let tool_summary: Vec = tool_call_history .iter() .map(|tc| { if tc.result.success { format!("{}(ok)", tc.name) } else { format!("{}(failed: {})", tc.name, tc.result.message) } }) .collect(); summary_parts.push(format!( "Tools executed: {}", tool_summary.join(", ") )); } // Count assistant responses that were summarized let assistant_responses = old_messages .iter() .filter(|m| m.role == "assistant" && m.content.is_some()) .count(); if assistant_responses > 0 { summary_parts.push(format!( "({} previous assistant responses omitted for brevity)", assistant_responses )); } // Create compacted context message let compacted_content = format!( "[CONTEXT SUMMARY - Earlier conversation compacted to save tokens]\n{}", summary_parts.join("\n") ); // Rebuild messages: system + summary + remaining recent messages let mut new_messages = vec![ system_message, Message { role: "user".to_string(), content: Some(compacted_content), tool_calls: None, tool_call_id: None, }, Message { role: "assistant".to_string(), content: Some("Understood. I have context from the previous conversation and will continue from here.".to_string()), tool_calls: None, tool_call_id: None, }, ]; new_messages.append(messages); *messages = new_messages; tracing::info!( summarized_messages = messages_to_summarize, remaining_messages = messages.len(), "Compacted conversation to save context" ); }