summaryrefslogblamecommitdiff
path: root/makima/src/server/handlers/chat.rs
blob: 9d8cd19f8bc914cb68bc316363584a1f5e828782 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12











                                               
                                                                          
                 
                                                           
                      
                                                             
                                                                                      
  
                                                                
 
                                                                   











                                                                      
 








                                              




                                       


                                                                                     


                                                            


                                                             












                                              


                                                            








                                     











                                          
























































                                                                                         
































                                                                          
         












































                                                                          





                                                 


                                                                                                   






                                                                                                                                    
                                                                               



                                                                                     
                                                                                                                 





























                                                                                                                
                 




                                                                           

                                         

      

                                                                                 

                                       
                                         


                               

      























                                                                    
                                 

                                                   

                                                                



                                                                           


                                                         

                                                                
 
                                           
                                     



































                                                                                                   
 


































































                                                                                                  

         





                                                                
           


                                                                    





                                       
                                      
                                                                                                          
 



























                                                                               

                                                               



                                                                                   


                                                                     


                                                                                   

             











                                                                














                                                                         
                                                                                            







                                                                                           












                                                                                                                               




























                                                                          





                                                                





                                                                 


                                                          


                                                                                            





                                                               
                                                                 
                                 

          

































                                                                         



                     

                                                          



                                                                                    

                                                                     







                                    
                                            

                                             
                              















                                                                 




















                                                                                                  








                                                                                        


                                                                          
                            














                                                                                   










                                                                                                    







                                                                             

















                                                                                                 
 









































                                                                                                                          


                                                     















                                                                                                    




































































                                                                                                                           


                                                                                           
                                            














                                                                                                   










                                                                                                                    







                                                                                              












































































































                                                                                                           


























































































































































                                                                                                                                
//! Chat endpoint for LLM-powered file editing.

use axum::{
    extract::{Path, State},
    http::StatusCode,
    response::IntoResponse,
    Json,
};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;

use crate::db::{models::BodyElement, repository::{self, RepositoryError}};
use crate::llm::{
    claude::{self, ClaudeClient, ClaudeError, ClaudeModel},
    execute_tool_call,
    groq::{GroqClient, GroqError, Message, ToolCallResponse},
    LlmModel, ToolCall, ToolResult, UserQuestion, VersionToolRequest, AVAILABLE_TOOLS,
};
use crate::server::state::{FileUpdateNotification, SharedState};

/// Maximum number of tool-calling rounds to prevent infinite loops
const MAX_TOOL_ROUNDS: usize = 20;

/// Context limits for different models (in tokens)
/// Claude models have 200K context, Groq models vary
const CLAUDE_CONTEXT_LIMIT: usize = 200_000;
/// Context limit (in tokens) assumed for Groq-hosted models.
const GROQ_CONTEXT_LIMIT: usize = 32_000;

/// Threshold for triggering context compaction (90% of limit),
/// expressed as a fraction of the selected model's context limit.
const CONTEXT_COMPACTION_THRESHOLD: f32 = 0.90;

/// Approximate characters per token (rough estimate for English text)
const CHARS_PER_TOKEN: usize = 4;

/// A single prior conversation turn supplied by the client.
///
/// Deserialized from camelCase JSON. `chat_handler` forwards `role` and
/// `content` to the LLM as-is, after the system prompt and before the
/// current user message.
#[derive(Debug, Clone, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ChatHistoryMessage {
    /// Role: "user" or "assistant"
    pub role: String,
    /// Message content
    pub content: String,
}

/// Request body for the file chat endpoint (`POST /api/v1/files/{id}/chat`).
///
/// Deserialized from camelCase JSON; every field except `message` is optional.
#[derive(Debug, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ChatRequest {
    /// The user's message/instruction
    pub message: String,
    /// Optional model selection: "claude-sonnet" (default), "claude-opus", or "groq"
    #[serde(default)]
    pub model: Option<String>,
    /// Optional conversation history for context continuity
    #[serde(default)]
    pub history: Option<Vec<ChatHistoryMessage>>,
    /// Optional focused element index (for targeted editing)
    #[serde(default)]
    pub focused_element_index: Option<usize>,
}

/// Successful response body of the file chat endpoint.
///
/// Serialized as camelCase JSON. `pending_questions` is omitted from the
/// output entirely when it is `None`.
#[derive(Debug, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ChatResponse {
    /// The LLM's response message
    pub response: String,
    /// Tool calls that were executed
    pub tool_calls: Vec<ToolCallInfo>,
    /// Updated file body after tool execution
    pub updated_body: Vec<BodyElement>,
    /// Updated summary (if changed)
    pub updated_summary: Option<String>,
    /// Questions pending user answers (pauses conversation)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pending_questions: Option<Vec<UserQuestion>>,
}

/// Record of one tool call executed while handling a chat request,
/// returned to the client in `ChatResponse::tool_calls`.
#[derive(Debug, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ToolCallInfo {
    /// Name of the tool the LLM invoked
    pub name: String,
    /// Outcome of executing the tool
    pub result: ToolResult,
}

/// Enum to hold LLM clients
///
/// Exactly one provider client is constructed per request, chosen from the
/// requested model; the agentic loop dispatches on the variant for each call.
enum LlmClient {
    /// Client for the Groq API
    Groq(GroqClient),
    /// Client for the Claude API (Sonnet or Opus)
    Claude(ClaudeClient),
}

/// Unified result from LLM call
///
/// Normalizes the Groq and Claude responses into one shape so the agentic
/// loop can process either provider identically.
struct LlmResult {
    /// Text content returned by the model, if any
    content: Option<String>,
    /// Parsed tool calls to execute
    tool_calls: Vec<ToolCall>,
    /// The same tool calls in Groq/OpenAI wire format, echoed back in the
    /// assistant message that is appended to the conversation
    raw_tool_calls: Vec<ToolCallResponse>,
    /// Provider stop indicator (Groq `finish_reason` / Claude `stop_reason`)
    finish_reason: String,
}

/// Chat with a file using LLM tool calling
#[utoipa::path(
    post,
    path = "/api/v1/files/{id}/chat",
    request_body = ChatRequest,
    responses(
        (status = 200, description = "Chat completed successfully", body = ChatResponse),
        (status = 404, description = "File not found"),
        (status = 500, description = "Internal server error")
    ),
    params(
        ("id" = Uuid, Path, description = "File ID")
    ),
    tag = "chat"
)]
pub async fn chat_handler(
    State(state): State<SharedState>,
    Path(id): Path<Uuid>,
    Json(request): Json<ChatRequest>,
) -> impl IntoResponse {
    // Check if database is configured
    let Some(ref pool) = state.db_pool else {
        return (
            StatusCode::SERVICE_UNAVAILABLE,
            Json(serde_json::json!({
                "error": "Database not configured"
            })),
        )
            .into_response();
    };

    // Get the file
    let file = match repository::get_file(pool, id).await {
        Ok(Some(file)) => file,
        Ok(None) => {
            return (
                StatusCode::NOT_FOUND,
                Json(serde_json::json!({
                    "error": "File not found"
                })),
            )
                .into_response();
        }
        Err(e) => {
            tracing::error!("Database error: {}", e);
            return (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(serde_json::json!({
                    "error": format!("Database error: {}", e)
                })),
            )
                .into_response();
        }
    };

    // Parse model selection (default to Claude Sonnet)
    let model = request
        .model
        .as_ref()
        .and_then(|m| LlmModel::from_str(m))
        .unwrap_or_default();

    tracing::info!("Using LLM model: {:?}", model);

    // Initialize the appropriate LLM client
    let llm_client = match model {
        LlmModel::ClaudeSonnet => {
            match ClaudeClient::from_env(ClaudeModel::Sonnet) {
                Ok(client) => LlmClient::Claude(client),
                Err(ClaudeError::MissingApiKey) => {
                    return (
                        StatusCode::SERVICE_UNAVAILABLE,
                        Json(serde_json::json!({
                            "error": "ANTHROPIC_API_KEY not configured"
                        })),
                    )
                        .into_response();
                }
                Err(e) => {
                    return (
                        StatusCode::INTERNAL_SERVER_ERROR,
                        Json(serde_json::json!({
                            "error": format!("Claude client error: {}", e)
                        })),
                    )
                        .into_response();
                }
            }
        }
        LlmModel::ClaudeOpus => {
            match ClaudeClient::from_env(ClaudeModel::Opus) {
                Ok(client) => LlmClient::Claude(client),
                Err(ClaudeError::MissingApiKey) => {
                    return (
                        StatusCode::SERVICE_UNAVAILABLE,
                        Json(serde_json::json!({
                            "error": "ANTHROPIC_API_KEY not configured"
                        })),
                    )
                        .into_response();
                }
                Err(e) => {
                    return (
                        StatusCode::INTERNAL_SERVER_ERROR,
                        Json(serde_json::json!({
                            "error": format!("Claude client error: {}", e)
                        })),
                    )
                        .into_response();
                }
            }
        }
        LlmModel::GroqKimi => {
            match GroqClient::from_env() {
                Ok(client) => LlmClient::Groq(client),
                Err(GroqError::MissingApiKey) => {
                    return (
                        StatusCode::SERVICE_UNAVAILABLE,
                        Json(serde_json::json!({
                            "error": "GROQ_API_KEY not configured"
                        })),
                    )
                        .into_response();
                }
                Err(e) => {
                    return (
                        StatusCode::INTERNAL_SERVER_ERROR,
                        Json(serde_json::json!({
                            "error": format!("Groq client error: {}", e)
                        })),
                    )
                        .into_response();
                }
            }
        }
    };

    // Build context about the file
    let file_context = build_file_context(&file);

    // Build focused element context if specified
    let focused_context = build_focused_element_context(&file.body, request.focused_element_index);

    // Build agentic system prompt
    let system_prompt = format!(
        r#"You are an intelligent document editing agent. You help users view, analyze, and modify document files.

## Your Capabilities
You have access to tools for:
- **Viewing content**: view_body (see all elements), read_element (inspect specific element), view_transcript (read full transcript)
- **Adding content**: add_heading, add_paragraph, add_code, add_list, add_chart
- **Modifying content**: update_element, remove_element, reorder_elements, clear_body
- **Document metadata**: set_summary
- **Data processing**: parse_csv (convert CSV to JSON), jq (transform JSON data)
- **Version history**: list_versions, read_version, restore_version
- **Templates**: suggest_templates (get phase-appropriate templates), apply_template (apply a template structure)

## Agentic Behavior Guidelines

### 1. Analyze Before Acting
- For complex requests, first gather information using view_body, view_transcript, or read_element
- Understand the current state of the document before making changes
- For simple, direct requests (e.g., "add a heading called X"), you can act immediately without prior inspection

### 2. Plan Multi-Step Operations
- Break complex tasks into logical steps
- For data visualization: parse_csv → (optionally jq to transform) → add_chart
- For restructuring: view_body → understand structure → make targeted changes

### 3. Handle Errors Gracefully
- If a tool call fails, analyze the error message
- Try an alternative approach or different parameters
- Don't repeat the exact same failing call

### 4. Know When to Stop
- Stop when you've completed the user's request
- Stop when you've provided the requested information
- Provide a clear summary of what you did in your final response

### 5. Be Efficient
- Don't over-analyze simple requests
- Use the minimum number of tool calls needed
- Combine operations when possible

## Current Document Context
{file_context}
{focused_context}
## Important Notes
- Body element indices are 0-based
- When updating elements, provide ALL required fields for that element type
- The transcript is read-only (you cannot modify it, only read it)
- Changes are saved automatically after tool execution"#,
        file_context = file_context,
        focused_context = focused_context
    );

    // Build initial messages (Groq/OpenAI format - will be converted for Claude)
    let mut messages = vec![
        Message {
            role: "system".to_string(),
            content: Some(system_prompt),
            tool_calls: None,
            tool_call_id: None,
        },
    ];

    // Add conversation history if provided (for context continuity)
    if let Some(history) = &request.history {
        for hist_msg in history {
            messages.push(Message {
                role: hist_msg.role.clone(),
                content: Some(hist_msg.content.clone()),
                tool_calls: None,
                tool_call_id: None,
            });
        }
        tracing::info!(
            history_messages = history.len(),
            "Loaded conversation history"
        );
    }

    // Add current user message
    messages.push(Message {
        role: "user".to_string(),
        content: Some(request.message.clone()),
        tool_calls: None,
        tool_call_id: None,
    });

    // State for tracking changes
    let mut current_body = file.body.clone();
    let mut current_summary = file.summary.clone();
    let mut all_tool_call_infos: Vec<ToolCallInfo> = Vec::new();
    let mut final_response: Option<String> = None;
    // Track if a version restore already happened (to avoid double-saving)
    let mut version_restored = false;
    // Track if there were modifications after a restore
    let mut has_changes_after_restore = false;
    // Track consecutive failures for agentic retry logic
    let mut consecutive_failures = 0;
    const MAX_CONSECUTIVE_FAILURES: usize = 3;
    // Track pending user questions (pauses the conversation)
    let mut pending_questions: Option<Vec<UserQuestion>> = None;

    // Multi-turn agentic tool calling loop
    for round in 0..MAX_TOOL_ROUNDS {
        tracing::info!(
            round = round,
            body_elements = current_body.len(),
            total_tool_calls = all_tool_call_infos.len(),
            "Agentic loop iteration"
        );

        // Check if we've hit too many consecutive failures
        if consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
            tracing::warn!("Breaking loop due to {} consecutive failures", consecutive_failures);
            final_response = Some(format!(
                "I encountered multiple consecutive errors and stopped to avoid an infinite loop. \
                Please try rephrasing your request or check if the document state is as expected."
            ));
            break;
        }

        // Check context usage and compact if nearing limit
        if is_context_near_limit(&messages, &model) {
            let estimated_tokens = estimate_total_tokens(&messages);
            tracing::warn!(
                estimated_tokens = estimated_tokens,
                round = round,
                "Context nearing limit, compacting conversation"
            );
            compact_conversation(&mut messages, &all_tool_call_infos);

            // Log the new token count
            let new_tokens = estimate_total_tokens(&messages);
            tracing::info!(
                tokens_before = estimated_tokens,
                tokens_after = new_tokens,
                tokens_saved = estimated_tokens - new_tokens,
                "Conversation compacted"
            );
        }

        // Call the appropriate LLM API
        let result = match &llm_client {
            LlmClient::Groq(groq) => {
                match groq.chat_with_tools(messages.clone(), &AVAILABLE_TOOLS).await {
                    Ok(r) => LlmResult {
                        content: r.content,
                        tool_calls: r.tool_calls,
                        raw_tool_calls: r.raw_tool_calls,
                        finish_reason: r.finish_reason,
                    },
                    Err(e) => {
                        tracing::error!("Groq API error: {}", e);
                        return (
                            StatusCode::INTERNAL_SERVER_ERROR,
                            Json(serde_json::json!({
                                "error": format!("LLM API error: {}", e)
                            })),
                        )
                            .into_response();
                    }
                }
            }
            LlmClient::Claude(claude_client) => {
                // Convert messages to Claude format
                let claude_messages = claude::groq_messages_to_claude(&messages);
                match claude_client.chat_with_tools(claude_messages, &AVAILABLE_TOOLS).await {
                    Ok(r) => {
                        // Convert Claude tool uses to Groq-style ToolCallResponse for consistency
                        let raw_tool_calls: Vec<ToolCallResponse> = r
                            .tool_calls
                            .iter()
                            .map(|tc| ToolCallResponse {
                                id: tc.id.clone(),
                                call_type: "function".to_string(),
                                function: crate::llm::groq::FunctionCall {
                                    name: tc.name.clone(),
                                    arguments: tc.arguments.to_string(),
                                },
                            })
                            .collect();

                        LlmResult {
                            content: r.content,
                            tool_calls: r.tool_calls,
                            raw_tool_calls,
                            finish_reason: r.stop_reason,
                        }
                    }
                    Err(e) => {
                        tracing::error!("Claude API error: {}", e);
                        return (
                            StatusCode::INTERNAL_SERVER_ERROR,
                            Json(serde_json::json!({
                                "error": format!("LLM API error: {}", e)
                            })),
                        )
                            .into_response();
                    }
                }
            }
        };

        // Check if there are tool calls to execute
        if result.tool_calls.is_empty() {
            // No more tool calls - capture the final response and exit loop
            final_response = result.content;
            break;
        }

        // Add assistant message with tool calls to conversation
        messages.push(Message {
            role: "assistant".to_string(),
            content: result.content.clone(),
            tool_calls: Some(result.raw_tool_calls.clone()),
            tool_call_id: None,
        });

        // Execute each tool call and add results to conversation
        for (i, tool_call) in result.tool_calls.iter().enumerate() {
            tracing::info!(
                tool = %tool_call.name,
                round = round,
                "Executing tool call"
            );

            let mut execution_result =
                execute_tool_call(tool_call, &current_body, current_summary.as_deref(), &file.transcript);

            // Handle version tool requests that need async database access
            if let Some(version_request) = &execution_result.version_request {
                let version_result = handle_version_request(
                    pool,
                    id,
                    version_request,
                    &current_body,
                    current_summary.as_deref(),
                    file.version,
                )
                .await;

                // Update execution result with actual version operation result
                execution_result.result = version_result.result;
                execution_result.parsed_data = version_result.data;

                // Apply state changes from restore operation
                if let Some(new_body) = version_result.new_body {
                    current_body = new_body;
                    // Mark that a restore happened - file was already saved
                    version_restored = true;
                }
                if let Some(new_summary) = version_result.new_summary {
                    current_summary = Some(new_summary);
                }
            }

            // Apply state changes from regular tools
            if let Some(new_body) = execution_result.new_body {
                current_body = new_body;
                // If this is a regular tool (not a version operation), track it
                if execution_result.version_request.is_none() && version_restored {
                    has_changes_after_restore = true;
                }
            }
            if let Some(new_summary) = execution_result.new_summary {
                current_summary = Some(new_summary);
                if execution_result.version_request.is_none() && version_restored {
                    has_changes_after_restore = true;
                }
            }

            // Track consecutive failures for agentic behavior
            if execution_result.result.success {
                consecutive_failures = 0;
            } else {
                consecutive_failures += 1;
                tracing::warn!(
                    tool = %tool_call.name,
                    consecutive_failures = consecutive_failures,
                    "Tool call failed"
                );
            }

            // Check for pending user questions (pauses the conversation)
            if let Some(questions) = execution_result.pending_questions {
                tracing::info!(
                    question_count = questions.len(),
                    "LLM requesting user input, pausing conversation"
                );
                pending_questions = Some(questions);
                // Track this tool call before breaking
                all_tool_call_infos.push(ToolCallInfo {
                    name: tool_call.name.clone(),
                    result: execution_result.result,
                });
                break; // Exit inner loop
            }

            // Build tool result message content with enhanced context for agentic reasoning
            let result_content = if let Some(parsed_data) = &execution_result.parsed_data {
                // Include parsed data in the result for the LLM to use
                serde_json::json!({
                    "success": execution_result.result.success,
                    "message": execution_result.result.message,
                    "data": parsed_data
                })
                .to_string()
            } else if !execution_result.result.success {
                // On failure, include hints for the LLM
                let hint = if consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
                    " [HINT: Multiple consecutive failures detected. Consider a different approach or verify your parameters.]"
                } else {
                    ""
                };
                serde_json::json!({
                    "success": false,
                    "message": format!("{}{}", execution_result.result.message, hint),
                    "currentBodyElementCount": current_body.len()
                })
                .to_string()
            } else {
                serde_json::json!({
                    "success": execution_result.result.success,
                    "message": execution_result.result.message
                })
                .to_string()
            };

            // Add tool result message
            // Use the appropriate ID format for each provider
            let tool_call_id = match &llm_client {
                LlmClient::Groq(_) => result.raw_tool_calls[i].id.clone(),
                LlmClient::Claude(_) => tool_call.id.clone(),
            };

            messages.push(Message {
                role: "tool".to_string(),
                content: Some(result_content),
                tool_calls: None,
                tool_call_id: Some(tool_call_id),
            });

            // Track for response
            all_tool_call_infos.push(ToolCallInfo {
                name: tool_call.name.clone(),
                result: execution_result.result,
            });
        }

        // If user questions are pending, pause the conversation
        if pending_questions.is_some() {
            final_response = result.content;
            break;
        }

        // If finish reason indicates completion, exit loop
        let finish_lower = result.finish_reason.to_lowercase();
        if finish_lower == "stop" || finish_lower == "end_turn" {
            final_response = result.content;
            break;
        }
    }

    // Save changes to database if any tools were executed
    // Skip if a version restore already happened (file was already saved during restore)
    // UNLESS there were additional modifications after the restore
    if !all_tool_call_infos.is_empty() && (!version_restored || has_changes_after_restore) {
        let update_req = crate::db::models::UpdateFileRequest {
            name: None,
            description: None,
            transcript: None,
            summary: current_summary.clone(),
            body: Some(current_body.clone()),
            version: None, // Internal update, skip version check
            repo_file_path: None,
        };

        match repository::update_file(pool, id, update_req).await {
            Ok(Some(updated_file)) => {
                // Broadcast update notification for LLM changes
                let mut updated_fields = vec!["body".to_string()];
                if current_summary.is_some() {
                    updated_fields.push("summary".to_string());
                }
                state.broadcast_file_update(FileUpdateNotification {
                    file_id: id,
                    version: updated_file.version,
                    updated_fields,
                    updated_by: "llm".to_string(),
                });
            }
            Ok(None) => {
                // File was deleted during processing
                return (
                    StatusCode::NOT_FOUND,
                    Json(serde_json::json!({
                        "error": "File not found"
                    })),
                )
                    .into_response();
            }
            Err(e) => {
                tracing::error!("Failed to save file changes: {}", e);
                return (
                    StatusCode::INTERNAL_SERVER_ERROR,
                    Json(serde_json::json!({
                        "error": format!("Failed to save changes: {}", e)
                    })),
                )
                    .into_response();
            }
        }
    }

    // Build response
    let response_text = final_response.unwrap_or_else(|| {
        if all_tool_call_infos.is_empty() {
            "I couldn't understand your request. Please try rephrasing.".to_string()
        } else {
            format!(
                "Done! Executed {} tool{}.",
                all_tool_call_infos.len(),
                if all_tool_call_infos.len() == 1 { "" } else { "s" }
            )
        }
    });

    (
        StatusCode::OK,
        Json(ChatResponse {
            response: response_text,
            tool_calls: all_tool_call_infos,
            updated_body: current_body,
            updated_summary: current_summary,
            pending_questions,
        }),
    )
        .into_response()
}

fn build_file_context(file: &crate::db::models::File) -> String {
    let mut context = format!("File: {}\n", file.name);

    if let Some(ref desc) = file.description {
        context.push_str(&format!("Description: {}\n", desc));
    }

    if let Some(ref summary) = file.summary {
        context.push_str(&format!("Summary: {}\n", summary));
    }

    // Include contract phase context if file belongs to a contract
    if let Some(ref phase) = file.contract_phase {
        context.push_str(&format!("\n## Contract Context\n"));
        context.push_str(&format!("This file belongs to a contract in the '{}' phase.\n", phase));
        context.push_str("You can use 'suggest_templates' to get phase-appropriate templates, ");
        context.push_str("or 'apply_template' to apply a template structure.\n");
        context.push_str(&format!(
            "Templates for '{}' phase include: {}\n",
            phase,
            match phase.as_str() {
                "research" => "research-notes, competitor-analysis, user-research",
                "specify" => "requirements, user-stories, acceptance-criteria",
                "plan" => "architecture, technical-design, task-breakdown",
                "execute" => "dev-notes, test-plan, implementation-log",
                "review" => "review-checklist, release-notes, retrospective",
                _ => "(use suggest_templates to see available)",
            }
        ));
    }

    context.push_str(&format!("\nTranscript entries: {}\n", file.transcript.len()));
    context.push_str(&format!("Body elements: {}\n", file.body.len()));

    // Add body overview
    if !file.body.is_empty() {
        context.push_str("\nCurrent body elements:\n");
        for (i, element) in file.body.iter().enumerate() {
            let desc = match element {
                BodyElement::Heading { level, text } => format!("H{}: {}", level, text),
                BodyElement::Paragraph { text } => {
                    let preview: String = text.chars().take(50).collect();
                    if text.chars().count() > 50 {
                        format!("Paragraph: {}...", preview)
                    } else {
                        format!("Paragraph: {}", preview)
                    }
                }
                BodyElement::Code { language, content } => {
                    let lang = language.as_deref().unwrap_or("plain");
                    let preview: String = content.chars().take(50).collect();
                    if content.chars().count() > 50 {
                        format!("Code ({}): {}...", lang, preview)
                    } else {
                        format!("Code ({}): {}", lang, preview)
                    }
                }
                BodyElement::List { ordered, items } => {
                    let list_type = if *ordered { "ordered" } else { "unordered" };
                    format!("List ({}): {} items", list_type, items.len())
                }
                BodyElement::Chart { chart_type, title, .. } => {
                    format!(
                        "Chart ({:?}){}",
                        chart_type,
                        title.as_ref().map(|t| format!(": {}", t)).unwrap_or_default()
                    )
                }
                BodyElement::Image { alt, .. } => {
                    format!("Image{}", alt.as_ref().map(|a| format!(": {}", a)).unwrap_or_default())
                }
                BodyElement::Markdown { content } => {
                    let preview: String = content.chars().take(50).collect();
                    if content.chars().count() > 50 {
                        format!("Markdown: {}...", preview)
                    } else {
                        format!("Markdown: {}", preview)
                    }
                }
            };
            context.push_str(&format!("  [{}] {}\n", i, desc));
        }
    }

    // Add transcript preview if available
    if !file.transcript.is_empty() {
        context.push_str("\nTranscript preview (first 5 entries):\n");
        for entry in file.transcript.iter().take(5) {
            context.push_str(&format!("  - {}: {}\n", entry.speaker, entry.text));
        }
        if file.transcript.len() > 5 {
            context.push_str(&format!("  ... and {} more entries\n", file.transcript.len() - 5));
        }
    }

    context
}

/// Build the prompt section describing the body element the user has focused.
///
/// Returns an empty string when `focused_index` is `None`, a short note when the index
/// does not exist in `body`, and otherwise a section that names the element's type and
/// quotes its full content.
fn build_focused_element_context(body: &[BodyElement], focused_index: Option<usize>) -> String {
    let index = match focused_index {
        Some(position) => position,
        None => return String::new(),
    };

    let element = match body.get(index) {
        Some(found) => found,
        None => {
            // The client sent an index that is out of range for the current document.
            return format!(
                "\n## Focused Element\nNote: User focused on element [{}] but it doesn't exist (document has {} elements).\n",
                index,
                body.len()
            );
        }
    };

    // Human-readable type label plus the complete (untruncated) content.
    let (kind, content) = match element {
        BodyElement::Heading { level, text } => (format!("Heading (level {})", level), text.clone()),
        BodyElement::Paragraph { text } => (String::from("Paragraph"), text.clone()),
        BodyElement::Code { language, content } => (
            format!("Code ({})", language.as_deref().unwrap_or("plain")),
            content.clone(),
        ),
        BodyElement::List { ordered, items } => {
            let label = if *ordered { "Ordered list" } else { "Unordered list" };
            // Items are numbered from 1 regardless of the list being ordered or not.
            let mut numbered = Vec::with_capacity(items.len());
            for (position, item) in items.iter().enumerate() {
                numbered.push(format!("{}. {}", position + 1, item));
            }
            (String::from(label), numbered.join("\n"))
        }
        BodyElement::Chart { chart_type, title, .. } => (
            format!("Chart ({:?})", chart_type),
            title
                .as_deref()
                .map_or_else(|| "untitled".to_string(), str::to_string),
        ),
        BodyElement::Image { alt, caption, .. } => {
            // Prefer the alt text, fall back to the caption.
            let description = match (alt.as_deref(), caption.as_deref()) {
                (Some(alt_text), _) => alt_text,
                (None, Some(caption_text)) => caption_text,
                (None, None) => "no description",
            };
            (String::from("Image"), description.to_string())
        }
        BodyElement::Markdown { content } => (String::from("Markdown"), content.clone()),
    };

    format!(
        r#"
## Focused Element
The user is focusing on element [{}]: {}
Full content of focused element:
---
{}
---
When the user's request is ambiguous about which element to modify, prioritize this focused element.
"#,
        index, kind, content
    )
}

/// Result of handling a version tool request
///
/// Produced by `handle_version_request` for list, read and restore requests.
struct VersionRequestResult {
    /// Success flag and human-readable message describing the outcome.
    result: ToolResult,
    /// JSON payload accompanying a successful request (version list, version details
    /// or restore details); `None` on every failure path.
    data: Option<serde_json::Value>,
    /// Body of the restored file; only set after a successful restore, `None` otherwise.
    new_body: Option<Vec<BodyElement>>,
    /// Summary taken from the restored file after a successful restore (may itself be
    /// `None`); `None` on all other paths.
    new_summary: Option<String>,
}

/// Handle version tool requests that require async database access
async fn handle_version_request(
    pool: &sqlx::PgPool,
    file_id: Uuid,
    request: &VersionToolRequest,
    _current_body: &[BodyElement],
    _current_summary: Option<&str>,
    current_version: i32,
) -> VersionRequestResult {
    match request {
        VersionToolRequest::ListVersions => {
            match repository::list_file_versions(pool, file_id).await {
                Ok(versions) => {
                    let version_data: Vec<serde_json::Value> = versions
                        .iter()
                        .map(|v| {
                            serde_json::json!({
                                "version": v.version,
                                "source": v.source,
                                "createdAt": v.created_at.to_rfc3339(),
                                "changeDescription": v.change_description,
                            })
                        })
                        .collect();

                    VersionRequestResult {
                        result: ToolResult {
                            success: true,
                            message: format!("Found {} versions. Current version is {}.", versions.len(), current_version),
                        },
                        data: Some(serde_json::json!({
                            "currentVersion": current_version,
                            "versions": version_data,
                        })),
                        new_body: None,
                        new_summary: None,
                    }
                }
                Err(e) => VersionRequestResult {
                    result: ToolResult {
                        success: false,
                        message: format!("Failed to list versions: {}", e),
                    },
                    data: None,
                    new_body: None,
                    new_summary: None,
                },
            }
        }
        VersionToolRequest::ReadVersion { version } => {
            match repository::get_file_version(pool, file_id, *version).await {
                Ok(Some(ver)) => {
                    // Convert body elements to a readable format
                    let body_preview: Vec<String> = ver
                        .body
                        .iter()
                        .enumerate()
                        .map(|(i, element)| {
                            let desc = match element {
                                BodyElement::Heading { level, text } => format!("H{}: {}", level, text),
                                BodyElement::Paragraph { text } => {
                                    let preview: String = text.chars().take(100).collect();
                                    if text.chars().count() > 100 {
                                        format!("Paragraph: {}...", preview)
                                    } else {
                                        format!("Paragraph: {}", preview)
                                    }
                                }
                                BodyElement::Code { language, content } => {
                                    let lang = language.as_deref().unwrap_or("plain");
                                    let preview: String = content.chars().take(100).collect();
                                    if content.chars().count() > 100 {
                                        format!("Code ({}): {}...", lang, preview)
                                    } else {
                                        format!("Code ({}): {}", lang, preview)
                                    }
                                }
                                BodyElement::List { ordered, items } => {
                                    let list_type = if *ordered { "ordered" } else { "unordered" };
                                    format!("List ({}): {} items", list_type, items.len())
                                }
                                BodyElement::Chart { chart_type, title, .. } => {
                                    format!(
                                        "Chart ({:?}){}",
                                        chart_type,
                                        title.as_ref().map(|t| format!(": {}", t)).unwrap_or_default()
                                    )
                                }
                                BodyElement::Image { alt, .. } => {
                                    format!("Image{}", alt.as_ref().map(|a| format!(": {}", a)).unwrap_or_default())
                                }
                                BodyElement::Markdown { content } => {
                                    let preview: String = content.chars().take(100).collect();
                                    if content.chars().count() > 100 {
                                        format!("Markdown: {}...", preview)
                                    } else {
                                        format!("Markdown: {}", preview)
                                    }
                                }
                            };
                            format!("[{}] {}", i, desc)
                        })
                        .collect();

                    VersionRequestResult {
                        result: ToolResult {
                            success: true,
                            message: format!(
                                "Version {} from {} (source: {}). {} body elements.",
                                ver.version,
                                ver.created_at.format("%Y-%m-%d %H:%M"),
                                ver.source,
                                ver.body.len()
                            ),
                        },
                        data: Some(serde_json::json!({
                            "version": ver.version,
                            "source": ver.source,
                            "createdAt": ver.created_at.to_rfc3339(),
                            "summary": ver.summary,
                            "bodyPreview": body_preview,
                            "changeDescription": ver.change_description,
                        })),
                        new_body: None,
                        new_summary: None,
                    }
                }
                Ok(None) => VersionRequestResult {
                    result: ToolResult {
                        success: false,
                        message: format!("Version {} not found", version),
                    },
                    data: None,
                    new_body: None,
                    new_summary: None,
                },
                Err(e) => VersionRequestResult {
                    result: ToolResult {
                        success: false,
                        message: format!("Failed to read version: {}", e),
                    },
                    data: None,
                    new_body: None,
                    new_summary: None,
                },
            }
        }
        VersionToolRequest::RestoreVersion { target_version, reason } => {
            // Set change description if provided
            if let Some(reason) = reason {
                let _ = repository::set_change_description(pool, reason).await;
            }

            match repository::restore_file_version(pool, file_id, *target_version, current_version).await {
                Ok(Some(restored_file)) => {
                    VersionRequestResult {
                        result: ToolResult {
                            success: true,
                            message: format!(
                                "Restored to version {}. New version is {}.",
                                target_version, restored_file.version
                            ),
                        },
                        data: Some(serde_json::json!({
                            "previousVersion": current_version,
                            "restoredFromVersion": target_version,
                            "newVersion": restored_file.version,
                        })),
                        new_body: Some(restored_file.body),
                        new_summary: restored_file.summary,
                    }
                }
                Ok(None) => VersionRequestResult {
                    result: ToolResult {
                        success: false,
                        message: format!("Version {} not found", target_version),
                    },
                    data: None,
                    new_body: None,
                    new_summary: None,
                },
                Err(RepositoryError::VersionConflict { expected, actual }) => {
                    VersionRequestResult {
                        result: ToolResult {
                            success: false,
                            message: format!(
                                "Version conflict: expected {}, actual {}. Document was modified.",
                                expected, actual
                            ),
                        },
                        data: None,
                        new_body: None,
                        new_summary: None,
                    }
                }
                Err(e) => VersionRequestResult {
                    result: ToolResult {
                        success: false,
                        message: format!("Failed to restore version: {}", e),
                    },
                    data: None,
                    new_body: None,
                    new_summary: None,
                },
            }
        }
    }
}

/// Estimate the token count of a message
fn estimate_message_tokens(message: &Message) -> usize {
    let mut chars = 0;

    // Count content characters
    if let Some(ref content) = message.content {
        chars += content.len();
    }

    // Count tool call characters (rough estimate)
    if let Some(ref tool_calls) = message.tool_calls {
        for tc in tool_calls {
            chars += tc.function.name.len();
            chars += tc.function.arguments.len();
        }
    }

    // Count tool call ID
    if let Some(ref id) = message.tool_call_id {
        chars += id.len();
    }

    // Add overhead for role and structure
    chars += message.role.len() + 20;

    // Convert to tokens
    chars / CHARS_PER_TOKEN
}

/// Estimate total token count of all messages
fn estimate_total_tokens(messages: &[Message]) -> usize {
    messages.iter().map(estimate_message_tokens).sum()
}

/// Check if context is nearing the limit
///
/// Returns `true` once the estimated token count of `messages` reaches the model's
/// context limit scaled by `CONTEXT_COMPACTION_THRESHOLD`.
fn is_context_near_limit(messages: &[Message], model: &LlmModel) -> bool {
    let context_limit = match model {
        LlmModel::GroqKimi => GROQ_CONTEXT_LIMIT,
        LlmModel::ClaudeSonnet | LlmModel::ClaudeOpus => CLAUDE_CONTEXT_LIMIT,
    };

    // Token count at which compaction should kick in (fraction truncated).
    let compaction_threshold = (context_limit as f32 * CONTEXT_COMPACTION_THRESHOLD) as usize;

    compaction_threshold <= estimate_total_tokens(messages)
}

/// Compact the conversation by summarizing older messages
/// Keeps: system message, last N user/assistant exchanges, and a summary of older content
///
/// `messages[0]` is treated as the system message and is always preserved. The oldest
/// messages after it are replaced by a synthetic user message holding a summary (previous
/// user requests, the tools in `tool_call_history`, and a count of omitted assistant
/// responses) followed by a short assistant acknowledgement. Roughly the most recent third
/// of the conversation (at least 4 messages) is kept verbatim.
///
/// The conversation is left untouched when it has 5 or fewer messages or when fewer than
/// 2 messages would be summarized.
///
/// The kept tail never starts with a tool-result message: if the summarize/keep boundary
/// would fall between an assistant tool-call message and its results, the leading results
/// are summarized as well, so no tool result is left without the call it answers.
fn compact_conversation(messages: &mut Vec<Message>, tool_call_history: &[ToolCallInfo]) {
    // Keep at least system message + 4 recent messages (2 exchanges)
    const MIN_MESSAGES_TO_KEEP: usize = 5;
    // Lower bound on the number of recent messages kept verbatim.
    const MIN_RECENT_MESSAGES: usize = 4;

    if messages.len() <= MIN_MESSAGES_TO_KEEP {
        return;
    }

    // Everything after the system message (always first) takes part in compaction.
    let conversation_len = messages.len() - 1;

    // Keep the last ~1/3 of messages for recent context, summarize the rest.
    let messages_to_keep = std::cmp::max(MIN_RECENT_MESSAGES, conversation_len / 3);
    let mut messages_to_summarize = conversation_len - messages_to_keep;

    if messages_to_summarize < 2 {
        // Not enough to summarize; leave the conversation as it is.
        return;
    }

    // The boundary above is pure arithmetic and can split an assistant tool-call message
    // from its results. Chat-completion APIs reject a tool result that has no preceding
    // tool call, so move the boundary past any tool results at the start of the kept tail.
    // NOTE(review): assumes only tool-result messages carry a `tool_call_id` — confirm.
    while matches!(
        messages.get(1 + messages_to_summarize),
        Some(first_kept) if first_kept.tool_call_id.is_some()
    ) {
        messages_to_summarize += 1;
    }

    // Split into [system] + old messages + recent messages without moving the system message.
    let mut recent_messages = messages.split_off(1 + messages_to_summarize);
    let old_messages: Vec<Message> = messages.drain(1..).collect();

    // Build summary of old messages
    let mut summary_parts: Vec<String> = Vec::new();

    // Summarize user requests
    let user_requests: Vec<&str> = old_messages
        .iter()
        .filter(|m| m.role == "user")
        .filter_map(|m| m.content.as_deref())
        .collect();

    if !user_requests.is_empty() {
        summary_parts.push(format!(
            "Previous user requests: {}",
            user_requests.join("; ")
        ));
    }

    // Summarize tool calls executed so far (the whole history, not only the old messages)
    if !tool_call_history.is_empty() {
        let tool_summary: Vec<String> = tool_call_history
            .iter()
            .map(|tc| {
                if tc.result.success {
                    format!("{}(ok)", tc.name)
                } else {
                    format!("{}(failed: {})", tc.name, tc.result.message)
                }
            })
            .collect();

        summary_parts.push(format!(
            "Tools executed: {}",
            tool_summary.join(", ")
        ));
    }

    // Count assistant responses that were summarized
    let assistant_responses = old_messages
        .iter()
        .filter(|m| m.role == "assistant" && m.content.is_some())
        .count();

    if assistant_responses > 0 {
        summary_parts.push(format!(
            "({} previous assistant responses omitted for brevity)",
            assistant_responses
        ));
    }

    // Create compacted context message
    let compacted_content = format!(
        "[CONTEXT SUMMARY - Earlier conversation compacted to save tokens]\n{}",
        summary_parts.join("\n")
    );

    // Rebuild messages: system (still in place) + summary exchange + remaining recent messages
    messages.push(Message {
        role: "user".to_string(),
        content: Some(compacted_content),
        tool_calls: None,
        tool_call_id: None,
    });
    messages.push(Message {
        role: "assistant".to_string(),
        content: Some("Understood. I have context from the previous conversation and will continue from here.".to_string()),
        tool_calls: None,
        tool_call_id: None,
    });
    messages.append(&mut recent_messages);

    tracing::info!(
        summarized_messages = messages_to_summarize,
        remaining_messages = messages.len(),
        "Compacted conversation to save context"
    );
}