summary | refs | log | tree | commit | diff
path: root/makima/src/server/handlers
diff options
context:
space:
mode:
Diffstat (limited to 'makima/src/server/handlers')
-rw-r--r-- makima/src/server/handlers/chat.rs 307
1 files changed, 292 insertions, 15 deletions
diff --git a/makima/src/server/handlers/chat.rs b/makima/src/server/handlers/chat.rs
index 396c973..306093a 100644
--- a/makima/src/server/handlers/chat.rs
+++ b/makima/src/server/handlers/chat.rs
@@ -20,7 +20,18 @@ use crate::llm::{
use crate::server::state::{FileUpdateNotification, SharedState};
/// Maximum number of tool-calling rounds to prevent infinite loops
-const MAX_TOOL_ROUNDS: usize = 10;
+const MAX_TOOL_ROUNDS: usize = 20;
+
+/// Context limits (in tokens) used by `is_context_near_limit`: Claude models have 200K context, Groq models vary.
+/// `CLAUDE_CONTEXT_LIMIT` applies to `LlmModel::ClaudeSonnet`/`ClaudeOpus`; `GROQ_CONTEXT_LIMIT` applies to `LlmModel::GroqKimi`.
+const CLAUDE_CONTEXT_LIMIT: usize = 200_000;
+const GROQ_CONTEXT_LIMIT: usize = 32_000;
+
+/// Fraction of the model's context limit at which compaction is triggered (0.90 = 90% of the limit)
+const CONTEXT_COMPACTION_THRESHOLD: f32 = 0.90;
+
+/// Approximate characters per token (rough estimate for English text); divisor for the length-to-token conversion
+const CHARS_PER_TOKEN: usize = 4;
#[derive(Debug, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
@@ -206,20 +217,62 @@ pub async fn chat_handler(
// Build context about the file
let file_context = build_file_context(&file);
+ // Build agentic system prompt
+ let system_prompt = format!(
+ r#"You are an intelligent document editing agent. You help users view, analyze, and modify document files.
+
+## Your Capabilities
+You have access to tools for:
+- **Viewing content**: view_body (see all elements), read_element (inspect specific element), view_transcript (read full transcript)
+- **Adding content**: add_heading, add_paragraph, add_chart
+- **Modifying content**: update_element, remove_element, reorder_elements, clear_body
+- **Document metadata**: set_summary
+- **Data processing**: parse_csv (convert CSV to JSON), jq (transform JSON data)
+- **Version history**: list_versions, read_version, restore_version
+
+## Agentic Behavior Guidelines
+
+### 1. Analyze Before Acting
+- For complex requests, first gather information using view_body, view_transcript, or read_element
+- Understand the current state of the document before making changes
+- For simple, direct requests (e.g., "add a heading called X"), you can act immediately without prior inspection
+
+### 2. Plan Multi-Step Operations
+- Break complex tasks into logical steps
+- For data visualization: parse_csv → (optionally jq to transform) → add_chart
+- For restructuring: view_body → understand structure → make targeted changes
+
+### 3. Handle Errors Gracefully
+- If a tool call fails, analyze the error message
+- Try an alternative approach or different parameters
+- Don't repeat the exact same failing call
+
+### 4. Know When to Stop
+- Stop when you've completed the user's request
+- Stop when you've provided the requested information
+- Provide a clear summary of what you did in your final response
+
+### 5. Be Efficient
+- Don't over-analyze simple requests
+- Use the minimum number of tool calls needed
+- Combine operations when possible
+
+## Current Document Context
+{file_context}
+
+## Important Notes
+- Body element indices are 0-based
+- When updating elements, provide ALL required fields for that element type
+- The transcript is read-only (you cannot modify it, only read it)
+- Changes are saved automatically after tool execution"#,
+ file_context = file_context
+ );
+
// Build initial messages (Groq/OpenAI format - will be converted for Claude)
let mut messages = vec![
Message {
role: "system".to_string(),
- content: Some(format!(
- "You are a helpful assistant that helps users edit and analyze document files. \
- You have access to tools to add headings, paragraphs, charts, and set summaries. \
- When the user asks you to modify the file, use the appropriate tools.\n\n\
- IMPORTANT: You can call multiple tools in sequence. For example, if the user provides CSV data \
- and asks for a chart, first call parse_csv to convert the data to JSON, then use that JSON \
- to call add_chart.\n\n\
- Current file context:\n{}",
- file_context
- )),
+ content: Some(system_prompt),
tool_calls: None,
tool_call_id: None,
},
@@ -240,10 +293,48 @@ pub async fn chat_handler(
let mut version_restored = false;
// Track if there were modifications after a restore
let mut has_changes_after_restore = false;
+ // Track consecutive failures for agentic retry logic
+ let mut consecutive_failures = 0;
+ const MAX_CONSECUTIVE_FAILURES: usize = 3;
- // Multi-turn tool calling loop
+ // Multi-turn agentic tool calling loop
for round in 0..MAX_TOOL_ROUNDS {
- tracing::debug!(round = round, "LLM tool calling round");
+ tracing::info!(
+ round = round,
+ body_elements = current_body.len(),
+ total_tool_calls = all_tool_call_infos.len(),
+ "Agentic loop iteration"
+ );
+
+ // Check if we've hit too many consecutive failures
+ if consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
+ tracing::warn!("Breaking loop due to {} consecutive failures", consecutive_failures);
+ final_response = Some(format!(
+ "I encountered multiple consecutive errors and stopped to avoid an infinite loop. \
+ Please try rephrasing your request or check if the document state is as expected."
+ ));
+ break;
+ }
+
+ // Check context usage and compact if nearing limit
+ if is_context_near_limit(&messages, &model) {
+ let estimated_tokens = estimate_total_tokens(&messages);
+ tracing::warn!(
+ estimated_tokens = estimated_tokens,
+ round = round,
+ "Context nearing limit, compacting conversation"
+ );
+ compact_conversation(&mut messages, &all_tool_call_infos);
+
+ // Log the new token count
+ let new_tokens = estimate_total_tokens(&messages);
+ tracing::info!(
+ tokens_before = estimated_tokens,
+ tokens_after = new_tokens,
+ tokens_saved = estimated_tokens - new_tokens,
+ "Conversation compacted"
+ );
+ }
// Call the appropriate LLM API
let result = match &llm_client {
@@ -324,8 +415,14 @@ pub async fn chat_handler(
// Execute each tool call and add results to conversation
for (i, tool_call) in result.tool_calls.iter().enumerate() {
+ tracing::info!(
+ tool = %tool_call.name,
+ round = round,
+ "Executing tool call"
+ );
+
let mut execution_result =
- execute_tool_call(tool_call, &current_body, current_summary.as_deref());
+ execute_tool_call(tool_call, &current_body, current_summary.as_deref(), &file.transcript);
// Handle version tool requests that need async database access
if let Some(version_request) = &execution_result.version_request {
@@ -369,7 +466,19 @@ pub async fn chat_handler(
}
}
- // Build tool result message content
+ // Track consecutive failures for agentic behavior
+ if execution_result.result.success {
+ consecutive_failures = 0;
+ } else {
+ consecutive_failures += 1;
+ tracing::warn!(
+ tool = %tool_call.name,
+ consecutive_failures = consecutive_failures,
+ "Tool call failed"
+ );
+ }
+
+ // Build tool result message content with enhanced context for agentic reasoning
let result_content = if let Some(parsed_data) = &execution_result.parsed_data {
// Include parsed data in the result for the LLM to use
serde_json::json!({
@@ -378,6 +487,19 @@ pub async fn chat_handler(
"data": parsed_data
})
.to_string()
+ } else if !execution_result.result.success {
+ // On failure, include hints for the LLM
+ let hint = if consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
+ " [HINT: Multiple consecutive failures detected. Consider a different approach or verify your parameters.]"
+ } else {
+ ""
+ };
+ serde_json::json!({
+ "success": false,
+ "message": format!("{}{}", execution_result.result.message, hint),
+ "currentBodyElementCount": current_body.len()
+ })
+ .to_string()
} else {
serde_json::json!({
"success": execution_result.result.success,
@@ -742,3 +864,158 @@ async fn handle_version_request(
}
}
}
+
+/// Roughly estimates how many tokens a single message occupies.
+///
+/// Adds up the lengths of the message content, every tool call's function name
+/// and arguments, the tool call ID and the role, plus a flat allowance of 20
+/// for structural overhead, and divides the total by `CHARS_PER_TOKEN`
+/// (integer division, so the result is rounded down).
+///
+/// NOTE(review): the lengths come from `len()`; if these fields are `String`s
+/// that is a byte count rather than a character count — confirm against the
+/// `Message` definition.
+fn estimate_message_tokens(message: &Message) -> usize {
+    // Flat allowance for the role/structure framing around each message.
+    const STRUCTURE_OVERHEAD: usize = 20;
+
+    // Message text, if any.
+    let content_len = message.content.as_ref().map_or(0, |text| text.len());
+
+    // Name + arguments of every tool call attached to the message (rough estimate).
+    let tool_calls_len: usize = message
+        .tool_calls
+        .iter()
+        .flatten()
+        .map(|call| call.function.name.len() + call.function.arguments.len())
+        .sum();
+
+    // Tool call ID, if any.
+    let call_id_len = message.tool_call_id.as_ref().map_or(0, |id| id.len());
+
+    let total_len =
+        content_len + tool_calls_len + call_id_len + message.role.len() + STRUCTURE_OVERHEAD;
+
+    // Convert the accumulated length into tokens.
+    total_len / CHARS_PER_TOKEN
+}
+
+/// Estimates the token count of a whole conversation.
+///
+/// Returns the sum of `estimate_message_tokens` over every message in
+/// `messages`; an empty slice yields 0.
+fn estimate_total_tokens(messages: &[Message]) -> usize {
+    let mut total_tokens = 0;
+    for message in messages {
+        total_tokens += estimate_message_tokens(message);
+    }
+    total_tokens
+}
+
+/// Reports whether the conversation is close to the model's context limit.
+///
+/// The limit is `CLAUDE_CONTEXT_LIMIT` for the Claude models and
+/// `GROQ_CONTEXT_LIMIT` for `LlmModel::GroqKimi`. Returns `true` once the
+/// estimated token count of `messages` reaches
+/// `CONTEXT_COMPACTION_THRESHOLD` times that limit (truncated to an integer).
+fn is_context_near_limit(messages: &[Message], model: &LlmModel) -> bool {
+    // Pick the context window that applies to the selected model.
+    let context_limit = match model {
+        LlmModel::GroqKimi => GROQ_CONTEXT_LIMIT,
+        LlmModel::ClaudeSonnet | LlmModel::ClaudeOpus => CLAUDE_CONTEXT_LIMIT,
+    };
+
+    // Token count at which compaction should kick in.
+    let compaction_threshold = (context_limit as f32 * CONTEXT_COMPACTION_THRESHOLD) as usize;
+
+    estimate_total_tokens(messages) >= compaction_threshold
+}
+
+/// Compacts the conversation in place by replacing older messages with a summary.
+///
+/// After compaction `messages` contains, in order: the original first message
+/// (assumed to be the system message), a synthetic user message holding the
+/// summary, a synthetic assistant acknowledgement, and the most recent messages
+/// unchanged.
+///
+/// The summary lists the content of the summarized user messages, the outcome of
+/// every entry in `tool_call_history`, and how many summarized assistant
+/// messages had content.
+///
+/// Nothing is changed when `messages` has 5 or fewer entries, or when fewer than
+/// two messages would be summarized.
+///
+/// The cut between "old" and "recent" messages is never placed directly in front
+/// of a message that carries a `tool_call_id` (treated here as a tool result):
+/// the cut is moved back so that such messages stay together with the message
+/// that precedes them, instead of being left orphaned at the start of the kept
+/// tail.
+fn compact_conversation(messages: &mut Vec<Message>, tool_call_history: &[ToolCallInfo]) {
+    // Keep at least system message + 4 recent messages (2 exchanges)
+    const MIN_MESSAGES_TO_KEEP: usize = 5;
+    const MIN_RECENT_MESSAGES: usize = 4;
+
+    if messages.len() <= MIN_MESSAGES_TO_KEEP {
+        return;
+    }
+
+    // Index 0 is the system message and is never summarized.
+    // Keep the last ~1/3 of the remaining messages for recent context.
+    let conversation_len = messages.len() - 1;
+    let messages_to_keep = std::cmp::max(MIN_RECENT_MESSAGES, conversation_len / 3);
+
+    // `split` is the index of the first message that is kept verbatim.
+    let mut split = messages.len() - messages_to_keep;
+
+    // Chat-completion style APIs reject a tool result whose originating
+    // assistant tool-call message is missing, so never start the kept tail on
+    // a tool result: walk the cut back until it sits on a different message.
+    while split > 1 && messages[split].tool_call_id.is_some() {
+        split -= 1;
+    }
+
+    let messages_to_summarize = split - 1;
+    if messages_to_summarize < 2 {
+        // Not enough to summarize; leave the conversation untouched
+        return;
+    }
+
+    // Detach the recent tail first, then the old messages; only the system
+    // message is left in `messages` afterwards.
+    let recent_messages = messages.split_off(split);
+    let old_messages = messages.split_off(1);
+
+    // Build summary of old messages
+    let mut summary_parts: Vec<String> = Vec::new();
+
+    // Summarize user requests
+    let user_requests: Vec<&str> = old_messages
+        .iter()
+        .filter(|m| m.role == "user")
+        .filter_map(|m| m.content.as_deref())
+        .collect();
+
+    if !user_requests.is_empty() {
+        summary_parts.push(format!(
+            "Previous user requests: {}",
+            user_requests.join("; ")
+        ));
+    }
+
+    // Summarize tool calls executed so far
+    if !tool_call_history.is_empty() {
+        let tool_summary: Vec<String> = tool_call_history
+            .iter()
+            .map(|tc| {
+                if tc.result.success {
+                    format!("{}(ok)", tc.name)
+                } else {
+                    format!("{}(failed: {})", tc.name, tc.result.message)
+                }
+            })
+            .collect();
+
+        summary_parts.push(format!(
+            "Tools executed: {}",
+            tool_summary.join(", ")
+        ));
+    }
+
+    // Count assistant responses that were summarized
+    let assistant_responses = old_messages
+        .iter()
+        .filter(|m| m.role == "assistant" && m.content.is_some())
+        .count();
+
+    if assistant_responses > 0 {
+        summary_parts.push(format!(
+            "({} previous assistant responses omitted for brevity)",
+            assistant_responses
+        ));
+    }
+
+    // Create compacted context message
+    let compacted_content = format!(
+        "[CONTEXT SUMMARY - Earlier conversation compacted to save tokens]\n{}",
+        summary_parts.join("\n")
+    );
+
+    // Rebuild messages: system + summary + acknowledgement + remaining recent messages
+    messages.push(Message {
+        role: "user".to_string(),
+        content: Some(compacted_content),
+        tool_calls: None,
+        tool_call_id: None,
+    });
+    messages.push(Message {
+        role: "assistant".to_string(),
+        content: Some("Understood. I have context from the previous conversation and will continue from here.".to_string()),
+        tool_calls: None,
+        tool_call_id: None,
+    });
+    messages.extend(recent_messages);
+
+    tracing::info!(
+        summarized_messages = messages_to_summarize,
+        remaining_messages = messages.len(),
+        "Compacted conversation to save context"
+    );
+}