diff options
Diffstat (limited to 'makima/src/llm/transcript_analyzer.rs')
| -rw-r--r-- | makima/src/llm/transcript_analyzer.rs | 292 |
1 files changed, 0 insertions, 292 deletions
diff --git a/makima/src/llm/transcript_analyzer.rs b/makima/src/llm/transcript_analyzer.rs deleted file mode 100644 index 82aa69d..0000000 --- a/makima/src/llm/transcript_analyzer.rs +++ /dev/null @@ -1,292 +0,0 @@ -//! Transcript analyzer for extracting requirements, decisions, and action items. - -use serde::{Deserialize, Serialize}; -use utoipa::ToSchema; -use crate::db::models::TranscriptEntry; - -/// An extracted requirement from the transcript -#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct ExtractedRequirement { - pub text: String, - pub speaker: String, - pub timestamp: f32, - pub confidence: f32, - pub category: Option<String>, // functional, technical, non-functional, business -} - -/// An extracted decision from the transcript -#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct ExtractedDecision { - pub text: String, - pub speaker: String, - pub timestamp: f32, - pub confidence: f32, - pub context: Option<String>, -} - -/// An extracted action item from the transcript -#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct ExtractedActionItem { - pub text: String, - pub speaker: String, - pub timestamp: f32, - pub assignee: Option<String>, - pub priority: Option<String>, -} - -/// Result of transcript analysis -#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct TranscriptAnalysisResult { - pub requirements: Vec<ExtractedRequirement>, - pub decisions: Vec<ExtractedDecision>, - pub action_items: Vec<ExtractedActionItem>, - pub key_topics: Vec<String>, - pub suggested_contract_name: Option<String>, - pub suggested_description: Option<String>, - pub speaker_summary: Vec<SpeakerStats>, -} - -#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct SpeakerStats { - pub speaker: String, - pub word_count: usize, - pub speaking_time_seconds: f32, - pub contribution_percentage: f32, -} - -/// Format transcript entries into readable text for LLM analysis -pub fn format_transcript_for_analysis(entries: &[TranscriptEntry]) -> String { - entries - .iter() - .map(|e| format!("[{:.1}s] {}: {}", e.start, e.speaker, e.text)) - .collect::<Vec<_>>() - .join("\n") -} - -/// Calculate speaker statistics from transcript -pub fn calculate_speaker_stats(entries: &[TranscriptEntry]) -> Vec<SpeakerStats> { - use std::collections::HashMap; - - let mut stats: HashMap<String, (usize, f32)> = HashMap::new(); - - for entry in entries { - let word_count = entry.text.split_whitespace().count(); - let duration = entry.end - entry.start; - - let (count, time) = stats.entry(entry.speaker.clone()).or_insert((0, 0.0)); - *count += word_count; - *time += duration; - } - - let total_words: usize = stats.values().map(|(c, _)| c).sum(); - let total_time: f32 = stats.values().map(|(_, t)| t).sum(); - - // Suppress unused variable warning - let _ = total_time; - - stats - .into_iter() - .map(|(speaker, (word_count, speaking_time))| SpeakerStats { - speaker, - word_count, - speaking_time_seconds: speaking_time, - contribution_percentage: if total_words > 0 { - (word_count as f32 / total_words as f32) * 100.0 - } else { - 0.0 - }, - }) - .collect() -} - -/// Build the analysis prompt for the LLM -pub fn build_analysis_prompt(transcript_text: &str) -> String { - format!(r#"Analyze this meeting/conversation transcript and extract structured information. - -TRANSCRIPT: -{} - -Extract the following information in JSON format: - -1. **Requirements**: Statements where someone expresses a need, want, or must-have. Look for phrases like: - - "we need to...", "it should...", "must have...", "requirement is..." - - "the system should...", "users need to be able to..." - -2. **Decisions**: Explicit decisions made during the conversation. Look for: - - "let's go with...", "we decided...", "we'll use...", "agreed to..." - - "the decision is...", "we're going with..." - -3. **Action Items**: Tasks or todos mentioned. Look for: - - "someone needs to...", "we should...", "next step is..." - - "I'll do...", "can you...", "TODO:..." - -4. **Key Topics**: Main subjects discussed - -5. **Suggested Contract Name**: A short name (3-5 words) that captures the main goal - -6. **Suggested Description**: A 1-2 sentence description of what should be built/done - -Return your analysis as JSON with this structure: -{{ - "requirements": [ - {{"text": "...", "speaker": "Speaker X", "timestamp": 12.5, "confidence": 0.9, "category": "functional"}} - ], - "decisions": [ - {{"text": "...", "speaker": "Speaker X", "timestamp": 45.2, "confidence": 0.85, "context": "..."}} - ], - "action_items": [ - {{"text": "...", "speaker": "Speaker X", "timestamp": 78.0, "assignee": null, "priority": "high"}} - ], - "key_topics": ["topic1", "topic2"], - "suggested_contract_name": "...", - "suggested_description": "..." -}} - -Be conservative - only extract items with high confidence. If nothing is found for a category, return an empty array."#, transcript_text) -} - -/// Parse LLM response into analysis result -pub fn parse_analysis_response(response: &str, speaker_stats: Vec<SpeakerStats>) -> Result<TranscriptAnalysisResult, String> { - // Try to extract JSON from the response (it might be wrapped in markdown code blocks) - let json_str = extract_json_from_response(response)?; - - #[derive(Deserialize)] - struct LlmResponse { - requirements: Option<Vec<ExtractedRequirement>>, - decisions: Option<Vec<ExtractedDecision>>, - action_items: Option<Vec<ExtractedActionItem>>, - key_topics: Option<Vec<String>>, - suggested_contract_name: Option<String>, - suggested_description: Option<String>, - } - - let parsed: LlmResponse = serde_json::from_str(&json_str) - .map_err(|e| format!("Failed to parse LLM response as JSON: {}", e))?; - - Ok(TranscriptAnalysisResult { - requirements: parsed.requirements.unwrap_or_default(), - decisions: parsed.decisions.unwrap_or_default(), - action_items: parsed.action_items.unwrap_or_default(), - key_topics: parsed.key_topics.unwrap_or_default(), - suggested_contract_name: parsed.suggested_contract_name, - suggested_description: parsed.suggested_description, - speaker_summary: speaker_stats, - }) -} - -/// Extract JSON from LLM response (handles markdown code blocks) -fn extract_json_from_response(response: &str) -> Result<String, String> { - // Try to find JSON in code blocks first - if let Some(start) = response.find("```json") { - if let Some(end) = response[start..].find("```\n").or_else(|| response[start..].rfind("```")) { - let json_start = start + 7; // Skip "```json" - let json_end = start + end; - if json_end > json_start { - return Ok(response[json_start..json_end].trim().to_string()); - } - } - } - - // Try plain code blocks - if let Some(start) = response.find("```") { - let after_start = start + 3; - if let Some(end) = response[after_start..].find("```") { - let json_str = &response[after_start..after_start + end]; - // Skip language identifier if present - let json_str = if let Some(newline) = json_str.find('\n') { - &json_str[newline + 1..] - } else { - json_str - }; - return Ok(json_str.trim().to_string()); - } - } - - // Try to find raw JSON (starts with { or [) - if let Some(start) = response.find('{') { - if let Some(end) = response.rfind('}') { - if end > start { - return Ok(response[start..=end].to_string()); - } - } - } - - Err("Could not find JSON in LLM response".to_string()) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_format_transcript() { - let entries = vec![ - TranscriptEntry { - id: "1".to_string(), - speaker: "Speaker 0".to_string(), - start: 0.0, - end: 2.5, - text: "Hello world".to_string(), - is_final: true, - }, - ]; - - let formatted = format_transcript_for_analysis(&entries); - assert!(formatted.contains("[0.0s] Speaker 0: Hello world")); - } - - #[test] - fn test_speaker_stats() { - let entries = vec![ - TranscriptEntry { - id: "1".to_string(), - speaker: "Speaker 0".to_string(), - start: 0.0, - end: 5.0, - text: "One two three four five".to_string(), - is_final: true, - }, - TranscriptEntry { - id: "2".to_string(), - speaker: "Speaker 1".to_string(), - start: 5.0, - end: 10.0, - text: "Six seven eight nine ten".to_string(), - is_final: true, - }, - ]; - - let stats = calculate_speaker_stats(&entries); - assert_eq!(stats.len(), 2); - - for s in &stats { - assert_eq!(s.word_count, 5); - assert_eq!(s.speaking_time_seconds, 5.0); - assert!((s.contribution_percentage - 50.0).abs() < 0.1); - } - } - - #[test] - fn test_extract_json_from_response() { - let response = r#"Here is the analysis: -```json -{"key": "value"} -``` -Done."#; - - let json = extract_json_from_response(response).unwrap(); - assert_eq!(json, r#"{"key": "value"}"#); - } - - #[test] - fn test_extract_raw_json() { - let response = r#"Analysis: {"key": "value"}"#; - let json = extract_json_from_response(response).unwrap(); - assert_eq!(json, r#"{"key": "value"}"#); - } -} |
