summaryrefslogtreecommitdiff
path: root/makima/src/llm/transcript_analyzer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'makima/src/llm/transcript_analyzer.rs')
-rw-r--r--makima/src/llm/transcript_analyzer.rs292
1 files changed, 0 insertions, 292 deletions
diff --git a/makima/src/llm/transcript_analyzer.rs b/makima/src/llm/transcript_analyzer.rs
deleted file mode 100644
index 82aa69d..0000000
--- a/makima/src/llm/transcript_analyzer.rs
+++ /dev/null
@@ -1,292 +0,0 @@
-//! Transcript analyzer for extracting requirements, decisions, and action items.
-
-use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
-use crate::db::models::TranscriptEntry;
-
-/// An extracted requirement from the transcript
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct ExtractedRequirement {
- pub text: String,
- pub speaker: String,
- pub timestamp: f32,
- pub confidence: f32,
- pub category: Option<String>, // functional, technical, non-functional, business
-}
-
-/// An extracted decision from the transcript
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct ExtractedDecision {
- pub text: String,
- pub speaker: String,
- pub timestamp: f32,
- pub confidence: f32,
- pub context: Option<String>,
-}
-
-/// An extracted action item from the transcript
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct ExtractedActionItem {
- pub text: String,
- pub speaker: String,
- pub timestamp: f32,
- pub assignee: Option<String>,
- pub priority: Option<String>,
-}
-
-/// Result of transcript analysis
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct TranscriptAnalysisResult {
- pub requirements: Vec<ExtractedRequirement>,
- pub decisions: Vec<ExtractedDecision>,
- pub action_items: Vec<ExtractedActionItem>,
- pub key_topics: Vec<String>,
- pub suggested_contract_name: Option<String>,
- pub suggested_description: Option<String>,
- pub speaker_summary: Vec<SpeakerStats>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
-#[serde(rename_all = "camelCase")]
-pub struct SpeakerStats {
- pub speaker: String,
- pub word_count: usize,
- pub speaking_time_seconds: f32,
- pub contribution_percentage: f32,
-}
-
-/// Format transcript entries into readable text for LLM analysis
-pub fn format_transcript_for_analysis(entries: &[TranscriptEntry]) -> String {
- entries
- .iter()
- .map(|e| format!("[{:.1}s] {}: {}", e.start, e.speaker, e.text))
- .collect::<Vec<_>>()
- .join("\n")
-}
-
-/// Calculate speaker statistics from transcript
-pub fn calculate_speaker_stats(entries: &[TranscriptEntry]) -> Vec<SpeakerStats> {
- use std::collections::HashMap;
-
- let mut stats: HashMap<String, (usize, f32)> = HashMap::new();
-
- for entry in entries {
- let word_count = entry.text.split_whitespace().count();
- let duration = entry.end - entry.start;
-
- let (count, time) = stats.entry(entry.speaker.clone()).or_insert((0, 0.0));
- *count += word_count;
- *time += duration;
- }
-
- let total_words: usize = stats.values().map(|(c, _)| c).sum();
- let total_time: f32 = stats.values().map(|(_, t)| t).sum();
-
- // Suppress unused variable warning
- let _ = total_time;
-
- stats
- .into_iter()
- .map(|(speaker, (word_count, speaking_time))| SpeakerStats {
- speaker,
- word_count,
- speaking_time_seconds: speaking_time,
- contribution_percentage: if total_words > 0 {
- (word_count as f32 / total_words as f32) * 100.0
- } else {
- 0.0
- },
- })
- .collect()
-}
-
/// Build the LLM prompt that asks for a structured analysis of a transcript.
///
/// `transcript_text` is inserted verbatim under the `TRANSCRIPT:` heading.
/// The prompt asks for requirements, decisions, action items, key topics, a
/// suggested contract name and a suggested description, returned as JSON with
/// snake_case keys.
pub fn build_analysis_prompt(transcript_text: &str) -> String {
    format!(
        r#"Analyze this meeting/conversation transcript and extract structured information.

TRANSCRIPT:
{transcript}

Extract the following information in JSON format:

1. **Requirements**: Statements where someone expresses a need, want, or must-have. Look for phrases like:
 - "we need to...", "it should...", "must have...", "requirement is..."
 - "the system should...", "users need to be able to..."

2. **Decisions**: Explicit decisions made during the conversation. Look for:
 - "let's go with...", "we decided...", "we'll use...", "agreed to..."
 - "the decision is...", "we're going with..."

3. **Action Items**: Tasks or todos mentioned. Look for:
 - "someone needs to...", "we should...", "next step is..."
 - "I'll do...", "can you...", "TODO:..."

4. **Key Topics**: Main subjects discussed

5. **Suggested Contract Name**: A short name (3-5 words) that captures the main goal

6. **Suggested Description**: A 1-2 sentence description of what should be built/done

Return your analysis as JSON with this structure:
{{
 "requirements": [
 {{"text": "...", "speaker": "Speaker X", "timestamp": 12.5, "confidence": 0.9, "category": "functional"}}
 ],
 "decisions": [
 {{"text": "...", "speaker": "Speaker X", "timestamp": 45.2, "confidence": 0.85, "context": "..."}}
 ],
 "action_items": [
 {{"text": "...", "speaker": "Speaker X", "timestamp": 78.0, "assignee": null, "priority": "high"}}
 ],
 "key_topics": ["topic1", "topic2"],
 "suggested_contract_name": "...",
 "suggested_description": "..."
}}

Be conservative - only extract items with high confidence. If nothing is found for a category, return an empty array."#,
        transcript = transcript_text,
    )
}
-
-/// Parse LLM response into analysis result
-pub fn parse_analysis_response(response: &str, speaker_stats: Vec<SpeakerStats>) -> Result<TranscriptAnalysisResult, String> {
- // Try to extract JSON from the response (it might be wrapped in markdown code blocks)
- let json_str = extract_json_from_response(response)?;
-
- #[derive(Deserialize)]
- struct LlmResponse {
- requirements: Option<Vec<ExtractedRequirement>>,
- decisions: Option<Vec<ExtractedDecision>>,
- action_items: Option<Vec<ExtractedActionItem>>,
- key_topics: Option<Vec<String>>,
- suggested_contract_name: Option<String>,
- suggested_description: Option<String>,
- }
-
- let parsed: LlmResponse = serde_json::from_str(&json_str)
- .map_err(|e| format!("Failed to parse LLM response as JSON: {}", e))?;
-
- Ok(TranscriptAnalysisResult {
- requirements: parsed.requirements.unwrap_or_default(),
- decisions: parsed.decisions.unwrap_or_default(),
- action_items: parsed.action_items.unwrap_or_default(),
- key_topics: parsed.key_topics.unwrap_or_default(),
- suggested_contract_name: parsed.suggested_contract_name,
- suggested_description: parsed.suggested_description,
- speaker_summary: speaker_stats,
- })
-}
-
/// Extract the JSON payload from an LLM response.
///
/// Candidates are tried in this order:
///
/// 1. the first fenced block opened with ```` ```json ````,
/// 2. the first fenced block of any kind,
/// 3. the span from the first `{` to the last `}` in the response.
///
/// A fenced block always ends at the *first* closing fence after its opener,
/// whether or not that fence is followed by a newline, so later code blocks in
/// the same response are never swallowed. Fenced blocks that are unclosed or
/// empty are skipped. The returned text is trimmed but not validated as JSON.
///
/// # Errors
///
/// Returns `"Could not find JSON in LLM response"` when no candidate exists.
fn extract_json_from_response(response: &str) -> Result<String, String> {
    const FENCE: &str = "```";

    // Prefer a block explicitly tagged as JSON, then fall back to the first
    // fenced block of any kind.
    let openers = [response.find("```json"), response.find(FENCE)];
    for open in openers.iter().copied().flatten() {
        if let Some(body) = fenced_block_body(&response[open + FENCE.len()..]) {
            return Ok(body.to_string());
        }
    }

    // No usable fenced block: take the outermost raw JSON object.
    if let (Some(start), Some(end)) = (response.find('{'), response.rfind('}')) {
        if end > start {
            return Ok(response[start..=end].to_string());
        }
    }

    Err("Could not find JSON in LLM response".to_string())
}

/// Return the trimmed content of a fenced block, given the text that follows
/// its opening fence; `None` if the block is unclosed or empty.
fn fenced_block_body(after_open: &str) -> Option<&str> {
    let close = after_open.find("```")?;
    let block = &after_open[..close];

    let is_tag_char =
        |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '+' | '.' | '#');

    let content = match block.find('\n') {
        // The first line is a language tag (or blank): the payload starts on
        // the next line. Trimming also tolerates `\r\n` line endings.
        Some(newline) if block[..newline].trim().chars().all(is_tag_char) => {
            &block[newline + 1..]
        }
        // The payload shares the line with the fence; drop a glued-on `json`
        // tag (as in "```json{...}```") and keep everything else.
        _ => block.strip_prefix("json").unwrap_or(block),
    };

    let content = content.trim();
    if content.is_empty() {
        None
    } else {
        Some(content)
    }
}
-
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a finalized transcript entry for the tests below.
    fn entry(id: &str, speaker: &str, start: f32, end: f32, text: &str) -> TranscriptEntry {
        TranscriptEntry {
            id: id.to_string(),
            speaker: speaker.to_string(),
            start,
            end,
            text: text.to_string(),
            is_final: true,
        }
    }

    /// A single entry is rendered as `[start] speaker: text`.
    #[test]
    fn test_format_transcript() {
        let entries = vec![entry("1", "Speaker 0", 0.0, 2.5, "Hello world")];

        let formatted = format_transcript_for_analysis(&entries);
        assert!(formatted.contains("[0.0s] Speaker 0: Hello world"));
    }

    /// Two speakers with equal word counts and durations split 50/50.
    #[test]
    fn test_speaker_stats() {
        let entries = vec![
            entry("1", "Speaker 0", 0.0, 5.0, "One two three four five"),
            entry("2", "Speaker 1", 5.0, 10.0, "Six seven eight nine ten"),
        ];

        let stats = calculate_speaker_stats(&entries);
        assert_eq!(stats.len(), 2);

        for speaker_stats in &stats {
            assert_eq!(speaker_stats.word_count, 5);
            assert_eq!(speaker_stats.speaking_time_seconds, 5.0);
            assert!((speaker_stats.contribution_percentage - 50.0).abs() < 0.1);
        }
    }

    /// JSON inside a ```json fenced block is returned without the fences.
    #[test]
    fn test_extract_json_from_response() {
        let response = "Here is the analysis:\n```json\n{\"key\": \"value\"}\n```\nDone.";

        let json = extract_json_from_response(response).unwrap();
        assert_eq!(json, r#"{"key": "value"}"#);
    }

    /// A bare JSON object embedded in prose is found without fences.
    #[test]
    fn test_extract_raw_json() {
        let response = r#"Analysis: {"key": "value"}"#;
        let json = extract_json_from_response(response).unwrap();
        assert_eq!(json, r#"{"key": "value"}"#);
    }
}