//! Transcript analyzer for extracting requirements, decisions, and action items.
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::db::models::TranscriptEntry;
/// An extracted requirement from the transcript.
///
/// Values of this type are deserialized from the entries of the
/// `requirements` array in the LLM's JSON reply. Serde renames fields to
/// camelCase, which leaves these single-word names unchanged.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ExtractedRequirement {
/// The requirement statement itself.
pub text: String,
/// Label of the speaker the requirement is attributed to (the prompt's
/// example uses "Speaker X").
pub speaker: String,
/// Position in the transcript; presumably seconds, matching the `[12.5s]`
/// style markers given to the LLM — TODO confirm.
pub timestamp: f32,
/// Confidence reported by the LLM; presumably in the 0.0–1.0 range (the
/// prompt's example uses 0.9) — TODO confirm.
pub confidence: f32,
/// Optional category label; expected values: functional, technical,
/// non-functional, business.
pub category: Option<String>,
}
/// An extracted decision from the transcript.
///
/// Values of this type are deserialized from the entries of the `decisions`
/// array in the LLM's JSON reply. Serde renames fields to camelCase, which
/// leaves these single-word names unchanged.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ExtractedDecision {
/// The decision statement itself.
pub text: String,
/// Label of the speaker the decision is attributed to (the prompt's example
/// uses "Speaker X").
pub speaker: String,
/// Position in the transcript; presumably seconds, matching the `[45.2s]`
/// style markers given to the LLM — TODO confirm.
pub timestamp: f32,
/// Confidence reported by the LLM; presumably in the 0.0–1.0 range (the
/// prompt's example uses 0.85) — TODO confirm.
pub confidence: f32,
/// Optional free-text context supplied by the LLM alongside the decision.
pub context: Option<String>,
}
/// An extracted action item from the transcript.
///
/// Values of this type are deserialized from the entries of the
/// `action_items` array in the LLM's JSON reply. Unlike requirements and
/// decisions, action items carry no confidence score.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ExtractedActionItem {
/// The task or to-do statement itself.
pub text: String,
/// Label of the speaker who mentioned the action item (the prompt's example
/// uses "Speaker X").
pub speaker: String,
/// Position in the transcript; presumably seconds, matching the `[78.0s]`
/// style markers given to the LLM — TODO confirm.
pub timestamp: f32,
/// Optional assignee; the prompt's example shows `null` when nobody is named.
pub assignee: Option<String>,
/// Optional priority label (the prompt's example uses "high"); the set of
/// allowed values is not constrained here.
pub priority: Option<String>,
}
/// Result of transcript analysis.
///
/// Combines the items extracted by the LLM with caller-supplied speaker
/// statistics. Fields are serialized in camelCase (`actionItems`,
/// `keyTopics`, `suggestedContractName`, ...).
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct TranscriptAnalysisResult {
/// Requirements found in the transcript; empty when the LLM reported none.
pub requirements: Vec<ExtractedRequirement>,
/// Decisions found in the transcript; empty when the LLM reported none.
pub decisions: Vec<ExtractedDecision>,
/// Action items found in the transcript; empty when the LLM reported none.
pub action_items: Vec<ExtractedActionItem>,
/// Main subjects discussed, as listed by the LLM.
pub key_topics: Vec<String>,
/// Short name suggested by the LLM (the prompt asks for 3-5 words), if any.
pub suggested_contract_name: Option<String>,
/// Brief description suggested by the LLM (the prompt asks for 1-2
/// sentences), if any.
pub suggested_description: Option<String>,
/// Per-speaker statistics passed in by the caller of
/// `parse_analysis_response`; they are not produced by the LLM.
pub speaker_summary: Vec<SpeakerStats>,
}
/// Aggregated talk statistics for a single speaker.
///
/// Produced by `calculate_speaker_stats`; serialized with camelCase field
/// names (`wordCount`, `speakingTimeSeconds`, `contributionPercentage`).
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct SpeakerStats {
/// Speaker label, copied from the transcript entries.
pub speaker: String,
/// Total number of whitespace-separated words spoken by this speaker.
pub word_count: usize,
/// Sum of `end - start` over this speaker's transcript entries.
pub speaking_time_seconds: f32,
/// This speaker's share of all words in the transcript, as a percentage
/// (word-based, not time-based); 0.0 when the transcript has no words.
pub contribution_percentage: f32,
}
/// Render transcript entries as plain text for the LLM prompt.
///
/// Each entry becomes one line of the form `[<start>s] <speaker>: <text>`,
/// with the start time printed to one decimal place. Lines are separated by
/// a single `\n`, with no trailing newline; an empty slice yields an empty
/// string.
pub fn format_transcript_for_analysis(entries: &[TranscriptEntry]) -> String {
    let mut transcript = String::new();
    for (index, entry) in entries.iter().enumerate() {
        // Separator goes between lines only, never after the last one.
        if index > 0 {
            transcript.push('\n');
        }
        let line = format!("[{:.1}s] {}: {}", entry.start, entry.speaker, entry.text);
        transcript.push_str(&line);
    }
    transcript
}
/// Calculate per-speaker statistics from a transcript.
///
/// For every distinct `speaker` this accumulates:
/// - `word_count`: whitespace-separated words across the speaker's entries,
/// - `speaking_time_seconds`: the sum of `end - start` over those entries,
/// - `contribution_percentage`: the speaker's share of all words, times 100
///   (0.0 when the transcript contains no words at all).
///
/// The returned vector lists speakers in the order they first appear in
/// `entries`, so the output is stable from run to run. An empty slice yields
/// an empty vector.
pub fn calculate_speaker_stats(entries: &[TranscriptEntry]) -> Vec<SpeakerStats> {
    use std::collections::HashMap;

    // `first_seen` records speaker order; the map only holds the running totals.
    // Keys borrow from `entries`, so no per-entry String clone is needed.
    let mut first_seen: Vec<&str> = Vec::new();
    let mut totals: HashMap<&str, (usize, f32)> = HashMap::new();

    for entry in entries {
        let speaker = entry.speaker.as_str();
        let words = entry.text.split_whitespace().count();
        let duration = entry.end - entry.start;

        let (word_total, time_total) = totals.entry(speaker).or_insert_with(|| {
            first_seen.push(speaker);
            (0, 0.0)
        });
        *word_total += words;
        *time_total += duration;
    }

    let total_words: usize = totals.values().map(|(words, _)| *words).sum();

    first_seen
        .into_iter()
        .map(|speaker| {
            let (word_count, speaking_time) = totals[speaker];
            let contribution_percentage = if total_words > 0 {
                (word_count as f32 / total_words as f32) * 100.0
            } else {
                0.0
            };
            SpeakerStats {
                speaker: speaker.to_string(),
                word_count,
                speaking_time_seconds: speaking_time,
                contribution_percentage,
            }
        })
        .collect()
}
/// Build the LLM prompt that asks for a structured analysis of a transcript.
///
/// The prompt consists of a fixed preamble, the transcript text inserted
/// verbatim, and fixed instructions describing what to extract and the JSON
/// shape to return. `transcript_text` is not escaped or altered in any way.
pub fn build_analysis_prompt(transcript_text: &str) -> String {
    // Everything that precedes the transcript.
    const PREAMBLE: &str = "Analyze this meeting/conversation transcript and extract structured information.\nTRANSCRIPT:\n";
    // Everything that follows the transcript; starts with the newline that ends the transcript section.
    const INSTRUCTIONS: &str = r#"
Extract the following information in JSON format:
1. **Requirements**: Statements where someone expresses a need, want, or must-have. Look for phrases like:
- "we need to...", "it should...", "must have...", "requirement is..."
- "the system should...", "users need to be able to..."
2. **Decisions**: Explicit decisions made during the conversation. Look for:
- "let's go with...", "we decided...", "we'll use...", "agreed to..."
- "the decision is...", "we're going with..."
3. **Action Items**: Tasks or todos mentioned. Look for:
- "someone needs to...", "we should...", "next step is..."
- "I'll do...", "can you...", "TODO:..."
4. **Key Topics**: Main subjects discussed
5. **Suggested Contract Name**: A short name (3-5 words) that captures the main goal
6. **Suggested Description**: A 1-2 sentence description of what should be built/done
Return your analysis as JSON with this structure:
{
"requirements": [
{"text": "...", "speaker": "Speaker X", "timestamp": 12.5, "confidence": 0.9, "category": "functional"}
],
"decisions": [
{"text": "...", "speaker": "Speaker X", "timestamp": 45.2, "confidence": 0.85, "context": "..."}
],
"action_items": [
{"text": "...", "speaker": "Speaker X", "timestamp": 78.0, "assignee": null, "priority": "high"}
],
"key_topics": ["topic1", "topic2"],
"suggested_contract_name": "...",
"suggested_description": "..."
}
Be conservative - only extract items with high confidence. If nothing is found for a category, return an empty array."#;

    let mut prompt =
        String::with_capacity(PREAMBLE.len() + transcript_text.len() + INSTRUCTIONS.len());
    prompt.push_str(PREAMBLE);
    prompt.push_str(transcript_text);
    prompt.push_str(INSTRUCTIONS);
    prompt
}
/// Turn the LLM's raw reply into a [`TranscriptAnalysisResult`].
///
/// The JSON payload is first located with `extract_json_from_response`
/// (the reply may wrap it in a markdown code block) and then deserialized.
/// Every top-level key is optional: missing or `null` lists become empty
/// vectors and missing suggestions stay `None`. `speaker_stats` is passed
/// through unchanged as `speaker_summary`.
///
/// # Errors
///
/// Returns an error string when no JSON can be located in `response` or when
/// the located text does not deserialize into the expected shape.
pub fn parse_analysis_response(response: &str, speaker_stats: Vec<SpeakerStats>) -> Result<TranscriptAnalysisResult, String> {
    /// Wire shape of the LLM reply; keys are snake_case as requested in the prompt.
    #[derive(Deserialize)]
    struct RawAnalysis {
        requirements: Option<Vec<ExtractedRequirement>>,
        decisions: Option<Vec<ExtractedDecision>>,
        action_items: Option<Vec<ExtractedActionItem>>,
        key_topics: Option<Vec<String>>,
        suggested_contract_name: Option<String>,
        suggested_description: Option<String>,
    }

    let payload = extract_json_from_response(response)?;

    let raw = match serde_json::from_str::<RawAnalysis>(&payload) {
        Ok(raw) => raw,
        Err(err) => return Err(format!("Failed to parse LLM response as JSON: {}", err)),
    };

    let RawAnalysis {
        requirements,
        decisions,
        action_items,
        key_topics,
        suggested_contract_name,
        suggested_description,
    } = raw;

    Ok(TranscriptAnalysisResult {
        requirements: requirements.unwrap_or_default(),
        decisions: decisions.unwrap_or_default(),
        action_items: action_items.unwrap_or_default(),
        key_topics: key_topics.unwrap_or_default(),
        suggested_contract_name,
        suggested_description,
        speaker_summary: speaker_stats,
    })
}
/// Extract the JSON payload from an LLM response.
///
/// Three strategies are tried in order, and the first that yields text shaped
/// like a JSON object or array wins:
///
/// 1. the contents of the first ```` ```json ```` fenced block,
/// 2. the contents of the first fenced block of any kind, with a leading
///    language-tag line dropped when the block does not itself start with JSON,
/// 3. the span from the first `{` to the last `}` in the whole response, or,
///    failing that, from the first `[` to the last `]`.
///
/// A fenced block ends at the next ```` ``` ```` after its opener, regardless
/// of line endings or trailing whitespace. Fenced content that is not
/// JSON-shaped is skipped rather than returned, so a later strategy can still
/// recover the payload (for example when a JSON string value itself contains
/// triple backticks).
///
/// The returned text is only checked for shape, not validated; the caller is
/// expected to run it through a JSON parser.
///
/// # Errors
///
/// Returns `Err("Could not find JSON in LLM response")` when no strategy
/// finds a candidate.
fn extract_json_from_response(response: &str) -> Result<String, String> {
    const FENCE: &str = "```";
    const JSON_FENCE: &str = "```json";

    /// Cheap shape check: text delimited like a JSON object or array.
    fn looks_like_json(text: &str) -> bool {
        (text.starts_with('{') && text.ends_with('}'))
            || (text.starts_with('[') && text.ends_with(']'))
    }

    /// Text between the start of `after_opener` and the next fence, if any.
    fn fenced_body(after_opener: &str) -> Option<&str> {
        after_opener
            .find(FENCE)
            .map(|close| &after_opener[..close])
    }

    // Strategy 1: an explicitly tagged ```json block.
    if let Some(open) = response.find(JSON_FENCE) {
        let body = fenced_body(&response[open + JSON_FENCE.len()..]).map(str::trim);
        if let Some(json) = body.filter(|candidate| looks_like_json(candidate)) {
            return Ok(json.to_string());
        }
    }

    // Strategy 2: the first fenced block, whatever its tag.
    if let Some(open) = response.find(FENCE) {
        if let Some(body) = fenced_body(&response[open + FENCE.len()..]) {
            let trimmed = body.trim();
            let candidate = if looks_like_json(trimmed) {
                // JSON starts right after the fence: there is no tag to drop.
                trimmed
            } else {
                // Treat the first line as a language tag and look past it.
                body.split_once('\n')
                    .map_or(trimmed, |(_tag, rest)| rest.trim())
            };
            if looks_like_json(candidate) {
                return Ok(candidate.to_string());
            }
        }
    }

    // Strategy 3: raw JSON embedded in prose. Objects are preferred so that
    // bracketed prose before an object (e.g. "[note] {...}") is not mistaken
    // for the payload.
    for &(open, close) in &[('{', '}'), ('[', ']')] {
        if let (Some(start), Some(end)) = (response.find(open), response.rfind(close)) {
            if start < end {
                return Ok(response[start..=end].to_string());
            }
        }
    }

    Err("Could not find JSON in LLM response".to_string())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a finalized transcript entry so each test only spells out the
    /// values it cares about.
    fn entry(id: &str, speaker: &str, start: f32, end: f32, text: &str) -> TranscriptEntry {
        TranscriptEntry {
            id: id.to_string(),
            speaker: speaker.to_string(),
            start,
            end,
            text: text.to_string(),
            is_final: true,
        }
    }

    /// A single entry is rendered as `[start] speaker: text`.
    #[test]
    fn test_format_transcript() {
        let entries = vec![entry("1", "Speaker 0", 0.0, 2.5, "Hello world")];

        let formatted = format_transcript_for_analysis(&entries);

        assert!(formatted.contains("[0.0s] Speaker 0: Hello world"));
    }

    /// Two speakers with equal word counts and durations split contribution evenly.
    #[test]
    fn test_speaker_stats() {
        let entries = vec![
            entry("1", "Speaker 0", 0.0, 5.0, "One two three four five"),
            entry("2", "Speaker 1", 5.0, 10.0, "Six seven eight nine ten"),
        ];

        let stats = calculate_speaker_stats(&entries);

        assert_eq!(stats.len(), 2);
        for speaker_stats in &stats {
            assert_eq!(speaker_stats.word_count, 5);
            assert_eq!(speaker_stats.speaking_time_seconds, 5.0);
            assert!((speaker_stats.contribution_percentage - 50.0).abs() < 0.1);
        }
    }

    /// JSON inside a ```json fence is returned without the fence or surrounding prose.
    #[test]
    fn test_extract_json_from_response() {
        let response = r#"Here is the analysis:
```json
{"key": "value"}
```
Done."#;

        let json = extract_json_from_response(response).unwrap();

        assert_eq!(json, r#"{"key": "value"}"#);
    }

    /// Unfenced JSON embedded in prose is located by its braces.
    #[test]
    fn test_extract_raw_json() {
        let response = r#"Analysis: {"key": "value"}"#;

        let json = extract_json_from_response(response).unwrap();

        assert_eq!(json, r#"{"key": "value"}"#);
    }
}