diff options
Diffstat (limited to 'makima/src/llm/markdown.rs')
| -rw-r--r-- | makima/src/llm/markdown.rs | 334 |
1 files changed, 334 insertions, 0 deletions
diff --git a/makima/src/llm/markdown.rs b/makima/src/llm/markdown.rs new file mode 100644 index 0000000..482dc8c --- /dev/null +++ b/makima/src/llm/markdown.rs @@ -0,0 +1,334 @@ +//! Markdown conversion utilities for BodyElement arrays. +//! +//! Provides bidirectional conversion between structured BodyElement[] and markdown strings. + +use crate::db::models::BodyElement; + +/// Convert a slice of BodyElements to a markdown string. +/// +/// Handles: +/// - Headings: `# heading` through `###### heading` based on level +/// - Paragraphs: plain text with blank lines between +/// - Code blocks: ````language\ncontent\n```` +/// - Lists: ordered (1. 2. 3.) and unordered (- - -) +/// - Charts: rendered as fenced JSON with chart type +/// - Images: rendered as markdown image syntax +pub fn body_to_markdown(elements: &[BodyElement]) -> String { + elements + .iter() + .filter_map(|elem| match elem { + BodyElement::Heading { level, text } => { + let hashes = "#".repeat((*level).min(6) as usize); + Some(format!("{} {}", hashes, text)) + } + BodyElement::Paragraph { text } => Some(text.clone()), + BodyElement::Code { language, content } => { + let lang = language.as_deref().unwrap_or(""); + Some(format!("```{}\n{}\n```", lang, content)) + } + BodyElement::List { ordered, items } => { + let list: Vec<String> = items + .iter() + .enumerate() + .map(|(i, item)| { + if *ordered { + format!("{}. {}", i + 1, item) + } else { + format!("- {}", item) + } + }) + .collect(); + Some(list.join("\n")) + } + BodyElement::Chart { + chart_type, + title, + data, + config: _, + } => { + // Render chart as a fenced block with metadata + let title_str = title + .as_ref() + .map(|t| format!(" - {}", t)) + .unwrap_or_default(); + let data_str = serde_json::to_string_pretty(data).unwrap_or_default(); + Some(format!( + "```chart:{:?}{}\n{}\n```", + chart_type, title_str, data_str + )) + } + BodyElement::Image { src, alt, caption } => { + let alt_text = alt.as_deref().unwrap_or("image"); + let caption_str = caption + .as_ref() + .map(|c| format!("\n*{}*", c)) + .unwrap_or_default(); + Some(format!("{}", alt_text, src, caption_str)) + } + // Markdown elements output their content directly - it's already markdown + BodyElement::Markdown { content } => Some(content.clone()), + }) + .collect::<Vec<_>>() + .join("\n\n") +} + +/// Parse a markdown string into a vector of BodyElements. +/// +/// Handles: +/// - Headings: lines starting with # through ###### +/// - Code blocks: ````language ... ```` +/// - Ordered lists: lines starting with 1. 2. etc. +/// - Unordered lists: lines starting with - or * +/// - Paragraphs: all other non-empty lines +pub fn markdown_to_body(markdown: &str) -> Vec<BodyElement> { + let mut elements = Vec::new(); + let lines: Vec<&str> = markdown.lines().collect(); + let mut i = 0; + + while i < lines.len() { + let line = lines[i]; + let trimmed = line.trim(); + + // Skip empty lines + if trimmed.is_empty() { + i += 1; + continue; + } + + // Check for code blocks + if trimmed.starts_with("```") { + let language = trimmed.trim_start_matches('`').trim(); + let language = if language.is_empty() { + None + } else { + Some(language.to_string()) + }; + + let mut content_lines = Vec::new(); + i += 1; + + // Collect content until closing ``` + while i < lines.len() && !lines[i].trim().starts_with("```") { + content_lines.push(lines[i]); + i += 1; + } + + // Skip the closing ``` + if i < lines.len() { + i += 1; + } + + elements.push(BodyElement::Code { + language, + content: content_lines.join("\n"), + }); + continue; + } + + // Check for headings + if trimmed.starts_with('#') { + let level = trimmed.chars().take_while(|&c| c == '#').count() as u8; + let text = trimmed.trim_start_matches('#').trim().to_string(); + elements.push(BodyElement::Heading { level, text }); + i += 1; + continue; + } + + // Check for unordered lists (- or *) + if trimmed.starts_with("- ") || trimmed.starts_with("* ") { + let mut items = Vec::new(); + while i < lines.len() { + let current = lines[i].trim(); + if current.starts_with("- ") || current.starts_with("* ") { + items.push(current[2..].to_string()); + i += 1; + } else if current.is_empty() { + i += 1; + break; + } else { + break; + } + } + elements.push(BodyElement::List { + ordered: false, + items, + }); + continue; + } + + // Check for ordered lists (1. 2. etc.) + if let Some(rest) = try_parse_ordered_list_item(trimmed) { + let mut items = Vec::new(); + items.push(rest.to_string()); + i += 1; + + while i < lines.len() { + let current = lines[i].trim(); + if let Some(item_rest) = try_parse_ordered_list_item(current) { + items.push(item_rest.to_string()); + i += 1; + } else if current.is_empty() { + i += 1; + break; + } else { + break; + } + } + elements.push(BodyElement::List { + ordered: true, + items, + }); + continue; + } + + // Default: paragraph (collect consecutive non-empty lines) + let mut para_lines = Vec::new(); + while i < lines.len() { + let current = lines[i].trim(); + if current.is_empty() + || current.starts_with('#') + || current.starts_with("```") + || current.starts_with("- ") + || current.starts_with("* ") + || try_parse_ordered_list_item(current).is_some() + { + break; + } + para_lines.push(current); + i += 1; + } + + if !para_lines.is_empty() { + elements.push(BodyElement::Paragraph { + text: para_lines.join(" "), + }); + } + } + + elements +} + +/// Try to parse an ordered list item (e.g., "1. Item text") +/// Returns the text after the number and period, or None if not a list item. +fn try_parse_ordered_list_item(s: &str) -> Option<&str> { + let mut chars = s.char_indices(); + + // Must start with a digit + let (_, first) = chars.next()?; + if !first.is_ascii_digit() { + return None; + } + + // Consume remaining digits + let mut last_digit_end = 1; + for (idx, c) in chars.by_ref() { + if c.is_ascii_digit() { + last_digit_end = idx + 1; + } else if c == '.' { + // Found the period - check for space after + let rest = &s[last_digit_end + 1..]; + let rest = rest.trim_start(); + if !rest.is_empty() || s.ends_with(". ") { + return Some(rest); + } + return None; + } else { + return None; + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_body_to_markdown_heading() { + let elements = vec![BodyElement::Heading { + level: 2, + text: "Hello World".to_string(), + }]; + assert_eq!(body_to_markdown(&elements), "## Hello World"); + } + + #[test] + fn test_body_to_markdown_paragraph() { + let elements = vec![BodyElement::Paragraph { + text: "This is a paragraph.".to_string(), + }]; + assert_eq!(body_to_markdown(&elements), "This is a paragraph."); + } + + #[test] + fn test_body_to_markdown_code() { + let elements = vec![BodyElement::Code { + language: Some("rust".to_string()), + content: "fn main() {}".to_string(), + }]; + assert_eq!( + body_to_markdown(&elements), + "```rust\nfn main() {}\n```" + ); + } + + #[test] + fn test_body_to_markdown_list() { + let elements = vec![BodyElement::List { + ordered: false, + items: vec!["Item 1".to_string(), "Item 2".to_string()], + }]; + assert_eq!(body_to_markdown(&elements), "- Item 1\n- Item 2"); + } + + #[test] + fn test_markdown_to_body_heading() { + let md = "## Hello World"; + let elements = markdown_to_body(md); + assert_eq!(elements.len(), 1); + match &elements[0] { + BodyElement::Heading { level, text } => { + assert_eq!(*level, 2); + assert_eq!(text, "Hello World"); + } + _ => panic!("Expected Heading"), + } + } + + #[test] + fn test_markdown_to_body_code() { + let md = "```rust\nfn main() {}\n```"; + let elements = markdown_to_body(md); + assert_eq!(elements.len(), 1); + match &elements[0] { + BodyElement::Code { language, content } => { + assert_eq!(language.as_deref(), Some("rust")); + assert_eq!(content, "fn main() {}"); + } + _ => panic!("Expected Code"), + } + } + + #[test] + fn test_roundtrip() { + let original = vec![ + BodyElement::Heading { + level: 1, + text: "Title".to_string(), + }, + BodyElement::Paragraph { + text: "Some text here.".to_string(), + }, + BodyElement::List { + ordered: false, + items: vec!["A".to_string(), "B".to_string()], + }, + ]; + + let markdown = body_to_markdown(&original); + let parsed = markdown_to_body(&markdown); + + assert_eq!(parsed.len(), 3); + } +} |
