diff options
Diffstat (limited to 'makima/src/llm/markdown.rs')
| -rw-r--r-- | makima/src/llm/markdown.rs | 334 |
1 files changed, 0 insertions, 334 deletions
diff --git a/makima/src/llm/markdown.rs b/makima/src/llm/markdown.rs deleted file mode 100644 index 482dc8c..0000000 --- a/makima/src/llm/markdown.rs +++ /dev/null @@ -1,334 +0,0 @@ -//! Markdown conversion utilities for BodyElement arrays. -//! -//! Provides bidirectional conversion between structured BodyElement[] and markdown strings. - -use crate::db::models::BodyElement; - -/// Convert a slice of BodyElements to a markdown string. -/// -/// Handles: -/// - Headings: `# heading` through `###### heading` based on level -/// - Paragraphs: plain text with blank lines between -/// - Code blocks: ````language\ncontent\n```` -/// - Lists: ordered (1. 2. 3.) and unordered (- - -) -/// - Charts: rendered as fenced JSON with chart type -/// - Images: rendered as markdown image syntax -pub fn body_to_markdown(elements: &[BodyElement]) -> String { - elements - .iter() - .filter_map(|elem| match elem { - BodyElement::Heading { level, text } => { - let hashes = "#".repeat((*level).min(6) as usize); - Some(format!("{} {}", hashes, text)) - } - BodyElement::Paragraph { text } => Some(text.clone()), - BodyElement::Code { language, content } => { - let lang = language.as_deref().unwrap_or(""); - Some(format!("```{}\n{}\n```", lang, content)) - } - BodyElement::List { ordered, items } => { - let list: Vec<String> = items - .iter() - .enumerate() - .map(|(i, item)| { - if *ordered { - format!("{}. {}", i + 1, item) - } else { - format!("- {}", item) - } - }) - .collect(); - Some(list.join("\n")) - } - BodyElement::Chart { - chart_type, - title, - data, - config: _, - } => { - // Render chart as a fenced block with metadata - let title_str = title - .as_ref() - .map(|t| format!(" - {}", t)) - .unwrap_or_default(); - let data_str = serde_json::to_string_pretty(data).unwrap_or_default(); - Some(format!( - "```chart:{:?}{}\n{}\n```", - chart_type, title_str, data_str - )) - } - BodyElement::Image { src, alt, caption } => { - let alt_text = alt.as_deref().unwrap_or("image"); - let caption_str = caption - .as_ref() - .map(|c| format!("\n*{}*", c)) - .unwrap_or_default(); - Some(format!("{}", alt_text, src, caption_str)) - } - // Markdown elements output their content directly - it's already markdown - BodyElement::Markdown { content } => Some(content.clone()), - }) - .collect::<Vec<_>>() - .join("\n\n") -} - -/// Parse a markdown string into a vector of BodyElements. -/// -/// Handles: -/// - Headings: lines starting with # through ###### -/// - Code blocks: ````language ... ```` -/// - Ordered lists: lines starting with 1. 2. etc. -/// - Unordered lists: lines starting with - or * -/// - Paragraphs: all other non-empty lines -pub fn markdown_to_body(markdown: &str) -> Vec<BodyElement> { - let mut elements = Vec::new(); - let lines: Vec<&str> = markdown.lines().collect(); - let mut i = 0; - - while i < lines.len() { - let line = lines[i]; - let trimmed = line.trim(); - - // Skip empty lines - if trimmed.is_empty() { - i += 1; - continue; - } - - // Check for code blocks - if trimmed.starts_with("```") { - let language = trimmed.trim_start_matches('`').trim(); - let language = if language.is_empty() { - None - } else { - Some(language.to_string()) - }; - - let mut content_lines = Vec::new(); - i += 1; - - // Collect content until closing ``` - while i < lines.len() && !lines[i].trim().starts_with("```") { - content_lines.push(lines[i]); - i += 1; - } - - // Skip the closing ``` - if i < lines.len() { - i += 1; - } - - elements.push(BodyElement::Code { - language, - content: content_lines.join("\n"), - }); - continue; - } - - // Check for headings - if trimmed.starts_with('#') { - let level = trimmed.chars().take_while(|&c| c == '#').count() as u8; - let text = trimmed.trim_start_matches('#').trim().to_string(); - elements.push(BodyElement::Heading { level, text }); - i += 1; - continue; - } - - // Check for unordered lists (- or *) - if trimmed.starts_with("- ") || trimmed.starts_with("* ") { - let mut items = Vec::new(); - while i < lines.len() { - let current = lines[i].trim(); - if current.starts_with("- ") || current.starts_with("* ") { - items.push(current[2..].to_string()); - i += 1; - } else if current.is_empty() { - i += 1; - break; - } else { - break; - } - } - elements.push(BodyElement::List { - ordered: false, - items, - }); - continue; - } - - // Check for ordered lists (1. 2. etc.) - if let Some(rest) = try_parse_ordered_list_item(trimmed) { - let mut items = Vec::new(); - items.push(rest.to_string()); - i += 1; - - while i < lines.len() { - let current = lines[i].trim(); - if let Some(item_rest) = try_parse_ordered_list_item(current) { - items.push(item_rest.to_string()); - i += 1; - } else if current.is_empty() { - i += 1; - break; - } else { - break; - } - } - elements.push(BodyElement::List { - ordered: true, - items, - }); - continue; - } - - // Default: paragraph (collect consecutive non-empty lines) - let mut para_lines = Vec::new(); - while i < lines.len() { - let current = lines[i].trim(); - if current.is_empty() - || current.starts_with('#') - || current.starts_with("```") - || current.starts_with("- ") - || current.starts_with("* ") - || try_parse_ordered_list_item(current).is_some() - { - break; - } - para_lines.push(current); - i += 1; - } - - if !para_lines.is_empty() { - elements.push(BodyElement::Paragraph { - text: para_lines.join(" "), - }); - } - } - - elements -} - -/// Try to parse an ordered list item (e.g., "1. Item text") -/// Returns the text after the number and period, or None if not a list item. -fn try_parse_ordered_list_item(s: &str) -> Option<&str> { - let mut chars = s.char_indices(); - - // Must start with a digit - let (_, first) = chars.next()?; - if !first.is_ascii_digit() { - return None; - } - - // Consume remaining digits - let mut last_digit_end = 1; - for (idx, c) in chars.by_ref() { - if c.is_ascii_digit() { - last_digit_end = idx + 1; - } else if c == '.' { - // Found the period - check for space after - let rest = &s[last_digit_end + 1..]; - let rest = rest.trim_start(); - if !rest.is_empty() || s.ends_with(". ") { - return Some(rest); - } - return None; - } else { - return None; - } - } - - None -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_body_to_markdown_heading() { - let elements = vec![BodyElement::Heading { - level: 2, - text: "Hello World".to_string(), - }]; - assert_eq!(body_to_markdown(&elements), "## Hello World"); - } - - #[test] - fn test_body_to_markdown_paragraph() { - let elements = vec![BodyElement::Paragraph { - text: "This is a paragraph.".to_string(), - }]; - assert_eq!(body_to_markdown(&elements), "This is a paragraph."); - } - - #[test] - fn test_body_to_markdown_code() { - let elements = vec![BodyElement::Code { - language: Some("rust".to_string()), - content: "fn main() {}".to_string(), - }]; - assert_eq!( - body_to_markdown(&elements), - "```rust\nfn main() {}\n```" - ); - } - - #[test] - fn test_body_to_markdown_list() { - let elements = vec![BodyElement::List { - ordered: false, - items: vec!["Item 1".to_string(), "Item 2".to_string()], - }]; - assert_eq!(body_to_markdown(&elements), "- Item 1\n- Item 2"); - } - - #[test] - fn test_markdown_to_body_heading() { - let md = "## Hello World"; - let elements = markdown_to_body(md); - assert_eq!(elements.len(), 1); - match &elements[0] { - BodyElement::Heading { level, text } => { - assert_eq!(*level, 2); - assert_eq!(text, "Hello World"); - } - _ => panic!("Expected Heading"), - } - } - - #[test] - fn test_markdown_to_body_code() { - let md = "```rust\nfn main() {}\n```"; - let elements = markdown_to_body(md); - assert_eq!(elements.len(), 1); - match &elements[0] { - BodyElement::Code { language, content } => { - assert_eq!(language.as_deref(), Some("rust")); - assert_eq!(content, "fn main() {}"); - } - _ => panic!("Expected Code"), - } - } - - #[test] - fn test_roundtrip() { - let original = vec![ - BodyElement::Heading { - level: 1, - text: "Title".to_string(), - }, - BodyElement::Paragraph { - text: "Some text here.".to_string(), - }, - BodyElement::List { - ordered: false, - items: vec!["A".to_string(), "B".to_string()], - }, - ]; - - let markdown = body_to_markdown(&original); - let parsed = markdown_to_body(&markdown); - - assert_eq!(parsed.len(), 3); - } -} |
