summaryrefslogtreecommitdiff
path: root/makima/src/llm/markdown.rs
diff options
context:
space:
mode:
Diffstat (limited to 'makima/src/llm/markdown.rs')
-rw-r--r--makima/src/llm/markdown.rs334
1 files changed, 0 insertions, 334 deletions
diff --git a/makima/src/llm/markdown.rs b/makima/src/llm/markdown.rs
deleted file mode 100644
index 482dc8c..0000000
--- a/makima/src/llm/markdown.rs
+++ /dev/null
@@ -1,334 +0,0 @@
-//! Markdown conversion utilities for BodyElement arrays.
-//!
-//! Provides bidirectional conversion between structured BodyElement[] and markdown strings.
-
-use crate::db::models::BodyElement;
-
-/// Convert a slice of BodyElements to a markdown string.
-///
-/// Handles:
-/// - Headings: `# heading` through `###### heading` based on level
-/// - Paragraphs: plain text with blank lines between
-/// - Code blocks: ````language\ncontent\n````
-/// - Lists: ordered (1. 2. 3.) and unordered (- - -)
-/// - Charts: rendered as fenced JSON with chart type
-/// - Images: rendered as markdown image syntax
-pub fn body_to_markdown(elements: &[BodyElement]) -> String {
- elements
- .iter()
- .filter_map(|elem| match elem {
- BodyElement::Heading { level, text } => {
- let hashes = "#".repeat((*level).min(6) as usize);
- Some(format!("{} {}", hashes, text))
- }
- BodyElement::Paragraph { text } => Some(text.clone()),
- BodyElement::Code { language, content } => {
- let lang = language.as_deref().unwrap_or("");
- Some(format!("```{}\n{}\n```", lang, content))
- }
- BodyElement::List { ordered, items } => {
- let list: Vec<String> = items
- .iter()
- .enumerate()
- .map(|(i, item)| {
- if *ordered {
- format!("{}. {}", i + 1, item)
- } else {
- format!("- {}", item)
- }
- })
- .collect();
- Some(list.join("\n"))
- }
- BodyElement::Chart {
- chart_type,
- title,
- data,
- config: _,
- } => {
- // Render chart as a fenced block with metadata
- let title_str = title
- .as_ref()
- .map(|t| format!(" - {}", t))
- .unwrap_or_default();
- let data_str = serde_json::to_string_pretty(data).unwrap_or_default();
- Some(format!(
- "```chart:{:?}{}\n{}\n```",
- chart_type, title_str, data_str
- ))
- }
- BodyElement::Image { src, alt, caption } => {
- let alt_text = alt.as_deref().unwrap_or("image");
- let caption_str = caption
- .as_ref()
- .map(|c| format!("\n*{}*", c))
- .unwrap_or_default();
- Some(format!("![{}]({}){}", alt_text, src, caption_str))
- }
- // Markdown elements output their content directly - it's already markdown
- BodyElement::Markdown { content } => Some(content.clone()),
- })
- .collect::<Vec<_>>()
- .join("\n\n")
-}
-
-/// Parse a markdown string into a vector of BodyElements.
-///
-/// Handles:
-/// - Headings: lines starting with # through ######
-/// - Code blocks: ````language ... ````
-/// - Ordered lists: lines starting with 1. 2. etc.
-/// - Unordered lists: lines starting with - or *
-/// - Paragraphs: all other non-empty lines
-pub fn markdown_to_body(markdown: &str) -> Vec<BodyElement> {
- let mut elements = Vec::new();
- let lines: Vec<&str> = markdown.lines().collect();
- let mut i = 0;
-
- while i < lines.len() {
- let line = lines[i];
- let trimmed = line.trim();
-
- // Skip empty lines
- if trimmed.is_empty() {
- i += 1;
- continue;
- }
-
- // Check for code blocks
- if trimmed.starts_with("```") {
- let language = trimmed.trim_start_matches('`').trim();
- let language = if language.is_empty() {
- None
- } else {
- Some(language.to_string())
- };
-
- let mut content_lines = Vec::new();
- i += 1;
-
- // Collect content until closing ```
- while i < lines.len() && !lines[i].trim().starts_with("```") {
- content_lines.push(lines[i]);
- i += 1;
- }
-
- // Skip the closing ```
- if i < lines.len() {
- i += 1;
- }
-
- elements.push(BodyElement::Code {
- language,
- content: content_lines.join("\n"),
- });
- continue;
- }
-
- // Check for headings
- if trimmed.starts_with('#') {
- let level = trimmed.chars().take_while(|&c| c == '#').count() as u8;
- let text = trimmed.trim_start_matches('#').trim().to_string();
- elements.push(BodyElement::Heading { level, text });
- i += 1;
- continue;
- }
-
- // Check for unordered lists (- or *)
- if trimmed.starts_with("- ") || trimmed.starts_with("* ") {
- let mut items = Vec::new();
- while i < lines.len() {
- let current = lines[i].trim();
- if current.starts_with("- ") || current.starts_with("* ") {
- items.push(current[2..].to_string());
- i += 1;
- } else if current.is_empty() {
- i += 1;
- break;
- } else {
- break;
- }
- }
- elements.push(BodyElement::List {
- ordered: false,
- items,
- });
- continue;
- }
-
- // Check for ordered lists (1. 2. etc.)
- if let Some(rest) = try_parse_ordered_list_item(trimmed) {
- let mut items = Vec::new();
- items.push(rest.to_string());
- i += 1;
-
- while i < lines.len() {
- let current = lines[i].trim();
- if let Some(item_rest) = try_parse_ordered_list_item(current) {
- items.push(item_rest.to_string());
- i += 1;
- } else if current.is_empty() {
- i += 1;
- break;
- } else {
- break;
- }
- }
- elements.push(BodyElement::List {
- ordered: true,
- items,
- });
- continue;
- }
-
- // Default: paragraph (collect consecutive non-empty lines)
- let mut para_lines = Vec::new();
- while i < lines.len() {
- let current = lines[i].trim();
- if current.is_empty()
- || current.starts_with('#')
- || current.starts_with("```")
- || current.starts_with("- ")
- || current.starts_with("* ")
- || try_parse_ordered_list_item(current).is_some()
- {
- break;
- }
- para_lines.push(current);
- i += 1;
- }
-
- if !para_lines.is_empty() {
- elements.push(BodyElement::Paragraph {
- text: para_lines.join(" "),
- });
- }
- }
-
- elements
-}
-
/// Try to parse an ordered list item (e.g., "1. Item text").
///
/// An item is one or more ASCII digits, a period, and at least one whitespace
/// character, followed by the item text. Returns the text after the marker
/// with leading whitespace removed (possibly empty, as for `"1. "`), or
/// `None` if `s` is not a list item.
///
/// The whitespace after the period is required so that ordinary text that
/// merely starts with a number, such as `"3.14 is pi"` or `"1.foo"`, is not
/// mistaken for a list item.
fn try_parse_ordered_list_item(s: &str) -> Option<&str> {
    // Length of the leading run of ASCII digits; these are single bytes, so
    // `digits` is always a valid char boundary for slicing.
    let digits = s.bytes().take_while(u8::is_ascii_digit).count();
    if digits == 0 {
        return None;
    }

    let after_period = s[digits..].strip_prefix('.')?;
    if !after_period.starts_with(char::is_whitespace) {
        return None;
    }

    Some(after_period.trim_start())
}
-
/// Unit tests for the markdown <-> `BodyElement` conversions.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_body_to_markdown_heading() {
        let elements = vec![BodyElement::Heading {
            level: 2,
            text: "Hello World".to_string(),
        }];
        assert_eq!(body_to_markdown(&elements), "## Hello World");
    }

    #[test]
    fn test_body_to_markdown_paragraph() {
        let elements = vec![BodyElement::Paragraph {
            text: "This is a paragraph.".to_string(),
        }];
        assert_eq!(body_to_markdown(&elements), "This is a paragraph.");
    }

    #[test]
    fn test_body_to_markdown_code() {
        let elements = vec![BodyElement::Code {
            language: Some("rust".to_string()),
            content: "fn main() {}".to_string(),
        }];
        assert_eq!(
            body_to_markdown(&elements),
            "```rust\nfn main() {}\n```"
        );
    }

    #[test]
    fn test_body_to_markdown_list() {
        let elements = vec![BodyElement::List {
            ordered: false,
            items: vec!["Item 1".to_string(), "Item 2".to_string()],
        }];
        assert_eq!(body_to_markdown(&elements), "- Item 1\n- Item 2");
    }

    #[test]
    fn test_markdown_to_body_heading() {
        let md = "## Hello World";
        let elements = markdown_to_body(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            BodyElement::Heading { level, text } => {
                assert_eq!(*level, 2);
                assert_eq!(text, "Hello World");
            }
            _ => panic!("Expected Heading"),
        }
    }

    #[test]
    fn test_markdown_to_body_code() {
        let md = "```rust\nfn main() {}\n```";
        let elements = markdown_to_body(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            BodyElement::Code { language, content } => {
                assert_eq!(language.as_deref(), Some("rust"));
                assert_eq!(content, "fn main() {}");
            }
            _ => panic!("Expected Code"),
        }
    }

    #[test]
    fn test_markdown_to_body_unordered_list() {
        let md = "- A\n* B";
        let elements = markdown_to_body(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            BodyElement::List { ordered, items } => {
                assert!(!*ordered);
                assert_eq!(items, &vec!["A".to_string(), "B".to_string()]);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_markdown_to_body_ordered_list() {
        let md = "1. First\n2. Second";
        let elements = markdown_to_body(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            BodyElement::List { ordered, items } => {
                assert!(*ordered);
                assert_eq!(items, &vec!["First".to_string(), "Second".to_string()]);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_roundtrip() {
        let original = vec![
            BodyElement::Heading {
                level: 1,
                text: "Title".to_string(),
            },
            BodyElement::Paragraph {
                text: "Some text here.".to_string(),
            },
            BodyElement::List {
                ordered: false,
                items: vec!["A".to_string(), "B".to_string()],
            },
        ];

        let markdown = body_to_markdown(&original);
        let parsed = markdown_to_body(&markdown);

        // Check the content of every element, not just how many came back.
        assert_eq!(parsed.len(), 3);
        match &parsed[0] {
            BodyElement::Heading { level, text } => {
                assert_eq!(*level, 1);
                assert_eq!(text, "Title");
            }
            _ => panic!("Expected Heading"),
        }
        match &parsed[1] {
            BodyElement::Paragraph { text } => {
                assert_eq!(text, "Some text here.");
            }
            _ => panic!("Expected Paragraph"),
        }
        match &parsed[2] {
            BodyElement::List { ordered, items } => {
                assert!(!*ordered);
                assert_eq!(items, &vec!["A".to_string(), "B".to_string()]);
            }
            _ => panic!("Expected List"),
        }
    }
}