summaryrefslogblamecommitdiff
path: root/makima/src/llm/markdown.rs
blob: 482dc8c7b1e11cd71db83c39fcb006363dca8bf1 (plain) (tree)













































































































































































































































































































































                                                                                            
//! Markdown conversion utilities for BodyElement arrays.
//!
//! Provides bidirectional conversion between structured BodyElement[] and markdown strings.

use crate::db::models::BodyElement;

/// Convert a slice of BodyElements to a markdown string.
///
/// Each element is rendered on its own and the rendered pieces are joined
/// with a blank line (`\n\n`). An empty slice produces an empty string.
///
/// Handles:
/// - Headings: `# heading` through `###### heading`; the level is clamped to
///   the range 1..=6 so a heading always gets at least one `#`
/// - Paragraphs: the text is emitted unchanged
/// - Code blocks: a triple-backtick fence, with the language (if any) placed
///   directly after the opening fence
/// - Lists: ordered (`1.`, `2.`, ...) and unordered (`- `), one item per line
/// - Charts: a fenced block whose opening line is `chart:` followed by the
///   `Debug` form of the chart type and, when present, ` - <title>`; the body
///   is the pretty-printed JSON of `data` (the `config` field is not rendered)
/// - Images: `![alt](src)`, where a missing alt text becomes `image`; a
///   caption, when present, follows on the next line in italics
/// - Markdown: the content is emitted verbatim
pub fn body_to_markdown(elements: &[BodyElement]) -> String {
    elements
        .iter()
        // Every variant renders to some text, so a plain `map` is enough.
        .map(|elem| match elem {
            BodyElement::Heading { level, text } => {
                // Level 0 would otherwise yield a heading without any `#`.
                let hashes = "#".repeat(usize::from((*level).clamp(1, 6)));
                format!("{} {}", hashes, text)
            }
            BodyElement::Paragraph { text } => text.clone(),
            BodyElement::Code { language, content } => {
                let lang = language.as_deref().unwrap_or("");
                format!("```{}\n{}\n```", lang, content)
            }
            BodyElement::List { ordered, items } => {
                let list: Vec<String> = items
                    .iter()
                    .enumerate()
                    .map(|(i, item)| {
                        if *ordered {
                            // Ordered items are numbered from 1.
                            format!("{}. {}", i + 1, item)
                        } else {
                            format!("- {}", item)
                        }
                    })
                    .collect();
                list.join("\n")
            }
            BodyElement::Chart {
                chart_type,
                title,
                data,
                config: _,
            } => {
                // Render chart as a fenced block with metadata
                let title_str = title
                    .as_ref()
                    .map(|t| format!(" - {}", t))
                    .unwrap_or_default();
                // A serialization failure leaves the block body empty rather
                // than aborting the whole conversion.
                let data_str = serde_json::to_string_pretty(data).unwrap_or_default();
                format!(
                    "```chart:{:?}{}\n{}\n```",
                    chart_type, title_str, data_str
                )
            }
            BodyElement::Image { src, alt, caption } => {
                let alt_text = alt.as_deref().unwrap_or("image");
                let caption_str = caption
                    .as_ref()
                    .map(|c| format!("\n*{}*", c))
                    .unwrap_or_default();
                format!("![{}]({}){}", alt_text, src, caption_str)
            }
            // Markdown elements output their content directly - it's already markdown
            BodyElement::Markdown { content } => content.clone(),
        })
        .collect::<Vec<_>>()
        .join("\n\n")
}

/// Parse a markdown string into a vector of BodyElements.
///
/// The input is processed line by line; every line is trimmed before it is
/// classified, so leading indentation does not affect the result. Blank lines
/// separate blocks and are otherwise skipped.
///
/// Handles:
/// - Headings: lines starting with `#`; the level is the number of leading
///   `#` characters, capped at 6
/// - Code blocks: everything between a line starting with three backticks and
///   the next such line (or the end of input if the fence is never closed);
///   text after the opening fence becomes the language
/// - Ordered lists: consecutive lines starting with 1. 2. etc.
/// - Unordered lists: consecutive lines starting with `- ` or `* `
/// - Paragraphs: all other non-empty lines; consecutive lines are joined with
///   a single space
pub fn markdown_to_body(markdown: &str) -> Vec<BodyElement> {
    // Markdown defines six heading levels; longer `#` runs are capped so the
    // narrowing conversion to `u8` below can never truncate.
    const MAX_HEADING_LEVEL: usize = 6;

    // True when the (already trimmed) line is an unordered list item.
    fn is_bullet(line: &str) -> bool {
        line.starts_with("- ") || line.starts_with("* ")
    }

    let lines: Vec<&str> = markdown.lines().collect();
    let mut elements = Vec::new();
    let mut i = 0;

    while i < lines.len() {
        let trimmed = lines[i].trim();

        // Skip empty lines
        if trimmed.is_empty() {
            i += 1;
            continue;
        }

        // Check for code blocks
        if trimmed.starts_with("```") {
            let info = trimmed.trim_start_matches('`').trim();
            let language = if info.is_empty() {
                None
            } else {
                Some(info.to_string())
            };
            i += 1;

            // Collect content until closing ```; the untrimmed lines are kept
            // so indentation inside the block survives.
            let mut content_lines = Vec::new();
            while i < lines.len() && !lines[i].trim().starts_with("```") {
                content_lines.push(lines[i]);
                i += 1;
            }

            // Skip the closing ``` (absent when the fence is unterminated)
            if i < lines.len() {
                i += 1;
            }

            elements.push(BodyElement::Code {
                language,
                content: content_lines.join("\n"),
            });
            continue;
        }

        // Check for headings
        if trimmed.starts_with('#') {
            let hashes = trimmed.chars().take_while(|&c| c == '#').count();
            // Bounded by MAX_HEADING_LEVEL, so the cast is lossless.
            let level = hashes.min(MAX_HEADING_LEVEL) as u8;
            let text = trimmed.trim_start_matches('#').trim().to_string();
            elements.push(BodyElement::Heading { level, text });
            i += 1;
            continue;
        }

        // Check for unordered lists (- or *)
        if is_bullet(trimmed) {
            let mut items = Vec::new();
            while i < lines.len() {
                let current = lines[i].trim();
                if is_bullet(current) {
                    // Both markers are two ASCII bytes, so slicing is safe.
                    items.push(current[2..].to_string());
                    i += 1;
                } else {
                    // A blank line terminates the list and is consumed; any
                    // other line is left for the next iteration.
                    if current.is_empty() {
                        i += 1;
                    }
                    break;
                }
            }
            elements.push(BodyElement::List {
                ordered: false,
                items,
            });
            continue;
        }

        // Check for ordered lists (1. 2. etc.)
        if try_parse_ordered_list_item(trimmed).is_some() {
            let mut items = Vec::new();
            while i < lines.len() {
                let current = lines[i].trim();
                if let Some(rest) = try_parse_ordered_list_item(current) {
                    items.push(rest.to_string());
                    i += 1;
                } else {
                    // Same termination rule as for unordered lists.
                    if current.is_empty() {
                        i += 1;
                    }
                    break;
                }
            }
            elements.push(BodyElement::List {
                ordered: true,
                items,
            });
            continue;
        }

        // Default: paragraph (collect consecutive non-empty lines). The
        // current line matched none of the block starters above, so at least
        // one line is always consumed here and the outer loop makes progress.
        let mut para_lines = Vec::new();
        while i < lines.len() {
            let current = lines[i].trim();
            let starts_new_block = current.is_empty()
                || current.starts_with('#')
                || current.starts_with("```")
                || is_bullet(current)
                || try_parse_ordered_list_item(current).is_some();
            if starts_new_block {
                break;
            }
            para_lines.push(current);
            i += 1;
        }

        if !para_lines.is_empty() {
            elements.push(BodyElement::Paragraph {
                text: para_lines.join(" "),
            });
        }
    }

    elements
}

/// Try to parse an ordered list item (e.g., "1. Item text").
///
/// A list item is one or more ASCII digits, a period, and at least one
/// whitespace character. Requiring the whitespace keeps text such as
/// "3.14 is pi" or "2024.01.01" from being mistaken for a list item.
///
/// Returns the text after the marker with leading whitespace removed (which
/// may be empty, e.g. for "1. "), or `None` if `s` is not a list item.
fn try_parse_ordered_list_item(s: &str) -> Option<&str> {
    // Length of the leading run of ASCII digits. Digits are single bytes, so
    // this count is also a valid char boundary for slicing.
    let digits = s.bytes().take_while(|b| b.is_ascii_digit()).count();
    if digits == 0 {
        return None;
    }

    // The digits must be followed directly by a period...
    let after_period = s[digits..].strip_prefix('.')?;

    // ...and the period by whitespace; a bare "1." is not treated as an item.
    if !after_period.starts_with(char::is_whitespace) {
        return None;
    }

    Some(after_period.trim_start())
}

#[cfg(test)]
mod tests {
    use super::*;

    // --- body_to_markdown: one test per element kind ---

    #[test]
    fn test_body_to_markdown_heading() {
        let elements = vec![BodyElement::Heading {
            level: 2,
            text: "Hello World".to_string(),
        }];
        assert_eq!(body_to_markdown(&elements), "## Hello World");
    }

    #[test]
    fn test_body_to_markdown_paragraph() {
        let elements = vec![BodyElement::Paragraph {
            text: "This is a paragraph.".to_string(),
        }];
        assert_eq!(body_to_markdown(&elements), "This is a paragraph.");
    }

    #[test]
    fn test_body_to_markdown_code() {
        let elements = vec![BodyElement::Code {
            language: Some("rust".to_string()),
            content: "fn main() {}".to_string(),
        }];
        assert_eq!(
            body_to_markdown(&elements),
            "```rust\nfn main() {}\n```"
        );
    }

    #[test]
    fn test_body_to_markdown_list() {
        let elements = vec![BodyElement::List {
            ordered: false,
            items: vec!["Item 1".to_string(), "Item 2".to_string()],
        }];
        assert_eq!(body_to_markdown(&elements), "- Item 1\n- Item 2");
    }

    // --- markdown_to_body ---

    #[test]
    fn test_markdown_to_body_heading() {
        let md = "## Hello World";
        let elements = markdown_to_body(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            BodyElement::Heading { level, text } => {
                assert_eq!(*level, 2);
                assert_eq!(text, "Hello World");
            }
            _ => panic!("Expected Heading"),
        }
    }

    #[test]
    fn test_markdown_to_body_code() {
        let md = "```rust\nfn main() {}\n```";
        let elements = markdown_to_body(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            BodyElement::Code { language, content } => {
                assert_eq!(language.as_deref(), Some("rust"));
                assert_eq!(content, "fn main() {}");
            }
            _ => panic!("Expected Code"),
        }
    }

    #[test]
    fn test_markdown_to_body_ordered_list() {
        let md = "1. First\n2. Second";
        let elements = markdown_to_body(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            BodyElement::List { ordered, items } => {
                assert!(*ordered);
                assert_eq!(*items, vec!["First".to_string(), "Second".to_string()]);
            }
            _ => panic!("Expected List"),
        }
    }

    // --- roundtrip: render, parse back, and compare content ---

    #[test]
    fn test_roundtrip() {
        let original = vec![
            BodyElement::Heading {
                level: 1,
                text: "Title".to_string(),
            },
            BodyElement::Paragraph {
                text: "Some text here.".to_string(),
            },
            BodyElement::List {
                ordered: false,
                items: vec!["A".to_string(), "B".to_string()],
            },
        ];

        let markdown = body_to_markdown(&original);
        let parsed = markdown_to_body(&markdown);

        // Checking only the length would let a roundtrip that mangles the
        // content pass, so every element is inspected.
        assert_eq!(parsed.len(), 3);
        match &parsed[0] {
            BodyElement::Heading { level, text } => {
                assert_eq!(*level, 1);
                assert_eq!(text, "Title");
            }
            _ => panic!("Expected Heading"),
        }
        match &parsed[1] {
            BodyElement::Paragraph { text } => {
                assert_eq!(text, "Some text here.");
            }
            _ => panic!("Expected Paragraph"),
        }
        match &parsed[2] {
            BodyElement::List { ordered, items } => {
                assert!(!*ordered);
                assert_eq!(*items, vec!["A".to_string(), "B".to_string()]);
            }
            _ => panic!("Expected List"),
        }
    }
}