summaryrefslogtreecommitdiff
path: root/makima/src/llm/markdown.rs
diff options
context:
space:
mode:
authorsoryu <soryu@soryu.co>2026-01-11 05:52:14 +0000
committersoryu <soryu@soryu.co>2026-01-15 00:21:16 +0000
commit87044a747b47bd83249d61a45842c7f7b2eae56d (patch)
treeef2000ce79ffcc2723ef841acef5aa1deb1d5378 /makima/src/llm/markdown.rs
parent077820c4167c168072d217a1b01df840463a12a8 (diff)
downloadsoryu-87044a747b47bd83249d61a45842c7f7b2eae56d.tar.gz
soryu-87044a747b47bd83249d61a45842c7f7b2eae56d.zip
Contract system
Diffstat (limited to 'makima/src/llm/markdown.rs')
-rw-r--r--makima/src/llm/markdown.rs334
1 files changed, 334 insertions, 0 deletions
diff --git a/makima/src/llm/markdown.rs b/makima/src/llm/markdown.rs
new file mode 100644
index 0000000..482dc8c
--- /dev/null
+++ b/makima/src/llm/markdown.rs
@@ -0,0 +1,334 @@
+//! Markdown conversion utilities for BodyElement arrays.
+//!
+//! Provides bidirectional conversion between structured BodyElement[] and markdown strings.
+
+use crate::db::models::BodyElement;
+
+/// Convert a slice of BodyElements to a markdown string.
+///
+/// Every element is rendered independently and the pieces are joined with a
+/// blank line (`"\n\n"`). An empty slice yields an empty string.
+///
+/// Handles:
+/// - Headings: `# heading` through `###### heading`; the level is clamped to
+///   `1..=6` so at least one `#` is always emitted
+/// - Paragraphs: the text unchanged
+/// - Code blocks: a triple-backtick fence with the optional language on the
+///   opening line
+/// - Lists: ordered (`1.`, `2.`, ...) or unordered (`-`), one item per line
+/// - Charts: a fence tagged `chart:<type>` (using the `Debug` form of the chart
+///   type, followed by ` - <title>` when a title is set) wrapping the
+///   pretty-printed JSON data; `config` is not rendered
+/// - Images: `![alt](src)` with `image` as the fallback alt text and the
+///   caption, if any, in italics on the next line
+/// - Markdown: the content verbatim
+pub fn body_to_markdown(elements: &[BodyElement]) -> String {
+    elements
+        .iter()
+        // Every variant renders to something, so a plain `map` is enough.
+        .map(|elem| match elem {
+            BodyElement::Heading { level, text } => {
+                // Level 0 would produce no `#` at all (the text would read back
+                // as a paragraph with a stray leading space); levels above 6
+                // do not exist in markdown.
+                let hashes = "#".repeat((*level).clamp(1, 6) as usize);
+                format!("{} {}", hashes, text)
+            }
+            BodyElement::Paragraph { text } => text.clone(),
+            BodyElement::Code { language, content } => {
+                let lang = language.as_deref().unwrap_or("");
+                format!("```{}\n{}\n```", lang, content)
+            }
+            BodyElement::List { ordered, items } => items
+                .iter()
+                .enumerate()
+                .map(|(i, item)| {
+                    if *ordered {
+                        format!("{}. {}", i + 1, item)
+                    } else {
+                        format!("- {}", item)
+                    }
+                })
+                .collect::<Vec<_>>()
+                .join("\n"),
+            BodyElement::Chart {
+                chart_type,
+                title,
+                data,
+                config: _,
+            } => {
+                // Render chart as a fenced block with metadata
+                let title_str = title
+                    .as_ref()
+                    .map(|t| format!(" - {}", t))
+                    .unwrap_or_default();
+                // Best effort: a serialization failure leaves the fence body empty.
+                let data_str = serde_json::to_string_pretty(data).unwrap_or_default();
+                format!(
+                    "```chart:{:?}{}\n{}\n```",
+                    chart_type, title_str, data_str
+                )
+            }
+            BodyElement::Image { src, alt, caption } => {
+                let alt_text = alt.as_deref().unwrap_or("image");
+                let caption_str = caption
+                    .as_ref()
+                    .map(|c| format!("\n*{}*", c))
+                    .unwrap_or_default();
+                format!("![{}]({}){}", alt_text, src, caption_str)
+            }
+            // Markdown elements output their content directly - it's already markdown
+            BodyElement::Markdown { content } => content.clone(),
+        })
+        .collect::<Vec<_>>()
+        .join("\n\n")
+}
+
+/// Parse a markdown string into a vector of BodyElements.
+///
+/// The parser is line-based and lenient; every line is trimmed before it is
+/// classified. Blank lines separate elements and are otherwise ignored.
+///
+/// Handles:
+/// - Code blocks: from a line starting with a triple-backtick fence (whatever
+///   follows the backticks is the language) up to the next line starting with
+///   a fence, or to the end of the input if the fence is never closed
+/// - Headings: lines starting with `#`; the number of leading `#` is the
+///   level, capped at 6
+/// - Unordered lists: consecutive lines starting with `- ` or `* `
+/// - Ordered lists: consecutive lines accepted by `try_parse_ordered_list_item`
+/// - Paragraphs: any other run of non-empty lines, joined with single spaces
+pub fn markdown_to_body(markdown: &str) -> Vec<BodyElement> {
+    /// Text of an unordered list item (`- item` or `* item`), if `line` is one.
+    fn unordered_item(line: &str) -> Option<&str> {
+        line.strip_prefix("- ")
+            .or_else(|| line.strip_prefix("* "))
+            // Drop extra padding after the marker, as the ordered-list path does.
+            .map(str::trim_start)
+    }
+
+    let lines: Vec<&str> = markdown.lines().collect();
+    let mut elements = Vec::new();
+    let mut i = 0;
+
+    while i < lines.len() {
+        let trimmed = lines[i].trim();
+
+        // Skip empty lines
+        if trimmed.is_empty() {
+            i += 1;
+            continue;
+        }
+
+        // Check for code blocks
+        if trimmed.starts_with("```") {
+            let info = trimmed.trim_start_matches('`').trim();
+            let language = (!info.is_empty()).then(|| info.to_string());
+
+            // Collect content (untrimmed, to keep indentation) until closing ```
+            i += 1;
+            let start = i;
+            while i < lines.len() && !lines[i].trim().starts_with("```") {
+                i += 1;
+            }
+            let content = lines[start..i].join("\n");
+
+            // Skip the closing ``` (absent when the fence is never closed)
+            if i < lines.len() {
+                i += 1;
+            }
+
+            elements.push(BodyElement::Code { language, content });
+            continue;
+        }
+
+        // Check for headings
+        if trimmed.starts_with('#') {
+            let hashes = trimmed.chars().take_while(|&c| c == '#').count();
+            // Cap before narrowing: markdown has no level above 6, and a bare
+            // `as u8` would wrap around for very long runs of `#`.
+            let level = hashes.min(6) as u8;
+            let text = trimmed.trim_start_matches('#').trim().to_string();
+            elements.push(BodyElement::Heading { level, text });
+            i += 1;
+            continue;
+        }
+
+        // Check for unordered lists (- or *)
+        if unordered_item(trimmed).is_some() {
+            let mut items = Vec::new();
+            while i < lines.len() {
+                let current = lines[i].trim();
+                if let Some(item) = unordered_item(current) {
+                    items.push(item.to_string());
+                    i += 1;
+                } else {
+                    // A blank line terminates the list and is consumed with it.
+                    if current.is_empty() {
+                        i += 1;
+                    }
+                    break;
+                }
+            }
+            elements.push(BodyElement::List {
+                ordered: false,
+                items,
+            });
+            continue;
+        }
+
+        // Check for ordered lists (1. 2. etc.)
+        if try_parse_ordered_list_item(trimmed).is_some() {
+            let mut items = Vec::new();
+            while i < lines.len() {
+                let current = lines[i].trim();
+                if let Some(item) = try_parse_ordered_list_item(current) {
+                    items.push(item.to_string());
+                    i += 1;
+                } else {
+                    // A blank line terminates the list and is consumed with it.
+                    if current.is_empty() {
+                        i += 1;
+                    }
+                    break;
+                }
+            }
+            elements.push(BodyElement::List {
+                ordered: true,
+                items,
+            });
+            continue;
+        }
+
+        // Default: paragraph. The current line matched none of the block
+        // starts above, so it is consumed unconditionally; this guarantees the
+        // outer loop always makes progress.
+        let mut para_lines = vec![trimmed];
+        i += 1;
+        while i < lines.len() {
+            let current = lines[i].trim();
+            let starts_new_block = current.is_empty()
+                || current.starts_with('#')
+                || current.starts_with("```")
+                || unordered_item(current).is_some()
+                || try_parse_ordered_list_item(current).is_some();
+            if starts_new_block {
+                break;
+            }
+            para_lines.push(current);
+            i += 1;
+        }
+        elements.push(BodyElement::Paragraph {
+            text: para_lines.join(" "),
+        });
+    }
+
+    elements
+}
+
+/// Try to parse an ordered list item (e.g., "1. Item text").
+///
+/// A list item is one or more ASCII digits, a period, and then at least one
+/// whitespace character. Returns the text after the marker with leading
+/// whitespace removed (possibly empty, as for `"1. "`), or `None` if `s` is
+/// not a list item.
+///
+/// Requiring whitespace after the period keeps text that merely starts with a
+/// number, such as `3.14 is pi` or `1.2.3 released`, from being mistaken for
+/// a list item. A bare marker with nothing after the period (`"1."`) is not
+/// treated as an item.
+fn try_parse_ordered_list_item(s: &str) -> Option<&str> {
+    // Length of the leading run of ASCII digits; these are single bytes, so
+    // the count is also a valid byte offset into `s`.
+    let digits = s.bytes().take_while(u8::is_ascii_digit).count();
+    if digits == 0 {
+        return None;
+    }
+
+    let after_period = s[digits..].strip_prefix('.')?;
+    if !after_period.starts_with(char::is_whitespace) {
+        return None;
+    }
+
+    Some(after_period.trim_start())
+}
+
+/// Unit tests for both conversion directions and the ordered-list helper.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_body_to_markdown_heading() {
+        let elements = vec![BodyElement::Heading {
+            level: 2,
+            text: "Hello World".to_string(),
+        }];
+        assert_eq!(body_to_markdown(&elements), "## Hello World");
+    }
+
+    #[test]
+    fn test_body_to_markdown_paragraph() {
+        let elements = vec![BodyElement::Paragraph {
+            text: "This is a paragraph.".to_string(),
+        }];
+        assert_eq!(body_to_markdown(&elements), "This is a paragraph.");
+    }
+
+    #[test]
+    fn test_body_to_markdown_code() {
+        let elements = vec![BodyElement::Code {
+            language: Some("rust".to_string()),
+            content: "fn main() {}".to_string(),
+        }];
+        assert_eq!(
+            body_to_markdown(&elements),
+            "```rust\nfn main() {}\n```"
+        );
+    }
+
+    #[test]
+    fn test_body_to_markdown_list() {
+        let elements = vec![BodyElement::List {
+            ordered: false,
+            items: vec!["Item 1".to_string(), "Item 2".to_string()],
+        }];
+        assert_eq!(body_to_markdown(&elements), "- Item 1\n- Item 2");
+    }
+
+    #[test]
+    fn test_markdown_to_body_heading() {
+        let md = "## Hello World";
+        let elements = markdown_to_body(md);
+        assert_eq!(elements.len(), 1);
+        match &elements[0] {
+            BodyElement::Heading { level, text } => {
+                assert_eq!(*level, 2);
+                assert_eq!(text, "Hello World");
+            }
+            _ => panic!("Expected Heading"),
+        }
+    }
+
+    #[test]
+    fn test_markdown_to_body_code() {
+        let md = "```rust\nfn main() {}\n```";
+        let elements = markdown_to_body(md);
+        assert_eq!(elements.len(), 1);
+        match &elements[0] {
+            BodyElement::Code { language, content } => {
+                assert_eq!(language.as_deref(), Some("rust"));
+                assert_eq!(content, "fn main() {}");
+            }
+            _ => panic!("Expected Code"),
+        }
+    }
+
+    #[test]
+    fn test_markdown_to_body_ordered_list() {
+        let elements = markdown_to_body("1. First\n2. Second");
+        assert_eq!(elements.len(), 1);
+        match &elements[0] {
+            BodyElement::List { ordered, items } => {
+                assert!(*ordered);
+                assert_eq!(*items, ["First", "Second"]);
+            }
+            _ => panic!("Expected List"),
+        }
+    }
+
+    #[test]
+    fn test_markdown_to_body_paragraph_joins_lines() {
+        // Consecutive lines form one paragraph; a blank line starts the next.
+        let elements = markdown_to_body("line one\nline two\n\nnext");
+        assert_eq!(elements.len(), 2);
+        match &elements[0] {
+            BodyElement::Paragraph { text } => assert_eq!(text, "line one line two"),
+            _ => panic!("Expected Paragraph"),
+        }
+        match &elements[1] {
+            BodyElement::Paragraph { text } => assert_eq!(text, "next"),
+            _ => panic!("Expected Paragraph"),
+        }
+    }
+
+    #[test]
+    fn test_try_parse_ordered_list_item() {
+        assert_eq!(try_parse_ordered_list_item("1. Item"), Some("Item"));
+        assert_eq!(try_parse_ordered_list_item("10. Ten"), Some("Ten"));
+        assert_eq!(try_parse_ordered_list_item("Item"), None);
+        assert_eq!(try_parse_ordered_list_item("1"), None);
+        assert_eq!(try_parse_ordered_list_item("1."), None);
+    }
+
+    #[test]
+    fn test_roundtrip() {
+        let original = vec![
+            BodyElement::Heading {
+                level: 1,
+                text: "Title".to_string(),
+            },
+            BodyElement::Paragraph {
+                text: "Some text here.".to_string(),
+            },
+            BodyElement::List {
+                ordered: false,
+                items: vec!["A".to_string(), "B".to_string()],
+            },
+        ];
+
+        let markdown = body_to_markdown(&original);
+        let parsed = markdown_to_body(&markdown);
+
+        // Check the content of every element, not just how many came back.
+        assert_eq!(parsed.len(), 3);
+        match &parsed[0] {
+            BodyElement::Heading { level, text } => {
+                assert_eq!(*level, 1);
+                assert_eq!(text, "Title");
+            }
+            _ => panic!("Expected Heading"),
+        }
+        match &parsed[1] {
+            BodyElement::Paragraph { text } => assert_eq!(text, "Some text here."),
+            _ => panic!("Expected Paragraph"),
+        }
+        match &parsed[2] {
+            BodyElement::List { ordered, items } => {
+                assert!(!*ordered);
+                assert_eq!(*items, ["A", "B"]);
+            }
+            _ => panic!("Expected List"),
+        }
+    }
+}