at 8645f5a
use std::collections::HashMap; use std::path::{Path, PathBuf}; use ignore::WalkBuilder; use miette::{IntoDiagnostic, Result, miette}; use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd, html}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use tera::{Context, Tera}; use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender}; /// The default Tera template used to wrap rendered Markdown content in a /// complete HTML5 document. Exposed as a `pub const` so that `abbaye /// dump-theme` can write it to `.abbaye/theme/markdown.html.j2`. /// /// Template variables: /// - `{{ title }}` — plain-text page title (auto-escaped by Tera). /// - `{{ content | safe }}` — the rendered HTML body fragment. pub const TEMPLATE_MARKDOWN: &str = include_str!("../templates/markdown.html.j2"); /// Filename looked up inside `.abbaye/theme/` at runtime. const THEME_FILENAME: &str = "markdown.html.j2"; /// Name under which the template is registered inside the Tera instance. const TERA_NAME: &str = "markdown.html"; fn default_recursive() -> bool { true } /// Configuration for [`MarkdownBuilder`]. #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] pub struct MarkdownBuilderConfig { /// Directory containing `.md` files to render. /// /// Every `.md` file in the directory is rendered to a corresponding /// `.html` file in the output directory, preserving the subdirectory /// structure. Non-Markdown files referenced (linked or embedded) inside /// any Markdown source — images, PDFs, downloadable assets, etc. — are /// copied into the output directory next to the rendered HTML so that all /// relative URLs remain valid. /// /// Defaults to `"."` (the current working directory). pub input: Option<PathBuf>, /// Directory to write rendered files into. /// /// Defaults to `<input-name>-html` placed next to the input directory. /// /// ```toml /// [[builders]] /// type = "markdown" /// input = "docs/" /// output = "public/docs/" /// ``` pub output: Option<PathBuf>, /// Also descend into subdirectories of `input`. /// /// Defaults to `true`. Files matched by a `.gitignore` in the directory /// hierarchy are always excluded, mirroring the behaviour of the `archive` /// builder. #[serde(default = "default_recursive")] pub recursive: bool, } impl Default for MarkdownBuilderConfig { fn default() -> Self { Self { input: None, output: None, recursive: default_recursive(), } } } /// Renders a directory of Markdown files (`.md`) to standalone HTML documents. /// /// The output directory mirrors the input directory's structure: each `.md` /// file becomes a `.html` file at the same relative path. Any non-Markdown /// file referenced by a local link or image embed in a source document is /// copied to the output directory at the same relative path, keeping all URLs /// intact. pub struct MarkdownBuilder; impl Builder for MarkdownBuilder { type ConfigType = MarkdownBuilderConfig; async fn build( &self, config: Self::ConfigType, _version: &str, log: LogSender, ) -> Result<Vec<ArtifactPath>> { let input = config .input .unwrap_or_else(|| PathBuf::from(".")) .canonicalize() .into_diagnostic()?; if !input.is_dir() { return if input.exists() { Err(miette!( "markdown builder input must be a directory, got a file: {}", input.display() )) } else { Err(miette!( "markdown builder input directory does not exist: {}", input.display() )) }; } // Load the Tera template once per builder invocation and share it // across all files so template parsing only happens once. let tera = load_tera()?; build_directory(&input, config.output, config.recursive, &log, &tera).await } } /// Load the Tera instance for this builder invocation. /// /// Checks whether `.abbaye/theme/markdown.html.j2` exists and loads that /// file when present; otherwise falls back to the compiled-in /// [`TEMPLATE_MARKDOWN`] constant — exactly the same override mechanism /// used by the site templates (`root_index.html.j2` / `version_index.html.j2`). fn load_tera() -> Result<Tera> { let theme_file = PathBuf::from(".abbaye").join("theme").join(THEME_FILENAME); let mut tera = Tera::default(); if theme_file.is_file() { tera.add_template_file(&theme_file, Some(TERA_NAME)) .into_diagnostic()?; } else { tera.add_raw_template(TERA_NAME, TEMPLATE_MARKDOWN) .into_diagnostic()?; } Ok(tera) } // ── Directory rendering ─────────────────────────────────────────────────────── async fn build_directory( input_dir: &Path, output: Option<PathBuf>, recursive: bool, log: &LogSender, tera: &Tera, ) -> Result<Vec<ArtifactPath>> { let output_dir = output.unwrap_or_else(|| { let stem = input_dir .file_name() .map(|n| format!("{}-html", n.to_string_lossy())) .unwrap_or_else(|| "html".to_owned()); input_dir.parent().unwrap_or(Path::new(".")).join(stem) }); tokio::fs::create_dir_all(&output_dir) .await .into_diagnostic()?; // The ignore::WalkBuilder API is synchronous, so run it on the blocking // thread pool to avoid stalling the async runtime. let md_files = tokio::task::spawn_blocking({ let input_dir = input_dir.to_owned(); move || collect_md_files(&input_dir, recursive) }) .await .into_diagnostic()??; if md_files.is_empty() { let _ = log.send(LogEvent::Line(format!( "warning: no .md files found in {}", input_dir.display() ))); return Ok(vec![ArtifactPath { path: output_dir.clone(), name: dir_name_string(&output_dir), hash: None, }]); } // Map from absolute source path → absolute destination path, built up // while rendering so that files referenced by multiple documents are only // copied once. let mut files_to_copy: HashMap<PathBuf, PathBuf> = HashMap::new(); for md_path in &md_files { let relative = md_path.strip_prefix(input_dir).into_diagnostic()?; let out_path = output_dir.join(relative).with_extension("html"); // Ensure any intermediate subdirectories exist. if let Some(parent) = out_path.parent() { tokio::fs::create_dir_all(parent).await.into_diagnostic()?; } let _ = log.send(LogEvent::Line(format!( "{} → {}", md_path.display(), out_path.display() ))); let md = tokio::fs::read_to_string(md_path).await.into_diagnostic()?; // Collect local asset references before rendering so we can copy them. for (src, rel) in collect_referenced_files(&md, md_path, input_dir) { files_to_copy .entry(src) .or_insert_with(|| output_dir.join(rel)); } let title = extract_title(&md).unwrap_or_else(|| file_stem_string(md_path)); let document = render_template(tera, &title, &render_markdown(&md))?; tokio::fs::write(&out_path, document.as_bytes()) .await .into_diagnostic()?; } // Copy every referenced asset, creating parent directories as needed. for (src, dest) in &files_to_copy { if let Some(parent) = dest.parent() { tokio::fs::create_dir_all(parent).await.into_diagnostic()?; } let _ = log.send(LogEvent::Line(format!( "copying {} → {}", src.display(), dest.display() ))); tokio::fs::copy(src, dest).await.into_diagnostic()?; } Ok(vec![ArtifactPath { path: output_dir.clone(), name: dir_name_string(&output_dir), hash: None, }]) } // ── Helpers ─────────────────────────────────────────────────────────────────── /// Walk `dir` for `.md` files, honouring `.gitignore` rules (via the `ignore` /// crate). Returns paths in a stable, sorted order so output is reproducible. /// /// When `recursive` is `false` only the top level of `dir` is visited /// (`max_depth = 1`). fn collect_md_files(dir: &Path, recursive: bool) -> Result<Vec<PathBuf>> { let mut files = Vec::new(); let walker = WalkBuilder::new(dir) .max_depth(if recursive { None } else { Some(1) }) // Include dotfiles (e.g. .github/CONTRIBUTING.md). .hidden(false) .build(); for result in walker { let entry = result.into_diagnostic()?; let path = entry.into_path(); if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some("md") { files.push(path); } } files.sort(); Ok(files) } /// Parse `md` for local link and image targets that are not other Markdown /// files, resolve them relative to `md_path`'s parent directory, and return /// those that exist as files within `input_dir`. /// /// Returns a list of `(absolute_source_path, relative_path_from_input_dir)` /// pairs. The caller uses the relative path to mirror the asset at the same /// position inside the output directory, keeping all relative URLs in the /// rendered HTML valid. /// /// Skipped silently: /// - Remote URLs (`://`), data URIs, and fragment-only refs (`#…`). /// - `.md` files — those are rendered to `.html`, not copied. /// - Refs that do not resolve to an existing file. /// - Refs that resolve to a file outside `input_dir` (a warning is logged /// to the caller instead). fn collect_referenced_files(md: &str, md_path: &Path, input_dir: &Path) -> Vec<(PathBuf, PathBuf)> { let md_dir = md_path.parent().unwrap_or(Path::new(".")); let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH; let mut result = Vec::new(); for event in Parser::new_ext(md, opts) { let url: Option<pulldown_cmark::CowStr> = match event { Event::Start(Tag::Image { dest_url, .. }) => Some(dest_url), Event::Start(Tag::Link { dest_url, .. }) => Some(dest_url), _ => None, }; let Some(url) = url else { continue }; let s = url.as_ref(); // Skip remote URLs, data URIs, and fragment-only refs. if s.contains("://") || s.starts_with('#') || s.is_empty() { continue; } // Strip any trailing fragment before treating the string as a path. let path_part = s.split('#').next().unwrap_or(s); if path_part.is_empty() { continue; } // Skip links to other Markdown files — those will be rendered to // .html and don't need to be copied as assets. if Path::new(path_part).extension().and_then(|e| e.to_str()) == Some("md") { continue; } // Resolve the ref relative to the containing markdown file's directory. // A leading `/` is treated as relative to input_dir, not the filesystem // root, which is the most useful interpretation for a docs directory. let abs = if path_part.starts_with('/') { input_dir.join(path_part.trim_start_matches('/')) } else { md_dir.join(path_part) }; // canonicalize() fails if the path does not exist. let Ok(abs) = abs.canonicalize() else { continue; }; if !abs.is_file() { continue; } // Only copy assets that live inside the input directory; assets // outside it are silently skipped (they cannot be given a stable // relative output path). let Ok(relative) = abs.strip_prefix(input_dir) else { continue; }; let relative = relative.to_owned(); result.push((abs, relative)); } result } /// Convert a Markdown string to an HTML fragment. /// /// Enables tables, strikethrough, and footnotes — a superset of what /// [`crate::site`]'s own renderer uses. fn render_markdown(md: &str) -> String { let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_FOOTNOTES; let parser = Parser::new_ext(md, opts); let mut buf = String::new(); html::push_html(&mut buf, parser); buf } /// Scan a Markdown string for the first heading of any level and return its /// plain-text content. Used to populate the `<title>` element. /// /// Handles headings that contain inline emphasis, code spans, or other inline /// elements by concatenating all `Text` and `Code` events seen between the /// opening and closing heading tags. fn extract_title(md: &str) -> Option<String> { let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH; let mut parser = Parser::new_ext(md, opts); let mut in_heading = false; let mut title = String::new(); loop { match parser.next()? { Event::Start(Tag::Heading { .. }) => { in_heading = true; } Event::End(TagEnd::Heading(_)) => break, Event::Text(text) | Event::Code(text) if in_heading => { title.push_str(&text); } _ => {} } } if title.is_empty() { None } else { Some(title) } } /// Render the Tera template with the given `title` and HTML `content`. /// /// `title` is passed as a plain string; Tera auto-escapes it when inserted /// into `{{ title }}`. `content` is the already-rendered HTML fragment and /// must be inserted with `{{ content | safe }}` in the template. fn render_template(tera: &Tera, title: &str, content: &str) -> Result<String> { let mut ctx = Context::new(); ctx.insert("title", title); ctx.insert("content", content); tera.render(TERA_NAME, &ctx).into_diagnostic() } /// Extract the file stem as an owned `String`, falling back to `"Document"`. fn file_stem_string(path: &Path) -> String { path.file_stem() .map(|s| s.to_string_lossy().into_owned()) .unwrap_or_else(|| "Document".to_owned()) } /// Extract the directory name as an owned `String`, falling back to `"html"`. fn dir_name_string(path: &Path) -> String { path.file_name() .map(|n| n.to_string_lossy().into_owned()) .unwrap_or_else(|| "html".to_owned()) } #[cfg(test)] mod tests { use super::*; use std::fs; /// Build a temporary directory tree: /// /// ``` /// <tmpdir>/ /// docs/ /// page.md ← references image.png, guide.pdf, other.md, https://… /// image.png ← local asset (should be collected) /// guide.pdf ← local asset (should be collected) /// other.md ← .md file (should be skipped) /// sub/ /// nested.md ← references ../image.png (should be collected) /// ``` fn make_test_tree() -> (tempfile::TempDir, PathBuf) { let tmp = tempfile::tempdir().expect("tempdir"); let docs = tmp.path().join("docs"); fs::create_dir_all(docs.join("sub")).unwrap(); fs::write(docs.join("image.png"), b"PNG").unwrap(); fs::write(docs.join("guide.pdf"), b"PDF").unwrap(); fs::write(docs.join("other.md"), b"# Other").unwrap(); fs::write( docs.join("page.md"), b"# Hello\n\ \n\ [guide](guide.pdf)\n\ [other](other.md)\n\ [remote](https://example.com)\n\ [frag](#section)\n", ) .unwrap(); fs::write( docs.join("sub").join("nested.md"), b"# Nested\n\n", ) .unwrap(); (tmp, docs) } #[test] fn collects_images_and_non_md_links() { let (_tmp, docs) = make_test_tree(); let md_path = docs.join("page.md"); let md = fs::read_to_string(&md_path).unwrap(); let refs = collect_referenced_files(&md, &md_path, &docs); let sources: Vec<_> = refs.iter().map(|(src, _)| src.clone()).collect(); let img = docs.join("image.png").canonicalize().unwrap(); let pdf = docs.join("guide.pdf").canonicalize().unwrap(); let other = docs.join("other.md").canonicalize().unwrap(); assert!(sources.contains(&img), "image.png should be collected"); assert!(sources.contains(&pdf), "guide.pdf should be collected"); assert!(!sources.contains(&other), "other.md should be skipped"); // Remote URL and fragment-only link must not appear. assert_eq!(refs.len(), 2, "expected exactly 2 assets (image + pdf)"); } #[test] fn relative_paths_from_subdirectory_are_resolved() { let (_tmp, docs) = make_test_tree(); let md_path = docs.join("sub").join("nested.md"); let md = fs::read_to_string(&md_path).unwrap(); let refs = collect_referenced_files(&md, &md_path, &docs); assert_eq!(refs.len(), 1, "expected exactly 1 asset"); let (src, rel) = &refs[0]; assert_eq!( src, &docs.join("image.png").canonicalize().unwrap(), "source should be docs/image.png" ); assert_eq!( rel, &PathBuf::from("image.png"), "relative path should be image.png (relative to docs/)" ); } #[test] fn extract_title_finds_first_heading() { assert_eq!( extract_title("# Hello World\n\nsome text"), Some("Hello World".to_owned()) ); } #[test] fn extract_title_handles_inline_code_in_heading() { assert_eq!( extract_title("# Use `foo()` wisely"), Some("Use foo() wisely".to_owned()) ); } #[test] fn extract_title_returns_none_when_no_heading() { assert_eq!(extract_title("just a paragraph"), None); } }