| @@ -0,0 +1,520 @@ |
| +use std::collections::HashMap; |
| +use std::path::{Path, PathBuf}; |
| + |
| +use ignore::WalkBuilder; |
| +use miette::{IntoDiagnostic, Result, miette}; |
| +use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd, html}; |
| +use schemars::JsonSchema; |
| +use serde::{Deserialize, Serialize}; |
| +use tera::{Context, Tera}; |
| + |
| +use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender}; |
| + |
/// The default Tera template used to wrap rendered Markdown content in a
/// complete HTML5 document. Exposed as a `pub const` so that `abbaye
/// dump-theme` can write it to `.abbaye/theme/markdown.html.j2`.
///
/// The template text is embedded at compile time via `include_str!`, so the
/// built binary carries its own copy of the default theme.
///
/// Template variables:
/// - `{{ title }}` — plain-text page title (auto-escaped by Tera).
/// - `{{ content | safe }}` — the rendered HTML body fragment.
pub const TEMPLATE_MARKDOWN: &str = include_str!("../templates/markdown.html.j2");

/// Filename looked up inside `.abbaye/theme/` at runtime.
///
/// When a file with this name exists there it is loaded instead of
/// [`TEMPLATE_MARKDOWN`].
const THEME_FILENAME: &str = "markdown.html.j2";
/// Name under which the template is registered inside the Tera instance.
///
/// NOTE(review): Tera's default auto-escaping is keyed on the suffix of the
/// registered template name (`.html`, `.htm`, `.xml`), so the `.html` ending
/// here is presumably load-bearing for `{{ title }}` escaping — confirm
/// against the Tera version in use before renaming.
const TERA_NAME: &str = "markdown.html";
| + |
/// Serde default for [`MarkdownBuilderConfig::recursive`]: subdirectories are
/// walked unless the configuration explicitly opts out.
fn default_recursive() -> bool {
    const RECURSE_BY_DEFAULT: bool = true;
    RECURSE_BY_DEFAULT
}
| + |
/// Configuration for [`MarkdownBuilder`].
///
/// Every field is optional in the configuration file: `input` and `output`
/// fall back to derived defaults and `recursive` defaults to `true`.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct MarkdownBuilderConfig {
    /// Directory containing `.md` files to render.
    ///
    /// Every `.md` file in the directory is rendered to a corresponding
    /// `.html` file in the output directory, preserving the subdirectory
    /// structure. Non-Markdown files referenced (linked or embedded) inside
    /// any Markdown source — images, PDFs, downloadable assets, etc. — are
    /// copied into the output directory next to the rendered HTML so that all
    /// relative URLs remain valid. Referenced files that live outside this
    /// directory are not copied.
    ///
    /// The path must name an existing directory; the builder fails otherwise.
    ///
    /// Defaults to `"."` (the current working directory).
    pub input: Option<PathBuf>,

    /// Directory to write rendered files into.
    ///
    /// The directory (and any missing parents) is created if necessary.
    ///
    /// Defaults to `<input-name>-html` placed next to the input directory.
    ///
    /// ```toml
    /// [[builders]]
    /// type = "markdown"
    /// input = "docs/"
    /// output = "public/docs/"
    /// ```
    pub output: Option<PathBuf>,

    /// Also descend into subdirectories of `input`.
    ///
    /// Defaults to `true`. When `false`, only `.md` files directly inside
    /// `input` are rendered. Files matched by a `.gitignore` in the directory
    /// hierarchy are always excluded, mirroring the behaviour of the `archive`
    /// builder.
    #[serde(default = "default_recursive")]
    pub recursive: bool,
}
| + |
| +impl Default for MarkdownBuilderConfig { |
| + fn default() -> Self { |
| + Self { |
| + input: None, |
| + output: None, |
| + recursive: default_recursive(), |
| + } |
| + } |
| +} |
| + |
| +/// Renders a directory of Markdown files (`.md`) to standalone HTML documents. |
| +/// |
| +/// The output directory mirrors the input directory's structure: each `.md` |
| +/// file becomes a `.html` file at the same relative path. Any non-Markdown |
| +/// file referenced by a local link or image embed in a source document is |
| +/// copied to the output directory at the same relative path, keeping all URLs |
| +/// intact. |
| +pub struct MarkdownBuilder; |
| + |
| +impl Builder for MarkdownBuilder { |
| + type ConfigType = MarkdownBuilderConfig; |
| + |
| + async fn build( |
| + &self, |
| + config: Self::ConfigType, |
| + _version: &str, |
| + log: LogSender, |
| + ) -> Result<Vec<ArtifactPath>> { |
| + let input = config |
| + .input |
| + .unwrap_or_else(|| PathBuf::from(".")) |
| + .canonicalize() |
| + .into_diagnostic()?; |
| + |
| + if !input.is_dir() { |
| + return if input.exists() { |
| + Err(miette!( |
| + "markdown builder input must be a directory, got a file: {}", |
| + input.display() |
| + )) |
| + } else { |
| + Err(miette!( |
| + "markdown builder input directory does not exist: {}", |
| + input.display() |
| + )) |
| + }; |
| + } |
| + |
| + // Load the Tera template once per builder invocation and share it |
| + // across all files so template parsing only happens once. |
| + let tera = load_tera()?; |
| + |
| + build_directory(&input, config.output, config.recursive, &log, &tera).await |
| + } |
| +} |
| + |
| +/// Load the Tera instance for this builder invocation. |
| +/// |
| +/// Checks whether `.abbaye/theme/markdown.html.j2` exists and loads that |
| +/// file when present; otherwise falls back to the compiled-in |
| +/// [`TEMPLATE_MARKDOWN`] constant — exactly the same override mechanism |
| +/// used by the site templates (`root_index.html.j2` / `version_index.html.j2`). |
| +fn load_tera() -> Result<Tera> { |
| + let theme_file = PathBuf::from(".abbaye").join("theme").join(THEME_FILENAME); |
| + let mut tera = Tera::default(); |
| + if theme_file.is_file() { |
| + tera.add_template_file(&theme_file, Some(TERA_NAME)) |
| + .into_diagnostic()?; |
| + } else { |
| + tera.add_raw_template(TERA_NAME, TEMPLATE_MARKDOWN) |
| + .into_diagnostic()?; |
| + } |
| + Ok(tera) |
| +} |
| + |
| +// ── Directory rendering ─────────────────────────────────────────────────────── |
| + |
| +async fn build_directory( |
| + input_dir: &Path, |
| + output: Option<PathBuf>, |
| + recursive: bool, |
| + log: &LogSender, |
| + tera: &Tera, |
| +) -> Result<Vec<ArtifactPath>> { |
| + let output_dir = output.unwrap_or_else(|| { |
| + let stem = input_dir |
| + .file_name() |
| + .map(|n| format!("{}-html", n.to_string_lossy())) |
| + .unwrap_or_else(|| "html".to_owned()); |
| + input_dir.parent().unwrap_or(Path::new(".")).join(stem) |
| + }); |
| + |
| + tokio::fs::create_dir_all(&output_dir) |
| + .await |
| + .into_diagnostic()?; |
| + |
| + // The ignore::WalkBuilder API is synchronous, so run it on the blocking |
| + // thread pool to avoid stalling the async runtime. |
| + let md_files = tokio::task::spawn_blocking({ |
| + let input_dir = input_dir.to_owned(); |
| + move || collect_md_files(&input_dir, recursive) |
| + }) |
| + .await |
| + .into_diagnostic()??; |
| + |
| + if md_files.is_empty() { |
| + let _ = log.send(LogEvent::Line(format!( |
| + "warning: no .md files found in {}", |
| + input_dir.display() |
| + ))); |
| + return Ok(vec![ArtifactPath { |
| + path: output_dir.clone(), |
| + name: dir_name_string(&output_dir), |
| + hash: None, |
| + }]); |
| + } |
| + |
| + // Map from absolute source path → absolute destination path, built up |
| + // while rendering so that files referenced by multiple documents are only |
| + // copied once. |
| + let mut files_to_copy: HashMap<PathBuf, PathBuf> = HashMap::new(); |
| + |
| + for md_path in &md_files { |
| + let relative = md_path.strip_prefix(input_dir).into_diagnostic()?; |
| + let out_path = output_dir.join(relative).with_extension("html"); |
| + |
| + // Ensure any intermediate subdirectories exist. |
| + if let Some(parent) = out_path.parent() { |
| + tokio::fs::create_dir_all(parent).await.into_diagnostic()?; |
| + } |
| + |
| + let _ = log.send(LogEvent::Line(format!( |
| + "{} → {}", |
| + md_path.display(), |
| + out_path.display() |
| + ))); |
| + |
| + let md = tokio::fs::read_to_string(md_path).await.into_diagnostic()?; |
| + |
| + // Collect local asset references before rendering so we can copy them. |
| + for (src, rel) in collect_referenced_files(&md, md_path, input_dir) { |
| + files_to_copy |
| + .entry(src) |
| + .or_insert_with(|| output_dir.join(rel)); |
| + } |
| + |
| + let title = extract_title(&md).unwrap_or_else(|| file_stem_string(md_path)); |
| + let document = render_template(tera, &title, &render_markdown(&md))?; |
| + |
| + tokio::fs::write(&out_path, document.as_bytes()) |
| + .await |
| + .into_diagnostic()?; |
| + } |
| + |
| + // Copy every referenced asset, creating parent directories as needed. |
| + for (src, dest) in &files_to_copy { |
| + if let Some(parent) = dest.parent() { |
| + tokio::fs::create_dir_all(parent).await.into_diagnostic()?; |
| + } |
| + |
| + let _ = log.send(LogEvent::Line(format!( |
| + "copying {} → {}", |
| + src.display(), |
| + dest.display() |
| + ))); |
| + |
| + tokio::fs::copy(src, dest).await.into_diagnostic()?; |
| + } |
| + |
| + Ok(vec![ArtifactPath { |
| + path: output_dir.clone(), |
| + name: dir_name_string(&output_dir), |
| + hash: None, |
| + }]) |
| +} |
| + |
| +// ── Helpers ─────────────────────────────────────────────────────────────────── |
| + |
| +/// Walk `dir` for `.md` files, honouring `.gitignore` rules (via the `ignore` |
| +/// crate). Returns paths in a stable, sorted order so output is reproducible. |
| +/// |
| +/// When `recursive` is `false` only the top level of `dir` is visited |
| +/// (`max_depth = 1`). |
| +fn collect_md_files(dir: &Path, recursive: bool) -> Result<Vec<PathBuf>> { |
| + let mut files = Vec::new(); |
| + |
| + let walker = WalkBuilder::new(dir) |
| + .max_depth(if recursive { None } else { Some(1) }) |
| + // Include dotfiles (e.g. .github/CONTRIBUTING.md). |
| + .hidden(false) |
| + .build(); |
| + |
| + for result in walker { |
| + let entry = result.into_diagnostic()?; |
| + let path = entry.into_path(); |
| + if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some("md") { |
| + files.push(path); |
| + } |
| + } |
| + |
| + files.sort(); |
| + Ok(files) |
| +} |
| + |
| +/// Parse `md` for local link and image targets that are not other Markdown |
| +/// files, resolve them relative to `md_path`'s parent directory, and return |
| +/// those that exist as files within `input_dir`. |
| +/// |
| +/// Returns a list of `(absolute_source_path, relative_path_from_input_dir)` |
| +/// pairs. The caller uses the relative path to mirror the asset at the same |
| +/// position inside the output directory, keeping all relative URLs in the |
| +/// rendered HTML valid. |
| +/// |
| +/// Skipped silently: |
| +/// - Remote URLs (`://`), data URIs, and fragment-only refs (`#…`). |
| +/// - `.md` files — those are rendered to `.html`, not copied. |
| +/// - Refs that do not resolve to an existing file. |
| +/// - Refs that resolve to a file outside `input_dir` (a warning is logged |
| +/// to the caller instead). |
| +fn collect_referenced_files(md: &str, md_path: &Path, input_dir: &Path) -> Vec<(PathBuf, PathBuf)> { |
| + let md_dir = md_path.parent().unwrap_or(Path::new(".")); |
| + let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH; |
| + let mut result = Vec::new(); |
| + |
| + for event in Parser::new_ext(md, opts) { |
| + let url: Option<pulldown_cmark::CowStr> = match event { |
| + Event::Start(Tag::Image { dest_url, .. }) => Some(dest_url), |
| + Event::Start(Tag::Link { dest_url, .. }) => Some(dest_url), |
| + _ => None, |
| + }; |
| + |
| + let Some(url) = url else { continue }; |
| + let s = url.as_ref(); |
| + |
| + // Skip remote URLs, data URIs, and fragment-only refs. |
| + if s.contains("://") || s.starts_with('#') || s.is_empty() { |
| + continue; |
| + } |
| + |
| + // Strip any trailing fragment before treating the string as a path. |
| + let path_part = s.split('#').next().unwrap_or(s); |
| + if path_part.is_empty() { |
| + continue; |
| + } |
| + |
| + // Skip links to other Markdown files — those will be rendered to |
| + // .html and don't need to be copied as assets. |
| + if Path::new(path_part).extension().and_then(|e| e.to_str()) == Some("md") { |
| + continue; |
| + } |
| + |
| + // Resolve the ref relative to the containing markdown file's directory. |
| + // A leading `/` is treated as relative to input_dir, not the filesystem |
| + // root, which is the most useful interpretation for a docs directory. |
| + let abs = if path_part.starts_with('/') { |
| + input_dir.join(path_part.trim_start_matches('/')) |
| + } else { |
| + md_dir.join(path_part) |
| + }; |
| + |
| + // canonicalize() fails if the path does not exist. |
| + let Ok(abs) = abs.canonicalize() else { |
| + continue; |
| + }; |
| + |
| + if !abs.is_file() { |
| + continue; |
| + } |
| + |
| + // Only copy assets that live inside the input directory; assets |
| + // outside it are silently skipped (they cannot be given a stable |
| + // relative output path). |
| + let Ok(relative) = abs.strip_prefix(input_dir) else { |
| + continue; |
| + }; |
| + let relative = relative.to_owned(); |
| + |
| + result.push((abs, relative)); |
| + } |
| + |
| + result |
| +} |
| + |
| +/// Convert a Markdown string to an HTML fragment. |
| +/// |
| +/// Enables tables, strikethrough, and footnotes — a superset of what |
| +/// [`crate::site`]'s own renderer uses. |
| +fn render_markdown(md: &str) -> String { |
| + let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_FOOTNOTES; |
| + let parser = Parser::new_ext(md, opts); |
| + let mut buf = String::new(); |
| + html::push_html(&mut buf, parser); |
| + buf |
| +} |
| + |
| +/// Scan a Markdown string for the first heading of any level and return its |
| +/// plain-text content. Used to populate the `<title>` element. |
| +/// |
| +/// Handles headings that contain inline emphasis, code spans, or other inline |
| +/// elements by concatenating all `Text` and `Code` events seen between the |
| +/// opening and closing heading tags. |
| +fn extract_title(md: &str) -> Option<String> { |
| + let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH; |
| + let mut parser = Parser::new_ext(md, opts); |
| + let mut in_heading = false; |
| + let mut title = String::new(); |
| + |
| + loop { |
| + match parser.next()? { |
| + Event::Start(Tag::Heading { .. }) => { |
| + in_heading = true; |
| + } |
| + Event::End(TagEnd::Heading(_)) => break, |
| + Event::Text(text) | Event::Code(text) if in_heading => { |
| + title.push_str(&text); |
| + } |
| + _ => {} |
| + } |
| + } |
| + |
| + if title.is_empty() { None } else { Some(title) } |
| +} |
| + |
| +/// Render the Tera template with the given `title` and HTML `content`. |
| +/// |
| +/// `title` is passed as a plain string; Tera auto-escapes it when inserted |
| +/// into `{{ title }}`. `content` is the already-rendered HTML fragment and |
| +/// must be inserted with `{{ content | safe }}` in the template. |
| +fn render_template(tera: &Tera, title: &str, content: &str) -> Result<String> { |
| + let mut ctx = Context::new(); |
| + ctx.insert("title", title); |
| + ctx.insert("content", content); |
| + tera.render(TERA_NAME, &ctx).into_diagnostic() |
| +} |
| + |
/// Return the file name of `path` without its final extension as an owned
/// `String` (lossily converted), or `"Document"` when the path has no file
/// name component.
fn file_stem_string(path: &Path) -> String {
    match path.file_stem() {
        Some(stem) => stem.to_string_lossy().into_owned(),
        None => String::from("Document"),
    }
}
| + |
/// Return the last component of `path` as an owned `String` (lossily
/// converted), or `"html"` when the path has no final name component.
fn dir_name_string(path: &Path) -> String {
    path.file_name().map_or_else(
        || "html".to_owned(),
        |name| name.to_string_lossy().into_owned(),
    )
}
| + |
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Build a temporary directory tree:
    ///
    /// ```text
    /// <tmpdir>/
    ///   docs/
    ///     page.md       ← references image.png, guide.pdf, other.md, https://…
    ///     image.png     ← local asset (should be collected)
    ///     guide.pdf     ← local asset (should be collected)
    ///     other.md      ← .md file (should be skipped)
    ///     sub/
    ///       nested.md   ← references ../image.png (should be collected)
    /// ```
    ///
    /// Returns the `TempDir` guard (dropping it deletes the tree) together
    /// with the canonical path of `docs/`.
    fn make_test_tree() -> (tempfile::TempDir, PathBuf) {
        let tmp = tempfile::tempdir().expect("tempdir");
        // Canonicalize the root: the function under test compares canonical
        // asset paths against the input directory, and on some platforms the
        // temp directory sits behind a symlink.
        let docs = tmp
            .path()
            .canonicalize()
            .expect("canonical tempdir")
            .join("docs");
        fs::create_dir_all(docs.join("sub")).unwrap();
        fs::write(docs.join("image.png"), b"PNG").unwrap();
        fs::write(docs.join("guide.pdf"), b"PDF").unwrap();
        fs::write(docs.join("other.md"), b"# Other").unwrap();
        fs::write(
            docs.join("page.md"),
            b"# Hello\n\
              \n\
              ![image](image.png)\n\
              [guide](guide.pdf)\n\
              [other](other.md)\n\
              [remote](https://example.com)\n\
              [frag](#section)\n",
        )
        .unwrap();
        fs::write(
            docs.join("sub").join("nested.md"),
            b"# Nested\n\n![image](../image.png)\n",
        )
        .unwrap();
        (tmp, docs)
    }

    #[test]
    fn collects_images_and_non_md_links() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("page.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);
        let sources: Vec<_> = refs.iter().map(|(src, _)| src.clone()).collect();

        let img = docs.join("image.png").canonicalize().unwrap();
        let pdf = docs.join("guide.pdf").canonicalize().unwrap();
        let other = docs.join("other.md").canonicalize().unwrap();

        assert!(sources.contains(&img), "image.png should be collected");
        assert!(sources.contains(&pdf), "guide.pdf should be collected");
        assert!(!sources.contains(&other), "other.md should be skipped");
        // Remote URL and fragment-only link must not appear.
        assert_eq!(refs.len(), 2, "expected exactly 2 assets (image + pdf)");
    }

    #[test]
    fn relative_paths_from_subdirectory_are_resolved() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("sub").join("nested.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);

        assert_eq!(refs.len(), 1, "expected exactly 1 asset");
        let (src, rel) = &refs[0];
        assert_eq!(
            src,
            &docs.join("image.png").canonicalize().unwrap(),
            "source should be docs/image.png"
        );
        assert_eq!(
            rel,
            &PathBuf::from("image.png"),
            "relative path should be image.png (relative to docs/)"
        );
    }

    #[test]
    fn extract_title_finds_first_heading() {
        assert_eq!(
            extract_title("# Hello World\n\nsome text"),
            Some("Hello World".to_owned())
        );
    }

    #[test]
    fn extract_title_handles_inline_code_in_heading() {
        assert_eq!(
            extract_title("# Use `foo()` wisely"),
            Some("Use foo() wisely".to_owned())
        );
    }

    #[test]
    fn extract_title_returns_none_when_no_heading() {
        assert_eq!(extract_title("just a paragraph"), None);
    }
}