Skip to main content

abbaye/builders/
markdown.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use ignore::WalkBuilder;
5use miette::{IntoDiagnostic, Result, miette};
6use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd, html};
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9use tera::{Context, Tera};
10
11use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender};
12
/// The default Tera template used to wrap rendered Markdown content in a
/// complete HTML5 document. Exposed as a `pub const` so that `abbaye
/// dump-theme` can write it to `.abbaye/theme/markdown.html.j2`.
///
/// The template text is embedded at compile time via `include_str!`, with the
/// path resolved relative to this source file. `load_tera` registers it only
/// when no theme override file is found on disk.
///
/// Template variables:
/// - `{{ title }}` — plain-text page title (auto-escaped by Tera).
/// - `{{ content | safe }}` — the rendered HTML body fragment.
pub const TEMPLATE_MARKDOWN: &str = include_str!("../templates/markdown.html.j2");
21
/// Filename looked up inside `.abbaye/theme/` at runtime.
///
/// `load_tera` joins this onto the relative path `.abbaye/theme`, so the
/// lookup is relative to the process's current working directory.
const THEME_FILENAME: &str = "markdown.html.j2";
/// Name under which the template is registered inside the Tera instance.
///
/// Both the on-disk override and the built-in template are registered under
/// this same name, so `render_template` does not need to know which was used.
// NOTE(review): Tera's default autoescaping appears to be keyed off the
// template name suffix (`.html`); keep the suffix unless that is confirmed
// not to matter for the Tera version in use.
const TERA_NAME: &str = "markdown.html";
26
/// Serde default for [`MarkdownBuilderConfig::recursive`]: subdirectories are
/// walked unless the configuration explicitly opts out.
fn default_recursive() -> bool {
    true
}
30
/// Configuration for [`MarkdownBuilder`].
// NOTE(review): this type derives `JsonSchema`; schemars normally copies the
// `///` doc comments below into the generated schema as descriptions, so
// rewording them may change that output — confirm before editing them.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct MarkdownBuilderConfig {
    /// Directory containing `.md` files to render.
    ///
    /// Every `.md` file in the directory is rendered to a corresponding
    /// `.html` file in the output directory, preserving the subdirectory
    /// structure.  Non-Markdown files referenced (linked or embedded) inside
    /// any Markdown source — images, PDFs, downloadable assets, etc. — are
    /// copied into the output directory next to the rendered HTML so that all
    /// relative URLs remain valid.
    ///
    /// Defaults to `"."` (the current working directory).
    // The builder canonicalizes this path and rejects anything that is not a
    // directory before rendering starts.
    pub input: Option<PathBuf>,

    /// Directory to write rendered files into.
    ///
    /// Defaults to `<input-name>-html` placed next to the input directory.
    ///
    /// ```toml
    /// [[builders]]
    /// type   = "markdown"
    /// input  = "docs/"
    /// output = "public/docs/"
    /// ```
    // When unset, `build_directory` derives the default from the input
    // directory's file name, falling back to `html` when it has none.
    pub output: Option<PathBuf>,

    /// Also descend into subdirectories of `input`.
    ///
    /// Defaults to `true`. Files matched by a `.gitignore` in the directory
    /// hierarchy are always excluded, mirroring the behaviour of the `archive`
    /// builder.
    // When `false`, `collect_md_files` limits the walk to a maximum depth of 1.
    // The serde attribute makes a missing key deserialize to
    // `default_recursive()` rather than to `bool::default()`.
    #[serde(default = "default_recursive")]
    pub recursive: bool,
}
66
67impl Default for MarkdownBuilderConfig {
68    fn default() -> Self {
69        Self {
70            input: None,
71            output: None,
72            recursive: default_recursive(),
73        }
74    }
75}
76
/// Renders a directory of Markdown files (`.md`) to standalone HTML documents.
///
/// The output directory mirrors the input directory's structure: each `.md`
/// file becomes a `.html` file at the same relative path.  Any non-Markdown
/// file referenced by a local link or image embed in a source document is
/// copied to the output directory at the same relative path, keeping all URLs
/// intact.
///
/// Link targets are emitted exactly as written in the source. In particular a
/// link to another `.md` file is neither rewritten to point at the rendered
/// `.html` file nor copied as an asset, so such a link only resolves in the
/// output if the `.md` source happens to be present there as well.
pub struct MarkdownBuilder;
85
86impl Builder for MarkdownBuilder {
87    type ConfigType = MarkdownBuilderConfig;
88
89    async fn build(
90        &self,
91        config: Self::ConfigType,
92        _version: &str,
93        log: LogSender,
94    ) -> Result<Vec<ArtifactPath>> {
95        let input = config
96            .input
97            .unwrap_or_else(|| PathBuf::from("."))
98            .canonicalize()
99            .into_diagnostic()?;
100
101        if !input.is_dir() {
102            return if input.exists() {
103                Err(miette!(
104                    "markdown builder input must be a directory, got a file: {}",
105                    input.display()
106                ))
107            } else {
108                Err(miette!(
109                    "markdown builder input directory does not exist: {}",
110                    input.display()
111                ))
112            };
113        }
114
115        // Load the Tera template once per builder invocation and share it
116        // across all files so template parsing only happens once.
117        let tera = load_tera()?;
118
119        build_directory(&input, config.output, config.recursive, &log, &tera).await
120    }
121}
122
123/// Load the Tera instance for this builder invocation.
124///
125/// Checks whether `.abbaye/theme/markdown.html.j2` exists and loads that
126/// file when present; otherwise falls back to the compiled-in
127/// [`TEMPLATE_MARKDOWN`] constant — exactly the same override mechanism
128/// used by the site templates (`root_index.html.j2` / `version_index.html.j2`).
129fn load_tera() -> Result<Tera> {
130    let theme_file = PathBuf::from(".abbaye").join("theme").join(THEME_FILENAME);
131    let mut tera = Tera::default();
132    if theme_file.is_file() {
133        tera.add_template_file(&theme_file, Some(TERA_NAME))
134            .into_diagnostic()?;
135    } else {
136        tera.add_raw_template(TERA_NAME, TEMPLATE_MARKDOWN)
137            .into_diagnostic()?;
138    }
139    Ok(tera)
140}
141
142// ── Directory rendering ───────────────────────────────────────────────────────
143
144async fn build_directory(
145    input_dir: &Path,
146    output: Option<PathBuf>,
147    recursive: bool,
148    log: &LogSender,
149    tera: &Tera,
150) -> Result<Vec<ArtifactPath>> {
151    let output_dir = output.unwrap_or_else(|| {
152        let stem = input_dir
153            .file_name()
154            .map(|n| format!("{}-html", n.to_string_lossy()))
155            .unwrap_or_else(|| "html".to_owned());
156        input_dir.parent().unwrap_or(Path::new(".")).join(stem)
157    });
158
159    tokio::fs::create_dir_all(&output_dir)
160        .await
161        .into_diagnostic()?;
162
163    // The ignore::WalkBuilder API is synchronous, so run it on the blocking
164    // thread pool to avoid stalling the async runtime.
165    let md_files = tokio::task::spawn_blocking({
166        let input_dir = input_dir.to_owned();
167        move || collect_md_files(&input_dir, recursive)
168    })
169    .await
170    .into_diagnostic()??;
171
172    if md_files.is_empty() {
173        let _ = log.send(LogEvent::Line(format!(
174            "warning: no .md files found in {}",
175            input_dir.display()
176        )));
177        return Ok(vec![ArtifactPath {
178            path: output_dir.clone(),
179            name: dir_name_string(&output_dir),
180            hash: None,
181        }]);
182    }
183
184    // Map from absolute source path → absolute destination path, built up
185    // while rendering so that files referenced by multiple documents are only
186    // copied once.
187    let mut files_to_copy: HashMap<PathBuf, PathBuf> = HashMap::new();
188
189    for md_path in &md_files {
190        let relative = md_path.strip_prefix(input_dir).into_diagnostic()?;
191        let out_path = output_dir.join(relative).with_extension("html");
192
193        // Ensure any intermediate subdirectories exist.
194        if let Some(parent) = out_path.parent() {
195            tokio::fs::create_dir_all(parent).await.into_diagnostic()?;
196        }
197
198        let _ = log.send(LogEvent::Line(format!(
199            "{} → {}",
200            md_path.display(),
201            out_path.display()
202        )));
203
204        let md = tokio::fs::read_to_string(md_path).await.into_diagnostic()?;
205
206        // Collect local asset references before rendering so we can copy them.
207        for (src, rel) in collect_referenced_files(&md, md_path, input_dir) {
208            files_to_copy
209                .entry(src)
210                .or_insert_with(|| output_dir.join(rel));
211        }
212
213        let title = extract_title(&md).unwrap_or_else(|| file_stem_string(md_path));
214        let document = render_template(tera, &title, &render_markdown(&md))?;
215
216        tokio::fs::write(&out_path, document.as_bytes())
217            .await
218            .into_diagnostic()?;
219    }
220
221    // Copy every referenced asset, creating parent directories as needed.
222    for (src, dest) in &files_to_copy {
223        if let Some(parent) = dest.parent() {
224            tokio::fs::create_dir_all(parent).await.into_diagnostic()?;
225        }
226
227        let _ = log.send(LogEvent::Line(format!(
228            "copying {} → {}",
229            src.display(),
230            dest.display()
231        )));
232
233        tokio::fs::copy(src, dest).await.into_diagnostic()?;
234    }
235
236    Ok(vec![ArtifactPath {
237        path: output_dir.clone(),
238        name: dir_name_string(&output_dir),
239        hash: None,
240    }])
241}
242
243// ── Helpers ───────────────────────────────────────────────────────────────────
244
245/// Walk `dir` for `.md` files, honouring `.gitignore` rules (via the `ignore`
246/// crate). Returns paths in a stable, sorted order so output is reproducible.
247///
248/// When `recursive` is `false` only the top level of `dir` is visited
249/// (`max_depth = 1`).
250fn collect_md_files(dir: &Path, recursive: bool) -> Result<Vec<PathBuf>> {
251    let mut files = Vec::new();
252
253    let walker = WalkBuilder::new(dir)
254        .max_depth(if recursive { None } else { Some(1) })
255        // Include dotfiles (e.g. .github/CONTRIBUTING.md).
256        .hidden(false)
257        .build();
258
259    for result in walker {
260        let entry = result.into_diagnostic()?;
261        let path = entry.into_path();
262        if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some("md") {
263            files.push(path);
264        }
265    }
266
267    files.sort();
268    Ok(files)
269}
270
271/// Parse `md` for local link and image targets that are not other Markdown
272/// files, resolve them relative to `md_path`'s parent directory, and return
273/// those that exist as files within `input_dir`.
274///
275/// Returns a list of `(absolute_source_path, relative_path_from_input_dir)`
276/// pairs.  The caller uses the relative path to mirror the asset at the same
277/// position inside the output directory, keeping all relative URLs in the
278/// rendered HTML valid.
279///
280/// Skipped silently:
281/// - Remote URLs (`://`), data URIs, and fragment-only refs (`#…`).
282/// - `.md` files — those are rendered to `.html`, not copied.
283/// - Refs that do not resolve to an existing file.
284/// - Refs that resolve to a file outside `input_dir` (a warning is logged
285///   to the caller instead).
286fn collect_referenced_files(md: &str, md_path: &Path, input_dir: &Path) -> Vec<(PathBuf, PathBuf)> {
287    let md_dir = md_path.parent().unwrap_or(Path::new("."));
288    let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
289    let mut result = Vec::new();
290
291    for event in Parser::new_ext(md, opts) {
292        let url: Option<pulldown_cmark::CowStr> = match event {
293            Event::Start(Tag::Image { dest_url, .. }) => Some(dest_url),
294            Event::Start(Tag::Link { dest_url, .. }) => Some(dest_url),
295            _ => None,
296        };
297
298        let Some(url) = url else { continue };
299        let s = url.as_ref();
300
301        // Skip remote URLs, data URIs, and fragment-only refs.
302        if s.contains("://") || s.starts_with('#') || s.is_empty() {
303            continue;
304        }
305
306        // Strip any trailing fragment before treating the string as a path.
307        let path_part = s.split('#').next().unwrap_or(s);
308        if path_part.is_empty() {
309            continue;
310        }
311
312        // Skip links to other Markdown files — those will be rendered to
313        // .html and don't need to be copied as assets.
314        if Path::new(path_part).extension().and_then(|e| e.to_str()) == Some("md") {
315            continue;
316        }
317
318        // Resolve the ref relative to the containing markdown file's directory.
319        // A leading `/` is treated as relative to input_dir, not the filesystem
320        // root, which is the most useful interpretation for a docs directory.
321        let abs = if path_part.starts_with('/') {
322            input_dir.join(path_part.trim_start_matches('/'))
323        } else {
324            md_dir.join(path_part)
325        };
326
327        // canonicalize() fails if the path does not exist.
328        let Ok(abs) = abs.canonicalize() else {
329            continue;
330        };
331
332        if !abs.is_file() {
333            continue;
334        }
335
336        // Only copy assets that live inside the input directory; assets
337        // outside it are silently skipped (they cannot be given a stable
338        // relative output path).
339        let Ok(relative) = abs.strip_prefix(input_dir) else {
340            continue;
341        };
342        let relative = relative.to_owned();
343
344        result.push((abs, relative));
345    }
346
347    result
348}
349
350/// Convert a Markdown string to an HTML fragment.
351///
352/// Enables tables, strikethrough, and footnotes — a superset of what
353/// [`crate::site`]'s own renderer uses.
354fn render_markdown(md: &str) -> String {
355    let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_FOOTNOTES;
356    let parser = Parser::new_ext(md, opts);
357    let mut buf = String::new();
358    html::push_html(&mut buf, parser);
359    buf
360}
361
362/// Scan a Markdown string for the first heading of any level and return its
363/// plain-text content. Used to populate the `<title>` element.
364///
365/// Handles headings that contain inline emphasis, code spans, or other inline
366/// elements by concatenating all `Text` and `Code` events seen between the
367/// opening and closing heading tags.
368fn extract_title(md: &str) -> Option<String> {
369    let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
370    let mut parser = Parser::new_ext(md, opts);
371    let mut in_heading = false;
372    let mut title = String::new();
373
374    loop {
375        match parser.next()? {
376            Event::Start(Tag::Heading { .. }) => {
377                in_heading = true;
378            }
379            Event::End(TagEnd::Heading(_)) => break,
380            Event::Text(text) | Event::Code(text) if in_heading => {
381                title.push_str(&text);
382            }
383            _ => {}
384        }
385    }
386
387    if title.is_empty() { None } else { Some(title) }
388}
389
390/// Render the Tera template with the given `title` and HTML `content`.
391///
392/// `title` is passed as a plain string; Tera auto-escapes it when inserted
393/// into `{{ title }}`. `content` is the already-rendered HTML fragment and
394/// must be inserted with `{{ content | safe }}` in the template.
395fn render_template(tera: &Tera, title: &str, content: &str) -> Result<String> {
396    let mut ctx = Context::new();
397    ctx.insert("title", title);
398    ctx.insert("content", content);
399    tera.render(TERA_NAME, &ctx).into_diagnostic()
400}
401
/// Return `path`'s file stem as an owned `String` (lossily converted if it is
/// not valid UTF-8), or `"Document"` when the path has no file stem.
fn file_stem_string(path: &Path) -> String {
    match path.file_stem() {
        Some(stem) => stem.to_string_lossy().into_owned(),
        None => String::from("Document"),
    }
}
408
/// Return the final component of `path` as an owned `String` (lossily
/// converted if it is not valid UTF-8), or `"html"` when there is none.
fn dir_name_string(path: &Path) -> String {
    match path.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::from("html"),
    }
}
415
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Build a temporary directory tree:
    ///
    /// ```text
    /// <tmpdir>/
    ///   docs/
    ///     page.md         ← references image.png, guide.pdf, other.md, https://…
    ///     image.png       ← local asset (should be collected)
    ///     guide.pdf       ← local asset (should be collected)
    ///     other.md        ← .md file   (should be skipped)
    ///     sub/
    ///       nested.md     ← references ../image.png (should be collected)
    /// ```
    ///
    /// The returned `docs` path is built on the *canonical* temp directory.
    /// The builder always hands a canonical input directory to the code under
    /// test, and on platforms where the temp root is a symlink a
    /// non-canonical prefix would never match the canonical asset paths.
    fn make_test_tree() -> (tempfile::TempDir, PathBuf) {
        let tmp = tempfile::tempdir().expect("tempdir");
        let docs = tmp
            .path()
            .canonicalize()
            .expect("canonicalize tempdir")
            .join("docs");
        fs::create_dir_all(docs.join("sub")).unwrap();
        fs::write(docs.join("image.png"), b"PNG").unwrap();
        fs::write(docs.join("guide.pdf"), b"PDF").unwrap();
        fs::write(docs.join("other.md"), b"# Other").unwrap();
        fs::write(
            docs.join("page.md"),
            b"# Hello\n\
              ![logo](image.png)\n\
              [guide](guide.pdf)\n\
              [other](other.md)\n\
              [remote](https://example.com)\n\
              [frag](#section)\n",
        )
        .unwrap();
        fs::write(
            docs.join("sub").join("nested.md"),
            b"# Nested\n![logo](../image.png)\n",
        )
        .unwrap();
        (tmp, docs)
    }

    #[test]
    fn collects_images_and_non_md_links() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("page.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);
        let sources: Vec<_> = refs.iter().map(|(src, _)| src.clone()).collect();

        let img = docs.join("image.png").canonicalize().unwrap();
        let pdf = docs.join("guide.pdf").canonicalize().unwrap();
        let other = docs.join("other.md").canonicalize().unwrap();

        assert!(sources.contains(&img), "image.png should be collected");
        assert!(sources.contains(&pdf), "guide.pdf should be collected");
        assert!(!sources.contains(&other), "other.md should be skipped");
        // Remote URL and fragment-only link must not appear.
        assert_eq!(refs.len(), 2, "expected exactly 2 assets (image + pdf)");
    }

    #[test]
    fn relative_paths_from_subdirectory_are_resolved() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("sub").join("nested.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);

        assert_eq!(refs.len(), 1, "expected exactly 1 asset");
        let (src, rel) = &refs[0];
        assert_eq!(
            src,
            &docs.join("image.png").canonicalize().unwrap(),
            "source should be docs/image.png"
        );
        assert_eq!(
            rel,
            &PathBuf::from("image.png"),
            "relative path should be image.png (relative to docs/)"
        );
    }

    #[test]
    fn extract_title_finds_first_heading() {
        assert_eq!(
            extract_title("# Hello World\n\nsome text"),
            Some("Hello World".to_owned())
        );
    }

    #[test]
    fn extract_title_handles_inline_code_in_heading() {
        assert_eq!(
            extract_title("# Use `foo()` wisely"),
            Some("Use foo() wisely".to_owned())
        );
    }

    #[test]
    fn extract_title_returns_none_when_no_heading() {
        assert_eq!(extract_title("just a paragraph"), None);
    }
}