1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use ignore::WalkBuilder;
5use miette::{IntoDiagnostic, Result, miette};
6use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd, html};
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9use tera::{Context, Tera};
10
11use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender};
12
/// Built-in HTML page template, embedded at compile time from
/// `../templates/markdown.html.j2`.
///
/// [`load_tera`] registers it when no theme override file is present.
pub const TEMPLATE_MARKDOWN: &str = include_str!("../templates/markdown.html.j2");

/// File name looked up under `.abbaye/theme/` for a template override (see [`load_tera`]).
const THEME_FILENAME: &str = "markdown.html.j2";
/// Name under which the page template is registered with, and rendered from, Tera.
const TERA_NAME: &str = "markdown.html";
26
/// Serde default for [`MarkdownBuilderConfig::recursive`]: recursion stays enabled unless the
/// configuration turns it off explicitly.
fn default_recursive() -> bool {
    true
}
30
/// Configuration for [`MarkdownBuilder`].
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct MarkdownBuilderConfig {
    /// Directory containing the Markdown sources. When unset, the builder uses `.`.
    pub input: Option<PathBuf>,

    /// Directory that receives the generated files. When unset, the builder writes to a
    /// sibling of the input directory named `<input-dir-name>-html`.
    pub output: Option<PathBuf>,

    /// Whether sub-directories of the input directory are searched for `.md` files.
    /// Defaults to `true` when the key is missing.
    #[serde(default = "default_recursive")]
    pub recursive: bool,
}
66
67impl Default for MarkdownBuilderConfig {
68 fn default() -> Self {
69 Self {
70 input: None,
71 output: None,
72 recursive: default_recursive(),
73 }
74 }
75}
76
/// Builder that renders a directory of Markdown files into HTML pages.
pub struct MarkdownBuilder;
85
86impl Builder for MarkdownBuilder {
87 type ConfigType = MarkdownBuilderConfig;
88
89 async fn build(
90 &self,
91 config: Self::ConfigType,
92 _version: &str,
93 log: LogSender,
94 ) -> Result<Vec<ArtifactPath>> {
95 let input = config
96 .input
97 .unwrap_or_else(|| PathBuf::from("."))
98 .canonicalize()
99 .into_diagnostic()?;
100
101 if !input.is_dir() {
102 return if input.exists() {
103 Err(miette!(
104 "markdown builder input must be a directory, got a file: {}",
105 input.display()
106 ))
107 } else {
108 Err(miette!(
109 "markdown builder input directory does not exist: {}",
110 input.display()
111 ))
112 };
113 }
114
115 let tera = load_tera()?;
118
119 build_directory(&input, config.output, config.recursive, &log, &tera).await
120 }
121}
122
123fn load_tera() -> Result<Tera> {
130 let theme_file = PathBuf::from(".abbaye").join("theme").join(THEME_FILENAME);
131 let mut tera = Tera::default();
132 if theme_file.is_file() {
133 tera.add_template_file(&theme_file, Some(TERA_NAME))
134 .into_diagnostic()?;
135 } else {
136 tera.add_raw_template(TERA_NAME, TEMPLATE_MARKDOWN)
137 .into_diagnostic()?;
138 }
139 Ok(tera)
140}
141
142async fn build_directory(
145 input_dir: &Path,
146 output: Option<PathBuf>,
147 recursive: bool,
148 log: &LogSender,
149 tera: &Tera,
150) -> Result<Vec<ArtifactPath>> {
151 let output_dir = output.unwrap_or_else(|| {
152 let stem = input_dir
153 .file_name()
154 .map(|n| format!("{}-html", n.to_string_lossy()))
155 .unwrap_or_else(|| "html".to_owned());
156 input_dir.parent().unwrap_or(Path::new(".")).join(stem)
157 });
158
159 tokio::fs::create_dir_all(&output_dir)
160 .await
161 .into_diagnostic()?;
162
163 let md_files = tokio::task::spawn_blocking({
166 let input_dir = input_dir.to_owned();
167 move || collect_md_files(&input_dir, recursive)
168 })
169 .await
170 .into_diagnostic()??;
171
172 if md_files.is_empty() {
173 let _ = log.send(LogEvent::Line(format!(
174 "warning: no .md files found in {}",
175 input_dir.display()
176 )));
177 return Ok(vec![ArtifactPath {
178 path: output_dir.clone(),
179 name: dir_name_string(&output_dir),
180 hash: None,
181 }]);
182 }
183
184 let mut files_to_copy: HashMap<PathBuf, PathBuf> = HashMap::new();
188
189 for md_path in &md_files {
190 let relative = md_path.strip_prefix(input_dir).into_diagnostic()?;
191 let out_path = output_dir.join(relative).with_extension("html");
192
193 if let Some(parent) = out_path.parent() {
195 tokio::fs::create_dir_all(parent).await.into_diagnostic()?;
196 }
197
198 let _ = log.send(LogEvent::Line(format!(
199 "{} → {}",
200 md_path.display(),
201 out_path.display()
202 )));
203
204 let md = tokio::fs::read_to_string(md_path).await.into_diagnostic()?;
205
206 for (src, rel) in collect_referenced_files(&md, md_path, input_dir) {
208 files_to_copy
209 .entry(src)
210 .or_insert_with(|| output_dir.join(rel));
211 }
212
213 let title = extract_title(&md).unwrap_or_else(|| file_stem_string(md_path));
214 let document = render_template(tera, &title, &render_markdown(&md))?;
215
216 tokio::fs::write(&out_path, document.as_bytes())
217 .await
218 .into_diagnostic()?;
219 }
220
221 for (src, dest) in &files_to_copy {
223 if let Some(parent) = dest.parent() {
224 tokio::fs::create_dir_all(parent).await.into_diagnostic()?;
225 }
226
227 let _ = log.send(LogEvent::Line(format!(
228 "copying {} → {}",
229 src.display(),
230 dest.display()
231 )));
232
233 tokio::fs::copy(src, dest).await.into_diagnostic()?;
234 }
235
236 Ok(vec![ArtifactPath {
237 path: output_dir.clone(),
238 name: dir_name_string(&output_dir),
239 hash: None,
240 }])
241}
242
243fn collect_md_files(dir: &Path, recursive: bool) -> Result<Vec<PathBuf>> {
251 let mut files = Vec::new();
252
253 let walker = WalkBuilder::new(dir)
254 .max_depth(if recursive { None } else { Some(1) })
255 .hidden(false)
257 .build();
258
259 for result in walker {
260 let entry = result.into_diagnostic()?;
261 let path = entry.into_path();
262 if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some("md") {
263 files.push(path);
264 }
265 }
266
267 files.sort();
268 Ok(files)
269}
270
271fn collect_referenced_files(md: &str, md_path: &Path, input_dir: &Path) -> Vec<(PathBuf, PathBuf)> {
287 let md_dir = md_path.parent().unwrap_or(Path::new("."));
288 let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
289 let mut result = Vec::new();
290
291 for event in Parser::new_ext(md, opts) {
292 let url: Option<pulldown_cmark::CowStr> = match event {
293 Event::Start(Tag::Image { dest_url, .. }) => Some(dest_url),
294 Event::Start(Tag::Link { dest_url, .. }) => Some(dest_url),
295 _ => None,
296 };
297
298 let Some(url) = url else { continue };
299 let s = url.as_ref();
300
301 if s.contains("://") || s.starts_with('#') || s.is_empty() {
303 continue;
304 }
305
306 let path_part = s.split('#').next().unwrap_or(s);
308 if path_part.is_empty() {
309 continue;
310 }
311
312 if Path::new(path_part).extension().and_then(|e| e.to_str()) == Some("md") {
315 continue;
316 }
317
318 let abs = if path_part.starts_with('/') {
322 input_dir.join(path_part.trim_start_matches('/'))
323 } else {
324 md_dir.join(path_part)
325 };
326
327 let Ok(abs) = abs.canonicalize() else {
329 continue;
330 };
331
332 if !abs.is_file() {
333 continue;
334 }
335
336 let Ok(relative) = abs.strip_prefix(input_dir) else {
340 continue;
341 };
342 let relative = relative.to_owned();
343
344 result.push((abs, relative));
345 }
346
347 result
348}
349
350fn render_markdown(md: &str) -> String {
355 let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_FOOTNOTES;
356 let parser = Parser::new_ext(md, opts);
357 let mut buf = String::new();
358 html::push_html(&mut buf, parser);
359 buf
360}
361
362fn extract_title(md: &str) -> Option<String> {
369 let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
370 let mut parser = Parser::new_ext(md, opts);
371 let mut in_heading = false;
372 let mut title = String::new();
373
374 loop {
375 match parser.next()? {
376 Event::Start(Tag::Heading { .. }) => {
377 in_heading = true;
378 }
379 Event::End(TagEnd::Heading(_)) => break,
380 Event::Text(text) | Event::Code(text) if in_heading => {
381 title.push_str(&text);
382 }
383 _ => {}
384 }
385 }
386
387 if title.is_empty() { None } else { Some(title) }
388}
389
390fn render_template(tera: &Tera, title: &str, content: &str) -> Result<String> {
396 let mut ctx = Context::new();
397 ctx.insert("title", title);
398 ctx.insert("content", content);
399 tera.render(TERA_NAME, &ctx).into_diagnostic()
400}
401
/// Returns the file stem of `path` as a lossily converted string, or `"Document"` when the
/// path has no file stem.
fn file_stem_string(path: &Path) -> String {
    match path.file_stem() {
        Some(stem) => stem.to_string_lossy().into_owned(),
        None => String::from("Document"),
    }
}
408
/// Returns the final component of `path` as a lossily converted string, or `"html"` when the
/// path has no final component.
fn dir_name_string(path: &Path) -> String {
    path.file_name().map_or_else(
        || String::from("html"),
        |name| name.to_string_lossy().into_owned(),
    )
}
415
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates the fixture tree below inside a fresh temporary directory and returns the
    /// directory guard together with the canonical path of `docs/`.
    ///
    /// ```text
    /// docs/
    ///   image.png
    ///   guide.pdf
    ///   other.md
    ///   page.md        (image, pdf link, .md link, remote link, fragment link)
    ///   sub/
    ///     nested.md    (image referenced as ../image.png)
    /// ```
    fn make_test_tree() -> (tempfile::TempDir, PathBuf) {
        let tmp = tempfile::tempdir().expect("tempdir");
        let docs = tmp.path().join("docs");
        fs::create_dir_all(docs.join("sub")).unwrap();
        // The builder always passes a canonical root; do the same here so the tests do not
        // depend on whether the temporary directory sits behind a symlink.
        let docs = docs.canonicalize().unwrap();
        fs::write(docs.join("image.png"), b"PNG").unwrap();
        fs::write(docs.join("guide.pdf"), b"PDF").unwrap();
        fs::write(docs.join("other.md"), b"# Other").unwrap();
        fs::write(
            docs.join("page.md"),
            b"# Hello\n\
              \n\
              ![image](image.png)\n\
              [guide](guide.pdf)\n\
              [other](other.md)\n\
              [remote](https://example.com)\n\
              [frag](#section)\n",
        )
        .unwrap();
        fs::write(
            docs.join("sub").join("nested.md"),
            b"# Nested\n\n![image](../image.png)\n",
        )
        .unwrap();
        (tmp, docs)
    }

    #[test]
    fn collects_images_and_non_md_links() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("page.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);
        let sources: Vec<_> = refs.iter().map(|(src, _)| src.clone()).collect();

        let img = docs.join("image.png").canonicalize().unwrap();
        let pdf = docs.join("guide.pdf").canonicalize().unwrap();
        let other = docs.join("other.md").canonicalize().unwrap();

        assert!(sources.contains(&img), "image.png should be collected");
        assert!(sources.contains(&pdf), "guide.pdf should be collected");
        assert!(!sources.contains(&other), "other.md should be skipped");
        assert_eq!(refs.len(), 2, "expected exactly 2 assets (image + pdf)");
    }

    #[test]
    fn relative_paths_from_subdirectory_are_resolved() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("sub").join("nested.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);

        assert_eq!(refs.len(), 1, "expected exactly 1 asset");
        let (src, rel) = &refs[0];
        assert_eq!(
            src,
            &docs.join("image.png").canonicalize().unwrap(),
            "source should be docs/image.png"
        );
        assert_eq!(
            rel,
            &PathBuf::from("image.png"),
            "relative path should be image.png (relative to docs/)"
        );
    }

    #[test]
    fn extract_title_finds_first_heading() {
        assert_eq!(
            extract_title("# Hello World\n\nsome text"),
            Some("Hello World".to_owned())
        );
    }

    #[test]
    fn extract_title_handles_inline_code_in_heading() {
        assert_eq!(
            extract_title("# Use `foo()` wisely"),
            Some("Use foo() wisely".to_owned())
        );
    }

    #[test]
    fn extract_title_returns_none_when_no_heading() {
        assert_eq!(extract_title("just a paragraph"), None);
    }
}