1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use ignore::WalkBuilder;
5use miette::{IntoDiagnostic, Result, miette};
6use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd, html};
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9use tera::{Context, Tera};
10
11use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender};
12use crate::config::OutputFormat;
13
/// Built-in HTML page template for rendered Markdown, embedded at compile time from
/// `../templates/markdown.html.j2`.
pub const TEMPLATE_MARKDOWN_HTML: &str = include_str!("../templates/markdown.html.j2");
/// Built-in Gemtext page template for rendered Markdown, embedded at compile time from
/// `../templates/markdown.gmi.j2`.
pub const TEMPLATE_MARKDOWN_GEMTEXT: &str = include_str!("../templates/markdown.gmi.j2");
23
/// Serde default for [`MarkdownBuilderConfig::recursive`]: sub-directories are scanned unless
/// the configuration explicitly turns recursion off.
fn default_recursive() -> bool {
    true
}
27
/// User-facing configuration of the Markdown builder.
///
/// Every field is optional in the serialized form: the two paths are `Option`s and the
/// remaining fields carry serde defaults.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct MarkdownBuilderConfig {
    /// Directory containing the Markdown sources. The builder falls back to the current
    /// directory (`.`) when this is not set.
    pub input: Option<PathBuf>,

    /// Directory the rendered documents are written to. When not set, a sibling of the input
    /// directory named `<input-name>-html` is used.
    pub output: Option<PathBuf>,

    /// Whether sub-directories of `input` are scanned as well. Defaults to `true`.
    #[serde(default = "default_recursive")]
    pub recursive: bool,
    /// Output formats to render each document in. Defaults to HTML only.
    #[serde(default = "default_recursive_formats")]
    pub formats: Vec<OutputFormat>,
}
67
68fn default_recursive_formats() -> Vec<OutputFormat> {
69 vec![OutputFormat::Html]
70}
71
72impl Default for MarkdownBuilderConfig {
73 fn default() -> Self {
74 Self {
75 input: None,
76 output: None,
77 recursive: default_recursive(),
78 formats: default_recursive_formats(),
79 }
80 }
81}
82
/// Builder that renders a directory of Markdown files through the `markdown` templates and
/// copies the local assets those files reference. It is stateless; everything it needs comes
/// from [`MarkdownBuilderConfig`].
pub struct MarkdownBuilder;
91
92impl Builder for MarkdownBuilder {
93 type ConfigType = MarkdownBuilderConfig;
94
95 async fn build(
96 &self,
97 config: Self::ConfigType,
98 _version: &str,
99 log: LogSender,
100 ) -> Result<Vec<ArtifactPath>> {
101 let input = config
102 .input
103 .unwrap_or_else(|| PathBuf::from("."))
104 .canonicalize()
105 .into_diagnostic()?;
106
107 if !input.is_dir() {
108 return if input.exists() {
109 Err(miette!(
110 "markdown builder input must be a directory, got a file: {}",
111 input.display()
112 ))
113 } else {
114 Err(miette!(
115 "markdown builder input directory does not exist: {}",
116 input.display()
117 ))
118 };
119 }
120
121 let tera = load_tera(&config.formats)?;
124
125 build_directory(
126 &input,
127 config.output,
128 config.recursive,
129 &log,
130 &tera,
131 &config.formats,
132 )
133 .await
134 }
135}
136
137fn load_tera(formats: &[OutputFormat]) -> Result<Tera> {
139 let theme_path = PathBuf::from(".abbaye").join("theme");
140 let mut tera = Tera::default();
141 crate::site::register_format_templates(
142 &mut tera,
143 &theme_path,
144 formats,
145 &[(
146 "markdown",
147 TEMPLATE_MARKDOWN_HTML,
148 TEMPLATE_MARKDOWN_GEMTEXT,
149 )],
150 )?;
151 Ok(tera)
152}
153
154async fn build_directory(
157 input_dir: &Path,
158 output: Option<PathBuf>,
159 recursive: bool,
160 log: &LogSender,
161 tera: &Tera,
162 formats: &[OutputFormat],
163) -> Result<Vec<ArtifactPath>> {
164 let output_dir = output.unwrap_or_else(|| {
165 let stem = input_dir
166 .file_name()
167 .map(|n| format!("{}-html", n.to_string_lossy()))
168 .unwrap_or_else(|| "html".to_owned());
169 input_dir.parent().unwrap_or(Path::new(".")).join(stem)
170 });
171
172 tokio::fs::create_dir_all(&output_dir)
173 .await
174 .into_diagnostic()?;
175
176 let md_files = tokio::task::spawn_blocking({
179 let input_dir = input_dir.to_owned();
180 move || collect_md_files(&input_dir, recursive)
181 })
182 .await
183 .into_diagnostic()??;
184
185 if md_files.is_empty() {
186 let _ = log.send(LogEvent::Line(format!(
187 "warning: no .md files found in {}",
188 input_dir.display()
189 )));
190 return Ok(vec![ArtifactPath {
191 path: output_dir.clone(),
192 name: dir_name_string(&output_dir),
193 hash: None,
194 category: None,
195 group_name: None,
196 group_comment: None,
197 }]);
198 }
199
200 let mut files_to_copy: HashMap<PathBuf, PathBuf> = HashMap::new();
204
205 for md_path in &md_files {
206 let relative = md_path.strip_prefix(input_dir).into_diagnostic()?;
207 let out_path = output_dir.join(relative).with_extension("html");
208
209 if let Some(parent) = out_path.parent() {
211 tokio::fs::create_dir_all(parent).await.into_diagnostic()?;
212 }
213
214 let _ = log.send(LogEvent::Line(format!(
215 "{} → {}",
216 md_path.display(),
217 out_path.display()
218 )));
219
220 let md = tokio::fs::read_to_string(md_path).await.into_diagnostic()?;
221
222 for (src, rel) in collect_referenced_files(&md, md_path, input_dir) {
224 files_to_copy
225 .entry(src)
226 .or_insert_with(|| output_dir.join(rel));
227 }
228
229 let title = extract_title(&md).unwrap_or_else(|| file_stem_string(md_path));
230
231 for format in formats {
232 let suffix = format.extension();
233 let ext = format.extension();
234 let tmpl_name = format!("markdown.{suffix}");
235 let content = match format {
236 OutputFormat::Html => render_markdown(&md),
237 OutputFormat::Gemtext => crate::render::render_markdown_gemtext(&md),
238 };
239 let document = render_template(tera, &tmpl_name, &title, &content)?;
240
241 let fmt_out_path = out_path.with_extension(ext);
242 tokio::fs::write(&fmt_out_path, document.as_bytes())
243 .await
244 .into_diagnostic()?;
245 }
246 }
247
248 for (src, dest) in &files_to_copy {
250 if let Some(parent) = dest.parent() {
251 tokio::fs::create_dir_all(parent).await.into_diagnostic()?;
252 }
253
254 let _ = log.send(LogEvent::Line(format!(
255 "copying {} → {}",
256 src.display(),
257 dest.display()
258 )));
259
260 tokio::fs::copy(src, dest).await.into_diagnostic()?;
261 }
262
263 Ok(vec![ArtifactPath {
264 path: output_dir.clone(),
265 name: dir_name_string(&output_dir),
266 hash: None,
267 category: None,
268 group_name: None,
269 group_comment: None,
270 }])
271}
272
273fn collect_md_files(dir: &Path, recursive: bool) -> Result<Vec<PathBuf>> {
281 let mut files = Vec::new();
282
283 let walker = WalkBuilder::new(dir)
284 .max_depth(if recursive { None } else { Some(1) })
285 .hidden(false)
287 .build();
288
289 for result in walker {
290 let entry = result.into_diagnostic()?;
291 let path = entry.into_path();
292 if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some("md") {
293 files.push(path);
294 }
295 }
296
297 files.sort();
298 Ok(files)
299}
300
301fn collect_referenced_files(md: &str, md_path: &Path, input_dir: &Path) -> Vec<(PathBuf, PathBuf)> {
317 let md_dir = md_path.parent().unwrap_or(Path::new("."));
318 let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
319 let mut result = Vec::new();
320
321 for event in Parser::new_ext(md, opts) {
322 let url: Option<pulldown_cmark::CowStr> = match event {
323 Event::Start(Tag::Image { dest_url, .. }) => Some(dest_url),
324 Event::Start(Tag::Link { dest_url, .. }) => Some(dest_url),
325 _ => None,
326 };
327
328 let Some(url) = url else { continue };
329 let s = url.as_ref();
330
331 if s.contains("://") || s.starts_with('#') || s.is_empty() {
333 continue;
334 }
335
336 let path_part = s.split('#').next().unwrap_or(s);
338 if path_part.is_empty() {
339 continue;
340 }
341
342 if Path::new(path_part).extension().and_then(|e| e.to_str()) == Some("md") {
345 continue;
346 }
347
348 let abs = if path_part.starts_with('/') {
352 input_dir.join(path_part.trim_start_matches('/'))
353 } else {
354 md_dir.join(path_part)
355 };
356
357 let Ok(abs) = abs.canonicalize() else {
359 continue;
360 };
361
362 if !abs.is_file() {
363 continue;
364 }
365
366 let Ok(relative) = abs.strip_prefix(input_dir) else {
370 continue;
371 };
372 let relative = relative.to_owned();
373
374 result.push((abs, relative));
375 }
376
377 result
378}
379
380fn render_markdown(md: &str) -> String {
385 let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_FOOTNOTES;
386 let parser = Parser::new_ext(md, opts);
387 let mut buf = String::new();
388 html::push_html(&mut buf, parser);
389 buf
390}
391
392fn extract_title(md: &str) -> Option<String> {
399 let opts = Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH;
400 let mut parser = Parser::new_ext(md, opts);
401 let mut in_heading = false;
402 let mut title = String::new();
403
404 loop {
405 match parser.next()? {
406 Event::Start(Tag::Heading { .. }) => {
407 in_heading = true;
408 }
409 Event::End(TagEnd::Heading(_)) => break,
410 Event::Text(text) | Event::Code(text) if in_heading => {
411 title.push_str(&text);
412 }
413 _ => {}
414 }
415 }
416
417 if title.is_empty() { None } else { Some(title) }
418}
419
420fn render_template(tera: &Tera, template_name: &str, title: &str, content: &str) -> Result<String> {
422 let mut ctx = Context::new();
423 ctx.insert("title", title);
424 ctx.insert("content", content);
425 tera.render(template_name, &ctx).into_diagnostic()
426}
427
/// Returns the file stem of `path` (the file name without its last extension) as a lossily
/// converted string, or `"Document"` when the path has no file name.
fn file_stem_string(path: &Path) -> String {
    match path.file_stem() {
        Some(stem) => stem.to_string_lossy().into_owned(),
        None => String::from("Document"),
    }
}
434
/// Returns the final component of `path` as a lossily converted string, or `"html"` when the
/// path has no final component (for example `/` or a path ending in `..`).
fn dir_name_string(path: &Path) -> String {
    match path.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::from("html"),
    }
}
441
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates a temporary fixture tree and returns it with the canonical `docs` path.
    ///
    /// ```text
    /// docs/
    ///   image.png
    ///   guide.pdf
    ///   other.md
    ///   page.md          image + pdf + .md link + remote link + fragment link
    ///   sub/nested.md    image referenced through `../`
    /// ```
    ///
    /// The returned `TempDir` must be kept alive for as long as the tree is used.
    fn make_test_tree() -> (tempfile::TempDir, PathBuf) {
        let tmp = tempfile::tempdir().expect("tempdir");
        let docs = tmp.path().join("docs");
        fs::create_dir_all(docs.join("sub")).unwrap();
        // `collect_referenced_files` compares canonical paths against `input_dir`, so the
        // fixture root has to be canonical too (temp directories may sit behind a symlink).
        let docs = docs.canonicalize().unwrap();

        fs::write(docs.join("image.png"), b"PNG").unwrap();
        fs::write(docs.join("guide.pdf"), b"PDF").unwrap();
        fs::write(docs.join("other.md"), b"# Other").unwrap();
        fs::write(
            docs.join("page.md"),
            b"# Hello\n\
              \n\
              ![img](image.png)\n\
              [guide](guide.pdf)\n\
              [other](other.md)\n\
              [remote](https://example.com)\n\
              [frag](#section)\n",
        )
        .unwrap();
        fs::write(
            docs.join("sub").join("nested.md"),
            b"# Nested\n\n![img](../image.png)\n",
        )
        .unwrap();
        (tmp, docs)
    }

    #[test]
    fn collects_images_and_non_md_links() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("page.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);
        let sources: Vec<_> = refs.iter().map(|(src, _)| src.clone()).collect();

        let img = docs.join("image.png").canonicalize().unwrap();
        let pdf = docs.join("guide.pdf").canonicalize().unwrap();
        let other = docs.join("other.md").canonicalize().unwrap();

        assert!(sources.contains(&img), "image.png should be collected");
        assert!(sources.contains(&pdf), "guide.pdf should be collected");
        assert!(!sources.contains(&other), "other.md should be skipped");
        assert_eq!(refs.len(), 2, "expected exactly 2 assets (image + pdf)");
    }

    #[test]
    fn relative_paths_from_subdirectory_are_resolved() {
        let (_tmp, docs) = make_test_tree();
        let md_path = docs.join("sub").join("nested.md");
        let md = fs::read_to_string(&md_path).unwrap();

        let refs = collect_referenced_files(&md, &md_path, &docs);

        assert_eq!(refs.len(), 1, "expected exactly 1 asset");
        let (src, rel) = &refs[0];
        assert_eq!(
            src,
            &docs.join("image.png").canonicalize().unwrap(),
            "source should be docs/image.png"
        );
        assert_eq!(
            rel,
            &PathBuf::from("image.png"),
            "relative path should be image.png (relative to docs/)"
        );
    }

    #[test]
    fn extract_title_finds_first_heading() {
        assert_eq!(
            extract_title("# Hello World\n\nsome text"),
            Some("Hello World".to_owned())
        );
    }

    #[test]
    fn extract_title_handles_inline_code_in_heading() {
        assert_eq!(
            extract_title("# Use `foo()` wisely"),
            Some("Use foo() wisely".to_owned())
        );
    }

    #[test]
    fn extract_title_returns_none_when_no_heading() {
        assert_eq!(extract_title("just a paragraph"), None);
    }
}