// Source: abbaye/builders/archive.rs

use std::{
    fs::{self, File},
    path::{Path, PathBuf},
};

use flate2::{Compression, write::GzEncoder};
use globset::{Glob, GlobSet, GlobSetBuilder};
use ignore::WalkBuilder;
use miette::{IntoDiagnostic, Result};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender};
14
/// Returns the ignore patterns used when the configuration does not supply
/// any: `.git` and `*.local`.
///
/// Used both as the serde default for
/// `ArchiveBuilderConfig::ignore_patterns` and by its `Default` impl, so the
/// two ways of obtaining a default configuration stay in agreement.
fn default_ignore_patterns() -> Vec<String> {
    vec![".git".to_owned(), "*.local".to_owned()]
}
18
19/// Configuration for [`ArchiveBuilder`].
20#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
21pub struct ArchiveBuilderConfig {
22    /// Root directory to archive. Defaults to the current working directory.
23    pub source_dir: Option<PathBuf>,
24
25    /// Output path for the generated `.tar.gz` archive.
26    /// Defaults to `source.tar.gz` in the current working directory.
27    pub output: Option<PathBuf>,
28
29    /// Prefix applied to every entry path inside the archive.
30    /// For example, `"myproject-1.0.0"` produces entries like
31    /// `myproject-1.0.0/src/main.rs`.
32    /// Defaults to the source directory's name.
33    pub prefix: Option<String>,
34
35    /// Glob patterns for files and directories to exclude from the archive.
36    /// Each pattern is matched against every component of a path, so a pattern
37    /// like `".git"` excludes the `.git` directory and all its contents, and
38    /// `"*.local"` excludes any entry whose name ends with `.local`.
39    /// Defaults to `[".git", "*.local"]`.
40    #[serde(default = "default_ignore_patterns")]
41    pub ignore_patterns: Vec<String>,
42}
43
44impl Default for ArchiveBuilderConfig {
45    fn default() -> Self {
46        Self {
47            source_dir: None,
48            output: None,
49            prefix: None,
50            ignore_patterns: default_ignore_patterns(),
51        }
52    }
53}
54
/// Builder that packs a source tree into a gzip-compressed tarball
/// (`.tar.gz`).
///
/// The tree is walked with `ignore::WalkBuilder`, so `.gitignore` rules found
/// in the directory hierarchy are honoured; the configured ignore patterns
/// are applied on top of that.
pub struct ArchiveBuilder;
58
59impl Builder for ArchiveBuilder {
60    type ConfigType = ArchiveBuilderConfig;
61
62    async fn build(
63        &self,
64        config: Self::ConfigType,
65        _version: &str,
66        log: LogSender,
67    ) -> Result<Vec<ArtifactPath>> {
68        let source_dir = config
69            .source_dir
70            .unwrap_or_else(|| PathBuf::from("."))
71            .canonicalize()
72            .into_diagnostic()?;
73
74        let output = config
75            .output
76            .unwrap_or_else(|| PathBuf::from("../source.tar.gz"));
77
78        let prefix = config.prefix.unwrap_or_else(|| {
79            source_dir
80                .file_name()
81                .map(|n| n.to_string_lossy().into_owned())
82                .unwrap_or_else(|| "source".to_owned())
83        });
84
85        let ignore_set = build_ignore_set(&config.ignore_patterns)?;
86
87        let _ = log.send(LogEvent::Line(format!(
88            "archiving {} → {}",
89            source_dir.display(),
90            output.display()
91        )));
92        let archive_path = tokio::task::spawn_blocking(move || {
93            create_archive(&source_dir, &output, &prefix, &ignore_set)
94        })
95        .await
96        .into_diagnostic()??;
97        let _ = log.send(LogEvent::Line(format!(
98            "archive written: {}",
99            archive_path.display()
100        )));
101
102        let name = archive_path
103            .file_name()
104            .map(|n| n.to_string_lossy().into_owned())
105            .unwrap_or_default();
106
107        Ok(vec![ArtifactPath {
108            path: archive_path,
109            name,
110            hash: None,
111        }])
112    }
113}
114
115/// Compiles a [`GlobSet`] from the given list of glob patterns.
116fn build_ignore_set(patterns: &[String]) -> Result<GlobSet> {
117    let mut builder = GlobSetBuilder::new();
118    for pattern in patterns {
119        builder.add(Glob::new(pattern).into_diagnostic()?);
120    }
121    builder.build().into_diagnostic()
122}
123
124/// Walks `source_dir` respecting `.gitignore` rules and writes a `.tar.gz`
125/// archive to `output`, prefixing every entry with `prefix`.
126/// `.git` and the output file itself are always excluded. Additionally, entries
127/// whose path contains a component matched by `ignore_set` are skipped.
128fn create_archive(
129    source_dir: &Path,
130    output: &Path,
131    prefix: &str,
132    ignore_set: &GlobSet,
133) -> Result<PathBuf> {
134    let file = File::create(output).into_diagnostic()?;
135    // Canonicalize now that the file exists so we can reliably detect it during
136    // the walk and avoid embedding the archive inside itself.
137    let output_canonical = output.canonicalize().into_diagnostic()?;
138    let encoder = GzEncoder::new(file, Compression::default());
139    let mut archive = tar::Builder::new(encoder);
140
141    for result in WalkBuilder::new(source_dir)
142        .hidden(false) // include dotfiles such as .rustfmt.toml
143        .build()
144    {
145        let entry = result.into_diagnostic()?;
146        let path = entry.path();
147
148        let relative = path.strip_prefix(source_dir).into_diagnostic()?;
149
150        // Always exclude .git, regardless of ignore_patterns.
151        if relative.components().any(|c| c.as_os_str() == ".git") {
152            continue;
153        }
154
155        // Always exclude the output archive itself to prevent a tarbomb.
156        if path == output_canonical {
157            continue;
158        }
159
160        // Skip entries whose path contains a component matched by the ignore set.
161        if relative
162            .components()
163            .any(|c| ignore_set.is_match(Path::new(c.as_os_str())))
164        {
165            continue;
166        }
167
168        if !path.is_file() {
169            continue;
170        }
171
172        let entry_path = Path::new(prefix).join(relative);
173
174        archive
175            .append_path_with_name(path, &entry_path)
176            .into_diagnostic()?;
177    }
178
179    // Finalise the tar stream, then flush and close the gzip layer.
180    archive
181        .into_inner()
182        .into_diagnostic()?
183        .finish()
184        .into_diagnostic()?;
185
186    Ok(output.to_path_buf())
187}