Skip to main content

abbaye/builders/
archive.rs

1use std::{
2    fs::File,
3    path::{Path, PathBuf},
4};
5
6use flate2::{Compression, write::GzEncoder};
7use globset::{Glob, GlobSet, GlobSetBuilder};
8use ignore::WalkBuilder;
9use miette::{IntoDiagnostic, Result};
10use schemars::JsonSchema;
11use serde::{Deserialize, Serialize};
12
13use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender};
14
/// Returns the exclusion patterns applied when a configuration does not list
/// its own: the `.git` directory and every entry named `*.local`.
fn default_ignore_patterns() -> Vec<String> {
    [".git", "*.local"]
        .iter()
        .map(|pattern| (*pattern).to_owned())
        .collect()
}
18
/// Configuration for [`ArchiveBuilder`].
///
/// The three `Option` fields may be left unset; the builder resolves each of
/// them to the fallback described on the field when it runs.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct ArchiveBuilderConfig {
    /// Root directory to archive. Defaults to the current working directory.
    /// The builder canonicalizes this path before walking it, so the
    /// directory must exist.
    pub source_dir: Option<PathBuf>,

    /// Output path for the generated `.tar.gz` archive.
    /// Defaults to `{prefix}-{version}.tar.gz` in the current working directory.
    /// The output file itself is never added to the archive.
    pub output: Option<PathBuf>,

    /// Prefix applied to every entry path inside the archive.
    /// For example, `"myproject-1.0.0"` produces entries like
    /// `myproject-1.0.0/src/main.rs`.
    /// Defaults to the source directory's name, or to `"source"` when the
    /// canonicalized source directory has no final path component.
    pub prefix: Option<String>,

    /// Glob patterns for files and directories to exclude from the archive.
    /// Each pattern is matched against every component of a path (relative to
    /// the source directory), so a pattern like `".git"` excludes the `.git`
    /// directory and all its contents, and `"*.local"` excludes any entry
    /// whose name ends with `.local`.
    /// `.git` is always excluded, even when it is not listed here.
    /// Defaults to `[".git", "*.local"]` when the field is absent from the
    /// deserialized input.
    #[serde(default = "default_ignore_patterns")]
    pub ignore_patterns: Vec<String>,
}
43
44impl Default for ArchiveBuilderConfig {
45    fn default() -> Self {
46        Self {
47            source_dir: None,
48            output: None,
49            prefix: None,
50            ignore_patterns: default_ignore_patterns(),
51        }
52    }
53}
54
/// Creates a `.tar.gz` archive of the source tree, honouring all `.gitignore`
/// rules found in the directory hierarchy.
///
/// The builder holds no state of its own: everything it needs comes from the
/// [`ArchiveBuilderConfig`] and version string handed to its [`Builder`]
/// implementation, which reports the written archive as a single artifact.
pub struct ArchiveBuilder;
58
59impl Builder for ArchiveBuilder {
60    type ConfigType = ArchiveBuilderConfig;
61
62    async fn build(
63        &self,
64        config: Self::ConfigType,
65        version: &str,
66        log: LogSender,
67    ) -> Result<Vec<ArtifactPath>> {
68        let source_dir = config
69            .source_dir
70            .unwrap_or_else(|| PathBuf::from("."))
71            .canonicalize()
72            .into_diagnostic()?;
73
74        let prefix = config.prefix.unwrap_or_else(|| {
75            source_dir
76                .file_name()
77                .map(|n| n.to_string_lossy().into_owned())
78                .unwrap_or_else(|| "source".to_owned())
79        });
80
81        let output = config
82            .output
83            .unwrap_or_else(|| PathBuf::from(format!("{prefix}-{version}.tar.gz")));
84
85        let ignore_set = build_ignore_set(&config.ignore_patterns)?;
86
87        let _ = log.send(LogEvent::Line(format!(
88            "{} → {}",
89            source_dir.display(),
90            output.display()
91        )));
92        let archive_path = tokio::task::spawn_blocking(move || {
93            create_archive(&source_dir, &output, &prefix, &ignore_set)
94        })
95        .await
96        .into_diagnostic()??;
97
98        let name = archive_path
99            .file_name()
100            .map(|n| n.to_string_lossy().into_owned())
101            .unwrap_or_default();
102
103        Ok(vec![ArtifactPath {
104            path: archive_path,
105            name,
106            hash: None,
107            category: None,
108            group_name: None,
109            group_comment: None,
110        }])
111    }
112}
113
114/// Compiles a [`GlobSet`] from the given list of glob patterns.
115fn build_ignore_set(patterns: &[String]) -> Result<GlobSet> {
116    let mut builder = GlobSetBuilder::new();
117    for pattern in patterns {
118        builder.add(Glob::new(pattern).into_diagnostic()?);
119    }
120    builder.build().into_diagnostic()
121}
122
123/// Walks `source_dir` respecting `.gitignore` rules and writes a `.tar.gz`
124/// archive to `output`, prefixing every entry with `prefix`.
125/// `.git` and the output file itself are always excluded. Additionally, entries
126/// whose path contains a component matched by `ignore_set` are skipped.
127fn create_archive(
128    source_dir: &Path,
129    output: &Path,
130    prefix: &str,
131    ignore_set: &GlobSet,
132) -> Result<PathBuf> {
133    let file = File::create(output).into_diagnostic()?;
134    // Canonicalize now that the file exists so we can reliably detect it during
135    // the walk and avoid embedding the archive inside itself.
136    let output_canonical = output.canonicalize().into_diagnostic()?;
137    let encoder = GzEncoder::new(file, Compression::default());
138    let mut archive = tar::Builder::new(encoder);
139
140    for result in WalkBuilder::new(source_dir)
141        .hidden(false) // include dotfiles such as .rustfmt.toml
142        .build()
143    {
144        let entry = result.into_diagnostic()?;
145        let path = entry.path();
146
147        let relative = path.strip_prefix(source_dir).into_diagnostic()?;
148
149        // Always exclude .git, regardless of ignore_patterns.
150        if relative.components().any(|c| c.as_os_str() == ".git") {
151            continue;
152        }
153
154        // Always exclude the output archive itself to prevent a tarbomb.
155        if path == output_canonical {
156            continue;
157        }
158
159        // Skip entries whose path contains a component matched by the ignore set.
160        if relative
161            .components()
162            .any(|c| ignore_set.is_match(Path::new(c.as_os_str())))
163        {
164            continue;
165        }
166
167        if !path.is_file() {
168            continue;
169        }
170
171        let entry_path = Path::new(prefix).join(relative);
172
173        archive
174            .append_path_with_name(path, &entry_path)
175            .into_diagnostic()?;
176    }
177
178    // Finalise the tar stream, then flush and close the gzip layer.
179    archive
180        .into_inner()
181        .into_diagnostic()?
182        .finish()
183        .into_diagnostic()?;
184
185    Ok(output.to_path_buf())
186}