Skip to main content

abbaye/builders/
archive.rs

1use std::{
2    fs::File,
3    path::{Path, PathBuf},
4};
5
6use flate2::{Compression, write::GzEncoder};
7use globset::{Glob, GlobSet, GlobSetBuilder};
8use ignore::WalkBuilder;
9use miette::{IntoDiagnostic, Result};
10use schemars::JsonSchema;
11use serde::{Deserialize, Serialize};
12
13use crate::builders::{ArtifactPath, Builder};
14
/// Returns the exclusion patterns used when a configuration does not list its
/// own: the `.git` directory and any entry matching `*.local`.
fn default_ignore_patterns() -> Vec<String> {
    [".git", "*.local"]
        .iter()
        .map(|&pattern| String::from(pattern))
        .collect()
}
18
19/// Configuration for [`ArchiveBuilder`].
20#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
21pub struct ArchiveBuilderConfig {
22    /// Root directory to archive. Defaults to the current working directory.
23    pub source_dir: Option<PathBuf>,
24
25    /// Output path for the generated `.tar.gz` archive.
26    /// Defaults to `source.tar.gz` in the current working directory.
27    pub output: Option<PathBuf>,
28
29    /// Prefix applied to every entry path inside the archive.
30    /// For example, `"myproject-1.0.0"` produces entries like
31    /// `myproject-1.0.0/src/main.rs`.
32    /// Defaults to the source directory's name.
33    pub prefix: Option<String>,
34
35    /// Glob patterns for files and directories to exclude from the archive.
36    /// Each pattern is matched against every component of a path, so a pattern
37    /// like `".git"` excludes the `.git` directory and all its contents, and
38    /// `"*.local"` excludes any entry whose name ends with `.local`.
39    /// Defaults to `[".git", "*.local"]`.
40    #[serde(default = "default_ignore_patterns")]
41    pub ignore_patterns: Vec<String>,
42}
43
44impl Default for ArchiveBuilderConfig {
45    fn default() -> Self {
46        Self {
47            source_dir: None,
48            output: None,
49            prefix: None,
50            ignore_patterns: default_ignore_patterns(),
51        }
52    }
53}
54
55/// Creates a `.tar.gz` archive of the source tree, honouring all `.gitignore`
56/// rules found in the directory hierarchy.
57pub struct ArchiveBuilder;
58
59impl Builder for ArchiveBuilder {
60    type ConfigType = ArchiveBuilderConfig;
61
62    async fn build(&self, config: Self::ConfigType, _version: &str) -> Result<Vec<ArtifactPath>> {
63        let source_dir = config
64            .source_dir
65            .unwrap_or_else(|| PathBuf::from("."))
66            .canonicalize()
67            .into_diagnostic()?;
68
69        let output = config
70            .output
71            .unwrap_or_else(|| PathBuf::from("../source.tar.gz"));
72
73        let prefix = config.prefix.unwrap_or_else(|| {
74            source_dir
75                .file_name()
76                .map(|n| n.to_string_lossy().into_owned())
77                .unwrap_or_else(|| "source".to_owned())
78        });
79
80        let ignore_set = build_ignore_set(&config.ignore_patterns)?;
81
82        let archive_path = tokio::task::spawn_blocking(move || {
83            create_archive(&source_dir, &output, &prefix, &ignore_set)
84        })
85        .await
86        .into_diagnostic()??;
87
88        let name = archive_path
89            .file_name()
90            .map(|n| n.to_string_lossy().into_owned())
91            .unwrap_or_default();
92
93        Ok(vec![ArtifactPath {
94            path: archive_path,
95            name,
96            hash: None,
97        }])
98    }
99}
100
101/// Compiles a [`GlobSet`] from the given list of glob patterns.
102fn build_ignore_set(patterns: &[String]) -> Result<GlobSet> {
103    let mut builder = GlobSetBuilder::new();
104    for pattern in patterns {
105        builder.add(Glob::new(pattern).into_diagnostic()?);
106    }
107    builder.build().into_diagnostic()
108}
109
110/// Walks `source_dir` respecting `.gitignore` rules and writes a `.tar.gz`
111/// archive to `output`, prefixing every entry with `prefix`.
112/// `.git` and the output file itself are always excluded. Additionally, entries
113/// whose path contains a component matched by `ignore_set` are skipped.
114fn create_archive(
115    source_dir: &Path,
116    output: &Path,
117    prefix: &str,
118    ignore_set: &GlobSet,
119) -> Result<PathBuf> {
120    let file = File::create(output).into_diagnostic()?;
121    // Canonicalize now that the file exists so we can reliably detect it during
122    // the walk and avoid embedding the archive inside itself.
123    let output_canonical = output.canonicalize().into_diagnostic()?;
124    let encoder = GzEncoder::new(file, Compression::default());
125    let mut archive = tar::Builder::new(encoder);
126
127    for result in WalkBuilder::new(source_dir)
128        .hidden(false) // include dotfiles such as .rustfmt.toml
129        .build()
130    {
131        let entry = result.into_diagnostic()?;
132        let path = entry.path();
133
134        let relative = path.strip_prefix(source_dir).into_diagnostic()?;
135
136        // Always exclude .git, regardless of ignore_patterns.
137        if relative.components().any(|c| c.as_os_str() == ".git") {
138            continue;
139        }
140
141        // Always exclude the output archive itself to prevent a tarbomb.
142        if path == output_canonical {
143            continue;
144        }
145
146        // Skip entries whose path contains a component matched by the ignore set.
147        if relative
148            .components()
149            .any(|c| ignore_set.is_match(Path::new(c.as_os_str())))
150        {
151            continue;
152        }
153
154        if !path.is_file() {
155            continue;
156        }
157
158        let entry_path = Path::new(prefix).join(relative);
159
160        archive
161            .append_path_with_name(path, &entry_path)
162            .into_diagnostic()?;
163    }
164
165    // Finalise the tar stream, then flush and close the gzip layer.
166    archive
167        .into_inner()
168        .into_diagnostic()?
169        .finish()
170        .into_diagnostic()?;
171
172    Ok(output.to_path_buf())
173}