Skip to main content

abbaye/builders/
archive.rs

1use std::{
2    fs::File,
3    path::{Path, PathBuf},
4};
5
6use flate2::{Compression, write::GzEncoder};
7use globset::{Glob, GlobSet, GlobSetBuilder};
8use ignore::WalkBuilder;
9use miette::{IntoDiagnostic, Result};
10use serde::{Deserialize, Serialize};
11
12use crate::builders::{ArtifactPath, Builder};
13
/// Returns the ignore patterns applied when a configuration does not specify
/// any: the `.git` directory and anything whose name ends in `.local`.
fn default_ignore_patterns() -> Vec<String> {
    [".git", "*.local"]
        .iter()
        .map(|pattern| String::from(*pattern))
        .collect()
}
17
/// Configuration for [`ArchiveBuilder`].
///
/// When deserialized, a missing `ignore_patterns` entry is filled in by
/// `default_ignore_patterns` (see the `serde(default)` attribute below).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ArchiveBuilderConfig {
    /// Root directory to archive. Defaults to the current working directory
    /// (`.`). The path is canonicalized before the archive is built.
    pub source_dir: Option<PathBuf>,

    /// Output path for the generated `.tar.gz` archive.
    /// Defaults to `../source.tar.gz`, i.e. a file in the parent of the
    /// current working directory.
    pub output: Option<PathBuf>,

    /// Prefix applied to every entry path inside the archive.
    /// For example, `"myproject-1.0.0"` produces entries like
    /// `myproject-1.0.0/src/main.rs`.
    /// Defaults to the final component of the canonicalized source directory,
    /// or `"source"` when that path has no final component.
    pub prefix: Option<String>,

    /// Glob patterns for files and directories to exclude from the archive.
    /// Each pattern is matched against every component of a path, so a pattern
    /// like `".git"` excludes the `.git` directory and all its contents, and
    /// `"*.local"` excludes any entry whose name ends with `.local`.
    /// Defaults to `[".git", "*.local"]`.
    #[serde(default = "default_ignore_patterns")]
    pub ignore_patterns: Vec<String>,
}
42
43impl Default for ArchiveBuilderConfig {
44    fn default() -> Self {
45        Self {
46            source_dir: None,
47            output: None,
48            prefix: None,
49            ignore_patterns: default_ignore_patterns(),
50        }
51    }
52}
53
/// Creates a `.tar.gz` archive of the source tree.
///
/// The builder itself is stateless; all settings arrive through
/// [`ArchiveBuilderConfig`] when `build` is called. Files are discovered with
/// [`ignore::WalkBuilder`] (hidden files included), so `.gitignore` rules are
/// honoured as that walker applies them.
///
/// NOTE(review): with the walker's default settings, git-related ignore rules
/// may only take effect when the tree lives inside a git repository — confirm
/// against the `ignore` crate's `WalkBuilder::require_git` documentation.
pub struct ArchiveBuilder;
57
58impl Builder for ArchiveBuilder {
59    type ConfigType = ArchiveBuilderConfig;
60
61    async fn build(&self, config: Self::ConfigType) -> Result<Vec<ArtifactPath>> {
62        let source_dir = config
63            .source_dir
64            .unwrap_or_else(|| PathBuf::from("."))
65            .canonicalize()
66            .into_diagnostic()?;
67
68        let output = config
69            .output
70            .unwrap_or_else(|| PathBuf::from("../source.tar.gz"));
71
72        let prefix = config.prefix.unwrap_or_else(|| {
73            source_dir
74                .file_name()
75                .map(|n| n.to_string_lossy().into_owned())
76                .unwrap_or_else(|| "source".to_owned())
77        });
78
79        let ignore_set = build_ignore_set(&config.ignore_patterns)?;
80
81        let archive_path = tokio::task::spawn_blocking(move || {
82            create_archive(&source_dir, &output, &prefix, &ignore_set)
83        })
84        .await
85        .into_diagnostic()??;
86
87        let name = archive_path
88            .file_name()
89            .map(|n| n.to_string_lossy().into_owned())
90            .unwrap_or_default();
91
92        Ok(vec![ArtifactPath {
93            path: archive_path,
94            name,
95            hash: None,
96        }])
97    }
98}
99
100/// Compiles a [`GlobSet`] from the given list of glob patterns.
101fn build_ignore_set(patterns: &[String]) -> Result<GlobSet> {
102    let mut builder = GlobSetBuilder::new();
103    for pattern in patterns {
104        builder.add(Glob::new(pattern).into_diagnostic()?);
105    }
106    builder.build().into_diagnostic()
107}
108
109/// Walks `source_dir` respecting `.gitignore` rules and writes a `.tar.gz`
110/// archive to `output`, prefixing every entry with `prefix`.
111/// `.git` and the output file itself are always excluded. Additionally, entries
112/// whose path contains a component matched by `ignore_set` are skipped.
113fn create_archive(
114    source_dir: &Path,
115    output: &Path,
116    prefix: &str,
117    ignore_set: &GlobSet,
118) -> Result<PathBuf> {
119    let file = File::create(output).into_diagnostic()?;
120    // Canonicalize now that the file exists so we can reliably detect it during
121    // the walk and avoid embedding the archive inside itself.
122    let output_canonical = output.canonicalize().into_diagnostic()?;
123    let encoder = GzEncoder::new(file, Compression::default());
124    let mut archive = tar::Builder::new(encoder);
125
126    for result in WalkBuilder::new(source_dir)
127        .hidden(false) // include dotfiles such as .rustfmt.toml
128        .build()
129    {
130        let entry = result.into_diagnostic()?;
131        let path = entry.path();
132
133        let relative = path.strip_prefix(source_dir).into_diagnostic()?;
134
135        // Always exclude .git, regardless of ignore_patterns.
136        if relative.components().any(|c| c.as_os_str() == ".git") {
137            continue;
138        }
139
140        // Always exclude the output archive itself to prevent a tarbomb.
141        if path == output_canonical {
142            continue;
143        }
144
145        // Skip entries whose path contains a component matched by the ignore set.
146        if relative
147            .components()
148            .any(|c| ignore_set.is_match(Path::new(c.as_os_str())))
149        {
150            continue;
151        }
152
153        if !path.is_file() {
154            continue;
155        }
156
157        let entry_path = Path::new(prefix).join(relative);
158
159        archive
160            .append_path_with_name(path, &entry_path)
161            .into_diagnostic()?;
162    }
163
164    // Finalise the tar stream, then flush and close the gzip layer.
165    archive
166        .into_inner()
167        .into_diagnostic()?
168        .finish()
169        .into_diagnostic()?;
170
171    Ok(output.to_path_buf())
172}