at bd5ed90
use std::{
    fs::File,
    path::{Path, PathBuf},
};

use flate2::{Compression, write::GzEncoder};
use globset::{Glob, GlobSet, GlobSetBuilder};
use ignore::WalkBuilder;
use miette::{IntoDiagnostic, Result};
use serde::{Deserialize, Serialize};

use crate::builders::{ArtifactPath, Builder};

/// Returns the ignore patterns used when a configuration does not list any:
/// the `.git` directory and every entry whose name ends in `.local`.
fn default_ignore_patterns() -> Vec<String> {
    vec![".git".to_owned(), "*.local".to_owned()]
}

/// Configuration for [`ArchiveBuilder`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ArchiveBuilderConfig {
    /// Root directory to archive. Defaults to the current working directory.
    pub source_dir: Option<PathBuf>,

    /// Output path for the generated `.tar.gz` archive.
    /// Defaults to `../source.tar.gz`, i.e. a file named `source.tar.gz` in
    /// the parent of the current working directory.
    pub output: Option<PathBuf>,

    /// Prefix applied to every entry path inside the archive.
    /// For example, `"myproject-1.0.0"` produces entries like
    /// `myproject-1.0.0/src/main.rs`.
    /// Defaults to the source directory's name, or `"source"` when the
    /// source directory has no final path component.
    pub prefix: Option<String>,

    /// Glob patterns for files and directories to exclude from the archive.
    /// Each pattern is matched against every component of a path, so a pattern
    /// like `".git"` excludes the `.git` directory and all its contents, and
    /// `"*.local"` excludes any entry whose name ends with `.local`.
    /// The `.git` directory is excluded even when it is not listed here.
    /// Defaults to `[".git", "*.local"]`.
    #[serde(default = "default_ignore_patterns")]
    pub ignore_patterns: Vec<String>,
}

impl Default for ArchiveBuilderConfig {
    /// Leaves every optional setting unset and installs the default ignore
    /// patterns, mirroring what deserializing an empty configuration yields.
    fn default() -> Self {
        Self {
            source_dir: None,
            output: None,
            prefix: None,
            ignore_patterns: default_ignore_patterns(),
        }
    }
}

/// Creates a `.tar.gz` archive of the source tree, honouring all `.gitignore`
/// rules found in the directory hierarchy.
pub struct ArchiveBuilder; impl Builder for ArchiveBuilder { type ConfigType = ArchiveBuilderConfig; async fn build(&self, config: Self::ConfigType) -> Result<Vec<ArtifactPath>> { let source_dir = config .source_dir .unwrap_or_else(|| PathBuf::from(".")) .canonicalize() .into_diagnostic()?; let output = config .output .unwrap_or_else(|| PathBuf::from("../source.tar.gz")); let prefix = config.prefix.unwrap_or_else(|| { source_dir .file_name() .map(|n| n.to_string_lossy().into_owned()) .unwrap_or_else(|| "source".to_owned()) }); let ignore_set = build_ignore_set(&config.ignore_patterns)?; let archive_path = tokio::task::spawn_blocking(move || { create_archive(&source_dir, &output, &prefix, &ignore_set) }) .await .into_diagnostic()??; let name = archive_path .file_name() .map(|n| n.to_string_lossy().into_owned()) .unwrap_or_default(); Ok(vec![ArtifactPath { path: archive_path, name, hash: None, }]) } } /// Compiles a [`GlobSet`] from the given list of glob patterns. fn build_ignore_set(patterns: &[String]) -> Result<GlobSet> { let mut builder = GlobSetBuilder::new(); for pattern in patterns { builder.add(Glob::new(pattern).into_diagnostic()?); } builder.build().into_diagnostic() } /// Walks `source_dir` respecting `.gitignore` rules and writes a `.tar.gz` /// archive to `output`, prefixing every entry with `prefix`. /// `.git` and the output file itself are always excluded. Additionally, entries /// whose path contains a component matched by `ignore_set` are skipped. fn create_archive( source_dir: &Path, output: &Path, prefix: &str, ignore_set: &GlobSet, ) -> Result<PathBuf> { let file = File::create(output).into_diagnostic()?; // Canonicalize now that the file exists so we can reliably detect it during // the walk and avoid embedding the archive inside itself. 
let output_canonical = output.canonicalize().into_diagnostic()?; let encoder = GzEncoder::new(file, Compression::default()); let mut archive = tar::Builder::new(encoder); for result in WalkBuilder::new(source_dir) .hidden(false) // include dotfiles such as .rustfmt.toml .build() { let entry = result.into_diagnostic()?; let path = entry.path(); let relative = path.strip_prefix(source_dir).into_diagnostic()?; // Always exclude .git, regardless of ignore_patterns. if relative.components().any(|c| c.as_os_str() == ".git") { continue; } // Always exclude the output archive itself to prevent a tarbomb. if path == output_canonical { continue; } // Skip entries whose path contains a component matched by the ignore set. if relative .components() .any(|c| ignore_set.is_match(Path::new(c.as_os_str()))) { continue; } if !path.is_file() { continue; } let entry_path = Path::new(prefix).join(relative); archive .append_path_with_name(path, &entry_path) .into_diagnostic()?; } // Finalise the tar stream, then flush and close the gzip layer. archive .into_inner() .into_diagnostic()? .finish() .into_diagnostic()?; Ok(output.to_path_buf()) }