// Abbaye
//
// at 6297799

use std::{
    path::{Path, PathBuf},
    process::Stdio,
};

use miette::{IntoDiagnostic, Result, miette};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tokio::sync::mpsc::UnboundedSender;

use crate::builders::{ArtifactPath, Builder, LogEvent, LogSender};

/// Serde default for [`CargoBuilderConfig::parallel`]: parallel builds are
/// enabled unless the configuration says otherwise.
fn default_parallel() -> bool {
    const PARALLEL_BY_DEFAULT: bool = true;
    PARALLEL_BY_DEFAULT
}

/// Configuration for [`CargoBuilder`].
///
/// When deserialized, `targets` and `bins` default to empty lists and
/// `parallel` defaults to `true`.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct CargoBuilderConfig {
    /// Cargo target triples to build for (e.g. `"x86_64-unknown-linux-musl"`).
    ///
    /// Each entry is passed as `--target <triple>` in a separate `cargo build`
    /// invocation. When the list is empty, cargo builds for the host target
    /// and no `--target` flag is passed; the host triple is only queried to
    /// name the resulting artifacts.
    #[serde(default)]
    pub targets: Vec<String>,

    /// Optional path to the Cargo.toml manifest.
    ///
    /// Passed verbatim as `--manifest-path`. Defaults to the manifest in the
    /// current working directory when absent.
    ///
    /// The same file is also read for `[package].version`, which is embedded
    /// in the name of every collected artifact.
    pub manifest_path: Option<PathBuf>,

    /// Restrict collected artifacts to these binary (or cdylib) target names.
    ///
    /// When empty every artifact produced by a **workspace member or local
    /// path-dependency** is kept. Use this to avoid picking up extra binaries
    /// from dev-tools or examples that live in the same workspace.
    ///
    /// Names are compared for exact equality against the cargo target name.
    ///
    /// ```toml
    /// [[builders]]
    /// type = "cargo"
    /// bins = ["my_binary", "my_cdylib"]
    /// ```
    #[serde(default)]
    pub bins: Vec<String>,

    /// Run cross-compilation targets in parallel using isolated temporary
    /// target directories.
    ///
    /// When `true` (the default), each target triple is given its own
    /// `--target-dir` backed by a [`tempfile::TempDir`], so multiple
    /// `cargo build` processes can compile simultaneously without contending
    /// on cargo's file lock (`target/.cargo-lock`).  Compiled artifacts are
    /// copied to the canonical `target/<triple>/release/` paths (relative to
    /// the current working directory) and the temporary directories are then
    /// removed automatically.
    ///
    /// This flag only matters when `targets` is non-empty: the host-only
    /// build always uses cargo's default target directory.
    ///
    /// When `false`, the per-target `cargo build` processes are still started
    /// together, but they all use cargo's default target directory, so they
    /// are expected to queue on cargo's file lock and run one at a time.
    ///
    /// Set this to `false` when:
    ///
    /// - **Disk space is tight.** Each temporary build tree can occupy several
    ///   gigabytes for dependency-heavy crates. Four targets running in
    ///   parallel means roughly four times the peak disk usage of a single
    ///   build.
    /// - **Incremental compilation matters.** Temporary target directories
    ///   always start cold, discarding Rust's incremental cache. Disabling
    ///   parallelism lets all targets share the persistent `target/` directory
    ///   and reuse previously compiled artefacts on subsequent runs.
    /// - **The build host is resource-constrained.** Parallel `cargo build`
    ///   processes each consume significant CPU and RAM. On CI machines with
    ///   limited memory, running them sequentially avoids thrashing or
    ///   out-of-memory failures.
    /// - **Your cross-compilation toolchain is not concurrency-safe.** Some
    ///   custom linkers or build-script tools assume exclusive access and may
    ///   produce corrupt output when invoked concurrently.
    #[serde(default = "default_parallel")]
    pub parallel: bool,
}

/// Programmatic default, kept in step with the serde field defaults: no
/// explicit targets, no manifest override, no artifact filter, and parallel
/// builds enabled.
impl Default for CargoBuilderConfig {
    fn default() -> Self {
        let parallel = default_parallel();
        Self {
            parallel,
            manifest_path: None,
            targets: vec![],
            bins: vec![],
        }
    }
}

/// Runs `cargo build --release` and returns the produced artifacts.
///
/// One `cargo build` is run per entry in [`CargoBuilderConfig::targets`], or
/// a single build for the host target when that list is empty. The type holds
/// no state; all settings come from the [`CargoBuilderConfig`] passed to
/// `build`.
pub struct CargoBuilder;

impl Builder for CargoBuilder {
    type ConfigType = CargoBuilderConfig;

    async fn build(
        &self,
        config: Self::ConfigType,
        abbaye_version: &str,
        log: LogSender,
    ) -> Result<Vec<ArtifactPath>> {
        let crate_version = read_crate_version(config.manifest_path.as_deref()).await?;

        if config.targets.is_empty() {
            // Single host target: forward stderr lines as plain LogEvent::Line events.
            let host = get_host_target().await?;
            let line_tx = line_bridge(log, LogEvent::Line);
            run_cargo_build(
                &config,
                None,
                &host,
                &crate_version,
                abbaye_version,
                line_tx,
                None,
            )
            .await
        } else {
            // Multiple targets: each runs in its own task with its own
            // temporary target directory so cargo's file lock does not
            // serialise them.
            let mut join_set = tokio::task::JoinSet::new();

            for target in &config.targets {
                let config = config.clone();
                let crate_version = crate_version.clone();
                let abbaye_version = abbaye_version.to_owned();
                let target = target.clone();
                let log = log.clone();

                join_set.spawn(async move {
                    // Announce this target as a child task.
                    let _ = log.send(LogEvent::ChildStart {
                        id: target.clone(),
                        label: target.clone(),
                    });

                    // Bridge: run_cargo_build emits plain Strings; forward
                    // them as ChildLine events on the parent LogSender.
                    let target_id = target.clone();
                    let line_tx = line_bridge(log.clone(), move |l| LogEvent::ChildLine {
                        id: target_id.clone(),
                        line: l,
                    });

                    let result = if config.parallel {
                        // Give this invocation its own target directory so it
                        // does not contend with sibling builds on cargo's lock.
                        let tmpdir = TempDir::new().into_diagnostic()?;
                        let r = run_cargo_build(
                            &config,
                            Some(target.as_str()),
                            &target,
                            &crate_version,
                            &abbaye_version,
                            line_tx,
                            Some(tmpdir.path()),
                        )
                        .await;
                        // Copy artifacts to stable paths inside target/ before
                        // tmpdir is dropped, then let tmpdir clean up.
                        match r {
                            Ok(artifacts) => relocate_artifacts(artifacts, tmpdir.path()).await,
                            Err(e) => Err(e),
                        }
                    } else {
                        // Sequential mode: share the default target/ directory.
                        // Cargo's file lock ensures the invocations do not
                        // corrupt each other; they simply queue up.
                        run_cargo_build(
                            &config,
                            Some(target.as_str()),
                            &target,
                            &crate_version,
                            &abbaye_version,
                            line_tx,
                            None,
                        )
                        .await
                    };

                    let _ = log.send(LogEvent::ChildFinish {
                        id: target.clone(),
                        success: result.is_ok(),
                        summary: match &result {
                            Ok(artifacts) => format!("{} artifact(s)", artifacts.len()),
                            Err(e) => e.to_string(),
                        },
                    });

                    result
                });
            }

            let mut all_artifacts = Vec::new();
            while let Some(res) = join_set.join_next().await {
                all_artifacts.extend(res.into_diagnostic()??);
            }
            Ok(all_artifacts)
        }
    }
}

/// Returns a sender of plain `String` lines whose messages are converted with
/// `f` and pushed onto `log`.
///
/// A background task does the forwarding and ends once every clone of the
/// returned sender has been dropped. Failures to deliver to `log` are
/// ignored. This keeps `run_cargo_build` string-only: it never has to build a
/// [`LogEvent`] itself to feed the structured [`LogSender`].
fn line_bridge(
    log: LogSender,
    f: impl Fn(String) -> LogEvent + Send + 'static,
) -> UnboundedSender<String> {
    let (sender, mut receiver) = tokio::sync::mpsc::unbounded_channel::<String>();

    let forwarder = async move {
        while let Some(text) = receiver.recv().await {
            let event = f(text);
            let _ = log.send(event);
        }
    };
    tokio::spawn(forwarder);

    sender
}

/// Minimal representation of the JSON messages emitted by
/// `cargo build --message-format=json`.
///
/// Only the fields this module inspects are declared; any other keys in a
/// message are ignored during deserialization.
#[derive(Deserialize)]
struct CargoMessage {
    /// Message discriminator. Only `"compiler-artifact"` messages are
    /// processed; everything else is skipped.
    reason: String,
    /// Identifies the crate that produced this artifact.
    /// Local packages (workspace members and path-deps) always contain
    /// `path+file://`; external registry/git crates do not.
    package_id: Option<String>,
    /// The cargo target (name and kinds) the artifact was built from, when
    /// the message carries one.
    target: Option<CargoMessageTarget>,
    /// Paths of the files produced for this artifact, when the message
    /// carries them.
    filenames: Option<Vec<String>>,
}

/// The `target` object of a cargo JSON message, reduced to the fields used
/// for filtering artifacts.
#[derive(Deserialize)]
struct CargoMessageTarget {
    /// Target name; matched against [`CargoBuilderConfig::bins`].
    name: String,
    /// The kind(s) of the target, e.g. `["bin"]`, `["lib"]`, `["custom-build"]`.
    /// Treated as empty when the key is missing.
    #[serde(default)]
    kind: Vec<String>,
}

/// Spawn `cargo build --release --message-format=json [--target <triple>]
/// [--manifest-path <path>] [--target-dir <dir>]` and collect every artifact
/// path from the `compiler-artifact` messages.
///
/// * `config` – supplies the optional manifest path and the `bins` filter.
/// * `target` – passed as `--target` when present; `None` builds for the host.
/// * `triple` – triple embedded in artifact names (not passed to cargo).
/// * `version` – crate version embedded in artifact names.
/// * `abbaye_version` – exported to cargo as `ABBAYE_BUILDING_VERSION`.
/// * `line_tx` – receives cargo's stderr, one line per message.
/// * `target_dir` – passed as `--target-dir` when present.
///
/// Stderr lines are forwarded to `line_tx` as plain strings; the caller is
/// responsible for mapping them to the appropriate [`LogEvent`] variant.
/// On normal completion (success or a non-zero cargo exit) every stderr line
/// has been handed to `line_tx` before this function returns.
///
/// Kept artifacts are those from local (`path+file://`) packages that are not
/// build scripts, match `config.bins` when it is non-empty, are not
/// `.rlib` / `.rmeta` / `.d` files, and exist on disk. Each is named
/// `{stem}-{version}-{triple}{ext}`.
///
/// # Errors
///
/// Fails when cargo cannot be spawned, when its stdout cannot be read, or
/// when it exits with a non-zero status.
async fn run_cargo_build(
    config: &CargoBuilderConfig,
    target: Option<&str>,
    triple: &str,
    version: &str,
    abbaye_version: &str,
    line_tx: UnboundedSender<String>,
    target_dir: Option<&Path>,
) -> Result<Vec<ArtifactPath>> {
    let mut cmd = Command::new("cargo");
    cmd.args(["build", "--release", "--message-format=json"]);
    cmd.env("ABBAYE_BUILDING_VERSION", abbaye_version);

    if let Some(t) = target {
        cmd.args(["--target", t]);
    }

    if let Some(manifest) = &config.manifest_path {
        cmd.arg("--manifest-path").arg(manifest);
    }

    if let Some(dir) = target_dir {
        cmd.arg("--target-dir").arg(dir);
    }

    let mut child = cmd
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .into_diagnostic()?;

    // Forward stderr lines to the caller's line sender concurrently with
    // JSON stdout parsing. The handle is kept so the forwarder can be joined
    // before returning; otherwise trailing lines (typically the actual
    // compiler error) could be emitted after the caller has already reported
    // the outcome.
    let stderr = child.stderr.take().expect("stderr was piped");
    let stderr_task = tokio::spawn(async move {
        let mut stderr_lines = BufReader::new(stderr).lines();
        while let Ok(Some(line)) = stderr_lines.next_line().await {
            let _ = line_tx.send(line);
        }
    });

    let stdout = child.stdout.take().expect("stdout was piped");
    let mut lines = BufReader::new(stdout).lines();

    let mut artifacts = Vec::new();

    while let Some(line) = lines.next_line().await.into_diagnostic()? {
        // Lines that are not (known) JSON messages are silently ignored.
        let Ok(msg) = serde_json::from_str::<CargoMessage>(&line) else {
            continue;
        };

        if msg.reason != "compiler-artifact" {
            continue;
        }

        // Skip artifacts from external (registry / git) dependencies.
        // Both the old package_id format ("name ver (path+file://...)") and the
        // newer spec format ("path+file://...#name@ver") contain "path+file://"
        // for every local crate, so a substring check is version-agnostic.
        if !msg
            .package_id
            .as_deref()
            .is_some_and(|id| id.contains("path+file://"))
        {
            continue;
        }

        // Skip build-script artifacts (kind == ["custom-build"]).
        if msg
            .target
            .as_ref()
            .is_some_and(|t| t.kind.iter().any(|k| k == "custom-build"))
        {
            continue;
        }

        // If the caller named specific targets, restrict to those. A message
        // without a target never matches.
        if !config.bins.is_empty() {
            let target_name = msg.target.as_ref().map(|t| t.name.as_str()).unwrap_or("");
            if !config.bins.iter().any(|b| b == target_name) {
                continue;
            }
        }

        for filename in msg.filenames.unwrap_or_default() {
            let path = PathBuf::from(&filename);

            // Skip rlib / rmeta / dep-info (.d) files; we only want
            // executables and cdylibs.
            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            if matches!(ext, "rlib" | "rmeta" | "d") {
                continue;
            }

            if !path.exists() {
                continue;
            }

            // Name the artifact as `{stem}-{version}-{triple}{ext}` so that
            // binaries for different targets can coexist in the same dist dir.
            let stem = path
                .file_stem()
                .map(|s| s.to_string_lossy().into_owned())
                .unwrap_or_default();
            let dot_ext = path
                .extension()
                .map(|e| format!(".{}", e.to_string_lossy()))
                .unwrap_or_default();
            let name = format!("{stem}-{version}-{triple}{dot_ext}");

            artifacts.push(ArtifactPath {
                path,
                name,
                hash: None,
            });
        }
    }

    let status = child.wait().await.into_diagnostic()?;

    // The process has exited, so stderr reaches EOF; wait for the forwarder
    // to hand over the remaining lines. A panic in that task only affects
    // logging and is deliberately ignored.
    let _ = stderr_task.await;

    if !status.success() {
        return Err(miette!(
            "cargo build --release failed with exit status: {status}"
        ));
    }

    Ok(artifacts)
}

/// Copy each artifact from its path inside `tmp_root` to the corresponding
/// path under `target/`, creating parent directories as needed, and return
/// updated [`ArtifactPath`]s pointing at the new stable locations.
///
/// When `--target-dir <tmpdir>` is passed to `cargo build`, artifacts land at
/// `<tmpdir>/<triple>/release/<name>`.  Stripping the `tmpdir` prefix and
/// prepending `target/` gives the canonical path `target/<triple>/release/<name>`,
/// which is where a normal `cargo build --target <triple>` would place them.
async fn relocate_artifacts(
    artifacts: Vec<ArtifactPath>,
    tmp_root: &Path,
) -> Result<Vec<ArtifactPath>> {
    let mut relocated = Vec::with_capacity(artifacts.len());
    for artifact in artifacts {
        let relative = artifact.path.strip_prefix(tmp_root).into_diagnostic()?;
        let stable = PathBuf::from("target").join(relative);
        if let Some(parent) = stable.parent() {
            tokio::fs::create_dir_all(parent).await.into_diagnostic()?;
        }
        tokio::fs::copy(&artifact.path, &stable)
            .await
            .into_diagnostic()?;
        relocated.push(ArtifactPath {
            path: stable,
            name: artifact.name,
            hash: artifact.hash,
        });
    }
    Ok(relocated)
}

/// Asks `rustc -vV` for the host target triple
/// (e.g. `"x86_64-unknown-linux-gnu"`).
///
/// The triple is the second whitespace-separated token of the first output
/// line that starts with `host:`. Fails when `rustc` cannot be run, exits
/// unsuccessfully, prints non-UTF-8 output, or prints no usable `host:` line.
async fn get_host_target() -> Result<String> {
    let mut rustc = Command::new("rustc");
    rustc.arg("-vV");
    let probe = rustc.output().await.into_diagnostic()?;

    if !probe.status.success() {
        return Err(miette!("rustc -vV failed"));
    }

    let report = String::from_utf8(probe.stdout).into_diagnostic()?;
    let host_line = report.lines().find(|line| line.starts_with("host:"));

    match host_line.and_then(|line| line.split_whitespace().nth(1)) {
        Some(triple) => Ok(triple.to_owned()),
        None => Err(miette!(
            "could not parse host triple from `rustc -vV` output"
        )),
    }
}

/// Read `[package].version` from the Cargo.toml at `manifest_path`
/// (defaults to `Cargo.toml` in the current directory).
async fn read_crate_version(manifest_path: Option<&Path>) -> Result<String> {
    let path = manifest_path.unwrap_or(Path::new("Cargo.toml"));
    let content = tokio::fs::read_to_string(path).await.into_diagnostic()?;

    #[derive(Deserialize)]
    struct Manifest {
        package: Option<Package>,
    }
    #[derive(Deserialize)]
    struct Package {
        version: Option<String>,
    }

    let manifest: Manifest = toml::from_str(&content).into_diagnostic()?;
    manifest
        .package
        .ok_or_else(|| miette!("{} has no [package] section", path.display()))?
        .version
        .ok_or_else(|| miette!("no version field in [package] in {}", path.display()))
}

/// Configuration for [`CargoDocBuilder`].
///
/// Every field is optional: the default is to document the manifest in the
/// current working directory, dependencies included.
#[derive(Debug, Default, Clone, Deserialize, Serialize, JsonSchema)]
pub struct CargoDocBuilderConfig {
    /// Optional path to the Cargo.toml manifest.
    ///
    /// Passed verbatim as `--manifest-path`. Defaults to the manifest in the
    /// current working directory when absent.
    ///
    /// The generated documentation is looked up in `target/doc` next to this
    /// file (or under the current directory when absent).
    pub manifest_path: Option<PathBuf>,

    /// Skip building documentation for dependencies (`--no-deps`).
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub no_deps: bool,
}

/// Runs `cargo doc` and returns the whole doc directory as an artifact.
///
/// The result is a single artifact named `"doc"` pointing at the `target/doc`
/// directory. The type holds no state; all settings come from the
/// [`CargoDocBuilderConfig`] passed to `build`.
pub struct CargoDocBuilder;

impl Builder for CargoDocBuilder {
    type ConfigType = CargoDocBuilderConfig;

    async fn build(
        &self,
        config: Self::ConfigType,
        abbaye_version: &str,
        log: LogSender,
    ) -> Result<Vec<ArtifactPath>> {
        let mut cmd = Command::new("cargo");
        cmd.arg("doc");
        cmd.env("ABBAYE_BUILDING_VERSION", abbaye_version);

        if config.no_deps {
            cmd.arg("--no-deps");
        }

        if let Some(manifest) = &config.manifest_path {
            cmd.arg("--manifest-path").arg(manifest);
        }

        let mut child = cmd.stderr(Stdio::piped()).spawn().into_diagnostic()?;

        let stderr = child.stderr.take().expect("stderr was piped");
        tokio::spawn(async move {
            let mut stderr_lines = BufReader::new(stderr).lines();
            while let Ok(Some(line)) = stderr_lines.next_line().await {
                let _ = log.send(LogEvent::Line(line));
            }
        });

        let status = child.wait().await.into_diagnostic()?;

        if !status.success() {
            return Err(miette!("cargo doc failed with exit status: {status}"));
        }

        // Resolve the doc output directory. When a manifest path is given the
        // workspace root is its parent directory; otherwise fall back to CWD.
        let doc_dir = config
            .manifest_path
            .as_deref()
            .and_then(|p| p.parent())
            .unwrap_or_else(|| std::path::Path::new("."))
            .join("target/doc");

        if !doc_dir.exists() {
            return Err(miette!("doc directory not found at {}", doc_dir.display()));
        }

        // Return the entire target/doc tree as a single artifact so that the
        // shared rustdoc assets (CSS, JS, fonts, search indices) that live at
        // the root of target/doc/ are preserved alongside the per-crate HTML.
        Ok(vec![ArtifactPath {
            path: doc_dir,
            name: "doc".to_owned(),
            hash: None,
        }])
    }
}