diff --git a/README.md b/README.md index e25fcf9b..0cf4a9c9 100644 --- a/README.md +++ b/README.md @@ -248,6 +248,14 @@ All git tags of the form `vX.Y.Z` are considered a version of the package. [Relevant dependency resolution code](https://github.com/pulp-platform/bender/blob/master/src/resolver.rs) +#### Git LFS Support + +Bender detects if a repository requires Git LFS and if the `git-lfs` tool is installed on your system. + +- If the repository uses LFS (detected via `.gitattributes`) and `git-lfs` is installed, Bender will automatically configure LFS and pull the required files. +- If the repository appears to use LFS but `git-lfs` is **not** installed, Bender will print a warning (`W26`) but proceed with the checkout. In this case, you may end up with pointer files instead of the actual large files, which can cause build failures. +- If the repository does not use LFS, Bender skips LFS operations entirely to save time. + #### Target handling Specified dependencies can be filtered, similar to the sources below. For consistency, this filtering does **NOT** apply during an update, i.e., all dependencies will be accounted for in the Bender.lock file. The target filtering only applies for sources and script outputs. This can be used e.g., to include specific IP only for testing. @@ -397,6 +405,20 @@ overrides: # DEPRECATED: This will be removed at some point. plugins: additional-tools: { path: "/usr/local/additional-tools" } + +# Number of parallel git tasks. Optional. +# Default: 4 +# The number of parallel git operations executed by bender can be adjusted to +# manage performance and load on git servers. Can be overridden via a command +# line argument. +git_throttle: 2 + +# Enable git lfs. Optional. +# Default: true +# Some git dependencies may use git-lfs for additional source files. As +# fetching these files may not always be desired or required, it can be +# disabled. If multiple configurations conflict, true is used. 
+git_lfs: false ``` [Relevant code](https://github.com/pulp-platform/bender/blob/master/src/config.rs) diff --git a/src/cli.rs b/src/cli.rs index cb45ed07..74fdbdeb 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -483,6 +483,7 @@ fn load_config(from: &Path, warn_config_loaded: bool) -> Result { overrides: None, plugins: None, git_throttle: None, + git_lfs: None, }; out = out.merge(default_cfg); diff --git a/src/config.rs b/src/config.rs index 184dcd59..a1cfe28e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1439,6 +1439,8 @@ pub struct Config { pub plugins: IndexMap, /// The git throttle value to use unless overridden by the user. pub git_throttle: Option, + /// Enable git LFS support, requires git-lfs (default: true) + pub git_lfs: bool, } /// A partial configuration. @@ -1454,6 +1456,8 @@ pub struct PartialConfig { pub plugins: Option>, /// The git throttle value to use unless overridden by the user. pub git_throttle: Option, + /// Enable git LFS support, requires git-lfs (default: true) + pub git_lfs: Option, } impl PartialConfig { @@ -1465,6 +1469,7 @@ impl PartialConfig { overrides: None, plugins: None, git_throttle: None, + git_lfs: None, } } } @@ -1508,6 +1513,11 @@ impl Merge for PartialConfig { (None, None) => None, }, git_throttle: self.git_throttle.or(other.git_throttle), + git_lfs: match (self.git_lfs, other.git_lfs) { + (Some(v), None) | (None, Some(v)) => Some(v), + (Some(v1), Some(v2)) => Some(v1 | v2), + (None, None) => None, + }, } } } @@ -1540,6 +1550,7 @@ impl Validate for PartialConfig { None => IndexMap::new(), }, git_throttle: self.git_throttle, + git_lfs: self.git_lfs.unwrap_or(true), }) } } diff --git a/src/diagnostic.rs b/src/diagnostic.rs index 0aebfc64..c88cfa10 100644 --- a/src/diagnostic.rs +++ b/src/diagnostic.rs @@ -305,9 +305,23 @@ pub enum Warnings { IncludeDirMissing(PathBuf), #[error("Skipping dirty dependency {}", fmt_pkg!(pkg))] - #[diagnostic(help("Use `--no-skip` to still snapshot {}.", fmt_pkg!(pkg)))] + 
#[diagnostic(code(W25), help("Use `--no-skip` to still snapshot {}.", fmt_pkg!(pkg)))] SkippingDirtyDep { pkg: String }, + #[error("Dependency {} seems to use git-lfs, but git-lfs failed with `{}`.", fmt_pkg!(.0), .1)] + #[diagnostic( + code(W26), + help("You may need to install git-lfs to ensure all files are fetched correctly.") + )] + LfsMissing(String, String), + + #[error("Git LFS is disabled but dependency {} seems to use git-lfs.", fmt_pkg!(.0))] + #[diagnostic( + code(W27), + help("Enable git-lfs support in the configuration to fetch all files correctly.") + )] + LfsDisabled(String), + #[error("File not added, ignoring: {cause}")] #[diagnostic(code(W30))] IgnoredPath { cause: String }, diff --git a/src/git.rs b/src/git.rs index b11e5af0..96182622 100644 --- a/src/git.rs +++ b/src/git.rs @@ -12,6 +12,7 @@ use std::sync::Arc; use futures::TryFutureExt; use tokio::process::Command; use tokio::sync::Semaphore; +use walkdir::WalkDir; use crate::error::*; @@ -157,6 +158,34 @@ impl<'ctx> Git<'ctx> { Ok(()) } + /// Check if the repository uses LFS. + pub async fn uses_lfs(self) -> Result { + let output = self.spawn_with(|c| c.arg("lfs").arg("ls-files")).await?; + Ok(!output.trim().is_empty()) + } + + /// Check if the repository has LFS attributes configured. + pub async fn uses_lfs_attributes(self) -> Result { + // We use tokio::task::spawn_blocking because walkdir is synchronous + // and file I/O should not block the async runtime. + let path = self.path.to_path_buf(); + tokio::task::spawn_blocking(move || { + Ok(WalkDir::new(&path).into_iter().flatten().any(|entry| { + if entry.file_type().is_file() && entry.file_name() == ".gitattributes" { + if let Ok(content) = std::fs::read_to_string(entry.path()) { + content.contains("filter=lfs") + } else { + false + } + } else { + false + } + })) + }) + .await + .map_err(|cause| Error::chain("Failed to join blocking task", cause))? + } + /// Fetch the tags and refs of a remote. 
pub async fn fetch(self, remote: &str) -> Result<()> { let r1 = String::from(remote); diff --git a/src/sess.rs b/src/sess.rs index 964c2fd9..3d7b3567 100644 --- a/src/sess.rs +++ b/src/sess.rs @@ -970,7 +970,13 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> { if clear == CheckoutState::ToClone { git.clone() .spawn_with(move |c| { - c.arg("clone") + c.arg("-c") + .arg("filter.lfs.smudge=") + .arg("-c") + .arg("filter.lfs.process=") + .arg("-c") + .arg("filter.lfs.required=false") + .arg("clone") .arg(git.path) .arg(path) .arg("--branch") @@ -984,9 +990,44 @@ impl<'io, 'sess: 'io, 'ctx: 'sess> SessionIo<'sess, 'ctx> { .await?; local_git .clone() - .spawn_with(move |c| c.arg("checkout").arg(tag_name_2).arg("--force")) + .spawn_with(move |c| { + c.arg("-c") + .arg("filter.lfs.smudge=") + .arg("-c") + .arg("filter.lfs.process=") + .arg("-c") + .arg("filter.lfs.required=false") + .arg("checkout") + .arg(tag_name_2) + .arg("--force") + }) .await?; } + // Check if the repo uses LFS attributes + if local_git.clone().uses_lfs_attributes().await? { + if self.sess.config.git_lfs { + // Check if the repo actually tracks files with LFS + let uses_lfs = local_git.clone().uses_lfs().await; + match uses_lfs { + Ok(true) => { + local_git + .clone() + .spawn_with(move |c| c.arg("config").arg("lfs.url").arg(url)) + .await?; + local_git + .clone() + .spawn_with(move |c| c.arg("lfs").arg("pull")) + .await?; + } + Ok(false) => {} + Err(cause) => { + Warnings::LfsMissing(name.to_string(), cause.to_string()).emit(); + } + } + } else { + Warnings::LfsDisabled(name.to_string()).emit(); + } + } local_git .clone() .spawn_with(move |c| {