diff --git a/Cargo.lock b/Cargo.lock index a169be5b3..3decd4bc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2813,6 +2813,7 @@ dependencies = [ "tokio-util", "toml", "tower-service", + "typed-path", "url", "uuid", "version-compare", @@ -3671,6 +3672,12 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" +[[package]] +name = "typed-path" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7922f2cdc51280d47b491af9eafc41eb0cdab85eabcb390c854412fcbf26dbe8" + [[package]] name = "typenum" version = "1.17.0" diff --git a/Cargo.toml b/Cargo.toml index 022721b3c..60b9572d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,6 +103,7 @@ tokio-serde = "0.8" tokio-util = { version = "0.7", features = ["codec", "io"] } toml = "0.8" tower-service = "0.3" +typed-path = "0.12.0" url = { version = "2", optional = true } uuid = { version = "1.9", features = ["v4"] } walkdir = "2" diff --git a/README.md b/README.md index ed0a56b80..fb1a5d5cd 100644 --- a/README.md +++ b/README.md @@ -278,12 +278,49 @@ This is most useful when using sccache for Rust compilation, as rustc supports u --- +Normalizing Paths with `SCCACHE_BASEDIRS` +----------------------------------------- + +By default, sccache requires absolute paths to match for cache hits. To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIRS` to strip a base directory from paths before hashing: + +```bash +export SCCACHE_BASEDIRS=/home/user/project +``` + +You can also specify multiple base directories by separating them by `;` on Windows hosts and by `:` on any other operating system. When multiple directories are provided, the longest matching prefix is used: + +```bash +export SCCACHE_BASEDIRS="/home/user/project:/home/user/workspace" +``` + +Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. 
+ +This is similar to ccache's `CCACHE_BASEDIR` and helps when: +* Building the same project from different directories +* Sharing cache between CI jobs with different checkout paths +* Multiple developers working with different username paths +* Working with multiple project checkouts simultaneously + +**Note:** Only absolute paths are supported. Relative paths will prevent the server from starting. + +You can also configure this in the sccache config file: + +```toml +# Single directory +basedirs = ["/home/user/project"] + +# Or multiple directories +basedirs = ["/home/user/project", "/home/user/workspace"] +``` + +--- + Known Caveats ------------- ### General -* Absolute paths to files must match to get a cache hit. This means that even if you are using a shared cache, everyone will have to build at the same absolute path (i.e. not in `$HOME`) in order to benefit each other. In Rust this includes the source for third party crates which are stored in `$HOME/.cargo/registry/cache` by default. +* By default, absolute paths to files must match to get a cache hit. To work around this, use `SCCACHE_BASEDIRS` (see above) to normalize paths before hashing. ### Rust diff --git a/docs/Configuration.md b/docs/Configuration.md index c6c43c646..f96885b87 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -6,6 +6,26 @@ # If specified, wait this long for the server to start up. server_startup_timeout_ms = 10000 +# Base directories to strip from source paths during cache key +# computation. +# +# Similar to ccache's CCACHE_BASEDIR, but supports multiple paths. +# +# 'basedirs' enables cache hits across different absolute root +# paths when compiling the same source code, such as between +# parallel checkouts of the same project, Git worktrees, or different +# users in a shared environment. +# When multiple matching paths are provided, the longest prefix +# is used. +# +# Path matching is case-insensitive on Windows and case-sensitive on other OSes. 
+# +# Example: +# basedirs = ["/home/user/project"] results in the path prefix rewrite: +# "/home/user/project/src/main.c" -> "./src/main.c" +basedirs = ["/home/user/project"] +# basedirs = ["/home/user/project", "/home/user/workspace"] + [dist] # where to find the scheduler scheduler_url = "http://1.2.3.4:10600" @@ -134,6 +154,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `;` on Windows hosts and by `:` on any other operating system. When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. The environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will cause an error and prevent the server from starting. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 4e9d52800..a0d6a3816 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -381,6 +381,10 @@ pub trait Storage: Send + Sync { // Enable by default, only in local mode PreprocessorCacheModeConfig::default() } + /// Return the base directories for path normalization if configured + fn basedirs(&self) -> &[Vec] { + &[] + } /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. 
/// Only applicable when using preprocessor cache mode. @@ -453,6 +457,38 @@ impl PreprocessorCacheModeConfig { } } +/// Wrapper for opendal::Operator that adds basedirs support +#[cfg(any( + feature = "azure", + feature = "gcs", + feature = "gha", + feature = "memcached", + feature = "redis", + feature = "s3", + feature = "webdav", + feature = "oss", +))] +pub struct RemoteStorage { + operator: opendal::Operator, + basedirs: Vec>, +} + +#[cfg(any( + feature = "azure", + feature = "gcs", + feature = "gha", + feature = "memcached", + feature = "redis", + feature = "s3", + feature = "webdav", + feature = "oss", +))] +impl RemoteStorage { + pub fn new(operator: opendal::Operator, basedirs: Vec>) -> Self { + Self { operator, basedirs } + } +} + /// Implement storage for operator. #[cfg(any( feature = "azure", @@ -462,11 +498,12 @@ impl PreprocessorCacheModeConfig { feature = "redis", feature = "s3", feature = "webdav", + feature = "oss", ))] #[async_trait] -impl Storage for opendal::Operator { +impl Storage for RemoteStorage { async fn get(&self, key: &str) -> Result { - match self.read(&normalize_key(key)).await { + match self.operator.read(&normalize_key(key)).await { Ok(res) => { let hit = CacheRead::from(io::Cursor::new(res.to_bytes()))?; Ok(Cache::Hit(hit)) @@ -482,7 +519,9 @@ impl Storage for opendal::Operator { async fn put(&self, key: &str, entry: CacheWrite) -> Result { let start = std::time::Instant::now(); - self.write(&normalize_key(key), entry.finish()?).await?; + self.operator + .write(&normalize_key(key), entry.finish()?) + .await?; Ok(start.elapsed()) } @@ -493,7 +532,7 @@ impl Storage for opendal::Operator { let path = ".sccache_check"; // Read is required, return error directly if we can't read . - match self.read(path).await { + match self.operator.read(path).await { Ok(_) => (), // Read not exist file with not found is ok. 
Err(err) if err.kind() == ErrorKind::NotFound => (), @@ -512,7 +551,7 @@ impl Storage for opendal::Operator { Err(err) => bail!("cache storage failed to read: {:?}", err), } - let can_write = match self.write(path, "Hello, World!").await { + let can_write = match self.operator.write(path, "Hello, World!").await { Ok(_) => true, Err(err) if err.kind() == ErrorKind::AlreadyExists => true, // Tolerate all other write errors because we can do read at least. @@ -534,7 +573,7 @@ impl Storage for opendal::Operator { } fn location(&self) -> String { - let meta = self.info(); + let meta = self.operator.info(); format!( "{}, name: {}, prefix: {}", meta.scheme(), @@ -550,6 +589,10 @@ impl Storage for opendal::Operator { async fn max_size(&self) -> Result> { Ok(None) } + + fn basedirs(&self) -> &[Vec] { + &self.basedirs + } } /// Normalize key `abcdef` into `a/b/c/abcdef` @@ -572,8 +615,9 @@ pub fn storage_from_config( key_prefix, }) => { debug!("Init azure cache with container {container}, key_prefix {key_prefix}"); - let storage = AzureBlobCache::build(connection_string, container, key_prefix) + let operator = AzureBlobCache::build(connection_string, container, key_prefix) .map_err(|err| anyhow!("create azure cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "gcs")] @@ -587,7 +631,7 @@ pub fn storage_from_config( }) => { debug!("Init gcs cache with bucket {bucket}, key_prefix {key_prefix}"); - let storage = GCSCache::build( + let operator = GCSCache::build( bucket, key_prefix, cred_path.as_deref(), @@ -597,14 +641,16 @@ pub fn storage_from_config( ) .map_err(|err| anyhow!("create gcs cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "gha")] CacheType::GHA(config::GHACacheConfig { version, .. 
}) => { debug!("Init gha cache with version {version}"); - let storage = GHACache::build(version) + let operator = GHACache::build(version) .map_err(|err| anyhow!("create gha cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "memcached")] @@ -617,7 +663,7 @@ pub fn storage_from_config( }) => { debug!("Init memcached cache with url {url}"); - let storage = MemcachedCache::build( + let operator = MemcachedCache::build( url, username.as_deref(), password.as_deref(), @@ -625,6 +671,7 @@ pub fn storage_from_config( *expiration, ) .map_err(|err| anyhow!("create memcached cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "redis")] @@ -672,6 +719,7 @@ pub fn storage_from_config( _ => bail!("Only one of `endpoint`, `cluster_endpoints`, `url` must be set"), } .map_err(|err| anyhow!("create redis cache failed: {err:?}"))?; + let storage = RemoteStorage::new(storage, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "s3")] @@ -682,7 +730,7 @@ pub fn storage_from_config( ); let storage_builder = S3Cache::new(c.bucket.clone(), c.key_prefix.clone(), c.no_credentials); - let storage = storage_builder + let operator = storage_builder .with_region(c.region.clone()) .with_endpoint(c.endpoint.clone()) .with_use_ssl(c.use_ssl) @@ -691,13 +739,14 @@ pub fn storage_from_config( .build() .map_err(|err| anyhow!("create s3 cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "webdav")] CacheType::Webdav(c) => { debug!("Init webdav cache with endpoint {}", c.endpoint); - let storage = WebdavCache::build( + let operator = WebdavCache::build( &c.endpoint, &c.key_prefix, c.username.as_deref(), @@ -706,6 +755,7 @@ pub fn storage_from_config( ) .map_err(|err| anyhow!("create webdav cache 
failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "oss")] @@ -715,7 +765,7 @@ pub fn storage_from_config( c.bucket, c.endpoint ); - let storage = OSSCache::build( + let operator = OSSCache::build( &c.bucket, &c.key_prefix, c.endpoint.as_deref(), @@ -723,6 +773,7 @@ pub fn storage_from_config( ) .map_err(|err| anyhow!("create oss cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[allow(unreachable_patterns)] @@ -736,12 +787,14 @@ pub fn storage_from_config( let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode; let rw_mode = config.fallback_cache.rw_mode.into(); debug!("Init disk cache with dir {:?}, size {}", dir, size); + Ok(Arc::new(DiskCache::new( dir, size, pool, preprocessor_cache_mode_config, rw_mode, + config.basedirs.clone(), ))) } @@ -823,4 +876,53 @@ mod test { }); } } + + #[test] + #[cfg(feature = "s3")] + fn test_operator_storage_s3_with_basedirs() { + // Create S3 operator (doesn't need real credentials for this test) + let operator = crate::cache::s3::S3Cache::new( + "test-bucket".to_string(), + "test-prefix".to_string(), + true, // no_credentials = true + ) + .with_region(Some("us-east-1".to_string())) + .build() + .expect("Failed to create S3 cache operator"); + + let basedirs = vec![b"/home/user/project".to_vec(), b"/opt/build".to_vec()]; + + // Wrap with OperatorStorage + let storage = RemoteStorage::new(operator, basedirs.clone()); + + // Verify basedirs are stored and retrieved correctly + assert_eq!(storage.basedirs(), basedirs.as_slice()); + assert_eq!(storage.basedirs().len(), 2); + assert_eq!(storage.basedirs()[0], b"/home/user/project".to_vec()); + assert_eq!(storage.basedirs()[1], b"/opt/build".to_vec()); + } + + #[test] + #[cfg(feature = "redis")] + fn test_operator_storage_redis_with_basedirs() { + // Create Redis operator + let operator = 
crate::cache::redis::RedisCache::build_single( + "redis://localhost:6379", + None, + None, + 0, + "test-prefix", + 0, + ) + .expect("Failed to create Redis cache operator"); + + let basedirs = vec![b"/workspace".to_vec()]; + + // Wrap with OperatorStorage + let storage = RemoteStorage::new(operator, basedirs.clone()); + + // Verify basedirs work + assert_eq!(storage.basedirs(), basedirs.as_slice()); + assert_eq!(storage.basedirs().len(), 1); + } } diff --git a/src/cache/disk.rs b/src/cache/disk.rs index c4f3491e9..7e75100b1 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -74,6 +74,7 @@ pub struct DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, preprocessor_cache: Arc>, rw_mode: CacheMode, + basedirs: Vec>, } impl DiskCache { @@ -84,6 +85,7 @@ impl DiskCache { pool: &tokio::runtime::Handle, preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, + basedirs: Vec>, ) -> DiskCache { DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { @@ -99,6 +101,7 @@ impl DiskCache { max_size, })), rw_mode, + basedirs, } } } @@ -181,6 +184,9 @@ impl Storage for DiskCache { fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { self.preprocessor_cache_mode_config } + fn basedirs(&self) -> &[Vec] { + &self.basedirs + } async fn get_preprocessor_cache_entry(&self, key: &str) -> Result>> { let key = normalize_key(key); Ok(self diff --git a/src/cache/readonly.rs b/src/cache/readonly.rs index 90431c4fb..715e664f6 100644 --- a/src/cache/readonly.rs +++ b/src/cache/readonly.rs @@ -64,6 +64,11 @@ impl Storage for ReadOnlyStorage { self.0.preprocessor_cache_mode_config() } + /// Return the base directories for path normalization if configured + fn basedirs(&self) -> &[Vec] { + self.0.basedirs() + } + /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. /// Only applicable when using preprocessor cache mode. 
@@ -121,6 +126,40 @@ mod test { ); } + #[test] + fn readonly_storage_forwards_basedirs() { + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .worker_threads(1) + .build() + .unwrap(); + + let tempdir = tempfile::Builder::new() + .prefix("readonly_storage_forwards_basedirs") + .tempdir() + .expect("Failed to create tempdir"); + let cache_dir = tempdir.path().join("cache"); + std::fs::create_dir(&cache_dir).unwrap(); + + let basedirs = vec![ + b"/home/user/project".to_vec(), + b"/home/user/workspace".to_vec(), + ]; + + let disk_cache = crate::cache::disk::DiskCache::new( + &cache_dir, + 1024 * 1024, + runtime.handle(), + super::PreprocessorCacheModeConfig::default(), + super::CacheMode::ReadWrite, + basedirs.clone(), + ); + + let readonly_storage = ReadOnlyStorage(std::sync::Arc::new(disk_cache)); + + assert_eq!(readonly_storage.basedirs(), basedirs.as_slice()); + } + #[test] fn readonly_storage_put_err() { let runtime = tokio::runtime::Builder::new_current_thread() diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 8db84d265..7fa3d3493 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -26,7 +26,7 @@ use crate::dist::pkg; use crate::mock_command::CommandCreatorSync; use crate::util::{ Digest, HashToDigest, MetadataCtimeExt, TimeMacroFinder, Timestamp, decode_path, encode_path, - hash_all, + hash_all, strip_basedirs, }; use async_trait::async_trait; use fs_err as fs; @@ -444,6 +444,7 @@ where &absolute_input_path, self.compiler.plusplus(), preprocessor_cache_mode_config, + storage.basedirs(), )? } else { None @@ -613,6 +614,7 @@ where &env_vars, &preprocessor_result.stdout, self.compiler.plusplus(), + storage.basedirs(), ) }; @@ -1421,7 +1423,7 @@ impl pkg::ToolchainPackager for CToolchainPackager { } /// The cache is versioned by the inputs to `hash_key`. -pub const CACHE_VERSION: &[u8] = b"11"; +pub const CACHE_VERSION: &[u8] = b"12"; /// Environment variables that are factored into the cache key. 
static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { @@ -1444,6 +1446,11 @@ static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { }); /// Compute the hash key of `compiler` compiling `preprocessor_output` with `args`. +/// +/// If `basedirs` are provided, paths in the preprocessor output will be normalized by +/// stripping the longest matching basedir prefix. This enables cache hits across different +/// absolute paths (similar to ccache's CCACHE_BASEDIR). +#[allow(clippy::too_many_arguments)] pub fn hash_key( compiler_digest: &str, language: Language, @@ -1452,6 +1459,7 @@ pub fn hash_key( env_vars: &[(OsString, OsString)], preprocessor_output: &[u8], plusplus: bool, + basedirs: &[Vec], ) -> String { // If you change any of the inputs to the hash, you should change `CACHE_VERSION`. let mut m = Digest::new(); @@ -1475,7 +1483,11 @@ pub fn hash_key( val.hash(&mut HashToDigest { digest: &mut m }); } } - m.update(preprocessor_output); + + // Strip basedirs from preprocessor output if configured + let preprocessor_output_to_hash = strip_basedirs(preprocessor_output, basedirs); + + m.update(&preprocessor_output_to_hash); m.finish() } @@ -1490,8 +1502,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1500,8 +1530,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + 
Language::C, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ) ); } @@ -1510,7 +1558,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key( "abcd", Language::CHeader, @@ -1518,7 +1575,8 @@ mod test { &[], &[], PREPROCESSED, - false + false, + &[] ) ); } @@ -1528,7 +1586,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true), + hash_key( + "abcd", + Language::Cxx, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ), hash_key( "abcd", Language::CxxHeader, @@ -1536,7 +1603,8 @@ mod test { &[], &[], PREPROCESSED, - true + true, + &[] ) ); } @@ -1546,8 +1614,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "wxyz", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1560,18 +1646,54 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + digest, + Language::C, + &xyz, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, 
+ &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1586,9 +1708,19 @@ mod test { &[], &[], &b"hello world"[..], - false + false, + &[] ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + &b"goodbye"[..], + false, + &[] + ) ); } @@ -1598,11 +1730,38 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false); + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h3 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1623,10 +1782,167 @@ mod test { &extra_data, &[], PREPROCESSED, - false + false, + &[] ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) + ); + } + + #[test] + fn test_hash_key_basedirs() { + let args = ovec!["a", "b", "c"]; + let digest = "abcd"; + + let 
preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; + let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; + + let basedirs = [ + b"/home/user1/project".to_vec(), + b"/home/user2/project".to_vec(), + ]; + + // Test 1: Same hash with different absolute paths when basedir is used + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs, + ); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &basedirs, + ); + + assert_eq!(h1, h2); + + // Test 2: Same hash with single basedir that matches each + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs[..1], + ); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &basedirs[1..], + ); + + assert_eq!(h1, h2); + + // Test 3: Different hashes without basedir + let h1_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &[], + ); + let h2_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &[], + ); + + assert_neq!(h1_no_base, h2_no_base); + + // Test 3: Works for C++ files too + let preprocessed_cpp1 = + b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; + let preprocessed_cpp2 = + b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; + + let h_cpp1 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp1, + true, + &basedirs, + ); + let h_cpp2 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp2, + true, + &basedirs, + ); + + assert_eq!(h_cpp1, h_cpp2); + + // Test 4: Doesn't work with trailing slash in basedir, they must be normalized in config + let basedir_slash = b"/home/user1/project/".to_vec(); + let h_slash = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + 
false, + std::slice::from_ref(&basedir_slash), + ); + + assert_neq!(h1, h_slash); + + // Test 5: Multiple basedirs - longest match wins + let basedirs = vec![ + b"/home/user1".to_vec(), + b"/home/user1/project".to_vec(), // This should match (longest) + ]; + let h_multi = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs, ); + assert_eq!(h1, h_multi); } #[test] diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index bdab84e1e..95bb6dbbf 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -2314,6 +2314,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2444,6 +2445,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2747,6 +2749,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2876,6 +2879,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2974,6 +2978,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + vec![], ); let storage = Arc::new(storage); // Pretend to be GCC. 
diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index d03cd6e18..18a8127ab 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -34,7 +34,7 @@ use serde::{Deserialize, Serialize}; use crate::{ cache::PreprocessorCacheModeConfig, - util::{Digest, HashToDigest, MetadataCtimeExt, Timestamp, encode_path}, + util::{Digest, HashToDigest, MetadataCtimeExt, Timestamp, encode_path, strip_basedirs}, }; use super::Language; @@ -381,6 +381,7 @@ pub fn preprocessor_cache_entry_hash_key( input_file: &Path, plusplus: bool, config: PreprocessorCacheModeConfig, + basedirs: &[Vec], ) -> anyhow::Result> { // If you change any of the inputs to the hash, you should change `FORMAT_VERSION`. let mut m = Digest::new(); @@ -414,7 +415,10 @@ pub fn preprocessor_cache_entry_hash_key( // share preprocessor cache entries and a/r.h exists. let mut buf = vec![]; encode_path(&mut buf, input_file)?; - m.update(&buf); + + // Strip basedirs from the input file path if configured + let buf_to_hash = strip_basedirs(&buf, basedirs); + m.update(&buf_to_hash); let reader = std::fs::File::open(input_file) .with_context(|| format!("while hashing the input file '{}'", input_file.display()))?; @@ -634,4 +638,138 @@ mod test { assert!(!finder.found_timestamp()); assert!(!finder.found_date()); } + + #[test] + fn test_preprocessor_cache_entry_hash_key_basedirs() { + #[cfg(target_os = "windows")] + use crate::util::normalize_win_path; + use std::fs; + use tempfile::TempDir; + + // Create two different base directories + let dir1 = TempDir::new().unwrap(); + let dir2 = TempDir::new().unwrap(); + let dirs = [&dir1, &dir2] + .iter() + .map(|dir| { + let bytes = dir.path().to_string_lossy().into_owned().into_bytes(); + #[cfg(target_os = "windows")] + return normalize_win_path(&bytes); + #[cfg(not(target_os = "windows"))] + bytes + }) + .collect::>(); + + // Create identical files with the same relative path in each directory + let file1_path 
= dir1.path().join("test.c"); + let file2_path = dir2.path().join("test.c"); + + let content = b"int main() { return 0; }"; + fs::write(&file1_path, content).unwrap(); + fs::write(&file2_path, content).unwrap(); + + let config = PreprocessorCacheModeConfig::activated(); + + // Test 1: With basedirs, hashes should be the same + let hash1_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &dirs, + ) + .unwrap() + .unwrap(); + + let hash2_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &dirs, + ) + .unwrap() + .unwrap(); + + assert_eq!( + hash1_with_basedirs, hash2_with_basedirs, + "Hashes should be equal when using basedirs with identical files in different directories" + ); + + // Test 2: With basedir1 for first, and basedir2 for second, hashes should be the same + let hash1_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &dirs[..1], + ) + .unwrap() + .unwrap(); + + let hash2_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &dirs[1..], + ) + .unwrap() + .unwrap(); + + assert_eq!( + hash1_with_basedirs, hash2_with_basedirs, + "Hashes should be equal when using basedirs with identical files in different directories" + ); + + // Test 3: Without basedirs, hashes should be different + let hash1_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + let hash2_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + assert_ne!( + hash1_no_basedirs, hash2_no_basedirs, + "Hashes 
should be different without basedirs for files in different directories" + ); + } } diff --git a/src/config.rs b/src/config.rs index 8edaeb099..d0c11409b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -13,6 +13,8 @@ // limitations under the License. use crate::cache::CacheMode; +#[cfg(target_os = "windows")] +use crate::util::normalize_win_path; use directories::ProjectDirs; use fs::File; use fs_err as fs; @@ -31,6 +33,7 @@ use std::result::Result as StdResult; use std::str::FromStr; use std::sync::{LazyLock, Mutex}; use std::{collections::HashMap, fmt}; +use typed_path::Utf8TypedPathBuf; pub use crate::cache::PreprocessorCacheModeConfig; use crate::errors::*; @@ -584,6 +587,8 @@ pub struct FileConfig { pub cache: CacheConfigs, pub dist: DistConfig, pub server_startup_timeout_ms: Option, + /// Base directories to strip from paths for cache key computation. + pub basedirs: Vec, } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -621,6 +626,7 @@ pub fn try_read_config_file(path: &Path) -> Result>, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -946,7 +952,22 @@ fn config_from_env() -> Result { oss, }; - Ok(EnvConfig { cache }) + // ======= Base directory ======= + // Support multiple paths separated by ';' on Windows and ':' on other platforms + // to match PATH behavior. + #[cfg(target_os = "windows")] + let split_symbol = ';'; + #[cfg(not(target_os = "windows"))] + let split_symbol = ':'; + let basedirs = env::var_os("SCCACHE_BASEDIRS").map(|s| { + s.to_string_lossy() + .split(split_symbol) + .filter(|s| !s.is_empty()) + .map(|s| s.to_owned()) + .collect() + }); + + Ok(EnvConfig { cache, basedirs }) } // The directories crate changed the location of `config_dir` on macos in version 3, @@ -978,6 +999,9 @@ pub struct Config { pub fallback_cache: DiskCacheConfig, pub dist: DistConfig, pub server_startup_timeout: Option, + /// Base directory (or directories) to strip from paths for cache key computation. 
+ /// Similar to ccache's CCACHE_BASEDIR. + pub basedirs: Vec>, } impl Config { @@ -989,32 +1013,84 @@ impl Config { .context("Failed to load config file")? .unwrap_or_default(); - Ok(Self::from_env_and_file_configs(env_conf, file_conf)) + Self::from_env_and_file_configs(env_conf, file_conf) } - fn from_env_and_file_configs(env_conf: EnvConfig, file_conf: FileConfig) -> Self { + fn from_env_and_file_configs(env_conf: EnvConfig, file_conf: FileConfig) -> Result { let mut conf_caches: CacheConfigs = Default::default(); let FileConfig { cache, dist, server_startup_timeout_ms, + basedirs: file_basedirs, } = file_conf; conf_caches.merge(cache); let server_startup_timeout = server_startup_timeout_ms.map(std::time::Duration::from_millis); - let EnvConfig { cache } = env_conf; + let EnvConfig { + cache, + basedirs: env_basedirs, + } = env_conf; conf_caches.merge(cache); + // Environment variable takes precedence over file config if it is set + let basedirs_raw = if let Some(basedirs) = env_basedirs { + basedirs + } else { + file_basedirs + }; + + // Validate that all basedirs are absolute paths + // basedirs_raw is Vec + let mut basedirs = Vec::with_capacity(basedirs_raw.len()); + for d in basedirs_raw { + let p = Utf8TypedPathBuf::from(d); + if !p.is_absolute() { + bail!("Basedir path must be absolute: {:?}", p); + } + // Normalize basedir: + // remove double separators, cur_dirs, parent_dirs, trailing slashes + let p_norm = p.normalize(); + let mut bytes = p_norm.to_string().into_bytes(); + + // Always add a trailing `/` to basedirs to ensure we only match complete path + // components + bytes.push(b'/'); + + // normalize windows paths + let normalized = { + #[cfg(target_os = "windows")] + { + normalize_win_path(&bytes) + } + + #[cfg(not(target_os = "windows"))] + { + bytes + } + }; + basedirs.push(normalized); + } + + if !basedirs.is_empty() { + let basedirs_str: Vec = basedirs + .iter() + .map(|b| String::from_utf8_lossy(b).into_owned()) + .collect(); + debug!("Using 
basedirs for path normalization: {:?}", basedirs_str); + } + let (caches, fallback_cache) = conf_caches.into_fallback(); - Self { + Ok(Self { cache: caches, fallback_cache, dist, server_startup_timeout, - } + basedirs, + }) } } @@ -1287,6 +1363,7 @@ fn config_overrides() { }), ..Default::default() }, + basedirs: None, }; let file_conf = FileConfig { @@ -1313,10 +1390,11 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, + basedirs: vec![], }; assert_eq!( - Config::from_env_and_file_configs(env_conf, file_conf), + Config::from_env_and_file_configs(env_conf, file_conf).unwrap(), Config { cache: Some(CacheType::Redis(RedisCacheConfig { endpoint: Some("myotherredisurl".to_owned()), @@ -1326,7 +1404,7 @@ fn config_overrides() { username: Some("user".to_owned()), password: Some("secret".to_owned()), ..Default::default() - }),), + })), fallback_cache: DiskCacheConfig { dir: "/env-cache".into(), size: 5, @@ -1335,10 +1413,412 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, + basedirs: vec![], } ); } +#[test] +#[cfg(target_os = "windows")] +fn config_basedirs_overrides() { + // Test that env variable takes precedence over file config + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec!["C:/env/basedir".to_string()].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["C:/file/basedir".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs, vec![b"c:/env/basedir/".to_vec()]); + + // Test that file config is used when env is None + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["C:/file/basedir".to_string()], + }; + + let config = 
Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs, vec![b"c:/file/basedir/".to_vec()]); + + // Test that env config is used when env is set but empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["C:/file/basedir".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + + // Test that both empty results in empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn config_basedirs_overrides() { + // Test that env variable takes precedence over file config + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec!["/env/basedir".to_string()].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/file/basedir".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs, vec![b"/env/basedir/".to_vec()]); + + // Test that file config is used when env is None + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/file/basedir".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs, 
vec![b"/file/basedir/".to_vec()]); + + // Test that env config is used when env is set but empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/file/basedir".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + + // Test that both empty results in empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_deserialize_basedirs() { + // Test array of paths + let toml = r#" + basedirs = ["/home/user/project", "/home/user/workspace"] + + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!( + config.basedirs, + vec![ + "/home/user/project".to_string(), + "/home/user/workspace".to_string() + ] + ); +} + +#[test] +fn test_deserialize_basedirs_missing() { + // Test no basedirs specified (should default to empty vec) + let toml = r#" + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert!(config.basedirs.is_empty()); +} + 
+#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(not(target_os = "windows"))] +fn test_env_basedirs_single() { + unsafe { + std::env::set_var("SCCACHE_BASEDIRS", "/home/user/project"); + } + let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + config.basedirs.expect("SCCACHE_BASEDIRS is set"), + vec!["/home/user/project".to_string()] + ); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(target_os = "windows")] +fn test_env_basedirs_single() { + unsafe { + std::env::set_var("SCCACHE_BASEDIRS", "C:/home/user/project"); + } + let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + config.basedirs.expect("SCCACHE_BASEDIRS is set"), + vec!["C:/home/user/project".to_string()] + ); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(not(target_os = "windows"))] +fn test_env_basedirs_multiple() { + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "/home/user/project:/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + config.basedirs.expect("SCCACHE_BASEDIRS is set"), + vec![ + "/home/user/project".to_string(), + "/home/user/workspace".to_string() + ] + ); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(target_os = "windows")] +fn test_env_basedirs_multiple() { + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "C:/home/user/project;C:/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + config.basedirs.expect("SCCACHE_BASEDIRS is set"), + vec![ + "C:/home/user/project".to_string(), + "C:/home/user/workspace".to_string() + ] + ); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(not(target_os = "windows"))] +fn test_env_basedirs_with_spaces() { + // Test that spaces around paths are not trimmed + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", 
+ " /home/user/project : /home/user/workspace ", + ); + } + let env_conf = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + env_conf.basedirs.clone().expect("SCCACHE_BASEDIRS is set"), + vec![ + " /home/user/project ".to_string(), + " /home/user/workspace ".to_string() + ] + ); + // The lead to trailing spaces are preserved and server fails to start + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + Config::from_env_and_file_configs(env_conf, file_conf) + .expect_err("Should fail due to non-absolute path"); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(target_os = "windows")] +fn test_env_basedirs_with_spaces() { + // Test that spaces around paths are not trimmed + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + " C:/home/user/project ; C:/home/user/workspace ", + ); + } + let env_conf = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + env_conf.basedirs.clone().expect("SCCACHE_BASEDIRS is set"), + vec![ + " C:/home/user/project ".to_string(), + " C:/home/user/workspace ".to_string() + ] + ); + // The lead to trailing spaces are preserved and server fails to start + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + Config::from_env_and_file_configs(env_conf, file_conf) + .expect_err("Should fail due to non-absolute path"); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(not(target_os = "windows"))] +fn test_env_basedirs_empty_entries() { + // Test that empty entries are filtered out + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "/home/user/project::/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + config.basedirs.expect("SCCACHE_BASEDIRS is 
set"), + vec![ + "/home/user/project".to_string(), + "/home/user/workspace".to_string() + ] + ); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(target_os = "windows")] +fn test_env_basedirs_empty_entries() { + // Test that empty entries are filtered out + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "c:/home/user/project;;c:/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + + assert_eq!( + config.basedirs.expect("SCCACHE_BASEDIRS is set"), + vec![ + "c:/home/user/project".to_string(), + "c:/home/user/workspace".to_string() + ] + ); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +fn test_env_basedirs_not_set() { + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + let config = config_from_env().unwrap(); + assert!(config.basedirs.is_none()); +} + #[test] #[serial] #[cfg(feature = "s3")] @@ -1644,6 +2124,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), + basedirs: vec![], } ) } @@ -1736,6 +2217,289 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, + basedirs: vec![], } ); } + +// Integration tests: Config normalization + strip_basedirs usage + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_config_normalizes_and_strips() { + // Test that Config normalizes basedirs and strip_basedirs uses them correctly + use crate::util::strip_basedirs; + use std::borrow::Cow; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user/project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Verify config normalized the basedir with trailing slash + assert_eq!(config.basedirs, vec![b"/home/user/project/"]); + + // Test that strip_basedirs uses the normalized 
basedir + let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; + let output = strip_basedirs(input, &config.basedirs); + + // Should strip the basedir + let expected = b"# 1 \"src/main.c\"\nint main() { return 0; }"; + assert_eq!(&*output, expected); + assert!(matches!(output, Cow::Owned(_))); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_normalized_path_with_double_slashes() { + // Test that Config normalizes paths with double slashes + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home//user///project/".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Config should normalize to single slashes with one trailing slash + assert_eq!(config.basedirs, vec![b"/home/user/project/"]); + + // Verify it works with strip_basedirs + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + assert_eq!(&*output, b"# 1 \"src/main.c\""); +} + +#[test] +#[cfg(target_os = "windows")] +fn test_integration_windows_path_normalization() { + // Test that Config normalizes Windows paths correctly + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["C:\\Users\\Test\\Project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Should be normalized to lowercase with forward slashes + assert_eq!(config.basedirs, vec![b"c:/users/test/project/"]); + + // Test with mixed case preprocessor output + let input = b"# 1 \"C:\\Users\\Test\\Project\\src\\main.c\""; + let 
output = strip_basedirs(input, &config.basedirs); + assert_eq!(&*output, b"# 1 \"src\\main.c\""); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_cow_borrowed_when_no_match() { + // Test that strip_basedirs returns Cow::Borrowed when no stripping occurs + use crate::util::strip_basedirs; + use std::borrow::Cow; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user/project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Input doesn't contain the basedir + let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; + let output = strip_basedirs(input, &config.basedirs); + + // Should return borrowed reference (no allocation) + assert!(matches!(output, Cow::Borrowed(_))); + assert_eq!(&*output, input); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_cow_borrowed_when_empty_basedirs() { + // Test that strip_basedirs returns Cow::Borrowed when basedirs is empty + use crate::util::strip_basedirs; + use std::borrow::Cow; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + + // Should return borrowed reference when basedirs is empty + assert!(matches!(output, Cow::Borrowed(_))); + assert_eq!(&*output, input); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_multiple_basedirs_longest_match() { + // Test that strip_basedirs prefers longest match with 
normalized basedirs + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user".to_string(), "/home/user/project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Both should be normalized with trailing slashes + assert_eq!(config.basedirs.len(), 2); + assert_eq!(config.basedirs[0], b"/home/user/"); + assert_eq!(config.basedirs[1], b"/home/user/project/"); + + // Input matches both, but longest should win + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + + // Should match the longest basedir (/home/user/project/) + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_paths_with_dots_normalized() { + // Test that paths with . and .. 
are normalized correctly + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user/./project/../project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Should be normalized to remove ./ and ../ + assert_eq!(config.basedirs[0], b"/home/user/project/"); + + // Verify it works with strip_basedirs + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); +} + +#[test] +#[cfg(target_os = "windows")] +fn test_integration_windows_mixed_slashes() { + // Test Windows path with mixed slashes in preprocessor output + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["C:\\Users\\test\\project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs[0], b"c:/users/test/project/"); + + // Preprocessor output with mixed slashes + let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + + // Should strip despite mixed slashes + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(not(target_os = "windows"))] +fn test_integration_env_variable_to_strip() { + // Test full flow: SCCACHE_BASEDIRS env var -> Config -> strip_basedirs + use crate::util::strip_basedirs; + + unsafe { + env::set_var("SCCACHE_BASEDIRS", "/home/user/project:/tmp/build"); + } + + let env_conf = 
config_from_env().unwrap(); + let file_conf = FileConfig::default(); + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + unsafe { + env::remove_var("SCCACHE_BASEDIRS"); + } + + // Should have two normalized basedirs + assert_eq!(config.basedirs.len(), 2); + assert_eq!(config.basedirs[0], b"/home/user/project/"); + assert_eq!(config.basedirs[1], b"/tmp/build/"); + + // Test stripping with both + let input1 = b"# 1 \"/home/user/project/src/main.c\""; + let output1 = strip_basedirs(input1, &config.basedirs); + assert_eq!(&*output1, b"# 1 \"src/main.c\""); + + let input2 = b"# 1 \"/tmp/build/obj/file.o\""; + let output2 = strip_basedirs(input2, &config.basedirs); + assert_eq!(&*output2, b"# 1 \"obj/file.o\""); +} diff --git a/src/server.rs b/src/server.rs index e44c00bed..2613d0903 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1636,6 +1636,7 @@ pub struct ServerInfo { pub max_cache_size: Option, pub use_preprocessor_cache_mode: bool, pub version: String, + pub basedirs: Vec, } /// Status of the dist client. 
@@ -1932,6 +1933,7 @@ impl ServerInfo { let use_preprocessor_cache_mode; let cache_size; let max_cache_size; + let basedirs; if let Some(storage) = storage { cache_location = storage.location(); use_preprocessor_cache_mode = storage @@ -1939,11 +1941,17 @@ impl ServerInfo { .use_preprocessor_cache_mode; (cache_size, max_cache_size) = futures::try_join!(storage.current_size(), storage.max_size())?; + basedirs = storage + .basedirs() + .iter() + .map(|p| String::from_utf8_lossy(p).to_string()) + .collect(); } else { cache_location = String::new(); use_preprocessor_cache_mode = false; cache_size = None; max_cache_size = None; + basedirs = Vec::new(); } let version = env!("CARGO_PKG_VERSION").to_string(); Ok(ServerInfo { @@ -1953,6 +1961,7 @@ impl ServerInfo { max_cache_size, use_preprocessor_cache_mode, version, + basedirs, }) } @@ -1965,6 +1974,16 @@ impl ServerInfo { self.cache_location, name_width = name_width ); + println!( + "{: usize { std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) } +/// Strip base directories from absolute paths in preprocessor output. +/// +/// This function searches for basedir paths in the preprocessor output and +/// replaces them with relative path markers. When multiple basedirs are provided, +/// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. +/// +/// Path matching is case-insensitive to handle various filesystem behaviors and build system +/// configurations uniformly across all operating systems. On Windows, this function also handles +/// paths with mixed forward and backward slashes, which can occur when different build tools +/// produce preprocessor output. +/// +/// Only paths that start with one of the basedirs are modified. The paths are expected to be +/// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). 
+pub fn strip_basedirs<'a>(preprocessor_output: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + if basedirs.is_empty() || preprocessor_output.is_empty() { + return Cow::Borrowed(preprocessor_output); + } + + trace!( + "Stripping basedirs from preprocessor output with length {}", + preprocessor_output.len(), + ); + + // Find all potential matches for each basedir using fast substring search + // Store as (position, length, basedir_idx) sorted by position + let mut matches: Vec<(usize, usize, usize)> = Vec::new(); + // We must return the original preprocessor output on all platforms, + // so we only normalize a copy for searching. + #[cfg(not(target_os = "windows"))] + let normalized_output = preprocessor_output; + #[cfg(target_os = "windows")] + let normalized_output = &normalize_win_path(preprocessor_output); + + for (idx, basedir_bytes) in basedirs.iter().enumerate() { + let basedir = basedir_bytes.as_slice(); + // Use memchr's fast substring search + let finder = memchr::memmem::find_iter(normalized_output, &basedir); + + for pos in finder { + // Check if this is a valid boundary (start, whitespace, quote, or '<') + let is_boundary = pos == 0 + || normalized_output[pos - 1].is_ascii_whitespace() + || normalized_output[pos - 1] == b'"' + || normalized_output[pos - 1] == b'<'; + + if is_boundary { + matches.push((pos, basedir.len(), idx)); + } + } + } + + if matches.is_empty() { + return Cow::Borrowed(preprocessor_output); + } + + // Sort matches by position, then by length descending (longest first for overlaps) + matches.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1))); + + // Remove overlapping matches, keeping the longest match at each position + let mut filtered_matches: Vec<(usize, usize)> = Vec::with_capacity(matches.len()); + let mut last_end = 0; + + for (pos, len, idx) in matches { + if pos >= last_end { + filtered_matches.push((pos, len)); + last_end = pos + len; + trace!( + "Matched basedir {} at position {} with length {}", + 
String::from_utf8_lossy(&basedirs[idx]), + pos, + len + ); + } + } + + // Build the result in a single pass + let mut result = Vec::with_capacity(preprocessor_output.len()); + let mut current_pos = 0; + + for (match_pos, match_len) in filtered_matches { + // Copy everything before the match + result.extend_from_slice(&preprocessor_output[current_pos..match_pos]); + // Replace the basedir is removed completely, including trailing slash (it is expected, see + // Config::basedir) + current_pos = match_pos + match_len; + } + + // Copy remaining data + result.extend_from_slice(&preprocessor_output[current_pos..]); + + Cow::Owned(result) +} + +/// Normalize path for case-insensitive comparison. +/// On Windows: converts all backslashes to forward slashes; +/// lowercases characters for consistency. +/// This function is used for: +/// - basedir_path: already normalized by std::path::absolute +/// - preprocessor_output: plain text that may contain invalid UTF-8 +/// Leave it for any platform for testing purposes. 
+pub fn normalize_win_path(path: &[u8]) -> Vec { + let mut result = Vec::with_capacity(path.len()); + let mut i = 0; + + while i < path.len() { + let b = path[i]; + + // Fast path: ASCII characters (most common case) + if b < 128 { + result.push(match b { + b'A'..=b'Z' => b + (b'a' - b'A'), + b'\\' => b'/', + _ => b, + }); + i += 1; + continue; + } + + // Non-ASCII: try to decode UTF-8 sequence + // Determine expected length from the first byte + let char_len = match b { + 0b1100_0000..=0b1101_1111 => 2, // 110xxxxx + 0b1110_0000..=0b1110_1111 => 3, // 1110xxxx + 0b1111_0000..=0b1111_0111 => 4, // 11110xxx + _ => { + // Invalid UTF-8 start byte, copy as-is + result.push(b); + i += 1; + continue; + } + }; + + // Check if we have enough bytes + if i + char_len > path.len() { + // Incomplete sequence, copy as-is + result.push(b); + i += 1; + continue; + } + + // Validate and decode the UTF-8 sequence + match std::str::from_utf8(&path[i..i + char_len]) { + Ok(s) => { + // Valid UTF-8, lowercase it + result.extend_from_slice(s.to_lowercase().as_bytes()); + i += char_len; + } + Err(_) => { + // Invalid sequence, copy first byte as-is + result.push(b); + i += 1; + } + } + } + + result +} + #[cfg(test)] mod tests { use super::{OsStrExt, TimeMacroFinder}; @@ -1167,4 +1326,212 @@ mod tests { let empty_result = super::ascii_unescape_default(&[]).unwrap(); assert!(empty_result.is_empty(), "{:?}", empty_result); } + + #[test] + fn test_strip_basedir_simple() { + // Simple cases + let basedir = b"/home/user/project/".to_vec(); + let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"src/main.c\"\nint main() { return 0; }"; + assert_eq!(&*output, expected); + + // Multiple occurrences + let input = + b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; + let output = super::strip_basedirs(input, 
std::slice::from_ref(&basedir)); + let expected = b"# 1 \"src/main.c\"\n# 2 \"include/header.h\""; + assert_eq!(&*output, expected); + + // No occurrences + let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + assert_eq!(&*output, input); + } + + #[test] + fn test_strip_basedir_empty() { + // Empty basedir slice + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, &[]); + assert_eq!(&*output, input); + + // Empty input + let basedir = b"/home/user/project/".to_vec(); + let input = b""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + assert_eq!(&*output, input); + } + + #[test] + fn test_strip_basedir_not_at_boundary() { + // basedir should only match at word boundaries + let basedir = b"/home/user/".to_vec(); + let input = b"text/home/user/file.c and \"/home/user/other.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + // Should only replace the second occurrence (after quote) + let expected = b"text/home/user/file.c and \"other.c\""; + assert_eq!(&*output, expected); + } + + #[test] + fn test_strip_basedir_trailing_slashes() { + // Without trailing slash + let basedir = b"/home/user/project".to_vec(); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"/src/main.c\""; // Wrong, but expected + assert_eq!(&*output, expected); + + // Trailing slashes aren't ignored, they must be cleaned in config reader + let basedir = b"/home/user/project/".to_vec(); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); + } + + #[test] + fn test_strip_basedirs_multiple() { + // Multiple basedirs - should match longest first + 
let basedirs = vec![ + b"/home/user1/project/".to_vec(), + b"/home/user2/workspace/".to_vec(), + ]; + let input = + b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; + let output = super::strip_basedirs(input, &basedirs); + let expected = b"# 1 \"src/main.c\"\n# 2 \"lib/util.c\""; + assert_eq!(&*output, expected); + + // Longest prefix wins, regardless of the order in which basedirs are listed + let basedirs = vec![b"/home/user/".to_vec(), b"/home/user/project/".to_vec()]; + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, &basedirs); + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); + } + + #[cfg(target_os = "windows")] + #[test] + fn test_strip_basedir_windows_backslashes() { + // Without trailing separator + let basedir = b"c:/users/test/project".to_vec(); + let input = b"# 1 \"C:\\Users\\test\\project\\Src\\Main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + // Match ignores case and slash direction; the remainder keeps its case and backslashes + let expected = b"# 1 \"\\Src\\Main.c\""; // Leading backslash remains: basedir lacks a trailing slash + assert_eq!(&*output, expected); + + // Trailing slashes are significant here; they must be cleaned in the config reader + let basedir = b"c:/users/test/project/".to_vec(); + let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"src\\main.c\""; + assert_eq!(&*output, expected); + } + + #[cfg(target_os = "windows")] + #[test] + fn test_strip_basedir_windows_mixed_slashes() { + // The slashes may be mixed in preprocessor output, but the uncut output + // should remain untouched. 
+ // Mixed forward and backslashes in input (common from certain build systems) + let basedir = b"c:/users/test/project/".to_vec(); + let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected, "Failed to strip mixed slash path"); + + // Also test the reverse mix of slashes; this is expected to be stripped too, given that + // the basedir itself was normalized in advance + let input = b"# 1 \"C:\\Users/test/project/src\\main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"src\\main.c\""; + assert_eq!( + &*output, expected, + "Failed to strip reverse mixed slash path" + ); + } + + #[test] + fn test_normalize_win_path_ascii() { + // Test basic ASCII normalization: lowercase letters, backslashes become forward slashes + let input = b"C:\\Users\\Test\\Project"; + let normalized = super::normalize_win_path(input); + assert_eq!(normalized, b"c:/users/test/project"); + + // Test mixed case + let input = b"C:\\USERS\\test\\PROJECT"; + let normalized = super::normalize_win_path(input); + assert_eq!(normalized, b"c:/users/test/project"); + } + + #[test] + fn test_normalize_win_path_utf8() { + // Test with UTF-8 characters (e.g., German umlauts) + let input = "C:\\Users\\Müller\\Projekt".as_bytes(); + let normalized = super::normalize_win_path(input); + let expected = "c:/users/müller/projekt".as_bytes(); + assert_eq!(normalized, expected); + + // Test with Cyrillic characters + let input = "C:\\Пользователь\\Проект".as_bytes(); + let normalized = super::normalize_win_path(input); + let expected = "c:/пользователь/проект".as_bytes(); + assert_eq!(normalized, expected); + + // Test with Turkish İ (special case) + let input = "C:\\İstanbul\\DİREKTÖRY".as_bytes(); + let normalized = super::normalize_win_path(input); + // Dotted capital İ lowercases to 'i' followed by a combining dot above (\u{307}) + let expected = "c:/i\u{307}stanbul/di\u{307}rektöry".as_bytes(); + assert_eq!(normalized, expected); + 
} + + #[test] + fn test_normalize_win_path_mixed_ascii_utf8() { + // Test mixed ASCII and non-ASCII UTF-8 components in one path + let input = "C:\\Users\\Test\\Café\\Проект".as_bytes(); + let normalized = super::normalize_win_path(input); + let expected = "c:/users/test/café/проект".as_bytes(); + assert_eq!(normalized, expected); + } + + #[test] + fn test_normalize_win_path_invalid_utf8() { + // Test with invalid UTF-8 sequence (the invalid byte should be preserved as-is) + let mut input = b"C:\\Users\\".to_vec(); + input.push(0xFF); // 0xFF never appears in valid UTF-8 + input.extend_from_slice(b"\\Test"); + + let normalized = super::normalize_win_path(&input); + + // Should lowercase ASCII and convert backslashes, but preserve invalid byte + let mut expected = b"c:/users/".to_vec(); + expected.push(0xFF); + expected.extend_from_slice(b"/test"); + assert_eq!(normalized, expected); + } + + #[test] + fn test_normalize_win_path_incomplete_utf8() { + // Test with incomplete UTF-8 sequence at the end + let mut input = b"C:\\Users\\Test".to_vec(); + input.push(0xC3); // Lead byte of a 2-byte UTF-8 sequence, continuation byte missing + + let normalized = super::normalize_win_path(&input); + + // Should preserve incomplete byte as-is + let mut expected = b"c:/users/test".to_vec(); + expected.push(0xC3); + assert_eq!(normalized, expected); + } + + #[test] + fn test_normalize_win_path_empty() { + let input = b""; + let normalized = super::normalize_win_path(input); + assert_eq!(normalized, b""); + } } diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 219c08c29..9bb49cc7a 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -190,6 +190,7 @@ pub fn sccache_client_cfg( rewrite_includes_only: false, // TODO }, server_startup_timeout_ms: None, + basedirs: vec![], } } diff --git a/tests/oauth.rs b/tests/oauth.rs index 066bcc2bd..bfbf0f2f5 100644 --- a/tests/oauth.rs +++ b/tests/oauth.rs @@ -60,6 +60,7 @@ fn config_with_dist_auth( rewrite_includes_only: true, }, server_startup_timeout_ms: None, + basedirs: vec![], } } @@ -225,6 +226,7 @@ fn 
test_auth_with_config(dist_auth: sccache::config::DistAuth) { .tempdir() .unwrap(); let sccache_config = config_with_dist_auth(conf_dir.path(), dist_auth); + assert!(sccache_config.basedirs.is_empty()); let sccache_config_path = conf_dir.path().join("sccache-config.json"); fs::File::create(&sccache_config_path) .unwrap()