From b1184cc4b007b2873e235a81c9a522c1004f9e30 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 18 Dec 2025 22:05:02 +0100 Subject: [PATCH 01/31] Add SCCACHE_BASEDIR support --- README.md | 24 ++++++- docs/Configuration.md | 9 +++ src/cache/cache.rs | 5 ++ src/cache/disk.rs | 6 ++ src/compiler/c.rs | 106 +++++++++++++++++++++------- src/compiler/compiler.rs | 5 ++ src/config.rs | 81 ++++++++++++++++++++- src/test/tests.rs | 1 + src/util.rs | 147 +++++++++++++++++++++++++++++++++++++++ tests/harness/mod.rs | 1 + tests/oauth.rs | 1 + 11 files changed, 357 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 2941411a8..085442b9b 100644 --- a/README.md +++ b/README.md @@ -320,12 +320,34 @@ This is most useful when using sccache for Rust compilation, as rustc supports u --- +Normalizing Paths with `SCCACHE_BASEDIR` +----------------------------------------- + +By default, sccache requires absolute paths to match for cache hits. To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIR` to strip a base directory from paths before hashing: + +```bash +export SCCACHE_BASEDIR=/home/user/project +``` + +This is similar to ccache's `CCACHE_BASEDIR` and helps when: +* Building the same project from different directories +* Sharing cache between CI jobs with different checkout paths +* Multiple developers working with different username paths + +You can also configure this in the sccache config file: + +```toml +basedir = "/home/user/project" +``` + +--- + Known Caveats ------------- ### General -* Absolute paths to files must match to get a cache hit. This means that even if you are using a shared cache, everyone will have to build at the same absolute path (i.e. not in `$HOME`) in order to benefit each other. In Rust this includes the source for third party crates which are stored in `$HOME/.cargo/registry/cache` by default. +* By default, absolute paths to files must match to get a cache hit. To work around this, use `SCCACHE_BASEDIR` (see above) to normalize paths before hashing. ### Rust diff --git a/docs/Configuration.md b/docs/Configuration.md index c6c43c646..32e1ec8cc 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -6,6 +6,14 @@ # If specified, wait this long for the server to start up. server_startup_timeout_ms = 10000 +# Base directory to strip from paths for cache key computation. +# Similar to ccache's CCACHE_BASEDIR. This enables cache hits across +# different absolute paths when compiling the same source code. +# For example, if basedir is "/home/user/project", then paths like +# "/home/user/project/src/main.c" will be normalized to "./src/main.c" +# for caching purposes. +basedir = "/home/user/project" + [dist] # where to find the scheduler scheduler_url = "http://1.2.3.4:10600" @@ -134,6 +142,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path +* `SCCACHE_BASEDIR` base directory to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Environment variable takes precedence over file configuration. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 4e9d52800..86886a043 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -381,6 +381,10 @@ pub trait Storage: Send + Sync { // Enable by default, only in local mode PreprocessorCacheModeConfig::default() } + /// Return the base directory for path normalization if configured + fn basedir(&self) -> Option<&Path> { + None + } /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. /// Only applicable when using preprocessor cache mode. @@ -742,6 +746,7 @@ pub fn storage_from_config( pool, preprocessor_cache_mode_config, rw_mode, + config.basedir.clone(), ))) } diff --git a/src/cache/disk.rs b/src/cache/disk.rs index c4f3491e9..2a5b17ef5 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -74,6 +74,7 @@ pub struct DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, preprocessor_cache: Arc>, rw_mode: CacheMode, + basedir: Option, } impl DiskCache { @@ -84,6 +85,7 @@ impl DiskCache { pool: &tokio::runtime::Handle, preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, + basedir: Option, ) -> DiskCache { DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { @@ -99,6 +101,7 @@ impl DiskCache { max_size, })), rw_mode, + basedir, } } } @@ -181,6 +184,9 @@ impl Storage for DiskCache { fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { self.preprocessor_cache_mode_config } + fn basedir(&self) -> Option<&Path> { + self.basedir.as_deref() + } async fn get_preprocessor_cache_entry(&self, key: &str) -> Result>> { let key = normalize_key(key); Ok(self diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 2d0d605a4..c5d063550 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -632,6 +632,7 @@ where &env_vars, &preprocessor_output, self.compiler.plusplus(), + storage.basedir(), ) }; @@ -1440,7 +1441,7 @@ impl pkg::ToolchainPackager for CToolchainPackager { } /// The cache is versioned by the inputs to `hash_key`. -pub const CACHE_VERSION: &[u8] = b"11"; +pub const CACHE_VERSION: &[u8] = b"12"; /// Environment variables that are factored into the cache key. static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { @@ -1471,6 +1472,7 @@ pub fn hash_key( env_vars: &[(OsString, OsString)], preprocessor_output: &[u8], plusplus: bool, + basedir: Option<&Path>, ) -> String { // If you change any of the inputs to the hash, you should change `CACHE_VERSION`. let mut m = Digest::new(); @@ -1494,7 +1496,16 @@ pub fn hash_key( val.hash(&mut HashToDigest { digest: &mut m }); } } - m.update(preprocessor_output); + + // Strip basedir from preprocessor output if configured + let preprocessor_output_to_hash = if let Some(base) = basedir { + use crate::util::strip_basedir; + Cow::Owned(strip_basedir(preprocessor_output, base)) + } else { + Cow::Borrowed(preprocessor_output) + }; + + m.update(&preprocessor_output_to_hash); m.finish() } @@ -1509,8 +1520,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None) ); } @@ -1519,8 +1530,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, None) ); } @@ -1529,7 +1540,7 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), hash_key( "abcd", Language::CHeader, @@ -1537,7 +1548,8 @@ mod test { &[], &[], PREPROCESSED, - false + false, + None ) ); } @@ -1547,7 +1559,7 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true), + hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, None), hash_key( "abcd", Language::CxxHeader, @@ -1555,7 +1567,8 @@ mod test { &[], &[], PREPROCESSED, - true + true, + None ) ); } @@ -1565,8 +1578,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, None) ); } @@ -1579,18 +1592,18 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), + hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, None) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), + hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, None) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false), - hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), + hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, None) ); } @@ -1605,9 +1618,10 @@ mod test { &[], &[], &b"hello world"[..], - false + false, + None ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false) + hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, None) ); } @@ -1617,11 +1631,11 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false); + let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false); + let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1642,12 +1656,54 @@ mod test { &extra_data, &[], PREPROCESSED, - false + false, + None ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false) + hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None) ); } + #[test] + fn test_hash_key_basedir() { + use std::path::Path; + + let args = ovec!["a", "b", "c"]; + let digest = "abcd"; + + // Test 1: Same hash with different absolute paths when basedir is used + let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; + let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; + + let basedir1 = Path::new("/home/user1/project"); + let basedir2 = Path::new("/home/user2/project"); + + let h1 = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir1)); + let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, Some(basedir2)); + + assert_eq!(h1, h2); + + // Test 2: Different hashes without basedir + let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, None); + let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, None); + + assert_neq!(h1_no_base, h2_no_base); + + // Test 3: Works for C++ files too + let preprocessed_cpp1 = b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; + let preprocessed_cpp2 = b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; + + let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, Some(basedir1)); + let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, Some(basedir2)); + + assert_eq!(h_cpp1, h_cpp2); + + // Test 4: Works with trailing slashes + let basedir_slash = Path::new("/home/user1/project/"); + let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir_slash)); + + assert_eq!(h1, h_slash); + } + #[test] fn test_language_from_file_name() { fn t(extension: &str, expected: Language) { diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 8d194d6d9..bbc724ebe 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -2446,6 +2446,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2576,6 +2577,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2879,6 +2881,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -3008,6 +3011,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -3106,6 +3110,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, + None, ); let storage = Arc::new(storage); // Pretend to be GCC. diff --git a/src/config.rs b/src/config.rs index 8edaeb099..018bccf8c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,8 +19,8 @@ use fs_err as fs; #[cfg(any(feature = "dist-client", feature = "dist-server"))] use serde::ser::Serializer; use serde::{ - Deserialize, Serialize, de::{self, DeserializeOwned, Deserializer}, + Deserialize, Serialize, }; #[cfg(test)] use serial_test::serial; @@ -584,6 +584,8 @@ pub struct FileConfig { pub cache: CacheConfigs, pub dist: DistConfig, pub server_startup_timeout_ms: Option, + /// Base directory to strip from paths for cache key computation. + pub basedir: Option, } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -621,6 +623,7 @@ pub fn try_read_config_file(path: &Path) -> Result, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -946,7 +949,10 @@ fn config_from_env() -> Result { oss, }; - Ok(EnvConfig { cache }) + // ======= Base directory ======= + let basedir = env::var_os("SCCACHE_BASEDIR").map(PathBuf::from); + + Ok(EnvConfig { cache, basedir }) } // The directories crate changed the location of `config_dir` on macos in version 3, @@ -978,6 +984,9 @@ pub struct Config { pub fallback_cache: DiskCacheConfig, pub dist: DistConfig, pub server_startup_timeout: Option, + /// Base directory to strip from paths for cache key computation. + /// Similar to ccache's CCACHE_BASEDIR. + pub basedir: Option, } impl Config { @@ -999,21 +1008,29 @@ impl Config { cache, dist, server_startup_timeout_ms, + basedir: file_basedir, } = file_conf; conf_caches.merge(cache); let server_startup_timeout = server_startup_timeout_ms.map(std::time::Duration::from_millis); - let EnvConfig { cache } = env_conf; + let EnvConfig { + cache, + basedir: env_basedir, + } = env_conf; conf_caches.merge(cache); + // Environment variable takes precedence over file config + let basedir = env_basedir.or(file_basedir); + let (caches, fallback_cache) = conf_caches.into_fallback(); Self { cache: caches, fallback_cache, dist, server_startup_timeout, + basedir, } } } @@ -1287,6 +1304,7 @@ fn config_overrides() { }), ..Default::default() }, + basedir: None, }; let file_conf = FileConfig { @@ -1313,6 +1331,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, + basedir: None, }; assert_eq!( @@ -1335,10 +1354,64 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, + basedir: None, } ); } +#[test] +fn config_basedir_overrides() { + use std::path::PathBuf; + + // Test that env variable takes precedence over file config + let env_conf = EnvConfig { + cache: Default::default(), + basedir: Some(PathBuf::from("/env/basedir")), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedir: Some(PathBuf::from("/file/basedir")), + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedir, Some(PathBuf::from("/env/basedir"))); + + // Test that file config is used when env is None + let env_conf = EnvConfig { + cache: Default::default(), + basedir: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedir: Some(PathBuf::from("/file/basedir")), + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedir, Some(PathBuf::from("/file/basedir"))); + + // Test that both None results in None + let env_conf = EnvConfig { + cache: Default::default(), + basedir: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedir: None, + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf); + assert_eq!(config.basedir, None); +} + #[test] #[serial] #[cfg(feature = "s3")] @@ -1644,6 +1717,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), + basedir: None, } ) } @@ -1736,6 +1810,7 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, + basedir: None, } ); } diff --git a/src/test/tests.rs b/src/test/tests.rs index ee41f6c2e..7ed2c7e8f 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -85,6 +85,7 @@ where runtime.handle(), PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, + None, )); let client = Client::new(); diff --git a/src/util.rs b/src/util.rs index 18329b42b..be268bb1f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1015,6 +1015,54 @@ pub fn num_cpus() -> usize { std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) } +/// Strip the base directory from the preprocessor output to enable cache hits +/// across different absolute paths. +/// +/// This function searches for the basedir path in the preprocessor output and +/// replaces it with a relative path marker. +/// +/// The function handles both Unix-style (`/`) and Windows-style (`\`) path separators, +/// and normalizes trailing slashes. +pub fn strip_basedir(preprocessor_output: &[u8], basedir: &Path) -> Vec { + // Normalize the basedir by removing trailing slashes + let basedir_normalized = basedir.to_string_lossy(); + let basedir_str = basedir_normalized.trim_end_matches('/').trim_end_matches('\\'); + let basedir_bytes = basedir_str.as_bytes(); + + // If basedir is empty or preprocessor output is empty, return as-is + if basedir_bytes.is_empty() || preprocessor_output.is_empty() { + return preprocessor_output.to_vec(); + } + + let mut result = Vec::with_capacity(preprocessor_output.len()); + let mut i = 0; + + while i < preprocessor_output.len() { + // Check if we have a match for basedir at current position + if i + basedir_bytes.len() <= preprocessor_output.len() + && &preprocessor_output[i..i + basedir_bytes.len()] == basedir_bytes + { + // Check if this is actually a path boundary (preceded by whitespace, quote, or start) + let is_boundary = i == 0 + || preprocessor_output[i - 1].is_ascii_whitespace() + || preprocessor_output[i - 1] == b'"' + || preprocessor_output[i - 1] == b'<'; + + if is_boundary { + // Replace basedir with "." + result.push(b'.'); + i += basedir_bytes.len(); + continue; + } + } + + result.push(preprocessor_output[i]); + i += 1; + } + + result +} + #[cfg(test)] mod tests { use super::{OsStrExt, TimeMacroFinder}; @@ -1167,4 +1215,103 @@ mod tests { let empty_result = super::ascii_unescape_default(&[]).unwrap(); assert!(empty_result.is_empty(), "{:?}", empty_result); } + + #[test] + fn test_strip_basedir_simple() { + use std::path::Path; + + // Simple cases + let basedir = Path::new("/home/user/project"); + let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\"\nint main() { return 0; }"; + assert_eq!(output, expected); + + // Multiple occurrences + let input = b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; + assert_eq!(output, expected); + + // No occurrences + let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; + let output = super::strip_basedir(input, basedir); + assert_eq!(output, input); + } + + #[test] + fn test_strip_basedir_empty() { + use std::path::Path; + + // Empty basedir + let basedir = Path::new(""); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + assert_eq!(output, input); + + // Empty input + let basedir = Path::new("/home/user/project"); + let input = b""; + let output = super::strip_basedir(input, basedir); + assert_eq!(output, input); + } + + #[test] + fn test_strip_basedir_not_at_boundary() { + use std::path::Path; + + // basedir should only match at word boundaries + let basedir = Path::new("/home/user"); + let input = b"text/home/user/file.c and \"/home/user/other.c\""; + let output = super::strip_basedir(input, basedir); + // Should only replace the second occurrence (after quote) + let expected = b"text/home/user/file.c and \"./other.c\""; + assert_eq!(output, expected); + } + + #[test] + fn test_strip_basedir_trailing_slashes() { + use std::path::Path; + + // Without trailing slash + let basedir = Path::new("/home/user/project"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + + // With single trailing slash + let basedir = Path::new("/home/user/project/"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + + // With multiple trailing slashes + let basedir = Path::new("/home/user/project////"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + } + + #[cfg(target_os = "windows")] + #[test] + fn test_strip_basedir_windows_backslashes() { + use std::path::Path; + + // Without trailing backslash + let basedir = Path::new("C:\\Users\\test\\project"); + let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \".\\src\\main.c\""; + assert_eq!(output, expected); + + // With multiple trailing backslashes + let basedir = Path::new("C:\\Users\\test\\project\\\\\\"); + let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; + let output = super::strip_basedir(input, basedir); + let expected = b"# 1 \".\\src\\main.c\""; + assert_eq!(output, expected); + } } diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 219c08c29..73b10fa99 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -190,6 +190,7 @@ pub fn sccache_client_cfg( rewrite_includes_only: false, // TODO }, server_startup_timeout_ms: None, + basedir: None, } } diff --git a/tests/oauth.rs b/tests/oauth.rs index 066bcc2bd..7d3eb9cd9 100644 --- a/tests/oauth.rs +++ b/tests/oauth.rs @@ -60,6 +60,7 @@ fn config_with_dist_auth( rewrite_includes_only: true, }, server_startup_timeout_ms: None, + basedir: None, } } From bd86d1d77152f30f0fc16f18a75bb22abff4902e Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 18 Dec 2025 23:46:40 +0100 Subject: [PATCH 02/31] Fix clippy, introduce multiple basedirs logic --- README.md | 13 +++ docs/Configuration.md | 8 +- src/cache/cache.rs | 25 +++- src/cache/disk.rs | 10 +- src/compiler/c.rs | 93 ++++++++------- src/compiler/compiler.rs | 10 +- src/config.rs | 238 +++++++++++++++++++++++++++++++++++---- src/test/tests.rs | 4 +- src/util.rs | 158 ++++++++++++++++---------- tests/harness/mod.rs | 80 ++++++------- tests/oauth.rs | 2 +- 11 files changed, 455 insertions(+), 186 deletions(-) diff --git a/README.md b/README.md index 085442b9b..47f56c8ef 100644 --- a/README.md +++ b/README.md @@ -329,15 +329,28 @@ By default, sccache requires absolute paths to match for cache hits. To enable c export SCCACHE_BASEDIR=/home/user/project ``` +You can also specify multiple base directories by separating them with `|` (pipe character). When multiple directories are provided, the longest matching prefix is used: + +```bash +export SCCACHE_BASEDIR="/home/user/project|/home/user/workspace" +``` + This is similar to ccache's `CCACHE_BASEDIR` and helps when: * Building the same project from different directories * Sharing cache between CI jobs with different checkout paths * Multiple developers working with different username paths +* Working with multiple project checkouts simultaneously + +**Note:** Only absolute paths are supported. Relative paths will be ignored with a warning. You can also configure this in the sccache config file: ```toml +# Single directory basedir = "/home/user/project" + +# Or multiple directories +basedir = ["/home/user/project", "/home/user/workspace"] ``` --- diff --git a/docs/Configuration.md b/docs/Configuration.md index 32e1ec8cc..5371c3582 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -6,13 +6,17 @@ # If specified, wait this long for the server to start up. server_startup_timeout_ms = 10000 -# Base directory to strip from paths for cache key computation. +# Base directory (or directories) to strip from paths for cache key computation. # Similar to ccache's CCACHE_BASEDIR. This enables cache hits across # different absolute paths when compiling the same source code. +# Can be a single path or an array of paths. When multiple paths are provided, +# the longest matching prefix is used. # For example, if basedir is "/home/user/project", then paths like # "/home/user/project/src/main.c" will be normalized to "./src/main.c" # for caching purposes. basedir = "/home/user/project" +# Or multiple directories: +# basedir = ["/home/user/project", "/home/user/workspace"] [dist] # where to find the scheduler @@ -142,7 +146,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIR` base directory to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Environment variable takes precedence over file configuration. +* `SCCACHE_BASEDIR` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 86886a043..cfe8e7c20 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -381,9 +381,9 @@ pub trait Storage: Send + Sync { // Enable by default, only in local mode PreprocessorCacheModeConfig::default() } - /// Return the base directory for path normalization if configured - fn basedir(&self) -> Option<&Path> { - None + /// Return the base directories for path normalization if configured + fn basedirs(&self) -> &[PathBuf] { + &[] } /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. @@ -740,13 +740,30 @@ pub fn storage_from_config( let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode; let rw_mode = config.fallback_cache.rw_mode.into(); debug!("Init disk cache with dir {:?}, size {}", dir, size); + + // Validate that all basedirs are absolute paths + let basedirs: Vec = config.basedir.iter() + .filter_map(|p| { + if p.is_absolute() { + Some(p.clone()) + } else { + warn!("Ignoring relative basedir path: {:?}. Only absolute paths are supported.", p); + None + } + }) + .collect(); + + if !basedirs.is_empty() { + debug!("Using basedirs for path normalization: {:?}", basedirs); + } + Ok(Arc::new(DiskCache::new( dir, size, pool, preprocessor_cache_mode_config, rw_mode, - config.basedir.clone(), + basedirs, ))) } diff --git a/src/cache/disk.rs b/src/cache/disk.rs index 2a5b17ef5..515c3f2ca 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -74,7 +74,7 @@ pub struct DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, preprocessor_cache: Arc>, rw_mode: CacheMode, - basedir: Option, + basedirs: Vec, } impl DiskCache { @@ -85,7 +85,7 @@ impl DiskCache { pool: &tokio::runtime::Handle, preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, - basedir: Option, + basedirs: Vec, ) -> DiskCache { DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { @@ -101,7 +101,7 @@ impl DiskCache { max_size, })), rw_mode, - basedir, + basedirs, } } } @@ -184,8 +184,8 @@ impl Storage for DiskCache { fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { self.preprocessor_cache_mode_config } - fn basedir(&self) -> Option<&Path> { - self.basedir.as_deref() + fn basedirs(&self) -> &[PathBuf] { + &self.basedirs } async fn get_preprocessor_cache_entry(&self, key: &str) -> Result>> { let key = normalize_key(key); diff --git a/src/compiler/c.rs b/src/compiler/c.rs index c5d063550..23b6e2c39 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -632,7 +632,7 @@ where &env_vars, &preprocessor_output, self.compiler.plusplus(), - storage.basedir(), + storage.basedirs(), ) }; @@ -1464,6 +1464,11 @@ static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { }); /// Compute the hash key of `compiler` compiling `preprocessor_output` with `args`. +/// +/// If `basedirs` are provided, paths in the preprocessor output will be normalized by +/// stripping the longest matching basedir prefix. This enables cache hits across different +/// absolute paths (similar to ccache's CCACHE_BASEDIR). +#[allow(clippy::too_many_arguments)] pub fn hash_key( compiler_digest: &str, language: Language, @@ -1472,7 +1477,7 @@ pub fn hash_key( env_vars: &[(OsString, OsString)], preprocessor_output: &[u8], plusplus: bool, - basedir: Option<&Path>, + basedirs: &[PathBuf], ) -> String { // If you change any of the inputs to the hash, you should change `CACHE_VERSION`. let mut m = Digest::new(); @@ -1497,10 +1502,10 @@ pub fn hash_key( } } - // Strip basedir from preprocessor output if configured - let preprocessor_output_to_hash = if let Some(base) = basedir { - use crate::util::strip_basedir; - Cow::Owned(strip_basedir(preprocessor_output, base)) + // Strip basedirs from preprocessor output if configured + let preprocessor_output_to_hash = if !basedirs.is_empty() { + use crate::util::strip_basedirs; + Cow::Owned(strip_basedirs(preprocessor_output, basedirs)) } else { Cow::Borrowed(preprocessor_output) }; @@ -1520,8 +1525,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1530,8 +1535,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, None) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, &[]) ); } @@ -1540,7 +1545,7 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), hash_key( "abcd", Language::CHeader, @@ -1549,7 +1554,7 @@ mod test { &[], PREPROCESSED, false, - None + &[] ) ); } @@ -1559,7 +1564,7 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, None), + hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, &[]), hash_key( "abcd", Language::CxxHeader, @@ -1568,7 +1573,7 @@ mod test { &[], PREPROCESSED, true, - None + &[] ) ); } @@ -1578,8 +1583,8 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, None), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, None) + hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1592,18 +1597,18 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, None) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), - hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, None) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, None), - hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, None) + hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1619,9 +1624,9 @@ mod test { &[], &b"hello world"[..], false, - None + &[] ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, None) + hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, &[]) ); } @@ -1631,11 +1636,11 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None); + let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); + let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, &[]); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, None); + let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, &[]); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1657,15 +1662,15 @@ mod test { &[], PREPROCESSED, false, - None + &[] ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, None) + hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]) ); } #[test] fn test_hash_key_basedir() { - use std::path::Path; + use std::path::PathBuf; let args = ovec!["a", "b", "c"]; let digest = "abcd"; @@ -1674,17 +1679,17 @@ mod test { let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; - let basedir1 = Path::new("/home/user1/project"); - let basedir2 = Path::new("/home/user2/project"); + let basedir1 = PathBuf::from("/home/user1/project"); + let basedir2 = PathBuf::from("/home/user2/project"); - let h1 = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir1)); - let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, Some(basedir2)); + let h1 = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, std::slice::from_ref(&basedir1)); + let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, std::slice::from_ref(&basedir2)); assert_eq!(h1, h2); // Test 2: Different hashes without basedir - let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, None); - let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, None); + let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &[]); + let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, &[]); assert_neq!(h1_no_base, h2_no_base); @@ -1692,16 +1697,24 @@ mod test { let preprocessed_cpp1 = b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; let preprocessed_cpp2 = b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; - let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, Some(basedir1)); - let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, Some(basedir2)); + let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, std::slice::from_ref(&basedir1)); + let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, std::slice::from_ref(&basedir2)); assert_eq!(h_cpp1, h_cpp2); // Test 4: Works with trailing slashes - let basedir_slash = Path::new("/home/user1/project/"); - let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, Some(basedir_slash)); + let basedir_slash = PathBuf::from("/home/user1/project/"); + let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, std::slice::from_ref(&basedir_slash)); assert_eq!(h1, h_slash); + + // Test 5: Multiple basedirs - longest match wins + let basedirs = vec![ + PathBuf::from("/home/user1"), + PathBuf::from("/home/user1/project"), // This should match (longest) + ]; + let h_multi = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &basedirs); + assert_eq!(h1, h_multi); } #[test] diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index bbc724ebe..cbb7a955b 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -2446,7 +2446,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2577,7 +2577,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2881,7 +2881,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -3011,7 +3011,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -3110,7 +3110,7 @@ LLVM version: 6.0", ..Default::default() }, CacheMode::ReadWrite, - None, + vec![], ); let storage = Arc::new(storage); // Pretend to be GCC. diff --git a/src/config.rs b/src/config.rs index 018bccf8c..1c9af6542 100644 --- a/src/config.rs +++ b/src/config.rs @@ -584,8 +584,29 @@ pub struct FileConfig { pub cache: CacheConfigs, pub dist: DistConfig, pub server_startup_timeout_ms: Option, - /// Base directory to strip from paths for cache key computation. - pub basedir: Option, + /// Base directory (or directories) to strip from paths for cache key computation. + /// Can be a single path or an array of paths. + #[serde(default, deserialize_with = "deserialize_basedir")] + pub basedir: Vec, +} + +fn deserialize_basedir<'de, D>(deserializer: D) -> std::result::Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + use serde::Deserialize; + + #[derive(Deserialize)] + #[serde(untagged)] + enum StringOrVec { + String(PathBuf), + Vec(Vec), + } + + match StringOrVec::deserialize(deserializer)? { + StringOrVec::String(s) => Ok(vec![s]), + StringOrVec::Vec(v) => Ok(v), + } } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -623,7 +644,7 @@ pub fn try_read_config_file(path: &Path) -> Result, + basedir: Vec, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -950,7 +971,16 @@ fn config_from_env() -> Result { }; // ======= Base directory ======= - let basedir = env::var_os("SCCACHE_BASEDIR").map(PathBuf::from); + // Support multiple paths separated by '|' (a character forbidden in paths) + let basedir = env::var_os("SCCACHE_BASEDIR") + .map(|s| { + s.to_string_lossy() + .split('|') + .map(|p| PathBuf::from(p.trim())) + .filter(|p| !p.as_os_str().is_empty()) + .collect() + }) + .unwrap_or_default(); Ok(EnvConfig { cache, basedir }) } @@ -984,9 +1014,10 @@ pub struct Config { pub fallback_cache: DiskCacheConfig, pub dist: DistConfig, pub server_startup_timeout: Option, - /// Base directory to strip from paths for cache key computation. + /// Base directory (or directories) to strip from paths for cache key computation. /// Similar to ccache's CCACHE_BASEDIR. - pub basedir: Option, + /// Currently only the first directory is used if multiple are specified. + pub basedir: Vec, } impl Config { @@ -1022,7 +1053,11 @@ impl Config { conf_caches.merge(cache); // Environment variable takes precedence over file config - let basedir = env_basedir.or(file_basedir); + let basedir = if !env_basedir.is_empty() { + env_basedir + } else { + file_basedir + }; let (caches, fallback_cache) = conf_caches.into_fallback(); Self { @@ -1304,7 +1339,7 @@ fn config_overrides() { }), ..Default::default() }, - basedir: None, + basedir: vec![], }; let file_conf = FileConfig { @@ -1331,7 +1366,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], }; assert_eq!( @@ -1354,7 +1389,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, - basedir: None, + basedir: vec![], } ); } @@ -1366,50 +1401,207 @@ fn config_basedir_overrides() { // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedir: Some(PathBuf::from("/env/basedir")), + basedir: vec![PathBuf::from("/env/basedir")], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: Some(PathBuf::from("/file/basedir")), + basedir: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, Some(PathBuf::from("/env/basedir"))); + assert_eq!(config.basedir, vec![PathBuf::from("/env/basedir")]); - // Test that file config is used when env is None + // Test that file config is used when env is empty let env_conf = EnvConfig { cache: Default::default(), - basedir: None, + basedir: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: Some(PathBuf::from("/file/basedir")), + basedir: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, Some(PathBuf::from("/file/basedir"))); + assert_eq!(config.basedir, vec![PathBuf::from("/file/basedir")]); - // Test that both None results in None + // Test that both empty results in empty let env_conf = EnvConfig { cache: Default::default(), - basedir: None, + basedir: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, None); + assert_eq!(config.basedir, Vec::::new()); +} + +#[test] +fn test_deserialize_basedir_single() { + use std::path::PathBuf; + + // Test single string value + let toml = r#" + basedir = "/home/user/project" + + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); +} + +#[test] +fn test_deserialize_basedir_multiple() { + use std::path::PathBuf; + + // Test array of paths + let toml = r#" + basedir = ["/home/user/project", "/home/user/workspace"] + + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); +} + +#[test] +fn test_deserialize_basedir_missing() { + use std::path::PathBuf; + + // Test no basedir specified (should default to empty vec) + let toml = r#" + [cache.disk] + dir = "/tmp/cache" + size = 1073741824 + + [dist] + "#; + + let config: FileConfig = toml::from_str(toml).unwrap(); + assert_eq!(config.basedir, Vec::::new()); +} + +#[test] +#[serial] +fn test_env_basedir_single() { + use std::path::PathBuf; + + unsafe { + std::env::set_var("SCCACHE_BASEDIR", "/home/user/project"); + } + let config = config_from_env().unwrap(); + assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_multiple() { + use std::path::PathBuf; + + unsafe { + std::env::set_var("SCCACHE_BASEDIR", "/home/user/project|/home/user/workspace"); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_with_spaces() { + use std::path::PathBuf; + + // Test that spaces around paths are trimmed + unsafe { + std::env::set_var( + "SCCACHE_BASEDIR", + " /home/user/project | /home/user/workspace ", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_empty_entries() { + use std::path::PathBuf; + + // Test that empty entries are filtered out + unsafe { + std::env::set_var( + "SCCACHE_BASEDIR", + "/home/user/project||/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedir, + vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } +} + +#[test] +#[serial] +fn test_env_basedir_not_set() { + unsafe { + std::env::remove_var("SCCACHE_BASEDIR"); + } + let config = config_from_env().unwrap(); + assert_eq!(config.basedir, Vec::::new()); } #[test] @@ -1717,7 +1909,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), - basedir: None, + basedir: vec![], } ) } @@ -1810,7 +2002,7 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], } ); } diff --git a/src/test/tests.rs b/src/test/tests.rs index 7ed2c7e8f..17e284ce5 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -31,7 +31,7 @@ use std::net::TcpListener; use std::path::Path; #[cfg(not(target_os = "macos"))] use std::process::Command; -use std::sync::{Arc, Mutex, mpsc}; +use std::sync::{mpsc, Arc, Mutex}; use std::thread; use std::time::Duration; use tokio::runtime::Runtime; @@ -85,7 +85,7 @@ where runtime.handle(), PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, - None, + vec![], )); let client = Client::new(); diff --git a/src/util.rs b/src/util.rs index be268bb1f..15d54c3f6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1015,49 +1015,69 @@ pub fn num_cpus() -> usize { std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) } -/// Strip the base directory from the preprocessor output to enable cache hits -/// across different absolute paths. +/// Strip base directories from absolute paths in preprocessor output. /// -/// This function searches for the basedir path in the preprocessor output and -/// replaces it with a relative path marker. +/// This function searches for basedir paths in the preprocessor output and +/// replaces them with relative path markers. When multiple basedirs are provided, +/// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. /// -/// The function handles both Unix-style (`/`) and Windows-style (`\`) path separators, -/// and normalizes trailing slashes. -pub fn strip_basedir(preprocessor_output: &[u8], basedir: &Path) -> Vec { - // Normalize the basedir by removing trailing slashes - let basedir_normalized = basedir.to_string_lossy(); - let basedir_str = basedir_normalized.trim_end_matches('/').trim_end_matches('\\'); - let basedir_bytes = basedir_str.as_bytes(); - - // If basedir is empty or preprocessor output is empty, return as-is - if basedir_bytes.is_empty() || preprocessor_output.is_empty() { +/// Only paths that start with one of the basedirs are modified. The paths are expected to be +/// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). +pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec { + if basedirs.is_empty() || preprocessor_output.is_empty() { return preprocessor_output.to_vec(); } + // Prepare normalized basedirs sorted by length (longest first) to match longest prefix first + let mut basedir_data: Vec<_> = basedirs + .iter() + .map(|basedir| { + let normalized = basedir.to_string_lossy(); + let trimmed = normalized.trim_end_matches('/').trim_end_matches('\\'); + (trimmed.as_bytes().to_vec(), trimmed.len()) + }) + .filter(|(bytes, _)| !bytes.is_empty()) + .collect(); + + if basedir_data.is_empty() { + return preprocessor_output.to_vec(); + } + + // Sort by length descending (longest first) + basedir_data.sort_by(|a, b| b.1.cmp(&a.1)); + let mut result = Vec::with_capacity(preprocessor_output.len()); let mut i = 0; while i < preprocessor_output.len() { - // Check if we have a match for basedir at current position - if i + basedir_bytes.len() <= preprocessor_output.len() - && &preprocessor_output[i..i + basedir_bytes.len()] == basedir_bytes - { - // Check if this is actually a path boundary (preceded by whitespace, quote, or start) - let is_boundary = i == 0 - || preprocessor_output[i - 1].is_ascii_whitespace() - || preprocessor_output[i - 1] == b'"' - || preprocessor_output[i - 1] == b'<'; - - if is_boundary { - // Replace basedir with "." - result.push(b'.'); - i += basedir_bytes.len(); - continue; + let mut matched = false; + + // Try to match each basedir (longest first) + for (basedir_bytes, basedir_len) in &basedir_data { + // Check if we have a match for this basedir at current position + if i + basedir_len <= preprocessor_output.len() + && preprocessor_output[i..i + basedir_len] == basedir_bytes[..] + { + // Check if this is actually a path boundary (preceded by whitespace, quote, or start) + let is_boundary = i == 0 + || preprocessor_output[i - 1].is_ascii_whitespace() + || preprocessor_output[i - 1] == b'"' + || preprocessor_output[i - 1] == b'<'; + + if is_boundary { + // Replace basedir with "." + result.push(b'.'); + i += basedir_len; + matched = true; + break; + } } } - result.push(preprocessor_output[i]); - i += 1; + if !matched { + result.push(preprocessor_output[i]); + i += 1; + } } result @@ -1218,52 +1238,51 @@ mod tests { #[test] fn test_strip_basedir_simple() { - use std::path::Path; + use std::path::PathBuf; // Simple cases - let basedir = Path::new("/home/user/project"); + let basedir = PathBuf::from("/home/user/project"); let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\"\nint main() { return 0; }"; assert_eq!(output, expected); // Multiple occurrences let input = b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; assert_eq!(output, expected); // No occurrences let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); assert_eq!(output, input); } #[test] fn test_strip_basedir_empty() { - use std::path::Path; + use std::path::PathBuf; - // Empty basedir - let basedir = Path::new(""); + // Empty basedir slice let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, &[]); assert_eq!(output, input); // Empty input - let basedir = Path::new("/home/user/project"); + let basedir = PathBuf::from("/home/user/project"); let input = b""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); assert_eq!(output, input); } #[test] fn test_strip_basedir_not_at_boundary() { - use std::path::Path; + use std::path::PathBuf; // basedir should only match at word boundaries - let basedir = Path::new("/home/user"); + let basedir = PathBuf::from("/home/user"); let input = b"text/home/user/file.c and \"/home/user/other.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); // Should only replace the second occurrence (after quote) let expected = b"text/home/user/file.c and \"./other.c\""; assert_eq!(output, expected); @@ -1271,26 +1290,51 @@ mod tests { #[test] fn test_strip_basedir_trailing_slashes() { - use std::path::Path; + use std::path::PathBuf; // Without trailing slash - let basedir = Path::new("/home/user/project"); + let basedir = PathBuf::from("/home/user/project"); let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); // With single trailing slash - let basedir = Path::new("/home/user/project/"); + let basedir = PathBuf::from("/home/user/project/"); let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); // With multiple trailing slashes - let basedir = Path::new("/home/user/project////"); + let basedir = PathBuf::from("/home/user/project////"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + } + + #[test] + fn test_strip_basedirs_multiple() { + use std::path::PathBuf; + + // Multiple basedirs - should match longest first + let basedirs = vec![ + PathBuf::from("/home/user1/project"), + PathBuf::from("/home/user2/workspace"), + ]; + let input = b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; + let output = super::strip_basedirs(input, &basedirs); + let expected = b"# 1 \"./src/main.c\"\n# 2 \"./lib/util.c\""; + assert_eq!(output, expected); + + // Longest prefix wins + let basedirs = vec![ + PathBuf::from("/home/user"), + PathBuf::from("/home/user/project"), // This should match first (longest) + ]; let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, &basedirs); let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); } @@ -1298,19 +1342,19 @@ mod tests { #[cfg(target_os = "windows")] #[test] fn test_strip_basedir_windows_backslashes() { - use std::path::Path; + use std::path::PathBuf; // Without trailing backslash - let basedir = Path::new("C:\\Users\\test\\project"); + let basedir = PathBuf::from("C:\\Users\\test\\project"); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \".\\src\\main.c\""; assert_eq!(output, expected); // With multiple trailing backslashes - let basedir = Path::new("C:\\Users\\test\\project\\\\\\"); + let basedir = PathBuf::from("C:\\Users\\test\\project\\\\\\"); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; - let output = super::strip_basedir(input, basedir); + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \".\\src\\main.c\""; assert_eq!(output, expected); } diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 73b10fa99..877f3ccfa 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -190,7 +190,7 @@ pub fn sccache_client_cfg( rewrite_includes_only: false, // TODO }, server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], } } @@ -577,44 +577,32 @@ impl Drop for DistSystem { let mut exits = vec![]; if let Some(scheduler_name) = self.scheduler_name.as_ref() { - droperr!( - Command::new("docker") - .args(["logs", scheduler_name]) - .output() - .map(|o| logs.push((scheduler_name, o))) - ); - droperr!( - Command::new("docker") - .args(["kill", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o))) - ); - droperr!( - Command::new("docker") - .args(["rm", "-f", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o))) - ); + droperr!(Command::new("docker") + .args(["logs", scheduler_name]) + .output() + .map(|o| logs.push((scheduler_name, o)))); + droperr!(Command::new("docker") + .args(["kill", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o)))); + droperr!(Command::new("docker") + .args(["rm", "-f", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o)))); } for server_name in self.server_names.iter() { - droperr!( - Command::new("docker") - .args(["logs", server_name]) - .output() - .map(|o| logs.push((server_name, o))) - ); - droperr!( - Command::new("docker") - .args(["kill", server_name]) - .output() - .map(|o| outputs.push((server_name, o))) - ); - droperr!( - Command::new("docker") - .args(["rm", "-f", server_name]) - .output() - .map(|o| outputs.push((server_name, o))) - ); + droperr!(Command::new("docker") + .args(["logs", server_name]) + .output() + .map(|o| logs.push((server_name, o)))); + droperr!(Command::new("docker") + .args(["kill", server_name]) + .output() + .map(|o| outputs.push((server_name, o)))); + droperr!(Command::new("docker") + .args(["rm", "-f", server_name]) + .output() + .map(|o| outputs.push((server_name, o)))); } for &pid in self.server_pids.iter() { droperr!(nix::sys::signal::kill(pid, Signal::SIGINT)); @@ -631,16 +619,14 @@ impl Drop for DistSystem { if killagain { eprintln!("SIGINT didn't kill process, trying SIGKILL"); droperr!(nix::sys::signal::kill(pid, Signal::SIGKILL)); - droperr!( - nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) - .map_err(|e| e.to_string()) - .and_then(|ws| if ws == WaitStatus::StillAlive { - Err("process alive after sigkill".to_owned()) - } else { - exits.push(ws); - Ok(()) - }) - ); + droperr!(nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) + .map_err(|e| e.to_string()) + .and_then(|ws| if ws == WaitStatus::StillAlive { + Err("process alive after sigkill".to_owned()) + } else { + exits.push(ws); + Ok(()) + })); } } diff --git a/tests/oauth.rs b/tests/oauth.rs index 7d3eb9cd9..6798d9939 100644 --- a/tests/oauth.rs +++ b/tests/oauth.rs @@ -60,7 +60,7 @@ fn config_with_dist_auth( rewrite_includes_only: true, }, server_startup_timeout_ms: None, - basedir: None, + basedir: vec![], } } From 24584859a1de10fa9a1e359f0cf704e981fd723f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 19 Dec 2025 00:27:42 +0100 Subject: [PATCH 03/31] Make codecov happy --- tests/oauth.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/oauth.rs b/tests/oauth.rs index 6798d9939..7096e084a 100644 --- a/tests/oauth.rs +++ b/tests/oauth.rs @@ -226,6 +226,7 @@ fn test_auth_with_config(dist_auth: sccache::config::DistAuth) { .tempdir() .unwrap(); let sccache_config = config_with_dist_auth(conf_dir.path(), dist_auth); + assert!(sccache_config.basedir.is_empty()); let sccache_config_path = conf_dir.path().join("sccache-config.json"); fs::File::create(&sccache_config_path) .unwrap() From 416b65c2375b6bcd7b315a1328326857b1ef8b1f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 19 Dec 2025 00:35:48 +0100 Subject: [PATCH 04/31] Apply fmt to all files --- src/cache/cache.rs | 9 +- src/compiler/c.rs | 287 ++++++++++++++++++++++++++++++++++++++----- src/config.rs | 2 +- src/test/tests.rs | 2 +- src/util.rs | 8 +- tests/harness/mod.rs | 78 +++++++----- 6 files changed, 317 insertions(+), 69 deletions(-) diff --git a/src/cache/cache.rs b/src/cache/cache.rs index cfe8e7c20..b4e71b788 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -742,12 +742,17 @@ pub fn storage_from_config( debug!("Init disk cache with dir {:?}, size {}", dir, size); // Validate that all basedirs are absolute paths - let basedirs: Vec = config.basedir.iter() + let basedirs: Vec = config + .basedir + .iter() .filter_map(|p| { if p.is_absolute() { Some(p.clone()) } else { - warn!("Ignoring relative basedir path: {:?}. Only absolute paths are supported.", p); + warn!( + "Ignoring relative basedir path: {:?}. Only absolute paths are supported.", + p + ); None } }) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 23b6e2c39..e91c7eee3 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -1464,7 +1464,7 @@ static CACHED_ENV_VARS: LazyLock> = LazyLock::new(|| { }); /// Compute the hash key of `compiler` compiling `preprocessor_output` with `args`. -/// +/// /// If `basedirs` are provided, paths in the preprocessor output will be normalized by /// stripping the longest matching basedir prefix. This enables cache hits across different /// absolute paths (similar to ccache's CCACHE_BASEDIR). @@ -1525,8 +1525,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_eq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1535,8 +1553,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, true, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ) ); } @@ -1545,7 +1581,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key( "abcd", Language::CHeader, @@ -1564,7 +1609,16 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::Cxx, &args, &[], &[], PREPROCESSED, true, &[]), + hash_key( + "abcd", + Language::Cxx, + &args, + &[], + &[], + PREPROCESSED, + true, + &[] + ), hash_key( "abcd", Language::CxxHeader, @@ -1583,8 +1637,26 @@ mod test { let args = ovec!["a", "b", "c"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key("abcd", Language::C, &args, &[], &[], PREPROCESSED, false, &[]), - hash_key("wxyz", Language::C, &args, &[], &[], PREPROCESSED, false, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + "wxyz", + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1597,17 +1669,53 @@ mod test { let a = ovec!["a"]; const PREPROCESSED: &[u8] = b"hello world"; assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), - hash_key(digest, Language::C, &xyz, &[], &[], PREPROCESSED, false, &[]) + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), + hash_key( + digest, + Language::C, + &xyz, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key(digest, Language::C, &ab, &[], &[], PREPROCESSED, false, &[]) ); assert_neq!( - hash_key(digest, Language::C, &abc, &[], &[], PREPROCESSED, false, &[]), + hash_key( + digest, + Language::C, + &abc, + &[], + &[], + PREPROCESSED, + false, + &[] + ), hash_key(digest, Language::C, &a, &[], &[], PREPROCESSED, false, &[]) ); } @@ -1626,7 +1734,16 @@ mod test { false, &[] ), - hash_key("abcd", Language::C, &args, &[], &[], &b"goodbye"[..], false, &[]) + hash_key( + "abcd", + Language::C, + &args, + &[], + &[], + &b"goodbye"[..], + false, + &[] + ) ); } @@ -1636,11 +1753,38 @@ mod test { let digest = "abcd"; const PREPROCESSED: &[u8] = b"hello world"; for var in CACHED_ENV_VARS.iter() { - let h1 = hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]); + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something"))]; - let h2 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, &[]); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); let vars = vec![(OsString::from(var), OsString::from("something else"))]; - let h3 = hash_key(digest, Language::C, &args, &[], &vars, PREPROCESSED, false, &[]); + let h3 = hash_key( + digest, + Language::C, + &args, + &[], + &vars, + PREPROCESSED, + false, + &[], + ); assert_neq!(h1, h2); assert_neq!(h2, h3); } @@ -1664,7 +1808,16 @@ mod test { false, &[] ), - hash_key(digest, Language::C, &args, &[], &[], PREPROCESSED, false, &[]) + hash_key( + digest, + Language::C, + &args, + &[], + &[], + PREPROCESSED, + false, + &[] + ) ); } @@ -1682,38 +1835,112 @@ mod test { let basedir1 = PathBuf::from("/home/user1/project"); let basedir2 = PathBuf::from("/home/user2/project"); - let h1 = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, std::slice::from_ref(&basedir1)); - let h2 = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, std::slice::from_ref(&basedir2)); + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + std::slice::from_ref(&basedir1), + ); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + std::slice::from_ref(&basedir2), + ); assert_eq!(h1, h2); // Test 2: Different hashes without basedir - let h1_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &[]); - let h2_no_base = hash_key(digest, Language::C, &args, &[], &[], preprocessed2, false, &[]); + let h1_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &[], + ); + let h2_no_base = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &[], + ); assert_neq!(h1_no_base, h2_no_base); // Test 3: Works for C++ files too - let preprocessed_cpp1 = b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; - let preprocessed_cpp2 = b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; - - let h_cpp1 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp1, true, std::slice::from_ref(&basedir1)); - let h_cpp2 = hash_key(digest, Language::Cxx, &args, &[], &[], preprocessed_cpp2, true, std::slice::from_ref(&basedir2)); + let preprocessed_cpp1 = + b"# 1 \"/home/user1/project/src/main.cpp\"\nint main() { return 0; }"; + let preprocessed_cpp2 = + b"# 1 \"/home/user2/project/src/main.cpp\"\nint main() { return 0; }"; + + let h_cpp1 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp1, + true, + std::slice::from_ref(&basedir1), + ); + let h_cpp2 = hash_key( + digest, + Language::Cxx, + &args, + &[], + &[], + preprocessed_cpp2, + true, + std::slice::from_ref(&basedir2), + ); assert_eq!(h_cpp1, h_cpp2); // Test 4: Works with trailing slashes let basedir_slash = PathBuf::from("/home/user1/project/"); - let h_slash = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, std::slice::from_ref(&basedir_slash)); + let h_slash = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + std::slice::from_ref(&basedir_slash), + ); assert_eq!(h1, h_slash); // Test 5: Multiple basedirs - longest match wins let basedirs = vec![ PathBuf::from("/home/user1"), - PathBuf::from("/home/user1/project"), // This should match (longest) + PathBuf::from("/home/user1/project"), // This should match (longest) ]; - let h_multi = hash_key(digest, Language::C, &args, &[], &[], preprocessed1, false, &basedirs); + let h_multi = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs, + ); assert_eq!(h1, h_multi); } diff --git a/src/config.rs b/src/config.rs index 1c9af6542..4e29fa5ac 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,8 +19,8 @@ use fs_err as fs; #[cfg(any(feature = "dist-client", feature = "dist-server"))] use serde::ser::Serializer; use serde::{ - de::{self, DeserializeOwned, Deserializer}, Deserialize, Serialize, + de::{self, DeserializeOwned, Deserializer}, }; #[cfg(test)] use serial_test::serial; diff --git a/src/test/tests.rs b/src/test/tests.rs index 17e284ce5..cfa5f11f3 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -31,7 +31,7 @@ use std::net::TcpListener; use std::path::Path; #[cfg(not(target_os = "macos"))] use std::process::Command; -use std::sync::{mpsc, Arc, Mutex}; +use std::sync::{Arc, Mutex, mpsc}; use std::thread; use std::time::Duration; use tokio::runtime::Runtime; diff --git a/src/util.rs b/src/util.rs index 15d54c3f6..94686e0d6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1248,7 +1248,8 @@ mod tests { assert_eq!(output, expected); // Multiple occurrences - let input = b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; + let input = + b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; assert_eq!(output, expected); @@ -1323,7 +1324,8 @@ mod tests { PathBuf::from("/home/user1/project"), PathBuf::from("/home/user2/workspace"), ]; - let input = b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; + let input = + b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; let output = super::strip_basedirs(input, &basedirs); let expected = b"# 1 \"./src/main.c\"\n# 2 \"./lib/util.c\""; assert_eq!(output, expected); @@ -1331,7 +1333,7 @@ mod tests { // Longest prefix wins let basedirs = vec![ PathBuf::from("/home/user"), - PathBuf::from("/home/user/project"), // This should match first (longest) + PathBuf::from("/home/user/project"), // This should match first (longest) ]; let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, &basedirs); diff --git a/tests/harness/mod.rs b/tests/harness/mod.rs index 877f3ccfa..74302e660 100644 --- a/tests/harness/mod.rs +++ b/tests/harness/mod.rs @@ -577,32 +577,44 @@ impl Drop for DistSystem { let mut exits = vec![]; if let Some(scheduler_name) = self.scheduler_name.as_ref() { - droperr!(Command::new("docker") - .args(["logs", scheduler_name]) - .output() - .map(|o| logs.push((scheduler_name, o)))); - droperr!(Command::new("docker") - .args(["kill", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o)))); - droperr!(Command::new("docker") - .args(["rm", "-f", scheduler_name]) - .output() - .map(|o| outputs.push((scheduler_name, o)))); + droperr!( + Command::new("docker") + .args(["logs", scheduler_name]) + .output() + .map(|o| logs.push((scheduler_name, o))) + ); + droperr!( + Command::new("docker") + .args(["kill", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o))) + ); + droperr!( + Command::new("docker") + .args(["rm", "-f", scheduler_name]) + .output() + .map(|o| outputs.push((scheduler_name, o))) + ); } for server_name in self.server_names.iter() { - droperr!(Command::new("docker") - .args(["logs", server_name]) - .output() - .map(|o| logs.push((server_name, o)))); - droperr!(Command::new("docker") - .args(["kill", server_name]) - .output() - .map(|o| outputs.push((server_name, o)))); - droperr!(Command::new("docker") - .args(["rm", "-f", server_name]) - .output() - .map(|o| outputs.push((server_name, o)))); + droperr!( + Command::new("docker") + .args(["logs", server_name]) + .output() + .map(|o| logs.push((server_name, o))) + ); + droperr!( + Command::new("docker") + .args(["kill", server_name]) + .output() + .map(|o| outputs.push((server_name, o))) + ); + droperr!( + Command::new("docker") + .args(["rm", "-f", server_name]) + .output() + .map(|o| outputs.push((server_name, o))) + ); } for &pid in self.server_pids.iter() { droperr!(nix::sys::signal::kill(pid, Signal::SIGINT)); @@ -619,14 +631,16 @@ impl Drop for DistSystem { if killagain { eprintln!("SIGINT didn't kill process, trying SIGKILL"); droperr!(nix::sys::signal::kill(pid, Signal::SIGKILL)); - droperr!(nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) - .map_err(|e| e.to_string()) - .and_then(|ws| if ws == WaitStatus::StillAlive { - Err("process alive after sigkill".to_owned()) - } else { - exits.push(ws); - Ok(()) - })); + droperr!( + nix::sys::wait::waitpid(pid, Some(WaitPidFlag::WNOHANG)) + .map_err(|e| e.to_string()) + .and_then(|ws| if ws == WaitStatus::StillAlive { + Err("process alive after sigkill".to_owned()) + } else { + exits.push(ws); + Ok(()) + }) + ); } } From f1f609516a79f4f5ffd4e4ad62f50dabc3ecd1d8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 22 Dec 2025 11:43:35 +0100 Subject: [PATCH 05/31] Clean out `basedir` intermediate step, use `basedirs` --- README.md | 14 ++-- docs/Configuration.md | 8 +-- src/cache/cache.rs | 2 +- src/compiler/c.rs | 16 +++-- src/config.rs | 145 ++++++++++++++++-------------------------- src/util.rs | 8 +-- tests/harness/mod.rs | 2 +- tests/oauth.rs | 4 +- 8 files changed, 82 insertions(+), 117 deletions(-) diff --git a/README.md b/README.md index 47f56c8ef..545aa7369 100644 --- a/README.md +++ b/README.md @@ -320,19 +320,19 @@ This is most useful when using sccache for Rust compilation, as rustc supports u --- -Normalizing Paths with `SCCACHE_BASEDIR` +Normalizing Paths with `SCCACHE_BASEDIRS` ----------------------------------------- -By default, sccache requires absolute paths to match for cache hits. To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIR` to strip a base directory from paths before hashing: +By default, sccache requires absolute paths to match for cache hits. To enable cache sharing across different build directories, you can set `SCCACHE_BASEDIRS` to strip a base directory from paths before hashing: ```bash -export SCCACHE_BASEDIR=/home/user/project +export SCCACHE_BASEDIRS=/home/user/project ``` You can also specify multiple base directories by separating them with `|` (pipe character). When multiple directories are provided, the longest matching prefix is used: ```bash -export SCCACHE_BASEDIR="/home/user/project|/home/user/workspace" +export SCCACHE_BASEDIRS="/home/user/project|/home/user/workspace" ``` This is similar to ccache's `CCACHE_BASEDIR` and helps when: @@ -347,10 +347,10 @@ You can also configure this in the sccache config file: ```toml # Single directory -basedir = "/home/user/project" +basedirs = ["/home/user/project"] # Or multiple directories -basedir = ["/home/user/project", "/home/user/workspace"] +basedirs = ["/home/user/project", "/home/user/workspace"] ``` --- @@ -360,7 +360,7 @@ Known Caveats ### General -* By default, absolute paths to files must match to get a cache hit. To work around this, use `SCCACHE_BASEDIR` (see above) to normalize paths before hashing. +* By default, absolute paths to files must match to get a cache hit. To work around this, use `SCCACHE_BASEDIRS` (see above) to normalize paths before hashing. ### Rust diff --git a/docs/Configuration.md b/docs/Configuration.md index 5371c3582..822e4322e 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -9,14 +9,14 @@ server_startup_timeout_ms = 10000 # Base directory (or directories) to strip from paths for cache key computation. # Similar to ccache's CCACHE_BASEDIR. This enables cache hits across # different absolute paths when compiling the same source code. -# Can be a single path or an array of paths. When multiple paths are provided, +# Can be an array of paths. When multiple paths are provided, # the longest matching prefix is used. # For example, if basedir is "/home/user/project", then paths like # "/home/user/project/src/main.c" will be normalized to "./src/main.c" # for caching purposes. -basedir = "/home/user/project" +basedirs = ["/home/user/project"] # Or multiple directories: -# basedir = ["/home/user/project", "/home/user/workspace"] +# basedirs = ["/home/user/project", "/home/user/workspace"] [dist] # where to find the scheduler @@ -146,7 +146,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIR` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index b4e71b788..eed89a460 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -743,7 +743,7 @@ pub fn storage_from_config( // Validate that all basedirs are absolute paths let basedirs: Vec = config - .basedir + .basedirs .iter() .filter_map(|p| { if p.is_absolute() { diff --git a/src/compiler/c.rs b/src/compiler/c.rs index e91c7eee3..c0227f56f 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -1822,7 +1822,7 @@ mod test { } #[test] - fn test_hash_key_basedir() { + fn test_hash_key_basedirs() { use std::path::PathBuf; let args = ovec!["a", "b", "c"]; @@ -1832,8 +1832,10 @@ mod test { let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; - let basedir1 = PathBuf::from("/home/user1/project"); - let basedir2 = PathBuf::from("/home/user2/project"); + let basedirs = [ + PathBuf::from("/home/user1/project"), + PathBuf::from("/home/user2/project"), + ]; let h1 = hash_key( digest, @@ -1843,7 +1845,7 @@ mod test { &[], preprocessed1, false, - std::slice::from_ref(&basedir1), + &basedirs, ); let h2 = hash_key( digest, @@ -1853,7 +1855,7 @@ mod test { &[], preprocessed2, false, - std::slice::from_ref(&basedir2), + &basedirs, ); assert_eq!(h1, h2); @@ -1896,7 +1898,7 @@ mod test { &[], preprocessed_cpp1, true, - std::slice::from_ref(&basedir1), + &basedirs, ); let h_cpp2 = hash_key( digest, @@ -1906,7 +1908,7 @@ mod test { &[], preprocessed_cpp2, true, - std::slice::from_ref(&basedir2), + &basedirs, ); assert_eq!(h_cpp1, h_cpp2); diff --git a/src/config.rs b/src/config.rs index 4e29fa5ac..ea6ec12e2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -586,27 +586,7 @@ pub struct FileConfig { pub server_startup_timeout_ms: Option, /// Base directory (or directories) to strip from paths for cache key computation. /// Can be a single path or an array of paths. - #[serde(default, deserialize_with = "deserialize_basedir")] - pub basedir: Vec, -} - -fn deserialize_basedir<'de, D>(deserializer: D) -> std::result::Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - use serde::Deserialize; - - #[derive(Deserialize)] - #[serde(untagged)] - enum StringOrVec { - String(PathBuf), - Vec(Vec), - } - - match StringOrVec::deserialize(deserializer)? { - StringOrVec::String(s) => Ok(vec![s]), - StringOrVec::Vec(v) => Ok(v), - } + pub basedirs: Vec, } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -644,7 +624,7 @@ pub fn try_read_config_file(path: &Path) -> Result, + basedirs: Vec, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -972,7 +952,7 @@ fn config_from_env() -> Result { // ======= Base directory ======= // Support multiple paths separated by '|' (a character forbidden in paths) - let basedir = env::var_os("SCCACHE_BASEDIR") + let basedirs = env::var_os("SCCACHE_BASEDIRS") .map(|s| { s.to_string_lossy() .split('|') @@ -982,7 +962,7 @@ fn config_from_env() -> Result { }) .unwrap_or_default(); - Ok(EnvConfig { cache, basedir }) + Ok(EnvConfig { cache, basedirs }) } // The directories crate changed the location of `config_dir` on macos in version 3, @@ -1016,8 +996,7 @@ pub struct Config { pub server_startup_timeout: Option, /// Base directory (or directories) to strip from paths for cache key computation. /// Similar to ccache's CCACHE_BASEDIR. - /// Currently only the first directory is used if multiple are specified. - pub basedir: Vec, + pub basedirs: Vec, } impl Config { @@ -1039,7 +1018,7 @@ impl Config { cache, dist, server_startup_timeout_ms, - basedir: file_basedir, + basedirs: file_basedirs, } = file_conf; conf_caches.merge(cache); @@ -1048,15 +1027,15 @@ impl Config { let EnvConfig { cache, - basedir: env_basedir, + basedirs: env_basedirs, } = env_conf; conf_caches.merge(cache); // Environment variable takes precedence over file config - let basedir = if !env_basedir.is_empty() { - env_basedir + let basedirs = if !env_basedirs.is_empty() { + env_basedirs } else { - file_basedir + file_basedirs }; let (caches, fallback_cache) = conf_caches.into_fallback(); @@ -1065,7 +1044,7 @@ impl Config { fallback_cache, dist, server_startup_timeout, - basedir, + basedirs, } } } @@ -1339,7 +1318,7 @@ fn config_overrides() { }), ..Default::default() }, - basedir: vec![], + basedirs: vec![], }; let file_conf = FileConfig { @@ -1366,7 +1345,7 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![], + basedirs: vec![], }; assert_eq!( @@ -1389,90 +1368,71 @@ fn config_overrides() { }, dist: Default::default(), server_startup_timeout: None, - basedir: vec![], + basedirs: vec![], } ); } #[test] -fn config_basedir_overrides() { +fn config_basedirs_overrides() { use std::path::PathBuf; // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedir: vec![PathBuf::from("/env/basedir")], + basedirs: vec![PathBuf::from("/env/basedir")], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![PathBuf::from("/file/basedir")], + basedirs: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, vec![PathBuf::from("/env/basedir")]); + assert_eq!(config.basedirs, vec![PathBuf::from("/env/basedir")]); // Test that file config is used when env is empty let env_conf = EnvConfig { cache: Default::default(), - basedir: vec![], + basedirs: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![PathBuf::from("/file/basedir")], + basedirs: vec![PathBuf::from("/file/basedir")], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, vec![PathBuf::from("/file/basedir")]); + assert_eq!(config.basedirs, vec![PathBuf::from("/file/basedir")]); // Test that both empty results in empty let env_conf = EnvConfig { cache: Default::default(), - basedir: vec![], + basedirs: vec![], }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedir: vec![], + basedirs: vec![], }; let config = Config::from_env_and_file_configs(env_conf, file_conf); - assert_eq!(config.basedir, Vec::::new()); + assert_eq!(config.basedirs, Vec::::new()); } #[test] -fn test_deserialize_basedir_single() { - use std::path::PathBuf; - - // Test single string value - let toml = r#" - basedir = "/home/user/project" - - [cache.disk] - dir = "/tmp/cache" - size = 1073741824 - - [dist] - "#; - - let config: FileConfig = toml::from_str(toml).unwrap(); - assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); -} - -#[test] -fn test_deserialize_basedir_multiple() { +fn test_deserialize_basedirs() { use std::path::PathBuf; // Test array of paths let toml = r#" - basedir = ["/home/user/project", "/home/user/workspace"] + basedirs = ["/home/user/project", "/home/user/workspace"] [cache.disk] dir = "/tmp/cache" @@ -1483,7 +1443,7 @@ fn test_deserialize_basedir_multiple() { let config: FileConfig = toml::from_str(toml).unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") @@ -1492,10 +1452,10 @@ fn test_deserialize_basedir_multiple() { } #[test] -fn test_deserialize_basedir_missing() { +fn test_deserialize_basedirs_missing() { use std::path::PathBuf; - // Test no basedir specified (should default to empty vec) + // Test no basedirs specified (should default to empty vec) let toml = r#" [cache.disk] dir = "/tmp/cache" @@ -1505,103 +1465,106 @@ fn test_deserialize_basedir_missing() { "#; let config: FileConfig = toml::from_str(toml).unwrap(); - assert_eq!(config.basedir, Vec::::new()); + assert_eq!(config.basedirs, Vec::::new()); } #[test] #[serial] -fn test_env_basedir_single() { +fn test_env_basedirs_single() { use std::path::PathBuf; unsafe { - std::env::set_var("SCCACHE_BASEDIR", "/home/user/project"); + std::env::set_var("SCCACHE_BASEDIRS", "/home/user/project"); } let config = config_from_env().unwrap(); - assert_eq!(config.basedir, vec![PathBuf::from("/home/user/project")]); + assert_eq!(config.basedirs, vec![PathBuf::from("/home/user/project")]); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_multiple() { +fn test_env_basedirs_multiple() { use std::path::PathBuf; unsafe { - std::env::set_var("SCCACHE_BASEDIR", "/home/user/project|/home/user/workspace"); + std::env::set_var( + "SCCACHE_BASEDIRS", + "/home/user/project|/home/user/workspace", + ); } let config = config_from_env().unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") ] ); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_with_spaces() { +fn test_env_basedirs_with_spaces() { use std::path::PathBuf; // Test that spaces around paths are trimmed unsafe { std::env::set_var( - "SCCACHE_BASEDIR", + "SCCACHE_BASEDIRS", " /home/user/project | /home/user/workspace ", ); } let config = config_from_env().unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") ] ); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_empty_entries() { +fn test_env_basedirs_empty_entries() { use std::path::PathBuf; // Test that empty entries are filtered out unsafe { std::env::set_var( - "SCCACHE_BASEDIR", + "SCCACHE_BASEDIRS", "/home/user/project||/home/user/workspace", ); } let config = config_from_env().unwrap(); assert_eq!( - config.basedir, + config.basedirs, vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") ] ); unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } } #[test] #[serial] -fn test_env_basedir_not_set() { +fn test_env_basedirs_not_set() { unsafe { - std::env::remove_var("SCCACHE_BASEDIR"); + std::env::remove_var("SCCACHE_BASEDIRS"); } let config = config_from_env().unwrap(); - assert_eq!(config.basedir, Vec::::new()); + assert_eq!(config.basedirs, Vec::::new()); } #[test] @@ -1909,7 +1872,7 @@ no_credentials = true rewrite_includes_only: false, }, server_startup_timeout_ms: Some(10000), - basedir: vec![], + basedirs: vec![], } ) } @@ -2002,7 +1965,7 @@ size = "7g" ..Default::default() }, server_startup_timeout_ms: None, - basedir: vec![], + basedirs: vec![], } ); } diff --git a/src/util.rs b/src/util.rs index 94686e0d6..c31296b9a 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1029,7 +1029,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec = basedirs + let mut basedirs_data: Vec<_> = basedirs .iter() .map(|basedir| { let normalized = basedir.to_string_lossy(); @@ -1039,12 +1039,12 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Date: Tue, 23 Dec 2025 14:38:48 +0100 Subject: [PATCH 06/31] Cover Windows case for mixed slashes in paths --- src/util.rs | 78 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/src/util.rs b/src/util.rs index c31296b9a..4fdeedebc 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1021,6 +1021,9 @@ pub fn num_cpus() -> usize { /// replaces them with relative path markers. When multiple basedirs are provided, /// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. /// +/// On Windows, this function handles paths with mixed forward and backward slashes, +/// which can occur when different build tools produce preprocessor output. +/// /// Only paths that start with one of the basedirs are modified. The paths are expected to be /// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec { @@ -1055,21 +1058,33 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Vec { + path.iter() + .map(|&b| if b == b'\\' { b'/' } else { b }) + .collect() +} + +#[cfg(not(target_os = "windows"))] +fn normalize_path_slashes(path: &[u8]) -> Vec { + path.to_vec() +} + #[cfg(test)] mod tests { use super::{OsStrExt, TimeMacroFinder}; @@ -1360,4 +1389,23 @@ mod tests { let expected = b"# 1 \".\\src\\main.c\""; assert_eq!(output, expected); } + + #[cfg(target_os = "windows")] + #[test] + fn test_strip_basedir_windows_mixed_slashes() { + use std::path::PathBuf; + + // Mixed forward and backslashes in input (common from certain build systems) + let basedir = PathBuf::from("C:\\Users\\test\\project"); + let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \".\\src/main.c\""; + assert_eq!(output, expected, "Failed to strip mixed slash path"); + + // Also test the reverse case + let input = b"# 1 \"C:\\Users/test/project/src\\main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src\\main.c\""; + assert_eq!(output, expected, "Failed to strip reverse mixed slash path"); + } } From c6b2e8bd4ce7c676e352bd7c727070124834b4cd Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 23 Dec 2025 14:57:22 +0100 Subject: [PATCH 07/31] Add `Base directories` to `--show-stats` output --- src/server.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/server.rs b/src/server.rs index e44c00bed..6561225be 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1636,6 +1636,7 @@ pub struct ServerInfo { pub max_cache_size: Option, pub use_preprocessor_cache_mode: bool, pub version: String, + pub basedirs: Vec, } /// Status of the dist client. @@ -1932,6 +1933,7 @@ impl ServerInfo { let use_preprocessor_cache_mode; let cache_size; let max_cache_size; + let basedirs; if let Some(storage) = storage { cache_location = storage.location(); use_preprocessor_cache_mode = storage @@ -1939,11 +1941,17 @@ impl ServerInfo { .use_preprocessor_cache_mode; (cache_size, max_cache_size) = futures::try_join!(storage.current_size(), storage.max_size())?; + basedirs = storage + .basedirs() + .iter() + .map(|p| p.to_string_lossy().to_string()) + .collect(); } else { cache_location = String::new(); use_preprocessor_cache_mode = false; cache_size = None; max_cache_size = None; + basedirs = Vec::new(); } let version = env!("CARGO_PKG_VERSION").to_string(); Ok(ServerInfo { @@ -1953,6 +1961,7 @@ impl ServerInfo { max_cache_size, use_preprocessor_cache_mode, version, + basedirs, }) } @@ -1965,6 +1974,16 @@ impl ServerInfo { self.cache_location, name_width = name_width ); + println!( + "{: Date: Tue, 23 Dec 2025 21:11:40 +0100 Subject: [PATCH 08/31] Make `basedirs` match case insensitive on Windows --- README.md | 2 ++ docs/Configuration.md | 3 ++- src/util.rs | 62 ++++++++++++++++++++++++++++++++----------- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 545aa7369..a323ed604 100644 --- a/README.md +++ b/README.md @@ -335,6 +335,8 @@ You can also specify multiple base directories by separating them with `|` (pipe export SCCACHE_BASEDIRS="/home/user/project|/home/user/workspace" ``` +Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. + This is similar to ccache's `CCACHE_BASEDIR` and helps when: * Building the same project from different directories * Sharing cache between CI jobs with different checkout paths diff --git a/docs/Configuration.md b/docs/Configuration.md index 822e4322e..0da157ebf 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -11,6 +11,7 @@ server_startup_timeout_ms = 10000 # different absolute paths when compiling the same source code. # Can be an array of paths. When multiple paths are provided, # the longest matching prefix is used. +# Path matching is case-insensitive on Windows and case-sensitive on other OSes. # For example, if basedir is "/home/user/project", then paths like # "/home/user/project/src/main.c" will be normalized to "./src/main.c" # for caching purposes. @@ -146,7 +147,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/util.rs b/src/util.rs index 4fdeedebc..0f04e6126 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1021,8 +1021,10 @@ pub fn num_cpus() -> usize { /// replaces them with relative path markers. When multiple basedirs are provided, /// the longest matching prefix is used. This is similar to ccache's CCACHE_BASEDIR. /// -/// On Windows, this function handles paths with mixed forward and backward slashes, -/// which can occur when different build tools produce preprocessor output. +/// Path matching is case-insensitive to handle various filesystem behaviors and build system +/// configurations uniformly across all operating systems. On Windows, this function also handles +/// paths with mixed forward and backward slashes, which can occur when different build tools +/// produce preprocessor output. /// /// Only paths that start with one of the basedirs are modified. The paths are expected to be /// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). @@ -1059,19 +1061,19 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Vec { +fn normalize_path(path: &[u8]) -> Vec { path.iter() - .map(|&b| if b == b'\\' { b'/' } else { b }) + .map(|&b| match b { + b'A'..=b'Z' => b + 32, + b'\\' => b'/', + _ => b, + }) .collect() } #[cfg(not(target_os = "windows"))] -fn normalize_path_slashes(path: &[u8]) -> Vec { +fn normalize_path(path: &[u8]) -> Vec { path.to_vec() } @@ -1370,6 +1377,29 @@ mod tests { assert_eq!(output, expected); } + #[test] + fn test_strip_basedir_case_insensitive() { + use std::path::PathBuf; + + // Case insensitive matching - basedir in lowercase, input in uppercase + let basedir = PathBuf::from("/home/user/project"); + let input = b"# 1 \"/HOME/USER/PROJECT/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + let expected = b"# 1 \"./src/main.c\""; + assert_eq!(output, expected); + + // Mixed case in both + let input = b"# 1 \"/Home/User/Project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + assert_eq!(output, expected); + + // Basedir in uppercase, input in lowercase + let basedir = PathBuf::from("/HOME/USER/PROJECT"); + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); + assert_eq!(output, expected); + } + #[cfg(target_os = "windows")] #[test] fn test_strip_basedir_windows_backslashes() { From d01046f13b33ed03f8436f460ee0bfe7eab88131 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 24 Dec 2025 00:04:36 +0100 Subject: [PATCH 09/31] Add trace logs to strip_basedirs, fix remainings --- src/compiler/c.rs | 1 + src/compiler/preprocessor_cache.rs | 99 +++++++++++++++++++++++++++++- src/util.rs | 10 +++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index c0227f56f..a37a18064 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -452,6 +452,7 @@ where &absolute_input_path, self.compiler.plusplus(), preprocessor_cache_mode_config, + storage.basedirs(), )? } else { None diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index d03cd6e18..788fe662c 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -381,6 +381,7 @@ pub fn preprocessor_cache_entry_hash_key( input_file: &Path, plusplus: bool, config: PreprocessorCacheModeConfig, + basedirs: &[PathBuf], ) -> anyhow::Result> { // If you change any of the inputs to the hash, you should change `FORMAT_VERSION`. let mut m = Digest::new(); @@ -414,7 +415,15 @@ pub fn preprocessor_cache_entry_hash_key( // share preprocessor cache entries and a/r.h exists. let mut buf = vec![]; encode_path(&mut buf, input_file)?; - m.update(&buf); + + // Strip basedirs from the input file path if configured + let buf_to_hash = if !basedirs.is_empty() { + use crate::util::strip_basedirs; + strip_basedirs(&buf, basedirs) + } else { + buf.clone() + }; + m.update(&buf_to_hash); let reader = std::fs::File::open(input_file) .with_context(|| format!("while hashing the input file '{}'", input_file.display()))?; @@ -634,4 +643,92 @@ mod test { assert!(!finder.found_timestamp()); assert!(!finder.found_date()); } + + #[test] + fn test_preprocessor_cache_entry_hash_key_basedirs() { + use std::fs; + use tempfile::TempDir; + + // Create two different base directories + let dir1 = TempDir::new().unwrap(); + let dir2 = TempDir::new().unwrap(); + + // Create identical files with the same relative path in each directory + let file1_path = dir1.path().join("test.c"); + let file2_path = dir2.path().join("test.c"); + + let content = b"int main() { return 0; }"; + fs::write(&file1_path, content).unwrap(); + fs::write(&file2_path, content).unwrap(); + + let config = PreprocessorCacheModeConfig::activated(); + + // Test 1: With basedirs, hashes should be the same + let hash1_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + ) + .unwrap() + .unwrap(); + + let hash2_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + ) + .unwrap() + .unwrap(); + + assert_eq!( + hash1_with_basedirs, hash2_with_basedirs, + "Hashes should be equal when using basedirs with identical files in different directories" + ); + + // Test 2: Without basedirs, hashes should be different + let hash1_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + let hash2_no_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &[], + ) + .unwrap() + .unwrap(); + + assert_ne!( + hash1_no_basedirs, hash2_no_basedirs, + "Hashes should be different without basedirs for files in different directories" + ); + } } diff --git a/src/util.rs b/src/util.rs index 0f04e6126..bbd8a8966 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1051,6 +1051,12 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Date: Wed, 24 Dec 2025 13:35:36 +0100 Subject: [PATCH 10/31] Remove outdated test --- src/util.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/util.rs b/src/util.rs index bbd8a8966..134ec0686 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1387,29 +1387,6 @@ mod tests { assert_eq!(output, expected); } - #[test] - fn test_strip_basedir_case_insensitive() { - use std::path::PathBuf; - - // Case insensitive matching - basedir in lowercase, input in uppercase - let basedir = PathBuf::from("/home/user/project"); - let input = b"# 1 \"/HOME/USER/PROJECT/src/main.c\""; - let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src/main.c\""; - assert_eq!(output, expected); - - // Mixed case in both - let input = b"# 1 \"/Home/User/Project/src/main.c\""; - let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - assert_eq!(output, expected); - - // Basedir in uppercase, input in lowercase - let basedir = PathBuf::from("/HOME/USER/PROJECT"); - let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - assert_eq!(output, expected); - } - #[cfg(target_os = "windows")] #[test] fn test_strip_basedir_windows_backslashes() { From ba461c8ebe84dd05d718805f7093aa3aff340108 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Sun, 28 Dec 2025 19:40:30 +0100 Subject: [PATCH 11/31] Optimize strip_basedirs for big input --- src/util.rs | 119 +++++++++++++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/src/util.rs b/src/util.rs index 134ec0686..b2dfc9e30 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1034,79 +1034,94 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec = basedirs + let basedirs_data: Vec<_> = basedirs .iter() .map(|basedir| { - let normalized = basedir.to_string_lossy(); - let trimmed = normalized.trim_end_matches('/').trim_end_matches('\\'); - (trimmed.as_bytes().to_vec(), trimmed.len()) + let basedir_str = basedir.to_string_lossy(); + let trimmed = basedir_str.trim_end_matches('/').trim_end_matches('\\'); + let normalized = normalize_path(trimmed.as_bytes()); + (trimmed.as_bytes().to_vec(), normalized, trimmed.len()) }) - .filter(|(bytes, _)| !bytes.is_empty()) + .filter(|(bytes, _, _)| !bytes.is_empty()) .collect(); if basedirs_data.is_empty() { return preprocessor_output.to_vec(); } - // Sort by length descending (longest first) - basedirs_data.sort_by(|a, b| b.1.cmp(&a.1)); - trace!( "Stripping basedirs from preprocessor output with length {}: {:?}", preprocessor_output.len(), basedirs ); - let mut result = Vec::with_capacity(preprocessor_output.len()); - let mut i = 0; - - while i < preprocessor_output.len() { - let mut matched = false; - - // Try to match each basedir (longest first) - for (basedir_bytes, basedir_len) in &basedirs_data { - // Check if we have a match for this basedir at current position - if i + basedir_len <= preprocessor_output.len() { - let candidate = &preprocessor_output[i..i + basedir_len]; - - // Try exact match first - let exact_match = candidate == basedir_bytes; - - // Try case-insensitive match - let normalized_match = if !exact_match { - normalize_path(candidate) == normalize_path(basedir_bytes) - } else { - false - }; - - if exact_match || normalized_match { - // Check if this is actually a path boundary (preceded by whitespace, quote, or start) - let is_boundary = i == 0 - || preprocessor_output[i - 1].is_ascii_whitespace() - || preprocessor_output[i - 1] == b'"' - || preprocessor_output[i - 1] == b'<'; - - if is_boundary { - // Replace basedir with "." - result.push(b'.'); - i += basedir_len; - matched = true; - trace!( - "Stripped basedir: {}", - String::from_utf8_lossy(basedir_bytes) - ); - break; - } - } + // Find all potential matches for each basedir using fast substring search + // Store as (position, basedir_index, length) sorted by position + let mut matches: Vec<(usize, usize, usize)> = Vec::new(); + #[cfg(target_os = "windows")] + let preprocessor_output_normalized = normalize_path(preprocessor_output); + + for (basedir_idx, basedir) in basedirs_data.iter().enumerate() { + let basedir_len = basedir.2; + // Use memchr's fast substring search + // Case sensitive + #[cfg(not(target_os = "windows"))] + let finder = memchr::memmem::find_iter(preprocessor_output, &basedir.0); + // Case insensitive + #[cfg(target_os = "windows")] + let finder = memchr::memmem::find_iter(&preprocessor_output_normalized, &basedir.1); + + for pos in finder { + // Check if this is a valid boundary (start, whitespace, quote, or '<') + let is_boundary = pos == 0 + || preprocessor_output[pos - 1].is_ascii_whitespace() + || preprocessor_output[pos - 1] == b'"' + || preprocessor_output[pos - 1] == b'<'; + + if is_boundary { + matches.push((pos, basedir_idx, basedir_len)); } } + } + + if matches.is_empty() { + return preprocessor_output.to_vec(); + } - if !matched { - result.push(preprocessor_output[i]); - i += 1; + // Sort matches by position, then by length descending (longest first for overlaps) + matches.sort_by(|a, b| a.0.cmp(&b.0).then(b.2.cmp(&a.2))); + + // Remove overlapping matches, keeping the longest match at each position + let mut filtered_matches: Vec<(usize, usize)> = Vec::new(); + let mut last_end = 0; + + for (pos, _basedir_idx, len) in matches { + if pos >= last_end { + filtered_matches.push((pos, len)); + last_end = pos + len; + trace!( + "Matched basedir at position {} with length {}", + pos, + len + ); } } + // Build the result in a single pass + let mut result = Vec::with_capacity(preprocessor_output.len()); + let mut current_pos = 0; + + for (match_pos, match_len) in filtered_matches { + // Copy everything before the match + result.extend_from_slice(&preprocessor_output[current_pos..match_pos]); + // Replace the basedir with "." + result.push(b'.'); + current_pos = match_pos + match_len; + } + + // Copy remaining data + result.extend_from_slice(&preprocessor_output[current_pos..]); + result } From 6b4d944fc105470c9430cf667779eed005477d46 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 29 Dec 2025 18:11:47 +0100 Subject: [PATCH 12/31] Apply suggestions from code review Co-authored-by: whisperity --- docs/Configuration.md | 25 ++++++++++++++++--------- src/config.rs | 3 +-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/docs/Configuration.md b/docs/Configuration.md index 0da157ebf..585729df1 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -6,17 +6,24 @@ # If specified, wait this long for the server to start up. server_startup_timeout_ms = 10000 -# Base directory (or directories) to strip from paths for cache key computation. -# Similar to ccache's CCACHE_BASEDIR. This enables cache hits across -# different absolute paths when compiling the same source code. -# Can be an array of paths. When multiple paths are provided, -# the longest matching prefix is used. +# Base directories to strip from source paths during cache key +# computation. +# +# Similar to ccache's CCACHE_BASEDIR, but supports multiple paths. +# +# 'basedirs' enables cache hits across different absolute root +# paths when compiling the same source code, such as between +# parallel checkouts of the same project, Git worktrees, or different +# users in a shared environment. +# When multiple paths are provided, the longest matching prefix +# is applied. +# # Path matching is case-insensitive on Windows and case-sensitive on other OSes. -# For example, if basedir is "/home/user/project", then paths like -# "/home/user/project/src/main.c" will be normalized to "./src/main.c" -# for caching purposes. +# +# Example: +# basedir = ["/home/user/project"] results in the path prefix rewrite: +# "/home/user/project/src/main.c" -> "./src/main.c" basedirs = ["/home/user/project"] -# Or multiple directories: # basedirs = ["/home/user/project", "/home/user/workspace"] [dist] diff --git a/src/config.rs b/src/config.rs index ea6ec12e2..ed4e0c432 100644 --- a/src/config.rs +++ b/src/config.rs @@ -584,8 +584,7 @@ pub struct FileConfig { pub cache: CacheConfigs, pub dist: DistConfig, pub server_startup_timeout_ms: Option, - /// Base directory (or directories) to strip from paths for cache key computation. - /// Can be a single path or an array of paths. + /// Base directories to strip from paths for cache key computation. pub basedirs: Vec, } From 3f8bd2d3dce78bc4d76252918f2da56daaf7e5ac Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 29 Dec 2025 15:53:33 +0100 Subject: [PATCH 13/31] Move basedir validation to config building --- src/cache/cache.rs | 23 +---------------------- src/config.rs | 33 +++++++++++++++++++++++---------- src/util.rs | 6 +----- 3 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/cache/cache.rs b/src/cache/cache.rs index eed89a460..bf5abe826 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -741,34 +741,13 @@ pub fn storage_from_config( let rw_mode = config.fallback_cache.rw_mode.into(); debug!("Init disk cache with dir {:?}, size {}", dir, size); - // Validate that all basedirs are absolute paths - let basedirs: Vec = config - .basedirs - .iter() - .filter_map(|p| { - if p.is_absolute() { - Some(p.clone()) - } else { - warn!( - "Ignoring relative basedir path: {:?}. Only absolute paths are supported.", - p - ); - None - } - }) - .collect(); - - if !basedirs.is_empty() { - debug!("Using basedirs for path normalization: {:?}", basedirs); - } - Ok(Arc::new(DiskCache::new( dir, size, pool, preprocessor_cache_mode_config, rw_mode, - basedirs, + config.basedirs.clone(), ))) } diff --git a/src/config.rs b/src/config.rs index ed4e0c432..68337daaf 100644 --- a/src/config.rs +++ b/src/config.rs @@ -26,7 +26,7 @@ use serde::{ use serial_test::serial; use std::env; use std::io::{Read, Write}; -use std::path::{Path, PathBuf}; +use std::path::{Path, PathBuf, absolute}; use std::result::Result as StdResult; use std::str::FromStr; use std::sync::{LazyLock, Mutex}; @@ -1007,10 +1007,10 @@ impl Config { .context("Failed to load config file")? .unwrap_or_default(); - Ok(Self::from_env_and_file_configs(env_conf, file_conf)) + Self::from_env_and_file_configs(env_conf, file_conf) } - fn from_env_and_file_configs(env_conf: EnvConfig, file_conf: FileConfig) -> Self { + fn from_env_and_file_configs(env_conf: EnvConfig, file_conf: FileConfig) -> Result { let mut conf_caches: CacheConfigs = Default::default(); let FileConfig { @@ -1031,20 +1031,33 @@ impl Config { conf_caches.merge(cache); // Environment variable takes precedence over file config - let basedirs = if !env_basedirs.is_empty() { + let basedirs_raw = if !env_basedirs.is_empty() { env_basedirs } else { file_basedirs }; + // Validate that all basedirs are absolute paths + let mut basedirs = Vec::new(); + for p in basedirs_raw { + if !p.is_absolute() { + bail!("Basedir path must be absolute: {:?}", p); + } + basedirs.push(absolute(&p)?); + } + + if !basedirs.is_empty() { + debug!("Using basedirs for path normalization: {:?}", basedirs); + } + let (caches, fallback_cache) = conf_caches.into_fallback(); - Self { + Ok(Self { cache: caches, fallback_cache, dist, server_startup_timeout, basedirs, - } + }) } } @@ -1348,7 +1361,7 @@ fn config_overrides() { }; assert_eq!( - Config::from_env_and_file_configs(env_conf, file_conf), + Config::from_env_and_file_configs(env_conf, file_conf).unwrap(), Config { cache: Some(CacheType::Redis(RedisCacheConfig { endpoint: Some("myotherredisurl".to_owned()), @@ -1389,7 +1402,7 @@ fn config_basedirs_overrides() { basedirs: vec![PathBuf::from("/file/basedir")], }; - let config = Config::from_env_and_file_configs(env_conf, file_conf); + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); assert_eq!(config.basedirs, vec![PathBuf::from("/env/basedir")]); // Test that file config is used when env is empty @@ -1405,7 +1418,7 @@ fn config_basedirs_overrides() { basedirs: vec![PathBuf::from("/file/basedir")], }; - let config = Config::from_env_and_file_configs(env_conf, file_conf); + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); assert_eq!(config.basedirs, vec![PathBuf::from("/file/basedir")]); // Test that both empty results in empty @@ -1421,7 +1434,7 @@ fn config_basedirs_overrides() { basedirs: vec![], }; - let config = Config::from_env_and_file_configs(env_conf, file_conf); + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); assert_eq!(config.basedirs, Vec::::new()); } diff --git a/src/util.rs b/src/util.rs index b2dfc9e30..ebd753e6b 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1099,11 +1099,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec= last_end { filtered_matches.push((pos, len)); last_end = pos + len; - trace!( - "Matched basedir at position {} with length {}", - pos, - len - ); + trace!("Matched basedir at position {} with length {}", pos, len); } } From 962dcd5ffcf6d66f5ea93905badb5df2f4b90e84 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 29 Dec 2025 16:10:56 +0100 Subject: [PATCH 14/31] Add basedirs for remote cache implementations --- src/cache/cache.rs | 129 +++++++++++++++++++++++++++++++++++++----- src/cache/readonly.rs | 40 +++++++++++++ 2 files changed, 156 insertions(+), 13 deletions(-) diff --git a/src/cache/cache.rs b/src/cache/cache.rs index bf5abe826..8d9c0043a 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -457,6 +457,38 @@ impl PreprocessorCacheModeConfig { } } +/// Wrapper for opendal::Operator that adds basedirs support +#[cfg(any( + feature = "azure", + feature = "gcs", + feature = "gha", + feature = "memcached", + feature = "redis", + feature = "s3", + feature = "webdav", + feature = "oss", +))] +pub struct RemoteStorage { + operator: opendal::Operator, + basedirs: Vec, +} + +#[cfg(any( + feature = "azure", + feature = "gcs", + feature = "gha", + feature = "memcached", + feature = "redis", + feature = "s3", + feature = "webdav", + feature = "oss", +))] +impl RemoteStorage { + pub fn new(operator: opendal::Operator, basedirs: Vec) -> Self { + Self { operator, basedirs } + } +} + /// Implement storage for operator. #[cfg(any( feature = "azure", @@ -466,11 +498,12 @@ impl PreprocessorCacheModeConfig { feature = "redis", feature = "s3", feature = "webdav", + feature = "oss", ))] #[async_trait] -impl Storage for opendal::Operator { +impl Storage for RemoteStorage { async fn get(&self, key: &str) -> Result { - match self.read(&normalize_key(key)).await { + match self.operator.read(&normalize_key(key)).await { Ok(res) => { let hit = CacheRead::from(io::Cursor::new(res.to_bytes()))?; Ok(Cache::Hit(hit)) @@ -486,7 +519,9 @@ impl Storage for opendal::Operator { async fn put(&self, key: &str, entry: CacheWrite) -> Result { let start = std::time::Instant::now(); - self.write(&normalize_key(key), entry.finish()?).await?; + self.operator + .write(&normalize_key(key), entry.finish()?) + .await?; Ok(start.elapsed()) } @@ -497,7 +532,7 @@ impl Storage for opendal::Operator { let path = ".sccache_check"; // Read is required, return error directly if we can't read . - match self.read(path).await { + match self.operator.read(path).await { Ok(_) => (), // Read not exist file with not found is ok. Err(err) if err.kind() == ErrorKind::NotFound => (), @@ -516,7 +551,7 @@ impl Storage for opendal::Operator { Err(err) => bail!("cache storage failed to read: {:?}", err), } - let can_write = match self.write(path, "Hello, World!").await { + let can_write = match self.operator.write(path, "Hello, World!").await { Ok(_) => true, Err(err) if err.kind() == ErrorKind::AlreadyExists => true, // Tolerate all other write errors because we can do read at least. @@ -538,7 +573,7 @@ impl Storage for opendal::Operator { } fn location(&self) -> String { - let meta = self.info(); + let meta = self.operator.info(); format!( "{}, name: {}, prefix: {}", meta.scheme(), @@ -554,6 +589,10 @@ impl Storage for opendal::Operator { async fn max_size(&self) -> Result> { Ok(None) } + + fn basedirs(&self) -> &[PathBuf] { + &self.basedirs + } } /// Normalize key `abcdef` into `a/b/c/abcdef` @@ -576,8 +615,9 @@ pub fn storage_from_config( key_prefix, }) => { debug!("Init azure cache with container {container}, key_prefix {key_prefix}"); - let storage = AzureBlobCache::build(connection_string, container, key_prefix) + let operator = AzureBlobCache::build(connection_string, container, key_prefix) .map_err(|err| anyhow!("create azure cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "gcs")] @@ -591,7 +631,7 @@ pub fn storage_from_config( }) => { debug!("Init gcs cache with bucket {bucket}, key_prefix {key_prefix}"); - let storage = GCSCache::build( + let operator = GCSCache::build( bucket, key_prefix, cred_path.as_deref(), @@ -601,14 +641,16 @@ pub fn storage_from_config( ) .map_err(|err| anyhow!("create gcs cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "gha")] CacheType::GHA(config::GHACacheConfig { version, .. }) => { debug!("Init gha cache with version {version}"); - let storage = GHACache::build(version) + let operator = GHACache::build(version) .map_err(|err| anyhow!("create gha cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "memcached")] @@ -621,7 +663,7 @@ pub fn storage_from_config( }) => { debug!("Init memcached cache with url {url}"); - let storage = MemcachedCache::build( + let operator = MemcachedCache::build( url, username.as_deref(), password.as_deref(), @@ -629,6 +671,7 @@ pub fn storage_from_config( *expiration, ) .map_err(|err| anyhow!("create memcached cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "redis")] @@ -676,6 +719,7 @@ pub fn storage_from_config( _ => bail!("Only one of `endpoint`, `cluster_endpoints`, `url` must be set"), } .map_err(|err| anyhow!("create redis cache failed: {err:?}"))?; + let storage = RemoteStorage::new(storage, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "s3")] @@ -686,7 +730,7 @@ pub fn storage_from_config( ); let storage_builder = S3Cache::new(c.bucket.clone(), c.key_prefix.clone(), c.no_credentials); - let storage = storage_builder + let operator = storage_builder .with_region(c.region.clone()) .with_endpoint(c.endpoint.clone()) .with_use_ssl(c.use_ssl) @@ -695,13 +739,14 @@ pub fn storage_from_config( .build() .map_err(|err| anyhow!("create s3 cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "webdav")] CacheType::Webdav(c) => { debug!("Init webdav cache with endpoint {}", c.endpoint); - let storage = WebdavCache::build( + let operator = WebdavCache::build( &c.endpoint, &c.key_prefix, c.username.as_deref(), @@ -710,6 +755,7 @@ pub fn storage_from_config( ) .map_err(|err| anyhow!("create webdav cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[cfg(feature = "oss")] @@ -719,7 +765,7 @@ pub fn storage_from_config( c.bucket, c.endpoint ); - let storage = OSSCache::build( + let operator = OSSCache::build( &c.bucket, &c.key_prefix, c.endpoint.as_deref(), @@ -727,6 +773,7 @@ pub fn storage_from_config( ) .map_err(|err| anyhow!("create oss cache failed: {err:?}"))?; + let storage = RemoteStorage::new(operator, config.basedirs.clone()); return Ok(Arc::new(storage)); } #[allow(unreachable_patterns)] @@ -829,4 +876,60 @@ mod test { }); } } + + #[test] + #[cfg(feature = "s3")] + fn test_operator_storage_s3_with_basedirs() { + use std::path::PathBuf; + + // Create S3 operator (doesn't need real credentials for this test) + let operator = crate::cache::s3::S3Cache::new( + "test-bucket".to_string(), + "test-prefix".to_string(), + true, // no_credentials = true + ) + .with_region(Some("us-east-1".to_string())) + .build() + .expect("Failed to create S3 cache operator"); + + let basedirs = vec![ + PathBuf::from("/home/user/project"), + PathBuf::from("/opt/build"), + ]; + + // Wrap with OperatorStorage + let storage = RemoteStorage::new(operator, basedirs.clone()); + + // Verify basedirs are stored and retrieved correctly + assert_eq!(storage.basedirs(), basedirs.as_slice()); + assert_eq!(storage.basedirs().len(), 2); + assert_eq!(storage.basedirs()[0], PathBuf::from("/home/user/project")); + assert_eq!(storage.basedirs()[1], PathBuf::from("/opt/build")); + } + + #[test] + #[cfg(feature = "redis")] + fn test_operator_storage_redis_with_basedirs() { + use std::path::PathBuf; + + // Create Redis operator + let operator = crate::cache::redis::RedisCache::build_single( + "redis://localhost:6379", + None, + None, + 0, + "test-prefix", + 0, + ) + .expect("Failed to create Redis cache operator"); + + let basedirs = vec![PathBuf::from("/workspace")]; + + // Wrap with OperatorStorage + let storage = RemoteStorage::new(operator, basedirs.clone()); + + // Verify basedirs work + assert_eq!(storage.basedirs(), basedirs.as_slice()); + assert_eq!(storage.basedirs().len(), 1); + } } diff --git a/src/cache/readonly.rs b/src/cache/readonly.rs index 90431c4fb..eef075c9b 100644 --- a/src/cache/readonly.rs +++ b/src/cache/readonly.rs @@ -10,6 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; @@ -64,6 +65,11 @@ impl Storage for ReadOnlyStorage { self.0.preprocessor_cache_mode_config() } + /// Return the base directories for path normalization if configured + fn basedirs(&self) -> &[PathBuf] { + self.0.basedirs() + } + /// Return the preprocessor cache entry for a given preprocessor key, /// if it exists. /// Only applicable when using preprocessor cache mode. @@ -121,6 +127,40 @@ mod test { ); } + #[test] + fn readonly_storage_forwards_basedirs() { + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .worker_threads(1) + .build() + .unwrap(); + + let tempdir = tempfile::Builder::new() + .prefix("sccache_test_readonly_basedirs") + .tempdir() + .expect("Failed to create tempdir"); + let cache_dir = tempdir.path().join("cache"); + std::fs::create_dir(&cache_dir).unwrap(); + + let basedirs = vec![ + std::path::PathBuf::from("/home/user/project"), + std::path::PathBuf::from("/home/user/workspace"), + ]; + + let disk_cache = crate::cache::disk::DiskCache::new( + &cache_dir, + 1024 * 1024, + runtime.handle(), + super::super::PreprocessorCacheModeConfig::default(), + super::super::CacheMode::ReadWrite, + basedirs.clone(), + ); + + let readonly_storage = ReadOnlyStorage(std::sync::Arc::new(disk_cache)); + + assert_eq!(readonly_storage.basedirs(), basedirs.as_slice()); + } + #[test] fn readonly_storage_put_err() { let runtime = tokio::runtime::Builder::new_current_thread() From 86c26c5335bfdfd77b256cda04337574a4fe4e14 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 29 Dec 2025 18:39:05 +0100 Subject: [PATCH 15/31] Address review point for directories separator --- README.md | 6 +- docs/Configuration.md | 4 +- src/config.rs | 172 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 172 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a323ed604..2ca2721c6 100644 --- a/README.md +++ b/README.md @@ -329,10 +329,10 @@ By default, sccache requires absolute paths to match for cache hits. To enable c export SCCACHE_BASEDIRS=/home/user/project ``` -You can also specify multiple base directories by separating them with `|` (pipe character). When multiple directories are provided, the longest matching prefix is used: +You can also specify multiple base directories by separating them by `;` on Windows hosts and by `:` on any other. When multiple directories are provided, the longest matching prefix is used: ```bash -export SCCACHE_BASEDIRS="/home/user/project|/home/user/workspace" +export SCCACHE_BASEDIRS="/home/user/project:/home/user/workspace" ``` Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. @@ -343,7 +343,7 @@ This is similar to ccache's `CCACHE_BASEDIR` and helps when: * Multiple developers working with different username paths * Working with multiple project checkouts simultaneously -**Note:** Only absolute paths are supported. Relative paths will be ignored with a warning. +**Note:** Only absolute paths are supported. Relative paths will prevent server from start. You can also configure this in the sccache config file: diff --git a/docs/Configuration.md b/docs/Configuration.md index 585729df1..8df0eb405 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -17,7 +17,7 @@ server_startup_timeout_ms = 10000 # users in a shared environment. # When multiple paths are provided, the longest matching prefix # is applied. -# +# # Path matching is case-insensitive on Windows and case-sensitive on other OSes. # # Example: @@ -154,7 +154,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `|` (pipe character). When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `;` on Windows hosts and by `:` on any other. When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/config.rs b/src/config.rs index 68337daaf..4e475ca9d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -950,11 +950,17 @@ fn config_from_env() -> Result { }; // ======= Base directory ======= - // Support multiple paths separated by '|' (a character forbidden in paths) + // Support multiple paths separated by ';' on Windows and ':' on other platforms + // to match PATH behavior. + let split_symbol = if cfg!(target_os = "windows") { + ';' + } else { + ':' + }; let basedirs = env::var_os("SCCACHE_BASEDIRS") .map(|s| { s.to_string_lossy() - .split('|') + .split(split_symbol) .map(|p| PathBuf::from(p.trim())) .filter(|p| !p.as_os_str().is_empty()) .collect() @@ -1386,6 +1392,61 @@ fn config_overrides() { } #[test] +#[cfg(target_os = "windows")] +fn config_basedirs_overrides() { + use std::path::PathBuf; + + // Test that env variable takes precedence over file config + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![PathBuf::from("C:/env/basedir")], + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![PathBuf::from("C:/file/basedir")], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs, vec![PathBuf::from("C:\\env\\basedir")]); + + // Test that file config is used when env is empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![], + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![PathBuf::from("C:/file/basedir")], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs, vec![PathBuf::from("C:\\file\\basedir")]); + + // Test that both empty results in empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![], + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs, Vec::::new()); +} + +#[test] +#[cfg(not(target_os = "windows"))] fn config_basedirs_overrides() { use std::path::PathBuf; @@ -1439,6 +1500,7 @@ fn config_basedirs_overrides() { } #[test] +#[cfg(not(target_os = "windows"))] fn test_deserialize_basedirs() { use std::path::PathBuf; @@ -1482,6 +1544,7 @@ fn test_deserialize_basedirs_missing() { #[test] #[serial] +#[cfg(not(target_os = "windows"))] fn test_env_basedirs_single() { use std::path::PathBuf; @@ -1497,13 +1560,33 @@ fn test_env_basedirs_single() { #[test] #[serial] +#[cfg(target_os = "windows")] +fn test_env_basedirs_single() { + use std::path::PathBuf; + + unsafe { + std::env::set_var("SCCACHE_BASEDIRS", "C:/home/user/project"); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedirs, + vec![PathBuf::from("C:\\home\\user\\project")] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + +#[test] +#[serial] +#[cfg(not(target_os = "windows"))] fn test_env_basedirs_multiple() { use std::path::PathBuf; unsafe { std::env::set_var( "SCCACHE_BASEDIRS", - "/home/user/project|/home/user/workspace", + "/home/user/project:/home/user/workspace", ); } let config = config_from_env().unwrap(); @@ -1521,6 +1604,32 @@ fn test_env_basedirs_multiple() { #[test] #[serial] +#[cfg(target_os = "windows")] +fn test_env_basedirs_multiple() { + use std::path::PathBuf; + + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "C:/home/user/project;C:/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedirs, + vec![ + PathBuf::from("C:\\home\\user\\project"), + PathBuf::from("C:\\home\\user\\workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + +#[test] +#[serial] +#[cfg(not(target_os = "windows"))] fn test_env_basedirs_with_spaces() { use std::path::PathBuf; @@ -1528,7 +1637,7 @@ fn test_env_basedirs_with_spaces() { unsafe { std::env::set_var( "SCCACHE_BASEDIRS", - " /home/user/project | /home/user/workspace ", + " /home/user/project : /home/user/workspace ", ); } let config = config_from_env().unwrap(); @@ -1546,6 +1655,33 @@ fn test_env_basedirs_with_spaces() { #[test] #[serial] +#[cfg(target_os = "windows")] +fn test_env_basedirs_with_spaces() { + use std::path::PathBuf; + + // Test that spaces around paths are trimmed + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + " C:/home/user/project ; C:/home/user/workspace ", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedirs, + vec![ + PathBuf::from("C:\\home\\user\\project"), + PathBuf::from("C:\\home\\user\\workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + +#[test] +#[serial] +#[cfg(not(target_os = "windows"))] fn test_env_basedirs_empty_entries() { use std::path::PathBuf; @@ -1553,7 +1689,7 @@ fn test_env_basedirs_empty_entries() { unsafe { std::env::set_var( "SCCACHE_BASEDIRS", - "/home/user/project||/home/user/workspace", + "/home/user/project::/home/user/workspace", ); } let config = config_from_env().unwrap(); @@ -1569,6 +1705,32 @@ fn test_env_basedirs_empty_entries() { } } +#[test] +#[serial] +#[cfg(target_os = "windows")] +fn test_env_basedirs_empty_entries() { + use std::path::PathBuf; + + // Test that empty entries are filtered out + unsafe { + std::env::set_var( + "SCCACHE_BASEDIRS", + "c:/home/user/project;;c:/home/user/workspace", + ); + } + let config = config_from_env().unwrap(); + assert_eq!( + config.basedirs, + vec![ + PathBuf::from("c:\\home\\user\\project"), + PathBuf::from("c:\\home\\user\\workspace") + ] + ); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } +} + #[test] #[serial] fn test_env_basedirs_not_set() { From 82a929e83e49bf474c2841a7ab79860b3c11ea5d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 29 Dec 2025 19:48:11 +0100 Subject: [PATCH 16/31] Simplify strip_basedirs a bit more --- src/util.rs | 66 +++++++++++++++++++++-------------------------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/src/util.rs b/src/util.rs index ebd753e6b..0da208d6e 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1033,22 +1033,6 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec = basedirs - .iter() - .map(|basedir| { - let basedir_str = basedir.to_string_lossy(); - let trimmed = basedir_str.trim_end_matches('/').trim_end_matches('\\'); - let normalized = normalize_path(trimmed.as_bytes()); - (trimmed.as_bytes().to_vec(), normalized, trimmed.len()) - }) - .filter(|(bytes, _, _)| !bytes.is_empty()) - .collect(); - - if basedirs_data.is_empty() { - return preprocessor_output.to_vec(); - } - trace!( "Stripping basedirs from preprocessor output with length {}: {:?}", preprocessor_output.len(), @@ -1056,20 +1040,22 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec = Vec::new(); + // Store as (position, length) sorted by position + let mut matches: Vec<(usize, usize)> = Vec::new(); #[cfg(target_os = "windows")] - let preprocessor_output_normalized = normalize_path(preprocessor_output); - - for (basedir_idx, basedir) in basedirs_data.iter().enumerate() { - let basedir_len = basedir.2; - // Use memchr's fast substring search - // Case sensitive - #[cfg(not(target_os = "windows"))] - let finder = memchr::memmem::find_iter(preprocessor_output, &basedir.0); - // Case insensitive + let preprocessor_output = &normalize_path(preprocessor_output); + + for basedir_path in basedirs.iter() { + let basedir_str = basedir_path.to_string_lossy(); + let basedir = basedir_str + .trim_end_matches('/') + .trim_end_matches('\\') + .as_bytes(); #[cfg(target_os = "windows")] - let finder = memchr::memmem::find_iter(&preprocessor_output_normalized, &basedir.1); + // Case insensitive + let basedir = normalize_path(basedir); + // Use memchr's fast substring search + let finder = memchr::memmem::find_iter(preprocessor_output, &basedir); for pos in finder { // Check if this is a valid boundary (start, whitespace, quote, or '<') @@ -1079,7 +1065,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec = Vec::new(); let mut last_end = 0; - for (pos, _basedir_idx, len) in matches { + for (pos, len) in matches { if pos >= last_end { filtered_matches.push((pos, len)); last_end = pos + len; @@ -1124,6 +1110,10 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec { path.iter() @@ -1135,11 +1125,6 @@ fn normalize_path(path: &[u8]) -> Vec { .collect() } -#[cfg(not(target_os = "windows"))] -fn normalize_path(path: &[u8]) -> Vec { - path.to_vec() -} - #[cfg(test)] mod tests { use super::{OsStrExt, TimeMacroFinder}; @@ -1407,14 +1392,15 @@ mod tests { let basedir = PathBuf::from("C:\\Users\\test\\project"); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \".\\src\\main.c\""; + // normalized backslash to slash + let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); // With multiple trailing backslashes let basedir = PathBuf::from("C:\\Users\\test\\project\\\\\\"); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \".\\src\\main.c\""; + let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); } @@ -1427,13 +1413,13 @@ mod tests { let basedir = PathBuf::from("C:\\Users\\test\\project"); let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \".\\src/main.c\""; + let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected, "Failed to strip mixed slash path"); // Also test the reverse case let input = b"# 1 \"C:\\Users/test/project/src\\main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src\\main.c\""; + let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected, "Failed to strip reverse mixed slash path"); } } From 890e9af6fcf53cb2bb47d4663f5073ed482551e3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 30 Dec 2025 13:08:20 +0100 Subject: [PATCH 17/31] Final cleanup of the code for leftovers --- docs/Configuration.md | 2 +- src/cache/cache.rs | 4 ---- src/cache/readonly.rs | 8 ++++---- src/compiler/c.rs | 5 +---- src/compiler/preprocessor_cache.rs | 3 +-- src/config.rs | 9 ++++----- src/util.rs | 15 +-------------- 7 files changed, 12 insertions(+), 34 deletions(-) diff --git a/docs/Configuration.md b/docs/Configuration.md index 8df0eb405..61e7ba26c 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -154,7 +154,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `;` on Windows hosts and by `:` on any other. When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will be ignored with a warning. +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `;` on Windows hosts and by `:` on any other. When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will cause an error and prevent the server from start. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 8d9c0043a..5e99ea5fa 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -880,8 +880,6 @@ mod test { #[test] #[cfg(feature = "s3")] fn test_operator_storage_s3_with_basedirs() { - use std::path::PathBuf; - // Create S3 operator (doesn't need real credentials for this test) let operator = crate::cache::s3::S3Cache::new( "test-bucket".to_string(), @@ -910,8 +908,6 @@ mod test { #[test] #[cfg(feature = "redis")] fn test_operator_storage_redis_with_basedirs() { - use std::path::PathBuf; - // Create Redis operator let operator = crate::cache::redis::RedisCache::build_single( "redis://localhost:6379", diff --git a/src/cache/readonly.rs b/src/cache/readonly.rs index eef075c9b..840666351 100644 --- a/src/cache/readonly.rs +++ b/src/cache/readonly.rs @@ -143,16 +143,16 @@ mod test { std::fs::create_dir(&cache_dir).unwrap(); let basedirs = vec![ - std::path::PathBuf::from("/home/user/project"), - std::path::PathBuf::from("/home/user/workspace"), + PathBuf::from("/home/user/project"), + PathBuf::from("/home/user/workspace"), ]; let disk_cache = crate::cache::disk::DiskCache::new( &cache_dir, 1024 * 1024, runtime.handle(), - super::super::PreprocessorCacheModeConfig::default(), - super::super::CacheMode::ReadWrite, + super::PreprocessorCacheModeConfig::default(), + super::CacheMode::ReadWrite, basedirs.clone(), ); diff --git a/src/compiler/c.rs b/src/compiler/c.rs index a37a18064..df319bf6e 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -26,7 +26,7 @@ use crate::dist::pkg; use crate::mock_command::CommandCreatorSync; use crate::util::{ Digest, HashToDigest, MetadataCtimeExt, TimeMacroFinder, Timestamp, decode_path, encode_path, - hash_all, + hash_all, strip_basedirs, }; use async_trait::async_trait; use fs_err as fs; @@ -1505,7 +1505,6 @@ pub fn hash_key( // Strip basedirs from preprocessor output if configured let preprocessor_output_to_hash = if !basedirs.is_empty() { - use crate::util::strip_basedirs; Cow::Owned(strip_basedirs(preprocessor_output, basedirs)) } else { Cow::Borrowed(preprocessor_output) @@ -1824,8 +1823,6 @@ mod test { #[test] fn test_hash_key_basedirs() { - use std::path::PathBuf; - let args = ovec!["a", "b", "c"]; let digest = "abcd"; diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index 788fe662c..ebef18ef2 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -34,7 +34,7 @@ use serde::{Deserialize, Serialize}; use crate::{ cache::PreprocessorCacheModeConfig, - util::{Digest, HashToDigest, MetadataCtimeExt, Timestamp, encode_path}, + util::{Digest, HashToDigest, MetadataCtimeExt, Timestamp, encode_path, strip_basedirs}, }; use super::Language; @@ -418,7 +418,6 @@ pub fn preprocessor_cache_entry_hash_key( // Strip basedirs from the input file path if configured let buf_to_hash = if !basedirs.is_empty() { - use crate::util::strip_basedirs; strip_basedirs(&buf, basedirs) } else { buf.clone() diff --git a/src/config.rs b/src/config.rs index 4e475ca9d..f9a03847b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -952,11 +952,10 @@ fn config_from_env() -> Result { // ======= Base directory ======= // Support multiple paths separated by ';' on Windows and ':' on other platforms // to match PATH behavior. - let split_symbol = if cfg!(target_os = "windows") { - ';' - } else { - ':' - }; + #[cfg(target_os = "windows")] + let split_symbol = ';'; + #[cfg(not(target_os = "windows"))] + let split_symbol = ':'; let basedirs = env::var_os("SCCACHE_BASEDIRS") .map(|s| { s.to_string_lossy() diff --git a/src/util.rs b/src/util.rs index 0da208d6e..c75a9d177 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1129,6 +1129,7 @@ fn normalize_path(path: &[u8]) -> Vec { mod tests { use super::{OsStrExt, TimeMacroFinder}; use std::ffi::{OsStr, OsString}; + use std::path::PathBuf; #[test] fn simple_starts_with() { @@ -1280,8 +1281,6 @@ mod tests { #[test] fn test_strip_basedir_simple() { - use std::path::PathBuf; - // Simple cases let basedir = PathBuf::from("/home/user/project"); let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; @@ -1304,8 +1303,6 @@ mod tests { #[test] fn test_strip_basedir_empty() { - use std::path::PathBuf; - // Empty basedir slice let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, &[]); @@ -1320,8 +1317,6 @@ mod tests { #[test] fn test_strip_basedir_not_at_boundary() { - use std::path::PathBuf; - // basedir should only match at word boundaries let basedir = PathBuf::from("/home/user"); let input = b"text/home/user/file.c and \"/home/user/other.c\""; @@ -1333,8 +1328,6 @@ mod tests { #[test] fn test_strip_basedir_trailing_slashes() { - use std::path::PathBuf; - // Without trailing slash let basedir = PathBuf::from("/home/user/project"); let input = b"# 1 \"/home/user/project/src/main.c\""; @@ -1359,8 +1352,6 @@ mod tests { #[test] fn test_strip_basedirs_multiple() { - use std::path::PathBuf; - // Multiple basedirs - should match longest first let basedirs = vec![ PathBuf::from("/home/user1/project"), @@ -1386,8 +1377,6 @@ mod tests { #[cfg(target_os = "windows")] #[test] fn test_strip_basedir_windows_backslashes() { - use std::path::PathBuf; - // Without trailing backslash let basedir = PathBuf::from("C:\\Users\\test\\project"); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; @@ -1407,8 +1396,6 @@ mod tests { #[cfg(target_os = "windows")] #[test] fn test_strip_basedir_windows_mixed_slashes() { - use std::path::PathBuf; - // Mixed forward and backslashes in input (common from certain build systems) let basedir = PathBuf::from("C:\\Users\\test\\project"); let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; From 331212c27a3eaba27edfcca51fa107d16cdf73cb Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 30 Dec 2025 14:39:56 +0100 Subject: [PATCH 18/31] Review: less reallocations --- src/config.rs | 2 +- src/util.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config.rs b/src/config.rs index f9a03847b..bcce98d8f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1043,7 +1043,7 @@ impl Config { }; // Validate that all basedirs are absolute paths - let mut basedirs = Vec::new(); + let mut basedirs = Vec::with_capacity(basedirs_raw.len()); for p in basedirs_raw { if !p.is_absolute() { bail!("Basedir path must be absolute: {:?}", p); diff --git a/src/util.rs b/src/util.rs index c75a9d177..a5f537d38 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1078,7 +1078,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec = Vec::new(); + let mut filtered_matches: Vec<(usize, usize)> = Vec::with_capacity(matches.len()); let mut last_end = 0; for (pos, len) in matches { From eb2bc122b996fbe63fd7b0a5018318e13f8699f3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 30 Dec 2025 14:40:51 +0100 Subject: [PATCH 19/31] Apply suggestions from code review Co-authored-by: Alex Overchenko --- src/util.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/util.rs b/src/util.rs index a5f537d38..b1df258c3 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1048,8 +1048,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec Vec { path.iter() .map(|&b| match b { - b'A'..=b'Z' => b + 32, + b'A'..=b'Z' => b + 'a' - 'A', b'\\' => b'/', _ => b, }) From 544e2b2d075d4dafe4fda4ab3399e65622124fcb Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 30 Dec 2025 16:20:48 +0100 Subject: [PATCH 20/31] Move normalization to the config building, use bytes for basedirs --- src/cache/cache.rs | 19 +++--- src/cache/disk.rs | 6 +- src/cache/readonly.rs | 7 +-- src/compiler/c.rs | 16 +++--- src/compiler/preprocessor_cache.rs | 10 +++- src/config.rs | 92 ++++++++++++++++-------------- src/server.rs | 2 +- src/util.rs | 72 ++++++++++------------- 8 files changed, 109 insertions(+), 115 deletions(-) diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 5e99ea5fa..a0d6a3816 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -382,7 +382,7 @@ pub trait Storage: Send + Sync { PreprocessorCacheModeConfig::default() } /// Return the base directories for path normalization if configured - fn basedirs(&self) -> &[PathBuf] { + fn basedirs(&self) -> &[Vec] { &[] } /// Return the preprocessor cache entry for a given preprocessor key, @@ -470,7 +470,7 @@ impl PreprocessorCacheModeConfig { ))] pub struct RemoteStorage { operator: opendal::Operator, - basedirs: Vec, + basedirs: Vec>, } #[cfg(any( @@ -484,7 +484,7 @@ pub struct RemoteStorage { feature = "oss", ))] impl RemoteStorage { - pub fn new(operator: opendal::Operator, basedirs: Vec) -> Self { + pub fn new(operator: opendal::Operator, basedirs: Vec>) -> Self { Self { operator, basedirs } } } @@ -590,7 +590,7 @@ impl Storage for RemoteStorage { Ok(None) } - fn basedirs(&self) -> &[PathBuf] { + fn basedirs(&self) -> &[Vec] { &self.basedirs } } @@ -890,10 +890,7 @@ mod test { .build() .expect("Failed to create S3 cache operator"); - let basedirs = vec![ - PathBuf::from("/home/user/project"), - PathBuf::from("/opt/build"), - ]; + let basedirs = vec![b"/home/user/project".to_vec(), b"/opt/build".to_vec()]; // Wrap with OperatorStorage let storage = RemoteStorage::new(operator, basedirs.clone()); @@ -901,8 +898,8 @@ mod test { // Verify basedirs are stored and retrieved correctly assert_eq!(storage.basedirs(), basedirs.as_slice()); assert_eq!(storage.basedirs().len(), 2); - assert_eq!(storage.basedirs()[0], PathBuf::from("/home/user/project")); - assert_eq!(storage.basedirs()[1], PathBuf::from("/opt/build")); + assert_eq!(storage.basedirs()[0], b"/home/user/project".to_vec()); + assert_eq!(storage.basedirs()[1], b"/opt/build".to_vec()); } #[test] @@ -919,7 +916,7 @@ mod test { ) .expect("Failed to create Redis cache operator"); - let basedirs = vec![PathBuf::from("/workspace")]; + let basedirs = vec![b"/workspace".to_vec()]; // Wrap with OperatorStorage let storage = RemoteStorage::new(operator, basedirs.clone()); diff --git a/src/cache/disk.rs b/src/cache/disk.rs index 515c3f2ca..7e75100b1 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -74,7 +74,7 @@ pub struct DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, preprocessor_cache: Arc>, rw_mode: CacheMode, - basedirs: Vec, + basedirs: Vec>, } impl DiskCache { @@ -85,7 +85,7 @@ impl DiskCache { pool: &tokio::runtime::Handle, preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, - basedirs: Vec, + basedirs: Vec>, ) -> DiskCache { DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { @@ -184,7 +184,7 @@ impl Storage for DiskCache { fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { self.preprocessor_cache_mode_config } - fn basedirs(&self) -> &[PathBuf] { + fn basedirs(&self) -> &[Vec] { &self.basedirs } async fn get_preprocessor_cache_entry(&self, key: &str) -> Result>> { diff --git a/src/cache/readonly.rs b/src/cache/readonly.rs index 840666351..f57cc1faf 100644 --- a/src/cache/readonly.rs +++ b/src/cache/readonly.rs @@ -10,7 +10,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; @@ -66,7 +65,7 @@ impl Storage for ReadOnlyStorage { } /// Return the base directories for path normalization if configured - fn basedirs(&self) -> &[PathBuf] { + fn basedirs(&self) -> &[Vec] { self.0.basedirs() } @@ -143,8 +142,8 @@ mod test { std::fs::create_dir(&cache_dir).unwrap(); let basedirs = vec![ - PathBuf::from("/home/user/project"), - PathBuf::from("/home/user/workspace"), + b"/home/user/project".to_vec(), + b"/home/user/workspace".to_vec(), ]; let disk_cache = crate::cache::disk::DiskCache::new( diff --git a/src/compiler/c.rs b/src/compiler/c.rs index df319bf6e..d717269ed 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -1478,7 +1478,7 @@ pub fn hash_key( env_vars: &[(OsString, OsString)], preprocessor_output: &[u8], plusplus: bool, - basedirs: &[PathBuf], + basedirs: &[Vec], ) -> String { // If you change any of the inputs to the hash, you should change `CACHE_VERSION`. let mut m = Digest::new(); @@ -1831,8 +1831,8 @@ mod test { let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; let basedirs = [ - PathBuf::from("/home/user1/project"), - PathBuf::from("/home/user2/project"), + b"/home/user1/project".to_vec(), + b"/home/user2/project".to_vec(), ]; let h1 = hash_key( @@ -1911,8 +1911,8 @@ mod test { assert_eq!(h_cpp1, h_cpp2); - // Test 4: Works with trailing slashes - let basedir_slash = PathBuf::from("/home/user1/project/"); + // Test 4: Doesn't work with trailing slash in basedir, they must be normalized in config + let basedir_slash = b"/home/user1/project/".to_vec(); let h_slash = hash_key( digest, Language::C, @@ -1924,12 +1924,12 @@ mod test { std::slice::from_ref(&basedir_slash), ); - assert_eq!(h1, h_slash); + assert_neq!(h1, h_slash); // Test 5: Multiple basedirs - longest match wins let basedirs = vec![ - PathBuf::from("/home/user1"), - PathBuf::from("/home/user1/project"), // This should match (longest) + b"/home/user1".to_vec(), + b"/home/user1/project".to_vec(), // This should match (longest) ]; let h_multi = hash_key( digest, diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index ebef18ef2..a3a7139aa 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -381,7 +381,7 @@ pub fn preprocessor_cache_entry_hash_key( input_file: &Path, plusplus: bool, config: PreprocessorCacheModeConfig, - basedirs: &[PathBuf], + basedirs: &[Vec], ) -> anyhow::Result> { // If you change any of the inputs to the hash, you should change `FORMAT_VERSION`. let mut m = Digest::new(); @@ -651,6 +651,10 @@ mod test { // Create two different base directories let dir1 = TempDir::new().unwrap(); let dir2 = TempDir::new().unwrap(); + let dirs = vec![ + dir1.path().to_string_lossy().into_owned().into_bytes(), + dir2.path().to_string_lossy().into_owned().into_bytes(), + ]; // Create identical files with the same relative path in each directory let file1_path = dir1.path().join("test.c"); @@ -672,7 +676,7 @@ mod test { &file1_path, false, config, - &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + &dirs, ) .unwrap() .unwrap(); @@ -686,7 +690,7 @@ mod test { &file2_path, false, config, - &[dir1.path().to_path_buf(), dir2.path().to_path_buf()], + &dirs, ) .unwrap() .unwrap(); diff --git a/src/config.rs b/src/config.rs index bcce98d8f..b96e36190 100644 --- a/src/config.rs +++ b/src/config.rs @@ -13,6 +13,8 @@ // limitations under the License. use crate::cache::CacheMode; +#[cfg(target_os = "windows")] +use crate::util::normalize_win_path; use directories::ProjectDirs; use fs::File; use fs_err as fs; @@ -1000,7 +1002,7 @@ pub struct Config { pub server_startup_timeout: Option, /// Base directory (or directories) to strip from paths for cache key computation. /// Similar to ccache's CCACHE_BASEDIR. - pub basedirs: Vec, + pub basedirs: Vec>, } impl Config { @@ -1043,16 +1045,42 @@ impl Config { }; // Validate that all basedirs are absolute paths + // basedirs_raw is Vec let mut basedirs = Vec::with_capacity(basedirs_raw.len()); for p in basedirs_raw { if !p.is_absolute() { bail!("Basedir path must be absolute: {:?}", p); } - basedirs.push(absolute(&p)?); + // Normalize basedir: + // remove double separators, cur_dirs + let abs = absolute(&p)?; + let mut bytes = abs.to_string_lossy().into_owned().into_bytes(); + + // remove trailing slashes + while matches!(bytes.last(), Some(b'/' | b'\\')) { + bytes.pop(); + } + // normalize windows paths + let normalized = { + #[cfg(target_os = "windows")] + { + normalize_win_path(&bytes) + } + + #[cfg(not(target_os = "windows"))] + { + bytes + } + }; + basedirs.push(normalized); } if !basedirs.is_empty() { - debug!("Using basedirs for path normalization: {:?}", basedirs); + let basedirs_str: Vec = basedirs + .iter() + .map(|b| String::from_utf8_lossy(b).into_owned()) + .collect(); + debug!("Using basedirs for path normalization: {:?}", basedirs_str); } let (caches, fallback_cache) = conf_caches.into_fallback(); @@ -1393,8 +1421,6 @@ fn config_overrides() { #[test] #[cfg(target_os = "windows")] fn config_basedirs_overrides() { - use std::path::PathBuf; - // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), @@ -1409,7 +1435,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![PathBuf::from("C:\\env\\basedir")]); + assert_eq!(config.basedirs, vec![b"C:\\env\\basedir".to_vec()]); // Test that file config is used when env is empty let env_conf = EnvConfig { @@ -1425,7 +1451,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![PathBuf::from("C:\\file\\basedir")]); + assert_eq!(config.basedirs, vec![b"C:\\file\\basedir".to_vec()]); // Test that both empty results in empty let env_conf = EnvConfig { @@ -1441,14 +1467,12 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, Vec::::new()); + assert!(config.basedirs.is_empty()); } #[test] #[cfg(not(target_os = "windows"))] fn config_basedirs_overrides() { - use std::path::PathBuf; - // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), @@ -1463,7 +1487,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![PathBuf::from("/env/basedir")]); + assert_eq!(config.basedirs, vec![b"/env/basedir".to_vec()]); // Test that file config is used when env is empty let env_conf = EnvConfig { @@ -1479,7 +1503,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![PathBuf::from("/file/basedir")]); + assert_eq!(config.basedirs, vec![b"/file/basedir".to_vec()]); // Test that both empty results in empty let env_conf = EnvConfig { @@ -1495,14 +1519,12 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, Vec::::new()); + assert!(config.basedirs.is_empty()); } #[test] #[cfg(not(target_os = "windows"))] fn test_deserialize_basedirs() { - use std::path::PathBuf; - // Test array of paths let toml = r#" basedirs = ["/home/user/project", "/home/user/workspace"] @@ -1526,8 +1548,6 @@ fn test_deserialize_basedirs() { #[test] fn test_deserialize_basedirs_missing() { - use std::path::PathBuf; - // Test no basedirs specified (should default to empty vec) let toml = r#" [cache.disk] @@ -1538,15 +1558,13 @@ fn test_deserialize_basedirs_missing() { "#; let config: FileConfig = toml::from_str(toml).unwrap(); - assert_eq!(config.basedirs, Vec::::new()); + assert!(config.basedirs.is_empty()); } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_single() { - use std::path::PathBuf; - unsafe { std::env::set_var("SCCACHE_BASEDIRS", "/home/user/project"); } @@ -1558,11 +1576,9 @@ fn test_env_basedirs_single() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(target_os = "windows")] fn test_env_basedirs_single() { - use std::path::PathBuf; - unsafe { std::env::set_var("SCCACHE_BASEDIRS", "C:/home/user/project"); } @@ -1577,11 +1593,9 @@ fn test_env_basedirs_single() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_multiple() { - use std::path::PathBuf; - unsafe { std::env::set_var( "SCCACHE_BASEDIRS", @@ -1602,11 +1616,9 @@ fn test_env_basedirs_multiple() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(target_os = "windows")] fn test_env_basedirs_multiple() { - use std::path::PathBuf; - unsafe { std::env::set_var( "SCCACHE_BASEDIRS", @@ -1627,11 +1639,9 @@ fn test_env_basedirs_multiple() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_with_spaces() { - use std::path::PathBuf; - // Test that spaces around paths are trimmed unsafe { std::env::set_var( @@ -1653,11 +1663,9 @@ fn test_env_basedirs_with_spaces() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(target_os = "windows")] fn test_env_basedirs_with_spaces() { - use std::path::PathBuf; - // Test that spaces around paths are trimmed unsafe { std::env::set_var( @@ -1679,11 +1687,9 @@ fn test_env_basedirs_with_spaces() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_empty_entries() { - use std::path::PathBuf; - // Test that empty entries are filtered out unsafe { std::env::set_var( @@ -1705,11 +1711,9 @@ fn test_env_basedirs_empty_entries() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] #[cfg(target_os = "windows")] fn test_env_basedirs_empty_entries() { - use std::path::PathBuf; - // Test that empty entries are filtered out unsafe { std::env::set_var( @@ -1731,13 +1735,13 @@ fn test_env_basedirs_empty_entries() { } #[test] -#[serial] +#[serial(SCCACHE_BASEDIRS)] fn test_env_basedirs_not_set() { unsafe { std::env::remove_var("SCCACHE_BASEDIRS"); } let config = config_from_env().unwrap(); - assert_eq!(config.basedirs, Vec::::new()); + assert!(config.basedirs.is_empty()); } #[test] diff --git a/src/server.rs b/src/server.rs index 6561225be..2613d0903 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1944,7 +1944,7 @@ impl ServerInfo { basedirs = storage .basedirs() .iter() - .map(|p| p.to_string_lossy().to_string()) + .map(|p| String::from_utf8_lossy(p).to_string()) .collect(); } else { cache_location = String::new(); diff --git a/src/util.rs b/src/util.rs index b1df258c3..3ebb5e5dc 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1028,31 +1028,27 @@ pub fn num_cpus() -> usize { /// /// Only paths that start with one of the basedirs are modified. The paths are expected to be /// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). -pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec { +pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[Vec]) -> Vec { if basedirs.is_empty() || preprocessor_output.is_empty() { return preprocessor_output.to_vec(); } trace!( - "Stripping basedirs from preprocessor output with length {}: {:?}", + "Stripping basedirs from preprocessor output with length {}", preprocessor_output.len(), - basedirs ); // Find all potential matches for each basedir using fast substring search - // Store as (position, length) sorted by position - let mut matches: Vec<(usize, usize)> = Vec::new(); + // Store as (position, length, basedir_idx) sorted by position + let mut matches: Vec<(usize, usize, usize)> = Vec::new(); #[cfg(target_os = "windows")] - let preprocessor_output = &normalize_path(preprocessor_output); + let preprocessor_output = &normalize_win_path(preprocessor_output); - for basedir_path in basedirs.iter() { - let basedir_str = basedir_path.to_string_lossy(); - let basedir = basedir_str - .trim_end_matches(|c| c == '/' || c == '\\') - .as_bytes(); + for (idx, basedir_bytes) in basedirs.iter().enumerate() { + let basedir = basedir_bytes.as_slice(); #[cfg(target_os = "windows")] // Case insensitive - let basedir = normalize_path(basedir); + let basedir = normalize_win_path(basedir); // Use memchr's fast substring search let finder = memchr::memmem::find_iter(preprocessor_output, &basedir); @@ -1064,7 +1060,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec = Vec::with_capacity(matches.len()); let mut last_end = 0; - for (pos, len) in matches { + for (pos, len, idx) in matches { if pos >= last_end { filtered_matches.push((pos, len)); last_end = pos + len; - trace!("Matched basedir at position {} with length {}", pos, len); + trace!( + "Matched basedir {} at position {} with length {}", + String::from_utf8_lossy(&basedirs[idx]), + pos, + len + ); } } @@ -1114,10 +1115,10 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[PathBuf]) -> Vec Vec { +pub fn normalize_win_path(path: &[u8]) -> Vec { path.iter() .map(|&b| match b { - b'A'..=b'Z' => b + 'a' - 'A', + b'A'..=b'Z' => b + (b'a' - b'A'), b'\\' => b'/', _ => b, }) @@ -1128,7 +1129,6 @@ fn normalize_path(path: &[u8]) -> Vec { mod tests { use super::{OsStrExt, TimeMacroFinder}; use std::ffi::{OsStr, OsString}; - use std::path::PathBuf; #[test] fn simple_starts_with() { @@ -1281,7 +1281,7 @@ mod tests { #[test] fn test_strip_basedir_simple() { // Simple cases - let basedir = PathBuf::from("/home/user/project"); + let basedir = b"/home/user/project".to_vec(); let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\"\nint main() { return 0; }"; @@ -1308,7 +1308,7 @@ mod tests { assert_eq!(output, input); // Empty input - let basedir = PathBuf::from("/home/user/project"); + let basedir = b"/home/user/project".to_vec(); let input = b""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); assert_eq!(output, input); @@ -1317,7 +1317,7 @@ mod tests { #[test] fn test_strip_basedir_not_at_boundary() { // basedir should only match at word boundaries - let basedir = PathBuf::from("/home/user"); + let basedir = b"/home/user".to_vec(); let input = b"text/home/user/file.c and \"/home/user/other.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); // Should only replace the second occurrence (after quote) @@ -1328,24 +1328,17 @@ mod tests { #[test] fn test_strip_basedir_trailing_slashes() { // Without trailing slash - let basedir = PathBuf::from("/home/user/project"); + let basedir = b"/home/user/project".to_vec(); let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\""; assert_eq!(output, expected); - // With single trailing slash - let basedir = PathBuf::from("/home/user/project/"); + // Trailing slashes aren't ignored, they must be cleaned in config reader + let basedir = b"/home/user/project/".to_vec(); let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src/main.c\""; - assert_eq!(output, expected); - - // With multiple trailing slashes - let basedir = PathBuf::from("/home/user/project////"); - let input = b"# 1 \"/home/user/project/src/main.c\""; - let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src/main.c\""; + let expected = b"# 1 \".src/main.c\""; // Wrong, but expected assert_eq!(output, expected); } @@ -1353,8 +1346,8 @@ mod tests { fn test_strip_basedirs_multiple() { // Multiple basedirs - should match longest first let basedirs = vec![ - PathBuf::from("/home/user1/project"), - PathBuf::from("/home/user2/workspace"), + b"/home/user1/project".to_vec(), + b"/home/user2/workspace".to_vec(), ]; let input = b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; @@ -1363,10 +1356,7 @@ mod tests { assert_eq!(output, expected); // Longest prefix wins - let basedirs = vec![ - PathBuf::from("/home/user"), - PathBuf::from("/home/user/project"), // This should match first (longest) - ]; + let basedirs = vec![b"/home/user".to_vec(), b"/home/user/project".to_vec()]; let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, &basedirs); let expected = b"# 1 \"./src/main.c\""; @@ -1377,7 +1367,7 @@ mod tests { #[test] fn test_strip_basedir_windows_backslashes() { // Without trailing backslash - let basedir = PathBuf::from("C:\\Users\\test\\project"); + let basedir = b"C:\\Users\\test\\project".to_vec(); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); // normalized backslash to slash @@ -1385,7 +1375,7 @@ mod tests { assert_eq!(output, expected); // With multiple trailing backslashes - let basedir = PathBuf::from("C:\\Users\\test\\project\\\\\\"); + let basedir = b"C:\\Users\\test\\project\\\\\\".to_vec(); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\""; @@ -1396,7 +1386,7 @@ mod tests { #[test] fn test_strip_basedir_windows_mixed_slashes() { // Mixed forward and backslashes in input (common from certain build systems) - let basedir = PathBuf::from("C:\\Users\\test\\project"); + let basedir = b"C:\\Users\\test\\project".to_vec(); let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); let expected = b"# 1 \"./src/main.c\""; From faf77a43d994edd9ca5f6a389e48130771910e1d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 30 Dec 2025 22:45:12 +0100 Subject: [PATCH 21/31] Return unnormalized output, fix tests --- src/config.rs | 4 ++-- src/util.rs | 30 ++++++++++++++++++------------ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/config.rs b/src/config.rs index b96e36190..1e6c78c37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1435,7 +1435,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![b"C:\\env\\basedir".to_vec()]); + assert_eq!(config.basedirs, vec![b"c:/env/basedir".to_vec()]); // Test that file config is used when env is empty let env_conf = EnvConfig { @@ -1451,7 +1451,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![b"C:\\file\\basedir".to_vec()]); + assert_eq!(config.basedirs, vec![b"c:/file/basedir".to_vec()]); // Test that both empty results in empty let env_conf = EnvConfig { diff --git a/src/util.rs b/src/util.rs index 3ebb5e5dc..40ed7756a 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1041,8 +1041,12 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[Vec]) -> Vec = Vec::new(); + // We must return the original preprocessor output on all platforms, + // so we only normalize a copy for searching. + #[cfg(not(target_os = "windows"))] + let normalized_output = preprocessor_output; #[cfg(target_os = "windows")] - let preprocessor_output = &normalize_win_path(preprocessor_output); + let normalized_output = &normalize_win_path(preprocessor_output); for (idx, basedir_bytes) in basedirs.iter().enumerate() { let basedir = basedir_bytes.as_slice(); @@ -1050,14 +1054,14 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[Vec]) -> Vec Date: Fri, 9 Jan 2026 16:06:06 +0100 Subject: [PATCH 22/31] Empty SCCACHE_BASEDIRS overrides, but unset does not --- src/config.rs | 111 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/src/config.rs b/src/config.rs index 1e6c78c37..6dd452825 100644 --- a/src/config.rs +++ b/src/config.rs @@ -625,7 +625,7 @@ pub fn try_read_config_file(path: &Path) -> Result, + basedirs: Option>, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -958,15 +958,13 @@ fn config_from_env() -> Result { let split_symbol = ';'; #[cfg(not(target_os = "windows"))] let split_symbol = ':'; - let basedirs = env::var_os("SCCACHE_BASEDIRS") - .map(|s| { - s.to_string_lossy() - .split(split_symbol) - .map(|p| PathBuf::from(p.trim())) - .filter(|p| !p.as_os_str().is_empty()) - .collect() - }) - .unwrap_or_default(); + let basedirs = env::var_os("SCCACHE_BASEDIRS").map(|s| { + s.to_string_lossy() + .split(split_symbol) + .map(|p| PathBuf::from(p.trim())) + .filter(|p| !p.as_os_str().is_empty()) + .collect() + }); Ok(EnvConfig { cache, basedirs }) } @@ -1037,9 +1035,9 @@ impl Config { } = env_conf; conf_caches.merge(cache); - // Environment variable takes precedence over file config - let basedirs_raw = if !env_basedirs.is_empty() { - env_basedirs + // Environment variable takes precedence over file config if it is set + let basedirs_raw = if let Some(basedirs) = env_basedirs { + basedirs } else { file_basedirs }; @@ -1363,7 +1361,7 @@ fn config_overrides() { }), ..Default::default() }, - basedirs: vec![], + basedirs: None, }; let file_conf = FileConfig { @@ -1404,7 +1402,7 @@ fn config_overrides() { username: Some("user".to_owned()), password: Some("secret".to_owned()), ..Default::default() - }),), + })), fallback_cache: DiskCacheConfig { dir: "/env-cache".into(), size: 5, @@ -1416,6 +1414,8 @@ fn config_overrides() { basedirs: vec![], } ); + + // check that } #[test] @@ -1424,7 +1424,7 @@ fn config_basedirs_overrides() { // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedirs: vec![PathBuf::from("C:/env/basedir")], + basedirs: vec![PathBuf::from("C:/env/basedir")].into(), }; let file_conf = FileConfig { @@ -1437,10 +1437,10 @@ fn config_basedirs_overrides() { let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); assert_eq!(config.basedirs, vec![b"c:/env/basedir".to_vec()]); - // Test that file config is used when env is empty + // Test that file config is used when env is None let env_conf = EnvConfig { cache: Default::default(), - basedirs: vec![], + basedirs: None, }; let file_conf = FileConfig { @@ -1453,10 +1453,26 @@ fn config_basedirs_overrides() { let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); assert_eq!(config.basedirs, vec![b"c:/file/basedir".to_vec()]); + // Test that env config is used when env is set but empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![PathBuf::from("C:/file/basedir")], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + // Test that both empty results in empty let env_conf = EnvConfig { cache: Default::default(), - basedirs: vec![], + basedirs: vec![].into(), }; let file_conf = FileConfig { @@ -1476,7 +1492,7 @@ fn config_basedirs_overrides() { // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedirs: vec![PathBuf::from("/env/basedir")], + basedirs: vec![PathBuf::from("/env/basedir")].into(), }; let file_conf = FileConfig { @@ -1489,10 +1505,10 @@ fn config_basedirs_overrides() { let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); assert_eq!(config.basedirs, vec![b"/env/basedir".to_vec()]); - // Test that file config is used when env is empty + // Test that file config is used when env is None let env_conf = EnvConfig { cache: Default::default(), - basedirs: vec![], + basedirs: None, }; let file_conf = FileConfig { @@ -1505,12 +1521,42 @@ fn config_basedirs_overrides() { let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); assert_eq!(config.basedirs, vec![b"/file/basedir".to_vec()]); + // Test that env config is used when env is set but empty + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: vec![].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![PathBuf::from("/file/basedir")], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + // Test that both empty results in empty let env_conf = EnvConfig { cache: Default::default(), + basedirs: vec![].into(), + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, basedirs: vec![], }; + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), @@ -1569,7 +1615,10 @@ fn test_env_basedirs_single() { std::env::set_var("SCCACHE_BASEDIRS", "/home/user/project"); } let config = config_from_env().unwrap(); - assert_eq!(config.basedirs, vec![PathBuf::from("/home/user/project")]); + assert_eq!( + config.basedirs.expect("SCCACHE_BASEDIRS is set"), + vec![PathBuf::from("/home/user/project")] + ); unsafe { std::env::remove_var("SCCACHE_BASEDIRS"); } @@ -1584,7 +1633,7 @@ fn test_env_basedirs_single() { } let config = config_from_env().unwrap(); assert_eq!( - config.basedirs, + config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![PathBuf::from("C:\\home\\user\\project")] ); unsafe { @@ -1604,7 +1653,7 @@ fn test_env_basedirs_multiple() { } let config = config_from_env().unwrap(); assert_eq!( - config.basedirs, + config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") @@ -1627,7 +1676,7 @@ fn test_env_basedirs_multiple() { } let config = config_from_env().unwrap(); assert_eq!( - config.basedirs, + config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ PathBuf::from("C:\\home\\user\\project"), PathBuf::from("C:\\home\\user\\workspace") @@ -1651,7 +1700,7 @@ fn test_env_basedirs_with_spaces() { } let config = config_from_env().unwrap(); assert_eq!( - config.basedirs, + config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") @@ -1675,7 +1724,7 @@ fn test_env_basedirs_with_spaces() { } let config = config_from_env().unwrap(); assert_eq!( - config.basedirs, + config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ PathBuf::from("C:\\home\\user\\project"), PathBuf::from("C:\\home\\user\\workspace") @@ -1699,7 +1748,7 @@ fn test_env_basedirs_empty_entries() { } let config = config_from_env().unwrap(); assert_eq!( - config.basedirs, + config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ PathBuf::from("/home/user/project"), PathBuf::from("/home/user/workspace") @@ -1723,7 +1772,7 @@ fn test_env_basedirs_empty_entries() { } let config = config_from_env().unwrap(); assert_eq!( - config.basedirs, + config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ PathBuf::from("c:\\home\\user\\project"), PathBuf::from("c:\\home\\user\\workspace") @@ -1741,7 +1790,7 @@ fn test_env_basedirs_not_set() { std::env::remove_var("SCCACHE_BASEDIRS"); } let config = config_from_env().unwrap(); - assert!(config.basedirs.is_empty()); + assert!(config.basedirs.is_none()); } #[test] From 1fc4f7a2f6b9098721d93e14adb38d48f3d84baf Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 9 Jan 2026 16:54:07 +0100 Subject: [PATCH 23/31] Apply suggestions from code review Co-authored-by: Ben Boeckel --- README.md | 2 +- docs/Configuration.md | 6 +++--- src/cache/readonly.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2ca2721c6..42052c2cf 100644 --- a/README.md +++ b/README.md @@ -329,7 +329,7 @@ By default, sccache requires absolute paths to match for cache hits. To enable c export SCCACHE_BASEDIRS=/home/user/project ``` -You can also specify multiple base directories by separating them by `;` on Windows hosts and by `:` on any other. When multiple directories are provided, the longest matching prefix is used: +You can also specify multiple base directories by separating them by `;` on Windows hosts and by `:` on any other operating system. When multiple directories are provided, the longest matching prefix is used: ```bash export SCCACHE_BASEDIRS="/home/user/project:/home/user/workspace" diff --git a/docs/Configuration.md b/docs/Configuration.md index 61e7ba26c..f96885b87 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -15,8 +15,8 @@ server_startup_timeout_ms = 10000 # paths when compiling the same source code, such as between # parallel checkouts of the same project, Git worktrees, or different # users in a shared environment. -# When multiple paths are provided, the longest matching prefix -# is applied. +# When multiple matching paths are provided, the longest prefix +# is used. # # Path matching is case-insensitive on Windows and case-sensitive on other OSes. # @@ -154,7 +154,7 @@ Note that some env variables may need sccache server restart to take effect. * `SCCACHE_ALLOW_CORE_DUMPS` to enable core dumps by the server * `SCCACHE_CONF` configuration file path -* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `;` on Windows hosts and by `:` on any other. When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will cause an error and prevent the server from start. +* `SCCACHE_BASEDIRS` base directory (or directories) to strip from paths for cache key computation. This is similar to ccache's `CCACHE_BASEDIR` and enables cache hits across different absolute paths when compiling the same source code. Multiple directories can be separated by `;` on Windows hosts and by `:` on any other operating system. When multiple directories are specified, the longest matching prefix is used. Path matching is **case-insensitive** on Windows and **case-sensitive** on other operating systems. Environment variable takes precedence over file configuration. Only absolute paths are supported; relative paths will cause an error and prevent the server from start. * `SCCACHE_CACHED_CONF` * `SCCACHE_IDLE_TIMEOUT` how long the local daemon process waits for more client requests before exiting, in seconds. Set to `0` to run sccache permanently * `SCCACHE_STARTUP_NOTIFY` specify a path to a socket which will be used for server completion notification diff --git a/src/cache/readonly.rs b/src/cache/readonly.rs index f57cc1faf..715e664f6 100644 --- a/src/cache/readonly.rs +++ b/src/cache/readonly.rs @@ -135,7 +135,7 @@ mod test { .unwrap(); let tempdir = tempfile::Builder::new() - .prefix("sccache_test_readonly_basedirs") + .prefix("readonly_storage_forwards_basedirs") .tempdir() .expect("Failed to create tempdir"); let cache_dir = tempdir.path().join("cache"); From e1b555a2e9103c9257854b4cf6e86020585d3123 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 9 Jan 2026 17:23:05 +0100 Subject: [PATCH 24/31] Do not trim spaces from paths, safer env overwrite in tests --- src/config.rs | 98 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 37 deletions(-) diff --git a/src/config.rs b/src/config.rs index 6dd452825..92c86b0a7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -961,7 +961,7 @@ fn config_from_env() -> Result { let basedirs = env::var_os("SCCACHE_BASEDIRS").map(|s| { s.to_string_lossy() .split(split_symbol) - .map(|p| PathBuf::from(p.trim())) + .map(PathBuf::from) .filter(|p| !p.as_os_str().is_empty()) .collect() }); @@ -1414,8 +1414,6 @@ fn config_overrides() { basedirs: vec![], } ); - - // check that } #[test] @@ -1615,13 +1613,14 @@ fn test_env_basedirs_single() { std::env::set_var("SCCACHE_BASEDIRS", "/home/user/project"); } let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![PathBuf::from("/home/user/project")] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } } #[test] @@ -1632,13 +1631,14 @@ fn test_env_basedirs_single() { std::env::set_var("SCCACHE_BASEDIRS", "C:/home/user/project"); } let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![PathBuf::from("C:\\home\\user\\project")] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } } #[test] @@ -1652,6 +1652,10 @@ fn test_env_basedirs_multiple() { ); } let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ @@ -1659,9 +1663,6 @@ fn test_env_basedirs_multiple() { PathBuf::from("/home/user/workspace") ] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } } #[test] @@ -1675,6 +1676,10 @@ fn test_env_basedirs_multiple() { ); } let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ @@ -1682,57 +1687,74 @@ fn test_env_basedirs_multiple() { PathBuf::from("C:\\home\\user\\workspace") ] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } } #[test] #[serial(SCCACHE_BASEDIRS)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_with_spaces() { - // Test that spaces around paths are trimmed + // Test that spaces around paths are not trimmed unsafe { std::env::set_var( "SCCACHE_BASEDIRS", " /home/user/project : /home/user/workspace ", ); } - let config = config_from_env().unwrap(); + let env_conf = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( - config.basedirs.expect("SCCACHE_BASEDIRS is set"), + env_conf.basedirs.clone().expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from("/home/user/project"), - PathBuf::from("/home/user/workspace") + PathBuf::from(" /home/user/project "), + PathBuf::from(" /home/user/workspace ") ] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } + // The lead to trailing spaces are preserved and server fails to start + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + Config::from_env_and_file_configs(env_conf, file_conf) + .expect_err("Should fail due to non-absolute path"); } #[test] #[serial(SCCACHE_BASEDIRS)] #[cfg(target_os = "windows")] fn test_env_basedirs_with_spaces() { - // Test that spaces around paths are trimmed + // Test that spaces around paths are not trimmed unsafe { std::env::set_var( "SCCACHE_BASEDIRS", " C:/home/user/project ; C:/home/user/workspace ", ); } - let config = config_from_env().unwrap(); + let env_conf = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( - config.basedirs.expect("SCCACHE_BASEDIRS is set"), + env_conf.basedirs.clone().expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from("C:\\home\\user\\project"), - PathBuf::from("C:\\home\\user\\workspace") + PathBuf::from(" C:\\home\\user\\project "), + PathBuf::from(" C:\\home\\user\\workspace ") ] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } + // The lead to trailing spaces are preserved and server fails to start + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + Config::from_env_and_file_configs(env_conf, file_conf) + .expect_err("Should fail due to non-absolute path"); } #[test] @@ -1747,6 +1769,10 @@ fn test_env_basedirs_empty_entries() { ); } let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ @@ -1754,9 +1780,6 @@ fn test_env_basedirs_empty_entries() { PathBuf::from("/home/user/workspace") ] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } } #[test] @@ -1771,6 +1794,10 @@ fn test_env_basedirs_empty_entries() { ); } let config = config_from_env().unwrap(); + unsafe { + std::env::remove_var("SCCACHE_BASEDIRS"); + } + assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ @@ -1778,9 +1805,6 @@ fn test_env_basedirs_empty_entries() { PathBuf::from("c:\\home\\user\\workspace") ] ); - unsafe { - std::env::remove_var("SCCACHE_BASEDIRS"); - } } #[test] From eb3529e0c1bc0b0f5735cde064ad96e841386f51 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 9 Jan 2026 17:46:37 +0100 Subject: [PATCH 25/31] Add tests that check uniq basedir per case --- src/compiler/c.rs | 28 +++++++++++++++++++++-- src/compiler/preprocessor_cache.rs | 36 +++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index d717269ed..233d90faa 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -1826,7 +1826,6 @@ mod test { let args = ovec!["a", "b", "c"]; let digest = "abcd"; - // Test 1: Same hash with different absolute paths when basedir is used let preprocessed1 = b"# 1 \"/home/user1/project/src/main.c\"\nint main() { return 0; }"; let preprocessed2 = b"# 1 \"/home/user2/project/src/main.c\"\nint main() { return 0; }"; @@ -1835,6 +1834,7 @@ mod test { b"/home/user2/project".to_vec(), ]; + // Test 1: Same hash with different absolute paths when basedir is used let h1 = hash_key( digest, Language::C, @@ -1858,7 +1858,31 @@ mod test { assert_eq!(h1, h2); - // Test 2: Different hashes without basedir + // Test 2: Same hash with single basedir that matches each + let h1 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed1, + false, + &basedirs[..1], + ); + let h2 = hash_key( + digest, + Language::C, + &args, + &[], + &[], + preprocessed2, + false, + &basedirs[1..], + ); + + assert_eq!(h1, h2); + + // Test 3: Different hashes without basedir let h1_no_base = hash_key( digest, Language::C, diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index a3a7139aa..5500834f7 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -700,7 +700,41 @@ mod test { "Hashes should be equal when using basedirs with identical files in different directories" ); - // Test 2: Without basedirs, hashes should be different + // Test 2: With basedir1 for first, and basedir2 for second, hashes should be the same + let hash1_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file1_path, + false, + config, + &dirs[..1], + ) + .unwrap() + .unwrap(); + + let hash2_with_basedirs = preprocessor_cache_entry_hash_key( + "test_digest", + Language::C, + &[], + &[], + &[], + &file2_path, + false, + config, + &dirs[1..], + ) + .unwrap() + .unwrap(); + + assert_eq!( + hash1_with_basedirs, hash2_with_basedirs, + "Hashes should be equal when using basedirs with identical files in different directories" + ); + + // Test 3: Without basedirs, hashes should be different let hash1_no_basedirs = preprocessor_cache_entry_hash_key( "test_digest", Language::C, From a47e230eaed00b395e672b6e5c0abb6c324124fb Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 9 Jan 2026 18:07:06 +0100 Subject: [PATCH 26/31] Return `Cow` from strip_basedirs --- src/compiler/c.rs | 6 +---- src/compiler/preprocessor_cache.rs | 6 +---- src/util.rs | 40 ++++++++++++++++-------------- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 233d90faa..48a31b59f 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -1504,11 +1504,7 @@ pub fn hash_key( } // Strip basedirs from preprocessor output if configured - let preprocessor_output_to_hash = if !basedirs.is_empty() { - Cow::Owned(strip_basedirs(preprocessor_output, basedirs)) - } else { - Cow::Borrowed(preprocessor_output) - }; + let preprocessor_output_to_hash = strip_basedirs(preprocessor_output, basedirs); m.update(&preprocessor_output_to_hash); m.finish() diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index 5500834f7..98a5655b7 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -417,11 +417,7 @@ pub fn preprocessor_cache_entry_hash_key( encode_path(&mut buf, input_file)?; // Strip basedirs from the input file path if configured - let buf_to_hash = if !basedirs.is_empty() { - strip_basedirs(&buf, basedirs) - } else { - buf.clone() - }; + let buf_to_hash = strip_basedirs(&buf, basedirs); m.update(&buf_to_hash); let reader = std::fs::File::open(input_file) .with_context(|| format!("while hashing the input file '{}'", input_file.display()))?; diff --git a/src/util.rs b/src/util.rs index 40ed7756a..01c83c966 100644 --- a/src/util.rs +++ b/src/util.rs @@ -20,6 +20,7 @@ use fs_err as fs; use object::read::archive::ArchiveFile; use object::read::macho::{FatArch, MachOFatFile32, MachOFatFile64}; use serde::{Deserialize, Serialize}; +use std::borrow::Cow; use std::cell::Cell; use std::ffi::{OsStr, OsString}; use std::hash::Hasher; @@ -1028,9 +1029,9 @@ pub fn num_cpus() -> usize { /// /// Only paths that start with one of the basedirs are modified. The paths are expected to be /// in the format found in preprocessor output (e.g., `# 1 "/path/to/file"`). -pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[Vec]) -> Vec { +pub fn strip_basedirs<'a>(preprocessor_output: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { if basedirs.is_empty() || preprocessor_output.is_empty() { - return preprocessor_output.to_vec(); + return Cow::Borrowed(preprocessor_output); } trace!( @@ -1070,7 +1071,7 @@ pub fn strip_basedirs(preprocessor_output: &[u8], basedirs: &[Vec]) -> Vec]) -> Vec Date: Fri, 9 Jan 2026 19:28:45 +0100 Subject: [PATCH 27/31] Use typed_path::Utf8TypedPathBuf to normalize directories --- Cargo.lock | 7 +++++ Cargo.toml | 1 + src/config.rs | 72 +++++++++++++++++++++++++-------------------------- 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 470952488..3cbbdc57c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2955,6 +2955,7 @@ dependencies = [ "tokio-util", "toml", "tower-service", + "typed-path", "url", "uuid", "version-compare", @@ -3855,6 +3856,12 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" +[[package]] +name = "typed-path" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7922f2cdc51280d47b491af9eafc41eb0cdab85eabcb390c854412fcbf26dbe8" + [[package]] name = "typenum" version = "1.17.0" diff --git a/Cargo.toml b/Cargo.toml index 6c6113baa..d59980599 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,6 +103,7 @@ tokio-serde = "0.8" tokio-util = { version = "0.7", features = ["codec", "io"] } toml = "0.8" tower-service = "0.3" +typed-path = "0.12.0" url = { version = "2", optional = true } uuid = { version = "1.9", features = ["v4"] } walkdir = "2" diff --git a/src/config.rs b/src/config.rs index 92c86b0a7..a66ad0950 100644 --- a/src/config.rs +++ b/src/config.rs @@ -28,11 +28,12 @@ use serde::{ use serial_test::serial; use std::env; use std::io::{Read, Write}; -use std::path::{Path, PathBuf, absolute}; +use std::path::{Path, PathBuf}; use std::result::Result as StdResult; use std::str::FromStr; use std::sync::{LazyLock, Mutex}; use std::{collections::HashMap, fmt}; +use typed_path::Utf8TypedPathBuf; pub use crate::cache::PreprocessorCacheModeConfig; use crate::errors::*; @@ -587,7 +588,7 @@ pub struct FileConfig { pub dist: DistConfig, pub server_startup_timeout_ms: Option, /// Base directories to strip from paths for cache key computation. - pub basedirs: Vec, + pub basedirs: Vec, } // If the file doesn't exist or we can't read it, log the issue and proceed. If the @@ -625,7 +626,7 @@ pub fn try_read_config_file(path: &Path) -> Result>, + basedirs: Option>, } fn key_prefix_from_env_var(env_var_name: &str) -> String { @@ -961,8 +962,8 @@ fn config_from_env() -> Result { let basedirs = env::var_os("SCCACHE_BASEDIRS").map(|s| { s.to_string_lossy() .split(split_symbol) - .map(PathBuf::from) - .filter(|p| !p.as_os_str().is_empty()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_owned()) .collect() }); @@ -1045,19 +1046,16 @@ impl Config { // Validate that all basedirs are absolute paths // basedirs_raw is Vec let mut basedirs = Vec::with_capacity(basedirs_raw.len()); - for p in basedirs_raw { + for d in basedirs_raw { + let p = Utf8TypedPathBuf::from(d); if !p.is_absolute() { bail!("Basedir path must be absolute: {:?}", p); } // Normalize basedir: - // remove double separators, cur_dirs - let abs = absolute(&p)?; - let mut bytes = abs.to_string_lossy().into_owned().into_bytes(); + // remove double separators, cur_dirs, parent_dirs, trailing slashes + let normalized = p.normalize(); + let bytes = normalized.to_string().into_bytes(); - // remove trailing slashes - while matches!(bytes.last(), Some(b'/' | b'\\')) { - bytes.pop(); - } // normalize windows paths let normalized = { #[cfg(target_os = "windows")] @@ -1422,14 +1420,14 @@ fn config_basedirs_overrides() { // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedirs: vec![PathBuf::from("C:/env/basedir")].into(), + basedirs: vec!["C:/env/basedir".to_string()].into(), }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedirs: vec![PathBuf::from("C:/file/basedir")], + basedirs: vec!["C:/file/basedir".to_string()], }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); @@ -1445,7 +1443,7 @@ fn config_basedirs_overrides() { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedirs: vec![PathBuf::from("C:/file/basedir")], + basedirs: vec!["C:/file/basedir".to_string()], }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); @@ -1461,7 +1459,7 @@ fn config_basedirs_overrides() { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedirs: vec![PathBuf::from("C:/file/basedir")], + basedirs: vec!["C:/file/basedir".to_string()], }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); @@ -1490,14 +1488,14 @@ fn config_basedirs_overrides() { // Test that env variable takes precedence over file config let env_conf = EnvConfig { cache: Default::default(), - basedirs: vec![PathBuf::from("/env/basedir")].into(), + basedirs: vec!["/env/basedir".to_string()].into(), }; let file_conf = FileConfig { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedirs: vec![PathBuf::from("/file/basedir")], + basedirs: vec!["/file/basedir".to_string()], }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); @@ -1513,7 +1511,7 @@ fn config_basedirs_overrides() { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedirs: vec![PathBuf::from("/file/basedir")], + basedirs: vec!["/file/basedir".to_string()], }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); @@ -1529,7 +1527,7 @@ fn config_basedirs_overrides() { cache: Default::default(), dist: Default::default(), server_startup_timeout_ms: None, - basedirs: vec![PathBuf::from("/file/basedir")], + basedirs: vec!["/file/basedir".to_string()], }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); @@ -1584,8 +1582,8 @@ fn test_deserialize_basedirs() { assert_eq!( config.basedirs, vec![ - PathBuf::from("/home/user/project"), - PathBuf::from("/home/user/workspace") + "/home/user/project".to_string(), + "/home/user/workspace".to_string() ] ); } @@ -1619,7 +1617,7 @@ fn test_env_basedirs_single() { assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), - vec![PathBuf::from("/home/user/project")] + vec!["/home/user/project".to_string()] ); } @@ -1637,7 +1635,7 @@ fn test_env_basedirs_single() { assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), - vec![PathBuf::from("C:\\home\\user\\project")] + vec!["C:/home/user/project".to_string()] ); } @@ -1659,8 +1657,8 @@ fn test_env_basedirs_multiple() { assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from("/home/user/project"), - PathBuf::from("/home/user/workspace") + "/home/user/project".to_string(), + "/home/user/workspace".to_string() ] ); } @@ -1683,8 +1681,8 @@ fn test_env_basedirs_multiple() { assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from("C:\\home\\user\\project"), - PathBuf::from("C:\\home\\user\\workspace") + "C:/home/user/project".to_string(), + "C:/home/user/workspace".to_string() ] ); } @@ -1708,8 +1706,8 @@ fn test_env_basedirs_with_spaces() { assert_eq!( env_conf.basedirs.clone().expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from(" /home/user/project "), - PathBuf::from(" /home/user/workspace ") + " /home/user/project ".to_string(), + " /home/user/workspace ".to_string() ] ); // The lead to trailing spaces are preserved and server fails to start @@ -1742,8 +1740,8 @@ fn test_env_basedirs_with_spaces() { assert_eq!( env_conf.basedirs.clone().expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from(" C:\\home\\user\\project "), - PathBuf::from(" C:\\home\\user\\workspace ") + " C:/home/user/project ".to_string(), + " C:/home/user/workspace ".to_string() ] ); // The lead to trailing spaces are preserved and server fails to start @@ -1776,8 +1774,8 @@ fn test_env_basedirs_empty_entries() { assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from("/home/user/project"), - PathBuf::from("/home/user/workspace") + "/home/user/project".to_string(), + "/home/user/workspace".to_string() ] ); } @@ -1801,8 +1799,8 @@ fn test_env_basedirs_empty_entries() { assert_eq!( config.basedirs.expect("SCCACHE_BASEDIRS is set"), vec![ - PathBuf::from("c:\\home\\user\\project"), - PathBuf::from("c:\\home\\user\\workspace") + "c:/home/user/project".to_string(), + "c:/home/user/workspace".to_string() ] ); } From c5e9acc731aa5c1508ea6f1b7574981d1de1a348 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 9 Jan 2026 20:30:44 +0100 Subject: [PATCH 28/31] Strip basedir completely, normalize once, add integration tests --- src/compiler/preprocessor_cache.rs | 16 +- src/config.rs | 298 ++++++++++++++++++++++++++++- src/util.rs | 50 +++-- 3 files changed, 328 insertions(+), 36 deletions(-) diff --git a/src/compiler/preprocessor_cache.rs b/src/compiler/preprocessor_cache.rs index 98a5655b7..18a8127ab 100644 --- a/src/compiler/preprocessor_cache.rs +++ b/src/compiler/preprocessor_cache.rs @@ -641,16 +641,24 @@ mod test { #[test] fn test_preprocessor_cache_entry_hash_key_basedirs() { + #[cfg(target_os = "windows")] + use crate::util::normalize_win_path; use std::fs; use tempfile::TempDir; // Create two different base directories let dir1 = TempDir::new().unwrap(); let dir2 = TempDir::new().unwrap(); - let dirs = vec![ - dir1.path().to_string_lossy().into_owned().into_bytes(), - dir2.path().to_string_lossy().into_owned().into_bytes(), - ]; + let dirs = [&dir1, &dir2] + .iter() + .map(|dir| { + let bytes = dir.path().to_string_lossy().into_owned().into_bytes(); + #[cfg(target_os = "windows")] + return normalize_win_path(&bytes); + #[cfg(not(target_os = "windows"))] + bytes + }) + .collect::>(); // Create identical files with the same relative path in each directory let file1_path = dir1.path().join("test.c"); diff --git a/src/config.rs b/src/config.rs index a66ad0950..d0c11409b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1053,8 +1053,12 @@ impl Config { } // Normalize basedir: // remove double separators, cur_dirs, parent_dirs, trailing slashes - let normalized = p.normalize(); - let bytes = normalized.to_string().into_bytes(); + let p_norm = p.normalize(); + let mut bytes = p_norm.to_string().into_bytes(); + + // Always add a trailing `/` to basedirs to ensure we only match complete path + // components + bytes.push(b'/'); // normalize windows paths let normalized = { @@ -1431,7 +1435,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![b"c:/env/basedir".to_vec()]); + assert_eq!(config.basedirs, vec![b"c:/env/basedir/".to_vec()]); // Test that file config is used when env is None let env_conf = EnvConfig { @@ -1447,7 +1451,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![b"c:/file/basedir".to_vec()]); + assert_eq!(config.basedirs, vec![b"c:/file/basedir/".to_vec()]); // Test that env config is used when env is set but empty let env_conf = EnvConfig { @@ -1499,7 +1503,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![b"/env/basedir".to_vec()]); + assert_eq!(config.basedirs, vec![b"/env/basedir/".to_vec()]); // Test that file config is used when env is None let env_conf = EnvConfig { @@ -1515,7 +1519,7 @@ fn config_basedirs_overrides() { }; let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); - assert_eq!(config.basedirs, vec![b"/file/basedir".to_vec()]); + assert_eq!(config.basedirs, vec![b"/file/basedir/".to_vec()]); // Test that env config is used when env is set but empty let env_conf = EnvConfig { @@ -2217,3 +2221,285 @@ size = "7g" } ); } + +// Integration tests: Config normalization + strip_basedirs usage + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_config_normalizes_and_strips() { + // Test that Config normalizes basedirs and strip_basedirs uses them correctly + use crate::util::strip_basedirs; + use std::borrow::Cow; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user/project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Verify config normalized the basedir with trailing slash + assert_eq!(config.basedirs, vec![b"/home/user/project/"]); + + // Test that strip_basedirs uses the normalized basedir + let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; + let output = strip_basedirs(input, &config.basedirs); + + // Should strip the basedir + let expected = b"# 1 \"src/main.c\"\nint main() { return 0; }"; + assert_eq!(&*output, expected); + assert!(matches!(output, Cow::Owned(_))); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_normalized_path_with_double_slashes() { + // Test that Config normalizes paths with double slashes + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home//user///project/".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Config should normalize to single slashes with one trailing slash + assert_eq!(config.basedirs, vec![b"/home/user/project/"]); + + // Verify it works with strip_basedirs + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + assert_eq!(&*output, b"# 1 \"src/main.c\""); +} + +#[test] +#[cfg(target_os = "windows")] +fn test_integration_windows_path_normalization() { + // Test that Config normalizes Windows paths correctly + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["C:\\Users\\Test\\Project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Should be normalized to lowercase with forward slashes + assert_eq!(config.basedirs, vec![b"c:/users/test/project/"]); + + // Test with mixed case preprocessor output + let input = b"# 1 \"C:\\Users\\Test\\Project\\src\\main.c\""; + let output = strip_basedirs(input, &config.basedirs); + assert_eq!(&*output, b"# 1 \"src\\main.c\""); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_cow_borrowed_when_no_match() { + // Test that strip_basedirs returns Cow::Borrowed when no stripping occurs + use crate::util::strip_basedirs; + use std::borrow::Cow; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user/project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Input doesn't contain the basedir + let input = b"# 1 \"/other/path/main.c\"\nint main() { return 0; }"; + let output = strip_basedirs(input, &config.basedirs); + + // Should return borrowed reference (no allocation) + assert!(matches!(output, Cow::Borrowed(_))); + assert_eq!(&*output, input); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_cow_borrowed_when_empty_basedirs() { + // Test that strip_basedirs returns Cow::Borrowed when basedirs is empty + use crate::util::strip_basedirs; + use std::borrow::Cow; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec![], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert!(config.basedirs.is_empty()); + + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + + // Should return borrowed reference when basedirs is empty + assert!(matches!(output, Cow::Borrowed(_))); + assert_eq!(&*output, input); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_multiple_basedirs_longest_match() { + // Test that strip_basedirs prefers longest match with normalized basedirs + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user".to_string(), "/home/user/project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Both should be normalized with trailing slashes + assert_eq!(config.basedirs.len(), 2); + assert_eq!(config.basedirs[0], b"/home/user/"); + assert_eq!(config.basedirs[1], b"/home/user/project/"); + + // Input matches both, but longest should win + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + + // Should match the longest basedir (/home/user/project/) + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); +} + +#[test] +#[cfg(not(target_os = "windows"))] +fn test_integration_paths_with_dots_normalized() { + // Test that paths with . and .. are normalized correctly + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["/home/user/./project/../project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + // Should be normalized to remove ./ and ../ + assert_eq!(config.basedirs[0], b"/home/user/project/"); + + // Verify it works with strip_basedirs + let input = b"# 1 \"/home/user/project/src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); +} + +#[test] +#[cfg(target_os = "windows")] +fn test_integration_windows_mixed_slashes() { + // Test Windows path with mixed slashes in preprocessor output + use crate::util::strip_basedirs; + + let env_conf = EnvConfig { + cache: Default::default(), + basedirs: None, + }; + + let file_conf = FileConfig { + cache: Default::default(), + dist: Default::default(), + server_startup_timeout_ms: None, + basedirs: vec!["C:\\Users\\test\\project".to_string()], + }; + + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + assert_eq!(config.basedirs[0], b"c:/users/test/project/"); + + // Preprocessor output with mixed slashes + let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; + let output = strip_basedirs(input, &config.basedirs); + + // Should strip despite mixed slashes + let expected = b"# 1 \"src/main.c\""; + assert_eq!(&*output, expected); +} + +#[test] +#[serial(SCCACHE_BASEDIRS)] +#[cfg(not(target_os = "windows"))] +fn test_integration_env_variable_to_strip() { + // Test full flow: SCCACHE_BASEDIRS env var -> Config -> strip_basedirs + use crate::util::strip_basedirs; + + unsafe { + env::set_var("SCCACHE_BASEDIRS", "/home/user/project:/tmp/build"); + } + + let env_conf = config_from_env().unwrap(); + let file_conf = FileConfig::default(); + let config = Config::from_env_and_file_configs(env_conf, file_conf).unwrap(); + + unsafe { + env::remove_var("SCCACHE_BASEDIRS"); + } + + // Should have two normalized basedirs + assert_eq!(config.basedirs.len(), 2); + assert_eq!(config.basedirs[0], b"/home/user/project/"); + assert_eq!(config.basedirs[1], b"/tmp/build/"); + + // Test stripping with both + let input1 = b"# 1 \"/home/user/project/src/main.c\""; + let output1 = strip_basedirs(input1, &config.basedirs); + assert_eq!(&*output1, b"# 1 \"src/main.c\""); + + let input2 = b"# 1 \"/tmp/build/obj/file.o\""; + let output2 = strip_basedirs(input2, &config.basedirs); + assert_eq!(&*output2, b"# 1 \"obj/file.o\""); +} diff --git a/src/util.rs b/src/util.rs index 01c83c966..429f0b0ac 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1051,9 +1051,6 @@ pub fn strip_basedirs<'a>(preprocessor_output: &'a [u8], basedirs: &[Vec]) - for (idx, basedir_bytes) in basedirs.iter().enumerate() { let basedir = basedir_bytes.as_slice(); - #[cfg(target_os = "windows")] - // Case insensitive - let basedir = normalize_win_path(basedir); // Use memchr's fast substring search let finder = memchr::memmem::find_iter(normalized_output, &basedir); @@ -1101,8 +1098,8 @@ pub fn strip_basedirs<'a>(preprocessor_output: &'a [u8], basedirs: &[Vec]) - for (match_pos, match_len) in filtered_matches { // Copy everything before the match result.extend_from_slice(&preprocessor_output[current_pos..match_pos]); - // Replace the basedir with "." - result.push(b'.'); + // Replace the basedir is removed completely, including trailing slash (it is expected, see + // Config::basedir) current_pos = match_pos + match_len; } @@ -1286,17 +1283,17 @@ mod tests { #[test] fn test_strip_basedir_simple() { // Simple cases - let basedir = b"/home/user/project".to_vec(); + let basedir = b"/home/user/project/".to_vec(); let input = b"# 1 \"/home/user/project/src/main.c\"\nint main() { return 0; }"; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src/main.c\"\nint main() { return 0; }"; + let expected = b"# 1 \"src/main.c\"\nint main() { return 0; }"; assert_eq!(&*output, expected); // Multiple occurrences let input = b"# 1 \"/home/user/project/src/main.c\"\n# 2 \"/home/user/project/include/header.h\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src/main.c\"\n# 2 \"./include/header.h\""; + let expected = b"# 1 \"src/main.c\"\n# 2 \"include/header.h\""; assert_eq!(&*output, expected); // No occurrences @@ -1313,7 +1310,7 @@ mod tests { assert_eq!(&*output, input); // Empty input - let basedir = b"/home/user/project".to_vec(); + let basedir = b"/home/user/project/".to_vec(); let input = b""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); assert_eq!(&*output, input); @@ -1322,11 +1319,11 @@ mod tests { #[test] fn test_strip_basedir_not_at_boundary() { // basedir should only match at word boundaries - let basedir = b"/home/user".to_vec(); + let basedir = b"/home/user/".to_vec(); let input = b"text/home/user/file.c and \"/home/user/other.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); // Should only replace the second occurrence (after quote) - let expected = b"text/home/user/file.c and \"./other.c\""; + let expected = b"text/home/user/file.c and \"other.c\""; assert_eq!(&*output, expected); } @@ -1336,14 +1333,14 @@ mod tests { let basedir = b"/home/user/project".to_vec(); let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src/main.c\""; + let expected = b"# 1 \"/src/main.c\""; // Wrong, but expected assert_eq!(&*output, expected); // Trailing slashes aren't ignored, they must be cleaned in config reader let basedir = b"/home/user/project/".to_vec(); let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \".src/main.c\""; // Wrong, but expected + let expected = b"# 1 \"src/main.c\""; assert_eq!(&*output, expected); } @@ -1351,20 +1348,20 @@ mod tests { fn test_strip_basedirs_multiple() { // Multiple basedirs - should match longest first let basedirs = vec![ - b"/home/user1/project".to_vec(), - b"/home/user2/workspace".to_vec(), + b"/home/user1/project/".to_vec(), + b"/home/user2/workspace/".to_vec(), ]; let input = b"# 1 \"/home/user1/project/src/main.c\"\n# 2 \"/home/user2/workspace/lib/util.c\""; let output = super::strip_basedirs(input, &basedirs); - let expected = b"# 1 \"./src/main.c\"\n# 2 \"./lib/util.c\""; + let expected = b"# 1 \"src/main.c\"\n# 2 \"lib/util.c\""; assert_eq!(&*output, expected); // Longest prefix wins - let basedirs = vec![b"/home/user".to_vec(), b"/home/user/project".to_vec()]; + let basedirs = vec![b"/home/user/".to_vec(), b"/home/user/project/".to_vec()]; let input = b"# 1 \"/home/user/project/src/main.c\""; let output = super::strip_basedirs(input, &basedirs); - let expected = b"# 1 \"./src/main.c\""; + let expected = b"# 1 \"src/main.c\""; assert_eq!(&*output, expected); } @@ -1372,18 +1369,18 @@ mod tests { #[test] fn test_strip_basedir_windows_backslashes() { // Without trailing backslash - let basedir = b"C:\\Users\\test\\project".to_vec(); + let basedir = b"c:/users/test/project".to_vec(); let input = b"# 1 \"C:\\Users\\test\\project\\Src\\Main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); // normalized backslash to slash - let expected = b"# 1 \".\\Src\\Main.c\""; + let expected = b"# 1 \"\\Src\\Main.c\""; // Wrong, but expected assert_eq!(&*output, expected); // Trailing slashes aren't ignored, they must be cleaned in config reader - let basedir = b"C:\\Users\\test\\project\\".to_vec(); + let basedir = b"c:/users/test/project/".to_vec(); let input = b"# 1 \"C:\\Users\\test\\project\\src\\main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \".src\\main.c\""; + let expected = b"# 1 \"src\\main.c\""; assert_eq!(&*output, expected); } @@ -1393,16 +1390,17 @@ mod tests { // The slashes may be mixed in preprocessor output, but the uncut output // should remain untouched. // Mixed forward and backslashes in input (common from certain build systems) - let basedir = b"C:\\Users\\test\\project".to_vec(); + let basedir = b"c:/users/test/project/".to_vec(); let input = b"# 1 \"C:/Users\\test\\project\\src/main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \".\\src/main.c\""; + let expected = b"# 1 \"src/main.c\""; assert_eq!(&*output, expected, "Failed to strip mixed slash path"); - // Also test the reverse case + // Also test the reverse case, it doesn't work, because basedir normalization must be done + // in advance let input = b"# 1 \"C:\\Users/test/project/src\\main.c\""; let output = super::strip_basedirs(input, std::slice::from_ref(&basedir)); - let expected = b"# 1 \"./src\\main.c\""; + let expected = b"# 1 \"src\\main.c\""; assert_eq!( &*output, expected, "Failed to strip reverse mixed slash path" From 4b4c3bd3d5ad56811adfeee28bba1181d09a9b37 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 9 Jan 2026 22:14:20 +0100 Subject: [PATCH 29/31] Use ascii + utf in normalize_win_path --- src/util.rs | 154 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 141 insertions(+), 13 deletions(-) diff --git a/src/util.rs b/src/util.rs index 429f0b0ac..78fc55c22 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1111,20 +1111,67 @@ pub fn strip_basedirs<'a>(preprocessor_output: &'a [u8], basedirs: &[Vec]) - /// Normalize path for case-insensitive comparison. /// On Windows: converts all backslashes to forward slashes; -/// lowercases ASCII characters for consistency. -/// This function used for two purposes: -/// - basedir_path: already normalized by std::path::absolute, -/// but still can mismatch the case of the actual file name -/// - preprocessor_output: plain text -#[cfg(target_os = "windows")] +/// lowercases characters for consistency. +/// This function is used for: +/// - basedir_path: already normalized by std::path::absolute +/// - preprocessor_output: plain text that may contain invalid UTF-8 +/// Leave it for any platform for testing purposes. pub fn normalize_win_path(path: &[u8]) -> Vec { - path.iter() - .map(|&b| match b { - b'A'..=b'Z' => b + (b'a' - b'A'), - b'\\' => b'/', - _ => b, - }) - .collect() + let mut result = Vec::with_capacity(path.len()); + let mut i = 0; + + while i < path.len() { + let b = path[i]; + + // Fast path: ASCII characters (most common case) + if b < 128 { + result.push(match b { + b'A'..=b'Z' => b + (b'a' - b'A'), + b'\\' => b'/', + _ => b, + }); + i += 1; + continue; + } + + // Non-ASCII: try to decode UTF-8 sequence + // Determine expected length from the first byte + let char_len = match b { + 0b1100_0000..=0b1101_1111 => 2, // 110xxxxx + 0b1110_0000..=0b1110_1111 => 3, // 1110xxxx + 0b1111_0000..=0b1111_0111 => 4, // 11110xxx + _ => { + // Invalid UTF-8 start byte, copy as-is + result.push(b); + i += 1; + continue; + } + }; + + // Check if we have enough bytes + if i + char_len > path.len() { + // Incomplete sequence, copy as-is + result.push(b); + i += 1; + continue; + } + + // Validate and decode the UTF-8 sequence + match std::str::from_utf8(&path[i..i + char_len]) { + Ok(s) => { + // Valid UTF-8, lowercase it + result.extend_from_slice(s.to_lowercase().as_bytes()); + i += char_len; + } + Err(_) => { + // Invalid sequence, copy first byte as-is + result.push(b); + i += 1; + } + } + } + + result } #[cfg(test)] @@ -1406,4 +1453,85 @@ mod tests { "Failed to strip reverse mixed slash path" ); } + + #[test] + fn test_normalize_win_path_ascii() { + // Test basic ASCII normalization + let input = b"C:\\Users\\Test\\Project"; + let normalized = super::normalize_win_path(input); + assert_eq!(normalized, b"c:/users/test/project"); + + // Test mixed case + let input = b"C:\\USERS\\test\\PROJECT"; + let normalized = super::normalize_win_path(input); + assert_eq!(normalized, b"c:/users/test/project"); + } + + #[test] + fn test_normalize_win_path_utf8() { + // Test with UTF-8 characters (e.g., German umlauts) + let input = "C:\\Users\\Müller\\Projekt".as_bytes(); + let normalized = super::normalize_win_path(input); + let expected = "c:/users/müller/projekt".as_bytes(); + assert_eq!(normalized, expected); + + // Test with Cyrillic characters + let input = "C:\\Пользователь\\Проект".as_bytes(); + let normalized = super::normalize_win_path(input); + let expected = "c:/пользователь/проект".as_bytes(); + assert_eq!(normalized, expected); + + // Test with Turkish İ (special case) + let input = "C:\\İstanbul\\DİREKTÖRY".as_bytes(); + let normalized = super::normalize_win_path(input); + // Turkish İ lowercases to i with dot + let expected = "c:/i\u{307}stanbul/di\u{307}rektöry".as_bytes(); + assert_eq!(normalized, expected); + } + + #[test] + fn test_normalize_win_path_mixed_ascii_utf8() { + // Test mixed ASCII and UTF-8 + let input = "C:\\Users\\Test\\Café\\Проект".as_bytes(); + let normalized = super::normalize_win_path(input); + let expected = "c:/users/test/café/проект".as_bytes(); + assert_eq!(normalized, expected); + } + + #[test] + fn test_normalize_win_path_invalid_utf8() { + // Test with invalid UTF-8 sequence (should preserve as-is) + let mut input = b"C:\\Users\\".to_vec(); + input.push(0xFF); // Invalid UTF-8 + input.extend_from_slice(b"\\Test"); + + let normalized = super::normalize_win_path(&input); + + // Should lowercase ASCII and convert backslashes, but preserve invalid byte + let mut expected = b"c:/users/".to_vec(); + expected.push(0xFF); + expected.extend_from_slice(b"/test"); + assert_eq!(normalized, expected); + } + + #[test] + fn test_normalize_win_path_incomplete_utf8() { + // Test with incomplete UTF-8 sequence at the end + let mut input = b"C:\\Users\\Test".to_vec(); + input.push(0xC3); // Start of 2-byte UTF-8 but incomplete + + let normalized = super::normalize_win_path(&input); + + // Should preserve incomplete byte as-is + let mut expected = b"c:/users/test".to_vec(); + expected.push(0xC3); + assert_eq!(normalized, expected); + } + + #[test] + fn test_normalize_win_path_empty() { + let input = b""; + let normalized = super::normalize_win_path(input); + assert_eq!(normalized, b""); + } } From f6cee988f2ee8ca46c76dbebc6fac8e75bf46b30 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Sat, 10 Jan 2026 15:32:51 +0100 Subject: [PATCH 30/31] Another review round --- docs/Configuration.md | 2 +- src/config.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Configuration.md b/docs/Configuration.md index f96885b87..66ec6de61 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -22,7 +22,7 @@ server_startup_timeout_ms = 10000 # # Example: # basedir = ["/home/user/project"] results in the path prefix rewrite: -# "/home/user/project/src/main.c" -> "./src/main.c" +# "/home/user/project/src/main.c" -> "src/main.c" basedirs = ["/home/user/project"] # basedirs = ["/home/user/project", "/home/user/workspace"] diff --git a/src/config.rs b/src/config.rs index d0c11409b..1edce0656 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1060,7 +1060,7 @@ impl Config { // components bytes.push(b'/'); - // normalize windows paths + // normalize windows paths: use slashes and lowercase let normalized = { #[cfg(target_os = "windows")] { @@ -1075,7 +1075,7 @@ impl Config { basedirs.push(normalized); } - if !basedirs.is_empty() { + if !basedirs.is_empty() && log::log_enabled!(log::Level::Debug) { let basedirs_str: Vec = basedirs .iter() .map(|b| String::from_utf8_lossy(b).into_owned()) From b0290f335ca1ca39e211927139ea9f57e64cdf6c Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Sat, 10 Jan 2026 15:34:33 +0100 Subject: [PATCH 31/31] Avoid config conflicts while set_env manipulations --- src/config.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/config.rs b/src/config.rs index 1edce0656..4fd2f9838 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1608,7 +1608,7 @@ fn test_deserialize_basedirs_missing() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_single() { unsafe { @@ -1626,7 +1626,7 @@ fn test_env_basedirs_single() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(target_os = "windows")] fn test_env_basedirs_single() { unsafe { @@ -1644,7 +1644,7 @@ fn test_env_basedirs_single() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_multiple() { unsafe { @@ -1668,7 +1668,7 @@ fn test_env_basedirs_multiple() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(target_os = "windows")] fn test_env_basedirs_multiple() { unsafe { @@ -1692,7 +1692,7 @@ fn test_env_basedirs_multiple() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_with_spaces() { // Test that spaces around paths are not trimmed @@ -1726,7 +1726,7 @@ fn test_env_basedirs_with_spaces() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(target_os = "windows")] fn test_env_basedirs_with_spaces() { // Test that spaces around paths are not trimmed @@ -1760,7 +1760,7 @@ fn test_env_basedirs_with_spaces() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(not(target_os = "windows"))] fn test_env_basedirs_empty_entries() { // Test that empty entries are filtered out @@ -1785,7 +1785,7 @@ fn test_env_basedirs_empty_entries() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(target_os = "windows")] fn test_env_basedirs_empty_entries() { // Test that empty entries are filtered out @@ -1810,7 +1810,7 @@ fn test_env_basedirs_empty_entries() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] fn test_env_basedirs_not_set() { unsafe { std::env::remove_var("SCCACHE_BASEDIRS"); @@ -1820,7 +1820,7 @@ fn test_env_basedirs_not_set() { } #[test] -#[serial] +#[serial(config_from_env)] #[cfg(feature = "s3")] fn test_s3_no_credentials_conflict() { unsafe { @@ -1847,7 +1847,7 @@ fn test_s3_no_credentials_conflict() { } #[test] -#[serial] +#[serial(config_from_env)] fn test_s3_no_credentials_invalid() { unsafe { env::set_var("SCCACHE_S3_NO_CREDENTIALS", "yes"); @@ -1869,7 +1869,7 @@ fn test_s3_no_credentials_invalid() { } #[test] -#[serial] +#[serial(config_from_env)] fn test_s3_no_credentials_valid_true() { unsafe { env::set_var("SCCACHE_S3_NO_CREDENTIALS", "true"); @@ -1898,7 +1898,7 @@ fn test_s3_no_credentials_valid_true() { } #[test] -#[serial] +#[serial(config_from_env)] fn test_s3_no_credentials_valid_false() { unsafe { env::set_var("SCCACHE_S3_NO_CREDENTIALS", "false"); @@ -1927,7 +1927,7 @@ fn test_s3_no_credentials_valid_false() { } #[test] -#[serial] +#[serial(config_from_env)] #[cfg(feature = "gcs")] fn test_gcs_service_account() { unsafe { @@ -2471,7 +2471,7 @@ fn test_integration_windows_mixed_slashes() { } #[test] -#[serial(SCCACHE_BASEDIRS)] +#[serial(config_from_env)] #[cfg(not(target_os = "windows"))] fn test_integration_env_variable_to_strip() { // Test full flow: SCCACHE_BASEDIRS env var -> Config -> strip_basedirs