From 9d9f24b87aac8dd8a7638f0eb437e5a07a7dd720 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 17 Dec 2025 15:50:24 +1100 Subject: [PATCH 1/3] Fix some formatting and typos. --- collector/src/compile/execute/mod.rs | 69 +++++++++++++++++----------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/collector/src/compile/execute/mod.rs b/collector/src/compile/execute/mod.rs index ff10bafe6..2de859fb3 100644 --- a/collector/src/compile/execute/mod.rs +++ b/collector/src/compile/execute/mod.rs @@ -134,31 +134,43 @@ pub struct CargoProcess<'a> { pub target: Target, pub workspace_package: Option, } -/// Returns an optional list of Performance CPU cores, if the system has P and E cores. + +/// Returns an optional list of P-cores, if the system has P-cores and E-cores. /// This list *should* be in a format suitable for the `taskset` command. #[cfg(target_os = "linux")] fn performance_cores() -> Option<&'static String> { use std::sync::LazyLock; static PERFORMANCE_CORES: LazyLock> = LazyLock::new(|| { - if std::fs::exists("/sys/devices/cpu").expect("Could not check the CPU architecture details: could not check if `/sys/devices/cpu` exists!") { - // If /sys/devices/cpu exists, then this is not a "Performance-hybrid" CPU. - None - } - else if std::fs::exists("/sys/devices/cpu_core").expect("Could not check the CPU architecture detali: could not check if `/sys/devices/cpu_core` exists!") { - // If /sys/devices/cpu_core exists, then this is a "Performance-hybrid" CPU. - eprintln!("WARNING: Performance-Hybrid CPU detected. `rustc-perf` can't run properly on Efficency cores: test suite will only use Performance cores!"); - Some(std::fs::read_to_string("/sys/devices/cpu_core/cpus").unwrap().trim().to_string()) - } else { - // If neither dir exists, then something is wrong - `/sys/devices/cpu` has been in Linux for over a decade. - eprintln!("WARNING: neither `/sys/devices/cpu` nor `/sys/devices/cpu_core` present, unable to determine if this CPU has a Performance-Hybrid architecture."); - None - } + if std::fs::exists("/sys/devices/cpu") + .expect("Could not check if `/sys/devices/cpu` exists") + { + // If /sys/devices/cpu exists, then this is not a hybrid CPU. + None + } else if std::fs::exists("/sys/devices/cpu_core") + .expect("Could not check if `/sys/devices/cpu_core` exists!") + { + // If /sys/devices/cpu_core exists, then this is a hybrid CPU. + eprintln!("WARNING: hybrid Intel CPU detected."); + eprintln!("WARNING: test suite will only use P-cores, not E-cores"); + Some( + std::fs::read_to_string("/sys/devices/cpu_core/cpus") + .unwrap() + .trim() + .to_string(), + ) + } else { + // If neither dir exists, then something is wrong, because `/sys/devices/cpu` has been + // in Linux for over a decade. + eprintln!("WARNING: neither `/sys/devices/cpu` nor `/sys/devices/cpu_core` present"); + eprintln!("WARNING: unable to determine if CPU has a hybrid architecture"); + None + } }); (*PERFORMANCE_CORES).as_ref() } #[cfg(not(target_os = "linux"))] -// Modify this stub if you want to add support for P/E cores on more OSs +// Modify this stub if you want to add support for P-/E-cores on more OSs fn performance_cores() -> Option<&'static String> { None } @@ -166,24 +178,27 @@ fn performance_cores() -> Option<&'static String> { #[cfg(target_os = "linux")] /// Makes the benchmark run only on Performance cores. fn run_on_p_cores(path: &Path, cpu_list: &str) -> Command { - // Parse CPU list to extract the number of P cores! - // This assumes the P core id's are countinus, in format `fisrt_id-last_id` + // Parse CPU list to extract the number of P-cores! + // This assumes the P-core id's are continuous, in format `first_id-last_id` let (core_start, core_end) = cpu_list .split_once("-") - .unwrap_or_else(|| panic!("Unsuported P core list format: {cpu_list:?}.")); + .unwrap_or_else(|| panic!("Unsupported P-core list format: {cpu_list:?}.")); let core_start: u32 = core_start .parse() - .expect("Expected a number when parsing the start of the P core list!"); + .expect("Expected a number when parsing the start of the P-core list!"); let core_end: u32 = core_end .parse() - .expect("Expected a number when parsing the end of the P core list!"); + .expect("Expected a number when parsing the end of the P-core list!"); let core_count = core_end - core_start; let mut cmd = Command::new("taskset"); - // Set job count to P core count - this is done for 2 reasons: - // 1. The instruction count info for E core is often very incompleate - a substantial chunk of events is lost. - // 2. The performance charcteristics of E cores are less reliable, so excluding them from the benchmark makes things easier. + // Set job count to P-core count. This is done for 3 reasons: + // 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of + // events is lost. + // 2. The performance characteristics of E-cores are less reliable, so excluding them from the + // benchmark makes things easier. + // 3. An unpredictable mix of P-core and E-core execution will give inconsistent results. cmd.env("CARGO_BUILD_JOBS", format!("{core_count}")); - // pass the P core list to taskset to pin task to the P core. + // Pass the P-core list to taskset to pin task to the P-core. cmd.arg("--cpu-list"); cmd.arg(cpu_list); cmd.arg(path); @@ -191,9 +206,9 @@ fn run_on_p_cores(path: &Path, cpu_list: &str) -> Command { } #[cfg(not(target_os = "linux"))] -// Modify this stub if you want to add support for P/E cores on more OSs +// Modify this stub if you want to add support for P-cores/E-cores on more OSs. fn run_on_p_cores(_path: &Path, _cpu_list: &str) -> Command { - todo!("Can't run commands on the P cores on this platform"); + todo!("Can't run commands on the P-cores on this platform"); } impl<'a> CargoProcess<'a> { @@ -214,7 +229,7 @@ impl<'a> CargoProcess<'a> { } fn base_command(&self, cwd: &Path, subcommand: &str) -> Command { - // Processors with P and E cores require special handling + // Processors with P-core and E-cores require special handling. let mut cmd = if let Some(p_cores) = performance_cores() { run_on_p_cores(Path::new(&self.toolchain.components.cargo), p_cores) } else { From 1c128cc845b8cbbbbc6b479c8b5ff546ec043b87 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 18 Dec 2025 14:19:10 +1100 Subject: [PATCH 2/3] Fix off-by-one error in p-core count. If we have an inclusive range like `0-3`, that's four cores, not three. --- collector/src/compile/execute/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/collector/src/compile/execute/mod.rs b/collector/src/compile/execute/mod.rs index 2de859fb3..688a7f62c 100644 --- a/collector/src/compile/execute/mod.rs +++ b/collector/src/compile/execute/mod.rs @@ -189,7 +189,8 @@ fn run_on_p_cores(path: &Path, cpu_list: &str) -> Command { let core_end: u32 = core_end .parse() .expect("Expected a number when parsing the end of the P-core list!"); - let core_count = core_end - core_start; + let core_count = core_end - core_start + 1; // e.g. "0-3" is four cores: [0, 1, 2, 3] + let mut cmd = Command::new("taskset"); // Set job count to P-core count. This is done for 3 reasons: // 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of From 29028934ef3ed8326fd54895502b239853740019 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 18 Dec 2025 14:19:54 +1100 Subject: [PATCH 3/3] Add support for ARM hybrid CPUs. Specifically: - Add support for detecting ARM hybrid CPUs, via a heuristic on CPU "capacity". - Adjust ARM-specific event names as necessary, e.g. `armv8_pmuv3_0/instructions:u/` -> `instructions:u`. There is also some refactoring of the existing code for handling Intel hybrid architectures, e.g. merging `run_on_p_cores` into `performance_cores`, to avoid code duplication. --- Cargo.lock | 1 + collector/Cargo.toml | 1 + collector/src/compile/benchmark/target.rs | 1 + collector/src/compile/execute/mod.rs | 251 +++++++++++++++------- site/frontend/package-lock.json | 4 - 5 files changed, 171 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ec43b492..ddefc3f08 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -399,6 +399,7 @@ dependencies = [ "anyhow", "benchlib", "cargo_metadata", + "cfg-if", "chrono", "clap", "console", diff --git a/collector/Cargo.toml b/collector/Cargo.toml index 021be7aaf..4e35ce576 100644 --- a/collector/Cargo.toml +++ b/collector/Cargo.toml @@ -18,6 +18,7 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tokio = { workspace = true, features = ["rt", "process"] } +cfg-if = "1" thiserror = "2" tempfile = "3" libc = "0.2" diff --git a/collector/src/compile/benchmark/target.rs b/collector/src/compile/benchmark/target.rs index 9903c3add..0a69833d1 100644 --- a/collector/src/compile/benchmark/target.rs +++ b/collector/src/compile/benchmark/target.rs @@ -5,6 +5,7 @@ use std::{fmt, str::FromStr}; /// https://doc.rust-lang.org/nightly/rustc/platform-support.html /// /// Presently we only support x86_64 +/// FIXME: we actually support Windows and aarch64, but that isn't captured here. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, serde::Deserialize)] pub enum Target { /// `x86_64-unknown-linux-gnu` diff --git a/collector/src/compile/execute/mod.rs b/collector/src/compile/execute/mod.rs index 688a7f62c..1792af944 100644 --- a/collector/src/compile/execute/mod.rs +++ b/collector/src/compile/execute/mod.rs @@ -135,81 +135,136 @@ pub struct CargoProcess<'a> { pub workspace_package: Option, } -/// Returns an optional list of P-cores, if the system has P-cores and E-cores. -/// This list *should* be in a format suitable for the `taskset` command. -#[cfg(target_os = "linux")] -fn performance_cores() -> Option<&'static String> { - use std::sync::LazyLock; - static PERFORMANCE_CORES: LazyLock> = LazyLock::new(|| { - if std::fs::exists("/sys/devices/cpu") - .expect("Could not check if `/sys/devices/cpu` exists") - { - // If /sys/devices/cpu exists, then this is not a hybrid CPU. - None - } else if std::fs::exists("/sys/devices/cpu_core") - .expect("Could not check if `/sys/devices/cpu_core` exists!") - { - // If /sys/devices/cpu_core exists, then this is a hybrid CPU. - eprintln!("WARNING: hybrid Intel CPU detected."); - eprintln!("WARNING: test suite will only use P-cores, not E-cores"); - Some( - std::fs::read_to_string("/sys/devices/cpu_core/cpus") - .unwrap() - .trim() - .to_string(), - ) - } else { - // If neither dir exists, then something is wrong, because `/sys/devices/cpu` has been - // in Linux for over a decade. - eprintln!("WARNING: neither `/sys/devices/cpu` nor `/sys/devices/cpu_core` present"); - eprintln!("WARNING: unable to determine if CPU has a hybrid architecture"); - None - } - }); - (*PERFORMANCE_CORES).as_ref() +// Some CPUs have a hybrid architecture with a mixture of P-cores (power) and E-cores (efficiency). +// When benchmarking we use `taskset` to restrict execution to P-cores. Why? +// 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of events +// is lost. +// 2. The performance characteristics of E-cores are less reliable, so excluding them from the +// benchmark makes things easier. +// 3. An unpredictable mix of P-core and E-core execution can give inconsistent results. +// +// If a hybrid architecture is detected, this type is used to hold information about the P-cores. +// The detection method used varies across platforms. +#[derive(Debug)] +struct PCores { + /// The number of P-cores. + len: usize, + /// The list of P-cores, in a form suitable for passing to `taskset`. + list: String, } -#[cfg(not(target_os = "linux"))] -// Modify this stub if you want to add support for P-/E-cores on more OSs -fn performance_cores() -> Option<&'static String> { - None -} +static P_CORES: LazyLock> = LazyLock::new(p_cores); + +cfg_if::cfg_if! { + if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { + // On x86-64/Linux we look for the presence of `/sys/devices/cpu_core/` which indicates a + // hybrid architecture. + fn p_cores() -> Option { + if std::fs::exists("/sys/devices/cpu").unwrap() { + // `/sys/devices/cpu` exists: this is not a hybrid CPU. + None + } else if std::fs::exists("/sys/devices/cpu_core").unwrap() { + // `/sys/devices/cpu_core/` exists: this is a hybrid CPU, and the `cpus` file + // within contains the list of P-cores. (`sys/devices/cpu_atom/cpus` contains + // the list of E-cores). + let list = + std::fs::read_to_string("/sys/devices/cpu_core/cpus") + .unwrap() + .trim() + .to_string(); + eprintln!( + "WARNING: hybrid Intel CPU detected; test suite will only use P-cores: {list}" + ); + // Parse CPU list to extract the number of P-cores. This assumes the P-core ids are + // continuous, in format `m-n`. + let (first, last) = list + .split_once("-") + .unwrap_or_else(|| panic!("unsupported P-core list format: {list:?}.")); + let first = first + .parse::() + .expect("expected a number at the start of the P-core list"); + let last = last + .parse::() + .expect("expected a number at the end of the P-core list"); + let len = last - first + 1; // e.g. "0-3" is four cores: [0, 1, 2, 3] + Some(PCores { len, list }) + } else { + // Neither dir exists: something is wrong, because `/sys/devices/cpu` has been + // in Linux (on x86-64, at least) for over a decade. + eprintln!( + "WARNING: `/sys/devices/{{cpu,cpu_core}}` not found; \ + unable to determine if CPU has a hybrid architecture" + ); + None + } + } + } else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] { + // On ARM64/Linux there is no definitive way to distinguish P-cores from E-cores, so we + // must use a heuristic. + // + // Each core has a listed "capacity", a performance estimate relative to the most powerful + // core in the system (scaled 0-1024). For example, an ASUS GX10 Ascent has a Cortex-X925 + // with 10 P-cores and a Cortex-A725 with 10 E-cores. The reported capacities are: + // * Cores 0- 4: 718 (E-cores in cluster 1 with 8MiB L3 cache) + // * Cores 5- 9: 997 (P-cores in cluster 1 with 8MiB L3 cache) + // * Cores 10-14: 731 (E-cores in cluster 2 with 16MiB L3 cache) + // * Cores 15-18: 1017 (P-cores in cluster 2 with 16MiB L3 cache) + // * Core 19: 1024 (P-core in cluster 2 with 16MiB L3 cache)) + // + // The heuristic is that any core with a capacity at least 90% of the maximum capacity is + // considered a P-core, and any other core is considered an E-core. (The 718/731 and + // 997/1017 differences are presumably due to the L3 cache size. The reason for the + // 1017/1024 difference is unclear. Even though the P-cores are not all identical, they are + // close enough for our purposes.) + fn p_cores() -> Option { + let mut caps = vec![]; + for i in 0.. { + let path = format!("/sys/devices/system/cpu/cpu{i}/cpu_capacity"); + if !std::fs::exists(&path).unwrap() { + break; + } + let cap = std::fs::read_to_string(&path).unwrap().trim().parse::().unwrap(); + caps.push((i, cap)); + } -#[cfg(target_os = "linux")] -/// Makes the benchmark run only on Performance cores. -fn run_on_p_cores(path: &Path, cpu_list: &str) -> Command { - // Parse CPU list to extract the number of P-cores! - // This assumes the P-core id's are continuous, in format `first_id-last_id` - let (core_start, core_end) = cpu_list - .split_once("-") - .unwrap_or_else(|| panic!("Unsupported P-core list format: {cpu_list:?}.")); - let core_start: u32 = core_start - .parse() - .expect("Expected a number when parsing the start of the P-core list!"); - let core_end: u32 = core_end - .parse() - .expect("Expected a number when parsing the end of the P-core list!"); - let core_count = core_end - core_start + 1; // e.g. "0-3" is four cores: [0, 1, 2, 3] - - let mut cmd = Command::new("taskset"); - // Set job count to P-core count. This is done for 3 reasons: - // 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of - // events is lost. - // 2. The performance characteristics of E-cores are less reliable, so excluding them from the - // benchmark makes things easier. - // 3. An unpredictable mix of P-core and E-core execution will give inconsistent results. - cmd.env("CARGO_BUILD_JOBS", format!("{core_count}")); - // Pass the P-core list to taskset to pin task to the P-core. - cmd.arg("--cpu-list"); - cmd.arg(cpu_list); - cmd.arg(path); - cmd -} + if let Some(max_cap) = caps.iter().map(|(_, cap)| cap).max() { + // Filter out cores that fail the 90% capacity check. + let cap_threshold = *max_cap as f64 * 0.9; + let p_cores: Vec<_> = caps.iter().filter_map(|(i, cap)| { + if *cap as f64 >= cap_threshold { + Some(i.to_string()) + } else { + None + } + }).collect(); -#[cfg(not(target_os = "linux"))] -// Modify this stub if you want to add support for P-cores/E-cores on more OSs. -fn run_on_p_cores(_path: &Path, _cpu_list: &str) -> Command { - todo!("Can't run commands on the P-cores on this platform"); + if p_cores.len() == caps.len() { + // All cores have roughly the same capacity; this is not a hybrid CPU. + None + } else { + let list = p_cores.join(","); + eprintln!( + "WARNING: hybrid ARM CPU detected; test suite will only use P-cores: {list}" + ); + Some(PCores { + len: p_cores.len(), + list, + }) + } + } else { + eprintln!( + "WARNING: `/sys/devices/system/cpu/cpu*/cpu_capacity` not found; \ + unable to determine if CPU has a hybrid architecture" + ); + None + } + } + } else { + // Modify this stub if you want to add support for hybrid architectures on more platforms. + fn p_cores() -> Option { + None + } + } } impl<'a> CargoProcess<'a> { @@ -230,11 +285,17 @@ impl<'a> CargoProcess<'a> { } fn base_command(&self, cwd: &Path, subcommand: &str) -> Command { - // Processors with P-core and E-cores require special handling. - let mut cmd = if let Some(p_cores) = performance_cores() { - run_on_p_cores(Path::new(&self.toolchain.components.cargo), p_cores) + let cargo_path = Path::new(&self.toolchain.components.cargo); + let mut cmd = if let Some(p_cores) = (*P_CORES).as_ref() { + // Processors with P-cores and E-cores require special handling. + let mut cmd = Command::new("taskset"); + cmd.env("CARGO_BUILD_JOBS", p_cores.len.to_string()); + cmd.arg("--cpu-list"); + cmd.arg(&p_cores.list); + cmd.arg(cargo_path); + cmd } else { - Command::new(Path::new(&self.toolchain.components.cargo)) + Command::new(cargo_path) }; cmd // Not all cargo invocations (e.g. `cargo clean`) need all of these @@ -620,6 +681,11 @@ fn process_stat_output( let stdout = String::from_utf8(output.stdout.clone()).expect("utf8 output"); let mut stats = Stats::new(); + // ARM P-core events have names like `armv8_pmuv3_0/instructions:u/` and + // `armv8_pmuv3_1/branche-misses/`. + #[cfg(all(target_os = "linux", target_arch = "aarch64"))] + let arm_p_core_events_re = regex::Regex::new(r"armv[0-9]_pmuv[0-9]_[0-9]/([^/]*)/").unwrap(); + let mut self_profile_dir: Option = None; let mut self_profile_crate: Option = None; for line in stdout.lines() { @@ -670,24 +736,43 @@ fn process_stat_output( } }; } + let mut parts = line.split(';').map(|s| s.trim()); let cnt = get!(parts.next()); + if cnt == "" || cnt == "" || cnt.is_empty() { + continue; + } + let _unit = get!(parts.next()); - let mut name = get!(parts.next()); - // Map P-core events to normal events - if name == "cpu_core/instructions:u/" { - name = "instructions:u"; + + #[allow(unused_mut)] + let mut name = get!(parts.next()).to_string(); + // Map P-core event name to normal event names. + cfg_if::cfg_if! { + if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { + if name == "cpu_core/instructions:u/" { + name = "instructions:u".to_string(); + } + } else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] { + // ARM P-core events have names like `armv8_pmuv3_0/instructions:u/` and + // `armv8_pmuv3_1/branche-misses/`. + if let Some(event) = arm_p_core_events_re.captures(&name) { + name = event[1].to_string(); + } + } } + let _time = get!(parts.next()); + let pct = get!(parts.next()); - if cnt == "" || cnt == "" || cnt.is_empty() { - continue; - } if !pct.starts_with("100.") { + // If this fails, it's probably because the CPU has a hybrid architecture and the + // metric is split across P-cores and E-cores. See `PCores`. panic!("measurement of `{name}` only active for {pct}% of the time"); } + stats.insert( - name.to_owned(), + name, cnt.parse() .map_err(|e| DeserializeStatError::ParseError(cnt.to_string(), e))?, ); diff --git a/site/frontend/package-lock.json b/site/frontend/package-lock.json index 714223450..e68e5ed6e 100644 --- a/site/frontend/package-lock.json +++ b/site/frontend/package-lock.json @@ -592,7 +592,6 @@ "resolved": "https://registry.npmjs.org/@parcel/core/-/core-2.16.0.tgz", "integrity": "sha512-erH9GdLe8Boie0mCO8hXn8Qt/pCACsOFlKp8UHNMlPaizUtCDkCOQqwmSi+VyrJ3dMMCOc/qBwTSGAJaJE8/Kw==", "license": "MIT", - "peer": true, "dependencies": { "@mischnic/json-sourcemap": "^0.1.1", "@parcel/cache": "2.16.0", @@ -2614,7 +2613,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.9", "caniuse-lite": "^1.0.30001746", @@ -3737,7 +3735,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -3807,7 +3804,6 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.22.tgz", "integrity": "sha512-toaZjQ3a/G/mYaLSbV+QsQhIdMo9x5rrqIpYRObsJ6T/J+RyCSFwN2LHNVH9v8uIcljDNa3QzPVdv3Y6b9hAJQ==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.22", "@vue/compiler-sfc": "3.5.22",