Skip to content

Commit 89b27b4

Browse files
committed
Add support for ARM hybrid CPUs.
Specifically:
- Add support for detecting ARM hybrid CPUs, via a heuristic on CPU "capacity".
- Adjust ARM-specific event names as necessary, e.g. `armv8_pmuv3_0/instructions:u/` -> `instructions:u`.

There is also some refactoring of the existing code for handling Intel hybrid architectures, e.g. merging `run_on_p_cores` into `performance_cores`, to avoid code duplication.
1 parent 1c128cc commit 89b27b4

File tree

3 files changed

+163
-83
lines changed

3 files changed

+163
-83
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

collector/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ serde = { workspace = true, features = ["derive"] }
1818
serde_json = { workspace = true }
1919
tokio = { workspace = true, features = ["rt", "process"] }
2020

21+
cfg-if = "1"
2122
thiserror = "2"
2223
tempfile = "3"
2324
libc = "0.2"

collector/src/compile/execute/mod.rs

Lines changed: 161 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -135,81 +135,134 @@ pub struct CargoProcess<'a> {
135135
pub workspace_package: Option<String>,
136136
}
137137

138-
/// Returns an optional list of P-cores, if the system has P-cores and E-cores.
139-
/// This list *should* be in a format suitable for the `taskset` command.
140-
#[cfg(target_os = "linux")]
141-
fn performance_cores() -> Option<&'static String> {
142-
use std::sync::LazyLock;
143-
static PERFORMANCE_CORES: LazyLock<Option<String>> = LazyLock::new(|| {
144-
if std::fs::exists("/sys/devices/cpu")
145-
.expect("Could not check if `/sys/devices/cpu` exists")
146-
{
147-
// If /sys/devices/cpu exists, then this is not a hybrid CPU.
148-
None
149-
} else if std::fs::exists("/sys/devices/cpu_core")
150-
.expect("Could not check if `/sys/devices/cpu_core` exists!")
151-
{
152-
// If /sys/devices/cpu_core exists, then this is a hybrid CPU.
153-
eprintln!("WARNING: hybrid Intel CPU detected.");
154-
eprintln!("WARNING: test suite will only use P-cores, not E-cores");
155-
Some(
156-
std::fs::read_to_string("/sys/devices/cpu_core/cpus")
157-
.unwrap()
158-
.trim()
159-
.to_string(),
160-
)
161-
} else {
162-
// If neither dir exists, then something is wrong, because `/sys/devices/cpu` has been
163-
// in Linux for over a decade.
164-
eprintln!("WARNING: neither `/sys/devices/cpu` nor `/sys/devices/cpu_core` present");
165-
eprintln!("WARNING: unable to determine if CPU has a hybrid architecture");
166-
None
167-
}
168-
});
169-
(*PERFORMANCE_CORES).as_ref()
138+
// Some CPUs have a hybrid architecture mixing P-cores (performance) and E-cores (efficiency).
139+
// When benchmarking we use `taskset` to restrict execution to P-cores. Why?
140+
// 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of events
141+
// is lost.
142+
// 2. The performance characteristics of E-cores are less reliable, so excluding them from the
143+
// benchmark makes things easier.
144+
// 3. An unpredictable mix of P-core and E-core execution can give inconsistent results.
145+
//
146+
// If a hybrid architecture is detected, this type is used to hold information about the P-cores.
147+
// The detection method used varies across platforms.
148+
#[derive(Debug)]
149+
struct PCores {
150+
/// The number of P-cores.
151+
len: usize,
152+
/// The list of P-cores, in a form suitable for passing to `taskset`.
153+
list: String,
170154
}
171155

172-
#[cfg(not(target_os = "linux"))]
173-
// Modify this stub if you want to add support for P-/E-cores on more OSs
174-
fn performance_cores() -> Option<&'static String> {
175-
None
176-
}
156+
static P_CORES: LazyLock<Option<PCores>> = LazyLock::new(p_cores);
157+
158+
cfg_if::cfg_if! {
159+
if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] {
160+
// On x86-64/Linux we look for the presence of `/sys/devices/cpu_core/` which indicates a
161+
// hybrid architecture.
162+
fn p_cores() -> Option<PCores> {
163+
if std::fs::exists("/sys/devices/cpu").unwrap() {
164+
// `/sys/devices/cpu` exists: this is not a hybrid CPU.
165+
None
166+
} else if std::fs::exists("/sys/devices/cpu_core").unwrap() {
167+
// `/sys/devices/cpu_core/` exists: this is a hybrid CPU, and the `cpus` file
168+
// within contains the list of P-cores. (`/sys/devices/cpu_atom/cpus` contains
169+
// the list of E-cores).
170+
let list =
171+
std::fs::read_to_string("/sys/devices/cpu_core/cpus")
172+
.unwrap()
173+
.trim()
174+
.to_string();
175+
eprintln!(
176+
"WARNING: hybrid Intel CPU detected; test suite will only use P-cores: {list}"
177+
);
178+
// Parse CPU list to extract the number of P-cores. This assumes the P-core ids are
179+
// contiguous, in the format `m-n`.
180+
let (first, last) = list
181+
.split_once("-")
182+
.unwrap_or_else(|| panic!("unsupported P-core list format: {list:?}."));
183+
let first = first
184+
.parse::<usize>()
185+
.expect("expected a number at the start of the P-core list");
186+
let last = last
187+
.parse::<usize>()
188+
.expect("expected a number at the end of the P-core list");
189+
let len = last - first + 1; // e.g. "0-3" is four cores: [0, 1, 2, 3]
190+
Some(PCores { len, list })
191+
} else {
192+
// Neither dir exists: something is wrong, because `/sys/devices/cpu` has been
193+
// in Linux (on x86-64, at least) for over a decade.
194+
eprintln!(
195+
"WARNING: `/sys/devices/{{cpu,cpu_core}}` not found; \
196+
unable to determine if CPU has a hybrid architecture"
197+
);
198+
None
199+
}
200+
}
201+
} else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] {
202+
// On ARM64/Linux there is no definitive way to distinguish P-cores from E-cores, so we
203+
// must use a heuristic.
204+
//
205+
// Each core has a listed "capacity", a performance estimate relative to the most powerful
206+
// core in the system (scaled 0-1024). For example, an ASUS GX10 Ascent has a Cortex-X925
207+
// with 10 P-cores and a Cortex-A725 with 10 E-cores. The reported capacities are:
208+
// * Cores 0- 4: 718 (E-cores)
209+
// * Cores 5- 9: 997 (P-cores)
210+
// * Cores 10-14: 731 (E-cores)
211+
// * Cores 15-18: 1017 (P-cores)
212+
// * Core 19: 1024 (P-core)
213+
//
214+
// (It is unknown why there are minor variations, but the two categories are clear.) The
215+
// heuristic is that any core with a capacity at least 90% of the maximum capacity is
216+
// considered a P-core, and any other core is considered an E-core.
217+
fn p_cores() -> Option<PCores> {
218+
let mut caps = vec![];
219+
for i in 0.. {
220+
let path = format!("/sys/devices/system/cpu/cpu{i}/cpu_capacity");
221+
if !std::fs::exists(&path).unwrap() {
222+
break;
223+
}
224+
let cap = std::fs::read_to_string(&path).unwrap().trim().parse::<usize>().unwrap();
225+
caps.push((i, cap));
226+
}
177227

178-
#[cfg(target_os = "linux")]
179-
/// Makes the benchmark run only on Performance cores.
180-
fn run_on_p_cores(path: &Path, cpu_list: &str) -> Command {
181-
// Parse CPU list to extract the number of P-cores!
182-
// This assumes the P-core id's are continuous, in format `first_id-last_id`
183-
let (core_start, core_end) = cpu_list
184-
.split_once("-")
185-
.unwrap_or_else(|| panic!("Unsupported P-core list format: {cpu_list:?}."));
186-
let core_start: u32 = core_start
187-
.parse()
188-
.expect("Expected a number when parsing the start of the P-core list!");
189-
let core_end: u32 = core_end
190-
.parse()
191-
.expect("Expected a number when parsing the end of the P-core list!");
192-
let core_count = core_end - core_start + 1; // e.g. "0-3" is four cores: [0, 1, 2, 3]
193-
194-
let mut cmd = Command::new("taskset");
195-
// Set job count to P-core count. This is done for 3 reasons:
196-
// 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of
197-
// events is lost.
198-
// 2. The performance characteristics of E-cores are less reliable, so excluding them from the
199-
// benchmark makes things easier.
200-
// 3. An unpredictable mix of P-core and E-core execution will give inconsistent results.
201-
cmd.env("CARGO_BUILD_JOBS", format!("{core_count}"));
202-
// Pass the P-core list to taskset to pin task to the P-core.
203-
cmd.arg("--cpu-list");
204-
cmd.arg(cpu_list);
205-
cmd.arg(path);
206-
cmd
207-
}
228+
if let Some(max_cap) = caps.iter().map(|(_, cap)| cap).max() {
229+
// Filter out cores that fail the 90% capacity check.
230+
let cap_threshold = *max_cap as f64 * 0.9;
231+
let p_cores: Vec<_> = caps.iter().filter_map(|(i, cap)| {
232+
if *cap as f64 >= cap_threshold {
233+
Some(i.to_string())
234+
} else {
235+
None
236+
}
237+
}).collect();
208238

209-
#[cfg(not(target_os = "linux"))]
210-
// Modify this stub if you want to add support for P-cores/E-cores on more OSs.
211-
fn run_on_p_cores(_path: &Path, _cpu_list: &str) -> Command {
212-
todo!("Can't run commands on the P-cores on this platform");
239+
if p_cores.len() == caps.len() {
240+
// All cores have roughly the same capacity; this is not a hybrid CPU.
241+
None
242+
} else {
243+
let list = p_cores.join(",");
244+
eprintln!(
245+
"WARNING: hybrid ARM CPU detected; test suite will only use P-cores: {list}"
246+
);
247+
Some(PCores {
248+
len: p_cores.len(),
249+
list,
250+
})
251+
}
252+
} else {
253+
eprintln!(
254+
"WARNING: `/sys/devices/system/cpu/cpu*/cpu_capacity` not found; \
255+
unable to determine if CPU has a hybrid architecture"
256+
);
257+
None
258+
}
259+
}
260+
} else {
261+
// Modify this stub if you want to add support for hybrid architectures on more platforms.
262+
fn p_cores() -> Option<PCores> {
263+
None
264+
}
265+
}
213266
}
214267

215268
impl<'a> CargoProcess<'a> {
@@ -230,11 +283,17 @@ impl<'a> CargoProcess<'a> {
230283
}
231284

232285
fn base_command(&self, cwd: &Path, subcommand: &str) -> Command {
233-
// Processors with P-core and E-cores require special handling.
234-
let mut cmd = if let Some(p_cores) = performance_cores() {
235-
run_on_p_cores(Path::new(&self.toolchain.components.cargo), p_cores)
286+
let cargo_path = Path::new(&self.toolchain.components.cargo);
287+
let mut cmd = if let Some(p_cores) = (*P_CORES).as_ref() {
288+
// Processors with P-cores and E-cores require special handling.
289+
let mut cmd = Command::new("taskset");
290+
cmd.env("CARGO_BUILD_JOBS", p_cores.len.to_string());
291+
cmd.arg("--cpu-list");
292+
cmd.arg(&p_cores.list);
293+
cmd.arg(cargo_path);
294+
cmd
236295
} else {
237-
Command::new(Path::new(&self.toolchain.components.cargo))
296+
Command::new(cargo_path)
238297
};
239298
cmd
240299
// Not all cargo invocations (e.g. `cargo clean`) need all of these
@@ -670,24 +729,43 @@ fn process_stat_output(
670729
}
671730
};
672731
}
732+
673733
let mut parts = line.split(';').map(|s| s.trim());
674734
let cnt = get!(parts.next());
735+
if cnt == "<not supported>" || cnt == "<not counted>" || cnt.is_empty() {
736+
continue;
737+
}
738+
675739
let _unit = get!(parts.next());
676-
let mut name = get!(parts.next());
677-
// Map P-core events to normal events
678-
if name == "cpu_core/instructions:u/" {
679-
name = "instructions:u";
740+
741+
let mut name = get!(parts.next()).to_string();
742+
// Map P-core event names to normal event names.
743+
cfg_if::cfg_if! {
744+
if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] {
745+
if name == "cpu_core/instructions:u/" {
746+
name = "instructions:u".to_string();
747+
}
748+
} else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] {
749+
// ARM P-core events have names like `armv8_pmuv3_0/instructions:u/` and
750+
// `armv8_pmuv3_1/branch-misses/`.
751+
let re = regex::Regex::new(r"armv[0-9]_pmuv[0-9]_[0-9]/([^/]*)/").unwrap();
752+
if let Some(event) = re.captures(&name) {
753+
name = event[1].to_string();
754+
}
755+
}
680756
}
757+
681758
let _time = get!(parts.next());
759+
682760
let pct = get!(parts.next());
683-
if cnt == "<not supported>" || cnt == "<not counted>" || cnt.is_empty() {
684-
continue;
685-
}
686761
if !pct.starts_with("100.") {
762+
// If this fails, it's probably because the CPU has a hybrid architecture and the
763+
// metric is split across P-cores and E-cores. See `PCores`.
687764
panic!("measurement of `{name}` only active for {pct}% of the time");
688765
}
766+
689767
stats.insert(
690-
name.to_owned(),
768+
name,
691769
cnt.parse()
692770
.map_err(|e| DeserializeStatError::ParseError(cnt.to_string(), e))?,
693771
);

0 commit comments

Comments
 (0)