From 256ace69baad0726aa7756805a1d9b0b5e3b2ea9 Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Wed, 14 Jan 2026 22:27:34 -0700 Subject: [PATCH] cmov: add `asm!` optimized `masknz32` for ARM32 In #1332 we ran into LLVM inserting branches in this routine for `thumbv6m-none-eabi` targets. It was "fixed" by fiddling around with `black_box` but that seems brittle. In #1334 we attempted a simple portable `asm!` optimization barrier approach but it did not work as expected. This instead opts to implement one of the fiddliest bits, mask generation, using ARM assembly instead. The resulting assembly is actually more efficient than what rustc/LLVM outputs and avoids touching the stack pointer. It's a simple enough function to implement in assembly on other platforms with stable `asm!` too, but this is a start. --- .github/workflows/cmov.yml | 1 - cmov/src/portable.rs | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmov.yml b/.github/workflows/cmov.yml index ecf054d4..9a2f2ea7 100644 --- a/.github/workflows/cmov.yml +++ b/.github/workflows/cmov.yml @@ -135,7 +135,6 @@ jobs: strategy: matrix: target: - - armv7-unknown-linux-gnueabi - powerpc-unknown-linux-gnu - s390x-unknown-linux-gnu - x86_64-unknown-linux-gnu diff --git a/cmov/src/portable.rs b/cmov/src/portable.rs index 7d35d2c6..6cdf7533 100644 --- a/cmov/src/portable.rs +++ b/cmov/src/portable.rs @@ -125,15 +125,39 @@ fn testnz64(mut x: u64) -> u64 { } /// Return a [`u32::MAX`] mask if `condition` is non-zero, otherwise return zero for a zero input. +#[cfg(not(target_arch = "arm"))] fn masknz32(condition: Condition) -> u32 { testnz32(condition.into()).wrapping_neg() } /// Return a [`u64::MAX`] mask if `condition` is non-zero, otherwise return zero for a zero input. +#[cfg(not(target_arch = "arm"))] fn masknz64(condition: Condition) -> u64 { testnz64(condition.into()).wrapping_neg() } +/// Optimized mask generation for ARM32 targets. 
+///
+/// Returns `0xFFFF_FFFF` if `condition` is non-zero and `0` if it is zero, computed with two
+/// flag-setting arithmetic instructions and no branch instruction.
+///
+/// NOTE(review): the non-ARM variant takes `Condition` while this one takes `u8`; presumably
+/// `Condition` is an alias for `u8` — confirm against its definition.
+#[cfg(target_arch = "arm")]
+fn masknz32(condition: u8) -> u32 {
+    // Zero-extend the byte so the whole 32-bit register operand is defined.
+    let mut out = condition as u32;
+    // SAFETY: the two instructions only rewrite the register bound to `out` and the condition
+    // flags; they access no memory and no stack, as `nomem` and `nostack` declare. The flags
+    // are treated as clobbered because `preserves_flags` is not set.
+    unsafe {
+        core::arch::asm!(
+            // out = 0 - out. Carry ends up set only when no borrow occurred, i.e. out was 0.
+            "rsbs {0}, {0}, #0", // Reverse subtract
+            // out = out - out - !carry: 0 when the input was 0, otherwise 0xFFFF_FFFF.
+            "sbcs {0}, {0}, {0}", // Subtract with carry, setting flags
+            inout(reg) out,
+            options(nostack, nomem),
+        );
+    }
+    out
+}
+
+/// 64-bit wrapper for targets that implement 32-bit mask generation in assembly.
+///
+/// Calls `masknz32` and copies its 32-bit result into both halves of the returned `u64`.
+#[cfg(target_arch = "arm")]
+fn masknz64(condition: u8) -> u64 {
+    let mask = masknz32(condition) as u64;
+    // `<<` binds tighter than `|`, so this is `mask | (mask << 32)`.
+    mask | mask << 32
+}
+
 #[cfg(test)]
 mod tests {
     #[test]