-
-
Notifications
You must be signed in to change notification settings - Fork 14
Make LibDeflate finally compile on Linux ARM #2361
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,27 +27,75 @@ if(NOT LibDeflate_FOUND) | |
| sourcemeta_add_default_options(PRIVATE libdeflate) | ||
|
|
||
| # Check if the assembler supports ARM dot-product (udot) instructions. | ||
| # GCC 14+ assumes binutils is new enough, but some CI environments | ||
| # pair GCC 14 with older binutils that lack udot support | ||
| if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") | ||
| include(CheckCSourceCompiles) | ||
| set(LIBDEFLATE_SAVED_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") | ||
| set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -march=armv8.2-a+dotprod") | ||
| check_c_source_compiles(" | ||
| #include <arm_neon.h> | ||
| int main(void) { | ||
| uint32x4_t a = vdupq_n_u32(0); | ||
| uint8x16_t b = vdupq_n_u8(0); | ||
| uint8x16_t c = vdupq_n_u8(0); | ||
| a = vdotq_u32(a, b, c); | ||
| return 0; | ||
| } | ||
| " LIBDEFLATE_HAS_DOTPROD_ASSEMBLER) | ||
| set(CMAKE_REQUIRED_FLAGS "${LIBDEFLATE_SAVED_CMAKE_REQUIRED_FLAGS}") | ||
| if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND | ||
| CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 14) | ||
| check_c_source_compiles(" | ||
| #include <arm_neon.h> | ||
| __attribute__((target(\"+dotprod\"))) | ||
| int test(void) { | ||
| uint32x4_t a = vdupq_n_u32(0); | ||
| uint8x16_t b = vdupq_n_u8(0); | ||
| uint8x16_t c = vdupq_n_u8(0); | ||
| a = vdotq_u32(a, b, c); | ||
| return (int)vgetq_lane_u32(a, 0); | ||
| } | ||
| int main(void) { return test(); } | ||
| " LIBDEFLATE_HAS_DOTPROD_ASSEMBLER) | ||
| else() | ||
| set(LIBDEFLATE_SAVED_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") | ||
| set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -march=armv8.2-a+dotprod") | ||
| check_c_source_compiles(" | ||
| #include <arm_neon.h> | ||
| int main(void) { | ||
| uint32x4_t a = vdupq_n_u32(0); | ||
| uint8x16_t b = vdupq_n_u8(0); | ||
| uint8x16_t c = vdupq_n_u8(0); | ||
| a = vdotq_u32(a, b, c); | ||
| return 0; | ||
| } | ||
| " LIBDEFLATE_HAS_DOTPROD_ASSEMBLER) | ||
| set(CMAKE_REQUIRED_FLAGS "${LIBDEFLATE_SAVED_CMAKE_REQUIRED_FLAGS}") | ||
| endif() | ||
| if(NOT LIBDEFLATE_HAS_DOTPROD_ASSEMBLER) | ||
| target_compile_definitions(libdeflate PRIVATE | ||
| LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_DOTPROD) | ||
| endif() | ||
|
|
||
| if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND | ||
| CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 14) | ||
| check_c_source_compiles(" | ||
| #include <arm_neon.h> | ||
| __attribute__((target(\"+crypto,+crc,+sha3\"))) | ||
| int test(void) { | ||
| uint8x16_t a = vdupq_n_u8(0); | ||
| uint8x16_t b = vdupq_n_u8(0); | ||
| uint8x16_t c = vdupq_n_u8(0); | ||
| a = veor3q_u8(a, b, c); | ||
|
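There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to the dotprod probe, the […remainder of this review comment was lost in extraction; it presumably raises the same constant-folding concern for the `veor3q_u8` probe on the preceding line…]. Severity: medium. 🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage. |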
||
| return (int)vgetq_lane_u8(a, 0); | ||
| } | ||
| int main(void) { return test(); } | ||
| " LIBDEFLATE_HAS_SHA3_ASSEMBLER) | ||
| else() | ||
| set(LIBDEFLATE_SAVED_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") | ||
| set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -march=armv8.2-a+crypto+crc+sha3") | ||
| check_c_source_compiles(" | ||
| #include <arm_neon.h> | ||
| int main(void) { | ||
| uint8x16_t a = vdupq_n_u8(0); | ||
| uint8x16_t b = vdupq_n_u8(0); | ||
| uint8x16_t c = vdupq_n_u8(0); | ||
| a = veor3q_u8(a, b, c); | ||
| return 0; | ||
| } | ||
| " LIBDEFLATE_HAS_SHA3_ASSEMBLER) | ||
| set(CMAKE_REQUIRED_FLAGS "${LIBDEFLATE_SAVED_CMAKE_REQUIRED_FLAGS}") | ||
| endif() | ||
| if(NOT LIBDEFLATE_HAS_SHA3_ASSEMBLER) | ||
| target_compile_definitions(libdeflate PRIVATE | ||
| LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_SHA3) | ||
| endif() | ||
| endif() | ||
|
|
||
| target_include_directories(libdeflate PUBLIC | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`check_c_source_compiles()` here uses all-zero vector inputs, which some optimization settings could constant-fold so the generated assembly never contains a dotprod instruction. If that happens, this probe could incorrectly succeed even when the assembler can’t assemble dotprod, leading to later build failures when libdeflate’s real dotprod paths are compiled. Severity: medium
🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.