Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions kernels/optimized/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,21 @@ target_link_libraries(
kernels_util_all_deps
)
# Apply the shared compile options to optimized_kernels. PUBLIC visibility
# means targets that link against optimized_kernels inherit them as well.
target_compile_options(
  optimized_kernels
  PUBLIC ${_common_compile_options}
)

# The optimized grid_sampler_2d kernel (op_grid_sampler_2d.cpp) relies on
# ARMv8.2-a+fp16 NEON intrinsics (vcvt_f32_f16 / vld1_f16) when built for
# aarch64. The extra `-march` flag is attached to that one source file rather
# than to the whole target, so non-arm64 builds (e.g. x86_64 on Android) never
# receive it. Per the kernel's own `#ifdef __aarch64__` guards, those builds
# fall through to the portable kernel instead.
if(ANDROID_ABI STREQUAL "arm64-v8a"
   OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64"
)
  # Source-level COMPILE_OPTIONS: only this translation unit gets the flag.
  set_property(
    SOURCE ${EXECUTORCH_ROOT}/kernels/optimized/cpu/op_grid_sampler_2d.cpp
    PROPERTY COMPILE_OPTIONS "-march=armv8.2-a+fp16"
  )
endif()

# Build a library for _optimized_kernels_srcs
#
# optimized_ops_lib: Register optimized ops kernels into Executorch runtime
Expand All @@ -83,6 +98,30 @@ gen_operators_lib(
executorch_core
)

# verify_optimized_kernels: on-device verifier for the optimized
# grid_sampler_2d / sum.IntList_out kernels.
#
# Built only when the caller passes -DEXECUTORCH_BUILD_OPTIMIZED_VERIFY=ON, so
# default AAR / library builds are not affected. The tool cross-checks both ops
# against an fp32 reference derived from the portable kernel and exits with a
# non-zero status on divergence.
if(EXECUTORCH_BUILD_OPTIMIZED_VERIFY)
  add_executable(
    verify_optimized_kernels
    ${EXECUTORCH_ROOT}/kernels/optimized/verify.cpp
  )

  # Links the optimized kernels under test, the portable kernels, and the
  # core runtime.
  target_link_libraries(
    verify_optimized_kernels
    PRIVATE
      optimized_kernels
      portable_kernels
      executorch_core
  )

  # Start from the compile options shared with the rest of this directory.
  target_compile_options(
    verify_optimized_kernels
    PRIVATE ${_common_compile_options}
  )

  # On arm64 targets, additionally build the verifier with the
  # ARMv8.2-a+fp16 architecture flag.
  if(ANDROID_ABI STREQUAL "arm64-v8a"
     OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64"
  )
    target_compile_options(
      verify_optimized_kernels
      PRIVATE -march=armv8.2-a+fp16
    )
  endif()
endif()

install(
# eigen_blas doesn't export itself, so we have to do our own install to export
# it.
Expand Down
Loading