diff --git a/Cargo.lock b/Cargo.lock
index b2ea6a67fcb..ad551a0997c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -412,7 +412,7 @@ dependencies = [
  "cpp_demangle",
  "gimli 0.32.0",
  "libc 0.2.177",
- "memmap2",
+ "memmap2 0.9.5",
  "miniz_oxide",
  "rustc-demangle",
 ]
@@ -1323,6 +1323,7 @@ dependencies = [
  "criterion-perf-events",
  "crossbeam-channel",
  "datadog-php-profiling",
+ "dynasmrt",
  "env_logger 0.11.6",
  "http",
  "lazy_static",
@@ -1619,6 +1620,33 @@ version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125"
 
+[[package]]
+name = "dynasm"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33dc03612f42465a8ed7f5e354bc2b79ba54cedefa81d5bd3a064f1835adaba8"
+dependencies = [
+ "bitflags 1.3.2",
+ "byteorder",
+ "lazy_static",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "dynasmrt"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7dccc31a678058996aef614f6bd418ced384da70f284e83e2b7bf29b27b6a28"
+dependencies = [
+ "byteorder",
+ "dynasm",
+ "fnv",
+ "memmap2 0.5.10",
+]
+
 [[package]]
 name = "educe"
 version = "0.4.23"
@@ -1699,9 +1727,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
 name = "erased-serde"
-version = "0.4.9"
+version = "0.4.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3"
+checksum = "d2add8a07dd6a8d93ff627029c51de145e12686fbc36ecb298ac22e74cf02dec"
 dependencies = [
  "serde",
  "serde_core",
@@ -3337,6 +3365,15 @@ dependencies = [
  "rustix 0.38.43",
 ]
 
+[[package]]
+name = "memmap2"
+version = "0.5.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
+dependencies = [
+ "libc 0.2.177",
+]
+
 [[package]]
 name = "memmap2"
 version = "0.9.5"
@@ -5664,7 +5701,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13a4dfe4bbeef59c1f32fc7524ae7c95b9e1de5e79a43ce1604e181081d71b0c"
 dependencies = [
  "debugid",
- "memmap2",
+ "memmap2 0.9.5",
  "stable_deref_trait",
  "uuid",
 ]
@@ -5744,9 +5781,9 @@ dependencies = [
 
 [[package]]
 name = "target-lexicon"
-version = "0.13.4"
+version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba"
+checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
 
 [[package]]
 name = "target-triple"
diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml
index e41cb405526..93ff87629c1 100644
--- a/profiling/Cargo.toml
+++ b/profiling/Cargo.toml
@@ -22,6 +22,7 @@ cfg-if = { version = "1.0" }
 cpu-time = { version = "1.0" }
 chrono = { version = "0.4" }
 crossbeam-channel = { version = "0.5", default-features = false, features = ["std"] }
+dynasmrt = "2.0"
 http = { version = "1.4" }
 libdd-alloc = { git = "https://github.com/DataDog/libdatadog", tag = "v27.0.0" }
 libdd-profiling = { git = "https://github.com/DataDog/libdatadog", tag = "v27.0.0" }
diff --git a/profiling/build.rs b/profiling/build.rs
index 15e9ecb6ce2..24414cad76d 100644
--- a/profiling/build.rs
+++ b/profiling/build.rs
@@ -36,6 +36,7 @@ fn main() {
     let post_startup_cb = cfg_post_startup_cb(vernum);
     let preload = cfg_preload(vernum);
     let fibers = cfg_fibers(vernum);
+    let frameless = cfg_frameless(vernum);
     let run_time_cache = cfg_run_time_cache(vernum);
     let trigger_time_sample = cfg_trigger_time_sample();
     let zend_error_observer = cfg_zend_error_observer(vernum);
@@ -47,6 +48,7 @@ fn main() {
         preload,
         run_time_cache,
         fibers,
+        frameless,
         trigger_time_sample,
         zend_error_observer,
     );
@@ -103,6 +105,7 @@ fn build_zend_php_ffis(
     preload: bool,
     run_time_cache: bool,
     fibers: bool,
+    frameless: bool,
     trigger_time_sample: bool,
     zend_error_observer: bool,
 ) {
@@ -143,6 +146,7 @@ fn build_zend_php_ffis(
     let post_startup_cb = if post_startup_cb { "1" } else { "0" };
     let preload = if preload { "1" } else { "0" };
     let fibers = if fibers { "1" } else { "0" };
+    let frameless = if frameless { "1" } else { "0" };
     let run_time_cache = if run_time_cache { "1" } else { "0" };
     let trigger_time_sample = if trigger_time_sample { "1" } else { "0" };
     let zend_error_observer = if zend_error_observer { "1" } else { "0" };
@@ -159,6 +163,7 @@ fn build_zend_php_ffis(
         .define("CFG_POST_STARTUP_CB", post_startup_cb)
         .define("CFG_PRELOAD", preload)
         .define("CFG_FIBERS", fibers)
+        .define("CFG_FRAMELESS", frameless)
         .define("CFG_RUN_TIME_CACHE", run_time_cache)
         .define("CFG_STACK_WALKING_TESTS", stack_walking_tests)
         .define("CFG_TRIGGER_TIME_SAMPLE", trigger_time_sample)
@@ -373,6 +378,16 @@ fn cfg_fibers(vernum: u64) -> bool {
     }
 }
 
+fn cfg_frameless(vernum: u64) -> bool {
+    println!("cargo::rustc-check-cfg=cfg(php_frameless)");
+    if vernum >= 80400 {
+        println!("cargo:rustc-cfg=php_frameless");
+        true
+    } else {
+        false
+    }
+}
+
 fn cfg_php_feature_flags(vernum: u64) {
     println!("cargo::rustc-check-cfg=cfg(php_gc_status, php_zend_compile_string_has_position, php_gc_status_extended, php_frameless, php_opcache_restart_hook, php_zend_mm_set_custom_handlers_ex)");
 
@@ -386,7 +401,6 @@ fn cfg_php_feature_flags(vernum: u64) {
         println!("cargo:rustc-cfg=php_gc_status_extended");
     }
     if vernum >= 80400 {
-        println!("cargo:rustc-cfg=php_frameless");
         println!("cargo:rustc-cfg=php_opcache_restart_hook");
         println!("cargo:rustc-cfg=php_zend_mm_set_custom_handlers_ex");
     }
diff --git a/profiling/src/php_ffi.c b/profiling/src/php_ffi.c
index f4b42bcdaf3..b2cae921819 100644
--- a/profiling/src/php_ffi.c
+++ b/profiling/src/php_ffi.c
@@ -121,6 +121,10 @@ bool ddog_php_prof_is_post_startup(void) {
 static post_startup_cb_result (*orig_post_startup_cb)(void) = NULL;
 
 static post_startup_cb_result ddog_php_prof_post_startup_cb(void) {
+#if CFG_FRAMELESS
+    ddog_php_prof_post_startup(); // before preload+JIT (which may hardcode the flf handlers)
+#endif
+
     if (orig_post_startup_cb) {
         post_startup_cb_result (*cb)(void) = orig_post_startup_cb;
 
@@ -552,9 +556,6 @@ void ddog_php_test_free_fake_zend_function(zend_function *func) {
     free(func);
 }
 
-// Stub for zend_flf_functions (PHP 8.4+ frameless calls) to allow tests to link
-// without the real PHP runtime. The test doesn't exercise frameless code paths.
-__attribute__((weak)) zend_function **zend_flf_functions;
 #endif // CFG_STACK_WALKING_TESTS || CFG_TEST
 
 void *opcache_handle = NULL;
diff --git a/profiling/src/php_ffi.h b/profiling/src/php_ffi.h
index f9aef1a21ec..bd089940b77 100644
--- a/profiling/src/php_ffi.h
+++ b/profiling/src/php_ffi.h
@@ -153,6 +153,10 @@ void ddog_php_prof_zend_mm_set_custom_handlers(zend_mm_heap *heap,
 
 zend_execute_data* ddog_php_prof_get_current_execute_data();
 
+#if CFG_FRAMELESS
+void ddog_php_prof_post_startup();
+#endif
+
 #if CFG_FIBERS
 zend_fiber* ddog_php_prof_get_active_fiber();
 zend_fiber* ddog_php_prof_get_active_fiber_test();
diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs
index bfd9390c90d..34476136ee6 100644
--- a/profiling/src/wall_time.rs
+++ b/profiling/src/wall_time.rs
@@ -137,6 +137,189 @@ pub extern "C" fn ddog_php_prof_interrupt_function(execute_data: *mut zend_execu
     }
 }
 
+#[cfg(php_frameless)]
+mod frameless {
+    /// Run-time generated trampolines that wrap PHP's frameless function (FLF)
+    /// handlers so the profiler's interrupt check runs after each handler returns.
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    mod trampoline {
+        #[cfg(target_arch = "aarch64")]
+        use dynasmrt::aarch64::Assembler;
+        #[cfg(target_arch = "aarch64")]
+        use dynasmrt::DynasmLabelApi;
+        #[cfg(target_arch = "x86_64")]
+        use dynasmrt::x64::Assembler;
+        use dynasmrt::{dynasm, DynasmApi, ExecutableBuffer};
+        use std::ffi::c_void;
+        use std::sync::atomic::Ordering;
+        use log::error;
+        use crate::bindings::{zend_flf_functions, zend_flf_handlers, zend_frameless_function_info};
+        use crate::{profiling::Profiler, RefCellExt, REQUEST_LOCALS, zend};
+
+        // This ensures that the memory stays reachable and is replaced on apache reload for example
+        static mut INFOS: Vec<zend_frameless_function_info> = Vec::new();
+        static mut BUFFER: Option<ExecutableBuffer> = None;
+
+        /// Replaces every entry of `zend_flf_handlers` with a generated trampoline
+        /// that calls the original handler and then tail-calls
+        /// `ddog_php_prof_icall_trampoline_target`.
+        ///
+        /// # Safety
+        /// `zend_flf_handlers` must be NULL-terminated, `zend_flf_functions` must
+        /// hold a valid function for each handler, and nothing else may access
+        /// these tables or `INFOS`/`BUFFER` while this runs.
+        pub unsafe fn install() {
+            // Collect frameless functions ahead of time to batch-process them.
+            // Otherwise we get a new memory page per function.
+            let mut originals = Vec::new();
+            let mut i = 0;
+            loop {
+                let original = *zend_flf_handlers.add(i);
+                if original.is_null() {
+                    break;
+                }
+                originals.push(original);
+                i += 1;
+            }
+
+            let mut assembler = match Assembler::new() {
+                Ok(assembler) => assembler,
+                Err(e) => {
+                    error!("Failed to create assembler for FLF trampolines: {e}. Frameless functions will not appear in wall-time profiles.");
+                    return;
+                }
+            };
+            let interrupt_addr = ddog_php_prof_icall_trampoline_target as *const ();
+            let mut offsets = Vec::new(); // keep function offsets
+            for orig in originals.iter() {
+                offsets.push(assembler.offset());
+                // Calls original function, then calls interrupt function.
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // We need labels on aarch64 as immediates cannot be more than 16 bits
+                    dynasm!(assembler
+                        ; stp x29, x30, [sp, -16]! // save link register and allow clobber of x29
+                        ; mov x29, sp // store stack pointer
+                        ; ldr x16, >orig_label
+                        ; blr x16
+                        ; ldp x29, x30, [sp], 16 // restore link register and x29
+                        ; ldr x16, >interrupt_label
+                        ; br x16 // tail call
+                        ; orig_label: ; .qword *orig as i64
+                    );
+                }
+                #[cfg(target_arch = "x86_64")]
+                dynasm!(assembler
+                    ; push rbp // align stack
+                    ; mov rax, QWORD *orig as i64
+                    ; call rax
+                    ; pop rbp // restore stack
+                    ; mov rax, QWORD interrupt_addr as i64
+                    ; jmp rax // tail call
+                );
+            }
+            #[cfg(target_arch = "aarch64")]
+            dynasm!(assembler
+                ; interrupt_label: ; .qword interrupt_addr as i64 );
+
+            // Allocate enough space for all frameless_function_infos including trailing NULLs
+            // The Vec must not reallocate: pointers into it are stored in the zend_functions below.
+            let mut infos = Vec::with_capacity(originals.len() * 2);
+
+            let buffer = match assembler.finalize() {
+                Ok(buffer) => buffer,
+                Err(_) => {
+                    error!("Failed to finalize FLF trampolines (mprotect PROT_EXEC denied?). Frameless functions will not appear in cpu/wall-time profiles. This may be caused by security policies (SELinux, seccomp, etc.).");
+                    return;
+                }
+            };
+            let mut last_infos = std::ptr::null_mut();
+            for (i, offset) in offsets.iter().enumerate() {
+                let wrapper = buffer.as_ptr().add(offset.0) as *mut c_void;
+                *zend_flf_handlers.add(i) = wrapper;
+                let func = &mut **zend_flf_functions.add(i);
+
+                // We need to do copies of frameless_function_infos as they may be readonly memory
+                let original_info = func.internal_function.frameless_function_infos;
+                if original_info != last_infos {
+                    let info_size = infos.len();
+                    let mut ptr = original_info;
+                    loop {
+                        let info = *ptr;
+                        infos.push(info);
+                        if info.handler.is_null() {
+                            break;
+                        }
+                        ptr = ptr.add(1);
+                    }
+                    // `as_mut_ptr` (not `as_ptr`): the handlers are rewritten through this pointer below.
+                    last_infos = infos.as_mut_ptr().add(info_size);
+                    func.internal_function.frameless_function_infos = last_infos;
+                }
+                let mut ptr = last_infos;
+                loop {
+                    let info = &mut *ptr;
+                    if info.handler.is_null() {
+                        break;
+                    }
+                    if info.handler == originals[i] {
+                        info.handler = wrapper;
+                    }
+                    ptr = ptr.add(1);
+                }
+            }
+
+            INFOS = infos;
+            BUFFER = Some(buffer);
+        }
+
+        /// Jump target of every trampoline: calls `Profiler::collect_time` when
+        /// profiling is enabled and the request has a non-zero interrupt count.
+        #[no_mangle]
+        #[inline(never)]
+        pub extern "C" fn ddog_php_prof_icall_trampoline_target() {
+            let interrupt_count = REQUEST_LOCALS
+                .try_with_borrow(|locals| {
+                    if !locals.system_settings().profiling_enabled {
+                        return 0;
+                    }
+                    locals.interrupt_count.swap(0, Ordering::SeqCst)
+                })
+                .unwrap_or(0);
+
+            if interrupt_count == 0 {
+                return;
+            }
+
+            if let Some(profiler) = Profiler::get() {
+                // SAFETY: profiler doesn't mutate execute_data
+                let execute_data = unsafe { zend::ddog_php_prof_get_current_execute_data() };
+                profiler.collect_time(execute_data, interrupt_count);
+            }
+        }
+    }
+
+    /// Called from the C post-startup callback; installs the trampolines on
+    /// x86_64 and aarch64 and does nothing on other architectures.
+    ///
+    /// # Safety
+    /// Same requirements as `trampoline::install`.
+    #[no_mangle]
+    pub unsafe extern "C" fn ddog_php_prof_post_startup() {
+        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+        trampoline::install();
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use crate::bindings::zend_function;
+
+        // Stub so tests link without the real PHP runtime providing this symbol.
+        #[no_mangle]
+        pub static mut zend_flf_functions: *mut *mut zend_function = std::ptr::null_mut();
+    }
+}
+
 /// A wrapper for the `ddog_php_prof_interrupt_function` to call the
 /// previous interrupt handler, if there was one.
 #[no_mangle]