From 427feadd0d7fd9928fc33436d5ee983150f7ce51 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 24 Apr 2026 12:50:07 -0700 Subject: [PATCH 1/3] Inline `context.{get,set}` in components This commit reimplements the `context.{get,set}` intrinsics in the component model, introduced in the component-model-async and component-model-threading proposals. In WASIp3, for example, these intrinsics are intended to replace the `global`s used for the stack pointer and TLS base in previous modules. The implementation of loading from a `global` is a single load instruction, whereas the previous implementation of `context.get` was a full libcall, which is significantly more expensive. The goal of this PR is to ensure that the transition to using `context.get` and `context.set` for high-performance uses retains the same performance as the WASIp2 constructs. Specifically the storage for `context.{get,set}` slots has been moved into the `VMStoreContext` structure which has a known layout to compiled code. There still remains storage within each `GuestThread` because there's only one store, and the idea is that whenever threads are switched, the switch operation is now slightly more expensive because it has to update and maintain the state in the store. The rationale for this is that these values will be accessed far more often than threads are swapped. The implementation chosen in this commit is to model the `context.{get,set}` intrinsics as `UnsafeIntrinsic`s. This is a bit of a shoehorn where they're not actually unsafe, but all of the plumbing and support for `UnsafeIntrinsic` is effectively exactly what these want. To avoid duplicating lots of infrastructure that's where these now reside. The `concurrent.rs` implementation has been updated to save/restore context from the store, and this additionally updates a few other switch points to ensure that the store never switches away from or to a deleted thread. 
This niche situation happened in a few scenarios with no impact from before, but with the switching implementation having to access threads it became load-bearing that these must be valid. The end result is that with `-Cinlining` the `context.{get,set}` instructions are two instructions instead of a full libcall. One instruction is loading `VMStoreContext`, which is GVN-able and hoist-able, while the other is the actual load/store. This is the same as the performance of the stack pointer being in an imported global, for example. --- crates/cranelift/src/compiler/component.rs | 117 +++++++++++------- crates/environ/src/component.rs | 4 - crates/environ/src/component/dfg.rs | 16 --- crates/environ/src/component/info.rs | 24 ---- crates/environ/src/component/intrinsic.rs | 5 + .../environ/src/component/translate/inline.rs | 32 ++--- crates/environ/src/vmoffsets.rs | 10 ++ .../src/runtime/component/concurrent.rs | 78 +++++++----- .../src/runtime/vm/component/libcalls.rs | 21 ---- crates/wasmtime/src/runtime/vm/vmcontext.rs | 14 +++ src/commands/objdump.rs | 1 + ...-may-leave-without-signals-based-traps.wat | 4 +- .../component-model/context-intrinsics.wat | 72 +++++++++++ .../inlining-and-unsafe-intrinsics.wat | 3 +- .../unsafe-intrinsics-used.wat | 2 +- .../disas/riscv64-component-builtins-asm.wat | 13 +- tests/disas/riscv64-component-builtins.wat | 2 +- 17 files changed, 249 insertions(+), 169 deletions(-) create mode 100644 tests/disas/component-model/context-intrinsics.wat diff --git a/crates/cranelift/src/compiler/component.rs b/crates/cranelift/src/compiler/component.rs index ed437ab47413..ebffa0a9e1ac 100644 --- a/crates/cranelift/src/compiler/component.rs +++ b/crates/cranelift/src/compiler/component.rs @@ -724,28 +724,6 @@ impl<'a> TrampolineCompiler<'a> { |_, _| {}, ); } - Trampoline::ContextGet { instance, slot } => { - self.translate_libcall( - host::context_get, - TrapSentinel::NegativeOne, - WasmArgs::InRegisters, - |me, params| { - 
params.push(me.index_value(*instance)); - params.push(me.builder.ins().iconst(ir::types::I32, i64::from(*slot))); - }, - ); - } - Trampoline::ContextSet { instance, slot } => { - self.translate_libcall( - host::context_set, - TrapSentinel::Falsy, - WasmArgs::InRegisters, - |me, params| { - params.push(me.index_value(*instance)); - params.push(me.builder.ins().iconst(ir::types::I32, i64::from(*slot))); - }, - ); - } Trampoline::ThreadIndex => { self.translate_libcall( host::thread_index, @@ -1464,9 +1442,7 @@ impl<'a> TrampolineCompiler<'a> { Trampoline::ResourceRep { .. } | Trampoline::ThreadIndex | Trampoline::BackpressureInc { .. } - | Trampoline::BackpressureDec { .. } - | Trampoline::ContextGet { .. } - | Trampoline::ContextSet { .. } => return, + | Trampoline::BackpressureDec { .. } => return, // Intrinsics used in adapters generated by FACT that aren't called // directly from guest wasm, so no check is needed. @@ -1552,6 +1528,70 @@ impl<'a> TrampolineCompiler<'a> { &mut self.builder, ) } + + /// Loads `*mut VMStoreContext` and returns it. + /// + /// Note that the `*mut VMStoreContext` value is the same for all + /// `VMContext`-like structures in a store. In this case it's loaded from + /// the *caller* vmctx rather than the *callee* vmctx. The caller is using a + /// `VMContext` for core wasm which is passed in a register, where the + /// callee is a `VMComponentContext` loaded from the `VMContext`. By using + /// the caller vmctx we're able to possibly eliminate the dead load of the + /// `VMComponentContext` if it's otherwise unused. 
+ fn load_vm_store_context(&mut self) -> ir::Value { + let caller_vmctx = self.abi_load_params()[1]; + self.builder.ins().load( + self.isa.pointer_type(), + ir::MemFlags::trusted() + .with_readonly() + .with_alias_region(Some(ir::AliasRegion::Vmctx)) + .with_can_move(), + caller_vmctx, + i32::try_from(self.offsets.ptr.vmctx_store_context()).unwrap(), + ) + } + + fn translate_context_intrinsic(&mut self, intrinsic: UnsafeIntrinsic) { + let ty = match intrinsic { + UnsafeIntrinsic::ContextGetI32_0 + | UnsafeIntrinsic::ContextSetI32_0 + | UnsafeIntrinsic::ContextGetI32_1 + | UnsafeIntrinsic::ContextSetI32_1 => ir::types::I32, + _ => unreachable!(), + }; + let context_slot_size = 4; + let slot = match intrinsic { + UnsafeIntrinsic::ContextGetI32_0 | UnsafeIntrinsic::ContextSetI32_0 => 0, + UnsafeIntrinsic::ContextGetI32_1 | UnsafeIntrinsic::ContextSetI32_1 => 1, + _ => unreachable!(), + }; + let offset = + self.offsets.ptr.vmstore_context_component_context() + slot * context_slot_size; + let params = self.abi_load_params(); + let vmstore_context = self.load_vm_store_context(); + match intrinsic { + UnsafeIntrinsic::ContextGetI32_0 | UnsafeIntrinsic::ContextGetI32_1 => { + let context = self.builder.ins().load( + ty, + MemFlags::trusted(), + vmstore_context, + i32::from(offset), + ); + self.abi_store_results(&[context]); + } + UnsafeIntrinsic::ContextSetI32_0 | UnsafeIntrinsic::ContextSetI32_1 => { + let new_context = params[2]; + self.builder.ins().store( + MemFlags::trusted(), + new_context, + vmstore_context, + i32::from(offset), + ); + self.abi_store_results(&[]); + } + _ => unreachable!(), + } + } } // Helper structure to implement `TranslateTrap`. 
This isn't possible to do @@ -1633,12 +1673,7 @@ impl ComponentCompiler for Compiler { vmctx, wasmtime_environ::component::VMCOMPONENT_MAGIC, ); - let vm_store_context = c.builder.ins().load( - pointer_type, - MemFlags::trusted(), - vmctx, - i32::try_from(c.offsets.vm_store_context()).unwrap(), - ); + let vm_store_context = c.load_vm_store_context(); super::save_last_wasm_exit_fp_and_pc( &mut c.builder, pointer_type, @@ -1716,21 +1751,10 @@ impl ComponentCompiler for Compiler { | UnsafeIntrinsic::U32NativeStore | UnsafeIntrinsic::U64NativeStore => c.translate_store_intrinsic(intrinsic)?, UnsafeIntrinsic::StoreDataAddress => { - let [callee_vmctx, _caller_vmctx] = *c.abi_load_params() else { - unreachable!() - }; let pointer_type = self.isa.pointer_type(); // Load the `*mut VMStoreContext` out of our vmctx. - let store_ctx = c.builder.ins().load( - pointer_type, - ir::MemFlags::trusted() - .with_readonly() - .with_alias_region(Some(ir::AliasRegion::Vmctx)) - .with_can_move(), - callee_vmctx, - i32::try_from(c.offsets.vm_store_context()).unwrap(), - ); + let store_ctx = c.load_vm_store_context(); // Load the `*mut T` out of the `VMStoreContext`. let data_address = c.builder.ins().load( @@ -1752,6 +1776,13 @@ impl ComponentCompiler for Compiler { c.abi_store_results(&[data_address]); } + + UnsafeIntrinsic::ContextGetI32_0 + | UnsafeIntrinsic::ContextGetI32_1 + | UnsafeIntrinsic::ContextSetI32_0 + | UnsafeIntrinsic::ContextSetI32_1 => { + c.translate_context_intrinsic(intrinsic); + } } c.builder.finalize(); diff --git a/crates/environ/src/component.rs b/crates/environ/src/component.rs index 3a9b232d3f51..063ec8edb824 100644 --- a/crates/environ/src/component.rs +++ b/crates/environ/src/component.rs @@ -186,10 +186,6 @@ macro_rules! 
foreach_builtin_component_function { #[cfg(feature = "component-model-async")] error_context_transfer(vmctx: vmctx, src_idx: u32, src_table: u32, dst_table: u32) -> u64; #[cfg(feature = "component-model-async")] - context_get(vmctx: vmctx, caller_instance: u32, slot: u32) -> u64; - #[cfg(feature = "component-model-async")] - context_set(vmctx: vmctx, caller_instance: u32, slot: u32, val: u32) -> bool; - #[cfg(feature = "component-model-async")] thread_index(vmctx: vmctx) -> u64; #[cfg(feature = "component-model-async")] thread_new_indirect(vmctx: vmctx, caller_instance: u32, func_ty_id: u32, func_table_idx: u32, func_idx: u32, context: u32) -> u64; diff --git a/crates/environ/src/component/dfg.rs b/crates/environ/src/component/dfg.rs index 17915e028eb5..9e8211cf6788 100644 --- a/crates/environ/src/component/dfg.rs +++ b/crates/environ/src/component/dfg.rs @@ -476,14 +476,6 @@ pub enum Trampoline { Trap, EnterSyncCall, ExitSyncCall, - ContextGet { - instance: RuntimeComponentInstanceIndex, - slot: u32, - }, - ContextSet { - instance: RuntimeComponentInstanceIndex, - slot: u32, - }, ThreadIndex, ThreadNewIndirect { instance: RuntimeComponentInstanceIndex, @@ -1167,14 +1159,6 @@ impl LinearizeDfg<'_> { Trampoline::Trap => info::Trampoline::Trap, Trampoline::EnterSyncCall => info::Trampoline::EnterSyncCall, Trampoline::ExitSyncCall => info::Trampoline::ExitSyncCall, - Trampoline::ContextGet { instance, slot } => info::Trampoline::ContextGet { - instance: *instance, - slot: *slot, - }, - Trampoline::ContextSet { instance, slot } => info::Trampoline::ContextSet { - instance: *instance, - slot: *slot, - }, Trampoline::ThreadIndex => info::Trampoline::ThreadIndex, Trampoline::ThreadNewIndirect { instance, diff --git a/crates/environ/src/component/info.rs b/crates/environ/src/component/info.rs index 3f58fe5767ba..a0fb76d0a0fc 100644 --- a/crates/environ/src/component/info.rs +++ b/crates/environ/src/component/info.rs @@ -1118,28 +1118,6 @@ pub enum Trampoline { /// pushed 
by `EnterSyncCall`. ExitSyncCall, - /// Intrinsic used to implement the `context.get` component model builtin. - /// - /// The payload here represents that this is accessing the Nth slot of local - /// storage. - ContextGet { - /// The specific component instance which is calling the intrinsic. - instance: RuntimeComponentInstanceIndex, - /// Which slot to access. - slot: u32, - }, - - /// Intrinsic used to implement the `context.set` component model builtin. - /// - /// The payload here represents that this is accessing the Nth slot of local - /// storage. - ContextSet { - /// The specific component instance which is calling the intrinsic. - instance: RuntimeComponentInstanceIndex, - /// Which slot to update. - slot: u32, - }, - /// Intrinsic used to implement the `thread.index` component model builtin. ThreadIndex, @@ -1256,8 +1234,6 @@ impl Trampoline { Trap => format!("trap"), EnterSyncCall => format!("enter-sync-call"), ExitSyncCall => format!("exit-sync-call"), - ContextGet { .. } => format!("context-get"), - ContextSet { .. } => format!("context-set"), ThreadIndex => format!("thread-index"), ThreadNewIndirect { .. } => format!("thread-new-indirect"), ThreadSuspendToSuspended { .. } => format!("thread-suspend-to-suspended"), diff --git a/crates/environ/src/component/intrinsic.rs b/crates/environ/src/component/intrinsic.rs index 87e8e8631624..cbb473a470ce 100644 --- a/crates/environ/src/component/intrinsic.rs +++ b/crates/environ/src/component/intrinsic.rs @@ -22,6 +22,11 @@ macro_rules! 
for_each_unsafe_intrinsic { "u64-native-load" => U64NativeLoad : u64_native_load(address: u64) -> u64; "u64-native-store" => U64NativeStore : u64_native_store(address: u64, value: u64); + + "context-get-i32-0" => ContextGetI32_0 : context_get_i32_0() -> u32; + "context-set-i32-0" => ContextSetI32_0 : context_set_i32_0(val: u32); + "context-get-i32-1" => ContextGetI32_1 : context_get_i32_1() -> u32; + "context-set-i32-1" => ContextSetI32_1 : context_set_i32_1(val: u32); } }; } diff --git a/crates/environ/src/component/translate/inline.rs b/crates/environ/src/component/translate/inline.rs index 5b9a32623fbf..8d01cc5503c1 100644 --- a/crates/environ/src/component/translate/inline.rs +++ b/crates/environ/src/component/translate/inline.rs @@ -1074,24 +1074,24 @@ impl<'a> Inliner<'a> { frame.funcs.push((*func, dfg::CoreDef::Trampoline(index))); } ContextGet { func, i } => { - let index = self.result.trampolines.push(( - *func, - dfg::Trampoline::ContextGet { - instance: frame.instance, - slot: *i, - }, - )); - frame.funcs.push((*func, dfg::CoreDef::Trampoline(index))); + let intrinsic = match i { + 0 => UnsafeIntrinsic::ContextGetI32_0, + 1 => UnsafeIntrinsic::ContextGetI32_1, + _ => unreachable!(), + }; + frame + .funcs + .push((*func, dfg::CoreDef::UnsafeIntrinsic(*func, intrinsic))); } ContextSet { func, i } => { - let index = self.result.trampolines.push(( - *func, - dfg::Trampoline::ContextSet { - instance: frame.instance, - slot: *i, - }, - )); - frame.funcs.push((*func, dfg::CoreDef::Trampoline(index))); + let intrinsic = match i { + 0 => UnsafeIntrinsic::ContextSetI32_0, + 1 => UnsafeIntrinsic::ContextSetI32_1, + _ => unreachable!(), + }; + frame + .funcs + .push((*func, dfg::CoreDef::UnsafeIntrinsic(*func, intrinsic))); } ThreadIndex { func } => { let index = self diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs index d18bf87a7940..d7a3011d089c 100644 --- a/crates/environ/src/vmoffsets.rs +++ b/crates/environ/src/vmoffsets.rs @@ 
-250,6 +250,16 @@ pub trait PtrSize { self.vmstore_context_stack_chain() + self.size_of_vmstack_chain() } + /// Return the offset of the `async_guard_range` field of `VMStoreContext`. + fn vmstore_context_async_guard_range(&self) -> u8 { + self.vmstore_context_store_data() + self.size() + } + + /// Return the offset of the `component_context` field of `VMStoreContext`. + fn vmstore_context_component_context(&self) -> u8 { + self.vmstore_context_async_guard_range() + 2 * self.size() + } + // Offsets within `VMMemoryDefinition` /// The offset of the `base` field. diff --git a/crates/wasmtime/src/runtime/component/concurrent.rs b/crates/wasmtime/src/runtime/component/concurrent.rs index ed2138e8607c..43b5c2186929 100644 --- a/crates/wasmtime/src/runtime/component/concurrent.rs +++ b/crates/wasmtime/src/runtime/component/concurrent.rs @@ -1688,22 +1688,53 @@ impl StoreOpaque { .instance_state(instance.index) } + /// Configure the currently running `thread`. + /// + /// This will save off any state necessary for the previous thread, if + /// applicable, and then it'll additionally update state for `thread` if + /// needed too. fn set_thread(&mut self, thread: impl Into) -> Result { + let thread = thread.into(); + let old_thread = mem::replace(&mut state.current_thread, thread); + + // First thing to do after swapping threads is updating the context + // slots for this thread within the store. This restores the behavior of + // `context.{get,set}`. This involves taking the old state out of the + // store, saving it in the thread that's being swapped from, and doing + // the inverse for the new thread. When debug assertions are enabled + // this also leaves behind sentinel values to try to uncover bugs where + // this may be forgotten. 
+ if let Some(old_thread) = old_thread.guest() { + let old_context = self.vm_store_context().component_context; + state.get_mut(old_thread.thread)?.context = old_context; + } + if cfg!(debug_assertions) { + self.vm_store_context_mut().component_context = [u32::MAX; 2]; + } + if let Some(thread) = thread.guest() { + let thread = state.get_mut(thread.thread)?; + let context = thread.context; + if cfg!(debug_assertions) { + thread.context = [u32::MAX; 2]; + } + self.vm_store_context_mut().component_context = context; + } + // Each time we switch threads, we conservatively set `task_may_block` // to `false` for the component instance we're switching away from (if // any), meaning it will be `false` for any new thread created for that // instance unless explicitly set otherwise. + // + // Additionally if we're switching to a new thread, set its component + // instance's `task_may_block` according to where it left off. let state = self.concurrent_state_mut(); - let old_thread = mem::replace(&mut state.current_thread, thread.into()); if let Some(old_thread) = old_thread.guest() { let instance = state.get_mut(old_thread.task)?.instance.instance; self.component_instance_mut(instance) .set_task_may_block(false) } - // If we're switching to a new thread, set its component instance's - // `task_may_block` according to where it left off. 
- if self.concurrent_state_mut().current_thread.guest().is_some() { + if let Some(thread) = thread.guest() { self.set_task_may_block()?; } @@ -2143,9 +2174,9 @@ impl Instance { } } - store.concurrent_state_mut().delete(guest_thread.thread)?; - store.concurrent_state_mut().delete(sync_call_set)?; - let task = store.concurrent_state_mut().get_mut(guest_thread.task)?; + state.delete(guest_thread.thread)?; + state.delete(sync_call_set)?; + let task = state.get_mut(guest_thread.task)?; task.threads.remove(&guest_thread.thread); Ok(()) } @@ -2369,11 +2400,11 @@ impl Instance { self.task_complete(store, guest_thread.task, result, Status::Returned)?; } + store.set_thread(old_thread)?; + // This is a callback-less call, so the implicit thread has now completed self.cleanup_thread(store, guest_thread, callee_instance.index)?; - store.set_thread(old_thread)?; - let state = store.concurrent_state_mut(); let task = state.get_mut(guest_thread.task)?; @@ -2732,6 +2763,8 @@ impl Instance { let set = state.get_mut(caller.thread)?.sync_call_set; guest_waitable.join(state, Some(set))?; + store.0.set_thread(CurrentThread::None)?; + // ... and suspend this fiber temporarily while we wait for it to start. // // Note that we _could_ call the callee directly using the current fiber @@ -3327,6 +3360,8 @@ impl Instance { // on a separate fiber if we're running in an async store. unsafe { callee.call_unchecked(store.as_context_mut(), &mut params)? 
}; + store.0.set_thread(old_thread)?; + self.cleanup_thread(store.0, guest_thread, runtime_instance)?; log::trace!("explicit thread {guest_thread:?} completed"); let state = store.0.concurrent_state_mut(); @@ -3334,7 +3369,6 @@ impl Instance { if task.threads.is_empty() && !task.returned_or_cancelled() { bail!(Trap::NoAsyncResult); } - store.0.set_thread(old_thread)?; let state = store.0.concurrent_state_mut(); if let Some(t) = old_thread.guest() { state.get_mut(t.thread)?.state = GuestThreadState::Running; @@ -3777,14 +3811,6 @@ impl Instance { bail!(Trap::SubtaskCancelAfterTerminal); } } - - pub(crate) fn context_get(self, store: &mut StoreOpaque, slot: u32) -> Result { - store.concurrent_state_mut().context_get(slot) - } - - pub(crate) fn context_set(self, store: &mut StoreOpaque, slot: u32, value: u32) -> Result<()> { - store.concurrent_state_mut().context_set(slot, value) - } } /// Trait representing component model ABI async intrinsics and fused adapter @@ -5120,22 +5146,6 @@ impl ConcurrentState { } } - /// Implements the `context.get` intrinsic. - pub(crate) fn context_get(&mut self, slot: u32) -> Result { - let thread = self.current_guest_thread()?; - let val = self.get_mut(thread.thread)?.context[usize::try_from(slot)?]; - log::trace!("context_get {thread:?} slot {slot} val {val:#x}"); - Ok(val) - } - - /// Implements the `context.set` intrinsic. - pub(crate) fn context_set(&mut self, slot: u32, val: u32) -> Result<()> { - let thread = self.current_guest_thread()?; - log::trace!("context_set {thread:?} slot {slot} val {val:#x}"); - self.get_mut(thread.thread)?.context[usize::try_from(slot)?] = val; - Ok(()) - } - /// Returns whether there's a pending cancellation on the current guest thread, /// consuming the event if so. 
fn take_pending_cancellation(&mut self) -> Result { diff --git a/crates/wasmtime/src/runtime/vm/component/libcalls.rs b/crates/wasmtime/src/runtime/vm/component/libcalls.rs index b5cd7db9341f..08ca1eb46d4c 100644 --- a/crates/wasmtime/src/runtime/vm/component/libcalls.rs +++ b/crates/wasmtime/src/runtime/vm/component/libcalls.rs @@ -1326,27 +1326,6 @@ fn error_context_drop( ) } -#[cfg(feature = "component-model-async")] -fn context_get( - store: &mut dyn VMStore, - instance: Instance, - _caller_instance: u32, - slot: u32, -) -> Result { - instance.context_get(store, slot) -} - -#[cfg(feature = "component-model-async")] -fn context_set( - store: &mut dyn VMStore, - instance: Instance, - _caller_instance: u32, - slot: u32, - val: u32, -) -> Result<()> { - instance.context_set(store, slot, val) -} - #[cfg(feature = "component-model-async")] fn thread_index(store: &mut dyn VMStore, instance: Instance) -> Result { instance.thread_index(store) diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index 6a3ba227d542..a7097f2faf9d 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -1282,6 +1282,15 @@ pub struct VMStoreContext { /// situation while this field is read it'll never classify a fault as an /// guard page fault. pub async_guard_range: Range<*mut u8>, + + /// The `context.{get,set}` values for the current thread in the component + /// model. This is only used for `component-model-async` and slot[1] is only + /// used for `component-model-threading`. Despite the conditional use nature + /// this is unconditionally present as it avoids the need to make logic in + /// `VMOffsets` conditional. + /// + /// This is saved/restored when threads are swapped in the component model. 
+ pub component_context: [u32; 2], } impl VMStoreContext { @@ -1366,6 +1375,7 @@ impl Default for VMStoreContext { stack_chain: UnsafeCell::new(VMStackChain::Absent), async_guard_range: ptr::null_mut()..ptr::null_mut(), store_data: VmPtr::dangling(), + component_context: [0; 2], } } } @@ -1436,6 +1446,10 @@ mod test_vmstore_context { offset_of!(VMStoreContext, store_data), usize::from(offsets.ptr.vmstore_context_store_data()) ); + assert_eq!( + offset_of!(VMStoreContext, component_context), + usize::from(offsets.ptr.vmstore_context_component_context()) + ); } } diff --git a/src/commands/objdump.rs b/src/commands/objdump.rs index c89bd479189a..d5560f22c6bf 100644 --- a/src/commands/objdump.rs +++ b/src/commands/objdump.rs @@ -204,6 +204,7 @@ impl ObjdumpCommand { } else if name.contains("trampoline") || name.ends_with("_array_call") || name.ends_with("_wasm_call") + || name.contains("unsafe-intrinsics-") { Func::Trampoline } else if name.contains("libcall") || name.starts_with("component") { diff --git a/tests/disas/component-may-leave-without-signals-based-traps.wat b/tests/disas/component-may-leave-without-signals-based-traps.wat index fe2921d68a97..e67ea17b34b9 100644 --- a/tests/disas/component-may-leave-without-signals-based-traps.wat +++ b/tests/disas/component-may-leave-without-signals-based-traps.wat @@ -16,10 +16,10 @@ ;; movq %rsp, %rbp ;; subq $0x10, %rsp ;; movq %rbx, (%rsp) -;; movq %rsi, %rbx ;; movq %rdx, %r8 -;; movq 0x10(%rdi), %rax ;; movq %rbp, %rcx +;; movq 8(%rsi), %rax +;; movq %rsi, %rbx ;; movq %rcx, 0x30(%rax) ;; movq %rbp, %rcx ;; movq 8(%rcx), %rcx diff --git a/tests/disas/component-model/context-intrinsics.wat b/tests/disas/component-model/context-intrinsics.wat new file mode 100644 index 000000000000..018c405953b2 --- /dev/null +++ b/tests/disas/component-model/context-intrinsics.wat @@ -0,0 +1,72 @@ +;;! target = "x86_64" +;;! test = 'compile' +;;! 
flags = '-Wcomponent-model-async -Wcomponent-model-threading -Cinlining' + +(component + (core func $context.get0 (canon context.get i32 0)) + (core func $context.get1 (canon context.get i32 1)) + (core func $context.set0 (canon context.set i32 0)) + (core func $context.set1 (canon context.set i32 1)) + + (core module $m + (import "" "get0" (func $get0 (result i32))) + (import "" "get1" (func $get1 (result i32))) + (import "" "set0" (func $set0 (param i32))) + (import "" "set1" (func $set1 (param i32))) + + (func $g0 (export "get0") (result i32) (call $get0)) + (func $g1 (export "get1") (result i32) (call $get1)) + (func $s0 (export "set0") (param i32) (call $set0 (local.get 0))) + (func $s1 (export "set1") (param i32) (call $set1 (local.get 0))) + ) + (core instance $i (instantiate $m + (with "" (instance + (export "get0" (func $context.get0)) + (export "get1" (func $context.get1)) + (export "set0" (func $context.set0)) + (export "set1" (func $context.set1)) + )) + )) + + (func (export "get0") (result u32) (canon lift (core func $i "get0"))) + (func (export "get1") (result u32) (canon lift (core func $i "get1"))) + (func (export "set0") (param "x" u32) (canon lift (core func $i "set0"))) + (func (export "set1") (param "x" u32) (canon lift (core func $i "set1"))) +) + + +;; wasm[0]::function[4]::g0: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %rsi +;; movl 0x80(%rsi), %eax +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[5]::g1: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %rsi +;; movl 0x84(%rsi), %eax +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[6]::s0: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %rsi +;; movl %edx, 0x80(%rsi) +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[7]::s1: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %rsi +;; movl %edx, 0x84(%rsi) +;; movq %rbp, %rsp +;; popq %rbp +;; retq diff --git 
a/tests/disas/component-model/inlining-and-unsafe-intrinsics.wat b/tests/disas/component-model/inlining-and-unsafe-intrinsics.wat index ce6d789b7a23..452d14b47c60 100644 --- a/tests/disas/component-model/inlining-and-unsafe-intrinsics.wat +++ b/tests/disas/component-model/inlining-and-unsafe-intrinsics.wat @@ -65,8 +65,7 @@ ;; @0155 jump block4 ;; ;; block4: -;; @0153 v4 = load.i64 notrap aligned readonly can_move v0+72 -;; v17 = load.i64 notrap aligned readonly can_move vmctx v4+16 +;; v17 = load.i64 notrap aligned readonly can_move vmctx v0+8 ;; v18 = load.i64 notrap aligned readonly can_move vmctx v17+104 ;; v20 = load.i8 notrap aligned v18 ;; jump block5 diff --git a/tests/disas/component-model/unsafe-intrinsics-used.wat b/tests/disas/component-model/unsafe-intrinsics-used.wat index 3d9f9ed52e32..0cc37d1a03c8 100644 --- a/tests/disas/component-model/unsafe-intrinsics-used.wat +++ b/tests/disas/component-model/unsafe-intrinsics-used.wat @@ -36,7 +36,7 @@ ;; function u0:0(i64 vmctx, i64) -> i64 tail { ;; block0(v0: i64, v1: i64): -;; v2 = load.i64 notrap aligned readonly can_move vmctx v0+16 +;; v2 = load.i64 notrap aligned readonly can_move vmctx v1+8 ;; v3 = load.i64 notrap aligned readonly can_move vmctx v2+104 ;; return v3 ;; } diff --git a/tests/disas/riscv64-component-builtins-asm.wat b/tests/disas/riscv64-component-builtins-asm.wat index 88f9ee9f8489..e56763251f59 100644 --- a/tests/disas/riscv64-component-builtins-asm.wat +++ b/tests/disas/riscv64-component-builtins-asm.wat @@ -18,11 +18,12 @@ ;; mv s0, sp ;; addi sp, sp, -0x10 ;; sd s4, 8(sp) -;; mv a3, a2 +;; sd s8, 0(sp) ;; mv s4, a1 -;; ld a1, 0x10(a0) -;; mv a2, s0 -;; sd a2, 0x30(a1) +;; mv s8, a2 +;; mv a3, s0 +;; ld a1, 8(a1) +;; sd a3, 0x30(a1) ;; ld a2, 8(s0) ;; sd a2, 0x38(a1) ;; lw a1, 0x20(a0) @@ -37,12 +38,14 @@ ;; srai a1, a1, 0x20 ;; slli a2, a4, 0x20 ;; srai a2, a2, 0x20 +;; mv a3, s8 ;; slli a3, a3, 0x20 ;; srai a3, a3, 0x20 ;; jalr a5 ;; addi a1, zero, -1 -;; beq a0, a1, 0x1c +;; beq 
a0, a1, 0x20 ;; ld s4, 8(sp) +;; ld s8, 0(sp) ;; addi sp, sp, 0x10 ;; ld ra, 8(sp) ;; ld s0, 0(sp) diff --git a/tests/disas/riscv64-component-builtins.wat b/tests/disas/riscv64-component-builtins.wat index e6d01af8c843..4940688c43e3 100644 --- a/tests/disas/riscv64-component-builtins.wat +++ b/tests/disas/riscv64-component-builtins.wat @@ -15,8 +15,8 @@ ;; sig1 = (i64 sext vmctx) system_v ;; ;; block0(v0: i64, v1: i64, v2: i32): -;; v3 = load.i64 notrap aligned v0+16 ;; v4 = get_frame_pointer.i64 +;; v3 = load.i64 notrap aligned readonly can_move vmctx v1+8 ;; store notrap aligned v4, v3+48 ;; v5 = get_return_address.i64 ;; store notrap aligned v5, v3+56 From f9aa3dd1df18413899b70096a604a865a2acc83e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 24 Apr 2026 16:10:53 -0700 Subject: [PATCH 2/3] Fix build issues --- crates/cranelift/src/compiler/component.rs | 2 +- crates/wasmtime/src/runtime/component/concurrent.rs | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/cranelift/src/compiler/component.rs b/crates/cranelift/src/compiler/component.rs index ebffa0a9e1ac..289e69b4e73b 100644 --- a/crates/cranelift/src/compiler/component.rs +++ b/crates/cranelift/src/compiler/component.rs @@ -1547,7 +1547,7 @@ impl<'a> TrampolineCompiler<'a> { .with_alias_region(Some(ir::AliasRegion::Vmctx)) .with_can_move(), caller_vmctx, - i32::try_from(self.offsets.ptr.vmctx_store_context()).unwrap(), + i32::from(self.offsets.ptr.vmctx_store_context()), ) } diff --git a/crates/wasmtime/src/runtime/component/concurrent.rs b/crates/wasmtime/src/runtime/component/concurrent.rs index 43b5c2186929..4e03a87e2799 100644 --- a/crates/wasmtime/src/runtime/component/concurrent.rs +++ b/crates/wasmtime/src/runtime/component/concurrent.rs @@ -1695,6 +1695,7 @@ impl StoreOpaque { /// needed too. 
fn set_thread(&mut self, thread: impl Into) -> Result { let thread = thread.into(); + let state = self.concurrent_state_mut(); let old_thread = mem::replace(&mut state.current_thread, thread); // First thing to do after swapping threads is updating the context @@ -1706,13 +1707,15 @@ impl StoreOpaque { // this may be forgotten. if let Some(old_thread) = old_thread.guest() { let old_context = self.vm_store_context().component_context; - state.get_mut(old_thread.thread)?.context = old_context; + self.concurrent_state_mut() + .get_mut(old_thread.thread)? + .context = old_context; } if cfg!(debug_assertions) { self.vm_store_context_mut().component_context = [u32::MAX; 2]; } if let Some(thread) = thread.guest() { - let thread = state.get_mut(thread.thread)?; + let thread = self.concurrent_state_mut().get_mut(thread.thread)?; let context = thread.context; if cfg!(debug_assertions) { thread.context = [u32::MAX; 2]; @@ -1734,7 +1737,7 @@ impl StoreOpaque { .set_task_may_block(false) } - if let Some(thread) = thread.guest() { + if thread.guest().is_some() { self.set_task_may_block()?; } From 744b83786cede92d7abb3b5bfce60fce783a3995 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 27 Apr 2026 13:23:01 -0700 Subject: [PATCH 3/3] Review comments --- crates/cranelift/src/compiler/component.rs | 10 +++++++--- crates/environ/src/vmoffsets.rs | 14 +++++++++++--- .../src/runtime/component/concurrent.rs | 12 ++++++------ crates/wasmtime/src/runtime/vm/vmcontext.rs | 19 +++++++++++++++---- 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/crates/cranelift/src/compiler/component.rs b/crates/cranelift/src/compiler/component.rs index 289e69b4e73b..326fe5f6cab2 100644 --- a/crates/cranelift/src/compiler/component.rs +++ b/crates/cranelift/src/compiler/component.rs @@ -1552,6 +1552,8 @@ impl<'a> TrampolineCompiler<'a> { } fn translate_context_intrinsic(&mut self, intrinsic: UnsafeIntrinsic) { + // This is the width of the type being loaded from Wasmtime's + // 
`VMStoreContext` slot and it depends on the intrinsic. let ty = match intrinsic { UnsafeIntrinsic::ContextGetI32_0 | UnsafeIntrinsic::ContextSetI32_0 @@ -1559,14 +1561,16 @@ impl<'a> TrampolineCompiler<'a> { | UnsafeIntrinsic::ContextSetI32_1 => ir::types::I32, _ => unreachable!(), }; - let context_slot_size = 4; + let slot = match intrinsic { UnsafeIntrinsic::ContextGetI32_0 | UnsafeIntrinsic::ContextSetI32_0 => 0, UnsafeIntrinsic::ContextGetI32_1 | UnsafeIntrinsic::ContextSetI32_1 => 1, _ => unreachable!(), }; - let offset = - self.offsets.ptr.vmstore_context_component_context() + slot * context_slot_size; + let offset = self + .offsets + .ptr + .vmstore_context_component_context_slot(slot); let params = self.abi_load_params(); let vmstore_context = self.load_vm_store_context(); match intrinsic { diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs index d7a3011d089c..d73193ab56b1 100644 --- a/crates/environ/src/vmoffsets.rs +++ b/crates/environ/src/vmoffsets.rs @@ -39,6 +39,10 @@ use crate::{ }; use cranelift_entity::packed_option::ReservedValue; +/// Number of slots for `component_context` in the `VMStoreContext`. This is +/// defined by the component model's `context.{get,set}` intrinsics. +pub const NUM_COMPONENT_CONTEXT_SLOTS: usize = 2; + #[cfg(target_pointer_width = "32")] fn cast_to_u32(sz: usize) -> u32 { u32::try_from(sz).unwrap() @@ -255,9 +259,13 @@ pub trait PtrSize { self.vmstore_context_store_data() + self.size() } - /// Return the offset of the `component_context` field of `VMStoreContext`. - fn vmstore_context_component_context(&self) -> u8 { - self.vmstore_context_async_guard_range() + 2 * self.size() + /// Return the offset of the `component_context[i]` field of + /// `VMStoreContext`. 
+ fn vmstore_context_component_context_slot(&self, i: u8) -> u8 { + assert!(usize::from(i) < NUM_COMPONENT_CONTEXT_SLOTS); + let base = self.vmstore_context_async_guard_range() + 2 * self.size(); + let slot_size = 4; + base + i * slot_size } // Offsets within `VMMemoryDefinition` diff --git a/crates/wasmtime/src/runtime/component/concurrent.rs b/crates/wasmtime/src/runtime/component/concurrent.rs index 4e03a87e2799..ac019a5216b6 100644 --- a/crates/wasmtime/src/runtime/component/concurrent.rs +++ b/crates/wasmtime/src/runtime/component/concurrent.rs @@ -83,7 +83,6 @@ use std::ptr::{self, NonNull}; use std::task::{Context, Poll, Waker}; use std::vec::Vec; use table::{TableDebug, TableId}; -use wasmtime_environ::Trap; use wasmtime_environ::component::{ CanonicalAbiInfo, CanonicalOptions, CanonicalOptionsDataModel, MAX_FLAT_PARAMS, MAX_FLAT_RESULTS, OptionsIndex, PREPARE_ASYNC_NO_RESULT, PREPARE_ASYNC_WITH_RESULT, @@ -92,6 +91,7 @@ use wasmtime_environ::component::{ TypeFuncIndex, TypeFutureTableIndex, TypeStreamTableIndex, TypeTupleIndex, }; use wasmtime_environ::packed_option::ReservedValue; +use wasmtime_environ::{NUM_COMPONENT_CONTEXT_SLOTS, Trap}; pub use abort::JoinHandle; pub use future_stream_any::{FutureAny, StreamAny}; @@ -1712,13 +1712,13 @@ impl StoreOpaque { .context = old_context; } if cfg!(debug_assertions) { - self.vm_store_context_mut().component_context = [u32::MAX; 2]; + self.vm_store_context_mut().component_context = [u32::MAX; NUM_COMPONENT_CONTEXT_SLOTS]; } if let Some(thread) = thread.guest() { let thread = self.concurrent_state_mut().get_mut(thread.thread)?; let context = thread.context; if cfg!(debug_assertions) { - thread.context = [u32::MAX; 2]; + thread.context = [u32::MAX; NUM_COMPONENT_CONTEXT_SLOTS]; } self.vm_store_context_mut().component_context = context; } @@ -4428,7 +4428,7 @@ enum GuestThreadState { pub struct GuestThread { /// Context-local state used to implement the `context.{get,set}` /// intrinsics. 
- context: [u32; 2], + context: [u32; NUM_COMPONENT_CONTEXT_SLOTS], /// The owning guest task. parent_task: TableId, /// If present, indicates that the thread is currently waiting on the @@ -4460,7 +4460,7 @@ impl GuestThread { fn new_implicit(state: &mut ConcurrentState, parent_task: TableId) -> Result { let sync_call_set = state.push(WaitableSet::default())?; Ok(Self { - context: [0; 2], + context: [0; NUM_COMPONENT_CONTEXT_SLOTS], parent_task, wake_on_cancel: None, state: GuestThreadState::NotStartedImplicit, @@ -4478,7 +4478,7 @@ impl GuestThread { ) -> Result { let sync_call_set = state.push(WaitableSet::default())?; Ok(Self { - context: [0; 2], + context: [0; NUM_COMPONENT_CONTEXT_SLOTS], parent_task, wake_on_cancel: None, state: GuestThreadState::NotStartedExplicit(start_func), diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index a7097f2faf9d..946b87e26c1c 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -18,7 +18,8 @@ use core::ptr::{self, NonNull}; use core::sync::atomic::{AtomicUsize, Ordering}; use wasmtime_environ::{ BuiltinFunctionIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, - DefinedTagIndex, VMCONTEXT_MAGIC, VMSharedTypeIndex, WasmHeapTopType, WasmValType, + DefinedTagIndex, NUM_COMPONENT_CONTEXT_SLOTS, VMCONTEXT_MAGIC, VMSharedTypeIndex, + WasmHeapTopType, WasmValType, }; /// A function pointer that exposes the array calling convention. @@ -1290,7 +1291,7 @@ pub struct VMStoreContext { /// `VMOffsets` conditional. /// /// This is saved/restored when threads are swapped in the component model. 
- pub component_context: [u32; 2], + pub component_context: [u32; NUM_COMPONENT_CONTEXT_SLOTS], } impl VMStoreContext { @@ -1375,7 +1376,7 @@ impl Default for VMStoreContext { stack_chain: UnsafeCell::new(VMStackChain::Absent), async_guard_range: ptr::null_mut()..ptr::null_mut(), store_data: VmPtr::dangling(), - component_context: [0; 2], + component_context: [0; NUM_COMPONENT_CONTEXT_SLOTS], } } } @@ -1448,7 +1449,17 @@ mod test_vmstore_context { ); assert_eq!( offset_of!(VMStoreContext, component_context), - usize::from(offsets.ptr.vmstore_context_component_context()) + usize::from(offsets.ptr.vmstore_context_component_context_slot(0)) + ); + + // Make sure that the calculation for the size of a slot is also + // accurate. + let slot_width = offsets.ptr.vmstore_context_component_context_slot(1) + - offsets.ptr.vmstore_context_component_context_slot(0); + let default = VMStoreContext::default(); + assert_eq!( + size_of_val(&default.component_context[0]), + usize::from(slot_width) ); } }