diff --git a/src/hyperlight_common/src/layout.rs b/src/hyperlight_common/src/layout.rs index 69ecdb6ef..83b6540f7 100644 --- a/src/hyperlight_common/src/layout.rs +++ b/src/hyperlight_common/src/layout.rs @@ -26,6 +26,8 @@ pub const SCRATCH_TOP_ALLOCATOR_OFFSET: u64 = 0x10; pub const SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET: u64 = 0x18; pub const SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET: u64 = 0x20; pub const SCRATCH_TOP_EXN_STACK_OFFSET: u64 = 0x30; +/// Top of the page-fault exception stack, one page below the top of scratch memory. +pub const SCRATCH_TOP_PF_EXN_STACK_OFFSET: u64 = 0x1000; pub fn scratch_base_gpa(size: usize) -> u64 { (MAX_GPA - size + 1) as u64 diff --git a/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs b/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs index cfaad9a0b..3392fd19e 100644 --- a/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs +++ b/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs @@ -31,8 +31,9 @@ pub unsafe fn alloc_phys_pages(n: u64) -> u64 { x = inout(reg) x ); } - // Set aside two pages at the top of the scratch region for the - // exception stack, shared state, etc + // Set aside two pages at the top of the scratch region. The top + // page holds shared metadata and the general exception stack. The + // page below it holds the page-fault exception stack. let max_avail = hyperlight_common::layout::MAX_GPA - hyperlight_common::vmem::PAGE_SIZE * 2; if x.checked_add(nbytes) .is_none_or(|xx| xx >= max_avail as u64) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs index 87f89f15c..576b7bd9e 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs @@ -22,7 +22,9 @@ use core::arch::{asm, global_asm}; use hyperlight_common::outb::Exception; use super::super::context; -use super::super::machine::{IDT, IdtEntry, IdtPointer, ProcCtrl}; +use super::super::machine::{ + IDT, IST_GENERAL_EXCEPTION, IST_PAGE_FAULT, IdtEntry, IdtPointer, ProcCtrl, +}; unsafe extern "C" { // Exception handlers @@ -174,12 +176,16 @@ global_asm!( pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { let idt = unsafe { &raw mut (*pc).idt }; - let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { + let set_idt_entry_ist = |idx, handler: unsafe extern "C" fn(), ist: u8| { let handler_addr = handler as *const () as u64; unsafe { - (&raw mut (*idt).entries[idx as usize]).write_volatile(IdtEntry::new(handler_addr)); + (&raw mut (*idt).entries[idx as usize]) + .write_volatile(IdtEntry::new_with_ist(handler_addr, ist)); } }; + let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { + set_idt_entry_ist(idx, handler, IST_GENERAL_EXCEPTION) + }; set_idt_entry(Exception::DivideByZero, _do_excp0); // Divide by zero set_idt_entry(Exception::Debug, _do_excp1); // Debug set_idt_entry(Exception::NonMaskableInterrupt, _do_excp2); // Non-maskable interrupt @@ -194,7 +200,7 @@ pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { set_idt_entry(Exception::SegmentNotPresent, _do_excp11); // Segment Not Present set_idt_entry(Exception::StackSegmentFault, _do_excp12); // Stack-Segment Fault set_idt_entry(Exception::GeneralProtectionFault, _do_excp13); // General Protection Fault - set_idt_entry(Exception::PageFault, _do_excp14); // Page Fault + set_idt_entry_ist(Exception::PageFault, _do_excp14, IST_PAGE_FAULT); // Page Fault (own IST stack) set_idt_entry(Exception::Reserved, _do_excp15); // Reserved set_idt_entry(Exception::X87FloatingPointException, _do_excp16); // x87 Floating-Point Exception set_idt_entry(Exception::AlignmentCheck, _do_excp17); // Alignment Check diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index 073bd3a2f..912bc5d5d 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -79,10 +79,19 @@ unsafe fn init_gdt(pc: *mut ProcCtrl) { } } -/// Hyperlight's TSS contains only a single IST entry, which is used -/// to set up the stack switch to the exception stack whenever we take -/// an exception (including page faults, which are important, since -/// the fault might be due to needing to grow the stack!) +/// Hyperlight's TSS provides two IST stacks. The CPU switches to one +/// when an exception is taken, so a handler always runs on a known-good +/// stack. This matters because a fault can mean the main stack needs to +/// grow. +/// +/// * `ist1` is the general exception stack. +/// * `ist2` is the page-fault stack. +/// +/// Page faults get a separate stack because they can nest inside +/// another exception. A handler running on `ist1` may write a +/// copy-on-write page, which raises a page fault. The CPU delivers that +/// fault on `ist2`, so each one has its own stack and the handler +/// resumes once the fault is serviced. /// /// This function sets up the TSS and then points the processor at the /// system segment descriptor, initialized in [`init_gdt`] above, @@ -96,6 +105,11 @@ unsafe fn init_tss(pc: *mut ProcCtrl) { - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET + 1; ist1_ptr.write_volatile(exn_stack.to_ne_bytes()); + let ist2_ptr = &raw mut (*tss_ptr).ist2 as *mut [u8; 8]; + let pf_exn_stack = hyperlight_common::layout::MAX_GVA as u64 + - hyperlight_common::layout::SCRATCH_TOP_PF_EXN_STACK_OFFSET + + 1; + ist2_ptr.write_volatile(pf_exn_stack.to_ne_bytes()); asm!( "ltr ax", in("ax") core::mem::offset_of!(HyperlightGDT, tss), diff --git a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs index cde8118e3..b4facd6d0 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs @@ -20,6 +20,12 @@ use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE}; use super::layout::PROC_CONTROL_GVA; +/// IDT gate IST index for general exceptions. Selects [`TSS::ist1`]. +pub(super) const IST_GENERAL_EXCEPTION: u8 = 1; +/// IDT gate IST index for page faults. Selects [`TSS::ist2`], the +/// page-fault stack. See the TSS setup in `init.rs` for why. +pub(super) const IST_PAGE_FAULT: u8 = 2; + /// Entry in the Global Descriptor Table (GDT) /// For reference, see page 3-10 Vol. 3A of Intel 64 and IA-32 /// Architectures Software Developer's Manual, figure 3-8 @@ -117,7 +123,7 @@ pub(super) struct TSS { _rsp2: u64, _rsvd1: [u8; 8], pub(super) ist1: u64, - _ist2: u64, + pub(super) ist2: u64, _ist3: u64, _ist4: u64, _ist5: u64, @@ -127,6 +133,7 @@ pub(super) struct TSS { } const _: () = assert!(mem::size_of::() == 0x64); const _: () = assert!(mem::offset_of!(TSS, ist1) == 0x24); +const _: () = assert!(mem::offset_of!(TSS, ist2) == 0x2c); /// An entry in the Interrupt Descriptor Table (IDT) /// For reference, see page 7-20 Vol. 3A of Intel 64 and IA-32 @@ -154,10 +161,16 @@ const _: () = assert!(mem::size_of::() == 0x10); impl IdtEntry { pub(super) fn new(handler: u64) -> Self { + Self::new_with_ist(handler, IST_GENERAL_EXCEPTION) + } + + /// Build an IDT gate that switches to IST stack `ist` (1-based, one + /// of `TSS::ist1..ist7`) when the vector is taken. + pub(super) fn new_with_ist(handler: u64, ist: u8) -> Self { Self { offset_low: (handler & 0xFFFF) as u16, selector: 0x08, // Kernel Code Segment - interrupt_stack_table_offset: 1, + interrupt_stack_table_offset: ist, type_attr: 0x8E, // 0x8E = 10001110b // 1 00 0 1101 diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 6b5a7f8e3..49f9d2cf2 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1674,6 +1674,31 @@ fn exception_handler_installation_and_validation() { }); } +/// A guest exception handler writes a copy-on-write page, which faults while +/// the handler runs on the exception stack. Page faults use their own stack, +/// so the handler frame survives and the guest resumes. +#[test] +fn exception_handler_nested_page_fault() { + with_rust_sandbox(|mut sandbox| { + let count: i32 = sandbox.call("GetExceptionHandlerCallCount", ()).unwrap(); + assert_eq!(count, 0, "Handler should not have been called yet"); + + sandbox + .call::<()>("InstallCowFaultingHandler", 3i32) + .unwrap(); + + // The handler faults as it runs. The guest resumes from int3 and returns 0. + let trigger_result: i32 = sandbox.call("TriggerInt3Bare", ()).unwrap(); + assert_eq!( + trigger_result, 0, + "Guest should resume after the nested page fault" + ); + + let count: i32 = sandbox.call("GetExceptionHandlerCallCount", ()).unwrap(); + assert_eq!(count, 1, "Handler should have been called once"); + }); +} + /// Tests that an exception can be properly handled even when the heap is exhausted. /// The guest function fills the heap completely, then triggers a ud2 exception. /// This validates that the exception handling path does not require heap allocations. diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index acc176052..1e58d1cbe 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -187,6 +187,54 @@ fn trigger_int3() -> i32 { 0 } +/// Page-aligned probe written from [`cow_faulting_exception_handler`]. +/// Its page stays copy-on-write after the snapshot, so the handler's +/// first write faults while the handler runs on the exception stack. +#[repr(align(4096))] +struct CowFaultProbe([u64; 512]); +static mut COW_FAULT_PROBE: CowFaultProbe = CowFaultProbe([0; 512]); + +/// Handler that faults while it runs by writing a copy-on-write page. +fn cow_faulting_exception_handler( + exception_number: u64, + _exception_info: *mut ExceptionInfo, + _context: *mut Context, + _page_fault_address: u64, +) -> bool { + HANDLER_INVOCATION_COUNT.fetch_add(1, Ordering::SeqCst); + + // INT3 is exception vector 3 + assert_eq!(exception_number, 3); + + // First write to this page faults, here on the exception stack. + unsafe { + let probe = &raw mut COW_FAULT_PROBE.0; + core::ptr::write_volatile(&mut (*probe)[0], TEST_R10_VALUE); + } + + // Return true to resume execution. + true +} + +/// Install [`cow_faulting_exception_handler`] for a vector. +#[guest_function("InstallCowFaultingHandler")] +fn install_cow_faulting_handler(vector: i32) { + hyperlight_guest_bin::exception::arch::HANDLERS[vector as usize].store( + cow_faulting_exception_handler as *const () as usize as u64, + Ordering::Release, + ); +} + +/// Trigger an INT3 breakpoint (vector 3). Pairs with +/// [`install_cow_faulting_handler`]. +#[guest_function("TriggerInt3Bare")] +fn trigger_int3_bare() -> i32 { + unsafe { + core::arch::asm!("int3"); + } + 0 +} + #[guest_function("EchoFloat")] fn echo_float(value: f32) -> f32 { value