From 8568ecd1dd75b670bd73315f582c6183bcba5424 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Wed, 17 Jun 2026 23:02:14 -0700 Subject: [PATCH 1/2] fix(guest): give page faults their own exception stack A guest exception handler runs on the IST1 stack. If the handler writes a copy-on-write page, the first write faults. The page fault also uses IST1, so the CPU resets RSP to the top of IST1 and writes the fault frame over the live handler frame. The handler then returns to a bad address and the guest aborts. The bug stays latent until an exception handler writes a copy-on-write page. It surfaced when a memory layout change moved a counter that an existing handler increments onto a page that stays copy-on-write after a snapshot. The increment then faulted while the handler ran and crashed the guest. Send page faults to their own IST2 stack so a fault inside a handler keeps the handler frame intact. The page-fault stack uses the second of the two scratch pages already reserved at the top of the region. Add a regression test, exception_handler_nested_page_fault. It installs a handler that writes a copy-on-write page, then triggers int3. Without the fix the guest aborts with a page fault. With the fix it returns 0. 
Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_common/src/layout.rs | 2 + .../src/arch/amd64/prim_alloc.rs | 5 +- .../src/arch/amd64/exception/entry.rs | 14 ++++-- .../src/arch/amd64/init.rs | 22 +++++++-- .../src/arch/amd64/machine.rs | 17 ++++++- src/hyperlight_host/tests/integration_test.rs | 25 ++++++++++ src/tests/rust_guests/simpleguest/src/main.rs | 48 +++++++++++++++++++ 7 files changed, 121 insertions(+), 12 deletions(-) diff --git a/src/hyperlight_common/src/layout.rs b/src/hyperlight_common/src/layout.rs index 69ecdb6ef..83b6540f7 100644 --- a/src/hyperlight_common/src/layout.rs +++ b/src/hyperlight_common/src/layout.rs @@ -26,6 +26,8 @@ pub const SCRATCH_TOP_ALLOCATOR_OFFSET: u64 = 0x10; pub const SCRATCH_TOP_SNAPSHOT_PT_GPA_BASE_OFFSET: u64 = 0x18; pub const SCRATCH_TOP_SNAPSHOT_GENERATION_OFFSET: u64 = 0x20; pub const SCRATCH_TOP_EXN_STACK_OFFSET: u64 = 0x30; +/// Top of the page-fault exception stack, one page below the top of scratch memory. +pub const SCRATCH_TOP_PF_EXN_STACK_OFFSET: u64 = 0x1000; pub fn scratch_base_gpa(size: usize) -> u64 { (MAX_GPA - size + 1) as u64 diff --git a/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs b/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs index cfaad9a0b..3392fd19e 100644 --- a/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs +++ b/src/hyperlight_guest/src/arch/amd64/prim_alloc.rs @@ -31,8 +31,9 @@ pub unsafe fn alloc_phys_pages(n: u64) -> u64 { x = inout(reg) x ); } - // Set aside two pages at the top of the scratch region for the - // exception stack, shared state, etc + // Set aside two pages at the top of the scratch region. The top + // page holds shared metadata and the general exception stack. The + // page below it holds the page-fault exception stack. 
let max_avail = hyperlight_common::layout::MAX_GPA - hyperlight_common::vmem::PAGE_SIZE * 2; if x.checked_add(nbytes) .is_none_or(|xx| xx >= max_avail as u64) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs index 87f89f15c..576b7bd9e 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs @@ -22,7 +22,9 @@ use core::arch::{asm, global_asm}; use hyperlight_common::outb::Exception; use super::super::context; -use super::super::machine::{IDT, IdtEntry, IdtPointer, ProcCtrl}; +use super::super::machine::{ + IDT, IST_GENERAL_EXCEPTION, IST_PAGE_FAULT, IdtEntry, IdtPointer, ProcCtrl, +}; unsafe extern "C" { // Exception handlers @@ -174,12 +176,16 @@ global_asm!( pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { let idt = unsafe { &raw mut (*pc).idt }; - let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { + let set_idt_entry_ist = |idx, handler: unsafe extern "C" fn(), ist: u8| { let handler_addr = handler as *const () as u64; unsafe { - (&raw mut (*idt).entries[idx as usize]).write_volatile(IdtEntry::new(handler_addr)); + (&raw mut (*idt).entries[idx as usize]) + .write_volatile(IdtEntry::new_with_ist(handler_addr, ist)); } }; + let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { + set_idt_entry_ist(idx, handler, IST_GENERAL_EXCEPTION) + }; set_idt_entry(Exception::DivideByZero, _do_excp0); // Divide by zero set_idt_entry(Exception::Debug, _do_excp1); // Debug set_idt_entry(Exception::NonMaskableInterrupt, _do_excp2); // Non-maskable interrupt @@ -194,7 +200,7 @@ pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { set_idt_entry(Exception::SegmentNotPresent, _do_excp11); // Segment Not Present set_idt_entry(Exception::StackSegmentFault, _do_excp12); // Stack-Segment Fault set_idt_entry(Exception::GeneralProtectionFault, _do_excp13); // General Protection Fault - 
set_idt_entry(Exception::PageFault, _do_excp14); // Page Fault + set_idt_entry_ist(Exception::PageFault, _do_excp14, IST_PAGE_FAULT); // Page Fault (own IST stack) set_idt_entry(Exception::Reserved, _do_excp15); // Reserved set_idt_entry(Exception::X87FloatingPointException, _do_excp16); // x87 Floating-Point Exception set_idt_entry(Exception::AlignmentCheck, _do_excp17); // Alignment Check diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index 073bd3a2f..912bc5d5d 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -79,10 +79,19 @@ unsafe fn init_gdt(pc: *mut ProcCtrl) { } } -/// Hyperlight's TSS contains only a single IST entry, which is used -/// to set up the stack switch to the exception stack whenever we take -/// an exception (including page faults, which are important, since -/// the fault might be due to needing to grow the stack!) +/// Hyperlight's TSS provides two IST stacks. The CPU switches to one +/// when an exception is taken, so a handler always runs on a known-good +/// stack. This matters because a fault can mean the main stack needs to +/// grow. +/// +/// * `ist1` is the general exception stack. +/// * `ist2` is the page-fault stack. +/// +/// Page faults get a separate stack because they can nest inside +/// another exception. A handler running on `ist1` may write a +/// copy-on-write page, which raises a page fault. The CPU delivers that +/// fault on `ist2`, so each one has its own stack and the handler +/// resumes once the fault is serviced. 
/// /// This function sets up the TSS and then points the processor at the /// system segment descriptor, initialized in [`init_gdt`] above, @@ -96,6 +105,11 @@ unsafe fn init_tss(pc: *mut ProcCtrl) { - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET + 1; ist1_ptr.write_volatile(exn_stack.to_ne_bytes()); + let ist2_ptr = &raw mut (*tss_ptr).ist2 as *mut [u8; 8]; + let pf_exn_stack = hyperlight_common::layout::MAX_GVA as u64 + - hyperlight_common::layout::SCRATCH_TOP_PF_EXN_STACK_OFFSET + + 1; + ist2_ptr.write_volatile(pf_exn_stack.to_ne_bytes()); asm!( "ltr ax", in("ax") core::mem::offset_of!(HyperlightGDT, tss), diff --git a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs index cde8118e3..b4facd6d0 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/machine.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/machine.rs @@ -20,6 +20,12 @@ use hyperlight_common::vmem::{BasicMapping, MappingKind, PAGE_SIZE}; use super::layout::PROC_CONTROL_GVA; +/// IDT gate IST index for general exceptions. Selects [`TSS::ist1`]. +pub(super) const IST_GENERAL_EXCEPTION: u8 = 1; +/// IDT gate IST index for page faults. Selects [`TSS::ist2`], the +/// page-fault stack. See the TSS setup in `init.rs` for why. +pub(super) const IST_PAGE_FAULT: u8 = 2; + /// Entry in the Global Descriptor Table (GDT) /// For reference, see page 3-10 Vol. 3A of Intel 64 and IA-32 /// Architectures Software Developer's Manual, figure 3-8 @@ -117,7 +123,7 @@ pub(super) struct TSS { _rsp2: u64, _rsvd1: [u8; 8], pub(super) ist1: u64, - _ist2: u64, + pub(super) ist2: u64, _ist3: u64, _ist4: u64, _ist5: u64, @@ -127,6 +133,7 @@ pub(super) struct TSS { } const _: () = assert!(mem::size_of::<TSS>() == 0x64); const _: () = assert!(mem::offset_of!(TSS, ist1) == 0x24); +const _: () = assert!(mem::offset_of!(TSS, ist2) == 0x2c); /// An entry in the Interrupt Descriptor Table (IDT) /// For reference, see page 7-20 Vol. 
3A of Intel 64 and IA-32 @@ -154,10 +161,16 @@ const _: () = assert!(mem::size_of::<IdtEntry>() == 0x10); impl IdtEntry { pub(super) fn new(handler: u64) -> Self { + Self::new_with_ist(handler, IST_GENERAL_EXCEPTION) + } + + /// Build an IDT gate that switches to IST stack `ist` (1-based, one + /// of `TSS::ist1..ist7`) when the vector is taken. + pub(super) fn new_with_ist(handler: u64, ist: u8) -> Self { Self { offset_low: (handler & 0xFFFF) as u16, selector: 0x08, // Kernel Code Segment - interrupt_stack_table_offset: 1, + interrupt_stack_table_offset: ist, type_attr: 0x8E, // 0x8E = 10001110b // 1 00 0 1101 diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 6b5a7f8e3..49f9d2cf2 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1674,6 +1674,31 @@ fn exception_handler_installation_and_validation() { }); } +/// A guest exception handler writes a copy-on-write page, which faults while +/// the handler runs on the exception stack. Page faults use their own stack, +/// so the handler frame survives and the guest resumes. +#[test] +fn exception_handler_nested_page_fault() { + with_rust_sandbox(|mut sandbox| { + let count: i32 = sandbox.call("GetExceptionHandlerCallCount", ()).unwrap(); + assert_eq!(count, 0, "Handler should not have been called yet"); + + sandbox + .call::<()>("InstallCowFaultingHandler", 3i32) + .unwrap(); + + // The handler faults as it runs. The guest resumes from int3 and returns 0. + let trigger_result: i32 = sandbox.call("TriggerInt3Bare", ()).unwrap(); + assert_eq!( + trigger_result, 0, + "Guest should resume after the nested page fault" + ); + + let count: i32 = sandbox.call("GetExceptionHandlerCallCount", ()).unwrap(); + assert_eq!(count, 1, "Handler should have been called once"); + }); +} + /// Tests that an exception can be properly handled even when the heap is exhausted. 
/// The guest function fills the heap completely, then triggers a ud2 exception. /// This validates that the exception handling path does not require heap allocations. diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index acc176052..1e58d1cbe 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -187,6 +187,54 @@ fn trigger_int3() -> i32 { 0 } +/// Page-aligned probe written from [`cow_faulting_exception_handler`]. +/// Its page stays copy-on-write after the snapshot, so the handler's +/// first write faults while the handler runs on the exception stack. +#[repr(align(4096))] +struct CowFaultProbe([u64; 512]); +static mut COW_FAULT_PROBE: CowFaultProbe = CowFaultProbe([0; 512]); + +/// Handler that faults while it runs by writing a copy-on-write page. +fn cow_faulting_exception_handler( + exception_number: u64, + _exception_info: *mut ExceptionInfo, + _context: *mut Context, + _page_fault_address: u64, +) -> bool { + HANDLER_INVOCATION_COUNT.fetch_add(1, Ordering::SeqCst); + + // INT3 is exception vector 3 + assert_eq!(exception_number, 3); + + // First write to this page faults, here on the exception stack. + unsafe { + let probe = &raw mut COW_FAULT_PROBE.0; + core::ptr::write_volatile(&mut (*probe)[0], TEST_R10_VALUE); + } + + // Return true to resume execution. + true +} + +/// Install [`cow_faulting_exception_handler`] for a vector. +#[guest_function("InstallCowFaultingHandler")] +fn install_cow_faulting_handler(vector: i32) { + hyperlight_guest_bin::exception::arch::HANDLERS[vector as usize].store( + cow_faulting_exception_handler as *const () as usize as u64, + Ordering::Release, + ); +} + +/// Trigger an INT3 breakpoint (vector 3). Pairs with +/// [`install_cow_faulting_handler`]. 
+#[guest_function("TriggerInt3Bare")] +fn trigger_int3_bare() -> i32 { + unsafe { + core::arch::asm!("int3"); + } + 0 +} + #[guest_function("EchoFloat")] fn echo_float(value: f32) -> f32 { value From faa02a89bd4a13ec0a8de856c255f8e2cc23c2f8 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Tue, 23 Jun 2026 19:00:18 -0700 Subject: [PATCH 2/2] Golden tests Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .github/workflows/RegenSnapshotGoldens.yml | 241 +++++++++++++ .github/workflows/ValidatePullRequest.yml | 19 +- .github/workflows/dep_build_test.yml | 32 ++ Cargo.lock | 32 ++ Justfile | 59 +++- docs/github-labels.md | 6 + docs/snapshot-versioning.md | 328 ++++++++++++++++++ src/hyperlight_host/Cargo.toml | 10 + .../src/sandbox/snapshot/file/config.rs | 229 ++++++++++++ .../src/sandbox/snapshot/file/media_types.rs | 30 +- .../src/sandbox/snapshot/file/mod.rs | 8 +- .../src/sandbox/snapshot/file_tests.rs | 143 ++++++++ .../src/sandbox/snapshot/mod.rs | 1 + .../src/sandbox/snapshot/tripwires.rs | 75 ++++ src/hyperlight_host/tests/integration_test.rs | 8 +- .../tests/snapshot_goldens/checks.rs | 270 ++++++++++++++ .../tests/snapshot_goldens/fixtures.rs | 127 +++++++ .../tests/snapshot_goldens/goldens_version.rs | 24 ++ .../tests/snapshot_goldens/main.rs | 125 +++++++ .../tests/snapshot_goldens/oci.rs | 52 +++ .../tests/snapshot_goldens/platform.rs | 158 +++++++++ src/tests/rust_guests/simpleguest/src/main.rs | 126 +++++++ 22 files changed, 2077 insertions(+), 26 deletions(-) create mode 100644 .github/workflows/RegenSnapshotGoldens.yml create mode 100644 docs/snapshot-versioning.md create mode 100644 src/hyperlight_host/src/sandbox/snapshot/tripwires.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/checks.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/fixtures.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs create mode 
100644 src/hyperlight_host/tests/snapshot_goldens/main.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/oci.rs create mode 100644 src/hyperlight_host/tests/snapshot_goldens/platform.rs diff --git a/.github/workflows/RegenSnapshotGoldens.yml b/.github/workflows/RegenSnapshotGoldens.yml new file mode 100644 index 000000000..d456527ba --- /dev/null +++ b/.github/workflows/RegenSnapshotGoldens.yml @@ -0,0 +1,241 @@ +# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json + +# Publish snapshot goldens to +# ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens. +# +# Runs automatically when a merge to main changes GOLDENS_VERSION (the +# version string lives in +# src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs). The check-published +# job reads that version and checks GHCR for its `{version}-complete` +# marker. If the marker is absent, the matrix walks every (hv, cpu, +# config) combination, dumps the canonical snapshot, and uploads it as a +# workflow artifact. A single publish job then downloads every artifact, +# pushes each as a tag named `{version}-{hv}-{cpu}-{profile}`, and +# pushes the marker last. Publishing the whole set from one job means a +# partial run leaves no marker and is republished on the next run. +# +# A version whose marker exists is left untouched, so a merge that does +# not bump the version, or a re-run of the same version, is a no-op. +# Manual dispatch with `force: true` overwrites an existing version and +# exists for recovery only. +# +# See docs/snapshot-versioning.md + +name: Regenerate Snapshot Goldens + +on: + push: + branches: [main] + paths: + - src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs + workflow_dispatch: + inputs: + version: + description: Goldens version string. Must match GOLDENS_VERSION in source (e.g. "v1.0"). + required: true + type: string + force: + description: Overwrite tags even if the version is already published (recovery only). 
+ type: boolean + default: false + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: full + GHCR_IMAGE: ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens + +permissions: + contents: read + packages: write + +concurrency: + group: regen-snapshot-goldens-${{ github.ref }} + cancel-in-progress: false + +defaults: + run: + shell: bash + +jobs: + check-published: + runs-on: ubuntu-latest + permissions: + contents: read + packages: read + outputs: + version: ${{ steps.decide.outputs.version }} + needs_publish: ${{ steps.decide.outputs.needs_publish }} + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + + - name: Install oras + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.2 + + - name: Decide version and whether to publish + id: decide + env: + EVENT_NAME: ${{ github.event_name }} + INPUT_VERSION: ${{ inputs.version }} + FORCE: ${{ inputs.force }} + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + SRC=$(grep -oE 'GOLDENS_VERSION: &str = "[^"]+"' src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs | head -n1 | sed -E 's/.*"([^"]+)".*/\1/') + if ! [[ "${SRC}" =~ ^v[0-9]+\.[0-9]+$ ]]; then + echo "::error::GOLDENS_VERSION in source must match ^v[0-9]+\.[0-9]+$ (e.g. v1.0), found '${SRC}'" + exit 1 + fi + + # On manual dispatch the input must name the version that the + # dispatched ref actually carries. This catches a stale input. 
+ if [ "${EVENT_NAME}" = "workflow_dispatch" ] && [ "${INPUT_VERSION}" != "${SRC}" ]; then + echo "::error::version input '${INPUT_VERSION}' does not match GOLDENS_VERSION in source '${SRC}'" + exit 1 + fi + + echo "version=${SRC}" >> "$GITHUB_OUTPUT" + + if [ "${EVENT_NAME}" = "workflow_dispatch" ] && [ "${FORCE}" = "true" ]; then + echo "force requested: will publish ${SRC} even if it already exists" + echo "needs_publish=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # A version is frozen once its completion marker exists on + # GHCR. The publish job pushes the marker only after it has + # pushed every tag, so a partial run (some jobs failed) + # leaves no marker and the next run republishes the whole + # set. Publishing only when the marker is absent makes the + # workflow idempotent and never clobbers a complete baseline. + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + if oras repo tags "${GHCR_IMAGE}" 2>/dev/null | grep -qxF "${SRC}-complete"; then + echo "${SRC} already published (marker ${SRC}-complete present). Nothing to do." + echo "needs_publish=false" >> "$GITHUB_OUTPUT" + else + echo "${SRC} not fully published yet. Will publish." 
+ echo "needs_publish=true" >> "$GITHUB_OUTPUT" + fi + + build-guests: + needs: check-published + if: needs.check-published.outputs.needs_publish == 'true' + strategy: + matrix: + config: [debug, release] + uses: ./.github/workflows/dep_build_guests.yml + with: + config: ${{ matrix.config }} + secrets: inherit + + generate-snapshots: + needs: [check-published, build-guests] + if: needs.check-published.outputs.needs_publish == 'true' + strategy: + fail-fast: false + matrix: + hypervisor: [kvm, mshv3, hyperv-ws2025] + cpu: [amd, intel] + config: [debug, release] + runs-on: ${{ fromJson( + format('["self-hosted", "{0}", "X64", "1ES.Pool=hld-{1}-{2}", "JobId=regen-goldens-{3}-{4}-{5}-{6}"]', + matrix.hypervisor == 'hyperv-ws2025' && 'Windows' || 'Linux', + matrix.hypervisor == 'hyperv-ws2025' && 'win2025' || matrix.hypervisor == 'mshv3' && 'azlinux3-mshv' || matrix.hypervisor, + matrix.cpu, + matrix.config, + github.run_id, + github.run_number, + github.run_attempt)) }} + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + + - uses: hyperlight-dev/ci-setup-workflow@f6bd9cc86d0737976d2128c8b8ced8edc017cbb4 # v1.9.0 + with: + rust-toolchain: "1.94" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Fix cargo home permissions + if: runner.os == 'Linux' + run: sudo chown -R $(id -u):$(id -g) /opt/cargo || true + + - name: Download Rust guests + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: rust-guests-${{ matrix.config }} + path: src/tests/rust_guests/bin/${{ matrix.config }}/ + + - name: Confirm source matches resolved version + env: + RESOLVED_VERSION: ${{ needs.check-published.outputs.version }} + run: | + set -euo pipefail + SRC=$(grep -oE 'GOLDENS_VERSION: &str = "[^"]+"' src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs | head -n1 | sed -E 's/.*"([^"]+)".*/\1/') + if [ "${SRC}" != "${RESOLVED_VERSION}" ]; then + echo "::error::source GOLDENS_VERSION '${SRC}' 
does not match resolved '${RESOLVED_VERSION}'" + exit 1 + fi + + - name: Generate snapshots + run: just snapshot-goldens-generate ${{ matrix.config }} + + - name: Resolve produced tag + id: tag + env: + GOLDENS_VERSION: ${{ needs.check-published.outputs.version }} + run: | + set -euo pipefail + layout=$(echo "target/snapshot-goldens/${GOLDENS_VERSION}"/*/) + echo "tag=$(basename "${layout%/}")" >> "$GITHUB_OUTPUT" + + - name: Upload golden layout + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: golden-${{ steps.tag.outputs.tag }} + path: target/snapshot-goldens/${{ needs.check-published.outputs.version }}/${{ steps.tag.outputs.tag }}/ + if-no-files-found: error + retention-days: 1 + + # Push every matrix job's snapshot from this single job, so the published set is + # whole or absent. `generate-snapshots` runs `fail-fast: false` and uploads each + # snapshot as an artifact, so this job's `needs` succeeds only when + # all matrix jobs did. It downloads every artifact, pushes each tag, then + # pushes the `{version}-complete` marker that `check-published` gates on. A + # push that dies partway leaves no marker, so the next run republishes. 
+ publish: + needs: [check-published, generate-snapshots] + if: needs.check-published.outputs.needs_publish == 'true' + runs-on: ubuntu-latest + steps: + - name: Install oras + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.2 + + - name: Download all golden layouts + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + pattern: golden-* + path: layouts + + - name: Push goldens and completion marker + env: + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GOLDENS_VERSION: ${{ needs.check-published.outputs.version }} + run: | + set -euo pipefail + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + for layout in layouts/golden-*/; do + tag=$(basename "${layout%/}") + tag=${tag#golden-} + echo "::group::push ${tag}" + oras cp --from-oci-layout "${layout%/}:${tag}" "${GHCR_IMAGE}:${tag}" + echo "::endgroup::" + done + printf '%s' "${GOLDENS_VERSION}" > complete.txt + oras push "${GHCR_IMAGE}:${GOLDENS_VERSION}-complete" \ + --artifact-type application/vnd.hyperlight.goldens.complete.v1 \ + complete.txt:text/plain diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml index 659ec9acc..5493012eb 100644 --- a/.github/workflows/ValidatePullRequest.yml +++ b/.github/workflows/ValidatePullRequest.yml @@ -79,17 +79,33 @@ jobs: with: docs_only: ${{ needs.docs-pr.outputs.docs-only }} + # Pick the goldens mode. The `regen-goldens` label means regenerate. No label means pull. 
+ goldens-mode: + runs-on: ubuntu-latest + outputs: + regen: ${{ steps.check.outputs.regen }} + steps: + - id: check + if: github.event_name == 'pull_request' + env: + GH_TOKEN: ${{ github.token }} + run: | + gh pr view ${{ github.event.pull_request.number }} --repo ${{ github.repository }} \ + --json labels -q '.labels[].name' | grep -qx regen-goldens \ + && echo "regen=true" >> "$GITHUB_OUTPUT" || echo "regen=false" >> "$GITHUB_OUTPUT" + # Build and test - needs guest artifacts build-test: needs: - docs-pr - build-guests + - goldens-mode # Required because update-guest-locks is skipped on non-dependabot PRs, # and a skipped dependency transitively skips all downstream jobs. # See: https://github.com/actions/runner/issues/2205 if: ${{ !cancelled() && !failure() }} strategy: - fail-fast: true + fail-fast: false matrix: hypervisor: ['hyperv-ws2025', mshv3, kvm] cpu: [amd, intel] @@ -101,6 +117,7 @@ jobs: hypervisor: ${{ matrix.hypervisor }} cpu: ${{ matrix.cpu }} config: ${{ matrix.config }} + regen_goldens: ${{ needs.goldens-mode.outputs.regen }} # Run examples - needs guest artifacts, runs in parallel with build-test run-examples: diff --git a/.github/workflows/dep_build_test.yml b/.github/workflows/dep_build_test.yml index 91ce867aa..bebaa5295 100644 --- a/.github/workflows/dep_build_test.yml +++ b/.github/workflows/dep_build_test.yml @@ -22,6 +22,11 @@ on: description: CPU architecture for the build (passed from caller matrix) required: true type: string + regen_goldens: + description: Regenerate snapshot goldens from the branch and skip pulling published ones + required: false + type: string + default: "false" env: CARGO_TERM_COLOR: always @@ -29,6 +34,7 @@ env: permissions: contents: read + packages: read defaults: run: @@ -138,3 +144,29 @@ jobs: env: RUST_LOG: debug run: just test-rust-tracing ${{ inputs.config }} + + - name: Install oras + if: ${{ inputs.regen_goldens != 'true' }} + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # 
v2.0.0 + with: + version: 1.3.2 + + # Pull the published goldens for this cell and load them with the + # branch. A missing tag fails the job and flags a format break. + - name: Snapshot goldens (pull and verify) + if: ${{ inputs.regen_goldens != 'true' }} + env: + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + just snapshot-goldens-pull ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens ${{ inputs.config }} + just snapshot-goldens-verify ${{ inputs.config }} + + # Label path: generate the goldens from the branch and load them + # back. Used when no published tag set exists yet. + - name: Snapshot goldens (regenerate and verify) + if: ${{ inputs.regen_goldens == 'true' }} + run: | + just snapshot-goldens-generate ${{ inputs.config }} + just snapshot-goldens-verify ${{ inputs.config }} diff --git a/Cargo.lock b/Cargo.lock index 14effcde1..2dafe9870 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -504,6 +504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -518,6 +519,18 @@ dependencies = [ "strsim", ] +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "clap_lex" version = "1.0.0" @@ -947,6 +960,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "euclid" version = "0.22.13" @@ -1709,6 +1728,7 @@ dependencies = [ "kvm-ioctls", "lazy_static", "libc", + 
"libtest-mimic", "log", "metrics", "metrics-exporter-prometheus", @@ -2164,6 +2184,18 @@ dependencies = [ "libc", ] +[[package]] +name = "libtest-mimic" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" +dependencies = [ + "anstream 1.0.0", + "anstyle", + "clap", + "escape8259", +] + [[package]] name = "libz-sys" version = "1.1.23" diff --git a/Justfile b/Justfile index 2bdb842b9..26c4bbbdc 100644 --- a/Justfile +++ b/Justfile @@ -249,8 +249,10 @@ test-integration target=default-target features="": (witguest-wit) @# run component-util integration tests that depend on generated WIT inputs {{ cargo-cmd }} test -p hyperlight-component-util --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test wasmtime_guest_codegen - @# run the rest of the integration tests - {{ cargo-cmd }} test -p hyperlight-host {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test '*' + @# run the rest of the integration tests. `snapshot_goldens` is + @# left out here. It runs in its own step against a filled golden + @# cache (see the snapshot-goldens recipes). 
+ {{ cargo-cmd }} test -p hyperlight-host {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test integration_test --test sandbox_host_tests --test wit_test # tests compilation with no default features on different platforms test-compilation-no-default-features target=default-target: @@ -573,3 +575,56 @@ install-vcpkg: install-flatbuffers-with-vcpkg: install-vcpkg cd ../vcpkg && ./vcpkg install flatbuffers || cd - + +################################### +### SNAPSHOT GOLDEN HELPERS ### +################################### +# Test binary that checks or rebuilds snapshot goldens. It reads +# snapshots from target/snapshot-goldens/{version}/{tag}/. +# `snapshot-goldens-pull` fills that directory. It uses `oras` to copy +# from the registry (install from https://oras.land). + +# Default OCI registry image (without tag) that hosts the goldens. +default-snapshot-goldens-image := "ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens" + +# Check the local snapshots against the goldens for the current +# GOLDENS_VERSION. Run `snapshot-goldens-pull` first to fill the +# local directory. A missing entry fails the test. +snapshot-goldens-verify target=default-target: + cargo test {{ if target == "release" { "--release" } else { "" } }} \ + -p hyperlight-host --test snapshot_goldens + +# Pull the golden for this host from `image` into the +# directory that `snapshot-goldens-verify` reads. It picks the +# hypervisor and CPU vendor from the host. Pass `profile=release` +# to fetch the release tags. 
+snapshot-goldens-pull image=default-snapshot-goldens-image profile="debug": + #!/usr/bin/env bash + set -euo pipefail + if [[ -e /dev/mshv ]]; then hv=mshv + elif [[ -e /dev/kvm ]]; then hv=kvm + elif [[ "${OS:-}" == "Windows_NT" ]]; then hv=whp + else echo "snapshot-goldens-pull: no hypervisor found" >&2; exit 1 + fi + if [[ -r /proc/cpuinfo ]]; then vendor=$(awk -F: '/vendor_id/{print $2; exit}' /proc/cpuinfo) + else vendor="${PROCESSOR_IDENTIFIER:-}" + fi + case "${vendor}" in + *GenuineIntel*) cpu=intel ;; + *AuthenticAMD*) cpu=amd ;; + *) echo "snapshot-goldens-pull: unknown CPU vendor" >&2; exit 1 ;; + esac + version=$(awk -F'"' '/GOLDENS_VERSION: &str =/{print $2; exit}' src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs) + # Mirror of `Platform::tag` in platform.rs. Keep both in sync. + tag="${version}-${hv}-${cpu}-{{ profile }}" + dir="target/snapshot-goldens/${version}/${tag}" + mkdir -p "${dir}" + oras cp --to-oci-layout "{{ image }}:${tag}" "${dir}:${tag}" + +# Build the local snapshots into the directory that +# `snapshot-goldens-verify` reads. Run `snapshot-goldens-generate` +# then `snapshot-goldens-verify` to test the round trip on one host. +# Pass `out` to write the snapshots to another directory. +snapshot-goldens-generate target=default-target out="": + cargo test {{ if target == "release" { "--release" } else { "" } }} \ + -p hyperlight-host --test snapshot_goldens -- generate {{ out }} diff --git a/docs/github-labels.md b/docs/github-labels.md index 5133f048a..e1f28c2ed 100644 --- a/docs/github-labels.md +++ b/docs/github-labels.md @@ -55,6 +55,12 @@ In addition to **kind/*** labels, we use optional **area/*** labels to specify t - **area/security** - Involves security-related changes or fixes. - **area/testing** - Related to tests or testing infrastructure. 
+## Workflow labels + +Some labels change CI behaviour on a PR rather than categorizing it: + +- **regen-goldens** - Switches the snapshot golden verify job into regenerate mode. A PR that intentionally changes the snapshot format and bumps `GOLDENS_VERSION` carries this label so the verify job generates the goldens from the branch and runs them back through the branch loader, rather than pulling a published tag set that does not exist yet. See [snapshot-versioning.md](snapshot-versioning.md). + ## Notes This document is a work in progress and may be updated as needed. The labels and categories are subject to change based on the evolving needs of the project and community feedback. diff --git a/docs/snapshot-versioning.md b/docs/snapshot-versioning.md new file mode 100644 index 000000000..a1030f172 --- /dev/null +++ b/docs/snapshot-versioning.md @@ -0,0 +1,328 @@ +# Snapshot versioning + +Hyperlight snapshots are written to disk as OCI image layouts and may be +loaded by a different build than the one that produced them. This +document describes how to evolve the snapshot format while keeping +existing snapshots loadable, or while rejecting them with a clear error. + +## What is versioned + +A snapshot carries three independently evolvable version markers: + +* **Memory blob ABI**, `SNAPSHOT_ABI_VERSION` (a `u32` inside the + config blob, defined in + [src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs](../src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs)). + This is the host/guest runtime contract baked into the captured + memory: the `HyperlightPEB` layout (the struct host and guest share + to exchange state, field offsets and types), the `OutBAction` port + numbers (the I/O ports the guest writes to for `Log`, `CallFunction`, + `Abort`, `DebugPrint`), the layout of the sandbox memory regions + (stack, heap, guest binary, input and output buffers, page tables), + and the calling convention used for guest function entry. 
The loader + trusts the captured bytes to match this contract, so any change here + invalidates older snapshots unless an explicit compat path translates + them. +* **Snapshot blob encoding**, `MT_SNAPSHOT_V1` + (`application/vnd.hyperlight.snapshot.memory.v1`), aliased as + `MT_SNAPSHOT_CURRENT`. This is the on-wire format of the snapshot + blob: framing, section ordering, alignment, dirty/zero-page elision, + anything about how the bytes are packed inside the OCI layer. +* **Config schema**, `MT_CONFIG_V1` + (`application/vnd.hyperlight.snapshot.config.v1+json`), aliased as + `MT_CONFIG_CURRENT`. This is the JSON shape of the config blob: + field names, types, required vs optional, the descriptors the loader + needs in order to reconstruct the sandbox (memory sizes, buffer + sizes, `abi_version`, `hyperlight_version`, etc.). Renaming a field, + changing its type, or adding a required field is a schema change and + bumps this constant. + +The `OCI_LAYOUT_VERSION` constant is pinned by the OCI image-layout +spec at `1.0.0`. + +Each media-type axis is a `_VN` constant with a `_CURRENT` alias. The +writer emits `_CURRENT`. The loader matches each `_VN` explicitly. To +add a version, declare `MT_FOO_V2`, point `MT_FOO_CURRENT` at it, and +add a loader arm that translates the old version or rejects it. + +The config blob also records `hyperlight_version`, the `CARGO_PKG_VERSION` +of the host crate at write time. This is informational only. The loader +records it for diagnostics and does not gate loading on it. + +## Enforcement + +The format is large and easy to change by accident. Two mechanisms +catch a change to it so reviewers do not have to spot every break by +eye, and so a developer who breaks the format unintentionally finds +out at build time rather than in production. 
+ +Compile-time tripwires in +[src/hyperlight_host/src/sandbox/snapshot/tripwires.rs](../src/hyperlight_host/src/sandbox/snapshot/tripwires.rs) +hold a copy of every value that defines the format: +`SNAPSHOT_ABI_VERSION`, the snapshot and config media-type strings, the +OCI layout version, every `HyperlightPEB` field offset and the struct's +total size, and every `OutBAction` discriminant. If the source value +drifts from the copy in `tripwires.rs`, the crate fails to compile. + +The snapshot golden verify test +(`cargo test -p hyperlight-host --test snapshot_goldens`) loads +snapshots from a local directory (populated by `just snapshot-goldens-pull`, +which fetches the tag set for the current `GOLDENS_VERSION` from GHCR) +and runs them through the current loader. If the new loader cannot +decode the old bytes, the test fails. + +On a pull request the verify test runs on every supported hypervisor +runner. The default path pulls the published tag set for the current +`GOLDENS_VERSION` and verifies it against the branch's loader. A pull +request that intentionally changes the format takes the labelled path +described in [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request). + +## Changing the format + +When you change anything on the list above, you have three options. + +### Option 1: avoid the break + +Restructure the change so the on-disk contract stays put. Prefer this +whenever possible. + +### Option 2: backwards-compatible break + +You break the ABI for new snapshots, and you teach the loader to +accept the older version as well by translating it into the current +contract on the fly. For example, if you renumber the `OutBAction` +ports, the host's port dispatch keeps a match arm for the old port +number alongside the new one, so a resumed v1 guest that still writes +to the old port is handled correctly. + +Steps: + +1. Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. 
The writer stamps this value into + every config blob it produces. +4. Update `Snapshot::from_oci` to load both the old and the new + format, dispatching on `abi_version`. +5. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +6. Bump `GOLDENS_VERSION` to the next major. Apply the `regen-goldens` + label to the pull request so the verify job regenerates against the + branch. See + [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request) + and [Goldens version numbering](#goldens-version-numbering). +7. Keep the old goldens on GHCR and extend the verify test to exercise + them as well, so the compatibility path stays covered. See + [Verifying multiple golden versions](#verifying-multiple-golden-versions). + +Old snapshots on disk continue to load. New snapshots use the new +contract. The compatibility path becomes part of the supported surface +and must stay correct until you formally drop the old major. + +### Option 3: hard break + +You change the contract and the loader rejects old snapshots outright. +Using the same `OutBAction` example, the host's port dispatch only +matches on the new port number, and a resumed v1 guest writing to the +old port has nowhere to land. + +Steps: + +1. Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. +4. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +5. Bump `GOLDENS_VERSION` to the next major. Apply the `regen-goldens` + label to the pull request so the verify job regenerates against the + branch. See + [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request) + and [Goldens version numbering](#goldens-version-numbering). +6. Record the break in `CHANGELOG.md`. Anyone holding old snapshots on + disk has to regenerate them against the new build. + +The loader's single-version check enforces the rejection. 
An old +snapshot loaded against the new build fails the +`abi_version == SNAPSHOT_ABI_VERSION` test with a clear error. + +## Regenerating goldens + +The verify test (`cargo test -p hyperlight-host --test snapshot_goldens`) +loads the tag `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}` from a +local directory that `just snapshot-goldens-pull` populates from GHCR. A +freshly bumped `GOLDENS_VERSION` has no tags on GHCR until the bump +merges to `main` and the publish workflow runs, so pull requests that +bump the version verify through the `regen-goldens` label instead (see +[Breaking the format on a pull request](#breaking-the-format-on-a-pull-request)). + +### Iterating locally + +`just snapshot-goldens-generate` regenerates the directory for the current +`GOLDENS_VERSION` from the local source, so the verify test runs green +against your in-progress changes on your own platform. Use this loop +for iteration that does not need to cross hypervisor boundaries. +Cross-platform coverage comes from the publish workflow's matrix, which +runs automatically when the bump merges to `main` (see +[Publishing a new version](#publishing-a-new-version)). + +### Goldens version numbering + +`GOLDENS_VERSION` follows a `vMAJOR.MINOR` scheme. The tag set on GHCR +for a given version is keyed by the full string, so `v1.0`, `v1.1`, and +`v2.0` are independent namespaces that never collide. + +* Bump **MAJOR** when the snapshot ABI changes (Option 2 or Option 3 + above). MAJOR tracks `SNAPSHOT_ABI_VERSION`: every format break bumps + both, so a new MAJOR means the on-disk contract moved and old + snapshots load through a compatibility path or not at all. The old + tag set stays on GHCR untouched. +* Bump **MINOR** when the set of golden checks changes but the ABI does + not (for example, a new check/test is added). The on-disk contract is + unchanged, so `SNAPSHOT_ABI_VERSION` stays put. 
The new tag set + contains every check, including the unchanged ones, regenerated + against the current source. + +`GOLDENS_VERSION` and `SNAPSHOT_ABI_VERSION` are two separate counters +with different purposes. `SNAPSHOT_ABI_VERSION` is the integer stamped into +every snapshot blob, and the loader reads it to decide how to parse the +bytes. `GOLDENS_VERSION` names the published golden tag set on GHCR. A +format break bumps both. A check-set change bumps only +`GOLDENS_VERSION`. + +A version is published once, when the bump merges to `main`, and is +frozen from then on. The publish workflow only publishes a version +whose completion marker is absent from GHCR, so a published baseline +cannot be clobbered by a later run. While a developer iterates on a v1 +to v2 bump the new version is unpublished, so they verify locally with +`just snapshot-goldens-generate` and the `regen-goldens` label rather +than pushing to GHCR. + +The freeze is enforced by the publish workflow's marker check, not by a +registry policy. Each `(hv, cpu, profile)` combination generates its snapshot +and uploads it as a workflow artifact. A single publish job downloads +every artifact, pushes each as its tag, then pushes a +`{version}-complete` marker last. Pushing the whole set from one job +means a partial run leaves no marker, so the next run republishes +rather than freezing an incomplete set. Republishing a complete version +takes a manual dispatch with `force: true`, reserved for recovering a +corrupted push. + +### Breaking the format on a pull request + +A pull request that bumps `GOLDENS_VERSION` introduces a tag set that +GHCR does not carry yet, so the default pull-and-verify path has nothing +to load. The `regen-goldens` label switches the verify job into +regenerate mode for that pull request. + +* **Without the label**, the job pulls the published tag set for the + current `GOLDENS_VERSION` and verifies it against the branch. Missing + tags fail the job. 
This is what turns an accidental format break into + a red build: the published bytes stop loading, and the author must + either restructure the change or own the break with the label. +* **With the `regen-goldens` label**, the job generates the goldens + from the branch source and runs them straight back through the + branch loader. This proves the new format is internally loadable on + each runner. It does not prove anything about the old tag set, which + belongs to a different version namespace. + +The label is an explicit, reviewable assertion that the format break is +intended. The verify job never regenerates on its own initiative, so a +flaky pull or a mistyped version stays a hard failure rather than +silently degrading into a self-check. + +### Publishing a new version + +Publishing is automatic. When a bump to `GOLDENS_VERSION` merges to +`main`, the `Regenerate Snapshot Goldens` workflow runs on the push and +publishes the new version's tag set. No manual step is needed, and a +merge that does not change `GOLDENS_VERSION` does not publish (the push +trigger is filtered to the file that holds the version, +`src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs`). + +The workflow walks every supported `(hypervisor, cpu, profile)` +combination on the self-hosted runner pool, generates the canonical +snapshot with +`cargo test --test snapshot_goldens -- generate <out-dir>`, and uploads each +OCI layout as a workflow artifact. A single publish job downloads them +all and pushes each with `oras cp` as the tag +`{version}-{hv}-{cpu}-{profile}`, then pushes the +`{version}-complete` marker. + +A lightweight `check-published` job gates the matrix. It reads `GOLDENS_VERSION` +from source and checks GHCR for the `{version}-complete` marker tag. If +the marker is present the version is fully published and the workflow +stops there, so re-running it, or merging an unrelated change, is a +no-op.
The marker is pushed last by the publish job, which runs only +after every matrix job uploaded its snapshot, so a version counts as +published only as a whole set. This makes publishing idempotent, keeps +a complete baseline from being clobbered, and lets a run that follows a +partial push fill in the missing combinations. + +The workflow can also be dispatched manually. The `version` input must +equal `GOLDENS_VERSION` in the dispatched ref, which guards against +publishing a tag set the test binary would ignore. A manual dispatch +with `force: true` republishes a version that already exists, reserved +for recovering a corrupted or partial push. + +The push-triggered publish closes the window in which a pull request +that bumped the version needs the `regen-goldens` label. Once `main` +carries the bump and the publish lands, new pull requests pass on the +default pull-and-verify path. + +### Bootstrapping the first version + +The first publish runs through the normal path. The merge that adds +`goldens_version.rs` touches the file the push trigger watches, so the workflow +fires. `check-published` lists GHCR tags for the marker. An empty +registry returns an empty list, so the job publishes. The matrix lands +the first tag set and its marker. + +The first `oras` push creates the GHCR package +`ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens` on demand. The +organization must allow the Actions `GITHUB_TOKEN` to create packages. +A cold start that fails here means that setting is off. Turn it on and +re-run. + +To seed a version by hand, dispatch the workflow with `force: true` and +a `version` input equal to the `GOLDENS_VERSION` in the dispatched ref. + +## Adding a new check under the current ABI + +Adding a new entry to `CHECKS` does not change the snapshot ABI. It +does change the set of tags the verify test expects, so it requires a +minor `GOLDENS_VERSION` bump. + +Steps: + +1. Add the entry to `CHECKS` in + `src/hyperlight_host/tests/snapshot_goldens/`. 
+2. Bump `GOLDENS_VERSION` minor (e.g. `v1.2` to `v1.3`). The new prefix + has no published tags, so the default verify path fails until they + exist. +3. Apply the `regen-goldens` label to the pull request. The verify job + regenerates the full check set against the branch and runs it back + through the branch loader. See + [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request). +4. Once the change lands, the new prefix is published per + [Publishing a new version](#publishing-a-new-version). The older + tag set stays on GHCR untouched. + +The older minor's tags can be deleted from GHCR once nothing depends +on them. + +## Verifying multiple golden versions + +The verify test pulls exactly one tag set, the one for the current +`GOLDENS_VERSION`. That covers the hard-break case (Option 3), where a +fresh tag set replaces the older one. + +The backwards-compatible case (Option 2) needs more. A v1 loader path +is only correct if real v1 goldens load against the new build, which +means verifying against multiple versions in the same run. + +The intended design is to replace the single `GOLDENS_VERSION` constant +with a slice of the supported major versions, e.g. +`pub const GOLDENS_VERSIONS: &[&str] = &["v1.3", "v2.0"];`, and have +the verify test run every check against every entry. Dropping an old +major is then a one-line removal from that slice. + +The single-version variant suffices for Option 3. Build the +multi-version variant the first time you take Option 2. 
diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index c663504dc..fe8c198af 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -109,6 +109,7 @@ metrics-util = "0.20.4" metrics-exporter-prometheus = { version = "0.18.3", default-features = false } serde_json = "1.0" hyperlight-component-macro = { workspace = true } +libtest-mimic = "0.8.2" [target.'cfg(windows)'.dev-dependencies] windows = { version = "0.62", features = [ @@ -144,3 +145,12 @@ build-metadata = ["dep:built"] [[bench]] name = "benchmarks" harness = false + +[[test]] +name = "snapshot_goldens" +path = "tests/snapshot_goldens/main.rs" +harness = false +# Excluded from `cargo test` so a normal run does not need the golden tests +# downloaded. A `--test '*'` glob still matches it, so callers name targets +# explicitly. +test = false diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/config.rs b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs index 4e926a62a..107ccf164 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file/config.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs @@ -696,3 +696,232 @@ mod tests { } } } + +#[cfg(test)] +mod schema_pin { + use super::*; + + const PINNED_CALL: &str = r#"{ + "hyperlight_version": "x.y.z", + "arch": "x86_64", + "abi_version": 1, + "hypervisor": "mshv", + "stack_top_gva": 3735928559, + "entrypoint_addr": 8192, + "sregs": { + "cs": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "ds": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "es": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + 
"padding": 13 + }, + "fs": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "gs": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "ss": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "tr": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "ldt": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "gdt": { + "base": 1, + "limit": 2 + }, + "idt": { + "base": 3, + "limit": 4 + }, + "cr0": 1, + "cr2": 2, + "cr4": 4, + "cr8": 5, + "efer": 6, + "apic_base": 7, + "interrupt_bitmap": [ + 8, + 9, + 10, + 11 + ] + }, + "layout": { + "input_data_size": 1, + "output_data_size": 2, + "heap_size": 3, + "code_size": 4, + "init_data_size": 5, + "init_data_permissions": null, + "scratch_size": 8, + "snapshot_size": 9, + "pt_size": null + }, + "memory_size": 65536, + "host_functions": [ + { + "function_name": "fn_void", + "parameter_types": [ + "bool" + ], + "return_type": "void" + } + ], + "snapshot_generation": 42 +}"#; + + const PINNED_ARCH: &str = r#"[ + "x86_64", + "aarch64" +]"#; + + const PINNED_HYPERVISOR: &str = r#"[ + "kvm", + "mshv", + "whp" +]"#; + + fn assert_round_trip(pinned: &str) { + let parsed: OciSnapshotConfig = + serde_json::from_str(pinned).expect("pinned JSON must deserialize"); + let actual = serde_json::to_string_pretty(&parsed).expect("serialize"); + assert_eq!( + actual.trim(), + pinned.trim(), 
+ "Snapshot config JSON schema changed. If the change can break \ existing snapshots on disk, bump `MT_CONFIG_V1` in \ `super::media_types` and follow `docs/snapshot-versioning.md`. \ Either way, paste the actual output below into the matching \ `PINNED_*`.\n\nactual:\n{actual}" + ); + } + + #[test] + fn call_round_trip() { + assert_round_trip(PINNED_CALL); + } + + #[test] + fn arch_variants_round_trip() { + let parsed: Vec<Arch> = + serde_json::from_str(PINNED_ARCH).expect("pinned arch JSON must deserialize"); + let actual = serde_json::to_string_pretty(&parsed).expect("serialize"); + assert_eq!(actual.trim(), PINNED_ARCH.trim(), "Arch variants changed."); + } + + #[test] + fn hypervisor_variants_round_trip() { + let parsed: Vec<Hypervisor> = serde_json::from_str(PINNED_HYPERVISOR) + .expect("pinned hypervisor JSON must deserialize"); + let actual = serde_json::to_string_pretty(&parsed).expect("serialize"); + assert_eq!( + actual.trim(), + PINNED_HYPERVISOR.trim(), + "Hypervisor variants changed." + ); + } +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs index 0b3d64fba..31156a134 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs @@ -14,24 +14,20 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Media types are versioned by suffix. The loader matches each -// version specifically (no `_CURRENT` shortcut on the read side); the -// writer always emits `_CURRENT`. A new version is added by: -// -// 1. Declare `MT_FOO_V2` next to `MT_FOO_V1`. -// 2. Point `MT_FOO_CURRENT` at `MT_FOO_V2`. -// 3. Add a dispatch arm in the loader that converts v1 -> v2 (or -// rejects v1 if no compatibility window is offered).
-pub(super) const MT_CONFIG_V1: &str = "application/vnd.hyperlight.snapshot.config.v1+json"; -pub(super) const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; -pub(super) const MT_SNAPSHOT_V1: &str = "application/vnd.hyperlight.snapshot.memory.v1"; -pub(super) const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; +// Media types are versioned by suffix. The writer emits `_CURRENT`. +// The loader matches each version explicitly. See +// docs/snapshot-versioning.md for how to add a version. +pub(in crate::sandbox::snapshot) const MT_CONFIG_V1: &str = + "application/vnd.hyperlight.snapshot.config.v1+json"; +pub(in crate::sandbox::snapshot) const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; +pub(in crate::sandbox::snapshot) const MT_SNAPSHOT_V1: &str = + "application/vnd.hyperlight.snapshot.memory.v1"; +pub(in crate::sandbox::snapshot) const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; -/// ABI version for the snapshot memory blob. Bumped whenever the -/// host-guest contract for the bytes inside the snapshot blob changes -/// (PEB layout, calling convention, init state, etc.). Independent of -/// the config blob's media-type version. -pub(super) const SNAPSHOT_ABI_VERSION: u32 = 1; +/// ABI version for the snapshot memory blob. Bumped when the +/// host-guest contract for the snapshot bytes changes. See +/// docs/snapshot-versioning.md. +pub(in crate::sandbox::snapshot) const SNAPSHOT_ABI_VERSION: u32 = 1; /// OCI standard annotation key for a manifest's tag inside an image /// index. 
Set on the manifest descriptor in `index.json`, not on the diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs index c95c129e6..59ecb82cb 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs @@ -35,9 +35,9 @@ use oci_spec::image::{ use self::config::{Arch, HostFunction, Hypervisor, MemoryLayout, OciSnapshotConfig, Sregs}; use self::digest::{Digest256, oci_digest, parse_oci_digest, verify_blob_bytes, verify_blob_file}; use self::fsutil::{put_blob, put_blob_if_absent, read_bounded, replace_file_atomic}; -use self::media_types::{ - ANNOTATION_ARCH, ANNOTATION_HYPERVISOR, ANNOTATION_REF_NAME, MT_CONFIG_CURRENT, MT_CONFIG_V1, - MT_SNAPSHOT_CURRENT, MT_SNAPSHOT_V1, SNAPSHOT_ABI_VERSION, +use self::media_types::{ANNOTATION_ARCH, ANNOTATION_HYPERVISOR, ANNOTATION_REF_NAME}; +pub(super) use self::media_types::{ + MT_CONFIG_CURRENT, MT_CONFIG_V1, MT_SNAPSHOT_CURRENT, MT_SNAPSHOT_V1, SNAPSHOT_ABI_VERSION, }; use self::reference::{OciDigest, OciReference, OciTag}; use super::{NextAction, Snapshot}; @@ -46,7 +46,7 @@ use crate::mem::layout::SandboxMemoryLayout; use crate::mem::memory_region::MemoryRegionFlags; use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; -const OCI_LAYOUT_VERSION: &str = "1.0.0"; +pub(super) const OCI_LAYOUT_VERSION: &str = "1.0.0"; /// Maximum size of any JSON blob read from disk during load: /// `oci-layout`, `index.json`, the OCI image manifest, and the diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs index 9383d1b8b..56f0cd1f2 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -2784,3 +2784,146 @@ fn read_blob_dir( }) .collect() } + +// ============================================================================= +// `from_snapshot` config 
plumbing. +// ============================================================================= +// +// `from_snapshot` accepts a caller-supplied `SandboxConfiguration`. +// Layout fields must be silently overridden by the snapshot (the +// on-disk memory blob already encodes those sizes). Runtime fields +// must take effect. + +/// Layout fields supplied via `SandboxConfiguration` must be silently +/// overridden. The snapshot's own layout is authoritative. +#[test] +fn from_snapshot_silently_ignores_layout_overrides() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let original_input = snapshot.layout().input_data_size; + let original_output = snapshot.layout().output_data_size; + let original_heap = snapshot.layout().heap_size; + let original_scratch = snapshot.layout().get_scratch_size(); + + let mut config = SandboxConfiguration::default(); + config.set_input_data_size(original_input * 2); + config.set_output_data_size(original_output * 2); + config.set_heap_size((original_heap as u64) * 2); + config.set_scratch_size(original_scratch * 2); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot.clone(), HostFunctions::default(), Some(config)) + .unwrap(); + + sbox2.call::<i32>("GetStatic", ()).unwrap(); + + let new_snap = sbox2.snapshot().unwrap(); + assert_eq!(new_snap.layout().input_data_size, original_input); + assert_eq!(new_snap.layout().output_data_size, original_output); + assert_eq!(new_snap.layout().heap_size, original_heap); + assert_eq!(new_snap.layout().get_scratch_size(), original_scratch); +} + +/// `from_snapshot` honors `guest_core_dump=true` so that +/// `generate_crashdump_to_dir` writes a file.
+#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_enabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(true); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + !entries.is_empty(), + "expected core dump file when guest_core_dump=true" + ); +} + +/// `from_snapshot` honors `guest_core_dump=false` so that +/// `generate_crashdump_to_dir` produces no file. +#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_disabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(false); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + entries.is_empty(), + "expected no core dump file when guest_core_dump=false, found {:?}", + entries.iter().map(|e| e.path()).collect::<Vec<_>>() + ); +} + +/// Non-default `init_data_permissions` survive an OCI round-trip +/// byte-for-byte. The default code path uses `READ`, so this pins +/// `READ | WRITE` instead. A regression in the permission +/// serialisation would silently downgrade or upgrade access to the +/// init_data region.
+#[test] +fn round_trip_preserves_non_default_init_data_permissions() { + use crate::mem::memory_region::MemoryRegionFlags; + use crate::sandbox::uninitialized::{GuestBlob, GuestEnvironment}; + + let path = simple_guest_as_string().unwrap(); + let data: &[u8] = b"perm-pinned-init-data"; + let env = GuestEnvironment { + guest_binary: GuestBinary::FilePath(path), + init_data: Some(GuestBlob { + data, + permissions: MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, + }), + }; + let mut sbox = UninitializedSandbox::new(env, None) + .unwrap() + .evolve() + .unwrap(); + let snap = sbox.snapshot().unwrap(); + let expected = snap.layout().init_data_permissions; + assert_eq!( + expected, + Some(MemoryRegionFlags::READ | MemoryRegionFlags::WRITE), + "fixture must produce non-default init_data_permissions", + ); + + let dir = tempfile::tempdir().unwrap(); + let oci_dir = dir.path().join("layout"); + snap.save(&oci_dir, &OciTag::new("latest").unwrap()) + .unwrap(); + let loaded = Snapshot::checked_load(&oci_dir, OciTag::new("latest").unwrap()).unwrap(); + assert_eq!(loaded.layout().init_data_permissions, expected); +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index c9ec426b4..155195475 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -16,6 +16,7 @@ limitations under the License. mod file; mod file_tests; +mod tripwires; use std::collections::HashMap; diff --git a/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs new file mode 100644 index 000000000..41991656b --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs @@ -0,0 +1,75 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! Compile-time tripwires for the snapshot ABI.
+//!
+//! Each assertion pins one piece of the contract that snapshots
+//! depend on: the manifest media types, the OCI Image Layout version,
+//! the `HyperlightPEB` field offsets, and the `OutBAction` port
+//! numbers. A change to any of these breaks loading of older
+//! snapshots.
+//!
+//! When an assertion fires, see `docs/snapshot-versioning.md`.
+
+use super::file::{
+    MT_CONFIG_CURRENT, MT_SNAPSHOT_CURRENT, OCI_LAYOUT_VERSION, SNAPSHOT_ABI_VERSION,
+};
+
+const EXPECTED_ABI_VERSION: u32 = 1;
+const EXPECTED_MT_CONFIG: &str = "application/vnd.hyperlight.snapshot.config.v1+json";
+const EXPECTED_MT_SNAPSHOT: &str = "application/vnd.hyperlight.snapshot.memory.v1";
+const EXPECTED_OCI_LAYOUT_VERSION: &str = "1.0.0";
+
+const _: () = {
+    assert!(SNAPSHOT_ABI_VERSION == EXPECTED_ABI_VERSION);
+    assert!(str_eq(MT_CONFIG_CURRENT, EXPECTED_MT_CONFIG));
+    assert!(str_eq(MT_SNAPSHOT_CURRENT, EXPECTED_MT_SNAPSHOT));
+    assert!(str_eq(OCI_LAYOUT_VERSION, EXPECTED_OCI_LAYOUT_VERSION));
+};
+
+const _: () = {
+    use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB};
+    assert!(std::mem::size_of::<GuestMemoryRegion>() == 16);
+    assert!(std::mem::size_of::<HyperlightPEB>() == 4 * 16);
+    assert!(std::mem::offset_of!(HyperlightPEB, input_stack) == 0);
+    assert!(std::mem::offset_of!(HyperlightPEB, output_stack) == 16);
+    assert!(std::mem::offset_of!(HyperlightPEB, init_data) == 32);
+    assert!(std::mem::offset_of!(HyperlightPEB, guest_heap) == 48);
+};
+
+const _: () = {
+    use hyperlight_common::outb::OutBAction;
+    assert!(OutBAction::Log as u16 == 99);
+    assert!(OutBAction::CallFunction as u16 == 101);
+    assert!(OutBAction::Abort as u16 == 102);
+    assert!(OutBAction::DebugPrint as u16 == 103);
+};
+
+const fn str_eq(a: &str, b: &str) -> bool {
+    let a = a.as_bytes();
+    let b = b.as_bytes();
+    if a.len() != b.len() {
+        return false;
+    }
+    let mut i = 0;
+    while i < a.len() {
+        if a[i] != b[i] {
+            return false;
+        }
+        i += 1;
+    }
+    true
+}
diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs
index 49f9d2cf2..eaa3941ab 100644
--- a/src/hyperlight_host/tests/integration_test.rs
+++ b/src/hyperlight_host/tests/integration_test.rs
@@ -535,7 +535,9 @@ fn guest_malloc_abort() {
     });
 
     // allocate a vector (on heap) that is bigger than the heap
-    let heap_size = 0x4000;
+    // Guest init registers every guest function into a heap map.
+    // 0x6000 leaves room for that so the sandbox can start.
+    let heap_size = 0x6000;
     let size_to_allocate = 0x10000;
     assert!(
         size_to_allocate > heap_size,
@@ -616,7 +618,9 @@ fn corrupt_output_back_pointer_rejected() {
 
 #[test]
 fn guest_panic_no_alloc() {
-    let heap_size = 0x4000;
+    // Guest init registers every guest function into a heap map.
+    // 0x6000 leaves room for that so the sandbox can start.
+    let heap_size = 0x6000;
     let mut cfg = SandboxConfiguration::default();
     cfg.set_heap_size(heap_size);
 
diff --git a/src/hyperlight_host/tests/snapshot_goldens/checks.rs b/src/hyperlight_host/tests/snapshot_goldens/checks.rs
new file mode 100644
index 000000000..912ee1b36
--- /dev/null
+++ b/src/hyperlight_host/tests/snapshot_goldens/checks.rs
@@ -0,0 +1,270 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! Functional checks against goldens loaded from the on-disk goldens
+//! directory.
+//!
+//! Each check runs against a fresh `MultiUseSandbox` built from
+//! the golden, so checks are independent and one failure does not
+//! poison the next. See `docs/snapshot-versioning.md` for how to
+//! add a check.
+
+use std::sync::Arc;
+
+use hyperlight_host::sandbox::snapshot::{OciTag, Snapshot};
+use hyperlight_host::{HostFunctions, MultiUseSandbox};
+
+use crate::fixtures::{CALL_COUNTER_BUMP, HEAP_PATTERN_LEN, register_host_echo_fns};
+
+pub struct Check {
+    pub name: &'static str,
+    pub run: fn(&mut MultiUseSandbox) -> Result<(), String>,
+}
+
+pub const CHECKS: &[Check] = &[
+    Check {
+        name: "captured_bss",
+        run: captured_bss,
+    },
+    Check {
+        name: "captured_heap_pattern",
+        run: captured_heap_pattern,
+    },
+    Check {
+        name: "guest_types_round_trip",
+        run: guest_types_round_trip,
+    },
+    Check {
+        name: "host_round_trips",
+        run: host_round_trips,
+    },
+    Check {
+        name: "chained_snapshot",
+        run: chained_snapshot,
+    },
+];
+
+/// Captured BSS restores exactly: `COUNTER == CALL_COUNTER_BUMP`.
+/// Covers the dispatch convention, sregs apply, page-table
+/// relocation, captured stack/BSS.
+fn captured_bss(sbox: &mut MultiUseSandbox) -> Result<(), String> {
+    let value: i32 = sbox
+        .call("GetStatic", ())
+        .map_err(|e| format!("GetStatic: {e}"))?;
+    if value != CALL_COUNTER_BUMP {
+        return Err(format!(
+            "captured COUNTER expected {CALL_COUNTER_BUMP}, got {value}",
+        ));
+    }
+    Ok(())
+}
+
+/// Captured heap state restores exactly: the pinned `Vec`
+/// pattern produced by `AllocAndWritePattern` survives across
+/// save/load.
+fn captured_heap_pattern(sbox: &mut MultiUseSandbox) -> Result<(), String> {
+    let got: Vec<u8> = sbox
+        .call("ReadPattern", ())
+        .map_err(|e| format!("ReadPattern: {e}"))?;
+    let expected: Vec<u8> = (0..HEAP_PATTERN_LEN as usize)
+        .map(|i| (i & 0xff) as u8)
+        .collect();
+    if got != expected {
+        return Err(format!(
+            "captured heap pattern mismatch (got len {} expected len {})",
+            got.len(),
+            expected.len(),
+        ));
+    }
+    Ok(())
+}
+
+/// Guest-call wire format for every primitive parameter and return
+/// type. Each loop asserts an `EchoT` round-trips. Float NaN goes
+/// through `is_nan` since `NaN != NaN`.
+fn guest_types_round_trip(sbox: &mut MultiUseSandbox) -> Result<(), String> {
+    macro_rules! echo {
+        ($name:expr, $ty:ty, $values:expr) => {{
+            for &v in $values.iter() {
+                let got: $ty = sbox
+                    .call($name, v)
+                    .map_err(|e| format!("{}({:?}): {e}", $name, v))?;
+                if got != v {
+                    return Err(format!("{}({:?}) returned {:?}", $name, v, got));
+                }
+            }
+        }};
+    }
+    echo!("EchoI32", i32, [i32::MIN, -1, 0, 1, i32::MAX]);
+    echo!("EchoU32", u32, [0u32, 1, u32::MAX]);
+    echo!("EchoI64", i64, [i64::MIN, -1, 0, 1, i64::MAX]);
+    echo!("EchoU64", u64, [0u64, 1, u64::MAX]);
+    echo!(
+        "EchoFloat",
+        f32,
+        [
+            0.0f32,
+            -1.5,
+            1.5,
+            f32::MIN,
+            f32::MAX,
+            f32::INFINITY,
+            f32::NEG_INFINITY,
+        ]
+    );
+    let got: f32 = sbox
+        .call("EchoFloat", f32::NAN)
+        .map_err(|e| format!("EchoFloat(NaN): {e}"))?;
+    if !got.is_nan() {
+        return Err(format!("EchoFloat(NaN) returned {got}"));
+    }
+    echo!(
+        "EchoDouble",
+        f64,
+        [
+            0.0f64,
+            -1.5,
+            1.5,
+            f64::MIN,
+            f64::MAX,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+        ]
+    );
+    let got: f64 = sbox
+        .call("EchoDouble", f64::NAN)
+        .map_err(|e| format!("EchoDouble(NaN): {e}"))?;
+    if !got.is_nan() {
+        return Err(format!("EchoDouble(NaN) returned {got}"));
+    }
+    echo!("EchoBool", bool, [false, true]);
+
+    for v in [String::new(), "hello".to_string(), "héllo 🌍".to_string()] {
+        let got: String = sbox
+            .call("Echo", v.clone())
+            .map_err(|e| format!("Echo({v:?}): {e}"))?;
+        if got != v {
+            return Err(format!("Echo({v:?}) returned {got:?}"));
+        }
+    }
+    for v in [
+        Vec::<u8>::new(),
+        vec![0u8, 1, 2, 3, 0xff],
+        (0..256u32).map(|i| (i & 0xff) as u8).collect::<Vec<u8>>(),
+    ] {
+        let got: Vec<u8> = sbox
+            .call("GetSizePrefixedBuffer", v.clone())
+            .map_err(|e| format!("GetSizePrefixedBuffer(len={}): {e}", v.len()))?;
+        if got != v {
+            return Err(format!(
+                "GetSizePrefixedBuffer(len={}) did not round-trip",
+                v.len(),
+            ));
+        }
+    }
+    let _: () = sbox.call("NoOp", ()).map_err(|e| format!("NoOp: {e}"))?;
+    let mixed: i32 = sbox
+        .call(
+            "PrintElevenArgs",
+            (
+                "a".to_string(),
+                1i32,
+                2i64,
+                "b".to_string(),
+                "c".to_string(),
+                true,
+                false,
+                3u32,
+                4u64,
+                5i32,
+                6.5f32,
+            ),
+        )
+        .map_err(|e| format!("PrintElevenArgs: {e}"))?;
+    if mixed < 0 {
+        return Err(format!("PrintElevenArgs returned {mixed}"));
+    }
+    Ok(())
+}
+
+/// Host-call wire format for every primitive parameter and return
+/// type. Each `RoundTripHostT` invokes the matching `HostEchoT` on
+/// the registered host-fn set.
+fn host_round_trips(sbox: &mut MultiUseSandbox) -> Result<(), String> {
+    macro_rules! rt {
+        ($name:expr, $ty:ty, $value:expr) => {{
+            let v: $ty = $value;
+            let got: $ty = sbox
+                .call($name, v.clone())
+                .map_err(|e| format!("{}({:?}): {e}", $name, v))?;
+            if got != v {
+                return Err(format!("{}({:?}) returned {:?}", $name, v, got));
+            }
+        }};
+    }
+    rt!("RoundTripHostI32", i32, -7);
+    rt!("RoundTripHostU32", u32, 0xdead_beef);
+    rt!("RoundTripHostI64", i64, i64::MIN);
+    rt!("RoundTripHostU64", u64, u64::MAX);
+    rt!("RoundTripHostF32", f32, -1.25);
+    rt!("RoundTripHostF64", f64, 1234.5);
+    rt!("RoundTripHostBool", bool, false);
+    rt!("RoundTripHostString", String, "round-trip".to_string());
+    rt!("RoundTripHostVecBytes", Vec<u8>, vec![0u8, 1, 2, 3, 0xff]);
+    let _: () = sbox
+        .call("RoundTripHostNoOp", ())
+        .map_err(|e| format!("RoundTripHostNoOp: {e}"))?;
+    Ok(())
+}
+
+/// Snapshot-from-loaded-snapshot path. Mutates state on the loaded
+/// golden, takes a fresh snapshot, round-trips it through an
+/// OCI layout on disk, and asserts the mutation survives.
+fn chained_snapshot(sbox: &mut MultiUseSandbox) -> Result<(), String> {
+    let val: i32 = sbox
+        .call("AddToStatic", 5i32)
+        .map_err(|e| format!("AddToStatic: {e}"))?;
+    if val != CALL_COUNTER_BUMP + 5 {
+        return Err(format!(
+            "AddToStatic returned {val}, expected {}",
+            CALL_COUNTER_BUMP + 5,
+        ));
+    }
+    let snap = sbox
+        .snapshot()
+        .map_err(|e| format!("take chained snapshot: {e}"))?;
+
+    let tmp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?;
+    let layout = tmp.path().join("chained");
+    let tag = OciTag::new("chained").map_err(|e| format!("tag: {e}"))?;
+    snap.save(&layout, &tag).map_err(|e| format!("save: {e}"))?;
+
+    let loaded = Snapshot::checked_load(&layout, tag).map_err(|e| format!("checked_load: {e}"))?;
+    let mut funcs = HostFunctions::default();
+    register_host_echo_fns(&mut funcs);
+    let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None)
+        .map_err(|e| format!("from_snapshot: {e}"))?;
+    let val: i32 = sbox2
+        .call("GetStatic", ())
+        .map_err(|e| format!("GetStatic on chained: {e}"))?;
+    if val != CALL_COUNTER_BUMP + 5 {
+        return Err(format!(
+            "chained snapshot observed COUNTER={val}, expected {}",
+            CALL_COUNTER_BUMP + 5,
+        ));
+    }
+    Ok(())
+}
diff --git a/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs
new file mode 100644
index 000000000..f45e19a76
--- /dev/null
+++ b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs
@@ -0,0 +1,127 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! Canonical fixture builders. These define exactly what bytes a
+//! goldens push contains. Any change here is a snapshot content
+//! change and requires a goldens regen.
+
+use std::sync::Arc;
+
+use hyperlight_host::func::Registerable;
+use hyperlight_host::sandbox::SandboxConfiguration;
+use hyperlight_host::sandbox::snapshot::Snapshot;
+use hyperlight_host::{GuestBinary, MultiUseSandbox, UninitializedSandbox};
+use hyperlight_testing::simple_guest_as_string;
+
+/// Heap pattern length used by the golden. Small enough to
+/// stay cheap, large enough to exercise non-trivial heap state.
+pub const HEAP_PATTERN_LEN: u64 = 1024;
+
+/// Value the captured `COUNTER` static must hold in the golden.
+/// Set by `AddToStatic(CALL_COUNTER_BUMP)` at generate time.
+pub const CALL_COUNTER_BUMP: i32 = 42;
+
+/// Canonical `SandboxConfiguration` used to produce the goldens.
+/// Layout knobs are deliberately bumped away from defaults so any
+/// silent arithmetic change in `SandboxMemoryLayout::new` shifts at
+/// least one region between generate-time and load-time.
+fn golden_config() -> SandboxConfiguration {
+    let mut cfg = SandboxConfiguration::default();
+    cfg.set_input_data_size(64 * 1024);
+    cfg.set_output_data_size(64 * 1024);
+    cfg.set_heap_size(256 * 1024);
+    cfg.set_scratch_size(512 * 1024);
+    cfg
+}
+
+fn simpleguest_path() -> String {
+    simple_guest_as_string().expect("simpleguest_path")
+}
+
+pub fn generate() -> Arc<Snapshot> {
+    let mut u = UninitializedSandbox::new(
+        GuestBinary::FilePath(simpleguest_path()),
+        Some(golden_config()),
+    )
+    .expect("UninitializedSandbox::new");
+    register_host_echo_fns(&mut u);
+    let mut sbox = u.evolve().expect("evolve");
+    run_canonical_calls(&mut sbox);
+    sbox.snapshot().expect("snapshot")
+}
+
+/// Deterministic sequence of guest calls that mutate captured state
+/// before snapshotting. Each call lands a specific bit of state
+/// (BSS, heap, host-call wiring) that one of the per-surface
+/// checks then asserts on after the golden is loaded.
+fn run_canonical_calls(sbox: &mut MultiUseSandbox) {
+    let bumped: i32 = sbox
+        .call("AddToStatic", CALL_COUNTER_BUMP)
+        .expect("AddToStatic");
+    assert_eq!(bumped, CALL_COUNTER_BUMP);
+
+    let _: () = sbox
+        .call("AllocAndWritePattern", HEAP_PATTERN_LEN)
+        .expect("AllocAndWritePattern");
+
+    // Drive every host fn once so the captured host_function_details
+    // blob has known signatures and dispatch regressions surface at
+    // generate time.
+    sbox.call::<i32>("RoundTripHostI32", 1234i32)
+        .expect("RTH i32");
+    sbox.call::<u32>("RoundTripHostU32", 4321u32)
+        .expect("RTH u32");
+    sbox.call::<i64>("RoundTripHostI64", -42i64)
+        .expect("RTH i64");
+    sbox.call::<u64>("RoundTripHostU64", 1u64 << 40)
+        .expect("RTH u64");
+    sbox.call::<f32>("RoundTripHostF32", 3.5f32)
+        .expect("RTH f32");
+    sbox.call::<f64>("RoundTripHostF64", -2.25f64)
+        .expect("RTH f64");
+    sbox.call::<bool>("RoundTripHostBool", true)
+        .expect("RTH bool");
+    sbox.call::<String>("RoundTripHostString", "hi".to_string())
+        .expect("RTH string");
+    sbox.call::<Vec<u8>>("RoundTripHostVecBytes", vec![1u8, 2, 3])
+        .expect("RTH vec");
+    sbox.call::<()>("RoundTripHostNoOp", ()).expect("RTH noop");
+}
+
+/// Register the `HostEcho*` family used by the golden. Used at
+/// both generate and load time so the registered set matches the
+/// captured `host_function_details`.
+pub fn register_host_echo_fns<R: Registerable>(r: &mut R) {
+    r.register_host_function("HostEchoI32", |v: i32| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoU32", |v: u32| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoI64", |v: i64| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoU64", |v: u64| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoF32", |v: f32| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoF64", |v: f64| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoBool", |v: bool| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoString", |v: String| Ok(v))
+        .unwrap();
+    r.register_host_function("HostEchoVecBytes", |v: Vec<u8>| Ok(v))
+        .unwrap();
+    r.register_host_function("HostNoOp", || Ok(())).unwrap();
+}
diff --git a/src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs b/src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs
new file mode 100644
index 000000000..2d911434b
--- /dev/null
+++ b/src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs
@@ -0,0 +1,24 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! The goldens version string, kept in its own file.
+//!
+//! The `.github/workflows/RegenSnapshotGoldens.yml` path filter watches
+//! this file, so a version bump is the only edit that triggers a
+//! publish. See `docs/snapshot-versioning.md`.
+
+/// Goldens version, a `vMAJOR.MINOR` string.
+pub const GOLDENS_VERSION: &str = "v1.0";
diff --git a/src/hyperlight_host/tests/snapshot_goldens/main.rs b/src/hyperlight_host/tests/snapshot_goldens/main.rs
new file mode 100644
index 000000000..ae3d83830
--- /dev/null
+++ b/src/hyperlight_host/tests/snapshot_goldens/main.rs
@@ -0,0 +1,125 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! Snapshot goldens custom-harness test binary.
+//!
+//! Default mode runs the libtest-mimic harness with one trial per
+//! row in `checks::CHECKS`, loading the golden from
+//! `target/snapshot-goldens/{version}/{tag}/`. The
+//! `generate [out-dir]` subcommand writes the canonical snapshot
+//! for the local platform as an OCI Image Layout under `out-dir`,
+//! defaulting to the verify directory for a local round-trip.
+//!
+//! Populate the directory with `just snapshot-goldens-pull` or
+//! `just snapshot-goldens-generate`.
+
+use std::path::{Path, PathBuf};
+use std::process::ExitCode;
+use std::sync::Arc;
+
+use hyperlight_host::sandbox::snapshot::{OciTag, Snapshot};
+use hyperlight_host::{HostFunctions, MultiUseSandbox};
+use libtest_mimic::{Arguments, Failed, Trial};
+
+mod checks;
+mod fixtures;
+mod goldens_version;
+mod oci;
+mod platform;
+
+use checks::Check;
+use platform::Platform;
+
+fn main() -> ExitCode {
+    let mut argv = std::env::args().skip(1);
+    if argv.next().as_deref() == Some("generate") {
+        let out = argv
+            .next()
+            .map(PathBuf::from)
+            .unwrap_or_else(oci::goldens_root);
+        return run_generate(&out);
+    }
+    run_verify()
+}
+
+fn run_verify() -> ExitCode {
+    let args = Arguments::from_args();
+    let Some(platform) = Platform::detect() else {
+        eprintln!("snapshot goldens: no (hypervisor, cpu, profile) platform detected on this host",);
+        return ExitCode::FAILURE;
+    };
+    println!(
+        "snapshot goldens: verifying platform={} version={}",
+        platform.suffix(),
+        goldens_version::GOLDENS_VERSION,
+    );
+    let trials = checks::CHECKS.iter().map(|c| trial(&platform, c)).collect();
+    libtest_mimic::run(&args, trials).exit_code()
+}
+
+fn trial(platform: &Platform, check: &'static Check) -> Trial {
+    let tag = platform.tag();
+    Trial::test(check.name, move || {
+        let dir = oci::golden_dir(&tag).map_err(Failed::from)?;
+        let mut sbox = load_sandbox(&dir, &tag).map_err(Failed::from)?;
+        (check.run)(&mut sbox).map_err(Failed::from)
+    })
+}
+
+fn load_sandbox(golden_dir: &Path, tag: &str) -> Result<MultiUseSandbox, String> {
+    let reference = OciTag::new(tag).map_err(|e| format!("invalid golden tag {tag}: {e}"))?;
+    let snap = Snapshot::checked_load(golden_dir, reference)
+        .map_err(|e| format!("Snapshot::checked_load({tag}): {e}"))?;
+    let mut funcs = HostFunctions::default();
+    fixtures::register_host_echo_fns(&mut funcs);
+    MultiUseSandbox::from_snapshot(Arc::new(snap), funcs, None)
+        .map_err(|e| format!("MultiUseSandbox::from_snapshot({tag}): {e}"))
+}
+
+fn run_generate(out_dir: &Path) -> ExitCode {
+    let Some(platform) = Platform::detect() else {
+        eprintln!(
+            "snapshot goldens: generate: no (hypervisor, cpu, profile) platform detected on this host",
+        );
+        return ExitCode::FAILURE;
+    };
+    if let Err(e) = std::fs::create_dir_all(out_dir) {
+        eprintln!("snapshot goldens: generate: create {out_dir:?}: {e}");
+        return ExitCode::FAILURE;
+    }
+    println!(
+        "snapshot goldens: generating platform={} version={} into {}",
+        platform.suffix(),
+        goldens_version::GOLDENS_VERSION,
+        out_dir.display(),
+    );
+    let tag = platform.tag();
+    let oci_tag = match OciTag::new(&tag) {
+        Ok(t) => t,
+        Err(e) => {
+            eprintln!("snapshot goldens: generate: invalid tag {tag}: {e}");
+            return ExitCode::FAILURE;
+        }
+    };
+    let dir = out_dir.join(&tag);
+    let snap = fixtures::generate();
+    if let Err(e) = snap.save(&dir, &oci_tag) {
+        eprintln!("snapshot goldens: generate: save({tag}): {e}");
+        return ExitCode::FAILURE;
+    }
+    println!(" wrote {tag} -> {}", dir.display());
+    ExitCode::SUCCESS
+}
diff --git a/src/hyperlight_host/tests/snapshot_goldens/oci.rs b/src/hyperlight_host/tests/snapshot_goldens/oci.rs
new file mode 100644
index 000000000..6cb3f9579
--- /dev/null
+++ b/src/hyperlight_host/tests/snapshot_goldens/oci.rs
@@ -0,0 +1,52 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+use std::path::PathBuf;
+
+use crate::goldens_version::GOLDENS_VERSION;
+
+pub fn goldens_root() -> PathBuf {
+    // Workspace target dir is two levels up from this crate.
+    let target = std::env::var_os("CARGO_TARGET_DIR")
+        .map(PathBuf::from)
+        .unwrap_or_else(|| {
+            let raw = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+                .join("..")
+                .join("..")
+                .join("target");
+            std::fs::canonicalize(&raw).unwrap_or(raw)
+        });
+    target.join("snapshot-goldens").join(GOLDENS_VERSION)
+}
+
+fn goldens_dir_for(tag: &str) -> PathBuf {
+    goldens_root().join(tag)
+}
+
+/// Locate the golden OCI Image Layout for `tag` in the local
+/// directory. A missing layout is an error with guidance to populate
+/// it.
+pub fn golden_dir(tag: &str) -> Result<PathBuf, String> {
+    let dir = goldens_dir_for(tag);
+    if dir.join("oci-layout").is_file() {
+        return Ok(dir);
+    }
+    Err(format!(
+        "no golden OCI layout found at {dir:?} for tag `{tag}`. \
+         Run `just snapshot-goldens-pull` to fetch the published goldens, \
+         or `just snapshot-goldens-generate` to regenerate them locally.",
+    ))
+}
diff --git a/src/hyperlight_host/tests/snapshot_goldens/platform.rs b/src/hyperlight_host/tests/snapshot_goldens/platform.rs
new file mode 100644
index 000000000..bf0ab2f01
--- /dev/null
+++ b/src/hyperlight_host/tests/snapshot_goldens/platform.rs
@@ -0,0 +1,158 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! Local platform detection and tag naming for snapshot goldens.
+//!
+//! A snapshot is not portable across `(hypervisor, cpu vendor,
+//! build profile)`. Each such triple gets its own tag, named
+//! `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}`.
+
+use crate::goldens_version::GOLDENS_VERSION;
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum Hypervisor {
+    Kvm,
+    Mshv,
+    #[cfg_attr(not(target_os = "windows"), allow(dead_code))]
+    Whp,
+}
+
+impl Hypervisor {
+    fn as_str(self) -> &'static str {
+        match self {
+            Self::Kvm => "kvm",
+            Self::Mshv => "mshv",
+            Self::Whp => "whp",
+        }
+    }
+
+    /// Detect the locally available hypervisor. Order matches the
+    /// host crate's preference: `/dev/mshv` over `/dev/kvm` on
+    /// Linux, WHP on Windows.
+    fn detect() -> Option<Self> {
+        #[cfg(target_os = "linux")]
+        {
+            if std::path::Path::new("/dev/mshv").exists() {
+                return Some(Self::Mshv);
+            }
+            if std::path::Path::new("/dev/kvm").exists() {
+                return Some(Self::Kvm);
+            }
+            None
+        }
+        #[cfg(target_os = "windows")]
+        {
+            Some(Self::Whp)
+        }
+        #[cfg(not(any(target_os = "linux", target_os = "windows")))]
+        {
+            None
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum CpuVendor {
+    Intel,
+    Amd,
+}
+
+impl CpuVendor {
+    fn as_str(self) -> &'static str {
+        match self {
+            Self::Intel => "intel",
+            Self::Amd => "amd",
+        }
+    }
+
+    /// Detect the local CPU vendor via the `0` leaf of `cpuid`.
+    /// Returns `None` on non-`x86_64` targets or unknown vendor
+    /// strings.
+    fn detect() -> Option<Self> {
+        #[cfg(target_arch = "x86_64")]
+        {
+            let r = core::arch::x86_64::__cpuid(0);
+            let mut bytes = [0u8; 12];
+            bytes[0..4].copy_from_slice(&r.ebx.to_le_bytes());
+            bytes[4..8].copy_from_slice(&r.edx.to_le_bytes());
+            bytes[8..12].copy_from_slice(&r.ecx.to_le_bytes());
+            match &bytes {
+                b"GenuineIntel" => Some(Self::Intel),
+                b"AuthenticAMD" => Some(Self::Amd),
+                _ => None,
+            }
+        }
+        #[cfg(not(target_arch = "x86_64"))]
+        {
+            None
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum Profile {
+    Debug,
+    Release,
+}
+
+impl Profile {
+    fn as_str(self) -> &'static str {
+        match self {
+            Self::Debug => "debug",
+            Self::Release => "release",
+        }
+    }
+
+    fn detect() -> Self {
+        if cfg!(debug_assertions) {
+            Self::Debug
+        } else {
+            Self::Release
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Platform {
+    hv: Hypervisor,
+    cpu: CpuVendor,
+    profile: Profile,
+}
+
+impl Platform {
+    pub fn detect() -> Option<Self> {
+        Some(Self {
+            hv: Hypervisor::detect()?,
+            cpu: CpuVendor::detect()?,
+            profile: Profile::detect(),
+        })
+    }
+
+    pub fn suffix(&self) -> String {
+        // The `snapshot-goldens-pull` recipe in the Justfile rebuilds this
+        // same `{hv}-{cpu}-{profile}` string in bash. Keep both in sync.
+        format!(
+            "{}-{}-{}",
+            self.hv.as_str(),
+            self.cpu.as_str(),
+            self.profile.as_str(),
+        )
+    }
+
+    pub fn tag(&self) -> String {
+        format!("{}-{}", GOLDENS_VERSION, self.suffix())
+    }
+}
diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs
index 1e58d1cbe..6a37aea53 100644
--- a/src/tests/rust_guests/simpleguest/src/main.rs
+++ b/src/tests/rust_guests/simpleguest/src/main.rs
@@ -437,6 +437,132 @@ fn get_size_prefixed_buffer(data: Vec<u8>) -> Vec<u8> {
     data
 }
 
+#[guest_function("EchoI32")]
+fn echo_i32(v: i32) -> i32 {
+    v
+}
+
+#[guest_function("EchoU32")]
+fn echo_u32(v: u32) -> u32 {
+    v
+}
+
+#[guest_function("EchoI64")]
+fn echo_i64(v: i64) -> i64 {
+    v
+}
+
+#[guest_function("EchoU64")]
+fn echo_u64(v: u64) -> u64 {
+    v
+}
+
+#[guest_function("EchoBool")]
+fn echo_bool(v: bool) -> bool {
+    v
+}
+
+#[guest_function("NoOp")]
+fn no_op() {}
+
+#[host_function("HostEchoI32")]
+fn host_echo_i32(v: i32) -> Result<i32>;
+
+#[host_function("HostEchoU32")]
+fn host_echo_u32(v: u32) -> Result<u32>;
+
+#[host_function("HostEchoI64")]
+fn host_echo_i64(v: i64) -> Result<i64>;
+
+#[host_function("HostEchoU64")]
+fn host_echo_u64(v: u64) -> Result<u64>;
+
+#[host_function("HostEchoF32")]
+fn host_echo_f32(v: f32) -> Result<f32>;
+
+#[host_function("HostEchoF64")]
+fn host_echo_f64(v: f64) -> Result<f64>;
+
+#[host_function("HostEchoBool")]
+fn host_echo_bool(v: bool) -> Result<bool>;
+
+#[host_function("HostEchoString")]
+fn host_echo_string(v: String) -> Result<String>;
+
+#[host_function("HostEchoVecBytes")]
+fn host_echo_vec_bytes(v: Vec<u8>) -> Result<Vec<u8>>;
+
+#[host_function("HostNoOp")]
+fn host_noop() -> Result<()>;
+
+#[guest_function("RoundTripHostI32")]
+fn round_trip_host_i32(v: i32) -> Result<i32> {
+    host_echo_i32(v)
+}
+
+#[guest_function("RoundTripHostU32")]
+fn round_trip_host_u32(v: u32) -> Result<u32> {
+    host_echo_u32(v)
+}
+
+#[guest_function("RoundTripHostI64")]
+fn round_trip_host_i64(v: i64) -> Result<i64> {
+    host_echo_i64(v)
+}
+
+#[guest_function("RoundTripHostU64")]
+fn round_trip_host_u64(v: u64) -> Result<u64> {
+    host_echo_u64(v)
+}
+
+#[guest_function("RoundTripHostF32")]
+fn round_trip_host_f32(v: f32) -> Result<f32> {
+    host_echo_f32(v)
+}
+
+#[guest_function("RoundTripHostF64")]
+fn round_trip_host_f64(v: f64) -> Result<f64> {
+    host_echo_f64(v)
+}
+
+#[guest_function("RoundTripHostBool")]
+fn round_trip_host_bool(v: bool) -> Result<bool> {
+    host_echo_bool(v)
+}
+
+#[guest_function("RoundTripHostString")]
+fn round_trip_host_string(v: String) -> Result<String> {
+    host_echo_string(v)
+}
+
+#[guest_function("RoundTripHostVecBytes")]
+fn round_trip_host_vec_bytes(v: Vec<u8>) -> Result<Vec<u8>> {
+    host_echo_vec_bytes(v)
+}
+
+#[guest_function("RoundTripHostNoOp")]
+fn round_trip_host_noop() -> Result<()> {
+    host_noop()
+}
+
+static mut HEAP_PATTERN: Option<Vec<u8>> = None;
+
+#[guest_function("AllocAndWritePattern")]
+fn alloc_and_write_pattern(len: u64) {
+    let v: Vec<u8> = (0..len as usize).map(|i| (i & 0xff) as u8).collect();
+    // SAFETY: the guest is single threaded, so the static has no concurrent access.
+    unsafe { HEAP_PATTERN = Some(v) };
+}
+
+#[guest_function("ReadPattern")]
+fn read_pattern() -> Vec<u8> {
+    // SAFETY: the guest is single threaded, so the static has no concurrent access.
+    #[allow(static_mut_refs)]
+    unsafe {
+        HEAP_PATTERN.clone().unwrap_or_default()
+    }
+}
+
 #[expect(
     clippy::empty_loop,
     reason = "This function is used to keep the CPU busy"