diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da241526e..728c611a3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: key: custom-out-${{ runner.os }}-${{ github.job }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('**/litebox_syscall_rewriter/**/*.rs') }} - run: ./.github/tools/github_actions_run_cargo fmt - run: | - ./.github/tools/github_actions_run_cargo clippy --all-targets --all-features --workspace --exclude litebox_platform_lvbs --exclude litebox_runner_lvbs --exclude litebox_runner_optee_on_linux_userland --exclude litebox_runner_snp + ./.github/tools/github_actions_run_cargo clippy --all-targets --all-features --workspace --exclude litebox_platform_lvbs --exclude litebox_runner_lvbs --exclude litebox_runner_optee_on_linux_userland --exclude litebox_runner_snp --exclude litebox_platform_kernel --exclude litebox_runner_optee_on_machine ./.github/tools/github_actions_run_cargo clippy --all-targets --all-features -p litebox_runner_optee_on_linux_userland # We exclude `litebox_platform_lvbs` and `litebox_runner_lvbs` because we cannot build them with a stable toolchain. # They depend on the unstable `abi_x86_interrupt` feature. `build_and_test_nightly` cover them. @@ -67,7 +67,7 @@ jobs: # aren't included in nextest at the moment. See relevant discussion at # https://github.com/nextest-rs/nextest/issues/16 - name: Build documentation (fail on warnings) - run: ./.github/tools/github_actions_run_cargo doc --no-deps --all-features --document-private-items --workspace --exclude litebox_platform_lvbs --exclude litebox_runner_lvbs --exclude litebox_runner_snp + run: ./.github/tools/github_actions_run_cargo doc --no-deps --all-features --document-private-items --workspace --exclude litebox_platform_lvbs --exclude litebox_runner_lvbs --exclude litebox_runner_snp --exclude litebox_platform_kernel --exclude litebox_runner_optee_on_machine build_and_test_32bit: name: Build and Test (32-bit) @@ -194,6 +194,36 @@ jobs: - name: Build documentation (fail on warnings) run: ./.github/tools/github_actions_run_cargo doc --no-deps --all-features --document-private-items + build_and_test_qemu: + name: Build and Test boot image with QEMU + runs-on: ubuntu-latest + env: + RUSTFLAGS: -Dwarnings + steps: + - name: Check out repo + uses: actions/checkout@v4 + - run: sudo apt update && sudo apt install qemu-system-x86 + - name: Set up Rust + run: | + RUST_CHANNEL=$(awk -F'"' '/channel/{print $2}' litebox_runner_optee_on_machine/rust-toolchain.toml) + rustup toolchain install ${RUST_CHANNEL} --profile minimal --no-self-update --component rustfmt,clippy --target x86_64-unknown-none + rustup component add rust-src --toolchain ${RUST_CHANNEL}-x86_64-unknown-linux-gnu + rustup default ${RUST_CHANNEL} + rustup override set ${RUST_CHANNEL} + rustup show + - uses: Swatinem/rust-cache@v2 + - run: ./.github/tools/github_actions_run_cargo clippy --all-features --target litebox_runner_optee_on_machine/x86_64-unknown-litebox.json --manifest-path=litebox_runner_optee_on_machine/Cargo.toml -Zbuild-std=core,compiler_builtins,alloc + - run: | + ./.github/tools/github_actions_run_cargo build -Zbuild-std=core,compiler_builtins,alloc -Zbuild-std-features=compiler-builtins-mem --manifest-path=litebox_runner_optee_on_machine/Cargo.toml --target litebox_runner_optee_on_machine/x86_64-unknown-litebox.json + - run: | + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu + rustup component add llvm-tools-preview --toolchain 
nightly-x86_64-unknown-linux-gnu + cargo +nightly install bootimage + cargo +nightly bootimage -Zbuild-std=core,compiler_builtins,alloc -Zbuild-std-features=compiler-builtins-mem --manifest-path=litebox_runner_optee_on_machine/Cargo.toml --target litebox_runner_optee_on_machine/x86_64-unknown-litebox.json + qemu-system-x86_64 -machine q35 -cpu max -m 256M -drive format=raw,file=target/x86_64-unknown-litebox/debug/bootimage-litebox_runner_optee_on_machine.bin -nographic -no-reboot -device isa-debug-exit,iobase=0xf4,iosize=0x04 || true + - name: Build documentation (fail on warnings) + run: ./.github/tools/github_actions_run_cargo doc --no-deps --all-features --document-private-items + confirm_no_std: name: Confirm no_std runs-on: ubuntu-latest @@ -288,6 +318,8 @@ jobs: -not -path './litebox_shim_optee/Cargo.toml' \ -not -path './litebox_syscall_rewriter/Cargo.toml' \ -not -path './litebox_runner_snp/Cargo.toml' \ + -not -path './litebox_platform_kernel/Cargo.toml' \ + -not -path './litebox_runner_optee_on_machine/Cargo.toml' \ -not -path './dev_tests/Cargo.toml' \ -print0 | \ xargs -0 -I '{}' sh -c 'cd "$(dirname "{}")"; pwd; cargo build --locked --target x86_64-unknown-none || exit 1; echo; echo' diff --git a/Cargo.lock b/Cargo.lock index 4a1b1ded4..e248b9921 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -215,6 +215,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bootloader" +version = "0.9.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bdfddac270bbdd45903296bc1caf29a7fdce6b326aaf0bbab7f04c5f98b7447" + [[package]] name = "buddy_system_allocator" version = "0.11.0" @@ -819,6 +825,31 @@ dependencies = [ "num_enum", ] +[[package]] +name = "litebox_platform_kernel" +version = "0.1.0" +dependencies = [ + "aligned-vec", + "arrayvec", + "bitflags 2.9.4", + "buddy_system_allocator", + "cfg-if", + "hashbrown 0.15.5", + "libc", + "litebox", + "litebox_common_linux", + "litebox_common_optee", + "modular-bitfield", + "num_enum", + "once_cell", + "rangemap", + "raw-cpuid", + "slabmalloc", + "spin 0.10.0", + "thiserror", + "x86_64", +] + [[package]] name = "litebox_platform_linux_kernel" version = "0.1.0" @@ -892,6 +923,7 @@ version = "0.1.0" dependencies = [ "cfg-if", "litebox", + "litebox_platform_kernel", "litebox_platform_linux_kernel", "litebox_platform_linux_userland", "litebox_platform_lvbs", @@ -974,6 +1006,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "litebox_runner_optee_on_machine" +version = "0.1.0" +dependencies = [ + "bootloader", + "litebox", + "litebox_common_linux", + "litebox_common_optee", + "litebox_platform_kernel", + "litebox_platform_multiplex", + "litebox_shim_optee", + "spin 0.10.0", + "x86_64", +] + [[package]] name = "litebox_runner_snp" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index cc2d32948..f415e3bc0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "litebox", "litebox_common_linux", "litebox_common_optee", + "litebox_platform_kernel", "litebox_platform_linux_kernel", "litebox_platform_linux_userland", "litebox_platform_windows_userland", @@ -13,6 +14,7 @@ members = [ "litebox_runner_linux_on_windows_userland", "litebox_runner_lvbs", "litebox_runner_optee_on_linux_userland", + "litebox_runner_optee_on_machine", "litebox_shim_linux", "litebox_syscall_rewriter", "litebox_runner_snp", @@ -24,6 +26,7 @@ default-members = [ "litebox", "litebox_common_linux", "litebox_common_optee", + "litebox_platform_kernel", "litebox_platform_linux_kernel", "litebox_platform_linux_userland", 
"litebox_platform_windows_userland", diff --git a/dev_tests/src/ratchet.rs b/dev_tests/src/ratchet.rs index 8cc318916..0d2cd8c41 100644 --- a/dev_tests/src/ratchet.rs +++ b/dev_tests/src/ratchet.rs @@ -30,6 +30,7 @@ fn ratchet_globals() -> Result<()> { ("litebox_platform_linux_kernel/", 5), ("litebox_platform_linux_userland/", 6), ("litebox_platform_lvbs/", 19), + ("litebox_platform_kernel/", 10), ("litebox_platform_multiplex/", 1), ("litebox_platform_windows_userland/", 8), ("litebox_runner_linux_userland/", 1), @@ -64,6 +65,7 @@ fn ratchet_maybe_uninit() -> Result<()> { ("litebox_platform_linux_kernel/", 1), ("litebox_platform_linux_userland/", 3), ("litebox_platform_lvbs/", 6), + ("litebox_platform_kernel/", 1), ("litebox_shim_linux/", 5), ], |file| { diff --git a/litebox_platform_kernel/.config/nextest.toml b/litebox_platform_kernel/.config/nextest.toml new file mode 100644 index 000000000..fbf942ec6 --- /dev/null +++ b/litebox_platform_kernel/.config/nextest.toml @@ -0,0 +1,8 @@ +[profile.ci] +# Do not cancel the test run on the first failure. +fail-fast = false +# Show all tests _including_ skipped tests in output. +status-level = "all" +# Output failures as soon as they happen _and_ at the end of the test run; +# combination of "immediate" and "final". +failure-output = "immediate-final" diff --git a/litebox_platform_kernel/Cargo.toml b/litebox_platform_kernel/Cargo.toml new file mode 100644 index 000000000..6ddd0ba2b --- /dev/null +++ b/litebox_platform_kernel/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "litebox_platform_kernel" +version = "0.1.0" +edition = "2024" + + +[dependencies] +cfg-if = "1.0.0" +bitflags = "2.9.0" +buddy_system_allocator = { version = "0.11.0", default-features = false, features = ["use_spin"] } +slabmalloc = { git = "https://github.com/gz/rust-slabmalloc.git", rev = "19480b2e82704210abafe575fb9699184c1be110" } +litebox = { path = "../litebox/", version = "0.1.0" } +litebox_common_linux = { path = "../litebox_common_linux/", version = "0.1.0" } +litebox_common_optee = { path = "../litebox_common_optee/", version = "0.1.0" } +spin = { version = "0.10.0", default-features = false, features = [ + "spin_mutex", + "once", + "rwlock", +] } +libc = "0.2.169" +arrayvec = { version = "0.7.6", default-features = false } +rangemap = { version = "1.5.1", features = ["const_fn"] } +thiserror = { version = "2.0.6", default-features = false } +num_enum = { version = "0.7.3", default-features = false } +once_cell = { version = "1.20.2", default-features = false, features = ["alloc", "race"] } +modular-bitfield = { version = "0.12.0", default-features = false } +hashbrown = "0.15.2" +aligned-vec = { version = "0.6.4", default-features = false } +raw-cpuid = "11.6.0" + +[target.'cfg(target_arch = "x86_64")'.dependencies] +x86_64 = { version = "0.15.2", default-features = false, features = ["instructions"] } + +[features] +default = [] +interrupt = ["x86_64/abi_x86_interrupt"] + +[lints] +workspace = true diff --git a/litebox_platform_kernel/rust-toolchain.toml b/litebox_platform_kernel/rust-toolchain.toml new file mode 100644 index 000000000..e058752cd --- /dev/null +++ b/litebox_platform_kernel/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly-2025-09-29" diff --git a/litebox_platform_kernel/src/alloc_impl.rs b/litebox_platform_kernel/src/alloc_impl.rs new file mode 100644 index 000000000..d4a9bf771 --- /dev/null +++ b/litebox_platform_kernel/src/alloc_impl.rs @@ -0,0 +1,46 @@ +#[cfg(not(test))] +mod alloc { + const HEAP_ORDER: usize = 25; + + 
#[global_allocator] + static ALLOCATOR: litebox::mm::allocator::SafeZoneAllocator< + 'static, + HEAP_ORDER, + crate::LiteBoxKernel, + > = litebox::mm::allocator::SafeZoneAllocator::new(); + + // TODO: these alloc and free functions are for dynamic memory management which are + // often meaningless if there is no host. In that sense, we might want to remove these from + // `SafeZoneAllocator`. + impl litebox::mm::allocator::MemoryProvider for crate::LiteBoxKernel { + fn alloc(_layout: &core::alloc::Layout) -> Option<(usize, usize)> { + // For a (virtual) machine, this might be memory ballooning or hotplugging. + unimplemented!() + } + + unsafe fn free(_addr: usize) { + unimplemented!() + } + } + + impl crate::mm::MemoryProvider for crate::LiteBoxKernel { + // TODO: this offset should be configurable + const GVA_OFFSET: x86_64::VirtAddr = x86_64::VirtAddr::new(0x18000000000); + // TODO: this mask should be configurable + const PRIVATE_PTE_MASK: u64 = 0; + + fn mem_allocate_pages(order: u32) -> Option<*mut u8> { + ALLOCATOR.allocate_pages(order) + } + + unsafe fn mem_free_pages(ptr: *mut u8, order: u32) { + unsafe { + ALLOCATOR.free_pages(ptr, order); + } + } + + unsafe fn mem_fill_pages(start: usize, size: usize) { + unsafe { ALLOCATOR.fill_pages(start, size) }; + } + } +} diff --git a/litebox_platform_kernel/src/arch/mod.rs b/litebox_platform_kernel/src/arch/mod.rs new file mode 100644 index 000000000..eae60ce16 --- /dev/null +++ b/litebox_platform_kernel/src/arch/mod.rs @@ -0,0 +1,7 @@ +//! Arch-specific code + +#[cfg(target_arch = "x86_64")] +mod x86; + +#[cfg(target_arch = "x86_64")] +pub use x86::*; diff --git a/litebox_platform_kernel/src/arch/x86/gdt.rs b/litebox_platform_kernel/src/arch/x86/gdt.rs new file mode 100644 index 000000000..f33d86d8d --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/gdt.rs @@ -0,0 +1,120 @@ +//! 
Global Descriptor Table (GDT) and Task State Segment (TSS) + +use crate::per_cpu_variables::{with_per_cpu_variables, with_per_cpu_variables_mut}; +use alloc::boxed::Box; +use x86_64::{ + PrivilegeLevel, VirtAddr, + instructions::{ + segmentation::{CS, DS, Segment}, + tables::load_tss, + }, + structures::{ + gdt::{Descriptor, GlobalDescriptorTable, SegmentSelector}, + tss::TaskStateSegment, + }, +}; + +/// TSS with 16-byte alignment (HW requirement) +#[repr(align(16))] +#[derive(Clone, Copy)] +pub struct AlignedTss(pub TaskStateSegment); + +#[derive(Clone, Copy)] +struct Selectors { + kernel_code: SegmentSelector, + kernel_data: SegmentSelector, + tss: SegmentSelector, + user_data: SegmentSelector, + user_code: SegmentSelector, +} + +impl Selectors { + pub fn new() -> Self { + Selectors { + kernel_code: SegmentSelector::new(0, PrivilegeLevel::Ring0), + kernel_data: SegmentSelector::new(0, PrivilegeLevel::Ring0), + tss: SegmentSelector::new(0, PrivilegeLevel::Ring0), + user_data: SegmentSelector::new(0, PrivilegeLevel::Ring3), + user_code: SegmentSelector::new(0, PrivilegeLevel::Ring3), + } + } +} + +impl Default for Selectors { + fn default() -> Self { + Selectors::new() + } +} + +/// Package GDT and selectors +pub struct GdtWrapper { + gdt: GlobalDescriptorTable, + selectors: Selectors, +} + +impl GdtWrapper { + pub fn new() -> Self { + GdtWrapper { + gdt: GlobalDescriptorTable::new(), + selectors: Selectors::new(), + } + } + + /// Return kernel code, user code, and user data segment selectors + pub fn get_segment_selectors(&self) -> (u16, u16, u16) { + ( + self.selectors.kernel_code.0, + self.selectors.user_code.0, + self.selectors.user_data.0, + ) + } +} + +impl Default for GdtWrapper { + fn default() -> Self { + Self::new() + } +} + +fn setup_gdt_tss() { + const STACK_ALIGNMENT: u64 = 16; + + let stack_top = with_per_cpu_variables(|per_cpu_variables| { + per_cpu_variables.interrupt_stack_top() & !(STACK_ALIGNMENT - 1) + }); + + let mut tss = Box::new(AlignedTss(TaskStateSegment::new())); + tss.0.interrupt_stack_table[0] = VirtAddr::new(stack_top); + + let mut gdt = Box::new(GdtWrapper::new()); + + // `tss_segment()` requires `&'static TaskStateSegment`. Leaking `tss` is fine because + // it will be used until the kernel resets. + let tss = Box::leak(tss); + gdt.selectors.tss = gdt.gdt.append(Descriptor::tss_segment(&tss.0)); + + gdt.selectors.kernel_code = gdt.gdt.append(Descriptor::kernel_code_segment()); + gdt.selectors.kernel_data = gdt.gdt.append(Descriptor::kernel_data_segment()); + gdt.selectors.user_code = gdt.gdt.append(Descriptor::user_code_segment()); + gdt.selectors.user_data = gdt.gdt.append(Descriptor::user_data_segment()); + + // `gdt.load()` requires `&'static self`. Leaking `gdt` is fine because + // it will be used until the kernel resets. + let gdt = Box::leak(gdt); + gdt.gdt.load(); + + unsafe { + CS::set_reg(gdt.selectors.kernel_code); + DS::set_reg(gdt.selectors.kernel_data); + load_tss(gdt.selectors.tss); + } + + with_per_cpu_variables_mut(|per_cpu_variables| { + per_cpu_variables.gdt = Some(gdt); + }); +} + +/// Set up GDT and TSS (for a core) +pub fn init() { + setup_gdt_tss(); +} diff --git a/litebox_platform_kernel/src/arch/x86/instrs.rs b/litebox_platform_kernel/src/arch/x86/instrs.rs new file mode 100644 index 000000000..627dc0d61 --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/instrs.rs @@ -0,0 +1,45 @@ +//! Some Assembly instructions + +use core::arch::asm; + +#[expect(clippy::inline_always)] +#[inline(always)] +pub fn hlt_loop() -> ! 
{ + loop { + unsafe { + asm!("hlt"); + } + } +} + +#[expect(clippy::inline_always)] +#[inline(always)] +pub fn rdmsr(msr: u32) -> u64 { + let low: u32; + let high: u32; + + unsafe { + asm!( + "rdmsr", + in("rcx") msr, out("rax") low, out("rdx") high, + options(nostack) + ); + } + + (u64::from(high) << 32) | u64::from(low) +} + +#[expect(clippy::inline_always)] +#[inline(always)] +pub fn wrmsr(msr: u32, value: u64) { + let low = (value & 0xffff_ffff) as u32; + let high = (value >> 32) as u32; + + unsafe { + asm!( + "wrmsr", + in("rcx") msr, in("rax") low, in("rdx") high, + options(nostack) + ); + } +} diff --git a/litebox_platform_kernel/src/arch/x86/interrupts.rs b/litebox_platform_kernel/src/arch/x86/interrupts.rs new file mode 100644 index 000000000..9e44d8f4a --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/interrupts.rs @@ -0,0 +1,84 @@ +//! Interrupt Descriptor Table (IDT) + +use spin::Once; +use x86_64::structures::idt::{ + HandlerFuncType, HandlerFuncWithErrCode, InterruptDescriptorTable, InterruptStackFrame, + PageFaultErrorCode, +}; + +const DOUBLE_FAULT_IST_INDEX: u16 = 0; + +fn idt() -> &'static InterruptDescriptorTable { + static IDT_ONCE: Once = Once::new(); + IDT_ONCE.call_once(|| { + let mut idt = InterruptDescriptorTable::new(); + idt.divide_error.set_handler_fn(divide_error_handler); + idt.breakpoint.set_handler_fn(breakpoint_handler); + unsafe { + // Rust no longer allows a function with the custom ABI to have a return type. + // Unfortunately, the `x86_64` crate has not caught up this change. + // Below is a workaround mentioned in [link](https://github.com/rust-lang/rust/issues/143072). + let addr = + HandlerFuncType::to_virt_addr(double_fault_handler as HandlerFuncWithErrCode); + idt.double_fault + .set_handler_addr(addr) + .set_stack_index(DOUBLE_FAULT_IST_INDEX); + } + idt.page_fault.set_handler_fn(page_fault_handler); + idt.invalid_opcode.set_handler_fn(invalid_opcode_handler); + idt.general_protection_fault + .set_handler_fn(general_protection_fault_handler); + idt + }) +} + +/// Initialize IDT (for a core) +pub fn init_idt() { + idt().load(); +} + +// TODO: carefully handle exceptions/interrupts. If an exception or interrupt is due to userspace code, +// we should destroy the corresponding user context rather than halt the entire kernel. 
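As a note on the TODO above: the hardware already reports enough to tell user-mode faults apart from kernel-mode ones, which is the precondition for tearing down only the offending user context instead of halting. A minimal sketch, using the existing `x86_64` error-code type; the helper itself is purely illustrative and not part of this change:

// Sketch only: bit 2 of the page-fault error code (USER_MODE) is set by the CPU
// when the faulting access originated in ring 3.
fn page_fault_from_user(error_code: x86_64::structures::idt::PageFaultErrorCode) -> bool {
    error_code.contains(x86_64::structures::idt::PageFaultErrorCode::USER_MODE)
}

For exceptions that carry no error code, the same decision could be made from the saved code-segment selector in the `InterruptStackFrame` (a privilege level of 3 means the exception came from user code).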
+ +extern "x86-interrupt" fn divide_error_handler(stack_frame: InterruptStackFrame) { + todo!("EXCEPTION: DIVIDE BY ZERO\n{:#?}", stack_frame); +} + +extern "x86-interrupt" fn breakpoint_handler(stack_frame: InterruptStackFrame) { + todo!("EXCEPTION: BREAKPOINT\n{:#?}", stack_frame); +} + +extern "x86-interrupt" fn double_fault_handler(stack_frame: InterruptStackFrame, _error_code: u64) { + panic!("EXCEPTION: DOUBLE FAULT\n{:#?}", stack_frame); +} + +extern "x86-interrupt" fn general_protection_fault_handler( + stack_frame: InterruptStackFrame, + _error_code: u64, +) { + todo!("EXCEPTION: GENERAL PROTECTION FAULT\n{:#?}", stack_frame); +} + +extern "x86-interrupt" fn page_fault_handler( + stack_frame: InterruptStackFrame, + error_code: PageFaultErrorCode, +) { + use x86_64::registers::control::Cr2; + + todo!( + "EXCEPTION: PAGE FAULT\nAccessed Address: {:?}\nError Code: {:?}\n{:#?}", + Cr2::read(), + error_code, + stack_frame + ); +} + +extern "x86-interrupt" fn invalid_opcode_handler(stack_frame: InterruptStackFrame) { + use x86_64::registers::control::Cr2; + + todo!( + "EXCEPTION: INVALID OPCODE\nAccessed Address: {:?}\n{:#?}", + Cr2::read(), + stack_frame + ); +} diff --git a/litebox_platform_kernel/src/arch/x86/ioport.rs b/litebox_platform_kernel/src/arch/x86/ioport.rs new file mode 100644 index 000000000..29934a08c --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/ioport.rs @@ -0,0 +1,177 @@ +//! I/O Port-based serial communication + +use core::{arch::asm, fmt}; +use spin::{Mutex, Once}; + +const COM_PORT_1: u16 = 0x3F8; + +const INTERRUPT_ENABLE_OFFSET: u16 = 1; +const OUT_FIFO_CONTROL_OFFSET: u16 = 2; +const SCRATCH_REGISTER_OFFSET: u16 = 7; +const MODEM_CONTROL_OFFSET: u16 = 4; +const IN_LINE_STATUS_OFFSET: u16 = 5; + +const MAX_WAIT_ITERATIONS: u32 = 1_000_000; + +#[expect(clippy::inline_always)] +#[inline(always)] +fn inb(port: u16) -> u8 { + let mut value: u8; + + unsafe { + asm!( + "in al, dx", + in("dx") port, out("al") value + ); + } + + value +} + +#[expect(clippy::inline_always)] +#[inline(always)] +fn outb(port: u16, value: u8) { + unsafe { + asm!( + "out dx, al", + in("dx") port, in("al") value + ); + } +} + +#[expect(clippy::inline_always)] +#[inline(always)] +fn interrupt_enable(port: u16, value: u8) { + outb(port + INTERRUPT_ENABLE_OFFSET, value); +} + +#[expect(clippy::inline_always)] +#[inline(always)] +fn fifo_control(port: u16, value: u8) { + outb(port + OUT_FIFO_CONTROL_OFFSET, value); +} + +#[expect(clippy::inline_always)] +#[inline(always)] +fn modem_control(port: u16, value: u8) { + outb(port + MODEM_CONTROL_OFFSET, value); +} + +#[expect(clippy::inline_always)] +#[inline(always)] +fn line_status(port: u16) -> u8 { + inb(port + IN_LINE_STATUS_OFFSET) +} + +pub struct ComPort { + port: u16, + available: bool, +} + +impl ComPort { + pub const fn new(port: u16) -> Self { + ComPort { + port, + available: false, + } + } + + pub fn init(&mut self) { + outb(self.port + SCRATCH_REGISTER_OFFSET, 0x55); + let scratch = inb(self.port + SCRATCH_REGISTER_OFFSET); + if scratch != 0x55 { + self.available = false; + return; + } + self.available = true; + interrupt_enable(self.port, 0x00); // Disable all interrupts + fifo_control(self.port, 0xc7); // Enable FIFO, clear them, with 14-byte threshold + modem_control(self.port, 0x0f); // Enable data terminal ready, request to send, and IRQ + } + + pub fn write_byte(&mut self, byte: u8) { + if !self.available { + return; + } + + /* Timeout to ensure that we do not loop indefinitely */ + let mut wait_iterations = 0; + loop { + 
if line_status(self.port) & 0x20 != 0 { + // transmittable + break; + } + wait_iterations += 1; + if wait_iterations >= MAX_WAIT_ITERATIONS { + return; + } + } + + match byte { + 0x20..=0x7e => outb(self.port, byte), + b'\n' => { + outb(self.port, b'\r'); + outb(self.port, b'\n'); + } + _ => outb(self.port, 0xfe), + } + } + + pub fn write_string(&mut self, s: &str) { + if !self.available { + return; + } + + for byte in s.bytes() { + self.write_byte(byte); + } + } +} + +fn com() -> &'static Mutex { + static COM_ONCE: Once> = Once::new(); + COM_ONCE.call_once(|| { + let mut com_port = ComPort::new(COM_PORT_1); + com_port.init(); + Mutex::new(com_port) + }) +} + +impl fmt::Write for ComPort { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.write_string(s); + Ok(()) + } +} + +#[doc(hidden)] +pub fn print(args: ::core::fmt::Arguments) { + use core::fmt::Write; + let _ = com().lock().write_fmt(args); +} + +#[macro_export] +macro_rules! serial_print { + ($($arg:tt)*) => ($crate::arch::ioport::print(format_args!($($arg)*))); +} + +#[macro_export] +macro_rules! serial_println { + () => ($crate::serial_print!("\n")); + ($($arg:tt)*) => ($crate::serial_print!("{}\n", format_args!($($arg)*))); +} + +#[macro_export] +macro_rules! debug_serial_print { + ($($arg:tt)*) => (#[cfg(debug_assertions)] $crate::arch::ioport::print(format_args!($($arg)*))); +} + +#[macro_export] +macro_rules! debug_serial_println { + () => (#[cfg(debug_assertions)] $crate::serial_print!("\n")); + ($($arg:tt)*) => (#[cfg(debug_assertions)] $crate::serial_print!("{}\n", format_args!($($arg)*))); +} + +pub fn serial_print_string(s: &str) { + com().lock().write_string(s); +} diff --git a/litebox_platform_kernel/src/arch/x86/mm/mod.rs b/litebox_platform_kernel/src/arch/x86/mm/mod.rs new file mode 100644 index 000000000..08017180d --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/mm/mod.rs @@ -0,0 +1,3 @@ +//! Memory management module for x86 architecture. 
+ +pub(crate) mod paging; diff --git a/litebox_platform_kernel/src/arch/x86/mm/paging.rs b/litebox_platform_kernel/src/arch/x86/mm/paging.rs new file mode 100644 index 000000000..75ad741dc --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/mm/paging.rs @@ -0,0 +1,529 @@ +use litebox::mm::linux::{PageFaultError, PageRange, VmFlags, VmemPageFaultHandler}; +use litebox::platform::page_mgmt; +use x86_64::{ + PhysAddr, VirtAddr, + structures::{ + idt::PageFaultErrorCode, + paging::{ + FrameAllocator, FrameDeallocator, MappedPageTable, Mapper, Page, PageSize, PageTable, + PageTableFlags, PhysFrame, Size4KiB, Translate, + frame::PhysFrameRange, + mapper::{ + CleanUp, FlagUpdateError, MapToError, PageTableFrameMapping, TranslateResult, + UnmapError as X64UnmapError, + }, + }, + }, +}; + +use crate::{ + mm::{ + MemoryProvider, + pgtable::{PageTableAllocator, PageTableImpl}, + }, + ptr::UserMutPtr, +}; + +#[cfg(not(test))] +const FLUSH_TLB: bool = true; +#[cfg(test)] +const FLUSH_TLB: bool = false; + +#[inline] +fn frame_to_pointer(frame: PhysFrame) -> *mut PageTable { + let virt = M::pa_to_va(frame.start_address()); + virt.as_mut_ptr() +} + +pub struct X64PageTable<'a, M: MemoryProvider, const ALIGN: usize> { + inner: spin::mutex::SpinMutex>>, +} + +struct FrameMapping { + _provider: core::marker::PhantomData, +} + +unsafe impl PageTableFrameMapping for FrameMapping { + fn frame_to_pointer(&self, frame: PhysFrame) -> *mut PageTable { + frame_to_pointer::(frame) + } +} + +unsafe impl FrameAllocator for PageTableAllocator { + fn allocate_frame(&mut self) -> Option> { + Self::allocate_frame(true) + } +} + +impl FrameDeallocator for PageTableAllocator { + unsafe fn deallocate_frame(&mut self, frame: PhysFrame) { + let vaddr = M::pa_to_va(frame.start_address()); + unsafe { M::mem_free_pages(vaddr.as_mut_ptr(), 0) }; + } +} + +pub(crate) fn vmflags_to_pteflags(values: VmFlags) -> PageTableFlags { + let mut flags = PageTableFlags::empty(); + if values.intersects(VmFlags::VM_READ | VmFlags::VM_WRITE) { + flags |= PageTableFlags::USER_ACCESSIBLE; + } + if values.contains(VmFlags::VM_WRITE) { + flags |= PageTableFlags::WRITABLE; + } + if !values.contains(VmFlags::VM_EXEC) { + flags |= PageTableFlags::NO_EXECUTE; + } + flags +} + +impl X64PageTable<'_, M, ALIGN> { + pub(crate) unsafe fn new(item: PhysAddr) -> Self { + unsafe { Self::init(item) } + } + + pub(crate) fn map_pages( + &self, + range: PageRange, + flags: VmFlags, + populate_pages: bool, + ) -> UserMutPtr { + if populate_pages { + let flags = vmflags_to_pteflags(flags); + for page in range { + let page = + Page::::from_start_address(VirtAddr::new(page as u64)).unwrap(); + unsafe { + PageTableImpl::handle_page_fault(self, page, flags, PageFaultErrorCode::empty()) + } + .expect("Failed to handle page fault"); + } + } + UserMutPtr { + inner: range.start as *mut u8, + } + } + + /// Unmap 4KiB pages from the page table + /// Set `dealloc_frames` to `true` to free the corresponding physical frames. + /// + /// Note it does not free the allocated frames for page table itself (only those allocated to + /// user space). 
+ pub(crate) unsafe fn unmap_pages( + &self, + range: PageRange, + dealloc_frames: bool, + ) -> Result<(), page_mgmt::DeallocationError> { + let start_va = VirtAddr::new(range.start as _); + let start = Page::::from_start_address(start_va) + .or(Err(page_mgmt::DeallocationError::Unaligned))?; + let end_va = VirtAddr::new(range.end as _); + let end = Page::::from_start_address(end_va) + .or(Err(page_mgmt::DeallocationError::Unaligned))?; + let mut allocator = PageTableAllocator::::new(); + + // Note this implementation is slow as each page requires a full page table walk. + // If we have N pages, it will be N times slower. + let mut inner = self.inner.lock(); + for page in Page::range(start, end) { + match inner.unmap(page) { + Ok((frame, fl)) => { + if dealloc_frames { + unsafe { allocator.deallocate_frame(frame) }; + } + if FLUSH_TLB { + fl.flush(); + } + } + Err(X64UnmapError::PageNotMapped) => {} + Err(X64UnmapError::ParentEntryHugePage) => { + unreachable!("we do not support huge pages"); + } + Err(X64UnmapError::InvalidFrameAddress(pa)) => { + todo!("Invalid frame address: {:#x}", pa); + } + } + } + Ok(()) + } + + pub(crate) unsafe fn remap_pages( + &self, + old_range: PageRange, + new_range: PageRange, + ) -> Result, page_mgmt::RemapError> { + let mut start: Page = + Page::from_start_address(VirtAddr::new(old_range.start as u64)) + .or(Err(page_mgmt::RemapError::Unaligned))?; + let mut new_start: Page = + Page::from_start_address(VirtAddr::new(new_range.start as u64)) + .or(Err(page_mgmt::RemapError::Unaligned))?; + let end: Page = Page::from_start_address(VirtAddr::new(old_range.end as u64)) + .or(Err(page_mgmt::RemapError::Unaligned))?; + + // Note this implementation is slow as each page requires three full page table walks. + // If we have N pages, it will be 3N times slower. + let mut allocator = PageTableAllocator::::new(); + let mut inner = self.inner.lock(); + while start < end { + match inner.translate(start.start_address()) { + TranslateResult::Mapped { + frame: _, + offset: _, + flags, + } => match inner.unmap(start) { + Ok((frame, fl)) => { + match unsafe { inner.map_to(new_start, frame, flags, &mut allocator) } { + Ok(_) => {} + Err(e) => match e { + MapToError::PageAlreadyMapped(_) => { + return Err(page_mgmt::RemapError::AlreadyAllocated); + } + MapToError::ParentEntryHugePage => { + todo!("return Err(page_mgmt::RemapError::RemapToHugePage);") + } + MapToError::FrameAllocationFailed => { + return Err(page_mgmt::RemapError::OutOfMemory); + } + }, + } + if FLUSH_TLB { + fl.flush(); + } + } + Err(X64UnmapError::PageNotMapped) => { + unreachable!() + } + Err(X64UnmapError::ParentEntryHugePage) => { + todo!("return Err(page_mgmt::RemapError::RemapToHugePage);") + } + Err(X64UnmapError::InvalidFrameAddress(pa)) => { + // TODO: `panic!()` -> `todo!()` because user-driven interrupts or exceptions must not halt the kernel. + // We should handle this exception carefully (i.e., clean up the context and data structures belonging to an errorneous process). 
+ todo!("Invalid frame address: {:#x}", pa); + } + }, + TranslateResult::NotMapped => {} + TranslateResult::InvalidFrameAddress(pa) => { + todo!("Invalid frame address: {:#x}", pa); + } + } + start += 1; + new_start += 1; + } + + Ok(UserMutPtr { + inner: new_range.start as *mut u8, + }) + } + + pub(crate) unsafe fn mprotect_pages( + &self, + range: PageRange, + new_flags: VmFlags, + ) -> Result<(), page_mgmt::PermissionUpdateError> { + let start = VirtAddr::new(range.start as _); + let end = VirtAddr::new(range.end as _); + let new_flags = vmflags_to_pteflags(new_flags) & Self::MPROTECT_PTE_MASK; + let start: Page = + Page::from_start_address(start).or(Err(page_mgmt::PermissionUpdateError::Unaligned))?; + let end: Page = Page::containing_address(end - 1); + + // TODO: this implementation is slow as each page requires two full page table walks. + // If we have N pages, it will be 2N times slower. + let mut inner = self.inner.lock(); + for page in Page::range(start, end + 1) { + match inner.translate(page.start_address()) { + TranslateResult::Mapped { + frame: _, + offset: _, + flags, + } => { + // If it is changed to writable, we leave it to page fault handler (COW) + let change_to_write = new_flags.contains(PageTableFlags::WRITABLE) + && !flags.contains(PageTableFlags::WRITABLE); + let new_flags = if change_to_write { + new_flags - PageTableFlags::WRITABLE + } else { + new_flags + }; + if flags != new_flags { + match unsafe { + inner.update_flags(page, (flags & !Self::MPROTECT_PTE_MASK) | new_flags) + } { + Ok(fl) => { + if FLUSH_TLB { + fl.flush(); + } + } + Err(e) => match e { + FlagUpdateError::PageNotMapped => unreachable!(), + FlagUpdateError::ParentEntryHugePage => { + todo!("return Err(ProtectError::ProtectHugePage);") + } + }, + } + } + } + TranslateResult::NotMapped => {} + TranslateResult::InvalidFrameAddress(pa) => { + todo!("Invalid frame address: {:#x}", pa); + } + } + } + + Ok(()) + } + + /// Map physical frame range to the page table + /// + /// Note it does not rely on the page fault handler based mapping to avoid double faults. + #[expect(dead_code)] + pub(crate) fn map_phys_frame_range( + &self, + frame_range: PhysFrameRange, + flags: PageTableFlags, + ) -> Result<*mut u8, MapToError> { + let mut allocator = PageTableAllocator::::new(); + + let mut inner = self.inner.lock(); + for target_frame in frame_range { + let page: Page = + Page::containing_address(M::pa_to_va(target_frame.start_address())); + + match inner.translate(page.start_address()) { + TranslateResult::Mapped { + frame, + offset: _, + flags: _, + } => { + assert!( + target_frame.start_address() == frame.start_address(), + "{page:?} is already mapped to {frame:?} instead of {target_frame:?}" + ); + + continue; + } + TranslateResult::NotMapped => {} + TranslateResult::InvalidFrameAddress(pa) => { + todo!("Invalid frame address: {:#x}", pa); + } + } + + match unsafe { + inner.map_to_with_table_flags(page, target_frame, flags, flags, &mut allocator) + } { + Ok(fl) => { + if FLUSH_TLB { + fl.flush(); + } + } + Err(e) => return Err(e), + } + } + + Ok(M::pa_to_va(frame_range.start.start_address()).as_mut_ptr()) + } + + /// This function creates a new empty top-level page table. 
+ #[expect(dead_code)] + pub(crate) unsafe fn new_top_level() -> Self { + let frame = PageTableAllocator::::allocate_frame(true) + .expect("Failed to allocate a new page table frame"); + unsafe { Self::init(frame.start_address()) } + } + + /// This function changes the address space of the current processor/core using the given page table + /// (e.g., its CR3 register) and returns the physical frame of the previous top-level page table. + /// It preserves the CR3 flags. + /// + /// # Safety + /// The caller must ensure that the page table is valid and maps the entire kernel address space. + /// Currently, we do not support KPTI-like kernel/user space page table separation. + /// + /// # Panics + /// Panics if the page table is invalid + #[expect(dead_code)] + #[allow(clippy::similar_names)] + pub(crate) fn change_address_space(&self) -> PhysFrame { + let p4_va = core::ptr::from_ref::(self.inner.lock().level_4_table()); + let p4_pa = M::va_to_pa(VirtAddr::new(p4_va as u64)); + let p4_frame = PhysFrame::containing_address(p4_pa); + + let (frame, flags) = x86_64::registers::control::Cr3::read(); + unsafe { + x86_64::registers::control::Cr3::write(p4_frame, flags); + } + + frame + } + + /// This function returns the physical frame containing a top-level page table. + /// When we handle a system call or interrupt, it is difficult to figure out the corresponding user context + /// because kernel and user contexts are not tightly coupled (i.e., we do not know `userspace_id`). + /// To this end, we use this function to match the physical frame of the page table contained in each user + /// context structure with the CR3 value in a system call context (before changing the page table). + #[allow(clippy::similar_names)] + #[allow(dead_code)] + pub(crate) fn get_physical_frame(&self) -> PhysFrame { + let p4_va = core::ptr::from_ref::(self.inner.lock().level_4_table()); + let p4_pa = M::va_to_pa(VirtAddr::new(p4_va as u64)); + PhysFrame::containing_address(p4_pa) + } + + /// Deallocate physical frames of all level 1--3 page tables except for the top-level page table. + /// This is a wrapper function for `MappedPageTable::clean_up()`. + /// + /// # Safety + /// The caller is expected to unmap all non-page-table pages before calling this function. + /// Also, the caller must ensure no page table frame is shared with other page tables. + /// This function expects that `Drop` will deallocate the top-level page table frame. It does not + /// deallocate the top-level page table frame because this can result in an undefined behavior. 
+ #[allow(dead_code)] + pub(crate) unsafe fn clean_up(&self) { + let mut allocator = PageTableAllocator::::new(); + unsafe { + self.inner.lock().clean_up(&mut allocator); + } + } +} + +impl Drop for X64PageTable<'_, M, ALIGN> { + /// Deallocate the physical frame of the top-level page table + #[allow(clippy::similar_names)] + fn drop(&mut self) { + let mut allocator = PageTableAllocator::::new(); + let p4_va = + core::ptr::from_mut::(self.inner.lock().level_4_table_mut()).cast::(); + let p4_pa = M::va_to_pa(VirtAddr::new(p4_va as u64)); + unsafe { + allocator.deallocate_frame(PhysFrame::containing_address(p4_pa)); + } + } +} + +impl PageTableImpl for X64PageTable<'_, M, ALIGN> { + unsafe fn init(p4: PhysAddr) -> Self { + assert!(p4.is_aligned(Size4KiB::SIZE)); + let frame = PhysFrame::from_start_address(p4).unwrap(); + let mapping = FrameMapping:: { + _provider: core::marker::PhantomData, + }; + let p4_va = mapping.frame_to_pointer(frame); + let p4 = unsafe { &mut *p4_va }; + X64PageTable { + inner: unsafe { MappedPageTable::new(p4, mapping) }.into(), + } + } + + #[cfg(test)] + fn translate(&self, addr: VirtAddr) -> TranslateResult { + self.inner.lock().translate(addr) + } + + unsafe fn handle_page_fault( + &self, + page: Page, + flags: PageTableFlags, + error_code: PageFaultErrorCode, + ) -> Result<(), PageFaultError> { + let mut inner = self.inner.lock(); + match inner.translate(page.start_address()) { + TranslateResult::Mapped { + frame: _, + offset: _, + flags, + } => { + if error_code.contains(PageFaultErrorCode::CAUSED_BY_WRITE) { + if flags.contains(PageTableFlags::WRITABLE) { + // probably set by other threads concurrently + return Ok(()); + } else { + // Copy-on-Write + todo!("COW"); + } + } + + if !error_code.contains(PageFaultErrorCode::PROTECTION_VIOLATION) { + // not present error but PTE says it is present, probably due to race condition + return Ok(()); + } + + todo!("Page fault on present page: {:#x}", page.start_address()); + } + TranslateResult::NotMapped => { + let mut allocator = PageTableAllocator::::new(); + // TODO: if it is file-backed, we need to read the page from file + let frame = PageTableAllocator::::allocate_frame(true).unwrap(); + let table_flags = PageTableFlags::PRESENT + | PageTableFlags::WRITABLE + | PageTableFlags::USER_ACCESSIBLE; + match unsafe { + inner.map_to_with_table_flags( + page, + frame, + flags | PageTableFlags::PRESENT, + table_flags, + &mut allocator, + ) + } { + Ok(fl) => { + if FLUSH_TLB { + fl.flush(); + } + } + Err(e) => { + unsafe { allocator.deallocate_frame(frame) }; + match e { + MapToError::PageAlreadyMapped(_) => { + unreachable!() + } + MapToError::ParentEntryHugePage => { + return Err(PageFaultError::HugePage); + } + MapToError::FrameAllocationFailed => { + return Err(PageFaultError::AllocationFailed); + } + } + } + } + } + TranslateResult::InvalidFrameAddress(pa) => { + todo!("Invalid frame address: {:#x}", pa); + } + } + Ok(()) + } +} + +impl VmemPageFaultHandler for X64PageTable<'_, M, ALIGN> { + unsafe fn handle_page_fault( + &self, + fault_addr: usize, + flags: VmFlags, + error_code: u64, + ) -> Result<(), PageFaultError> { + let page = Page::::containing_address(VirtAddr::new(fault_addr as u64)); + let error_code = PageFaultErrorCode::from_bits_truncate(error_code); + let flags = vmflags_to_pteflags(flags); + unsafe { PageTableImpl::handle_page_fault(self, page, flags, error_code) } + } + + fn access_error(error_code: u64, flags: VmFlags) -> bool { + let error_code = PageFaultErrorCode::from_bits_truncate(error_code); + if 
error_code.contains(PageFaultErrorCode::CAUSED_BY_WRITE) { + return !flags.contains(VmFlags::VM_WRITE); + } + + // read, present + if error_code.contains(PageFaultErrorCode::PROTECTION_VIOLATION) { + return true; + } + + // read, not present + if (flags & VmFlags::VM_ACCESS_FLAGS).is_empty() { + return true; + } + + false + } +} diff --git a/litebox_platform_kernel/src/arch/x86/mod.rs b/litebox_platform_kernel/src/arch/x86/mod.rs new file mode 100644 index 000000000..da19d10e1 --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/mod.rs @@ -0,0 +1,119 @@ +pub mod gdt; +pub mod instrs; +pub mod ioport; +pub mod mm; +pub mod msr; + +#[cfg(feature = "interrupt")] +pub mod interrupts; + +use x86_64::registers::control::Cr4Flags; +pub(crate) use x86_64::{ + addr::{PhysAddr, VirtAddr}, + structures::{ + idt::PageFaultErrorCode, + paging::{Page, PageSize, PageTableFlags, PhysFrame, Size4KiB}, + }, +}; + +#[cfg(test)] +pub(crate) use x86_64::structures::paging::mapper::{MappedFrame, TranslateResult}; + +/// Get the APIC ID of the current core. +#[inline] +pub fn get_core_id() -> usize { + use core::arch::x86_64::__cpuid_count as cpuid_count; + const CPU_VERSION_INFO: u32 = 1; + + let result = unsafe { cpuid_count(CPU_VERSION_INFO, 0x0) }; + let apic_id = (result.ebx >> 24) & 0xff; + + apic_id as usize +} + +/// Enable FSGSBASE instructions +#[inline] +pub fn enable_fsgsbase() { + let mut flags = x86_64::registers::control::Cr4::read(); + flags.insert(x86_64::registers::control::Cr4Flags::FSGSBASE); + unsafe { + x86_64::registers::control::Cr4::write(flags); + } +} + +/// The maximum number of supported CPU cores. It depends on the number of VCPUs that +/// Hyper-V supports. We set it to 128 for now. +pub const MAX_CORES: usize = 128; + +/// Enable CPU extended states such as XMM and instructions to use and manage them +/// such as SSE and XSAVE +#[cfg(target_arch = "x86_64")] +pub fn enable_extended_states() { + let mut flags = x86_64::registers::control::Cr0::read(); + flags.remove(x86_64::registers::control::Cr0Flags::EMULATE_COPROCESSOR); + flags.insert(x86_64::registers::control::Cr0Flags::MONITOR_COPROCESSOR); + unsafe { + x86_64::registers::control::Cr0::write(flags); + } + + let mut flags = x86_64::registers::control::Cr4::read(); + flags.insert(x86_64::registers::control::Cr4Flags::OSFXSR); + flags.insert(x86_64::registers::control::Cr4Flags::OSXMMEXCPT_ENABLE); + flags.insert(x86_64::registers::control::Cr4Flags::OSXSAVE); + unsafe { + x86_64::registers::control::Cr4::write(flags); + } + + let mut flags = x86_64::registers::xcontrol::XCr0::read(); + // if necessary, we can enable AVX and other extended states here + flags.insert(x86_64::registers::xcontrol::XCr0Flags::SSE); + flags.insert(x86_64::registers::xcontrol::XCr0Flags::X87); + unsafe { + x86_64::registers::xcontrol::XCr0::write(flags); + } +} + +/// Enable SMEP +#[inline] +pub fn enable_smep() { + let mut flags = x86_64::registers::control::Cr4::read(); + flags.insert(Cr4Flags::SUPERVISOR_MODE_EXECUTION_PROTECTION); + unsafe { + x86_64::registers::control::Cr4::write(flags); + } +} + +/// Disable SMEP +#[inline] +pub fn disable_smep() { + let mut flags = x86_64::registers::control::Cr4::read(); + flags.remove(Cr4Flags::SUPERVISOR_MODE_EXECUTION_PROTECTION); + unsafe { + x86_64::registers::control::Cr4::write(flags); + } +} + +/// Enable SMAP +#[inline] +pub fn enable_smap() { + let mut flags = x86_64::registers::control::Cr4::read(); + flags.insert(Cr4Flags::SUPERVISOR_MODE_ACCESS_PREVENTION); + unsafe { + 
x86_64::registers::control::Cr4::write(flags); + } +} + +/// Disable SMAP +#[inline] +pub fn disable_smap() { + let mut flags = x86_64::registers::control::Cr4::read(); + flags.remove(Cr4Flags::SUPERVISOR_MODE_ACCESS_PREVENTION); + unsafe { + x86_64::registers::control::Cr4::write(flags); + } +} + +#[inline] +pub fn write_kernel_gsbase_msr(addr: VirtAddr) { + x86_64::registers::model_specific::KernelGsBase::write(addr); +} diff --git a/litebox_platform_kernel/src/arch/x86/msr.rs b/litebox_platform_kernel/src/arch/x86/msr.rs new file mode 100644 index 000000000..dd99a7a85 --- /dev/null +++ b/litebox_platform_kernel/src/arch/x86/msr.rs @@ -0,0 +1,2 @@ +pub const MSR_EFER: u32 = 0xc0000080; +pub const MSR_IA32_CR_PAT: u32 = 0x00000277; diff --git a/litebox_platform_kernel/src/lib.rs b/litebox_platform_kernel/src/lib.rs new file mode 100644 index 000000000..33c1a7eb8 --- /dev/null +++ b/litebox_platform_kernel/src/lib.rs @@ -0,0 +1,669 @@ +//! A [LiteBox platform](../litebox/platform/index.html) for running LiteBox in kernel mode + +#![cfg(target_arch = "x86_64")] +#![no_std] +#![cfg_attr(feature = "interrupt", feature(abi_x86_interrupt))] + +use crate::user_context::UserContextMap; + +use core::{ + arch::asm, + sync::atomic::{AtomicU32, AtomicU64}, +}; +use litebox::platform::{ + DebugLogProvider, IPInterfaceProvider, ImmediatelyWokenUp, PageManagementProvider, + Punchthrough, RawMutexProvider, StdioProvider, TimeProvider, UnblockedOrTimedOut, +}; +use litebox::platform::{ + PunchthroughProvider, PunchthroughToken, RawMutPointer, RawPointerProvider, +}; +use litebox::shim::ContinueOperation; +use litebox::{mm::linux::PageRange, platform::page_mgmt::FixedAddressBehavior}; +use litebox_common_linux::{PunchthroughSyscall, errno::Errno}; +use ptr::UserMutPtr; +use x86_64::VirtAddr; + +extern crate alloc; + +pub mod arch; +pub mod mm; +pub mod ptr; + +pub mod per_cpu_variables; +pub mod syscall_entry; +pub mod user_context; + +mod alloc_impl; + +#[cfg(test)] +pub mod mock; + +const PAGE_SIZE: usize = 4096; + +static CPU_MHZ: AtomicU64 = AtomicU64::new(0); + +/// This is the platform for running LiteBox in kernel mode. +pub struct LiteBoxKernel { + page_table: mm::PageTable, + user_contexts: UserContextMap, +} + +impl RawPointerProvider for LiteBoxKernel { + type RawConstPointer = ptr::UserConstPtr; + type RawMutPointer = ptr::UserMutPtr; +} + +pub struct LiteBoxPunchthroughToken { + punchthrough: PunchthroughSyscall, +} + +impl PunchthroughToken for LiteBoxPunchthroughToken { + type Punchthrough = PunchthroughSyscall; + + fn execute( + self, + ) -> Result< + ::ReturnSuccess, + litebox::platform::PunchthroughError<::ReturnFailure>, + > { + let r = match self.punchthrough { + PunchthroughSyscall::SetFsBase { addr } => { + unsafe { litebox_common_linux::wrfsbase(addr) }; + Ok(0) + } + PunchthroughSyscall::GetFsBase { addr } => { + let fs_base = unsafe { litebox_common_linux::rdfsbase() }; + let ptr: UserMutPtr = addr.cast(); + unsafe { ptr.write_at_offset(0, fs_base) } + .map(|()| 0) + .ok_or(Errno::EFAULT) + } + _ => unimplemented!(), + }; + match r { + Ok(v) => Ok(v), + Err(e) => Err(litebox::platform::PunchthroughError::Failure(e)), + } + } +} + +impl PunchthroughProvider for LiteBoxKernel { + type PunchthroughToken = LiteBoxPunchthroughToken; + + fn get_punchthrough_token_for( + &self, + punchthrough: ::Punchthrough, + ) -> Option { + Some(LiteBoxPunchthroughToken { punchthrough }) + } +} + +impl LiteBoxKernel { + /// This function initializes the kernel platform (mostly the kernel page table). 
+ /// `init_page_table_addr` specifies the physical address of the initial page table prepared by the kernel. + /// + /// # Panics + /// + /// Panics if the heap is not initialized yet or it does not have enough space to allocate page table entries. + pub fn new(init_page_table_addr: x86_64::PhysAddr) -> &'static Self { + // There is only one long-running platform ever expected, thus this leak is perfectly ok in + // order to simplify usage of the platform. + alloc::boxed::Box::leak(alloc::boxed::Box::new(Self { + page_table: unsafe { mm::PageTable::new(init_page_table_addr) }, + user_contexts: UserContextMap::new(), + })) + } + + pub fn init(&self, cpu_mhz: u64) { + CPU_MHZ.store(cpu_mhz, core::sync::atomic::Ordering::Relaxed); + } + + /// Create a new page table for user space. Currently, it maps the entire kernel memory for + /// proper operations (e.g., syscall handling). We should consider implementing + /// partial mapping to mitigate side-channel attacks and shallow copying to get rid of redundant + /// page table data structures for kernel space. + #[allow(clippy::unused_self)] + pub(crate) fn new_user_page_table(&self) -> mm::PageTable { + // TODO: use separate page table later + let (cr3, _) = x86_64::registers::control::Cr3::read_raw(); + unsafe { mm::PageTable::new(cr3.start_address()) } + } + + /// Register the shim. This function must be called for each core to program + /// its MSRs. + pub fn register_shim( + shim: &'static dyn litebox::shim::EnterShim, + ) { + syscall_entry::init(shim); + } + + // TODO: replace it with actual implementation (e.g., atomically increment PID/TID) + pub fn init_task(&self) -> litebox_common_linux::TaskParams { + litebox_common_linux::TaskParams { + pid: 1, + ppid: 1, + uid: 1000, + gid: 1000, + euid: 1000, + egid: 1000, + } + } +} + +impl RawMutexProvider for LiteBoxKernel { + type RawMutex = RawMutex; + + fn new_raw_mutex(&self) -> Self::RawMutex { + Self::RawMutex { + inner: AtomicU32::new(0), + } + } +} + +/// An implementation of [`litebox::platform::RawMutex`] +pub struct RawMutex { + inner: AtomicU32, +} + +/// TODO: common mutex implementation could be moved to a shared crate +impl litebox::platform::RawMutex for RawMutex { + fn underlying_atomic(&self) -> &core::sync::atomic::AtomicU32 { + &self.inner + } + + fn wake_many(&self, _n: usize) -> usize { + unimplemented!() + } + + fn block(&self, val: u32) -> Result<(), ImmediatelyWokenUp> { + match self.block_or_maybe_timeout(val, None) { + Ok(UnblockedOrTimedOut::Unblocked) => Ok(()), + Ok(UnblockedOrTimedOut::TimedOut) => unreachable!(), + Err(ImmediatelyWokenUp) => Err(ImmediatelyWokenUp), + } + } + + fn block_or_timeout( + &self, + val: u32, + time: core::time::Duration, + ) -> Result { + self.block_or_maybe_timeout(val, Some(time)) + } +} + +impl RawMutex { + fn block_or_maybe_timeout( + &self, + _val: u32, + _timeout: Option, + ) -> Result { + unimplemented!() + } +} + +impl DebugLogProvider for LiteBoxKernel { + fn debug_log_print(&self, msg: &str) { + crate::arch::ioport::serial_print_string(msg); + } +} + +/// An implementation of [`litebox::platform::Instant`] +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Instant(u64); + +/// An implementation of [`litebox::platform::SystemTime`] +pub struct SystemTime(); + +impl TimeProvider for LiteBoxKernel { + type Instant = Instant; + type SystemTime = SystemTime; + + fn now(&self) -> Self::Instant { + Instant::now() + } + + fn current_time(&self) -> Self::SystemTime { + unimplemented!() + } +} + +impl 
litebox::platform::Instant for Instant { + fn checked_duration_since(&self, earlier: &Self) -> Option { + self.0.checked_sub(earlier.0).map(|v| { + core::time::Duration::from_micros( + v / CPU_MHZ.load(core::sync::atomic::Ordering::Relaxed), + ) + }) + } + + fn checked_add(&self, duration: core::time::Duration) -> Option { + let duration_micros: u64 = duration.as_micros().try_into().ok()?; + Some(Instant(self.0.checked_add( + duration_micros.checked_mul(CPU_MHZ.load(core::sync::atomic::Ordering::Relaxed))?, + )?)) + } +} + +impl Instant { + fn rdtsc() -> u64 { + let lo: u32; + let hi: u32; + unsafe { + asm!( + "rdtsc", + out("eax") lo, + out("edx") hi, + ); + } + (u64::from(hi) << 32) | u64::from(lo) + } + + fn now() -> Self { + Instant(Self::rdtsc()) + } +} + +impl litebox::platform::SystemTime for SystemTime { + const UNIX_EPOCH: Self = SystemTime(); + + fn duration_since( + &self, + _earlier: &Self, + ) -> Result { + unimplemented!() + } +} + +impl IPInterfaceProvider for LiteBoxKernel { + fn send_ip_packet(&self, _packet: &[u8]) -> Result<(), litebox::platform::SendError> { + unimplemented!() + } + + fn receive_ip_packet( + &self, + _packet: &mut [u8], + ) -> Result { + unimplemented!() + } +} + +impl PageManagementProvider for LiteBoxKernel { + const TASK_ADDR_MIN: usize = 0x1_0000; // default linux config + const TASK_ADDR_MAX: usize = 0x7FFF_FFFF_F000; // (1 << 47) - PAGE_SIZE; + + fn allocate_pages( + &self, + suggested_range: core::ops::Range, + initial_permissions: litebox::platform::page_mgmt::MemoryRegionPermissions, + can_grow_down: bool, + populate_pages_immediately: bool, + fixed_address_behavior: FixedAddressBehavior, + ) -> Result, litebox::platform::page_mgmt::AllocationError> { + let range = PageRange::new(suggested_range.start, suggested_range.end) + .ok_or(litebox::platform::page_mgmt::AllocationError::Unaligned)?; + match fixed_address_behavior { + FixedAddressBehavior::Hint | FixedAddressBehavior::NoReplace => {} + FixedAddressBehavior::Replace => { + // Clear the existing mappings first. 
+ unsafe { self.page_table.unmap_pages(range, true).unwrap() }; + } + } + let flags = u32::from(initial_permissions.bits()) + | if can_grow_down { + litebox::mm::linux::VmFlags::VM_GROWSDOWN.bits() + } else { + 0 + }; + let flags = litebox::mm::linux::VmFlags::from_bits(flags).unwrap(); + Ok(self + .page_table + .map_pages(range, flags, populate_pages_immediately)) + } + + unsafe fn deallocate_pages( + &self, + range: core::ops::Range, + ) -> Result<(), litebox::platform::page_mgmt::DeallocationError> { + let range = PageRange::new(range.start, range.end) + .ok_or(litebox::platform::page_mgmt::DeallocationError::Unaligned)?; + unsafe { self.page_table.unmap_pages(range, true) } + } + + unsafe fn remap_pages( + &self, + old_range: core::ops::Range, + new_range: core::ops::Range, + _permissions: litebox::platform::page_mgmt::MemoryRegionPermissions, + ) -> Result, litebox::platform::page_mgmt::RemapError> { + let old_range = PageRange::new(old_range.start, old_range.end) + .ok_or(litebox::platform::page_mgmt::RemapError::Unaligned)?; + let new_range = PageRange::new(new_range.start, new_range.end) + .ok_or(litebox::platform::page_mgmt::RemapError::Unaligned)?; + if old_range.start.max(new_range.start) <= old_range.end.min(new_range.end) { + return Err(litebox::platform::page_mgmt::RemapError::Overlapping); + } + unsafe { self.page_table.remap_pages(old_range, new_range) } + } + + unsafe fn update_permissions( + &self, + range: core::ops::Range, + new_permissions: litebox::platform::page_mgmt::MemoryRegionPermissions, + ) -> Result<(), litebox::platform::page_mgmt::PermissionUpdateError> { + let range = PageRange::new(range.start, range.end) + .ok_or(litebox::platform::page_mgmt::PermissionUpdateError::Unaligned)?; + let new_flags = + litebox::mm::linux::VmFlags::from_bits(new_permissions.bits().into()).unwrap(); + unsafe { self.page_table.mprotect_pages(range, new_flags) } + } + + fn reserved_pages(&self) -> impl Iterator> { + // TODO: Consider whether we need to reserve some pages in the kernel context. + // For example, we might have to reserve some pages for hardware operations like + // memory-mapped I/O. + core::iter::empty() + } +} + +impl litebox::mm::linux::VmemPageFaultHandler for LiteBoxKernel { + unsafe fn handle_page_fault( + &self, + fault_addr: usize, + flags: litebox::mm::linux::VmFlags, + error_code: u64, + ) -> Result<(), litebox::mm::linux::PageFaultError> { + unsafe { + self.page_table + .handle_page_fault(fault_addr, flags, error_code) + } + } + + fn access_error(error_code: u64, flags: litebox::mm::linux::VmFlags) -> bool { + mm::PageTable::::access_error(error_code, flags) + } +} + +impl StdioProvider for LiteBoxKernel { + fn read_from_stdin(&self, _buf: &mut [u8]) -> Result { + unimplemented!() + } + + fn write_to( + &self, + _stream: litebox::platform::StdioOutStream, + _buf: &[u8], + ) -> Result { + unimplemented!() + } + + fn is_a_tty(&self, _stream: litebox::platform::StdioStream) -> bool { + unimplemented!() + } +} + +/// Runs a guest thread with the given initial context. +/// +/// # Safety +/// The context must be valid guest context. +/// # Panics +/// Panics if `gsbase` is larger than `u64::MAX`. +pub unsafe fn run_thread( + shim: impl litebox::shim::EnterShim, + ctx: &mut litebox_common_linux::PtRegs, +) { + // Currently, `litebox_platform_kernel` uses `swapgs` to efficiently switch between + // kernel and user GS base values during kernel-user mode transitions. 
+ // This `swapgs` usage can pontetially leak a kernel address to the user, so + // we clear the `KernelGsBase` MSR before running the user thread. + crate::arch::write_kernel_gsbase_msr(VirtAddr::zero()); + run_thread_inner(&shim, ctx); +} + +struct ThreadContext<'a> { + shim: &'a dyn litebox::shim::EnterShim, + ctx: &'a mut litebox_common_linux::PtRegs, +} + +fn run_thread_inner( + shim: &dyn litebox::shim::EnterShim, + ctx: &mut litebox_common_linux::PtRegs, +) { + let ctx_ptr = core::ptr::from_mut(ctx); + let mut thread_ctx = ThreadContext { shim, ctx }; + unsafe { run_thread_arch(&mut thread_ctx, ctx_ptr) }; +} + +#[cfg(target_arch = "x86_64")] +#[unsafe(naked)] +unsafe extern "C" fn run_thread_arch( + thread_ctx: &mut ThreadContext, + ctx: *mut litebox_common_linux::PtRegs, +) { + core::arch::naked_asm!( + "push rbp", + "mov rbp, rsp", + "push rbx", + "push r12", + "push r13", + "push r14", + "push r15", + "push rdi", // save thread context + // Save host rsp and rbp and guest context top in TLS. + "mov gs:host_sp@tpoff, rsp", + "mov gs:host_bp@tpoff, rbp", + "lea r8, [rsi + {GUEST_CONTEXT_SIZE}]", + "mov gs:guest_context_top@tpoff, r8", + "call {init_handler}", + "jmp done", + ".globl syscall_callback", + "syscall_callback:", + "swapgs", + "mov r11, rsp", + "mov rsp, gs:guest_context_top@tpoff", + "push 0x33", // USER_DS + "push r11", + "pushfq", + "push 0x2b", // USER_CS + "push rcx", + "push rax", + "push rdi", + "push rsi", + "push rdx", + "push rcx", + "push -38", + "push r8", + "push r9", + "push r10", + "push [rsp + 88]", + "push rbx", + "push rbp", + "push r12", + "push r13", + "push r14", + "push r15", + "mov rsp, gs:host_sp@tpoff", + "mov rbp, gs:host_bp@tpoff", + "mov rdi, [rsp]", // pass thread_ctx + "call {syscall_handler}", + "jmp done", + "done:", + "mov rbp, gs:host_bp@tpoff", + "mov rsp, gs:host_sp@tpoff", + "lea rsp, [rbp - 5 * 8]", + "pop r15", + "pop r14", + "pop r13", + "pop r12", + "pop rbx", + "pop rbp", + "ret", + GUEST_CONTEXT_SIZE = const core::mem::size_of::(), + init_handler = sym init_handler, + syscall_handler = sym syscall_handler, + ); +} + +#[allow(dead_code)] +unsafe extern "C" { + // Defined in asm blocks above + fn syscall_callback() -> isize; +} + +#[allow(clippy::cast_sign_loss)] +unsafe extern "C" fn syscall_handler(thread_ctx: &mut ThreadContext) { + thread_ctx.call_shim(|shim, ctx| shim.syscall(ctx)); +} + +/// Calls `f` in order to call into a shim entrypoint. +impl ThreadContext<'_> { + fn call_shim( + &mut self, + f: impl FnOnce( + &dyn litebox::shim::EnterShim, + &mut litebox_common_linux::PtRegs, + ) -> ContinueOperation, + ) { + // TODO: clear the interrupt flag before calling the shim + let op = f(self.shim, self.ctx); + match op { + ContinueOperation::ResumeGuest => unsafe { switch_to_guest(self.ctx) }, + ContinueOperation::ExitThread => {} + } + } +} + +/// TODO: call shim init. +#[cfg(target_arch = "x86_64")] +fn init_handler(thread_ctx: &mut ThreadContext) { + thread_ctx.call_shim(|shim, ctx| shim.init(ctx)) +} + +#[cfg(target_arch = "x86_64")] +core::arch::global_asm!( + " + .section .tbss + .align 8 +scratch: + .quad 0 +scratch2: + .quad 0 +host_sp: + .quad 0 +host_bp: + .quad 0 +guest_context_top: + .quad 0 + " +); + +/// Switches to the provided guest context in kernel mode (for testing). +/// +/// # Safety +/// The context must be valid guest context. +/// +/// Do not call this at a point where the stack needs to be unwound to run +/// destructors. 
+#[allow(dead_code)] +#[cfg(target_arch = "x86_64")] +#[unsafe(naked)] +unsafe extern "C" fn switch_to_guest_kernel_mode(ctx: &litebox_common_linux::PtRegs) -> ! { + core::arch::naked_asm!( + // Restore guest context from ctx. + "mov rsp, rdi", + "pop r15", + "pop r14", + "pop r13", + "pop r12", + "pop rbp", + "pop rbx", + "pop r11", + "pop r10", + "pop r9", + "pop r8", + "pop rax", + "pop rcx", + "pop rdx", + "pop rsi", + "pop rdi", + "add rsp, 8", // skip orig_rax + "pop gs:scratch@tpoff", // read rip into scratch + "add rsp, 8", // skip cs + "popfq", + "pop rsp", + "jmp gs:scratch@tpoff", // jump to the guest + ); +} + +/// Switches to the provided guest context with the user mode. +/// +/// # Safety +/// The context must be valid guest context. +/// +/// Do not call this at a point where the stack needs to be unwound to run +/// destructors. +#[cfg(target_arch = "x86_64")] +unsafe extern "C" fn switch_to_guest(_ctx: &litebox_common_linux::PtRegs) -> ! { + unsafe { + core::arch::asm!( + // Flush TLB by reloading CR3 + "mov rax, cr3", + "mov cr3, rax", + "xor eax, eax", + // Restore guest context from ctx. + "mov rsp, rdi", + "pop r15", + "pop r14", + "pop r13", + "pop r12", + "pop rbp", + "pop rbx", + "pop r11", + "pop r10", + "pop r9", + "pop r8", + "pop rax", + "pop rcx", + "pop rdx", + "pop rsi", + "pop rdi", + "add rsp, 8", // skip orig_rax + // Stack already has all the values needed for iretq (rip, cs, flags, rsp, ds) + // from the PtRegs structure. + // clear the GS base register (as the `KernelGsBase` MSR contains 0) + // while writing the current GS base value to `KernelGsBase`. + "swapgs", + "iretq", + options(noreturn) + ); + } +} + +unsafe impl litebox::platform::ThreadLocalStorageProvider for LiteBoxKernel { + fn get_thread_local_storage() -> *mut () { + let tls = per_cpu_variables::with_per_cpu_variables_mut(|pcv| pcv.tls); + tls.as_mut_ptr::<()>() + } + + unsafe fn replace_thread_local_storage(value: *mut ()) -> *mut () { + per_cpu_variables::with_per_cpu_variables_mut(|pcv| { + let old = pcv.tls; + pcv.tls = x86_64::VirtAddr::new(value as u64); + old.as_u64() as *mut () + }) + } +} + +impl litebox::platform::CrngProvider for LiteBoxKernel { + fn fill_bytes_crng(&self, buf: &mut [u8]) { + // FIXME: generate real random data. + static RANDOM: spin::mutex::SpinMutex = + spin::mutex::SpinMutex::new(litebox::utils::rng::FastRng::new_from_seed( + core::num::NonZeroU64::new(0x4d595df4d0f33173).unwrap(), + )); + let mut random = RANDOM.lock(); + for b in buf.chunks_mut(8) { + b.copy_from_slice(&random.next_u64().to_ne_bytes()[..b.len()]); + } + } +} diff --git a/litebox_platform_kernel/src/mm/mod.rs b/litebox_platform_kernel/src/mm/mod.rs new file mode 100644 index 000000000..76f4c424b --- /dev/null +++ b/litebox_platform_kernel/src/mm/mod.rs @@ -0,0 +1,61 @@ +//! Memory management module + +use crate::arch::{PhysAddr, VirtAddr}; + +pub(crate) mod pgtable; + +#[cfg(test)] +pub mod tests; + +/// Memory provider trait for global allocator. +pub trait MemoryProvider { + /// Global virtual address offset for one-to-one mapping of physical memory + /// to kernel virtual memory. + const GVA_OFFSET: VirtAddr; + /// Mask for private page table entry (e.g., SNP encryption bit). + /// For simplicity, we assume the mask is constant. + const PRIVATE_PTE_MASK: u64; + + /// Allocate (1 << `order`) virtually and physically contiguous pages from global allocator. 
+ fn mem_allocate_pages(order: u32) -> Option<*mut u8>; + + /// De-allocates virtually and physically contiguous pages returned from [`Self::mem_allocate_pages`]. + /// + /// # Safety + /// + /// The caller must ensure that the `ptr` is valid and was allocated by this allocator. + /// + /// `order` must be the same as the one used during allocation. + unsafe fn mem_free_pages(ptr: *mut u8, order: u32); + + /// Add a range of memory to global allocator. + /// Morally, the global allocator takes ownership of this range of memory. + /// + /// # Safety + /// + /// The caller must ensure that the memory range is valid and not used by any others. + unsafe fn mem_fill_pages(start: usize, size: usize); + + /// Obtain physical address (PA) of a page given its VA + fn va_to_pa(va: VirtAddr) -> PhysAddr { + PhysAddr::new_truncate(va - Self::GVA_OFFSET) + } + + /// Obtain virtual address (VA) of a page given its PA + fn pa_to_va(pa: PhysAddr) -> VirtAddr { + let pa = pa.as_u64() & !Self::PRIVATE_PTE_MASK; + VirtAddr::new_truncate(pa + Self::GVA_OFFSET.as_u64()) + } + + /// Set physical address as private via mask. + fn make_pa_private(pa: PhysAddr) -> PhysAddr { + PhysAddr::new_truncate(pa.as_u64() | Self::PRIVATE_PTE_MASK) + } +} + +#[cfg(all(target_arch = "x86_64", not(test)))] +pub type PageTable = + crate::arch::mm::paging::X64PageTable<'static, crate::LiteBoxKernel, ALIGN>; +#[cfg(all(target_arch = "x86_64", test))] +pub type PageTable = + crate::arch::mm::paging::X64PageTable<'static, crate::mock::MockKernel, ALIGN>; diff --git a/litebox_platform_kernel/src/mm/pgtable.rs b/litebox_platform_kernel/src/mm/pgtable.rs new file mode 100644 index 000000000..374b46051 --- /dev/null +++ b/litebox_platform_kernel/src/mm/pgtable.rs @@ -0,0 +1,80 @@ +use litebox::mm::linux::PageFaultError; + +use crate::arch::{ + Page, PageFaultErrorCode, PageSize, PageTableFlags, PhysAddr, PhysFrame, Size4KiB, VirtAddr, +}; + +/// Page table allocator +pub(crate) struct PageTableAllocator { + _provider: core::marker::PhantomData, +} + +impl Default for PageTableAllocator { + fn default() -> Self { + Self::new() + } +} + +impl PageTableAllocator { + pub fn new() -> Self { + Self { + _provider: core::marker::PhantomData, + } + } + + /// Allocate a frame + /// + /// # Panics + /// + /// Panics if the address is not correctly aligned (i.e. is not a valid frame start) + pub fn allocate_frame(clear: bool) -> Option> { + M::mem_allocate_pages(0).map(|addr| { + if clear { + unsafe { + core::slice::from_raw_parts_mut(addr, usize::try_from(Size4KiB::SIZE).unwrap()) + .fill(0); + } + } + PhysFrame::from_start_address(M::make_pa_private(M::va_to_pa(VirtAddr::new( + addr as u64, + )))) + .unwrap() + }) + } +} + +pub trait PageTableImpl { + /// Flags that `mprotect` can change: + /// [`PageTableFlags::WRITABLE`] | [`PageTableFlags::USER_ACCESSIBLE`] | [`PageTableFlags::NO_EXECUTE`] + const MPROTECT_PTE_MASK: PageTableFlags = PageTableFlags::from_bits_truncate( + PageTableFlags::WRITABLE.bits() + | PageTableFlags::USER_ACCESSIBLE.bits() + | PageTableFlags::NO_EXECUTE.bits(), + ); + + /// Initialize the page table with the physical address of the top-level page table. + /// + /// # Safety + /// + /// The caller must ensure that the `p` is valid and properly aligned. + unsafe fn init(p: PhysAddr) -> Self; + + /// Translate a virtual address to a physical address + #[cfg(test)] + fn translate(&self, addr: VirtAddr) -> crate::arch::TranslateResult; + + /// Handle page fault + /// + /// `flags` presents the PTE flags to be set for the page. 
+ /// + /// # Safety + /// + /// The caller must also ensure that the `page` is valid and user has + /// access to it. + unsafe fn handle_page_fault( + &self, + page: Page, + flags: PageTableFlags, + error_code: PageFaultErrorCode, + ) -> Result<(), PageFaultError>; +} diff --git a/litebox_platform_kernel/src/mm/tests.rs b/litebox_platform_kernel/src/mm/tests.rs new file mode 100644 index 000000000..00243b3c9 --- /dev/null +++ b/litebox_platform_kernel/src/mm/tests.rs @@ -0,0 +1,298 @@ +use core::alloc::{GlobalAlloc, Layout}; + +use alloc::vec; +use alloc::vec::Vec; +use arrayvec::ArrayVec; +use litebox::{ + LiteBox, + mm::{ + PageManager, + allocator::SafeZoneAllocator, + linux::{PAGE_SIZE, PageFaultError, PageRange, VmFlags}, + }, + platform::RawConstPointer, +}; +use spin::mutex::SpinMutex; + +use crate::{ + arch::{ + MappedFrame, Page, PageFaultErrorCode, PageTableFlags, PhysAddr, TranslateResult, VirtAddr, + mm::paging::{X64PageTable, vmflags_to_pteflags}, + }, + mm::{MemoryProvider, pgtable::PageTableAllocator}, + mock::MockKernel, + mock_log_println, + ptr::UserMutPtr, +}; + +use super::pgtable::PageTableImpl; + +const MAX_ORDER: usize = 23; + +static ALLOCATOR: SafeZoneAllocator<'static, MAX_ORDER, MockKernel> = SafeZoneAllocator::new(); +/// const Array for VA to PA mapping +static MAPPING: SpinMutex> = SpinMutex::new(ArrayVec::new_const()); + +impl litebox::mm::allocator::MemoryProvider for MockKernel { + fn alloc(_layout: &core::alloc::Layout) -> Option<(usize, usize)> { + unimplemented!() + } + + unsafe fn free(_addr: usize) { + unimplemented!() + } +} + +impl super::MemoryProvider for MockKernel { + const GVA_OFFSET: super::VirtAddr = super::VirtAddr::new(0); + const PRIVATE_PTE_MASK: u64 = 0; + + fn mem_allocate_pages(order: u32) -> Option<*mut u8> { + ALLOCATOR.allocate_pages(order) + } + + unsafe fn mem_free_pages(ptr: *mut u8, order: u32) { + unsafe { ALLOCATOR.free_pages(ptr, order) } + } + + unsafe fn mem_fill_pages(start: usize, size: usize) { + unsafe { ALLOCATOR.fill_pages(start, size) } + } + + fn va_to_pa(va: VirtAddr) -> PhysAddr { + let idx = MAPPING.lock().iter().position(|x| *x == va); + assert!(idx.is_some()); + PhysAddr::new((idx.unwrap() * PAGE_SIZE + 0x1000_0000) as u64) + } + + fn pa_to_va(pa: PhysAddr) -> VirtAddr { + let mapping = MAPPING.lock(); + let idx = (pa.as_u64() - 0x1000_0000) / PAGE_SIZE as u64; + let va = mapping.get(usize::try_from(idx).unwrap()); + assert!(va.is_some()); + let va = *va.unwrap(); + if va.is_null() { + mock_log_println!("Invalid PA"); + panic!("Invalid PA"); + } + va + } +} + +#[ignore = "test code is not ready"] +#[test] +fn test_buddy() { + let ptr = MockKernel::mem_allocate_pages(1); + assert!(ptr.is_some_and(|p| p as usize != 0)); + unsafe { + MockKernel::mem_free_pages(ptr.unwrap(), 1); + } +} + +#[ignore = "test code is not ready"] +#[test] +fn test_slab() { + unsafe { + let ptr1 = ALLOCATOR.alloc(Layout::from_size_align(0x1000, 0x1000).unwrap()); + assert!(ptr1 as usize != 0); + let ptr2 = ALLOCATOR.alloc(Layout::from_size_align(0x10, 0x10).unwrap()); + assert!(ptr2 as usize != 0); + ALLOCATOR.dealloc(ptr1, Layout::from_size_align(0x1000, 0x1000).unwrap()); + ALLOCATOR.dealloc(ptr2, Layout::from_size_align(0x10, 0x10).unwrap()); + } +} + +fn check_flags( + pgtable: &X64PageTable<'_, MockKernel, PAGE_SIZE>, + addr: usize, + flags: PageTableFlags, +) { + match pgtable.translate(VirtAddr::new(addr as _)) { + TranslateResult::Mapped { + frame, + offset, + flags: f, + } => { + assert!(matches!(frame, 
MappedFrame::Size4KiB(_))); + assert_eq!(offset, 0); + assert_eq!(flags, f); + } + other => panic!("unexpected: {other:?}"), + } +} + +fn get_test_pgtable<'a>( + range: PageRange, + fault_flags: PageTableFlags, +) -> X64PageTable<'a, MockKernel, PAGE_SIZE> { + let p4 = PageTableAllocator::::allocate_frame(true).unwrap(); + let pgtable = unsafe { X64PageTable::::init(p4.start_address()) }; + + for page in range { + unsafe { + pgtable + .handle_page_fault( + Page::containing_address(VirtAddr::new(page as _)), + fault_flags, + PageFaultErrorCode::USER_MODE, + ) + .unwrap(); + } + } + + for page in range { + check_flags(&pgtable, page, fault_flags); + } + + pgtable +} + +#[ignore = "test code is not ready"] +#[test] +fn test_page_table() { + let start_addr: usize = 0x1000; + let vmflags = VmFlags::VM_READ; + let pteflags = vmflags_to_pteflags(vmflags) | PageTableFlags::PRESENT; + let range = PageRange::new(start_addr, start_addr + 4 * PAGE_SIZE).unwrap(); + let pgtable = get_test_pgtable(range, pteflags); + + // update flags + let new_vmflags = VmFlags::empty(); + let new_pteflags = vmflags_to_pteflags(new_vmflags) | PageTableFlags::PRESENT; + unsafe { + assert!( + pgtable + .mprotect_pages( + PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 6 * PAGE_SIZE).unwrap(), + new_vmflags + ) + .is_ok() + ); + } + for page in PageRange::::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap() { + check_flags(&pgtable, page, pteflags); + } + for page in + PageRange::::new(start_addr + 2 * PAGE_SIZE, start_addr + 4 * PAGE_SIZE).unwrap() + { + check_flags(&pgtable, page, new_pteflags); + } + + // remap pages + let new_addr: usize = 0x20_1000; + unsafe { + assert!( + pgtable + .remap_pages( + PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), + PageRange::new(new_addr, new_addr + 2 * PAGE_SIZE).unwrap() + ) + .is_ok() + ); + } + for page in PageRange::::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap() { + assert!(matches!( + pgtable.translate(VirtAddr::new(page as _)), + TranslateResult::NotMapped + )); + } + for page in PageRange::::new(new_addr, new_addr + 2 * PAGE_SIZE).unwrap() { + check_flags(&pgtable, page, pteflags); + } + + // unmap all pages + let range = PageRange::new(start_addr, new_addr + 4 * PAGE_SIZE).unwrap(); + unsafe { pgtable.unmap_pages(range, true) }.unwrap(); + for page in PageRange::::new(start_addr, new_addr + 4 * PAGE_SIZE).unwrap() { + assert!(matches!( + pgtable.translate(VirtAddr::new(page as _)), + TranslateResult::NotMapped + )); + } +} + +#[ignore = "test code is not ready"] +#[test] +fn test_vmm_page_fault() { + let start_addr: usize = 0x1_0000; + let p4 = PageTableAllocator::::allocate_frame(true).unwrap(); + let platform = MockKernel::new(p4.start_address()); + let litebox = LiteBox::new(platform); + let vmm = PageManager::<_, PAGE_SIZE>::new(&litebox); + unsafe { + assert_eq!( + vmm.create_writable_pages( + Some(litebox::mm::linux::NonZeroAddress::new(start_addr).unwrap()), + litebox::mm::linux::NonZeroPageSize::new(4 * PAGE_SIZE).unwrap(), + litebox::mm::linux::CreatePagesFlags::FIXED_ADDR, + |_: UserMutPtr| Ok(0), + ) + .unwrap() + .as_usize(), + start_addr + ); + } + // [0x1_0000, 0x1_4000) + + // Access page w/o mapping + assert!(matches!( + unsafe { + vmm.handle_page_fault( + start_addr + 6 * PAGE_SIZE, + PageFaultErrorCode::USER_MODE.bits(), + ) + }, + Err(PageFaultError::AccessError(_)) + )); + + // Access non-present page w/ mapping + assert!( + unsafe { + vmm.handle_page_fault( + start_addr + 2 * PAGE_SIZE, + 
PageFaultErrorCode::USER_MODE.bits(), + ) + } + .is_ok() + ); + + // insert stack mapping + let stack_addr: usize = 0x1000_0000; + unsafe { + assert_eq!( + vmm.create_stack_pages( + Some(litebox::mm::linux::NonZeroAddress::new(stack_addr).unwrap()), + litebox::mm::linux::NonZeroPageSize::new(4 * PAGE_SIZE).unwrap(), + litebox::mm::linux::CreatePagesFlags::FIXED_ADDR, + ) + .unwrap() + .as_usize(), + stack_addr + ); + } + // [0x1_0000, 0x1_4000), [0x1000_0000, 0x1000_4000) + // Test stack growth + assert!( + unsafe { + vmm.handle_page_fault(stack_addr - PAGE_SIZE, PageFaultErrorCode::USER_MODE.bits()) + } + .is_ok() + ); + assert_eq!( + vmm.mappings() + .iter() + .map(|v| v.0.clone()) + .collect::>(), + vec![0x1_0000..0x1_4000, 0x0fff_f000..0x1000_4000] + ); + // Cannot grow stack too far + assert!(matches!( + unsafe { + vmm.handle_page_fault( + start_addr + 100 * PAGE_SIZE, + PageFaultErrorCode::USER_MODE.bits(), + ) + }, + Err(PageFaultError::AllocationFailed) + )); +} diff --git a/litebox_platform_kernel/src/mock.rs b/litebox_platform_kernel/src/mock.rs new file mode 100644 index 000000000..4b1a0bfc2 --- /dev/null +++ b/litebox_platform_kernel/src/mock.rs @@ -0,0 +1,10 @@ +pub type MockKernel = crate::LiteBoxKernel; + +#[macro_export] +macro_rules! mock_log_println { + ($($tt:tt)*) => {{ + use core::fmt::Write; + let mut t: arrayvec::ArrayString<1024> = arrayvec::ArrayString::new(); + writeln!(t, $($tt)*).unwrap(); + }}; +} diff --git a/litebox_platform_kernel/src/per_cpu_variables.rs b/litebox_platform_kernel/src/per_cpu_variables.rs new file mode 100644 index 000000000..30d154077 --- /dev/null +++ b/litebox_platform_kernel/src/per_cpu_variables.rs @@ -0,0 +1,254 @@ +//! Per-CPU kernel variables + +use crate::arch::{MAX_CORES, gdt, get_core_id}; +use aligned_vec::avec; +use alloc::boxed::Box; +use core::cell::RefCell; +use litebox_common_linux::{rdgsbase, wrgsbase}; +use x86_64::VirtAddr; + +const PAGE_SIZE: usize = 4096; +pub const INTERRUPT_STACK_SIZE: usize = 2 * PAGE_SIZE; +pub const KERNEL_STACK_SIZE: usize = 10 * PAGE_SIZE; + +/// Per-CPU kernel variables +#[repr(align(4096))] +#[derive(Clone, Copy)] +pub struct PerCpuVariables { + interrupt_stack: [u8; INTERRUPT_STACK_SIZE], + _guard_page_0: [u8; PAGE_SIZE], + kernel_stack: [u8; KERNEL_STACK_SIZE], + _guard_page_1: [u8; PAGE_SIZE], + pub gdt: Option<&'static gdt::GdtWrapper>, + xsave_area_addr: VirtAddr, + pub tls: VirtAddr, +} + +impl PerCpuVariables { + const XSAVE_ALIGNMENT: usize = 64; // XSAVE and XRSTORE require a 64-byte aligned buffer + const XSAVE_MASK: u64 = 0b11; // let XSAVE and XRSTORE deal with x87 and SSE states + + pub fn kernel_stack_top(&self) -> u64 { + &raw const self.kernel_stack as u64 + (self.kernel_stack.len() - 1) as u64 + } + + pub(crate) fn interrupt_stack_top(&self) -> u64 { + &raw const self.interrupt_stack as u64 + (self.interrupt_stack.len() - 1) as u64 + } + + /// Return kernel code, user code, and user data segment selectors + pub(crate) fn get_segment_selectors(&self) -> Option<(u16, u16, u16)> { + self.gdt.map(gdt::GdtWrapper::get_segment_selectors) + } + + /// Allocate XSAVE areas for saving/restoring the extended states of each core. + /// These buffers are allocated once and never deallocated. 
+ #[expect(dead_code)] + pub(crate) fn allocate_xsave_area(&mut self) { + assert!( + self.xsave_area_addr.is_null(), + "XSAVE areas are already allocated" + ); + let xsave_area_size = get_xsave_area_size(); + // Leaking `xsave_area` buffers are okay because they are never reused + // until the core gets reset. + let xsave_area = Box::leak( + avec![[{ Self::XSAVE_ALIGNMENT }] | 0u8; xsave_area_size] + .into_boxed_slice() + .into(), + ); + self.xsave_area_addr = VirtAddr::new(xsave_area.as_ptr() as u64); + } + + #[expect(dead_code)] + pub(crate) fn save_extended_states(&self) { + if self.xsave_area_addr.is_null() { + panic!("XSAVE areas are not allocated"); + } else { + let xsave_area_addr = self.xsave_area_addr.as_u64(); + unsafe { + core::arch::asm!( + "xsaveopt [{}]", + in(reg) xsave_area_addr, + in("eax") Self::XSAVE_MASK & 0xffff_ffff, + in("edx") (Self::XSAVE_MASK & 0xffff_ffff_0000_0000) >> 32, + options(nostack, preserves_flags) + ); + } + } + } + + /// Restore the extended states of each core + #[expect(dead_code)] + pub(crate) fn restore_extended_states(&self) { + if self.xsave_area_addr.is_null() { + panic!("XSAVE areas are not allocated"); + } else { + let xsave_area_addr = self.xsave_area_addr.as_u64(); + unsafe { + core::arch::asm!( + "xrstor [{}]", + in(reg) xsave_area_addr, + in("eax") Self::XSAVE_MASK & 0xffff_ffff, + in("edx") (Self::XSAVE_MASK & 0xffff_ffff_0000_0000) >> 32, + options(nostack, preserves_flags) + ); + } + } + } +} + +/// per-CPU variables for core 0 (or BSP). This must use static memory because kernel heap is not ready. +static mut BSP_VARIABLES: PerCpuVariables = PerCpuVariables { + interrupt_stack: [0u8; INTERRUPT_STACK_SIZE], + _guard_page_0: [0u8; PAGE_SIZE], + kernel_stack: [0u8; KERNEL_STACK_SIZE], + _guard_page_1: [0u8; PAGE_SIZE], + gdt: const { None }, + xsave_area_addr: VirtAddr::zero(), + tls: VirtAddr::zero(), +}; + +/// Store the addresses of per-CPU variables. The kernel threads are expected to access +/// the corresponding per-CPU variables via the GS registers which will store the addresses later. +/// Instead of maintaining this map, we might be able to use a hypercall to directly program each core's GS register. +static mut PER_CPU_VARIABLE_ADDRESSES: [RefCell<*mut PerCpuVariables>; MAX_CORES] = + [const { RefCell::new(core::ptr::null_mut()) }; MAX_CORES]; + +/// Execute a closure with a reference to the current core's per-CPU variables. +/// +/// # Safety +/// This function assumes the following: +/// - The GSBASE register values of individual cores must be properly set (i.e., they must be different). +/// - `get_core_id()` must return distinct APIC IDs for different cores. +/// +/// If we cannot guarantee these assumptions, this function may result in unsafe or undefined behaviors. +/// +/// # Panics +/// Panics if GSBASE is not set, it contains a non-canonical address, or no per-CPU variables are allocated. +/// Panics if this function is recursively called (`BorrowMutError`). +pub fn with_per_cpu_variables(f: F) -> R +where + F: FnOnce(&PerCpuVariables) -> R, + R: Sized + 'static, +{ + let Some(refcell) = get_or_init_refcell_of_per_cpu_variables() else { + panic!("No per-CPU variables are allocated"); + }; + let borrow = refcell.borrow(); + let per_cpu_variables = unsafe { &**borrow }; + + f(per_cpu_variables) +} + +/// Execute a closure with a mutable reference to the current core's per-CPU variables. 
+/// +/// # Safety +/// This function assumes the following: +/// - The GSBASE register values of individual cores must be properly set (i.e., they must be different). +/// - `get_core_id()` must return distinct APIC IDs for different cores. +/// +/// If we cannot guarantee these assumptions, this function may result in unsafe or undefined behaviors. +/// +/// # Panics +/// Panics if GSBASE is not set, it contains a non-canonical address, or no per-CPU variables are allocated. +/// Panics if this function is recursively called (`BorrowMutError`). +pub fn with_per_cpu_variables_mut(f: F) -> R +where + F: FnOnce(&mut PerCpuVariables) -> R, + R: Sized + 'static, +{ + let Some(refcell) = get_or_init_refcell_of_per_cpu_variables() else { + panic!("No per-CPU variables are allocated"); + }; + let mut borrow = refcell.borrow_mut(); + let per_cpu_variables = unsafe { &mut **borrow }; + + f(per_cpu_variables) +} + +/// Get or initialize a `RefCell` that contains a pointer to the current core's per-CPU variables. +/// This `RefCell` is expected to be stored in the GS register. +fn get_or_init_refcell_of_per_cpu_variables() -> Option<&'static RefCell<*mut PerCpuVariables>> { + let gsbase = unsafe { rdgsbase() }; + if gsbase == 0 { + let core_id = get_core_id(); + let refcell = if core_id == 0 { + let addr = &raw mut BSP_VARIABLES; + unsafe { + PER_CPU_VARIABLE_ADDRESSES[0] = RefCell::new(addr); + &PER_CPU_VARIABLE_ADDRESSES[0] + } + } else { + unsafe { &PER_CPU_VARIABLE_ADDRESSES[core_id] } + }; + if refcell.borrow().is_null() { + None + } else { + let addr = x86_64::VirtAddr::new(&raw const *refcell as u64); + unsafe { + wrgsbase(usize::try_from(addr.as_u64()).unwrap()); + } + Some(refcell) + } + } else { + let addr = x86_64::VirtAddr::try_new(u64::try_from(gsbase).unwrap()) + .expect("GS contains a non-canonical address"); + let refcell = unsafe { &*addr.as_ptr::>() }; + if refcell.borrow().is_null() { + None + } else { + Some(refcell) + } + } +} + +// Allocate per-CPU variables in heap for all possible cores. We expect that the BSP will call +// this function to allocate per-CPU variables for other APs because our per-CPU variables are +// huge such that each AP without a proper stack cannot allocate its own per-CPU variables. +// # Panics +// Panics if the number of possible CPUs exceeds `MAX_CORES` +// pub fn allocate_per_cpu_variables() { +// let num_cores = +// usize::try_from(get_num_possible_cpus().expect("Failed to get number of possible CPUs")) +// .unwrap(); +// assert!( +// num_cores <= MAX_CORES, +// "# of possible CPUs ({num_cores}) exceeds MAX_CORES", +// ); + +// with_per_cpu_variables_mut(|per_cpu_variables| { +// per_cpu_variables.allocate_xsave_area(); +// }); + +// // TODO: use `cpu_online_mask` to selectively allocate per-CPU variables only for online CPUs. +// // Note. `PER_CPU_VARIABLE_ADDRESSES[0]` is expected to be already initialized to point to +// // `BSP_VARIABLES` before calling this function by `get_or_init_refcell_of_per_cpu_variables()`. +// #[allow(clippy::needless_range_loop)] +// for i in 1..num_cores { +// let mut per_cpu_variables = Box::::new_uninit(); +// // Safety: `PerCpuVariables` is larger than the stack size, so we manually `memset` it to zero. 
+// let per_cpu_variables = unsafe { +// let ptr = per_cpu_variables.as_mut_ptr(); +// ptr.write_bytes(0, 1); +// (*ptr).allocate_xsave_area(); +// per_cpu_variables.assume_init() +// }; +// unsafe { +// PER_CPU_VARIABLE_ADDRESSES[i] = RefCell::new(Box::into_raw(per_cpu_variables)); +// } +// } +// } + +/// Get the XSAVE area size based on enabled features (XCR0) +fn get_xsave_area_size() -> usize { + let cpuid = raw_cpuid::CpuId::new(); + let finfo = cpuid + .get_feature_info() + .expect("Failed to get cpuid feature info"); + assert!(finfo.has_xsave(), "XSAVE is not supported"); + let sinfo = cpuid + .get_extended_state_info() + .expect("Failed to get cpuid extended state info"); + usize::try_from(sinfo.xsave_area_size_enabled_features()).unwrap() +} diff --git a/litebox_platform_kernel/src/ptr.rs b/litebox_platform_kernel/src/ptr.rs new file mode 100644 index 000000000..a12f85911 --- /dev/null +++ b/litebox_platform_kernel/src/ptr.rs @@ -0,0 +1,210 @@ +//! Userspace Pointer Abstraction +//! additional sanity checks and extra mode switches (e.g., SMAP/SMEP) + +use litebox::platform::{RawConstPointer, RawMutPointer}; + +/// Represent a user space pointer to a read-only object +#[repr(C)] +#[derive(Clone)] +pub struct UserConstPtr { + pub inner: *const T, +} + +impl core::fmt::Debug for UserConstPtr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("UserConstPtr").field(&self.inner).finish() + } +} + +unsafe fn read_at_offset<'a, T: Clone>( + ptr: *const T, + count: isize, +) -> Option> { + if ptr.is_null() { + return None; + } + if ptr.is_aligned() { + Some(alloc::borrow::Cow::Borrowed(unsafe { &*ptr.offset(count) })) + } else { + // TODO: consider whether we should use `litebox_platform_linux_kernel`'s `memcpy_fallible`. + // `litebox_platform_kernel` currently preallocates all memory, so there would be no page fault. + let mut buffer = core::mem::MaybeUninit::::uninit(); + let buffer = unsafe { + core::ptr::copy_nonoverlapping( + ptr.offset(count).cast::(), + buffer.as_mut_ptr().cast::(), + core::mem::size_of::(), + ); + buffer.assume_init() + }; + Some(alloc::borrow::Cow::Owned(buffer)) + } +} + +unsafe fn to_cow_slice<'a, T: Clone>( + ptr: *const T, + len: usize, +) -> Option> { + if ptr.is_null() { + return None; + } + if len == 0 { + return Some(alloc::borrow::Cow::Owned(alloc::vec::Vec::new())); + } + if ptr.is_aligned() { + Some(alloc::borrow::Cow::Borrowed(unsafe { + core::slice::from_raw_parts(ptr, len) + })) + } else { + // TODO: consider whether we should need `litebox_platform_linux_kernel`'s `memcpy_fallible`. + // `litebox_platform` currently preallocates all memory, so there would be no page fault. 
+ let mut buffer = alloc::vec::Vec::::with_capacity(len); + unsafe { + core::ptr::copy_nonoverlapping( + ptr.cast::(), + buffer.as_mut_ptr().cast::(), + len * core::mem::size_of::(), + ); + buffer.set_len(len); + } + Some(alloc::borrow::Cow::Owned(buffer)) + } +} + +impl Copy for UserConstPtr {} +impl RawConstPointer for UserConstPtr { + unsafe fn read_at_offset<'a>(self, count: isize) -> Option> { + unsafe { read_at_offset(self.inner, count) } + } + + unsafe fn to_cow_slice<'a>(self, len: usize) -> Option> { + unsafe { to_cow_slice(self.inner, len) } + } + + fn as_usize(&self) -> usize { + self.inner.expose_provenance() + } + fn from_usize(addr: usize) -> Self { + Self { + inner: core::ptr::with_exposed_provenance(addr), + } + } +} + +impl UserConstPtr { + /// Check if it's null + pub fn is_null(self) -> bool { + self.inner.is_null() + } + + /// Read from user space at the `off` offset + pub fn from_user_at_offset(self, off: isize) -> Option { + unsafe { Some(self.read_at_offset(off)?.into_owned()) } + } +} + +/// Represent a user space pointer to a mutable object +#[repr(C)] +#[derive(Clone)] +pub struct UserMutPtr { + pub inner: *mut T, +} + +impl core::fmt::Debug for UserMutPtr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("UserMutPtr").field(&self.inner).finish() + } +} + +impl Copy for UserMutPtr {} +impl RawConstPointer for UserMutPtr { + unsafe fn read_at_offset<'a>(self, count: isize) -> Option> { + unsafe { read_at_offset(self.inner, count) } + } + + unsafe fn to_cow_slice<'a>(self, len: usize) -> Option> { + unsafe { to_cow_slice(self.inner, len) } + } + + fn as_usize(&self) -> usize { + self.inner.expose_provenance() + } + fn from_usize(addr: usize) -> Self { + Self { + inner: core::ptr::with_exposed_provenance_mut(addr), + } + } +} + +impl RawMutPointer for UserMutPtr { + unsafe fn write_at_offset(self, count: isize, value: T) -> Option<()> { + if self.inner.is_null() { + return None; + } + if self.inner.is_aligned() { + unsafe { + *self.inner.offset(count) = value; + } + } else { + unsafe { + core::ptr::copy_nonoverlapping( + (&raw const value).cast::(), + self.inner.offset(count).cast::(), + core::mem::size_of::(), + ); + } + } + Some(()) + } + + fn mutate_subslice_with( + self, + range: impl core::ops::RangeBounds, + f: impl FnOnce(&mut [T]) -> R, + ) -> Option { + if self.inner.is_null() || !self.inner.is_aligned() { + return None; + } + let start = match range.start_bound() { + core::ops::Bound::Included(&x) => x, + core::ops::Bound::Excluded(_) => unreachable!(), + core::ops::Bound::Unbounded => 0, + }; + let end = match range.end_bound() { + core::ops::Bound::Included(&x) => x.checked_add(1)?, + core::ops::Bound::Excluded(&x) => x, + core::ops::Bound::Unbounded => { + return None; + } + }; + let len = if start <= end { + start.abs_diff(end) + } else { + return None; + }; + let _ = start.checked_mul(size_of::().try_into().ok()?)?; + let data = unsafe { self.inner.offset(start) }; + let _ = isize::try_from(len.checked_mul(size_of::())?).ok()?; + let slice = unsafe { core::slice::from_raw_parts_mut(data, len) }; + Some(f(slice)) + } +} + +impl UserMutPtr { + /// Check if it's null + pub fn is_null(self) -> bool { + self.inner.is_null() + } + + /// Write to user space at the `off` offset + pub fn to_user_at_offset(self, off: isize, value: T) -> Option<()> { + unsafe { self.write_at_offset(off, value) } + } + + /// Cast to a pointer with different underlying type + pub fn cast(self) -> UserMutPtr { + UserMutPtr { + inner: 
self.inner.cast(), + } + } +} diff --git a/litebox_platform_kernel/src/syscall_entry.rs b/litebox_platform_kernel/src/syscall_entry.rs new file mode 100644 index 000000000..d918eb9d5 --- /dev/null +++ b/litebox_platform_kernel/src/syscall_entry.rs @@ -0,0 +1,165 @@ +use crate::per_cpu_variables::with_per_cpu_variables; +use core::arch::naked_asm; +use litebox_common_linux::PtRegs; +use litebox_common_optee::SyscallContext; +use x86_64::{ + VirtAddr, + registers::{ + model_specific::{Efer, EferFlags, LStar, SFMask, Star}, + rflags::RFlags, + }, +}; + +// Generic x86_64 syscall support with a minor extension for realizing OP-TEE's +// up to 8 syscall arguments (r12 and r13 for the 6th and 7th arguments). +// +// rax: system call number +// rdi: arg0 +// rsi: arg1 +// rdx: arg2 +// r10: arg3 +// r8: arg4 +// r9: arg5 +// r12: arg6 (*) +// r13: arg7 (*) +// +// the `syscall` instruction automatically sets the following registers: +// rcx: userspace return address (note. arg3 for normal func call) +// r11: userspace rflags +// +// the `sysretq` instruction uses the following registers: +// rax: syscall return value +// rcx: userspace return address +// r11: userspace rflags +// Note. rsp should point to the userspace stack before calling `sysretq` + +pub(crate) static SHIM: spin::Once< + &'static dyn litebox::shim::EnterShim, +> = spin::Once::new(); + +#[cfg(target_arch = "x86_64")] +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct SyscallContextRaw { + rdi: u64, // arg0 + rsi: u64, // arg1 + rdx: u64, // arg2 + r10: u64, // arg3 + r8: u64, // arg4 + r9: u64, // arg5 + r12: u64, // arg6 + r13: u64, // arg7 + rcx: u64, // userspace return address + r11: u64, // userspace rflags + rsp: u64, // userspace stack pointer +} + +impl SyscallContextRaw { + /// # Panics + /// Panics if the index is out of bounds (greater than 7). + pub fn arg_index(&self, index: usize) -> u64 { + match index { + 0 => self.rdi, + 1 => self.rsi, + 2 => self.rdx, + 3 => self.r10, + 4 => self.r8, + 5 => self.r9, + 6 => self.r12, + 7 => self.r13, + _ => panic!("BUG: Invalid syscall argument index: {}", index), + } + } + + pub fn user_rip(&self) -> Option { + VirtAddr::try_new(self.rcx).ok() + } + + pub fn user_rflags(&self) -> RFlags { + RFlags::from_bits_truncate(self.r11) + } + + pub fn user_rsp(&self) -> Option { + VirtAddr::try_new(self.rsp).ok() + } + + #[expect(clippy::cast_possible_truncation)] + pub fn syscall_context(&self) -> SyscallContext { + SyscallContext::new(&[ + self.rdi as usize, + self.rsi as usize, + self.rdx as usize, + self.r10 as usize, + self.r8 as usize, + self.r9 as usize, + self.r12 as usize, + self.r13 as usize, + ]) + } + + #[expect(clippy::cast_possible_truncation)] + pub fn to_pt_regs(&self, rax: u64) -> PtRegs { + PtRegs { + r15: 0, + r14: 0, + r13: self.r13 as usize, + r12: self.r12 as usize, + rbp: 0, + rbx: 0, + r11: self.r11 as usize, + r10: self.r10 as usize, + r9: self.r9 as usize, + r8: self.r8 as usize, + rax: 0, + rcx: self.rcx as usize, + rdx: self.rdx as usize, + rsi: self.rsi as usize, + rdi: self.rdi as usize, + orig_rax: rax as usize, + rip: 0, + cs: 0, + eflags: 0, + rsp: self.rsp as usize, + ss: 0, + } + } +} + +#[unsafe(naked)] +unsafe extern "C" fn syscall_entry_wrapper() { + naked_asm!("jmp syscall_callback"); +} + +/// This function enables 64-bit syscall extensions and sets up the necessary MSRs. +/// It must be called for each core. +/// # Panics +/// Panics if GDT is not initialized for the current core. 
+#[cfg(target_arch = "x86_64")] +pub(crate) fn init(shim: &'static dyn litebox::shim::EnterShim) { + SHIM.call_once(|| shim); + + // enable 64-bit syscall/sysret + let mut efer = Efer::read(); + efer.insert(EferFlags::SYSTEM_CALL_EXTENSIONS); + unsafe { Efer::write(efer) }; + + let syscall_entry_addr = syscall_entry_wrapper as *const () as u64; + LStar::write(VirtAddr::new(syscall_entry_addr)); + + let rflags = RFlags::INTERRUPT_FLAG; + SFMask::write(rflags); + + // configure STAR MSR for CS/SS selectors + let (kernel_cs, user_cs, _) = with_per_cpu_variables(|per_cpu_variables| { + per_cpu_variables + .get_segment_selectors() + .expect("GDT not initialized for the current core") + }); + unsafe { Star::write_raw(user_cs, kernel_cs) }; +} + +#[cfg(target_arch = "x86")] +pub(crate) fn init(_syscall_handler: SyscallHandler) { + todo!("we don't support 32-bit mode syscalls for now"); + // AMD and Intel CPUs have different syscall mechanisms in 32-bit mode. +} diff --git a/litebox_platform_kernel/src/user_context.rs b/litebox_platform_kernel/src/user_context.rs new file mode 100644 index 000000000..bda391d91 --- /dev/null +++ b/litebox_platform_kernel/src/user_context.rs @@ -0,0 +1,110 @@ +//! User context +//! A user context is created for process, TA session, task, or something like that. + +use crate::LiteBoxKernel; +use hashbrown::HashMap; +use litebox_common_linux::errno::Errno; +use x86_64::{VirtAddr, registers::rflags::RFlags}; + +const PAGE_SIZE: usize = 4096; + +/// TODO: Let us consider how to manage multiple user contexts (do we need to maintain inside the platform? +/// can the runner manage this?) For now, this is mostly a placeholder without any meaningful functionality. +/// UserSpace management trait for creating and managing a separate address space for a user process, task, or session. +/// Define it as a trait because it might need to work for various configurations like different page sizes. +#[allow(dead_code)] +pub trait UserSpaceManagement { + /// Create a new user address space (i.e., a new user page table) and context, and returns `userspace_id` for it. + /// The page table also maps the kernel address space (the entire physical space for now, a portion of it in the future) + /// for handling system calls. + fn create_userspace(&self) -> Result; + + /// Delete resources associated with the userspace (`userspace_id`) including its context and page tables. + /// + /// # Safety + /// The caller must ensure that any virtual address pages assigned to this userspace must be unmapped through + /// `LiteBox::PageManager` before calling this function. Otherwise, there will be a memory leak. `PageManager` + /// manages every virtual address page allocated through or for the Shim and apps. + fn delete_userspace(&self, userspace_id: usize) -> Result<(), Errno>; + + /// Check whether the userspace with the given `userspace_id` exists. + fn check_userspace(&self, userspace_id: usize) -> bool; +} + +/// Data structure to hold user context information. All other registers will be stored into a user stack +/// (pointed by `rsp`) and restored by the system call or interrupt handler. +/// TODO: Since the user stack might have no space to store all registers, we can extend this structure in +/// the future to store these registers. 
+pub struct UserContext { + pub page_table: crate::mm::PageTable, + pub rip: VirtAddr, + pub rsp: VirtAddr, + pub rflags: RFlags, +} + +impl UserContext { + /// Create a new user context with the given user page table + #[allow(dead_code)] + pub fn new(user_pt: crate::mm::PageTable) -> Self { + UserContext { + page_table: user_pt, + rip: VirtAddr::new(0), + rsp: VirtAddr::new(0), + rflags: RFlags::INTERRUPT_FLAG, + } + } +} + +/// Data structure to hold a map of user contexts indexed by their ID. +pub struct UserContextMap { + inner: spin::mutex::SpinMutex>, +} + +impl UserContextMap { + pub fn new() -> Self { + UserContextMap { + inner: spin::mutex::SpinMutex::new(HashMap::new()), + } + } +} + +impl Default for UserContextMap { + fn default() -> Self { + Self::new() + } +} + +impl UserSpaceManagement for LiteBoxKernel { + fn create_userspace(&self) -> Result { + let mut inner = self.user_contexts.inner.lock(); + let userspace_id = match inner.keys().max() { + Some(&id) => id.checked_add(1).ok_or(Errno::ENOMEM)?, + None => 1usize, + }; + let user_pt = self.new_user_page_table(); + + let user_ctx: UserContext = UserContext::new(user_pt); + inner.insert(userspace_id, user_ctx); + Ok(userspace_id) + } + + fn delete_userspace(&self, userspace_id: usize) -> Result<(), Errno> { + let mut inner = self.user_contexts.inner.lock(); + let user_pt = inner.get(&userspace_id).unwrap(); + + unsafe { + user_pt.page_table.clean_up(); + } + + let _ = inner.remove(&userspace_id); + Ok(()) + } + + fn check_userspace(&self, userspace_id: usize) -> bool { + let inner = self.user_contexts.inner.lock(); + if inner.contains_key(&userspace_id) { + return true; + } + false + } +} diff --git a/litebox_platform_lvbs/src/host/lvbs_impl.rs b/litebox_platform_lvbs/src/host/lvbs_impl.rs index 354062a09..d3111da3e 100644 --- a/litebox_platform_lvbs/src/host/lvbs_impl.rs +++ b/litebox_platform_lvbs/src/host/lvbs_impl.rs @@ -74,8 +74,11 @@ unsafe impl litebox::platform::ThreadLocalStorageProvider for LvbsLinuxKernel { } unsafe fn replace_thread_local_storage(value: *mut ()) -> *mut () { - let tls = with_per_cpu_variables_mut(|pcv| pcv.tls); - core::mem::replace(&mut tls.as_mut_ptr::<()>(), value.cast()).cast() + with_per_cpu_variables_mut(|pcv| { + let old = pcv.tls; + pcv.tls = x86_64::VirtAddr::new(value as u64); + old.as_u64() as *mut () + }) } } diff --git a/litebox_platform_multiplex/Cargo.toml b/litebox_platform_multiplex/Cargo.toml index 53abe0148..04c70c101 100644 --- a/litebox_platform_multiplex/Cargo.toml +++ b/litebox_platform_multiplex/Cargo.toml @@ -9,6 +9,7 @@ litebox_platform_linux_userland = { path = "../litebox_platform_linux_userland/" litebox_platform_linux_kernel = { path = "../litebox_platform_linux_kernel/", version = "0.1.0", default-features = false, optional = true } litebox_platform_windows_userland = { path = "../litebox_platform_windows_userland/", version = "0.1.0", default-features = false, optional = true } litebox_platform_lvbs = { path = "../litebox_platform_lvbs/", version = "0.1.0", default-features = false, optional = true } +litebox_platform_kernel = { path = "../litebox_platform_kernel/", version = "0.1.0", default-features = false, optional = true } once_cell = { version = "1.20.2", default-features = false, features = ["alloc", "race"] } cfg-if = "1.0.0" @@ -18,6 +19,7 @@ platform_linux_userland = ["dep:litebox_platform_linux_userland"] platform_windows_userland = ["dep:litebox_platform_windows_userland"] platform_lvbs = ["dep:litebox_platform_lvbs"] platform_linux_snp = 
["dep:litebox_platform_linux_kernel"] +platform_kernel = ["dep:litebox_platform_kernel"] systrap_backend = ["platform_linux_userland", "litebox_platform_linux_userland/systrap_backend"] platform_linux_userland_with_linux_syscall = ["platform_linux_userland", "litebox_platform_linux_userland/linux_syscall"] platform_linux_userland_with_optee_syscall = ["platform_linux_userland", "litebox_platform_linux_userland/optee_syscall"] diff --git a/litebox_platform_multiplex/src/lib.rs b/litebox_platform_multiplex/src/lib.rs index fdccec5ea..7ae718128 100644 --- a/litebox_platform_multiplex/src/lib.rs +++ b/litebox_platform_multiplex/src/lib.rs @@ -32,6 +32,8 @@ cfg_if::cfg_if! { pub type Platform = litebox_platform_lvbs::host::LvbsLinuxKernel; } else if #[cfg(feature = "platform_linux_snp")] { pub type Platform = litebox_platform_linux_kernel::host::snp::snp_impl::SnpLinuxKernel; + } else if #[cfg(feature = "platform_kernel")] { + pub type Platform = litebox_platform_kernel::LiteBoxKernel; } else { compile_error!( r##"Hint: you might have forgotten to mark 'default-features = false'."## diff --git a/litebox_runner_optee_on_machine/.cargo/config.toml b/litebox_runner_optee_on_machine/.cargo/config.toml new file mode 100644 index 000000000..6199c4530 --- /dev/null +++ b/litebox_runner_optee_on_machine/.cargo/config.toml @@ -0,0 +1,6 @@ +[unstable] +build-std-features = ["compiler-builtins-mem"] +build-std = ["core", "compiler_builtins", "alloc"] + +[build] +target = "x86_64-unknown-litebox.json" diff --git a/litebox_runner_optee_on_machine/Cargo.toml b/litebox_runner_optee_on_machine/Cargo.toml new file mode 100644 index 000000000..1447acc58 --- /dev/null +++ b/litebox_runner_optee_on_machine/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "litebox_runner_optee_on_machine" +version = "0.1.0" +edition = "2024" + +[dependencies] +litebox = { version = "0.1.0", path = "../litebox" } +litebox_platform_kernel = { version = "0.1.0", path = "../litebox_platform_kernel", default-features = false, features = ["interrupt"] } +litebox_platform_multiplex = { version = "0.1.0", path = "../litebox_platform_multiplex", default-features = false, features = ["platform_kernel"] } +litebox_common_linux = { version = "0.1.0", path = "../litebox_common_linux" } +litebox_common_optee = { version = "0.1.0", path = "../litebox_common_optee" } +litebox_shim_optee = { path = "../litebox_shim_optee/", version = "0.1.0", default-features = false, features = ["platform_kernel"] } +bootloader = { version = "0.9", features = ["map_physical_memory"] } +spin = { version = "0.10.0", default-features = false, features = ["spin_mutex"] } + +[target.'cfg(target_arch = "x86_64")'.dependencies] +x86_64 = { version = "0.15.2", default-features = false, features = ["instructions"] } + +[features] +default = ["qemu"] +qemu = [] + +[lints] +workspace = true diff --git a/litebox_runner_optee_on_machine/rust-toolchain.toml b/litebox_runner_optee_on_machine/rust-toolchain.toml new file mode 100644 index 000000000..e058752cd --- /dev/null +++ b/litebox_runner_optee_on_machine/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly-2025-09-29" diff --git a/litebox_runner_optee_on_machine/src/main.rs b/litebox_runner_optee_on_machine/src/main.rs new file mode 100644 index 000000000..bc7a2a286 --- /dev/null +++ b/litebox_runner_optee_on_machine/src/main.rs @@ -0,0 +1,211 @@ +#![cfg(target_arch = "x86_64")] +#![no_std] +#![no_main] + +use bootloader::{BootInfo, bootinfo::MemoryRegionType, entry_point}; +use core::panic::PanicInfo; +use 
litebox_common_optee::{TeeIdentity, TeeLogin, TeeUuid, UteeEntryFunc, UteeParamOwned}; +use litebox_platform_kernel::{ + arch::{enable_extended_states, enable_fsgsbase, enable_smep, gdt, interrupts}, + debug_serial_println, + mm::MemoryProvider, + per_cpu_variables::{PerCpuVariables, with_per_cpu_variables}, + serial_println, + user_context::UserSpaceManagement, +}; +use litebox_platform_multiplex::Platform; +use litebox_shim_optee::loader::ElfLoadInfo; +use x86_64::VirtAddr; + +#[cfg(not(feature = "qemu"))] +use litebox_platform_kernel::arch::instrs::hlt_loop; + +entry_point!(kernel_start); + +fn kernel_start(bootinfo: &'static BootInfo) -> ! { + enable_fsgsbase(); + enable_extended_states(); + + let stack_top = with_per_cpu_variables(PerCpuVariables::kernel_stack_top); + + unsafe { + core::arch::asm!( + "mov rsp, rax", + "and rsp, -16", + "call {kernel_main}", + in("rdi") bootinfo, in("rax") stack_top, + kernel_main = sym kernel_main, + options(nostack, preserves_flags) + ); + } + + #[cfg(feature = "qemu")] + qemu_exit(); + #[cfg(not(feature = "qemu"))] + hlt_loop(); +} + +unsafe extern "C" fn kernel_main(bootinfo: &'static BootInfo) -> ! { + serial_println!("==========================================="); + serial_println!(" Hello from LiteBox for (Virtual) Machine! "); + serial_println!("==========================================="); + + let phys_mem_offset = VirtAddr::new(bootinfo.physical_memory_offset); + debug_serial_println!("Physical memory offset: {:#x}", phys_mem_offset.as_u64()); + + let memory_map = &bootinfo.memory_map; + for region in memory_map.iter() { + if region.region_type == MemoryRegionType::Usable { + let mem_fill_start = + usize::try_from(phys_mem_offset.as_u64() + region.range.start_frame_number * 4096) + .unwrap(); + let mem_fill_size = usize::try_from( + (region.range.end_frame_number - region.range.start_frame_number) * 4096, + ) + .unwrap(); + unsafe { + Platform::mem_fill_pages(mem_fill_start, mem_fill_size); + } + debug_serial_println!( + "adding a range of memory to the global allocator: start = {:#x}, size = {:#x}", + mem_fill_start, + mem_fill_size + ); + } + } + + let l4_table = unsafe { active_level_4_table_addr() }; + debug_serial_println!("L4 table physical Address: {:#x}", l4_table.as_u64()); + + let platform = Platform::new(l4_table); + debug_serial_println!("LiteBox Platform created at {:p}.", platform); + + gdt::init(); + interrupts::init_idt(); + debug_serial_println!("GDT and IDT initialized."); + + litebox_platform_multiplex::set_platform(platform); + Platform::register_shim(&litebox_shim_optee::OpteeShim); + debug_serial_println!("OP-TEE Shim registered."); + + enable_smep(); + + if let Ok(session_id) = platform.create_userspace() { + let loaded_ta = litebox_shim_optee::loader::load_elf_buffer(TA_BINARY).unwrap(); + run_ta_with_default_commands(session_id, &loaded_ta); + } + + serial_println!("BYE!"); + // TODO: this is QEMU/KVM specific instructions to terminate VM/VMM via + // the `isa-debug-exit` device. Different VMMs have different ways for this. + #[cfg(feature = "qemu")] + qemu_exit(); + #[cfg(not(feature = "qemu"))] + hlt_loop(); +} + +/// This function simply opens and closes a session to the TA to verify that +/// it can be loaded and run. Note that an OP-TEE TA does nothing without +/// a client invoking commands on it. 
+fn run_ta_with_default_commands(session_id: usize, ta_info: &ElfLoadInfo) { + for func_id in [ + UteeEntryFunc::OpenSession, + UteeEntryFunc::InvokeCommand, + UteeEntryFunc::CloseSession, + ] { + match func_id { + UteeEntryFunc::OpenSession => { + let _litebox = litebox_shim_optee::init_session( + &TeeUuid::default(), + &TeeIdentity { + login: TeeLogin::User, + uuid: TeeUuid::default(), + }, + ); + let params = [const { UteeParamOwned::None }; UteeParamOwned::TEE_NUM_PARAMS]; + + // In OP-TEE TA, each command invocation is like (re)starting the TA with a new stack with + // loaded binary and heap. In that sense, we can create (and destroy) a stack + // for each command freely. + let stack = litebox_shim_optee::loader::init_stack( + Some(ta_info.stack_base), + params.as_slice(), + ) + .expect("Failed to initialize stack with parameters"); + let mut pt_regs = litebox_shim_optee::loader::prepare_registers( + ta_info, + &stack, + u32::try_from(session_id).unwrap(), + func_id as u32, + None, + ); + unsafe { + litebox_platform_kernel::run_thread( + litebox_shim_optee::OpteeShim, + &mut pt_regs, + ); + }; + } + UteeEntryFunc::InvokeCommand => { + let mut params = [const { UteeParamOwned::None }; UteeParamOwned::TEE_NUM_PARAMS]; + params[0] = UteeParamOwned::ValueInout { + value_a: 200, + value_b: 0, + out_address: None, + }; + + let stack = litebox_shim_optee::loader::init_stack( + Some(ta_info.stack_base), + params.as_slice(), + ) + .expect("Failed to initialize stack with parameters"); + let mut pt_regs = litebox_shim_optee::loader::prepare_registers( + ta_info, + &stack, + u32::try_from(session_id).unwrap(), + func_id as u32, + Some(1), + ); + unsafe { + litebox_platform_kernel::run_thread( + litebox_shim_optee::OpteeShim, + &mut pt_regs, + ); + }; + } + UteeEntryFunc::CloseSession => { + litebox_shim_optee::deinit_session(); + } + UteeEntryFunc::Unknown => panic!("BUG: Unsupported function ID"), + } + } +} + +unsafe fn active_level_4_table_addr() -> x86_64::PhysAddr { + use x86_64::registers::control::Cr3; + let (level_4_table_frame, _) = Cr3::read(); + level_4_table_frame.start_address() +} + +#[cfg(feature = "qemu")] +fn qemu_exit() -> ! { + const ISA_DEBUG_EXIT_IOBASE: u16 = 0xf4; + const EXIT_CODE: u8 = 1; + unsafe { + core::arch::asm!( + "mov dx, {}; mov al, {}; out dx, al; hlt", + const ISA_DEBUG_EXIT_IOBASE, + const EXIT_CODE, + options(noreturn) + ) + } +} + +// TODO: support loading other TAs (dynamically) +const TA_BINARY: &[u8] = + include_bytes!("../../litebox_runner_optee_on_linux_userland/tests/hello-ta.elf"); + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + loop {} +} diff --git a/litebox_runner_optee_on_machine/x86_64-unknown-litebox.json b/litebox_runner_optee_on_machine/x86_64-unknown-litebox.json new file mode 100644 index 000000000..4b172a610 --- /dev/null +++ b/litebox_runner_optee_on_machine/x86_64-unknown-litebox.json @@ -0,0 +1,16 @@ +{ + "llvm-target": "x86_64-unknown-none", + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", + "arch": "x86_64", + "target-endian": "little", + "target-pointer-width": 64, + "target-c-int-width": 32, + "os": "none", + "executables": true, + "linker-flavor": "ld.lld", + "linker": "rust-lld", + "panic-strategy": "abort", + "disable-redzone": true, + "features": "-mmx,-sse,+soft-float", + "rustc-abi": "x86-softfloat" + } diff --git a/litebox_shim_optee/Cargo.toml b/litebox_shim_optee/Cargo.toml index 5079b9340..8a1736f5b 100644 --- a/litebox_shim_optee/Cargo.toml +++ b/litebox_shim_optee/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] -aes = { version = "0.7", default-features = false } +aes = { version = "0.7", default-features = false, features = ["force-soft" ] } arrayvec = { version = "0.7.6", default-features = false } bitflags = "2.9.0" cfg-if = "1.0.0" @@ -33,6 +33,7 @@ elf_loader = { version = "0.12.0", default-features = false, features = ["rel"] default = ["platform_lvbs"] platform_linux_userland = ["litebox_platform_multiplex/platform_linux_userland_with_optee_syscall"] platform_lvbs = ["litebox_platform_multiplex/platform_lvbs_with_optee_syscall"] +platform_kernel = ["litebox_platform_multiplex/platform_kernel"] [lints] workspace = true diff --git a/litebox_shim_optee/src/loader/elf.rs b/litebox_shim_optee/src/loader/elf.rs index 3ce83f998..1b7850be0 100644 --- a/litebox_shim_optee/src/loader/elf.rs +++ b/litebox_shim_optee/src/loader/elf.rs @@ -140,14 +140,18 @@ impl ElfLoaderMmap { prot: ProtFlags, flags: MapFlags, ) -> elf_loader::Result { + let flags = litebox_common_linux::MapFlags::from_bits( + flags.bits() | MapFlags::MAP_ANONYMOUS.bits(), + ) + .expect("unsupported flags"); + // TODO: implement demand paging in the following platforms to avoid MAP_POPULATE + #[cfg(any(feature = "platform_lvbs", feature = "platform_kernel"))] + let flags = flags | litebox_common_linux::MapFlags::MAP_POPULATE; match crate::syscalls::mm::sys_mmap( addr.unwrap_or(0), len, litebox_common_linux::ProtFlags::from_bits_truncate(prot.bits()), - litebox_common_linux::MapFlags::from_bits( - flags.bits() | MapFlags::MAP_ANONYMOUS.bits(), - ) - .expect("unsupported flags"), + flags, -1, 0, ) { diff --git a/litebox_shim_optee/src/loader/mod.rs b/litebox_shim_optee/src/loader/mod.rs index c409515c2..398da4d33 100644 --- a/litebox_shim_optee/src/loader/mod.rs +++ b/litebox_shim_optee/src/loader/mod.rs @@ -52,10 +52,10 @@ pub fn prepare_registers( rdi: usize::try_from(func_id).unwrap(), orig_rax: 0, rip: ta_info.entry_point, - cs: 0x33, // __USER_CS + cs: 0x2b, // __USER_CS eflags: 0, rsp: stack.get_cur_stack_top(), - ss: 0x2b, // __USER_DS + ss: 0x33, // __USER_DS } } diff --git a/litebox_shim_optee/src/loader/ta_stack.rs b/litebox_shim_optee/src/loader/ta_stack.rs index 282879912..b3ecb7f8f 100644 --- a/litebox_shim_optee/src/loader/ta_stack.rs +++ b/litebox_shim_optee/src/loader/ta_stack.rs @@ -266,9 +266,13 @@ pub(crate) fn allocate_stack(stack_base: Option) -> Option { } else { let length = litebox::mm::linux::NonZeroPageSize::new(super::DEFAULT_STACK_SIZE) .expect("DEFAULT_STACK_SIZE is not page-aligned"); + let flags = 
CreatePagesFlags::empty(); + // TODO: implement demand paging in the following platforms to avoid MAP_POPULATE + #[cfg(any(feature = "platform_lvbs", feature = "platform_kernel"))] + let flags = flags | CreatePagesFlags::POPULATE_PAGES_IMMEDIATELY; unsafe { litebox_page_manager() - .create_stack_pages(None, length, CreatePagesFlags::empty()) + .create_stack_pages(None, length, flags) .ok()? } };
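
A note on the `MemoryProvider` default methods in litebox_platform_kernel/src/mm/mod.rs: they encode a direct-map translation, subtracting `GVA_OFFSET` to reach a physical address and stripping `PRIVATE_PTE_MASK` before adding the offset back to return to a kernel virtual address. The following is a simplified round-trip sketch using plain `u64` values; the offset and mask constants are invented for illustration, not the platform's real ones.

    // Invented constants; each MemoryProvider implementation defines its own.
    const GVA_OFFSET: u64 = 0xffff_8000_0000_0000; // hypothetical direct-map base
    const PRIVATE_PTE_MASK: u64 = 1 << 51; // hypothetical encryption bit

    fn va_to_pa(va: u64) -> u64 {
        va - GVA_OFFSET
    }

    fn pa_to_va(pa: u64) -> u64 {
        // Strip the private bit first, then move back into the direct map.
        (pa & !PRIVATE_PTE_MASK) + GVA_OFFSET
    }

    fn main() {
        let va = GVA_OFFSET + 0x123_4000;
        let pa = va_to_pa(va) | PRIVATE_PTE_MASK; // what make_pa_private would produce
        assert_eq!(pa_to_va(pa), va); // the round trip recovers the kernel VA
    }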
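
On the reentrancy caveat documented for `with_per_cpu_variables_mut` in litebox_platform_kernel/src/per_cpu_variables.rs: the per-CPU pointer is handed out through a `RefCell`, so a nested call observes an active mutable borrow and panics. A minimal standalone sketch of that failure mode, with a plain counter standing in for `PerCpuVariables` and invented names:

    use std::cell::RefCell;

    // Mirrors the borrow_mut-then-call shape of with_per_cpu_variables_mut.
    fn with_mut<R>(cell: &RefCell<u64>, f: impl FnOnce(&mut u64) -> R) -> R {
        // A nested call would find the mutable borrow still active and panic here.
        let mut guard = cell.borrow_mut();
        f(&mut guard)
    }

    fn main() {
        let cell = RefCell::new(0);
        with_mut(&cell, |v| *v += 1); // fine: borrows do not overlap
        // with_mut(&cell, |_| with_mut(&cell, |v| *v += 1)); // would panic: BorrowMutError
        assert_eq!(*cell.borrow(), 1);
    }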
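
For the misaligned-pointer path in litebox_platform_kernel/src/ptr.rs, the `MaybeUninit` plus `copy_nonoverlapping` sequence is equivalent to `core::ptr::read_unaligned`. A reduced sketch under the same assumption the file states (pages are prepopulated, so no fault handling is attempted); `read_user_unaligned` is an illustrative name, not an API of the crate:

    // Illustrative helper, not part of the patch.
    unsafe fn read_user_unaligned<T: Clone>(ptr: *const T, count: isize) -> Option<T> {
        if ptr.is_null() {
            return None;
        }
        // Assumes the backing user pages are present and readable.
        Some(unsafe { core::ptr::read_unaligned(ptr.offset(count)) })
    }

    fn main() {
        let bytes = [0u8; 12];
        // Read a u32 through a pointer that is deliberately not 4-byte aligned.
        let p = bytes[1..].as_ptr().cast::<u32>();
        let v = unsafe { read_user_unaligned(p, 0) };
        assert_eq!(v, Some(0));
    }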
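
litebox_platform_kernel/src/syscall_entry.rs documents the register convention: rax carries the syscall number, rdi/rsi/rdx/r10/r8/r9 carry arguments 0 through 5, r12/r13 carry the two extra OP-TEE arguments, and the `syscall` instruction clobbers rcx and r11. A hypothetical guest-side stub following that convention could look like the sketch below; it is illustrative only and not code from the patch.

    // Hypothetical guest-side stub; argument packing only, no error handling.
    #[cfg(target_arch = "x86_64")]
    unsafe fn raw_syscall8(nr: usize, args: [usize; 8]) -> usize {
        let ret: usize;
        unsafe {
            core::arch::asm!(
                "syscall",
                inlateout("rax") nr => ret,
                in("rdi") args[0],
                in("rsi") args[1],
                in("rdx") args[2],
                in("r10") args[3],
                in("r8") args[4],
                in("r9") args[5],
                in("r12") args[6],
                in("r13") args[7],
                // syscall itself clobbers rcx (return rip) and r11 (rflags).
                lateout("rcx") _,
                lateout("r11") _,
                options(nostack),
            );
        }
        ret
    }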
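
The `Star::write_raw(user_cs, kernel_cs)` call in `syscall_entry.rs` relies on the architectural layout of the STAR MSR: `syscall` loads CS from STAR[47:32] and SS from that value plus 8, while 64-bit `sysret` loads SS from STAR[63:48] plus 8 and CS from STAR[63:48] plus 16. A small sketch of that packing with invented selector values; the platform's real selectors come from its per-CPU GDT.

    // Sketch of the architectural STAR layout; selector values are invented.
    const fn pack_star(user_base: u16, kernel_base: u16) -> u64 {
        ((user_base as u64) << 48) | ((kernel_base as u64) << 32)
    }

    fn main() {
        let kernel_cs: u16 = 0x08; // hypothetical kernel code selector
        let user_base: u16 = 0x1b; // hypothetical sysret base selector (RPL 3)
        let star = pack_star(user_base, kernel_cs);

        // syscall loads CS from STAR[47:32] and SS from that value plus 8.
        assert_eq!((star >> 32) as u16, kernel_cs);

        // sysretq loads SS from STAR[63:48] + 8 and CS from STAR[63:48] + 16, so the
        // user data and 64-bit user code descriptors occupy the two GDT slots
        // immediately after the base selector.
        let (sysret_ss, sysret_cs) = (((star >> 48) as u16) + 8, ((star >> 48) as u16) + 16);
        assert_eq!((sysret_ss, sysret_cs), (0x23, 0x2b));
    }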
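
On the QEMU exit path in litebox_runner_optee_on_machine/src/main.rs, the isa-debug-exit device makes QEMU terminate with host status `(value << 1) | 1`, so the status is always odd and never 0. With `EXIT_CODE = 1` the emulator exits with status 3, which is why any caller that treats a nonzero status as failure has to translate it. A tiny sketch of the mapping:

    // Maps the byte written to the isa-debug-exit port to the exit status
    // QEMU reports to the host shell.
    fn qemu_exit_status(value_written: u8) -> i32 {
        (i32::from(value_written) << 1) | 1
    }

    fn main() {
        // The runner writes EXIT_CODE = 1, so QEMU exits with status 3.
        assert_eq!(qemu_exit_status(1), 3);
        // Even a "success" value of 0 still produces a nonzero status.
        assert_eq!(qemu_exit_status(0), 1);
    }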