From e0ee6f119103c973b7f67f9faba0a67229b3a4e8 Mon Sep 17 00:00:00 2001
From: Ruslan Pislari <ruslan.pislari@gcore.com>
Date: Wed, 10 Jun 2026 13:02:17 +0300
Subject: [PATCH] feat: improve WasmConfig initialization for performance and
 memory efficiency

---
 crates/runtime/src/lib.rs | 107 ++++++++------------------------------
 1 file changed, 21 insertions(+), 86 deletions(-)
diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs
index cc1c973..0e20791 100644
--- a/crates/runtime/src/lib.rs
+++ b/crates/runtime/src/lib.rs
@@ -16,8 +16,8 @@ use http_backend::Backend;
 use limiter::ProxyLimiter;
 use wasmtime::component::Component;
 use wasmtime::{
-    Engine, InstanceAllocationStrategy, Module, OptLevel, PoolingAllocationConfig,
-    ProfilingStrategy, WasmBacktraceDetails,
+    Engine, InstanceAllocationStrategy, Module, PoolingAllocationConfig, ProfilingStrategy,
+    WasmBacktraceDetails,
 };
 use wit_component::ComponentEncoder;
 
@@ -194,15 +194,11 @@ impl<T> Data<T> {
     }
 }
 
-/// Global Engine configuration for `WasmEngineBuilder`.
+/// Global Engine configuration used to build a [`wasmtime::Engine`].
 pub struct WasmConfig {
     inner: wasmtime::Config,
 }
 
-pub struct WasmConfigBuilder {
-    max_execution_stacks: Option<u32>,
-}
-
 impl Deref for WasmConfig {
     type Target = wasmtime::Config;
 
@@ -236,13 +232,27 @@ impl Default for WasmConfig {
     fn default() -> Self {
         let mut inner = wasmtime::Config::new();
         inner.debug_info(false); // Keep this disabled - wasmtime will hang if enabled
+
+        // Standalone, non-concurrent: spend compile time once to get the fastest
+        // generated code, since each module is compiled then executed hot.
+        inner.cranelift_opt_level(wasmtime::OptLevel::Speed);
+
+        // Debug runner: keep full, symbolized guest backtraces. This is set for
+        // every build profile (no cfg gate); we are optimizing execution CPU time,
+        // not trap-path cost, and detailed backtraces are the point of this runner.
+        inner.wasm_backtrace(true);
         inner.wasm_backtrace_details(WasmBacktraceDetails::Enable);
+
         inner.async_support(true);
         inner.consume_fuel(false); // this is custom Gcore setting
         inner.profiler(ProfilingStrategy::None);
-        inner.epoch_interruption(true); // this is custom Gcore setting
+        inner.epoch_interruption(true); // required by store.rs timeout mechanism
         inner.wasm_component_model(true);
 
+        // Fast instantiation: map the initialized image copy-on-write instead of
+        // memcpy'ing it on every instantiation.
+        inner.memory_init_cow(true);
+
         const MB: usize = 1 << 20;
         let mut pooling_allocation_config = PoolingAllocationConfig::default();
 
@@ -264,84 +274,9 @@ impl Default for WasmConfig {
         // function can end up in the table
         pooling_allocation_config.table_elements(98765);
 
-        // Maximum number of slots in the pooling allocator to keep "warm", or those
-        // to keep around to possibly satisfy an affine allocation request or an
-        // instantiation of a module previously instantiated within the pool.
-        pooling_allocation_config.max_unused_warm_slots(10);
-
-        inner.allocation_strategy(InstanceAllocationStrategy::Pooling(
-            pooling_allocation_config,
-        ));
-
-        WasmConfig { inner }
-    }
-}
-
-impl WasmConfig {
-    pub fn builder() -> WasmConfigBuilder {
-        WasmConfigBuilder {
-            max_execution_stacks: None,
-        }
-    }
-}
-
-impl WasmConfigBuilder {
-    pub fn max_execution_stacks(&mut self, max: u32) {
-        self.max_execution_stacks = Some(max);
-    }
-
-    pub fn build(self) -> WasmConfig {
-        let mut inner = wasmtime::Config::new();
-        inner.debug_info(false); // Keep this disabled - wasmtime will hang if enabled
-        inner.wasm_backtrace_details(WasmBacktraceDetails::Enable);
-        inner.async_support(true);
-        inner.consume_fuel(false); // this is custom Gcore setting
-        inner.profiler(ProfilingStrategy::None);
-        inner.epoch_interruption(true); // this is custom Gcore setting
-        inner.wasm_component_model(true);
-
-        // Performance: explicit opt level and make PC→wasm address map generation configurable.
-        // The address map improves trap/backtrace diagnostics, but increases compiled artifact
-        // size and per-instantiation overhead. Keep the current production behavior by default
-        // in release builds, enable it by default in debug builds, and allow explicit override
-        // for investigations via WASM_GENERATE_ADDRESS_MAP=true/false.
-        inner.cranelift_opt_level(OptLevel::Speed);
-        let generate_address_map = std::env::var("WASM_GENERATE_ADDRESS_MAP")
-            .ok()
-            .and_then(|value| match value.trim().to_ascii_lowercase().as_str() {
-                "1" | "true" | "yes" | "on" => Some(true),
-                "0" | "false" | "no" | "off" => Some(false),
-                _ => None,
-            })
-            .unwrap_or(cfg!(debug_assertions));
-        inner.generate_address_map(generate_address_map);
-
-        const MB: usize = 1 << 20;
-        let mut pooling_allocation_config = PoolingAllocationConfig::default();
-
-        if let Some(total) = self.max_execution_stacks {
-            pooling_allocation_config.total_stacks(total);
-            pooling_allocation_config.total_memories(total);
-            pooling_allocation_config.total_tables(total);
-            pooling_allocation_config.total_component_instances(total);
-            pooling_allocation_config.total_gc_heaps(total);
-            pooling_allocation_config.total_core_instances(total);
-        }
-
-        pooling_allocation_config.max_core_instance_size(MB);
-        pooling_allocation_config.max_tables_per_module(10);
-        pooling_allocation_config.max_memories_per_module(10);
-        pooling_allocation_config.table_elements(98765);
-        pooling_allocation_config.max_unused_warm_slots(10);
-
-        // Performance: keep pages warm between instantiations (Linux only).
-        // Avoids madvise(MADV_DONTNEED) syscalls when a pooled slot is reused, replacing them
-        // with a cheaper memset up to the threshold — reducing per-request deallocation latency.
-        pooling_allocation_config.linear_memory_keep_resident(2 * MB);
-        pooling_allocation_config.table_keep_resident(512 * 1024);
-
-        // Performance: batch decommit operations to amortize syscall overhead.
-        pooling_allocation_config.decommit_batch_size(16);
+        // No concurrency: at most one instance is ever live, so don't keep extra
+        // slots warm (was 10, tuned for a multi-tenant server).
+        pooling_allocation_config.max_unused_warm_slots(1);
 
         inner.allocation_strategy(InstanceAllocationStrategy::Pooling(
             pooling_allocation_config,