diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e7ee8b3d..81f443fb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,7 @@ set(mi_sources src/options.c src/os.c src/page.c + src/profile.c src/random.c src/segment.c src/segment-map.c @@ -746,7 +747,7 @@ if (MI_BUILD_TESTS) enable_testing() # static link tests - foreach(TEST_NAME api api-fill stress) + foreach(TEST_NAME api api-fill stress profile) add_executable(mimalloc-test-${TEST_NAME} test/test-${TEST_NAME}.c) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index fe3528fac..106e3d0c8 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -1162,5 +1162,43 @@ static inline void _mi_memzero_aligned(void* dst, size_t n) { } #endif +// ------------------------------------------------------------------ +// Heap profiler fast-path hooks (profile.c / profile.h) +// +// Inlined here so that the common case (profiling disabled or no +// sample due) adds only a handful of instructions at each call site. +// The slow paths are noinline and defined in profile.c. +// +// Thread safety: see profile.h for the acquire/release protocol on +// _mi_profiler.enabled. +// ------------------------------------------------------------------ +#include "mimalloc/profile.h" + +static inline void _mi_profiler_on_alloc(mi_heap_t* heap, mi_page_t* page, void* ptr, size_t size) { + // Relaxed load: we only need to know whether to do any work. The acquire + // that synchronizes on_alloc/on_free/record_extra_bytes visibility is in + // the slow path, which is the first place we actually read those fields. + if mi_likely(!mi_atomic_load_relaxed(&_mi_profiler.enabled)) return; + mi_profiler_tld_t* ptld = &heap->tld->profiler; + if (ptld->in_profiler) return; + ptld->bytes_since_sample += size; // unsigned: wraps at 2^64 bytes (~18 EB), harmless in practice + if mi_likely(ptld->bytes_since_sample < ptld->next_threshold) return; + _mi_profiler_on_alloc_slow(heap, page, ptr, size); +} + +static inline void _mi_profiler_on_free_local(mi_page_t* page, void* ptr) { + // No acquire load on enabled: has_metadata is only ever set by the owning thread + // after it observed enabled=true via an acquire load in _mi_profiler_on_alloc. + // If has_metadata is true, all profiler fields are already visible to this thread. + if mi_likely(!page->has_metadata) return; + _mi_profiler_on_free_local_slow(page, ptr); +} + +static inline void _mi_profiler_on_free_collected(mi_page_t* page, mi_block_t* head) { + // Same argument as _mi_profiler_on_free_local. + if mi_likely(!page->has_metadata) return; + _mi_profiler_on_free_collected_slow(page, head); +} + #endif diff --git a/include/mimalloc/profile.h b/include/mimalloc/profile.h new file mode 100644 index 000000000..38ac2c42c --- /dev/null +++ b/include/mimalloc/profile.h @@ -0,0 +1,118 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/
+#pragma once
+#ifndef MIMALLOC_PROFILE_H
+#define MIMALLOC_PROFILE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "mimalloc/atomic.h"
+
+// Forward declarations — full types come from types.h / internal.h.
+typedef struct mi_page_s mi_page_t;
+typedef struct mi_block_s mi_block_t;
+typedef struct mi_heap_s mi_heap_t;
+
+// ------------------------------------------------------------------
+// Allocation record: one node per sampled live allocation, stored in
+// a singly-linked list at page->metadata. Opaque to callers; the
+// callbacks receive user_data directly.
+//
+// `ptr`       — the sampled user pointer; used internally to match frees.
+// `user_data` — flexible array member for caller-owned metadata.
+//               The number of bytes available is
+//               _mi_profiler.record_extra_bytes, set at
+//               mi_profiler_enable() time. Typical uses: a captured
+//               stack trace, allocation size and weight for on_free,
+//               a pointer to an external profiler node, or a timestamp.
+//               The profiler does not initialize this region and never
+//               reads it.
+// Alignment: user_data begins at offset sizeof(mi_alloc_record_t)
+// from the allocation base (2 * sizeof(void*): 16 bytes on 64-bit,
+// 8 bytes on 32-bit), so it is suitably aligned for any scalar or
+// pointer type. SIMD types requiring > 16-byte alignment are not
+// guaranteed to be aligned.
+// ------------------------------------------------------------------
+typedef struct mi_alloc_record_s {
+  void* ptr;
+  struct mi_alloc_record_s* next;
+  char user_data[];   // length = _mi_profiler.record_extra_bytes
+} mi_alloc_record_t;
+
+// ------------------------------------------------------------------
+// User-supplied callbacks.
+//
+// on_alloc: called when a sample is taken.
+//   `user_data` — caller-owned region (record_extra_bytes bytes);
+//                 may write anything here for use in on_free.
+//                 NULL if record_extra_bytes is 0.
+//   `ptr` — the sampled user pointer.
+//   `requested_size` — size passed by the caller to malloc/calloc/etc.
+//   `usable_size` — actual usable bytes after size-class rounding;
+//                   reflects true memory consumption.
+//   `threshold` — the threshold (bytes) that triggered this sample.
+//   `bytes_since_last_sample` — bytes accumulated since the last sample; the
+//                               statistical weight of this sample.
+//   `heap_tag` — tag of the heap that made the allocation, set via
+//                mi_heap_new_ex(). Zero for the default heap.
+//   Returns the number of bytes to accumulate before the next sample.
+//   Returning 0 causes the next allocation to be sampled immediately.
+//
+// on_free: called when a sampled allocation is freed.
+//   `user_data` — the same region written during on_alloc. Valid only
+//                 for the duration of the callback; do not retain the pointer.
+//   `ptr` — the freed user pointer.
+//   May be NULL if free-time notification is not needed.
+// ------------------------------------------------------------------
+typedef size_t (*mi_profiler_alloc_cb)(void* user_data, void* ptr, size_t requested_size, size_t usable_size, size_t threshold, size_t bytes_since_last_sample, uint8_t heap_tag);
+typedef void (*mi_profiler_free_cb)(void* user_data, void* ptr);
+
+// ------------------------------------------------------------------
+// Global profiler configuration.
+//
+// Profiling is one-way: once enabled it cannot be disabled.
+//
+// `enabled` is _Atomic(bool) so that mi_profiler_enable() can be called
+// from any thread.
The store uses release order; reads in the inline +// fast-path hooks (in internal.h) use relaxed order (sufficient to decide +// whether to do any work); the slow path uses acquire order to ensure +// on_alloc, on_free, and record_extra_bytes are visible before they are read. +// ------------------------------------------------------------------ +typedef struct mi_profiler_s { + _Atomic(bool) enabled; + mi_profiler_alloc_cb on_alloc; // non-NULL when enabled=true + mi_profiler_free_cb on_free; // may be NULL + size_t record_extra_bytes; // bytes allocated after each mi_alloc_record_t for user_data +} mi_profiler_t; + +extern mi_profiler_t _mi_profiler; + +// ------------------------------------------------------------------ +// Public API — must be called at most once. May be called from any +// thread, before or after other threads have started. Each thread +// samples its first allocation immediately; the on_alloc callback +// controls all subsequent thresholds. +// +// Returns true on success, false if: +// - profiling was already enabled (called more than once), or +// - on_alloc is NULL, or +// - on_free is NULL but record_extra_bytes > 0, or +// - record_extra_bytes would overflow the record allocation size. +// ------------------------------------------------------------------ +bool mi_profiler_enable(size_t record_extra_bytes, mi_profiler_alloc_cb on_alloc, mi_profiler_free_cb on_free); + +// ------------------------------------------------------------------ +// Slow-path implementations (defined in profile.c). +// The inline fast-path wrappers are in internal.h so they have +// access to the full type definitions they need. +// ------------------------------------------------------------------ +void _mi_profiler_on_alloc_slow(mi_heap_t* heap, mi_page_t* page, void* ptr, size_t size); +void _mi_profiler_on_free_local_slow(mi_page_t* page, void* ptr); +void _mi_profiler_on_free_collected_slow(mi_page_t* page, mi_block_t* head); + +#endif // MIMALLOC_PROFILE_H diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index f13cd331a..990fa4a1e 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -317,6 +317,9 @@ typedef uintptr_t mi_thread_free_t; // at least one block that will be added, or as already been added, to // the owning heap `thread_delayed_free` list. This guarantees that pages // will be freed correctly even if only other threads free blocks. +// Forward declaration for the profiler record list stored in page->metadata. 
+struct mi_alloc_record_s; + typedef struct mi_page_s { // "owned" by the segment uint32_t slice_count; // slices in this page (0 if not a page) @@ -330,7 +333,9 @@ typedef struct mi_page_s { uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized - uint8_t retire_expire:7; // expiration count for retired blocks + uint8_t has_metadata:1; // `true` if page->metadata is non-NULL; on the same cache line as + // the hot free fields to avoid a cache miss on every deallocation + uint8_t retire_expire:6; // expiration count for retired blocks (max value is MI_RETIRE_CYCLES=16, so 6 bits suffices) mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) @@ -352,7 +357,12 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // 64-bit 11 words, 32-bit 13 words, (+2 for secure) - void* padding[1]; + // `metadata` is the head of a singly-linked list of profiler allocation + // records (mi_alloc_record_t, defined in profile.h). It is zeroed when + // the page is cleared (segment.c:mi_segment_page_clear) and must be + // re-initialized when the page is reused. NULL when profiling is off or + // no sampled allocations are live on this page. + struct mi_alloc_record_s* metadata; } mi_page_t; @@ -624,6 +634,13 @@ typedef struct mi_segments_tld_s { } mi_segments_tld_t; // Thread local data +// Per-thread profiler state (see include/mimalloc/profile.h) +typedef struct mi_profiler_tld_s { + size_t bytes_since_sample; // bytes allocated since the last sample + size_t next_threshold; // sample when bytes_since_sample reaches this value + bool in_profiler; // reentrancy guard: skip profiling inside profiler code +} mi_profiler_tld_t; + struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count bool recurse; // true if deferred was called; used to prevent infinite recursion. @@ -631,6 +648,7 @@ struct mi_tld_s { mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) mi_segments_tld_t segments; // segment tld mi_stats_t stats; // statistics + mi_profiler_tld_t profiler; // heap profiler state }; diff --git a/src/alloc.c b/src/alloc.c index 682557293..435d5069a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -111,6 +111,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ #endif #endif + _mi_profiler_on_alloc(heap, page, block, size - MI_PADDING_SIZE); return block; } @@ -303,6 +304,10 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero, // mi_track_resize(p,size,newsize) // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } if (usable_post!=NULL) { *usable_post = mi_page_usable_block_size(page); } + // TODO(profiler): if p has a live profiler record, notify the profiler of the + // resize so it can update the recorded size. The allocate-and-copy path below + // is handled correctly because it goes through mi_free + mi_heap_umalloc which + // hit the existing on_free and on_alloc hooks. 
return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_umalloc(heap,newsize,usable_post); diff --git a/src/free.c b/src/free.c index cd8396c32..a9f7cf93d 100644 --- a/src/free.c +++ b/src/free.c @@ -34,6 +34,11 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool // checks if mi_unlikely(mi_check_is_double_free(page, block)) return; if (!was_guarded) { mi_check_padding(page, block); } + // Profiler hook fires before any page state (local_free, used) is modified, + // so page is fully consistent. mi_record_free may call mi_free internally, + // which is safe because in_profiler suppresses recursion and the page is + // in a valid pre-free state. + _mi_profiler_on_free_local(page, block); if (track_stats) { mi_stat_free(page, block); } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN if (!mi_page_is_huge(page)) { // huge page content may be already decommitted diff --git a/src/init.c b/src/init.c index f1882108b..6f934f3ca 100644 --- a/src/init.c +++ b/src/init.c @@ -20,6 +20,7 @@ const mi_page_t _mi_page_empty = { 0, // reserved capacity { 0 }, // flags false, // is_zero + false, // has_metadata 0, // retire_expire NULL, // free NULL, // local_free @@ -34,7 +35,7 @@ const mi_page_t _mi_page_empty = { MI_ATOMIC_VAR_INIT(0), // xthread_free MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL - , { 0 } // padding + , NULL // metadata }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) @@ -140,7 +141,8 @@ mi_decl_cache_align static const mi_tld_t tld_empty = { false, NULL, NULL, { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, tld_empty_stats }, // segments - { sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL } // stats + { sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats + { 0, 0, false } // profiler }; mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { @@ -156,7 +158,8 @@ static mi_decl_cache_align mi_tld_t tld_main = { 0, false, &_mi_heap_main, & _mi_heap_main, { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, &tld_main.stats }, // segments - { sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL } // stats + { sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats + { 0, 0, false } // profiler }; mi_decl_cache_align mi_heap_t _mi_heap_main = { diff --git a/src/page.c b/src/page.c index 9dd4c4afd..acd4ed05b 100644 --- a/src/page.c +++ b/src/page.c @@ -211,6 +211,8 @@ static void _mi_page_thread_free_collect(mi_page_t* page) return; // the thread-free items cannot be freed } + _mi_profiler_on_free_collected(page, head); + // and append the current local free list mi_block_set_next(page,tail, page->local_free); page->local_free = head; diff --git a/src/profile.c b/src/profile.c new file mode 100644 index 000000000..a3ad3c671 --- /dev/null +++ b/src/profile.c @@ -0,0 +1,217 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Heap profiler implementation. +// +// Thread safety: +// _mi_profiler.enabled — _Atomic(bool). +// Written with release order in mi_profiler_enable. +// Read with relaxed order in the inline fast paths +// (internal.h); acquire order in the slow path. 
+// Any thread observing enabled=true +// is guaranteed to see on_alloc, on_free, +// and record_extra_bytes written before the +// release store (C11 release-acquire). +// enabled is never set back to false. +// _mi_profiler.on_alloc/on_free, +// _mi_profiler.record_extra_bytes — non-atomic; only read after observing +// enabled=true via an acquire load. +// tld->profiler.* — owning thread only; no atomics needed. +// page->metadata — owning thread only for all accesses: +// alloc: _mi_profiler_on_alloc (owning thread) +// local free: _mi_profiler_on_free_local (owning thread) +// collected free: _mi_profiler_on_free_collected called from +// _mi_page_thread_free_collect on the owning thread +// after the atomic claim of xthread_free completes. + +#include "mimalloc.h" +#include "mimalloc/internal.h" + +// On sampling rate and has_metadata: +// +// The on_alloc callback controls the sampling rate by returning the next +// threshold. At a constant rate of 1 MiB: E[live records per page] = +// page_capacity / rate, so: +// small pages (64 KiB): ~0.06 records → P(has_metadata) ≈ 6% +// medium pages (512 KiB): ~0.5 records → P(has_metadata) ≈ 39% +// +// The has_metadata bit (checked on every free) is therefore a reliable +// fast-path skip for small pages but not for medium pages. A rate of +// ~5 MiB would bring medium pages below 10%, at the cost of coarser +// profiling resolution. +// +// A future improvement would be adaptive per-size-class sampling: separate +// (bytes_since_sample, next_threshold) pairs per size class in the tld, +// with bytes from each class counted independently. This makes has_metadata +// unlikely across all classes without biasing the profile. + +// ------------------------------------------------------------------ +// Global state +// ------------------------------------------------------------------ + +mi_decl_cache_align mi_profiler_t _mi_profiler = { .enabled = false, .on_alloc = NULL, .on_free = NULL, .record_extra_bytes = 0 }; + +// ------------------------------------------------------------------ +// Public API +// ------------------------------------------------------------------ + +bool mi_profiler_enable(size_t record_extra_bytes, + mi_profiler_alloc_cb on_alloc, mi_profiler_free_cb on_free) { + if (on_alloc == NULL) return false; + if (on_free == NULL && record_extra_bytes > 0) return false; // user_data would always be NULL + if (record_extra_bytes > (SIZE_MAX - sizeof(mi_alloc_record_t))) return false; // overflow in mi_record_alloc + + // Claim exclusive write rights to the non-atomic fields. The CAS ensures + // that if two threads call enable concurrently only one proceeds; the other + // returns false without touching the fields (avoiding a data race). + static _Atomic(bool) s_claimed; + bool expected = false; + if (!mi_atomic_cas_strong_acq_rel(&s_claimed, &expected, true)) return false; + + _mi_profiler.record_extra_bytes = record_extra_bytes; + _mi_profiler.on_alloc = on_alloc; + _mi_profiler.on_free = on_free; + // Release store: any thread that subsequently observes enabled=true via an + // acquire load is guaranteed to see all the fields written above. + mi_atomic_store_release(&_mi_profiler.enabled, true); + return true; +} + +// ------------------------------------------------------------------ +// Record memory management +// +// Each record is sizeof(mi_alloc_record_t) + record_extra_bytes bytes. 
+// The extra bytes (user_data) are passed directly to the callbacks +// but are not initialized; the callback must set any fields it will read. +// +// Records are allocated from the same mimalloc heap as the sampled +// object by calling _mi_heap_malloc_zero from inside the allocator. +// This is safe because: +// 1. The profiler hook fires at the end of _mi_page_malloc_zero, after +// page->free and page->used have already been updated. The page is +// in a fully consistent state before mi_record_alloc is entered. +// 2. in_profiler is set to true before calling _mi_heap_malloc_zero, +// so the inline fast path returns immediately if the record +// allocation itself triggers the threshold check, preventing +// infinite recursion. +// 3. mimalloc holds no locks at the point the hook fires; all +// allocation state is thread-local, so there is nothing to +// deadlock against. +// Note: if the heap's free pages are exhausted, _mi_heap_malloc_zero +// may fall through to _mi_malloc_generic and perform segment allocation +// or OS calls. This only occurs on the slow path (when a sample is +// actually taken) so it does not affect steady-state allocation cost. +// ------------------------------------------------------------------ + +static mi_alloc_record_t* mi_record_alloc(mi_heap_t* heap) { + size_t sz = sizeof(mi_alloc_record_t) + _mi_profiler.record_extra_bytes; + heap->tld->profiler.in_profiler = true; + mi_alloc_record_t* rec = (mi_alloc_record_t*)_mi_heap_malloc_zero(heap, sz, false); + heap->tld->profiler.in_profiler = false; + return rec; +} + +static void mi_record_free(mi_heap_t* heap, mi_alloc_record_t* rec) { + heap->tld->profiler.in_profiler = true; + if (_mi_profiler.on_free != NULL) { + void* user_data = (_mi_profiler.record_extra_bytes > 0) ? rec->user_data : NULL; + _mi_profiler.on_free(user_data, rec->ptr); + } + mi_free(rec); + heap->tld->profiler.in_profiler = false; +} + +// ------------------------------------------------------------------ +// Per-page record list +// ------------------------------------------------------------------ + +static void mi_page_record_push(mi_page_t* page, mi_alloc_record_t* rec) { + rec->next = page->metadata; + page->metadata = rec; + page->has_metadata = true; +} + +// Returns the matching record (removed from the list), or NULL. +// Linear search: at typical sampling rates the expected list length << 1. +static mi_alloc_record_t* mi_page_record_pop(mi_page_t* page, void* ptr) { + mi_alloc_record_t** pp = &page->metadata; + mi_alloc_record_t* rec = *pp; + while (rec != NULL) { + if (rec->ptr == ptr) { + *pp = rec->next; + if (page->metadata == NULL) { page->has_metadata = false; } + return rec; + } + pp = &rec->next; + rec = *pp; + } + return NULL; +} + +// ------------------------------------------------------------------ +// Slow paths (noinline — only reached when work is needed) +// ------------------------------------------------------------------ + +void mi_decl_noinline _mi_profiler_on_alloc_slow(mi_heap_t* heap, mi_page_t* page, void* ptr, size_t size) { + // Acquire load here (not in the fast path) so that on_alloc, on_free, and + // record_extra_bytes are visible before we read them below. 
+ if mi_unlikely(!mi_atomic_load_acquire(&_mi_profiler.enabled)) return; + + mi_profiler_tld_t* ptld = &heap->tld->profiler; + size_t usable_size = mi_page_usable_block_size(page); + + size_t bytes_since_last_sample = ptld->bytes_since_sample; + ptld->bytes_since_sample = 0; + + // on_alloc is non-NULL (asserted in mi_profiler_enable) and visible via the + // acquire load above. The callback returns the next threshold. + if (_mi_profiler.on_free != NULL) { + // Allocate a record to carry user_data from on_alloc to on_free. + mi_alloc_record_t* rec = mi_record_alloc(heap); + if mi_unlikely(rec == NULL) return; // OOM: skip this sample + rec->ptr = ptr; + void* user_data = (_mi_profiler.record_extra_bytes > 0) ? rec->user_data : NULL; + ptld->next_threshold = _mi_profiler.on_alloc(user_data, ptr, size, usable_size, ptld->next_threshold, bytes_since_last_sample, heap->tag); + mi_page_record_push(page, rec); + } else { + // No on_free: no record needed; user_data is NULL. + ptld->next_threshold = _mi_profiler.on_alloc(NULL, ptr, size, usable_size, ptld->next_threshold, bytes_since_last_sample, heap->tag); + } +} + +void mi_decl_noinline _mi_profiler_on_free_local_slow(mi_page_t* page, void* ptr) { + mi_heap_t* heap = mi_page_heap(page); + // Guard against re-entry: mi_record_free calls mi_free to release the record + // itself, which would re-enter this hook. The record object is never sampled + // (in_profiler suppresses sampling during record allocation too), so skipping + // it here is correct. + if (heap->tld->profiler.in_profiler) return; + + mi_alloc_record_t* rec = mi_page_record_pop(page, ptr); + if mi_unlikely(rec != NULL) { + mi_record_free(heap, rec); + } +} + +void mi_decl_noinline _mi_profiler_on_free_collected_slow(mi_page_t* page, mi_block_t* head) { + mi_heap_t* heap = mi_page_heap(page); + // Same re-entry guard as _mi_profiler_on_free_local_slow. + if (heap->tld->profiler.in_profiler) return; + + mi_block_t* block = head; + while (block != NULL) { + mi_block_t* next = mi_block_next(page, block); + + mi_alloc_record_t* rec = mi_page_record_pop(page, (void*)block); + if mi_unlikely(rec != NULL) { + mi_record_free(heap, rec); + } + if (page->metadata == NULL) break; // no more records; skip remaining blocks + + block = next; + } +} diff --git a/src/segment.c b/src/segment.c index f440dc01a..c3057f814 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1034,6 +1034,9 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld _mi_os_reset(start, psize); } + // profiler records must have been freed before the page is recycled + mi_assert_internal(page->metadata == NULL && !page->has_metadata); + // zero the page data, but not the segment fields and heap tag page->is_zero_init = false; uint8_t heap_tag = page->heap_tag; diff --git a/test/test-profile.c b/test/test-profile.c new file mode 100644 index 000000000..2530127dd --- /dev/null +++ b/test/test-profile.c @@ -0,0 +1,182 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Tests for the mimalloc heap profiler (src/profile.c). 
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "mimalloc/profile.h"
+#include "testhelper.h"
+
+// ---------------------------------------------------------------------------
+// Shared callback state
+// ---------------------------------------------------------------------------
+
+typedef struct {
+  int alloc_count;
+  int free_count;
+  size_t last_size;
+  size_t last_upscaled;
+  void* last_ptr;
+} profile_state_t;
+
+// We store ptr in user_data so on_free can verify the round-trip.
+
+static profile_state_t g_state;
+
+#define TEST_THRESHOLD (16 * 1024)
+
+static size_t on_alloc(void* user_data, void* ptr, size_t requested_size, size_t usable_size, size_t threshold, size_t bytes_since_last_sample, uint8_t heap_tag) {
+  (void)usable_size;
+  (void)threshold;
+  (void)heap_tag;
+  g_state.alloc_count++;
+  g_state.last_ptr = ptr;
+  g_state.last_size = requested_size;
+  g_state.last_upscaled = bytes_since_last_sample;
+  // store ptr in user_data so on_free can verify the round-trip
+  memcpy(user_data, &ptr, sizeof(ptr));
+  return TEST_THRESHOLD;
+}
+
+static void on_free(void* user_data, void* ptr) {
+  g_state.free_count++;
+  // verify the user_data round-trip
+  void* stored;
+  memcpy(&stored, user_data, sizeof(stored));
+  assert(stored == ptr);
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+// Force at least one sample by allocating well over the threshold.
+static void allocate_past_threshold(void) {
+  size_t total = 0;
+  while (total < TEST_THRESHOLD * 3) {
+    void* p = mi_malloc(4096);
+    mi_free(p);
+    total += 4096;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+bool test_profiler_samples(void) {
+  CHECK_BODY("profiler: on_alloc called after threshold") {
+    int before = g_state.alloc_count;
+    allocate_past_threshold();
+    result = (g_state.alloc_count > before);
+  }
+  return true;
+}
+
+bool test_profiler_record_fields(void) {
+  CHECK_BODY("profiler: record ptr and size are non-zero") {
+    int before = g_state.alloc_count;
+    while (g_state.alloc_count == before) {
+      void* p = mi_malloc(1024);
+      mi_free(p);
+    }
+    result = (g_state.last_ptr != NULL && g_state.last_size > 0 && g_state.last_upscaled > 0);
+  }
+  return true;
+}
+
+bool test_profiler_on_free_called(void) {
+  CHECK_BODY("profiler: on_free called for sampled allocation") {
+    int alloc_before = g_state.alloc_count;
+    int free_before = g_state.free_count;
+
+    // Keep the pointer live until we confirm a sample was taken, then free it.
+    void* sampled = NULL;
+    while (g_state.alloc_count == alloc_before) {
+      if (sampled) { mi_free(sampled); }
+      sampled = mi_malloc(1024);
+    }
+    // At this point g_state.last_ptr is the sampled pointer.
+    // Free it and check on_free fires.
+ void* expected = g_state.last_ptr; + mi_free(expected); + sampled = NULL; + + result = (g_state.free_count > free_before); + } + return true; +} + +bool test_profiler_upscaled_at_least_size(void) { + CHECK_BODY("profiler: upscaled_size >= size") { + int before = g_state.alloc_count; + while (g_state.alloc_count == before) { + void* p = mi_malloc(256); + mi_free(p); + } + result = (g_state.last_upscaled >= g_state.last_size); + } + return true; +} + +bool test_profiler_free_count_le_alloc_count(void) { + CHECK_BODY("profiler: on_free never called more times than on_alloc") { + // Free can only fire for sampled allocations, so free_count <= alloc_count + // must hold at all times. + allocate_past_threshold(); + result = (g_state.free_count <= g_state.alloc_count); + } + return true; +} + +bool test_profiler_enable_invalid_params(void) { + CHECK_BODY("profiler: enable rejects NULL on_alloc") { + result = !mi_profiler_enable(0, NULL, NULL); + } + CHECK_BODY("profiler: enable rejects non-zero record_extra_bytes with NULL on_free") { + result = !mi_profiler_enable(sizeof(void*), on_alloc, NULL); + } + CHECK_BODY("profiler: enable rejects record_extra_bytes overflow") { + result = !mi_profiler_enable(SIZE_MAX, on_alloc, on_free); + } + return true; +} + +bool test_profiler_enable_once(void) { + CHECK_BODY("profiler: enable returns true on first call") { + result = mi_profiler_enable(sizeof(void*), on_alloc, on_free); + } + CHECK_BODY("profiler: enable returns false on second call") { + result = !mi_profiler_enable(sizeof(void*), on_alloc, on_free); + } + return true; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- + +int main(void) { + // Invalid-param and double-call tests must run before profiling is enabled. + test_profiler_enable_invalid_params(); + test_profiler_enable_once(); // enables profiling with sizeof(void*) extra bytes + + test_profiler_samples(); + test_profiler_record_fields(); + test_profiler_on_free_called(); + test_profiler_upscaled_at_least_size(); + test_profiler_free_count_le_alloc_count(); + + return print_test_summary(); +}
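
Not part of the patch: below is a minimal usage sketch for reviewers showing how a client could drive the new hooks end to end. It assumes include/mimalloc/profile.h is reachable on the application's include path (this PR declares mi_profiler_enable there, not in mimalloc.h); the fixed 512 KiB interval, the live_sampled_weight counter, and both callback bodies are illustrative and not defined anywhere in this change.

// Illustrative client (not part of the patch): samples roughly every 512 KiB of
// allocation and keeps a running estimate of live sampled bytes. The sample
// weight is stashed in user_data by on_alloc so on_free can subtract it again.
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>
#include "mimalloc.h"
#include "mimalloc/profile.h"   // mi_profiler_enable and the callback typedefs added by this PR

#define SAMPLE_INTERVAL (512 * 1024)          // hypothetical fixed sampling interval in bytes

static _Atomic size_t live_sampled_weight;    // estimated live bytes attributed to samples

// Runs on the allocating thread whenever its byte counter crosses the threshold.
static size_t my_on_alloc(void* user_data, void* ptr, size_t requested_size,
                          size_t usable_size, size_t threshold,
                          size_t bytes_since_last_sample, uint8_t heap_tag) {
  (void)ptr; (void)requested_size; (void)usable_size; (void)threshold; (void)heap_tag;
  atomic_fetch_add(&live_sampled_weight, bytes_since_last_sample);
  // record the weight of this sample so my_on_free can undo it
  memcpy(user_data, &bytes_since_last_sample, sizeof(bytes_since_last_sample));
  return SAMPLE_INTERVAL;                     // accumulate another 512 KiB before the next sample
}

// Runs when a sampled allocation is freed; user_data holds the weight stored above.
static void my_on_free(void* user_data, void* ptr) {
  (void)ptr;
  size_t weight;
  memcpy(&weight, user_data, sizeof(weight));
  atomic_fetch_sub(&live_sampled_weight, weight);
}

int main(void) {
  // reserve sizeof(size_t) bytes of user_data per sampled allocation
  if (!mi_profiler_enable(sizeof(size_t), my_on_alloc, my_on_free)) {
    fprintf(stderr, "profiler already enabled or bad arguments\n");
    return 1;
  }
  // ... run the application; per this PR, each thread's first allocation is sampled immediately ...
  void* p = mi_malloc(1024 * 1024);
  mi_free(p);
  printf("estimated live sampled bytes: %zu\n", (size_t)atomic_load(&live_sampled_weight));
  return 0;
}

A production profiler would more likely return a randomized (for example exponentially distributed) interval from on_alloc rather than a constant, so that samples do not alias with periodic allocation patterns; the constant here only keeps the sketch short.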