3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -72,6 +72,7 @@ set(mi_sources
src/options.c
src/os.c
src/page.c
src/profile.c
src/random.c
src/segment.c
src/segment-map.c
@@ -746,7 +747,7 @@ if (MI_BUILD_TESTS)
enable_testing()

# static link tests
foreach(TEST_NAME api api-fill stress)
foreach(TEST_NAME api api-fill stress profile)
add_executable(mimalloc-test-${TEST_NAME} test/test-${TEST_NAME}.c)
target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags})
38 changes: 38 additions & 0 deletions include/mimalloc/internal.h
@@ -1162,5 +1162,43 @@ static inline void _mi_memzero_aligned(void* dst, size_t n) {
}
#endif

// ------------------------------------------------------------------
// Heap profiler fast-path hooks (profile.c / profile.h)
//
// Inlined here so that the common case (profiling disabled or no
// sample due) adds only a handful of instructions at each call site.
// The slow paths are noinline and defined in profile.c.
//
// Thread safety: see profile.h for the acquire/release protocol on
// _mi_profiler.enabled.
// ------------------------------------------------------------------
#include "mimalloc/profile.h"

static inline void _mi_profiler_on_alloc(mi_heap_t* heap, mi_page_t* page, void* ptr, size_t size) {
// Relaxed load: we only need to know whether to do any work. The acquire
// that synchronizes on_alloc/on_free/record_extra_bytes visibility is in
// the slow path, which is the first place we actually read those fields.
if mi_likely(!mi_atomic_load_relaxed(&_mi_profiler.enabled)) return;
mi_profiler_tld_t* ptld = &heap->tld->profiler;
if (ptld->in_profiler) return;
ptld->bytes_since_sample += size; // unsigned: wraps at 2^64 bytes (~18 EB), harmless in practice
if mi_likely(ptld->bytes_since_sample < ptld->next_threshold) return;
_mi_profiler_on_alloc_slow(heap, page, ptr, size);
}

static inline void _mi_profiler_on_free_local(mi_page_t* page, void* ptr) {
// No acquire load on enabled: has_metadata is only ever set by the owning thread
// after it observed enabled=true via an acquire load in _mi_profiler_on_alloc.
// If has_metadata is true, all profiler fields are already visible to this thread.
if mi_likely(!page->has_metadata) return;
_mi_profiler_on_free_local_slow(page, ptr);
}

static inline void _mi_profiler_on_free_collected(mi_page_t* page, mi_block_t* head) {
// Same argument as _mi_profiler_on_free_local.
if mi_likely(!page->has_metadata) return;
_mi_profiler_on_free_collected_slow(page, head);
}


#endif
118 changes: 118 additions & 0 deletions include/mimalloc/profile.h
@@ -0,0 +1,118 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_PROFILE_H
#define MIMALLOC_PROFILE_H

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include "mimalloc/atomic.h"

// Forward declarations — full types come from types.h / internal.h.
typedef struct mi_page_s mi_page_t;
typedef struct mi_block_s mi_block_t;
typedef struct mi_heap_s mi_heap_t;

// ------------------------------------------------------------------
// Allocation record: one node per sampled live allocation, stored in
// a singly-linked list at page->metadata. Opaque to callers; the
// callbacks receive user_data directly.
//
// `ptr` — the sampled user pointer; used internally to match frees.
// `user_data` — flexible array member for caller-owned metadata.
// The number of bytes available is
// _mi_profiler.record_extra_bytes, set at
// mi_profiler_enable() time. Typical uses: a captured
// stack trace, allocation size and weight for on_free,
// a pointer to an external profiler node, or a timestamp.
// The profiler does not initialize this region and never
// reads it.
// Alignment: user_data begins at offset sizeof(mi_alloc_record_t)
// from the allocation base (2 * sizeof(void*): 16 bytes on 64-bit,
// 8 bytes on 32-bit), so it is suitably aligned for any scalar or
// pointer type. SIMD types requiring > 16-byte alignment are not
// guaranteed to be aligned.
// ------------------------------------------------------------------
typedef struct mi_alloc_record_s {
void* ptr;
struct mi_alloc_record_s* next;
char user_data[]; // length = _mi_profiler.record_extra_bytes
} mi_alloc_record_t;
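// Illustration (hypothetical, not part of this change): a caller that
// wants the sample weight and a stack trace available again at free
// time could reserve user_data space for a small fixed struct:
//
//   typedef struct sample_info_s {
//     size_t size;       // usable_size recorded in on_alloc
//     size_t weight;     // bytes_since_last_sample, the sample's statistical weight
//     void*  frames[8];  // captured stack trace
//   } sample_info_t;
//
// and pass sizeof(sample_info_t) as record_extra_bytes to mi_profiler_enable.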

// ------------------------------------------------------------------
// User-supplied callbacks.
//
// on_alloc: called when a sample is taken.
// `user_data` — caller-owned region (record_extra_bytes bytes);
// may write anything here for use in on_free.
// NULL if record_extra_bytes is 0.
// `ptr` — the sampled user pointer.
// `requested_size` — size passed by the caller to malloc/calloc/etc.
// `usable_size` — actual usable bytes after size-class rounding;
// reflects true memory consumption.
// `threshold` — the threshold (bytes) that triggered this sample.
// `bytes_since_last_sample` — bytes accumulated since the last sample; the
// statistical weight of this sample.
// `heap_tag` — tag of the heap that made the allocation, set via
// mi_heap_new_ex(). Zero for the default heap.
// Returns the number of bytes to accumulate before the next sample.
// Returning 0 causes the next allocation to be sampled immediately.
//
// on_free: called when a sampled allocation is freed.
// `user_data` — the same region written during on_alloc. Valid only
// for the duration of the callback; do not retain the pointer.
// `ptr` — the freed user pointer.
// May be NULL if free-time notification is not needed.
// ------------------------------------------------------------------
typedef size_t (*mi_profiler_alloc_cb)(void* user_data, void* ptr, size_t requested_size, size_t usable_size, size_t threshold, size_t bytes_since_last_sample, uint8_t heap_tag);
typedef void (*mi_profiler_free_cb)(void* user_data, void* ptr);
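// Sketch of the callback contract (illustrative only: my_on_alloc,
// my_on_free, sample_info_t, and the fixed 512 KiB interval are
// assumptions, not part of this change):
//
//   static size_t my_on_alloc(void* user_data, void* ptr, size_t requested_size,
//                             size_t usable_size, size_t threshold,
//                             size_t bytes_since_last_sample, uint8_t heap_tag) {
//     sample_info_t* info = (sample_info_t*)user_data;  // record_extra_bytes == sizeof(sample_info_t)
//     info->size   = usable_size;
//     info->weight = bytes_since_last_sample;  // this sample stands in for that many bytes
//     // ... capture a stack trace into info->frames ...
//     return 512 * 1024;  // accumulate 512 KiB before the next sample
//   }
//
//   static void my_on_free(void* user_data, void* ptr) {
//     sample_info_t* info = (sample_info_t*)user_data;
//     // e.g. subtract info->weight from an estimated live-bytes counter
//   }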

// ------------------------------------------------------------------
// Global profiler configuration.
//
// Profiling is one-way: once enabled it cannot be disabled.
//
// `enabled` is _Atomic(bool) so that mi_profiler_enable() can be called
// from any thread. The store uses release order; reads in the inline
// fast-path hooks (in internal.h) use relaxed order (sufficient to decide
// whether to do any work); the slow path uses acquire order to ensure
// on_alloc, on_free, and record_extra_bytes are visible before they are read.
// ------------------------------------------------------------------
typedef struct mi_profiler_s {
_Atomic(bool) enabled;
mi_profiler_alloc_cb on_alloc; // non-NULL when enabled=true
mi_profiler_free_cb on_free; // may be NULL
size_t record_extra_bytes; // bytes allocated after each mi_alloc_record_t for user_data
} mi_profiler_t;

extern mi_profiler_t _mi_profiler;

// ------------------------------------------------------------------
// Public API — must be called at most once. May be called from any
// thread, before or after other threads have started. Each thread
// samples its first allocation immediately; the on_alloc callback
// controls all subsequent thresholds.
//
// Returns true on success, false if:
// - profiling was already enabled (called more than once), or
// - on_alloc is NULL, or
// - on_free is NULL but record_extra_bytes > 0, or
// - record_extra_bytes would overflow the record allocation size.
// ------------------------------------------------------------------
bool mi_profiler_enable(size_t record_extra_bytes, mi_profiler_alloc_cb on_alloc, mi_profiler_free_cb on_free);
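// A plausible sketch of the enable path, consistent with the contract and
// memory-ordering protocol documented above (the actual implementation is
// in src/profile.c and may differ):
//
//   bool mi_profiler_enable(size_t record_extra_bytes,
//                           mi_profiler_alloc_cb on_alloc, mi_profiler_free_cb on_free) {
//     if (on_alloc == NULL) return false;
//     if (on_free == NULL && record_extra_bytes > 0) return false;
//     if (record_extra_bytes > SIZE_MAX - sizeof(mi_alloc_record_t)) return false;
//     if (mi_atomic_load_relaxed(&_mi_profiler.enabled)) return false;  // already enabled
//     _mi_profiler.on_alloc = on_alloc;
//     _mi_profiler.on_free = on_free;
//     _mi_profiler.record_extra_bytes = record_extra_bytes;
//     // release store: pairs with the acquire load in the slow paths so the
//     // fields above are visible before any thread dereferences them
//     mi_atomic_store_release(&_mi_profiler.enabled, true);
//     return true;
//   }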

// ------------------------------------------------------------------
// Slow-path implementations (defined in profile.c).
// The inline fast-path wrappers are in internal.h so they have
// access to the full type definitions they need.
// ------------------------------------------------------------------
void _mi_profiler_on_alloc_slow(mi_heap_t* heap, mi_page_t* page, void* ptr, size_t size);
void _mi_profiler_on_free_local_slow(mi_page_t* page, void* ptr);
void _mi_profiler_on_free_collected_slow(mi_page_t* page, mi_block_t* head);

#endif // MIMALLOC_PROFILE_H
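Taken together, a minimal usage sketch (assuming the hypothetical sample_info_t and callbacks from the comments above, and that the internal profile.h header is reachable on the include path):

    #include <mimalloc.h>
    #include "mimalloc/profile.h"

    int main(void) {
      if (!mi_profiler_enable(sizeof(sample_info_t), my_on_alloc, my_on_free)) {
        return 1;  // already enabled, or invalid arguments (see the contract above)
      }
      // every thread samples its first allocation immediately, then roughly
      // once per interval returned by my_on_alloc thereafter
      void* p = mi_malloc(100);
      mi_free(p);
      return 0;
    }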
22 changes: 20 additions & 2 deletions include/mimalloc/types.h
@@ -317,6 +317,9 @@ typedef uintptr_t mi_thread_free_t;
at least one block that will be added, or has already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
// Forward declaration for the profiler record list stored in page->metadata.
struct mi_alloc_record_s;

typedef struct mi_page_s {
// "owned" by the segment
uint32_t slice_count; // slices in this page (0 if not a page)
@@ -330,7 +333,9 @@ typedef struct mi_page_s {
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire:7; // expiration count for retired blocks
uint8_t has_metadata:1; // `true` if page->metadata is non-NULL; on the same cache line as
// the hot free fields to avoid a cache miss on every deallocation
uint8_t retire_expire:6; // expiration count for retired blocks (max value is MI_RETIRE_CYCLES=16, so 6 bits suffices)

mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -352,7 +357,12 @@ typedef struct mi_page_s {
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`

// 64-bit 11 words, 32-bit 13 words, (+2 for secure)
void* padding[1];
// `metadata` is the head of a singly-linked list of profiler allocation
// records (mi_alloc_record_t, defined in profile.h). It is zeroed when
// the page is cleared (segment.c:mi_segment_page_clear) and must be
// re-initialized when the page is reused. NULL when profiling is off or
// no sampled allocations are live on this page.
struct mi_alloc_record_s* metadata;
} mi_page_t;


@@ -624,13 +634,21 @@ typedef struct mi_segments_tld_s {
} mi_segments_tld_t;

// Thread local data
// Per-thread profiler state (see include/mimalloc/profile.h)
typedef struct mi_profiler_tld_s {
size_t bytes_since_sample; // bytes allocated since the last sample
size_t next_threshold; // sample when bytes_since_sample reaches this value
bool in_profiler; // reentrancy guard: skip profiling inside profiler code
} mi_profiler_tld_t;

struct mi_tld_s {
unsigned long long heartbeat; // monotonic heartbeat count
bool recurse; // true if deferred was called; used to prevent infinite recursion.
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
mi_segments_tld_t segments; // segment tld
mi_stats_t stats; // statistics
mi_profiler_tld_t profiler; // heap profiler state
};


5 changes: 5 additions & 0 deletions src/alloc.c
@@ -111,6 +111,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
#endif
#endif

_mi_profiler_on_alloc(heap, page, block, size - MI_PADDING_SIZE);
return block;
}

@@ -303,6 +304,10 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero,
// mi_track_resize(p,size,newsize)
// if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); }
if (usable_post!=NULL) { *usable_post = mi_page_usable_block_size(page); }
// TODO(profiler): if p has a live profiler record, notify the profiler of the
// resize so it can update the recorded size. The allocate-and-copy path below
// is handled correctly because it goes through mi_free + mi_heap_umalloc which
// hit the existing on_free and on_alloc hooks.
return p; // reallocation still fits and not more than 50% waste
}
void* newp = mi_heap_umalloc(heap,newsize,usable_post);
5 changes: 5 additions & 0 deletions src/free.c
@@ -34,6 +34,11 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
// checks
if mi_unlikely(mi_check_is_double_free(page, block)) return;
if (!was_guarded) { mi_check_padding(page, block); }
// Profiler hook fires before any page state (local_free, used) is modified,
// so the page is fully consistent. The profiler's free path may call mi_free
// internally, which is safe because in_profiler suppresses recursion and the
// page is in a valid pre-free state.
_mi_profiler_on_free_local(page, block);
if (track_stats) { mi_stat_free(page, block); }
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
if (!mi_page_is_huge(page)) { // huge page content may be already decommitted
9 changes: 6 additions & 3 deletions src/init.c
@@ -20,6 +20,7 @@ const mi_page_t _mi_page_empty = {
0, // reserved capacity
{ 0 }, // flags
false, // is_zero
false, // has_metadata
0, // retire_expire
NULL, // free
NULL, // local_free
@@ -34,7 +35,7 @@ const mi_page_t _mi_page_empty = {
MI_ATOMIC_VAR_INIT(0), // xthread_free
MI_ATOMIC_VAR_INIT(0), // xheap
NULL, NULL
, { 0 } // padding
, NULL // metadata
};

#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@@ -140,7 +141,8 @@ mi_decl_cache_align static const mi_tld_t tld_empty = {
false,
NULL, NULL,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, tld_empty_stats }, // segments
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL } // stats
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats
{ 0, 0, false } // profiler
};

mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
@@ -156,7 +158,8 @@ static mi_decl_cache_align mi_tld_t tld_main = {
0, false,
&_mi_heap_main, &_mi_heap_main,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, &tld_main.stats }, // segments
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL } // stats
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats
{ 0, 0, false } // profiler
};

mi_decl_cache_align mi_heap_t _mi_heap_main = {
2 changes: 2 additions & 0 deletions src/page.c
@@ -211,6 +211,8 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
return; // the thread-free items cannot be freed
}

_mi_profiler_on_free_collected(page, head);

// and append the current local free list
mi_block_set_next(page,tail, page->local_free);
page->local_free = head;