diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c
index 6700be3f3..8a74b110c 100644
--- a/src/backend/utils/adt/agtype.c
+++ b/src/backend/utils/adt/agtype.c
@@ -4871,6 +4871,22 @@ Datum agtype_hash_cmp(PG_FUNCTION_ARGS)
     agtype_iterator_token tok;
     agtype_value *r;
     uint64 seed = 0xF0F0F0F0;
+    /*
+     * Stack of "is the current open array a raw_scalar pseudo-array?" so
+     * that the WAGT_END_ARRAY case can match the choice we made at
+     * WAGT_BEGIN_ARRAY. The iterator does not populate *val on END tokens
+     * (see the comment above agtype_iterator_next), so
+     * r->val.array.raw_scalar is uninitialized at that point and reading it
+     * as a bool is undefined behavior (UBSan flagged values like 127 here).
+     *
+     * Agtype container sizes are bounded (AGTYPE_CONTAINER_SIZE masks the
+     * header with AGT_CMASK), and a fixed stack of 64 nesting levels is far
+     * more than any real agtype graph value will ever produce. Bail out
+     * with an error if we exceed it rather than silently corrupting the
+     * hash.
+     */
+#define HASH_RAW_SCALAR_STACK_DEPTH 64
+    bool raw_scalar_stack[HASH_RAW_SCALAR_STACK_DEPTH];
+    int raw_scalar_top = -1;

     /* this function returns INTEGER which is 32 bits */
     if (PG_ARGISNULL(0))
@@ -4887,18 +4903,46 @@ Datum agtype_hash_cmp(PG_FUNCTION_ARGS)
     {
         if (IS_A_AGTYPE_SCALAR(r) && AGTYPE_ITERATOR_TOKEN_IS_HASHABLE(tok))
             agtype_hash_scalar_value_extended(r, &hash, seed);
-        else if (tok == WAGT_BEGIN_ARRAY && !r->val.array.raw_scalar)
-            seed = LEFT_ROTATE(seed, 4);
+        else if (tok == WAGT_BEGIN_ARRAY)
+        {
+            /* push raw_scalar state for the matching END token */
+            raw_scalar_top++;
+            if (raw_scalar_top >= HASH_RAW_SCALAR_STACK_DEPTH)
+            {
+                ereport(ERROR,
+                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                         errmsg("agtype_hash_cmp: array nesting depth exceeds %d",
+                                HASH_RAW_SCALAR_STACK_DEPTH)));
+            }
+            raw_scalar_stack[raw_scalar_top] = r->val.array.raw_scalar;
+            if (!r->val.array.raw_scalar)
+            {
+                seed = LEFT_ROTATE(seed, 4);
+            }
+        }
         else if (tok == WAGT_BEGIN_OBJECT)
             seed = LEFT_ROTATE(seed, 6);
-        else if (tok == WAGT_END_ARRAY && !r->val.array.raw_scalar)
-            seed = RIGHT_ROTATE(seed, 4);
+        else if (tok == WAGT_END_ARRAY)
+        {
+            bool was_raw_scalar;
+
+            /* pop the matching BEGIN's raw_scalar state */
+            Assert(raw_scalar_top >= 0);
+            was_raw_scalar = raw_scalar_stack[raw_scalar_top];
+            raw_scalar_top--;
+            if (!was_raw_scalar)
+            {
+                seed = RIGHT_ROTATE(seed, 4);
+            }
+        }
         else if (tok == WAGT_END_OBJECT)
             seed = RIGHT_ROTATE(seed, 4);

         seed = LEFT_ROTATE(seed, 1);
     }
+#undef HASH_RAW_SCALAR_STACK_DEPTH
+
     pfree_if_not_null(r);
     PG_FREE_IF_COPY(agt, 0);
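The alignment problem fixed in the agtype_ext.c hunks below can be reproduced
in isolation. A minimal sketch (not part of the patch; names are illustrative,
build with -fsanitize=alignment to see the UBSan report):

    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        _Alignas(8) char buf[16];
        int64_t v = 42;

        /* A 4-byte header leaves the payload position 8-byte-misaligned. */
        *(int64_t *)(buf + 4) = v;      /* UBSan: store to misaligned address */
        memcpy(buf + 4, &v, sizeof(v)); /* the well-defined replacement */
        return 0;
    }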
diff --git a/src/backend/utils/adt/agtype_ext.c b/src/backend/utils/adt/agtype_ext.c
index 7a0ea991d..e4a38e3d1 100644
--- a/src/backend/utils/adt/agtype_ext.c
+++ b/src/backend/utils/adt/agtype_ext.c
@@ -57,7 +57,14 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry,
         /* copy in the int_value data */
         numlen = sizeof(int64);
         offset = reserve_from_buffer(buffer, numlen);
-        *((int64 *)(buffer->data + offset)) = scalar_val->val.int_value;
+        /*
+         * Use memcpy because the AGT_HEADER (4 bytes) leaves the buffer
+         * write position 4-byte-aligned but not necessarily 8-byte-aligned.
+         * A direct *((int64 *) ...) = ... store is undefined behavior under
+         * strict alignment rules (UBSan flags it; works in practice on
+         * x86_64).
+         */
+        memcpy(buffer->data + offset, &scalar_val->val.int_value,
+               sizeof(int64));
         *agtentry = AGTENTRY_IS_AGTYPE | (padlen + numlen + AGT_HEADER_SIZE);
         break;
@@ -68,7 +75,8 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry,
         /* copy in the float_value data */
         numlen = sizeof(scalar_val->val.float_value);
         offset = reserve_from_buffer(buffer, numlen);
-        *((float8 *)(buffer->data + offset)) = scalar_val->val.float_value;
+        memcpy(buffer->data + offset, &scalar_val->val.float_value,
+               sizeof(float8));
         *agtentry = AGTENTRY_IS_AGTYPE | (padlen + numlen + AGT_HEADER_SIZE);
         break;
@@ -154,12 +162,17 @@ void ag_deserialize_extended_type(char *base_addr, uint32 offset,
     {
     case AGT_HEADER_INTEGER:
         result->type = AGTV_INTEGER;
-        result->val.int_value = *((int64 *)(base + AGT_HEADER_SIZE));
+        /* See comment in ag_serialize_extended_type. The 4-byte AGT_HEADER
+         * leaves the int64 at a 4-byte-aligned but not 8-byte-aligned
+         * address, so a direct typed load is undefined behavior. */
+        memcpy(&result->val.int_value, base + AGT_HEADER_SIZE,
+               sizeof(int64));
         break;

     case AGT_HEADER_FLOAT:
         result->type = AGTV_FLOAT;
-        result->val.float_value = *((float8 *)(base + AGT_HEADER_SIZE));
+        memcpy(&result->val.float_value, base + AGT_HEADER_SIZE,
+               sizeof(float8));
         break;

     case AGT_HEADER_VERTEX:
diff --git a/src/backend/utils/adt/agtype_raw.c b/src/backend/utils/adt/agtype_raw.c
index d8bad3d24..3d56d94d5 100644
--- a/src/backend/utils/adt/agtype_raw.c
+++ b/src/backend/utils/adt/agtype_raw.c
@@ -197,9 +197,20 @@ void write_graphid(agtype_build_state *bstate, graphid graphid)
     write_const(AGT_HEADER_INTEGER, AGT_HEADER_TYPE);
     length += AGT_HEADER_SIZE;

-    /* graphid value */
-    write_const(graphid, int64);
-    length += sizeof(int64);
+    /*
+     * graphid value (int64). The 4-byte AGT_HEADER above leaves the buffer
+     * write position 4-byte-aligned but not 8-byte-aligned, so the typed
+     * write done by write_const(graphid, int64) is undefined behavior under
+     * strict alignment rules. Use memcpy. (UBSan flags the typed write as
+     * "store to misaligned address ... requires 8 byte alignment".)
+     */
+    {
+        int numlen = sizeof(int64);
+        int g_offset = BUFFER_RESERVE(numlen);
+
+        memcpy(bstate->buffer->data + g_offset, &graphid, sizeof(int64));
+        length += numlen;
+    }

     /* agtentry */
     write_agt(AGTENTRY_IS_AGTYPE | length);
@@ -214,8 +225,15 @@ void write_container(agtype_build_state *bstate, agtype *agtype)
     /* padding */
     length += BUFFER_WRITE_PAD();

-    /* varlen data */
-    length += write_ptr((char *) &agtype->root, VARSIZE(agtype));
+    /*
+     * Copy the inner agtype_container only, NOT the outer varlena header.
+     * VARSIZE(agtype) reports the total varlena size (including the 4-byte
+     * vl_len_ header), but we are starting our copy at &agtype->root, which
+     * is already past that header. Subtracting VARHDRSZ avoids reading
+     * VARHDRSZ bytes past the source allocation (caught by ASan as
+     * heap-buffer-overflow in __interceptor_memcpy from write_pointer).
+     */
+    length += write_ptr((char *) &agtype->root, VARSIZE(agtype) - VARHDRSZ);

     /* agtentry */
     write_agt(AGTENTRY_IS_CONTAINER | length);
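The write_container fix is pure varlena size arithmetic. A sketch of the
relationship (the helper is hypothetical; VARSIZE, VARHDRSZ, and Size are the
stock definitions from postgres.h):

    #include "postgres.h"
    #include "utils/agtype.h"

    /* Hypothetical helper: the payload length write_container must copy. */
    static inline Size
    agtype_payload_size(agtype *agt)
    {
        /*
         * VARSIZE() counts the whole varlena, including the 4-byte vl_len_
         * header; &agt->root already points past that header, so the bytes
         * remaining from there are VARSIZE() - VARHDRSZ.
         */
        return VARSIZE(agt) - VARHDRSZ;
    }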
diff --git a/src/backend/utils/adt/agtype_util.c b/src/backend/utils/adt/agtype_util.c
index b39723413..a05d2cc8f 100644
--- a/src/backend/utils/adt/agtype_util.c
+++ b/src/backend/utils/adt/agtype_util.c
@@ -103,6 +103,66 @@ static int get_type_sort_priority(enum agtype_value_type type);
 static void pfree_iterator_agtype_value_token(agtype_iterator_token token,
                                               agtype_value *agtv);

+/*
+ * Per-call agtype build arena.
+ *
+ * Hot Datum functions in agtype.c build an agtype_value tree, serialize it
+ * via agtype_value_to_agtype(), then recursively free it with
+ * pfree_agtype_value_content(). The recursive free is O(N) in the number of
+ * tree nodes; the arena helpers below let callers replace it with an O(K)
+ * block reset (K = number of allocated AllocSet blocks; typically 1 for
+ * small trees) by allocating the entire tree inside a short-lived
+ * MemoryContext.
+ *
+ * The arena is sized for typical agtype_value trees; it grows on demand.
+ * Cost of creating and deleting the context is amortized against the
+ * eliminated per-node pfree walk and per-node AllocSetFree calls.
+ *
+ * agt_arena_end() is a no-op on NULL, allowing simple cleanup paths.
+ *
+ * The AGT_ARENA_BEGIN_SHARED() macro (declared in agtype.h) returns a
+ * context parented under CacheMemoryContext (process lifetime) so the
+ * caller may stash it in a file-static variable for reuse across many
+ * invocations. The first caller pays AllocSetContextCreate cost;
+ * subsequent callers reuse the context and pay only MemoryContextReset
+ * (cheap; ~100 ns when nothing was allocated since last reset). Use this
+ * for hot inner loops (sort comparators, per-tuple deep-copy workspaces).
+ */
+MemoryContext agt_arena_begin(void)
+{
+    /*
+     * ALLOCSET_SMALL_SIZES (initial=1KB, max=8KB). Most agtype value trees
+     * fit in a single 1KB block.
+     */
+    return AllocSetContextCreate(CurrentMemoryContext,
+                                 "agtype build arena",
+                                 ALLOCSET_SMALL_SIZES);
+}
+
+/*
+ * Pattern B (shared, long-lived arena) is exposed as the macro
+ * AGT_ARENA_BEGIN_SHARED in agtype.h, because PG's AllocSetContextCreate
+ * enforces a compile-time-constant name via StaticAssertStmt. The macro
+ * inlines the AllocSet creation at the call site so the name literal is
+ * visible to the StaticAssert.
+ */
+
+void agt_arena_reset(MemoryContext arena)
+{
+    if (arena != NULL)
+    {
+        MemoryContextReset(arena);
+    }
+}
+
+void agt_arena_end(MemoryContext arena)
+{
+    if (arena != NULL)
+    {
+        MemoryContextDelete(arena);
+    }
+}
+
 /*
  * Turn an in-memory agtype_value into an agtype for on-disk storage.
  *
@@ -834,12 +894,18 @@ static void fill_agtype_value_no_copy(agtype_container *container, int index,
     {
     case AGT_HEADER_INTEGER:
         result->type = AGTV_INTEGER;
-        result->val.int_value = *((int64 *)(base + AGT_HEADER_SIZE));
+        /* See ag_serialize_extended_type: the 4-byte AGT_HEADER leaves
+         * the int64 at a 4-byte-aligned but not 8-byte-aligned address.
+         * Use memcpy to avoid undefined behavior on strict alignment
+         * platforms (caught by UBSan). */
+        memcpy(&result->val.int_value, base + AGT_HEADER_SIZE,
+               sizeof(int64));
         break;

     case AGT_HEADER_FLOAT:
         result->type = AGTV_FLOAT;
-        result->val.float_value = *((float8 *)(base + AGT_HEADER_SIZE));
+        memcpy(&result->val.float_value, base + AGT_HEADER_SIZE,
+               sizeof(float8));
         break;

     default:
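For reference, the one-shot Pattern A shape the arena helpers above are
designed for (an illustrative caller as it might appear in agtype.c, not code
from this patch):

    /* Illustrative only: build a value in the arena, serialize, discard. */
    static agtype *
    build_in_arena(void)
    {
        MemoryContext arena = agt_arena_begin();
        MemoryContext caller = MemoryContextSwitchTo(arena);
        agtype_value agtv;
        agtype *result;

        /* every palloc made while building the tree lands in the arena */
        agtv.type = AGTV_INTEGER;
        agtv.val.int_value = 42;

        MemoryContextSwitchTo(caller);
        result = agtype_value_to_agtype(&agtv); /* copies into caller ctx */
        agt_arena_end(arena);                   /* drops the tree in one go */

        return result;
    }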
@@ -880,8 +946,113 @@ static void fill_agtype_value_no_copy(agtype_container *container, int index,
  * of the full iterator machinery. It extracts the scalar values using no-copy
  * fill and compares them directly.
  *
+ * For composite types (VERTEX, EDGE, PATH) the no-copy fill still
+ * deserializes into newly-allocated memory. To avoid the per-call
+ * pfree_agtype_value_content recursive walk on every sort comparator
+ * invocation, those allocations are routed through a long-lived shared
+ * arena that is reset (O(1)) at the end of each call. The arena is
+ * created lazily on the first call that needs it (i.e. the first
+ * VERTEX/EDGE/PATH comparison) and reused across all subsequent calls in
+ * the process.
+ *
+ * Additional fast path (A2): when both containers are VERTEX or EDGE of
+ * the same type, the comparator only needs the graphid "id" field
+ * (compare_agtype_scalar_values does this; all other fields are ignored).
+ * Skip the full ag_deserialize_composite agtype_value tree build entirely
+ * and walk the binary representation directly to read just the int64 id.
+ * This is the dominant cost of sort-bound queries (notably IC4), since the
+ * tree build is O(properties + label) per row when only one int64 is
+ * needed for ordering.
+ *
  * Returns: negative if a < b, 0 if a == b, positive if a > b
  */
+static MemoryContext compare_scalar_arena = NULL;
+
+/*
+ * Fast int64-id extraction from a binary VERTEX or EDGE container.
+ *
+ * Layout of an extended VERTEX/EDGE entry (set up by
+ * ag_serialize_extended_type with convert_extended_object): a 4-byte
+ * AGT_HEADER followed by a standard agtype_container holding an object
+ * whose first field (sorted by key length ascending) is "id". The id is
+ * itself stored as an extended AGTV_INTEGER: a 4-byte AGT_HEADER_INTEGER
+ * followed by an int64.
+ *
+ * Pair layout in an object container: children[0..N-1] are the keys, then
+ * children[N..2N-1] are the values; data follows. The "id" key is at index 0
+ * because "id" (length 2) is the shortest key in both VERTEX (id, label,
+ * properties) and EDGE (id, label, end_id, start_id, properties), and the
+ * agtype object representation sorts pairs by key length ascending.
+ *
+ * Returns the extracted graphid, or PG_INT64_MIN if the id field is not an
+ * extended integer (malformed input; the caller must fall back to the slow
+ * path). The caller is responsible for ensuring the container actually
+ * holds a VERTEX or EDGE, i.e. AGTE_IS_AGTYPE indicated an extended type
+ * and the AGT_HEADER matched AGT_HEADER_VERTEX or AGT_HEADER_EDGE.
+ */
+static graphid extract_composite_id_fast(const agtype_container *outer_scalar)
+{
+    const char *base_addr;
+    const agtype_container *obj;
+    int n_pairs;
+    const agtentry *children;
+    uint32 id_value_offset;
+    char *id_value_base;
+    char *id_extended_base;
+    AGT_HEADER_TYPE id_header;
+    int id_value_index;
+
+    /*
+     * outer_scalar is a single-element pseudo-array; element 0 is our
+     * extended type.
+     */
+    base_addr = (const char *)&outer_scalar->children[1];
+
+    /*
+     * Element 0 is an extended type. Its data starts at base_addr + 0 and
+     * begins with AGT_HEADER_VERTEX or AGT_HEADER_EDGE; the inner
+     * agtype_container follows immediately.
+     */
+    obj = (const agtype_container *)(base_addr + AGT_HEADER_SIZE);
+    n_pairs = AGTYPE_CONTAINER_SIZE(obj);
+    children = obj->children;
+
+    /*
+     * Pair value 0 ("id") lives at child index n_pairs (values come after
+     * keys). Compute its offset within the object's data area.
+     */
+    id_value_index = n_pairs;
+    id_value_offset = get_agtype_offset(obj, id_value_index);
+
+    /* The data area for this object starts after children[2 * n_pairs]. */
+    id_value_base = (char *)&children[2 * n_pairs];
+
+    /*
+     * The id value is an extended AGTV_INTEGER: an AGT_HEADER (aligned)
+     * followed by the int64. INTALIGN matches what
+     * ag_deserialize_extended_type does.
+     */
+    id_extended_base = id_value_base + INTALIGN(id_value_offset);
+
+    /*
+     * The header is uint32 (4 bytes) and lives at a 4-byte-aligned address,
+     * so a typed load is fine. The int64 that follows starts at offset
+     * AGT_HEADER_SIZE = 4 from id_extended_base, so it is 4-byte- but not
+     * necessarily 8-byte-aligned; use memcpy to avoid alignment UB.
+     */
+    memcpy(&id_header, id_extended_base, sizeof(AGT_HEADER_TYPE));
+
+    /*
+     * Defensive: if the value isn't actually an extended integer, signal the
+     * caller to fall back to the slow deserialize path. AGT_HEADER_INTEGER
+     * is the only valid header for the id field of a well-formed
+     * VERTEX/EDGE.
+     */
+    if (id_header != AGT_HEADER_INTEGER)
+    {
+        return PG_INT64_MIN;
+    }
+
+    {
+        graphid id;
+
+        memcpy(&id, id_extended_base + AGT_HEADER_SIZE, sizeof(int64));
+        return id;
+    }
+}
+
 static int compare_agtype_scalar_containers(agtype_container *a,
                                             agtype_container *b)
 {
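A regression check worth pairing with the fast path (hypothetical test
scaffolding inside agtype_util.c, not part of the patch; it assumes, as the
comment above does, that the deserialized composite's first pair is "id"):

    /* Hypothetical check: fast id extraction must agree with the slow path. */
    static void
    assert_fast_id_matches_slow(agtype_container *scalar)
    {
        agtype_value v;
        graphid fast_id = extract_composite_id_fast(scalar);

        fill_agtype_value_no_copy(scalar, 0, (char *)&scalar->children[1],
                                  0, &v);
        Assert(v.type == AGTV_VERTEX || v.type == AGTV_EDGE);
        /* pairs are sorted by key length, so pairs[0] is "id" */
        Assert(fast_id == v.val.object.pairs[0].value.val.int_value);
        pfree_agtype_value_content(&v);
    }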
+ */ + memcpy(&id_header, id_extended_base, sizeof(AGT_HEADER_TYPE)); + + /* + * Defensive: if the value isn't actually an extended integer, fall back + * to the slow deserialize path by signaling. AGT_HEADER_INTEGER is the + * only valid header for the id field of a well-formed VERTEX/EDGE. + */ + if (id_header != AGT_HEADER_INTEGER) + { + return PG_INT64_MIN; + } + + { + graphid id; + + memcpy(&id, id_extended_base + AGT_HEADER_SIZE, sizeof(int64)); + return id; + } +} + static int compare_agtype_scalar_containers(agtype_container *a, agtype_container *b) { @@ -892,6 +1063,7 @@ static int compare_agtype_scalar_containers(agtype_container *a, int result; bool need_free_a = false; bool need_free_b = false; + MemoryContext saved_ctx = NULL; Assert(AGTYPE_CONTAINER_IS_SCALAR(a)); Assert(AGTYPE_CONTAINER_IS_SCALAR(b)); @@ -900,13 +1072,65 @@ static int compare_agtype_scalar_containers(agtype_container *a, base_addr_a = (char *)&a->children[1]; base_addr_b = (char *)&b->children[1]; + /* + * A2 fast path: when both sides are extended-type scalars holding the + * same composite kind (VERTEX or EDGE), we only need the int64 id field + * to determine ordering (see compare_agtype_scalar_values' AGTV_VERTEX + * and AGTV_EDGE cases). Skip the full agtype_value tree build entirely. + */ + if (AGTE_IS_AGTYPE(a->children[0]) && AGTE_IS_AGTYPE(b->children[0])) + { + AGT_HEADER_TYPE ha = *(AGT_HEADER_TYPE *) + (base_addr_a + + INTALIGN(get_agtype_offset(a, 0))); + AGT_HEADER_TYPE hb = *(AGT_HEADER_TYPE *) + (base_addr_b + + INTALIGN(get_agtype_offset(b, 0))); + + if (ha == hb && + (ha == AGT_HEADER_VERTEX || ha == AGT_HEADER_EDGE)) + { + graphid ida = extract_composite_id_fast(a); + graphid idb = extract_composite_id_fast(b); + + /* Defensive: PG_INT64_MIN signals malformed; fall through to + * slow path in that rare case rather than producing a wrong + * comparison. */ + if (ida != PG_INT64_MIN && idb != PG_INT64_MIN) + { + if (ida == idb) + { + return 0; + } + return (ida > idb) ? 1 : -1; + } + } + } + + /* + * Peek at the entry types without filling, so we can route any composite + * allocations into the shared arena instead of CurrentMemoryContext. + * This avoids the recursive pfree walk after every comparison. + */ + if (AGTE_IS_AGTYPE(a->children[0]) || AGTE_IS_AGTYPE(b->children[0])) + { + if (compare_scalar_arena == NULL) + { + compare_scalar_arena = + AGT_ARENA_BEGIN_SHARED("agtype scalar comparator"); + } + saved_ctx = MemoryContextSwitchTo(compare_scalar_arena); + } + /* Use no-copy fill to avoid allocations for simple types */ fill_agtype_value_no_copy(a, 0, base_addr_a, 0, &va); fill_agtype_value_no_copy(b, 0, base_addr_b, 0, &vb); /* - * Check if we need to free the values after comparison. - * Only VERTEX, EDGE, and PATH types allocate memory in no-copy mode. + * Track which sides allocated composite content (VERTEX/EDGE/PATH). + * For the arena-backed allocations we still need this so we know to + * reset the arena at the end; for the (rare) caller-context-backed + * fallback path we still need to recursively free. */ if (va.type == AGTV_VERTEX || va.type == AGTV_EDGE || va.type == AGTV_PATH) { @@ -936,14 +1160,17 @@ static int compare_agtype_scalar_containers(agtype_container *a, get_type_sort_priority(vb.type)) ? -1 : 1; } - /* Free any allocated memory from composite types */ - if (need_free_a) - { - pfree_agtype_value_content(&va); - } - if (need_free_b) + /* + * Cleanup. 
@@ -2127,7 +2354,17 @@ int reserve_from_buffer(StringInfo buffer, int len)
 static void copy_to_buffer(StringInfo buffer, int offset, const char *data,
                            int len)
 {
-    memcpy(buffer->data + offset, data, len);
+    /*
+     * Guard against memcpy(dst, NULL, 0). Some callers (notably empty PATH
+     * scalars with no element bytes) reach here with data == NULL and
+     * len == 0; the C standard says memcpy with a NULL pointer is undefined
+     * even when len == 0. UBSan flags this as "null pointer passed as
+     * argument 2, which is declared to never be null".
+     */
+    if (len > 0)
+    {
+        memcpy(buffer->data + offset, data, len);
+    }
 }

 /*
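The same guard, generalized into a reusable idiom (a sketch in plain C,
independent of the tree; the helper name is illustrative):

    #include <string.h>

    /* Sketch: a copy helper that tolerates a (NULL, 0) source pair. */
    static void
    copy_bytes(char *dst, const char *src, size_t len)
    {
        /*
         * memcpy's pointer arguments are declared nonnull, so passing NULL
         * is undefined behavior even when len == 0; skip the call instead.
         */
        if (len > 0)
        {
            memcpy(dst, src, len);
        }
    }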
diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h
index 807d3795e..a05a85437 100644
--- a/src/include/utils/agtype.h
+++ b/src/include/utils/agtype.h
@@ -32,6 +32,7 @@
 #define AG_AGTYPE_H

 #include "utils/array.h"
+#include "utils/memutils.h"
 #include "utils/numeric.h"
 #include "utils/graphid.h"

@@ -581,6 +582,64 @@ agtype_iterator_token agtype_iterator_next(agtype_iterator **it,
 agtype *agtype_value_to_agtype(agtype_value *val);
 bool agtype_deep_contains(agtype_iterator **val, agtype_iterator **m_contained,
                           bool skip_nested);
+
+/*
+ * Per-call agtype build arena.
+ *
+ * Many top-level Datum functions in agtype.c follow the pattern:
+ *   1. Allocate / build an agtype_value tree
+ *   2. Serialize it via agtype_value_to_agtype()
+ *   3. Recursively pfree the tree
+ *
+ * Step 3 (pfree_agtype_value_content) is an O(N) walk that frees each tree
+ * node individually. By building the tree inside a dedicated AllocSet
+ * MemoryContext and resetting the context after serialization, we trade
+ * the O(N) walk for an O(K) block reset (K << N) and eliminate per-node
+ * AllocSetFree overhead. The AllocSetAlloc cost is also reduced because
+ * the arena uses a small initial block size and grows on demand.
+ *
+ * Two usage patterns:
+ *
+ * Pattern A -- disposable per-call arena (one-shot):
+ *     MemoryContext arena = agt_arena_begin();
+ *     MemoryContext caller = MemoryContextSwitchTo(arena);
+ *     ... build tree ...
+ *     MemoryContextSwitchTo(caller);
+ *     result = agtype_value_to_agtype(val); // copies into caller
+ *     agt_arena_end(arena);                 // deletes the arena
+ *
+ * Pattern B -- long-lived shared arena (sort comparator, hot inner loop):
+ *     static MemoryContext shared_arena = NULL;
+ *     if (shared_arena == NULL)
+ *     {
+ *         shared_arena = AGT_ARENA_BEGIN_SHARED("comparator");
+ *     }
+ *     MemoryContext caller = MemoryContextSwitchTo(shared_arena);
+ *     ... allocate / use ...
+ *     MemoryContextSwitchTo(caller);
+ *     agt_arena_reset(shared_arena); // O(1) cheap reset
+ *
+ * Pattern A creates a fresh context per call (microseconds). Pattern B
+ * amortizes the creation cost across many calls (a single
+ * AllocSetContextCreate at the very first call), and pays only a
+ * MemoryContextReset (~100 ns) per subsequent call. Use Pattern B when the
+ * function is invoked many times per query (e.g. a sort comparator called
+ * O(N log N) times).
+ */
+MemoryContext agt_arena_begin(void);
+
+/*
+ * Pattern-B shared-arena creator. Implemented as a macro because PG's
+ * AllocSetContextCreate enforces that memory-context names are
+ * compile-time constants via a StaticAssert. Use it with a string literal:
+ *     static MemoryContext my_arena = NULL;
+ *     if (my_arena == NULL)
+ *     {
+ *         my_arena = AGT_ARENA_BEGIN_SHARED("my comparator workspace");
+ *     }
+ */
+#define AGT_ARENA_BEGIN_SHARED(_name) \
+    AllocSetContextCreate(CacheMemoryContext, (_name), ALLOCSET_SMALL_SIZES)
+void agt_arena_reset(MemoryContext arena);
+void agt_arena_end(MemoryContext arena);
 void agtype_hash_scalar_value(const agtype_value *scalar_val, uint32 *hash);
 void agtype_hash_scalar_value_extended(const agtype_value *scalar_val,
                                        uint64 *hash, uint64 seed);
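Put together, a Pattern B consumer looks like this (an illustrative comparator
skeleton, not code from this patch):

    /* Illustrative only: a hot comparator using the shared-arena pattern. */
    static int
    composite_cmp_demo(const agtype *a, const agtype *b)
    {
        static MemoryContext cmp_arena = NULL;
        MemoryContext caller;
        int result = 0;

        if (cmp_arena == NULL)
        {
            cmp_arena = AGT_ARENA_BEGIN_SHARED("composite comparator demo");
        }

        caller = MemoryContextSwitchTo(cmp_arena);
        /* ... deserialize both sides; allocations land in cmp_arena ... */
        (void) a;
        (void) b;
        MemoryContextSwitchTo(caller);

        agt_arena_reset(cmp_arena); /* O(1); no per-node frees */
        return result;
    }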