From 13a581e28a6b2b461834fe886c45617867176795 Mon Sep 17 00:00:00 2001 From: floor-licker Date: Tue, 11 Nov 2025 23:41:28 -0500 Subject: [PATCH 1/2] perf: eliminate mutex contention in parallel trie commit by deferring NodeSet merges until after goroutine synchronization --- trie/committer.go | 85 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/trie/committer.go b/trie/committer.go index 2a2142e0ffaa..5cce108702ad 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -87,10 +87,38 @@ func (c *committer) commit(path []byte, n node, parallel bool) node { // commitChildren commits the children of the given fullnode func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) { - var ( - wg sync.WaitGroup - nodesMu sync.Mutex - ) + if !parallel { + // Sequential path: process children one by one + for i := 0; i < 16; i++ { + child := n.Children[i] + if child == nil { + continue + } + // If it's the hashed child, save the hash value directly. + // Note: it's impossible that the child in range [0, 15] + // is a valueNode. Values are only stored in Children[16] + // (the 17th slot) when a key terminates at a branch node. + // Children[0-15] are structural slots for hex digit routing. + if _, ok := child.(hashNode); ok { + continue + } + // Commit the child recursively and store the "hashed" value. + // Note the returned node can be some embedded nodes, so it's + // possible the type is not hashNode. + n.Children[i] = c.commit(append(path, byte(i)), child, false) + } + return + } + + // Parallel path: collect all child NodeSets first, then merge without lock contention + type childResult struct { + index int + set *trienode.NodeSet + } + results := make([]childResult, 0, 16) + resultsMu := sync.Mutex{} + var wg sync.WaitGroup + for i := 0; i < 16; i++ { child := n.Children[i] if child == nil { @@ -98,33 +126,36 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) { } // If it's the hashed child, save the hash value directly. // Note: it's impossible that the child in range [0, 15] - // is a valueNode. + // is a valueNode. Values are only stored in Children[16] + // (the 17th slot) when a key terminates at a branch node. + // Children[0-15] are structural slots for hex digit routing. if _, ok := child.(hashNode); ok { continue } - // Commit the child recursively and store the "hashed" value. - // Note the returned node can be some embedded nodes, so it's - // possible the type is not hashNode. - if !parallel { - n.Children[i] = c.commit(append(path, byte(i)), child, false) - } else { - wg.Add(1) - go func(index int) { - defer wg.Done() - - p := append(path, byte(index)) - childSet := trienode.NewNodeSet(c.nodes.Owner) - childCommitter := newCommitter(childSet, c.tracer, c.collectLeaf) - n.Children[index] = childCommitter.commit(p, child, false) - - nodesMu.Lock() - c.nodes.MergeDisjoint(childSet) - nodesMu.Unlock() - }(i) - } + + wg.Add(1) + go func(index int) { + defer wg.Done() + + p := append(path, byte(index)) + childSet := trienode.NewNodeSet(c.nodes.Owner) + childCommitter := newCommitter(childSet, c.tracer, c.collectLeaf) + n.Children[index] = childCommitter.commit(p, child, false) + + // Collect results without merging yet - only lock to append to slice + resultsMu.Lock() + results = append(results, childResult{index: index, set: childSet}) + resultsMu.Unlock() + }(i) } - if parallel { - wg.Wait() + + // Wait for all goroutines to complete + wg.Wait() + + // Now merge all results sequentially without any lock contention + // This is safe because all goroutines have completed + for _, result := range results { + c.nodes.MergeDisjoint(result.set) } } From ae97dfa234cac46f49ed46d2e7c969ef9e09b81f Mon Sep 17 00:00:00 2001 From: floor-licker Date: Thu, 13 Nov 2025 19:52:22 -0500 Subject: [PATCH 2/2] simplify by omitting mutex --- trie/committer.go | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/trie/committer.go b/trie/committer.go index 5cce108702ad..8ae848f313dd 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -111,14 +111,10 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) { } // Parallel path: collect all child NodeSets first, then merge without lock contention - type childResult struct { - index int - set *trienode.NodeSet - } - results := make([]childResult, 0, 16) - resultsMu := sync.Mutex{} - var wg sync.WaitGroup - + var ( + wg sync.WaitGroup + nodeset = make([]*trienode.NodeSet, 16) + ) for i := 0; i < 16; i++ { child := n.Children[i] if child == nil { @@ -134,7 +130,7 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) { } wg.Add(1) - go func(index int) { + go func(index int, child node) { defer wg.Done() p := append(path, byte(index)) @@ -142,11 +138,10 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) { childCommitter := newCommitter(childSet, c.tracer, c.collectLeaf) n.Children[index] = childCommitter.commit(p, child, false) - // Collect results without merging yet - only lock to append to slice - resultsMu.Lock() - results = append(results, childResult{index: index, set: childSet}) - resultsMu.Unlock() - }(i) + // Store the nodeset at the child's index. No mutex needed since + // each goroutine writes to a unique index. + nodeset[index] = childSet + }(i, child) } // Wait for all goroutines to complete @@ -154,8 +149,11 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) { // Now merge all results sequentially without any lock contention // This is safe because all goroutines have completed - for _, result := range results { - c.nodes.MergeDisjoint(result.set) + for _, set := range nodeset { + if set == nil { + continue + } + c.nodes.MergeDisjoint(set) } }