From 5d0242faa04951b01ebaba2e4162167bb2ecc8d3 Mon Sep 17 00:00:00 2001 From: Matt Joiner Date: Mon, 9 Mar 2026 21:47:13 +1100 Subject: [PATCH 1/2] Add BSI.FromBitmaps --- roaring64/bsi64.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index 5d6019db..24f4c8d0 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -814,6 +814,11 @@ func (b *BSI) ParOr(parallelism int, bsis ...*BSI) { b.eBM = *ParOr(parallelism, x...) } +func (b *BSI) FromBitmaps(bms []Bitmap) { + b.eBM = bms[0] + b.bA = bms[1:] +} + // UnmarshalBinary de-serialize a BSI. The value at bitData[0] is the EBM. Other indices are in least to most // significance order starting at bitData[1] (bit position 0). func (b *BSI) UnmarshalBinary(bitData [][]byte) error { From 399570797c1547dc82f327179c39091e44d55b51 Mon Sep 17 00:00:00 2001 From: Matt Joiner Date: Mon, 9 Mar 2026 21:34:02 +1100 Subject: [PATCH 2/2] Document and guard FromBitmaps Add a bounds check (panic if len(bms) < 1) and a doc comment explaining the ownership-transfer contract and why no-copy is intentional. The primary caller (bsiDocFreqs.ReadFrom in caterwaul) reads bitmaps freshly from a stream, reconstructs the existence bitmap by ORing the bit planes (the eBM is not stored on disk to save space), then hands the whole slice to FromBitmaps before it goes out of scope. All bitmaps are newly allocated from the stream at that point, so copying would be pure waste. The design also leaves the door open for a future zero-copy path once roaring gains FromUnsafeBytes support. --- roaring64/bsi64.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index 24f4c8d0..fc71a232 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -814,7 +814,26 @@ func (b *BSI) ParOr(parallelism int, bsis ...*BSI) { b.eBM = *ParOr(parallelism, x...) } +// FromBitmaps initializes the BSI from a pre-built slice of bitmaps. 
+// bms[0] is the existence bitmap (eBM); bms[1:] are the bit planes in +// least-to-most-significant order (bit position 0 first), matching the +// layout used by MarshalBinary/UnmarshalBinary. +// +// The caller transfers ownership of the slice and all bitmaps within it; +// eBM is assigned bms[0] by value and bA aliases bms[1:], so no bitmap is +// cloned. The caller must not modify the slice or its elements afterwards. +// +// The no-copy design is intentional. The primary use case is deserialization +// pipelines where the existence bitmap is not stored on disk but reconstructed +// by ORing the bit planes, and all bitmaps are freshly allocated from the +// stream. Copying at that point would be wasteful. In that use case the +// caller's slice goes out of scope right after the call, so aliasing is safe. +// +// Panics if len(bms) < 1. func (b *BSI) FromBitmaps(bms []Bitmap) { + if len(bms) < 1 { + panic("FromBitmaps: bms must have at least one element (the existence bitmap)") + } b.eBM = bms[0] b.bA = bms[1:] }