From 858601a894a79d72f470cf455d9362f8505581f1 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Mon, 22 Jun 2026 18:07:31 -0400 Subject: [PATCH 1/7] DX-122953 Preserve empty list offset buffer --- .../arrow/vector/complex/LargeListVector.java | 16 +++++++-- .../arrow/vector/complex/ListVector.java | 16 +++++++-- .../arrow/vector/TestLargeListVector.java | 33 +++++++++++++++++++ .../apache/arrow/vector/TestListVector.java | 33 +++++++++++++++++++ 4 files changed, 94 insertions(+), 4 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index ed075352c9..1f44474a73 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -305,15 +305,27 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long /** Set the reader and writer indexes for the inner buffers. */ private void setReaderAndWriterIndex() { + final long requiredOffsetBufferCapacity = (long) (valueCount + 1) * OFFSET_WIDTH; validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); + ensureEmptyOffsetBufferCapacity(requiredOffsetBufferCapacity); } else { validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); } + // IPC serializers use readerIndex and writerIndex to determine readable bytes. Even when the + // list is empty, the Arrow layout requires the offset buffer to contain offset[0]. + offsetBuffer.writerIndex(requiredOffsetBufferCapacity); + } + + private void ensureEmptyOffsetBufferCapacity(long requiredCapacity) { + if (offsetBuffer.capacity() >= requiredCapacity) { + return; + } + ArrowBuf oldOffsetBuffer = offsetBuffer; + offsetBuffer = allocateOffsetBuffer(requiredCapacity); + oldOffsetBuffer.getReferenceManager().release(); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 3daeb6d77b..09b8b0b92c 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -263,15 +263,27 @@ public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long /** Set the reader and writer indexes for the inner buffers. */ private void setReaderAndWriterIndex() { + final long requiredOffsetBufferCapacity = (long) (valueCount + 1) * OFFSET_WIDTH; validityBuffer.readerIndex(0); offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); + ensureEmptyOffsetBufferCapacity(requiredOffsetBufferCapacity); } else { validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); } + // IPC serializers use readerIndex and writerIndex to determine readable bytes. Even when the + // list is empty, the Arrow layout requires the offset buffer to contain offset[0]. + offsetBuffer.writerIndex(requiredOffsetBufferCapacity); + } + + private void ensureEmptyOffsetBufferCapacity(long requiredCapacity) { + if (offsetBuffer.capacity() >= requiredCapacity) { + return; + } + ArrowBuf oldOffsetBuffer = offsetBuffer; + offsetBuffer = allocateOffsetBuffer(requiredCapacity); + oldOffsetBuffer.getReferenceManager().release(); } /** diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index 101d942d2a..fdeff3da5e 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import io.netty.buffer.NettyArrowBuf; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; @@ -955,6 +956,38 @@ public void testGetBufferSizeFor() { } } + @Test + public void testEmptyLargeListOffsetBuffer() { + try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.allocateNew(); + list.setValueCount(0); + + assertEmptyLargeListOffsetBuffer(list); + } + } + + @Test + public void testUnallocatedEmptyLargeListOffsetBufferCanBeUnwrappedAsNettyBuffer() { + try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + ArrowBuf offsetBuffer = assertEmptyLargeListOffsetBuffer(list); + NettyArrowBuf nettyBuffer = NettyArrowBuf.unwrapBuffer(offsetBuffer); + assertEquals(LargeListVector.OFFSET_WIDTH, nettyBuffer.readableBytes()); + } + } + + private ArrowBuf assertEmptyLargeListOffsetBuffer(LargeListVector list) { + List buffers = list.getFieldBuffers(); + ArrowBuf offsetBuffer = buffers.get(1); + assertEquals(LargeListVector.OFFSET_WIDTH, offsetBuffer.readableBytes()); + assertTrue(offsetBuffer.capacity() >= LargeListVector.OFFSET_WIDTH); + assertEquals(0L, offsetBuffer.getLong(0)); + return offsetBuffer; + } + @Test public void testIsEmpty() { try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 1d6fa39f9e..df8548d6ce 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import io.netty.buffer.NettyArrowBuf; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.AutoCloseables; @@ -1135,6 +1136,38 @@ public void testGetBufferSizeFor() { } } + @Test + public void testEmptyListOffsetBuffer() { + try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.allocateNew(); + list.setValueCount(0); + + assertEmptyListOffsetBuffer(list); + } + } + + @Test + public void testUnallocatedEmptyListOffsetBufferCanBeUnwrappedAsNettyBuffer() { + try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + ArrowBuf offsetBuffer = assertEmptyListOffsetBuffer(list); + NettyArrowBuf nettyBuffer = NettyArrowBuf.unwrapBuffer(offsetBuffer); + assertEquals(BaseRepeatedValueVector.OFFSET_WIDTH, nettyBuffer.readableBytes()); + } + } + + private ArrowBuf assertEmptyListOffsetBuffer(ListVector list) { + List buffers = list.getFieldBuffers(); + ArrowBuf offsetBuffer = buffers.get(1); + assertEquals(BaseRepeatedValueVector.OFFSET_WIDTH, offsetBuffer.readableBytes()); + assertTrue(offsetBuffer.capacity() >= BaseRepeatedValueVector.OFFSET_WIDTH); + assertEquals(0, offsetBuffer.getInt(0)); + return offsetBuffer; + } + @Test public void testIsEmpty() { try (final ListVector vector = ListVector.empty("list", allocator)) { From f5cfa00fea1c104220c71b4fd0696bc975229748 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Mon, 22 Jun 2026 18:34:35 -0400 Subject: [PATCH 2/7] DX-122953 Avoid NettyArrowBuf test import --- .../java/org/apache/arrow/vector/TestLargeListVector.java | 7 ++----- .../test/java/org/apache/arrow/vector/TestListVector.java | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index fdeff3da5e..1d089c93e3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -25,7 +25,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import io.netty.buffer.NettyArrowBuf; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; @@ -968,14 +967,12 @@ public void testEmptyLargeListOffsetBuffer() { } @Test - public void testUnallocatedEmptyLargeListOffsetBufferCanBeUnwrappedAsNettyBuffer() { + public void testUnallocatedEmptyLargeListOffsetBuffer() { try (LargeListVector list = LargeListVector.empty("list", allocator)) { list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); list.setValueCount(0); - ArrowBuf offsetBuffer = assertEmptyLargeListOffsetBuffer(list); - NettyArrowBuf nettyBuffer = NettyArrowBuf.unwrapBuffer(offsetBuffer); - assertEquals(LargeListVector.OFFSET_WIDTH, nettyBuffer.readableBytes()); + assertEmptyLargeListOffsetBuffer(list); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index df8548d6ce..f128358394 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -26,7 +26,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import io.netty.buffer.NettyArrowBuf; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.AutoCloseables; @@ -1148,14 +1147,12 @@ public void testEmptyListOffsetBuffer() { } @Test - public void testUnallocatedEmptyListOffsetBufferCanBeUnwrappedAsNettyBuffer() { + public void testUnallocatedEmptyListOffsetBuffer() { try (ListVector list = ListVector.empty("list", allocator)) { list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); list.setValueCount(0); - ArrowBuf offsetBuffer = assertEmptyListOffsetBuffer(list); - NettyArrowBuf nettyBuffer = NettyArrowBuf.unwrapBuffer(offsetBuffer); - assertEquals(BaseRepeatedValueVector.OFFSET_WIDTH, nettyBuffer.readableBytes()); + assertEmptyListOffsetBuffer(list); } } From 10b259b665204ba0a135cf015f76be277f08cdc9 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Mon, 22 Jun 2026 18:35:23 -0400 Subject: [PATCH 3/7] MINOR: Fix workflow trailing whitespace --- .github/workflows/jarbuild.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index ae2981cd6d..286cc8b910 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -16,7 +16,7 @@ # under the License. name: JarBuild -on: +on: workflow_dispatch: inputs: arrow_branch: From e29398982eafb1efc128e9c625d1050ae16fc062 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Mon, 22 Jun 2026 21:46:25 -0400 Subject: [PATCH 4/7] MINOR: Reset list test helper capacity --- .../arrow/vector/testing/ValueVectorDataPopulator.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index 849fe6d667..d7bc987afd 100644 --- a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -633,9 +633,10 @@ public static void setVector(LargeVarCharVector vector, String... values) { /** Populate values for {@link ListVector}. */ public static void setVector(ListVector vector, List... values) { - vector.allocateNewSafe(); Types.MinorType type = Types.MinorType.INT; vector.addOrGetVector(FieldType.nullable(type.getType())); + vector.setInitialCapacity(values.length); + vector.allocateNewSafe(); IntVector dataVector = (IntVector) vector.getDataVector(); dataVector.allocateNew(); @@ -662,9 +663,10 @@ public static void setVector(ListVector vector, List... values) { /** Populate values for {@link LargeListVector}. */ public static void setVector(LargeListVector vector, List... values) { - vector.allocateNewSafe(); Types.MinorType type = Types.MinorType.INT; vector.addOrGetVector(FieldType.nullable(type.getType())); + vector.setInitialCapacity(values.length); + vector.allocateNewSafe(); IntVector dataVector = (IntVector) vector.getDataVector(); dataVector.allocateNew(); From 91dafa228356a2584cd85ba44519623add47831c Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Mon, 22 Jun 2026 21:52:34 -0400 Subject: [PATCH 5/7] Revert "MINOR: Reset list test helper capacity" This reverts commit e29398982eafb1efc128e9c625d1050ae16fc062. --- .../arrow/vector/testing/ValueVectorDataPopulator.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index d7bc987afd..849fe6d667 100644 --- a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -633,10 +633,9 @@ public static void setVector(LargeVarCharVector vector, String... values) { /** Populate values for {@link ListVector}. */ public static void setVector(ListVector vector, List... values) { + vector.allocateNewSafe(); Types.MinorType type = Types.MinorType.INT; vector.addOrGetVector(FieldType.nullable(type.getType())); - vector.setInitialCapacity(values.length); - vector.allocateNewSafe(); IntVector dataVector = (IntVector) vector.getDataVector(); dataVector.allocateNew(); @@ -663,10 +662,9 @@ public static void setVector(ListVector vector, List... values) { /** Populate values for {@link LargeListVector}. */ public static void setVector(LargeListVector vector, List... values) { + vector.allocateNewSafe(); Types.MinorType type = Types.MinorType.INT; vector.addOrGetVector(FieldType.nullable(type.getType())); - vector.setInitialCapacity(values.length); - vector.allocateNewSafe(); IntVector dataVector = (IntVector) vector.getDataVector(); dataVector.allocateNew(); From b9a3433e60eb4552d67b1d427a0a40e7f2078125 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Tue, 23 Jun 2026 08:14:26 -0400 Subject: [PATCH 6/7] DX-122953 Preserve list offset allocation size --- .../java/org/apache/arrow/vector/complex/LargeListVector.java | 2 ++ .../main/java/org/apache/arrow/vector/complex/ListVector.java | 2 ++ 2 files changed, 4 insertions(+) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 1f44474a73..b44f96c9d0 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -323,8 +323,10 @@ private void ensureEmptyOffsetBufferCapacity(long requiredCapacity) { if (offsetBuffer.capacity() >= requiredCapacity) { return; } + long previousOffsetAllocationSizeInBytes = offsetAllocationSizeInBytes; ArrowBuf oldOffsetBuffer = offsetBuffer; offsetBuffer = allocateOffsetBuffer(requiredCapacity); + offsetAllocationSizeInBytes = previousOffsetAllocationSizeInBytes; oldOffsetBuffer.getReferenceManager().release(); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 09b8b0b92c..84b1e2b882 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -281,8 +281,10 @@ private void ensureEmptyOffsetBufferCapacity(long requiredCapacity) { if (offsetBuffer.capacity() >= requiredCapacity) { return; } + long previousOffsetAllocationSizeInBytes = offsetAllocationSizeInBytes; ArrowBuf oldOffsetBuffer = offsetBuffer; offsetBuffer = allocateOffsetBuffer(requiredCapacity); + offsetAllocationSizeInBytes = previousOffsetAllocationSizeInBytes; oldOffsetBuffer.getReferenceManager().release(); } From 0ab8ab15c6dd23dc5f7e3960eb494d8692a33b7d Mon Sep 17 00:00:00 2001 From: Chris Pride Date: Wed, 24 Jun 2026 10:00:34 -0700 Subject: [PATCH 7/7] Ensure empty split transfers allocate list offsets splitAndTransfer should always return a valid allocated vector. A vector with no entries still needs, by spec, a value of 0 in its offsetBuffer. A list vector with a zero-capacity offsetBuffer is therefore not valid. This moves the empty-offset repair closer to where the invalid state is introduced by ensuring zero-length ListVector and LargeListVector split transfers materialize the required offset entry. Nested zero-length list transfers get the same treatment through the child transfer pair. One grey area remains: getFieldBuffers() triggering allocation for an otherwise unallocated vector is useful as a last-line guard for serialization/export, but it is not the cleanest owner of the allocation invariant. --- .../arrow/vector/complex/LargeListVector.java | 38 ++++++++++-------- .../arrow/vector/complex/ListVector.java | 4 ++ .../arrow/vector/TestLargeListVector.java | 39 +++++++++++++++++++ .../apache/arrow/vector/TestListVector.java | 39 +++++++++++++++++++ .../arrow/vector/TestSplitAndTransfer.java | 13 ++++--- 5 files changed, 112 insertions(+), 21 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index b44f96c9d0..5455215535 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -686,24 +686,30 @@ public void splitAndTransfer(int startIndex, int length) { startIndex, length, valueCount); - final long startPoint = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH); - final long sliceLength = - offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH) - startPoint; to.clear(); - to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); - /* splitAndTransfer offset buffer */ - for (int i = 0; i < length + 1; i++) { - final long relativeOffset = - offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeOffset); + if (length > 0) { + final long startPoint = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH); + final long sliceLength = + offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); + /* splitAndTransfer offset buffer */ + for (int i = 0; i < length + 1; i++) { + final long relativeOffset = + offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeOffset); + } + /* splitAndTransfer validity buffer */ + splitAndTransferValidityBuffer(startIndex, length, to); + /* splitAndTransfer data buffer */ + dataTransferPair.splitAndTransfer( + checkedCastToInt(startPoint), checkedCastToInt(sliceLength)); + to.lastSet = length - 1; + to.setValueCount(length); + } else { + to.ensureEmptyOffsetBufferCapacity(OFFSET_WIDTH); + dataTransferPair.splitAndTransfer(0, 0); + to.setValueCount(0); } - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataTransferPair.splitAndTransfer( - checkedCastToInt(startPoint), checkedCastToInt(sliceLength)); - to.lastSet = length - 1; - to.setValueCount(length); } /* diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 84b1e2b882..86ea184994 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -584,6 +584,10 @@ public void splitAndTransfer(int startIndex, int length) { dataTransferPair.splitAndTransfer(startPoint, sliceLength); to.lastSet = length - 1; to.setValueCount(length); + } else { + to.ensureEmptyOffsetBufferCapacity(OFFSET_WIDTH); + dataTransferPair.splitAndTransfer(0, 0); + to.setValueCount(0); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index 1d089c93e3..b654854362 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -976,6 +976,39 @@ public void testUnallocatedEmptyLargeListOffsetBuffer() { } } + @Test + public void testSplitAndTransferEmptyLargeListAllocatesOffsetBuffer() { + try (LargeListVector fromVector = LargeListVector.empty("fromVector", allocator); + LargeListVector toVector = LargeListVector.empty("toVector", allocator)) { + fromVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + fromVector.allocateNew(); + fromVector.setValueCount(0); + + TransferPair transferPair = fromVector.makeTransferPair(toVector); + transferPair.splitAndTransfer(0, 0); + + assertAllocatedEmptyLargeListOffsetBuffer(toVector); + } + } + + @Test + public void testSplitAndTransferEmptyNestedLargeListAllocatesOffsetBuffers() { + try (LargeListVector fromVector = LargeListVector.empty("fromVector", allocator); + LargeListVector toVector = LargeListVector.empty("toVector", allocator)) { + fromVector.addOrGetVector(FieldType.nullable(MinorType.LARGELIST.getType())); + LargeListVector childVector = (LargeListVector) fromVector.getDataVector(); + childVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + fromVector.allocateNew(); + fromVector.setValueCount(0); + + TransferPair transferPair = fromVector.makeTransferPair(toVector); + transferPair.splitAndTransfer(0, 0); + + assertAllocatedEmptyLargeListOffsetBuffer(toVector); + assertAllocatedEmptyLargeListOffsetBuffer((LargeListVector) toVector.getDataVector()); + } + } + private ArrowBuf assertEmptyLargeListOffsetBuffer(LargeListVector list) { List buffers = list.getFieldBuffers(); ArrowBuf offsetBuffer = buffers.get(1); @@ -985,6 +1018,12 @@ private ArrowBuf assertEmptyLargeListOffsetBuffer(LargeListVector list) { return offsetBuffer; } + private void assertAllocatedEmptyLargeListOffsetBuffer(LargeListVector list) { + ArrowBuf offsetBuffer = list.getOffsetBuffer(); + assertTrue(offsetBuffer.capacity() >= LargeListVector.OFFSET_WIDTH); + assertEquals(0L, offsetBuffer.getLong(0)); + } + @Test public void testIsEmpty() { try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index f128358394..30f85c0ec3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1156,6 +1156,39 @@ public void testUnallocatedEmptyListOffsetBuffer() { } } + @Test + public void testSplitAndTransferEmptyListAllocatesOffsetBuffer() { + try (ListVector fromVector = ListVector.empty("fromVector", allocator); + ListVector toVector = ListVector.empty("toVector", allocator)) { + fromVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + fromVector.allocateNew(); + fromVector.setValueCount(0); + + TransferPair transferPair = fromVector.makeTransferPair(toVector); + transferPair.splitAndTransfer(0, 0); + + assertAllocatedEmptyListOffsetBuffer(toVector); + } + } + + @Test + public void testSplitAndTransferEmptyNestedListAllocatesOffsetBuffers() { + try (ListVector fromVector = ListVector.empty("fromVector", allocator); + ListVector toVector = ListVector.empty("toVector", allocator)) { + fromVector.addOrGetVector(FieldType.nullable(MinorType.LIST.getType())); + ListVector childVector = (ListVector) fromVector.getDataVector(); + childVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + fromVector.allocateNew(); + fromVector.setValueCount(0); + + TransferPair transferPair = fromVector.makeTransferPair(toVector); + transferPair.splitAndTransfer(0, 0); + + assertAllocatedEmptyListOffsetBuffer(toVector); + assertAllocatedEmptyListOffsetBuffer((ListVector) toVector.getDataVector()); + } + } + private ArrowBuf assertEmptyListOffsetBuffer(ListVector list) { List buffers = list.getFieldBuffers(); ArrowBuf offsetBuffer = buffers.get(1); @@ -1165,6 +1198,12 @@ private ArrowBuf assertEmptyListOffsetBuffer(ListVector list) { return offsetBuffer; } + private void assertAllocatedEmptyListOffsetBuffer(ListVector list) { + ArrowBuf offsetBuffer = list.getOffsetBuffer(); + assertTrue(offsetBuffer.capacity() >= BaseRepeatedValueVector.OFFSET_WIDTH); + assertEquals(0, offsetBuffer.getInt(0)); + } + @Test public void testIsEmpty() { try (final ListVector vector = ListVector.empty("list", allocator)) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index adf4eba10c..725f1ef023 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -111,13 +111,16 @@ private void populateDenseUnionVector(final DenseUnionVector vector, int valueCo @Test public void testWithEmptyVector() { // MapVector use TransferImpl from ListVector - ListVector listVector = ListVector.empty("", allocator); - TransferPair transferPair = listVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); + try (ListVector listVector = ListVector.empty("", allocator)) { + TransferPair transferPair = listVector.getTransferPair(allocator); + try (ValueVector toVector = transferPair.getTo()) { + transferPair.splitAndTransfer(0, 0); + assertEquals(0, toVector.getValueCount()); + } + } // BaseFixedWidthVector IntVector intVector = new IntVector("", allocator); - transferPair = intVector.getTransferPair(allocator); + TransferPair transferPair = intVector.getTransferPair(allocator); transferPair.splitAndTransfer(0, 0); assertEquals(0, transferPair.getTo().getValueCount()); // BaseVariableWidthVector