diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml
index c310b18eeed..4b1ebd5b332 100644
--- a/vortex-array/Cargo.toml
+++ b/vortex-array/Cargo.toml
@@ -169,5 +169,9 @@ harness = false
 name = "take_fsl"
 harness = false
 
+[[bench]]
+name = "listview_rebuild"
+harness = false
+
 [package.metadata.cargo-machete]
 ignored = ["getrandom_v03"]
diff --git a/vortex-array/benches/listview_rebuild.rs b/vortex-array/benches/listview_rebuild.rs
new file mode 100644
index 00000000000..dec1f18fa41
--- /dev/null
+++ b/vortex-array/benches/listview_rebuild.rs
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Benchmarks for ListView rebuild across different element types and scenarios.
+//!
+//! The heuristic internally picks between bulk-take (single `take`) and per-list-copy (per-list
+//! `slice` + builder copy) strategies. These scenarios exercise both paths:
+//! - `i32_at_heuristic_threshold` straddles the decision boundary
+//! - `i8_large_lists_per_list_copy` and `i32_small_overlapping_bulk_take` exercise the extremes
+//! - `varbinview_always_bulk_take` and `struct_always_bulk_take` exercise special-case types
+
+#![allow(clippy::unwrap_used)]
+#![allow(clippy::cast_possible_truncation)]
+
+use divan::Bencher;
+use vortex_array::IntoArray;
+use vortex_array::arrays::ListViewArray;
+use vortex_array::arrays::ListViewRebuildMode;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::StructArray;
+use vortex_array::arrays::VarBinViewArray;
+use vortex_array::validity::Validity;
+use vortex_buffer::Buffer;
+use vortex_dtype::FieldNames;
+
+fn main() {
+    divan::main();
+}
+
+fn make_primitive_lv(num_lists: usize, list_size: usize, step: usize) -> ListViewArray {
+    let element_count = step * num_lists + list_size;
+    let elements = PrimitiveArray::from_iter(0..element_count as i32).into_array();
+    let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * step) as u32).collect();
+    let sizes: Buffer<u32> = std::iter::repeat_n(list_size as u32, num_lists).collect();
+    ListViewArray::new(
+        elements,
+        offsets.into_array(),
+        sizes.into_array(),
+        Validity::NonNullable,
+    )
+}
+
+fn make_i8_lv(num_lists: usize, list_size: usize, step: usize) -> ListViewArray {
+    let element_count = step * num_lists + list_size;
+    let elements = PrimitiveArray::from_iter((0..element_count).map(|i| i as i8)).into_array();
+    let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * step) as u32).collect();
+    let sizes: Buffer<u32> = std::iter::repeat_n(list_size as u32, num_lists).collect();
+    ListViewArray::new(
+        elements,
+        offsets.into_array(),
+        sizes.into_array(),
+        Validity::NonNullable,
+    )
+}
+
+fn make_varbinview_lv(num_lists: usize, list_size: usize, step: usize) -> ListViewArray {
+    let element_count = step * num_lists + list_size;
+    let strings: Vec<String> = (0..element_count)
+        .map(|i| {
+            if i % 3 == 0 {
+                format!("long-string-value-{i:06}")
+            } else {
+                format!("s{i}")
+            }
+        })
+        .collect();
+    let elements = VarBinViewArray::from_iter_str(strings.iter().map(|s| s.as_str())).into_array();
+    let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * step) as u32).collect();
+    let sizes: Buffer<u32> = std::iter::repeat_n(list_size as u32, num_lists).collect();
+    ListViewArray::new(
+        elements,
+        offsets.into_array(),
+        sizes.into_array(),
+        Validity::NonNullable,
+    )
+}
+
+fn make_struct_lv(num_lists: usize, list_size: usize, step: usize) -> ListViewArray {
+    let element_count = step * num_lists + list_size;
+    let field_a = PrimitiveArray::from_iter(0..element_count as i32).into_array();
+    let field_b = PrimitiveArray::from_iter((0..element_count).map(|i| i as f64)).into_array();
+    let elements = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![field_a, field_b],
+        element_count,
+        Validity::NonNullable,
+    )
+    .unwrap()
+    .into_array();
+
+    let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * step) as u32).collect();
+    let sizes: Buffer<u32> = std::iter::repeat_n(list_size as u32, num_lists).collect();
+    ListViewArray::new(
+        elements,
+        offsets.into_array(),
+        sizes.into_array(),
+        Validity::NonNullable,
+    )
+}
+
+// ── i32 around threshold (8+4)*64 = 768: exercises both strategies ──────────
+const HEURISTIC_THRESHOLD_SIZES: &[usize] = &[512, 768, 1024];
+
+#[divan::bench(args = HEURISTIC_THRESHOLD_SIZES)]
+fn i32_at_heuristic_threshold(bencher: Bencher, list_size: usize) {
+    let lv = make_primitive_lv(1_000, list_size, list_size);
+    bencher
+        .with_inputs(|| &lv)
+        .bench_refs(|lv| lv.rebuild(ListViewRebuildMode::MakeZeroCopyToList).unwrap());
+}
+
+// ── i8 with 65K-element lists: deep into per-list-copy territory ─────────────
+#[divan::bench]
+fn i8_large_lists_per_list_copy(bencher: Bencher) {
+    let lv = make_i8_lv(1_000, 65_536, 65_536);
+    bencher
+        .with_inputs(|| &lv)
+        .bench_refs(|lv| lv.rebuild(ListViewRebuildMode::MakeZeroCopyToList).unwrap());
+}
+
+// ── i32 with 8-element overlapping lists: deep into bulk-take territory ──────
+#[divan::bench]
+fn i32_small_overlapping_bulk_take(bencher: Bencher) {
+    let lv = make_primitive_lv(1_000, 8, 1);
+    bencher
+        .with_inputs(|| &lv)
+        .bench_refs(|lv| lv.rebuild(ListViewRebuildMode::MakeZeroCopyToList).unwrap());
+}
+
+// ── VarBinView: variable-width always uses bulk-take ─────────────────────────
+#[divan::bench]
+fn varbinview_always_bulk_take(bencher: Bencher) {
+    let lv = make_varbinview_lv(1_000, 1_024, 1_024);
+    bencher
+        .with_inputs(|| &lv)
+        .bench_refs(|lv| lv.rebuild(ListViewRebuildMode::MakeZeroCopyToList).unwrap());
+}
+
+// ── Struct{i32, f64}: struct always uses bulk-take ───────────────────────────
+#[divan::bench]
+fn struct_always_bulk_take(bencher: Bencher) {
+    let lv = make_struct_lv(1_000, 1_024, 1_024);
+    bencher
+        .with_inputs(|| &lv)
+        .bench_refs(|lv| lv.rebuild(ListViewRebuildMode::MakeZeroCopyToList).unwrap());
+}
diff --git a/vortex-array/src/arrays/listview/rebuild.rs b/vortex-array/src/arrays/listview/rebuild.rs
index d2714b671e6..0b29e9ad509 100644
--- a/vortex-array/src/arrays/listview/rebuild.rs
+++ b/vortex-array/src/arrays/listview/rebuild.rs
@@ -3,6 +3,7 @@
 use num_traits::FromPrimitive;
 use vortex_buffer::BufferMut;
+use vortex_dtype::DType;
 use vortex_dtype::IntegerPType;
 use vortex_dtype::Nullability;
 use vortex_dtype::match_each_integer_ptype;
@@ -103,12 +104,147 @@ impl ListViewArray {
         })
     }
 
-    // TODO(connor)[ListView]: We should benchmark if it is faster to use `take` on the elements
-    // instead of using a builder.
-    /// The inner function for `rebuild_zero_copy_to_list`, which rebuilds a `ListViewArray` piece
-    /// by piece.
+    /// Picks between [`naive_rebuild_bulk_take`](Self::naive_rebuild_bulk_take) and
+    /// [`naive_rebuild_per_list_copy`](Self::naive_rebuild_per_list_copy) via heuristic.
     fn naive_rebuild(
         &self,
     ) -> VortexResult {
+        let element_dtype = self
+            .dtype()
+            .as_list_element_opt()
+            .vortex_expect("somehow had a canonical list that was not a list");
+        let sizes_canonical = self.sizes().to_primitive();
+        let total: u64 = sizes_canonical
+            .as_slice::()
+            .iter()
+            .map(|s| (*s).as_() as u64)
+            .sum();
+        let use_per_list_copy = Self::should_use_per_list_copy(element_dtype, total, self.len());
+
+        if use_per_list_copy {
+            self.naive_rebuild_per_list_copy::()
+        } else {
+            self.naive_rebuild_bulk_take::()
+        }
+    }
+
+    /// Decides whether the per-list-copy strategy should be used over bulk-take.
+    ///
+    /// Empirical heuristic: use per-list-copy when `avg_list_size >= (8 + E) * 64 / fsl_divisor`.
+    /// - `8` — extra per-element cost of bulk-take (writing a u64 index that per-list-copy avoids)
+    /// - `E` — element byte width; larger elements make the builder's per-call overhead
+    ///   (slice dispatch, `to_primitive()`, validity mask) relatively more expensive,
+    ///   raising the threshold before per-list-copy's sequential memcpy wins
+    /// - `64` — base number of elements per list needed to amortize per-list-copy's ~200ns
+    ///   per-list overhead (upfront canonicalize + per-list slice dispatch)
+    /// - `fsl_divisor` — for `FixedSizeList<_, N>`, `clamp(N, 1, 2)`: bulk-take expands each
+    ///   outer index into N inner sub-indices, so the threshold is lowered
+    fn should_use_per_list_copy(
+        element_dtype: &DType,
+        total_output_elements: u64,
+        num_lists: usize,
+    ) -> bool {
+        if num_lists == 0 {
+            return false;
+        }
+        let avg = total_output_elements / num_lists as u64;
+        match element_dtype {
+            // Struct: per-list-copy creates separate per-field builders and reconstructs
+            // the structure element-by-element, making it 10-40x slower than bulk-take at
+            // all tested sizes (2-field and 4-field). This overhead is fundamental to the
+            // builder architecture and does not amortize with larger lists.
+            DType::Struct(..) => false,
+            // FixedSizeList: bulk-take expands each outer index into N inner
+            // sub-indices, making it ~2x more expensive per element than flat types.
+            // We account for this by dividing the base threshold by clamp(N, 1, 2):
+            // - N >= 2: threshold halved (e.g. FSL of i32 → (8+4)*32 = 384)
+            // - N == 1: threshold unchanged (same as flat types)
+            // Uses the *inner* element size, not the total FSL size, because per-list-copy
+            // extends the inner flat buffer directly.
+            DType::FixedSizeList(inner, n, ..) => inner
+                .element_size()
+                .is_some_and(|e| avg >= (8 + e as u64) * 64 / (*n as u64).clamp(1, 2)),
+            _ => {
+                if let Some(e) = element_dtype.element_size() {
+                    // Flat fixed-width types (primitives, bools): the base formula.
+                    // Both strategies copy E bytes per element, but bulk-take additionally
+                    // writes an 8-byte u64 index and per-list-copy has per-list overhead
+                    // (~200ns for slice dispatch + builder calls). The (8 + E) factor
+                    // also captures the builder's higher per-element overhead vs SIMD
+                    // bulk-take. Validated across i8/i16/i32/f64/bool.
+                    avg >= (8 + e as u64) * 64
+                } else {
+                    // List of fixed-width elements: per-list-copy does cheap offset slicing + bulk
+                    // inner element copy, crossing over at ~1024. We use a fixed threshold
+                    // because inner list sizes aren't cheaply observable here.
+                    // Utf8/Binary: always bulk-take — their builder does expensive Arc
+                    // ref-count bumps and view metadata copying (crossover >50K).
+                    // List of variable-width elements: always bulk-take (same reason as Utf8/Binary).
+                    element_dtype
+                        .as_list_element_opt()
+                        .and_then(|d| d.element_size())
+                        .is_some_and(|_| avg >= 1024)
+                }
+            }
+        }
+    }
+
+    /// Rebuilds elements using the **bulk-take** strategy: collect all element indices into a flat
+    /// `BufferMut`, perform a single bulk `take`, then canonicalize.
+    fn naive_rebuild_bulk_take(
+        &self,
+    ) -> VortexResult {
+        let offsets_canonical = self.offsets().to_primitive();
+        let offsets_slice = offsets_canonical.as_slice::();
+        let sizes_canonical = self.sizes().to_primitive();
+        let sizes_slice = sizes_canonical.as_slice::();
+
+        let len = offsets_slice.len();
+
+        let mut new_offsets = BufferMut::::with_capacity(len);
+        let mut new_sizes = BufferMut::::with_capacity(len);
+        let mut take_indices = BufferMut::::with_capacity(self.elements().len());
+
+        let mut n_elements = NewOffset::zero();
+        for index in 0..len {
+            if !self.is_valid(index)? {
+                new_offsets.push(n_elements);
+                new_sizes.push(S::zero());
+                continue;
+            }
+
+            let offset = offsets_slice[index];
+            let size = sizes_slice[index];
+            let start = offset.as_();
+            let stop = start + size.as_();
+
+            new_offsets.push(n_elements);
+            new_sizes.push(size);
+            take_indices.extend(start as u64..stop as u64);
+            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
+        }
+
+        let elements = self
+            .elements()
+            .take(take_indices.into_array())?
+            .to_canonical()?
+            .into_array();
+        let offsets = new_offsets.into_array();
+        let sizes = new_sizes.into_array();
+
+        // SAFETY: same invariants as `naive_rebuild_per_list_copy` — offsets are sequential and
+        // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
+        // array is preserved from the original.
+        Ok(unsafe {
+            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
+                .with_zero_copy_to_list(true)
+        })
+    }
+
+    /// Rebuilds elements using the **per-list-copy** strategy: canonicalize elements upfront, then
+    /// per-list `slice` + `extend_from_array` into a typed builder.
+    fn naive_rebuild_per_list_copy(
+        &self,
     ) -> VortexResult {
         let element_dtype = self
             .dtype()
@@ -262,9 +398,13 @@ impl ListViewArray {
 }
 
 #[cfg(test)]
+#[allow(clippy::cast_possible_truncation)]
 mod tests {
+    use rstest::rstest;
     use vortex_buffer::BitBuffer;
+    use vortex_dtype::DType;
     use vortex_dtype::Nullability;
+    use vortex_dtype::PType;
     use vortex_error::VortexResult;
 
     use super::ListViewRebuildMode;
@@ -448,4 +588,81 @@ mod tests {
         );
         Ok(())
     }
+
+    // ── should_use_per_list_copy heuristic tests ───────────────────────────
+
+    #[test]
+    fn heuristic_rejects_zero_lists() {
+        let prim = DType::Primitive(PType::I32, Nullability::NonNullable);
+        assert!(!ListViewArray::should_use_per_list_copy(&prim, 0, 0));
+    }
+
+    #[test]
+    fn heuristic_rejects_struct_always() {
+        let struct_dtype = DType::struct_(
+            [
+                ("a", DType::Primitive(PType::I32, Nullability::NonNullable)),
+                ("b", DType::Primitive(PType::F64, Nullability::NonNullable)),
+            ],
+            Nullability::NonNullable,
+        );
+        assert!(!ListViewArray::should_use_per_list_copy(
+            &struct_dtype,
+            100_000,
+            100
+        ));
+    }
+
+    #[test]
+    fn heuristic_accepts_fsl() {
+        use std::sync::Arc;
+        // FixedSizeList: threshold = (8+4)*64/2 = 384.
+        let fsl = DType::FixedSizeList(
+            Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)),
+            4,
+            Nullability::NonNullable,
+        );
+        assert!(!ListViewArray::should_use_per_list_copy(
+            &fsl, 256_000, 1_000
+        ));
+        assert!(ListViewArray::should_use_per_list_copy(
+            &fsl, 384_000, 1_000
+        ));
+    }
+
+    #[test]
+    fn heuristic_accepts_list_of_fixed_width() {
+        let list_i32 = DType::list(
+            DType::Primitive(PType::I32, Nullability::NonNullable),
+            Nullability::NonNullable,
+        );
+        // List: threshold = 1024.
+        assert!(!ListViewArray::should_use_per_list_copy(
+            &list_i32, 512_000, 1_000
+        ));
+        assert!(ListViewArray::should_use_per_list_copy(
+            &list_i32, 1_024_000, 1_000
+        ));
+    }
+
+    #[rstest]
+    #[case(PType::I32, 512, false)] // i32: threshold=(8+4)*64=768, 512<768
+    #[case(PType::I32, 768, true)] // i32: 768>=768
+    #[case(PType::I8, 512, false)] // i8: threshold=(8+1)*64=576, 512<576
+    #[case(PType::I8, 576, true)] // i8: 576>=576
+    #[case(PType::F64, 512, false)] // f64: threshold=(8+8)*64=1024, 512<1024
+    #[case(PType::F64, 1024, true)] // f64: 1024>=1024
+    fn heuristic_threshold(
+        #[case] ptype: PType,
+        #[case] avg: u64,
+        #[case] expect_per_list_copy: bool,
+    ) {
+        let dtype = DType::Primitive(ptype, Nullability::NonNullable);
+        let num_lists = 1000_usize;
+        let total = avg * num_lists as u64;
+        assert_eq!(
+            ListViewArray::should_use_per_list_copy(&dtype, total, num_lists),
+            expect_per_list_copy,
+        );
+    }
 }
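
Note: as a rough cross-check of the heuristic documented on `should_use_per_list_copy`, the standalone sketch below recomputes the thresholds exercised by the benchmarks and unit tests above. It is illustrative only and not part of the patch; `per_list_copy_threshold`, `element_bytes`, and `fsl_width` are hypothetical names.

    // Sketch of the documented decision rule: per-list-copy wins once
    // avg_list_size >= (8 + E) * 64 / fsl_divisor, where E is the element byte
    // width and fsl_divisor is clamp(N, 1, 2) for FixedSizeList (1 otherwise).
    fn per_list_copy_threshold(element_bytes: u64, fsl_width: Option<u64>) -> u64 {
        let divisor = fsl_width.map_or(1, |n| n.clamp(1, 2));
        (8 + element_bytes) * 64 / divisor
    }

    fn main() {
        // These match the values used in the benchmarks and rstest cases:
        assert_eq!(per_list_copy_threshold(1, None), 576); // i8
        assert_eq!(per_list_copy_threshold(4, None), 768); // i32
        assert_eq!(per_list_copy_threshold(8, None), 1024); // f64
        assert_eq!(per_list_copy_threshold(4, Some(4)), 384); // FixedSizeList of i32
        println!("thresholds check out");
    }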
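Assuming the usual Cargo workspace layout, the suite registered in vortex-array/Cargo.toml should run with `cargo bench -p vortex-array --bench listview_rebuild`; divan benchmarks can typically be narrowed to one case by passing its name (e.g. `i32_at_heuristic_threshold`) as a trailing filter argument.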