diff --git a/rust/arrow/src/array/equal/boolean.rs b/rust/arrow/src/array/equal/boolean.rs index e777d4dbe8e..35c9786e49f 100644 --- a/rust/arrow/src/array/equal/boolean.rs +++ b/rust/arrow/src/array/equal/boolean.rs @@ -76,6 +76,9 @@ pub(super) fn boolean_equal( let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice(); let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice(); + let lhs_start = lhs.offset() + lhs_start; + let rhs_start = rhs.offset() + rhs_start; + (0..len).all(|i| { let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; @@ -84,13 +87,7 @@ pub(super) fn boolean_equal( lhs_is_null || (lhs_is_null == rhs_is_null) - && equal_bits( - lhs_values, - rhs_values, - lhs_pos + lhs.offset(), - rhs_pos + rhs.offset(), - 1, - ) + && equal_bits(lhs_values, rhs_values, lhs_pos, rhs_pos, 1) }) } } diff --git a/rust/arrow/src/array/equal/decimal.rs b/rust/arrow/src/array/equal/decimal.rs index 0924835b954..1ee6ec9b543 100644 --- a/rust/arrow/src/array/equal/decimal.rs +++ b/rust/arrow/src/array/equal/decimal.rs @@ -60,7 +60,7 @@ pub(super) fn decimal_equal( let rhs_pos = rhs_start + i; let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset()); - let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + lhs.offset()); + let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset()); lhs_is_null || (lhs_is_null == rhs_is_null) diff --git a/rust/arrow/src/array/equal/dictionary.rs b/rust/arrow/src/array/equal/dictionary.rs index 4f4c148e8f0..81eedce9697 100644 --- a/rust/arrow/src/array/equal/dictionary.rs +++ b/rust/arrow/src/array/equal/dictionary.rs @@ -63,8 +63,8 @@ pub(super) fn dictionary_equal( let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; - let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos); - let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos); + let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset()); + let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset()); lhs_is_null || (lhs_is_null == rhs_is_null) diff --git a/rust/arrow/src/array/equal/fixed_binary.rs b/rust/arrow/src/array/equal/fixed_binary.rs index 57158311703..5f8f93232d5 100644 --- a/rust/arrow/src/array/equal/fixed_binary.rs +++ b/rust/arrow/src/array/equal/fixed_binary.rs @@ -59,8 +59,8 @@ pub(super) fn fixed_binary_equal( let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; - let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos); - let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos); + let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset()); + let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset()); lhs_is_null || (lhs_is_null == rhs_is_null) diff --git a/rust/arrow/src/array/equal/fixed_list.rs b/rust/arrow/src/array/equal/fixed_list.rs index f5065bb2918..a107285e5da 100644 --- a/rust/arrow/src/array/equal/fixed_list.rs +++ b/rust/arrow/src/array/equal/fixed_list.rs @@ -61,8 +61,8 @@ pub(super) fn fixed_list_equal( let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; - let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos); - let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos); + let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset()); + let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset()); lhs_is_null || (lhs_is_null == rhs_is_null) diff --git a/rust/arrow/src/array/equal/list.rs b/rust/arrow/src/array/equal/list.rs index a7a6bd334c1..7acd47796c3 100644 --- a/rust/arrow/src/array/equal/list.rs +++ b/rust/arrow/src/array/equal/list.rs @@ -144,15 +144,15 @@ pub(super) fn list_equal( ) } else { // get a ref of the parent null buffer bytes, to use in testing for nullness - let lhs_null_bytes = rhs_nulls.unwrap().as_slice(); + let lhs_null_bytes = lhs_nulls.unwrap().as_slice(); let rhs_null_bytes = rhs_nulls.unwrap().as_slice(); // with nulls, we need to compare item by item whenever it is not null (0..len).all(|i| { let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; - let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos); - let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos); + let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset()); + let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset()); lhs_is_null || (lhs_is_null == rhs_is_null) diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs index e2ee9bc70de..67dde553445 100644 --- a/rust/arrow/src/array/equal/mod.rs +++ b/rust/arrow/src/array/equal/mod.rs @@ -329,9 +329,10 @@ mod tests { let b = BooleanArray::from(vec![false, false, false]).data(); test_equal(a.as_ref(), b.as_ref(), false); + } - // Test the case where null_count > 0 - + #[test] + fn test_boolean_equal_null() { let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data(); let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data(); test_equal(a.as_ref(), b.as_ref(), true); @@ -341,23 +342,25 @@ mod tests { let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]).data(); test_equal(a.as_ref(), b.as_ref(), false); + } - // Test the case where offset != 0 - + #[test] + fn test_boolean_equal_offset() { let a = BooleanArray::from(vec![false, true, false, true, false, false, true]).data(); let b = - BooleanArray::from(vec![false, false, false, true, false, true, true]).data(); + BooleanArray::from(vec![true, false, false, false, true, false, true, true]) + .data(); assert_eq!(equal(a.as_ref(), b.as_ref()), false); assert_eq!(equal(b.as_ref(), a.as_ref()), false); let a_slice = a.slice(2, 3); - let b_slice = b.slice(2, 3); + let b_slice = b.slice(3, 3); assert_eq!(equal(&a_slice, &b_slice), true); assert_eq!(equal(&b_slice, &a_slice), true); let a_slice = a.slice(3, 4); - let b_slice = b.slice(3, 4); + let b_slice = b.slice(4, 4); assert_eq!(equal(&a_slice, &b_slice), false); assert_eq!(equal(&b_slice, &a_slice), false); @@ -437,6 +440,20 @@ mod tests { (2, 1), true, ), + ( + vec![None, Some(2), None], + (1, 1), + vec![None, None, Some(2)], + (2, 1), + true, + ), + ( + vec![Some(1), None, Some(2), None, Some(3)], + (2, 2), + vec![None, Some(2), None, Some(3)], + (1, 2), + true, + ), ]; for (lhs, slice_lhs, rhs, slice_rhs, expected) in cases { @@ -541,6 +558,26 @@ mod tests { test_generic_binary_equal::() } + #[test] + fn test_string_offset() { + let a = StringArray::from(vec![Some("a"), None, Some("b")]).data(); + let a = a.slice(2, 1); + let b = StringArray::from(vec![Some("b")]).data(); + + test_equal(&a, b.as_ref(), true); + } + + #[test] + fn test_string_offset_larger() { + let a = + StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data(); + let b = StringArray::from(vec![None, Some("b"), None, Some("c")]).data(); + + test_equal(&a.slice(2, 2), &b.slice(0, 2), false); + test_equal(&a.slice(2, 2), &b.slice(1, 2), true); + test_equal(&a.slice(2, 2), &b.slice(2, 2), false); + } + #[test] fn test_null() { let a = NullArray::new(2).data(); @@ -781,6 +818,7 @@ mod tests { None, ]); let b = create_decimal_array(&[ + None, Some(8_887_000_000), None, None, @@ -790,23 +828,23 @@ mod tests { ]); let a_slice = a.slice(0, 3); - let b_slice = b.slice(0, 3); + let b_slice = b.slice(1, 3); test_equal(&a_slice, &b_slice, true); let a_slice = a.slice(0, 5); - let b_slice = b.slice(0, 5); + let b_slice = b.slice(1, 5); test_equal(&a_slice, &b_slice, false); let a_slice = a.slice(4, 1); - let b_slice = b.slice(4, 1); + let b_slice = b.slice(5, 1); test_equal(&a_slice, &b_slice, true); let a_slice = a.slice(3, 3); - let b_slice = b.slice(3, 3); + let b_slice = b.slice(4, 3); test_equal(&a_slice, &b_slice, false); let a_slice = a.slice(1, 3); - let b_slice = b.slice(1, 3); + let b_slice = b.slice(2, 3); test_equal(&a_slice, &b_slice, false); let b = create_decimal_array(&[ diff --git a/rust/arrow/src/array/equal/primitive.rs b/rust/arrow/src/array/equal/primitive.rs index ff061d16607..db7587915c8 100644 --- a/rust/arrow/src/array/equal/primitive.rs +++ b/rust/arrow/src/array/equal/primitive.rs @@ -56,8 +56,8 @@ pub(super) fn primitive_equal( (0..len).all(|i| { let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; - let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos); - let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos); + let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset()); + let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset()); lhs_is_null || (lhs_is_null == rhs_is_null) diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs index 6ec71837b86..b3cc4029e9e 100644 --- a/rust/arrow/src/array/equal/structure.rs +++ b/rust/arrow/src/array/equal/structure.rs @@ -79,8 +79,8 @@ pub(super) fn struct_equal( let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; // if both struct and child had no null buffers, - let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos); - let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos); + let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset()); + let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset()); lhs_is_null || (lhs_is_null == rhs_is_null) diff --git a/rust/arrow/src/array/equal/variable_size.rs b/rust/arrow/src/array/equal/variable_size.rs index 94fdf6b2980..ecb3bc2a3c2 100644 --- a/rust/arrow/src/array/equal/variable_size.rs +++ b/rust/arrow/src/array/equal/variable_size.rs @@ -60,9 +60,9 @@ pub(super) fn variable_sized_equal( let lhs_offsets = lhs.buffer::(0); let rhs_offsets = rhs.buffer::(0); - // these are bytes, and thus the offset does not need to be multiplied - let lhs_values = &lhs.buffers()[1].as_slice()[lhs.offset()..]; - let rhs_values = &rhs.buffers()[1].as_slice()[rhs.offset()..]; + // the offsets of the `ArrayData` are ignored as they are only applied to the offset buffer. + let lhs_values = lhs.buffers()[1].as_slice(); + let rhs_values = rhs.buffers()[1].as_slice(); let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len); let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len); @@ -88,10 +88,10 @@ pub(super) fn variable_sized_equal( // the null bits can still be `None`, so we don't unwrap let lhs_is_null = !lhs_nulls - .map(|v| get_bit(v.as_slice(), lhs_pos)) + .map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos)) .unwrap_or(false); let rhs_is_null = !rhs_nulls - .map(|v| get_bit(v.as_slice(), rhs_pos)) + .map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos)) .unwrap_or(false); lhs_is_null