Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions rust/arrow/src/array/equal/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ pub(super) fn boolean_equal(
let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();

let lhs_start = lhs.offset() + lhs_start;
let rhs_start = rhs.offset() + rhs_start;

(0..len).all(|i| {
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
Expand All @@ -84,13 +87,7 @@ pub(super) fn boolean_equal(

lhs_is_null
|| (lhs_is_null == rhs_is_null)
&& equal_bits(
lhs_values,
rhs_values,
lhs_pos + lhs.offset(),
rhs_pos + rhs.offset(),
1,
)
&& equal_bits(lhs_values, rhs_values, lhs_pos, rhs_pos, 1)
})
}
}
2 changes: 1 addition & 1 deletion rust/arrow/src/array/equal/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub(super) fn decimal_equal(
let rhs_pos = rhs_start + i;

let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + lhs.offset());
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

while reviewing, got this one also. xD


lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/equal/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ pub(super) fn dictionary_equal<T: ArrowNativeType>(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;

let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());

lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/equal/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ pub(super) fn fixed_binary_equal(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;

let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());

lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/equal/fixed_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ pub(super) fn fixed_list_equal(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;

let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());

lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down
6 changes: 3 additions & 3 deletions rust/arrow/src/array/equal/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,15 +144,15 @@ pub(super) fn list_equal<T: OffsetSizeTrait>(
)
} else {
// get a ref of the parent null buffer bytes, to use in testing for nullness
let lhs_null_bytes = rhs_nulls.unwrap().as_slice();
let lhs_null_bytes = lhs_nulls.unwrap().as_slice();
let rhs_null_bytes = rhs_nulls.unwrap().as_slice();
// with nulls, we need to compare item by item whenever it is not null
(0..len).all(|i| {
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;

let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is another lhs/rhs mixup a few lines above (148), github won't let me comment on that line:

        // get a ref of the parent null buffer bytes, to use in testing for nullness
        let lhs_null_bytes = rhs_nulls.unwrap().as_slice();
                             ^

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirmed, at line 147

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great catch. Pushed a fix to it.

let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());

lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down
62 changes: 50 additions & 12 deletions rust/arrow/src/array/equal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,10 @@ mod tests {

let b = BooleanArray::from(vec![false, false, false]).data();
test_equal(a.as_ref(), b.as_ref(), false);
}

// Test the case where null_count > 0

#[test]
fn test_boolean_equal_null() {
let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data();
let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]).data();
test_equal(a.as_ref(), b.as_ref(), true);
Expand All @@ -341,23 +342,25 @@ mod tests {

let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]).data();
test_equal(a.as_ref(), b.as_ref(), false);
}

// Test the case where offset != 0

#[test]
fn test_boolean_equal_offset() {
let a =
BooleanArray::from(vec![false, true, false, true, false, false, true]).data();
let b =
BooleanArray::from(vec![false, false, false, true, false, true, true]).data();
BooleanArray::from(vec![true, false, false, false, true, false, true, true])
.data();
assert_eq!(equal(a.as_ref(), b.as_ref()), false);
assert_eq!(equal(b.as_ref(), a.as_ref()), false);

let a_slice = a.slice(2, 3);
let b_slice = b.slice(2, 3);
let b_slice = b.slice(3, 3);
assert_eq!(equal(&a_slice, &b_slice), true);
assert_eq!(equal(&b_slice, &a_slice), true);

let a_slice = a.slice(3, 4);
let b_slice = b.slice(3, 4);
let b_slice = b.slice(4, 4);
assert_eq!(equal(&a_slice, &b_slice), false);
assert_eq!(equal(&b_slice, &a_slice), false);

Expand Down Expand Up @@ -437,6 +440,20 @@ mod tests {
(2, 1),
true,
),
(
vec![None, Some(2), None],
(1, 1),
vec![None, None, Some(2)],
(2, 1),
true,
),
(
vec![Some(1), None, Some(2), None, Some(3)],
(2, 2),
vec![None, Some(2), None, Some(3)],
(1, 2),
true,
),
];

for (lhs, slice_lhs, rhs, slice_rhs, expected) in cases {
Expand Down Expand Up @@ -541,6 +558,26 @@ mod tests {
test_generic_binary_equal::<i64>()
}

#[test]
fn test_string_offset() {
let a = StringArray::from(vec![Some("a"), None, Some("b")]).data();
let a = a.slice(2, 1);
let b = StringArray::from(vec![Some("b")]).data();

test_equal(&a, b.as_ref(), true);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be missing something, but I tried a few more test cases and there still seems to be something wrong.

Namely I would expect this test to pass:

    #[test]
    fn test_string_offset_larger() {
        let a = StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data();
        let b = StringArray::from(vec![None, Some("b"), None, Some("c")]).data();

        test_equal(&a.slice(2,2), &b.slice(0,2), false);
        test_equal(&a.slice(2,2), &b.slice(1,2), true);
        test_equal(&a.slice(2,2), &b.slice(2,2), false);
    }

But instead if fails

---- array::equal::tests::test_string_offset_larger stdout ----
thread 'array::equal::tests::test_string_offset_larger' panicked at 'assertion failed: `(left == right)`
  left: `false`,
 right: `true`: 
ArrayData { data_type: Utf8, len: 2, null_count: 1, offset: 2, buffers: [Buffer { data: Bytes { ptr: 0x7fbc90504480, len: 24, data: [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0] }, offset: 0 }, Buffer { data: Bytes { ptr: 0x7fbc90504500, len: 3, data: [97, 98, 99] }, offset: 0 }], child_data: [], null_bitmap: Some(Bitmap { bits: Buffer { data: Bytes { ptr: 0x7fbc90504580, len: 1, data: [21] }, offset: 0 } }) }
ArrayData { data_type: Utf8, len: 2, null_count: 1, offset: 1, buffers: [Buffer { data: Bytes { ptr: 0x7fbc90604700, len: 20, data: [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0] }, offset: 0 }, Buffer { data: Bytes { ptr: 0x7fbc90604880, len: 2, data: [98, 99] }, offset: 0 }], child_data: [], null_bitmap: Some(Bitmap { bits: Buffer { data: Bytes { ptr: 0x7fbc90604780, len: 1, data: [10] }, offset: 0 } }) }', arrow/src/array/equal/mod.rs:457:9
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

#[test]
fn test_string_offset_larger() {
let a =
StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data();
let b = StringArray::from(vec![None, Some("b"), None, Some("c")]).data();

test_equal(&a.slice(2, 2), &b.slice(0, 2), false);
test_equal(&a.slice(2, 2), &b.slice(1, 2), true);
test_equal(&a.slice(2, 2), &b.slice(2, 2), false);
}

#[test]
fn test_null() {
let a = NullArray::new(2).data();
Expand Down Expand Up @@ -781,6 +818,7 @@ mod tests {
None,
]);
let b = create_decimal_array(&[
None,
Some(8_887_000_000),
None,
None,
Expand All @@ -790,23 +828,23 @@ mod tests {
]);

let a_slice = a.slice(0, 3);
let b_slice = b.slice(0, 3);
let b_slice = b.slice(1, 3);
test_equal(&a_slice, &b_slice, true);

let a_slice = a.slice(0, 5);
let b_slice = b.slice(0, 5);
let b_slice = b.slice(1, 5);
test_equal(&a_slice, &b_slice, false);

let a_slice = a.slice(4, 1);
let b_slice = b.slice(4, 1);
let b_slice = b.slice(5, 1);
test_equal(&a_slice, &b_slice, true);

let a_slice = a.slice(3, 3);
let b_slice = b.slice(3, 3);
let b_slice = b.slice(4, 3);
test_equal(&a_slice, &b_slice, false);

let a_slice = a.slice(1, 3);
let b_slice = b.slice(1, 3);
let b_slice = b.slice(2, 3);
test_equal(&a_slice, &b_slice, false);

let b = create_decimal_array(&[
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/equal/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ pub(super) fn primitive_equal<T>(
(0..len).all(|i| {
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());

lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/equal/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ pub(super) fn struct_equal(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;
// if both struct and child had no null buffers,
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());

lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down
10 changes: 5 additions & 5 deletions rust/arrow/src/array/equal/variable_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
let lhs_offsets = lhs.buffer::<T>(0);
let rhs_offsets = rhs.buffer::<T>(0);

// these are bytes, and thus the offset does not need to be multiplied
let lhs_values = &lhs.buffers()[1].as_slice()[lhs.offset()..];
let rhs_values = &rhs.buffers()[1].as_slice()[rhs.offset()..];
// the offsets of the `ArrayData` are ignored as they are only applied to the offset buffer.
let lhs_values = lhs.buffers()[1].as_slice();
let rhs_values = rhs.buffers()[1].as_slice();

let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
Expand All @@ -88,10 +88,10 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(

// the null bits can still be `None`, so we don't unwrap
let lhs_is_null = !lhs_nulls
.map(|v| get_bit(v.as_slice(), lhs_pos))
.map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
.unwrap_or(false);
let rhs_is_null = !rhs_nulls
.map(|v| get_bit(v.as_slice(), rhs_pos))
.map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
.unwrap_or(false);

lhs_is_null
Expand Down