From 61a6763dabcc4988dd2f4ed645e6d34ba19cd0c2 Mon Sep 17 00:00:00 2001
From: Robsdedude
Date: Wed, 5 Nov 2025 12:50:12 +0100
Subject: [PATCH] Fix vector type native adapters

`Vector.from_native` and `Vector.to_native` both used to have alternative
implementations using NumPy or the Rust extensions, when available, to speed
up the process. It turns out that neither followed the reference Python
implementation (internally relying on `struct.pack`/`struct.unpack`) for
32-bit float values:

* Python 3.14+ preserves the signaling/quieting bit of `NaN` values, whereas
  NumPy doesn't, and Rust doesn't make strong guarantees about how f64s are
  cast to f32s:
  https://github.com/rust-lang/rfcs/blob/master/text/3514-float-semantics.md
* Both NumPy and Rust turn large f64s into `inf` when cast to f32, while
  Python raises an `OverflowError`.

To harmonize the behavior, only the Python implementation remains. It has
been sped up significantly: instead of iterating over the elements and
calling `struct.(un)pack` once per element, it now makes use of the fact that
the format string allows specifying the number of values to (un)pack. With
this, the Python implementation was never more than 5 times slower than what
Rust/NumPy offered, and for low-dimension vectors it was even faster.
Maintaining multiple implementations and assuring their behavioral parity is
not deemed worth the effort for such a small performance gain.

The same multi-value (un)pack approach has been applied to the pure Python
implementation of the byte swapping, improving its performance by a factor of
roughly 30 (still about a factor of 10 slower than NumPy/Rust).
---
 src/neo4j/_typing.py                    |   2 +
 src/neo4j/vector.py                     | 510 +++++++-----------
 tests/unit/common/vector/test_vector.py | 428 +++++++++++---------
 3 files changed, 389 insertions(+), 551 deletions(-)

diff --git a/src/neo4j/_typing.py b/src/neo4j/_typing.py
index 1961715c..2dfd9415 100644
--- a/src/neo4j/_typing.py
+++ b/src/neo4j/_typing.py
@@ -30,6 +30,7 @@
     Mapping,
     Sequence,
     Set,
+    Sized,
     ValuesView,
 )
 from importlib.util import find_spec as _find_spec
@@ -79,6 +80,7 @@
     "Protocol",
     "Sequence",
     "Set",
+    "Sized",
     "SupportsIndex",
     "TextIO",
     "TypeAlias",
diff --git a/src/neo4j/vector.py b/src/neo4j/vector.py
index 41a3c2e9..01dfc898 100644
--- a/src/neo4j/vector.py
+++ b/src/neo4j/vector.py
@@ -43,11 +43,9 @@
 
 
 try:
-    from ._rust import vector as _vec_rust
     from ._rust.vector import swap_endian as _swap_endian_unchecked_rust
 except ImportError:
     _swap_endian_unchecked_rust = None
-    _vec_rust = None
 
 
 __all__ = [
@@ -602,11 +600,19 @@ def _swap_endian_unchecked_np(type_size: int, data: bytes, /) -> bytes:
 
 
 def _swap_endian_unchecked_py(type_size: int, data: bytes, /) -> bytes:
-    return bytes(
-        byte
-        for i in range(0, len(data), type_size)
-        for byte in data[i : i + type_size][::-1]
-    )
+    match type_size:
+        case 2:
+            fmt = "h"
+        case 4:
+            fmt = "i"
+        case 8:
+            fmt = "q"
+        case _:
+            raise ValueError(f"Unsupported type size: {type_size}")
+    count = len(data) // type_size
+    fmt_be = f">{count}{fmt}"
+    fmt_le = f"<{count}{fmt}"
+    return _struct.pack(fmt_be, *_struct.unpack(fmt_le, data))
 
 
 if _swap_endian_unchecked_rust is not None:
@@ -777,58 +783,21 @@ class _VecF64(_InnerVectorFloat):
     cypher_inner_type_repr = "FLOAT NOT NULL"
 
     @classmethod
-    def _from_native_rust(cls, data: _t.Iterable[object], /) -> _t.Self:
-        return cls(_vec_rust.vec_f64_from_native(data))
+    def from_native(cls, data: _t.Iterable[object], /) -> _t.Self:
+        if not isinstance(data, _t.Sized):
+            data = tuple(data)
+        if not 
all(isinstance(item, float) for item in data): + for item in data: + if not isinstance(item, float): + raise TypeError( + f"Cannot build f64 vector from {type(item).__name__}, " + "expected float." + ) + return cls(_struct.pack(f">{len(data)}d", *data)) - @classmethod - def _from_native_np(cls, data: _t.Iterable[object], /) -> _t.Self: - data = tuple(data) - non_float_gen = (item for item in data if not isinstance(item, float)) - non_float = next(non_float_gen, _DEFAULT) - if non_float is not _DEFAULT: - raise TypeError( - f"Cannot build f64 vector from {type(non_float).__name__}, " - "expected float." - ) - return cls(_np.fromiter(data, dtype=_np.dtype(">f8")).tobytes()) - - @classmethod - def _from_native_py(cls, data: _t.Iterable[object], /) -> _t.Self: - bytes_ = bytearray() - for item in data: - if not isinstance(item, float): - raise TypeError( - f"Cannot build f64 vector from {type(item).__name__}, " - "expected float." - ) - bytes_.extend(_struct.pack(">d", item)) - return cls(bytes(bytes_)) - - if _vec_rust is not None: - from_native = _from_native_rust - elif _np is not None: - from_native = _from_native_np - else: - from_native = _from_native_py - - def _to_native_rust(self) -> list[object]: - return _vec_rust.vec_f64_to_native(self.data) - - def _to_native_np(self) -> list[object]: - return _np.frombuffer(self.data, dtype=_np.dtype(">f8")).tolist() - - def _to_native_py(self) -> list[object]: - return [ - _struct.unpack(">d", self.data[i : i + self.size])[0] - for i in range(0, len(self.data), self.size) - ] - - if _vec_rust is not None: - to_native = _to_native_rust - elif _np is not None: - to_native = _to_native_np - else: - to_native = _to_native_py + def to_native(self) -> list[object]: + struct_format = f">{len(self.data) // self.size}d" + return list(_struct.unpack(struct_format, self.data)) def to_numpy(self) -> _np.ndarray: import numpy @@ -852,58 +821,33 @@ class _VecF32(_InnerVectorFloat): cypher_inner_type_repr = "FLOAT32 NOT NULL" @classmethod - def _from_native_rust(cls, data: _t.Iterable[object], /) -> _t.Self: - return cls(_vec_rust.vec_f32_from_native(data)) + def from_native(cls, data: _t.Iterable[object], /) -> _t.Self: + if not isinstance(data, _t.Sized): + data = tuple(data) + if not all(isinstance(item, float) for item in data): + for item in data: + if not isinstance(item, float): + raise TypeError( + f"Cannot build f32 vector from {type(item).__name__}, " + "expected float." + ) + try: + bytes_ = _struct.pack(f">{len(data)}f", *data) + except OverflowError: + for item in data: + try: + _struct.pack(">f", item) + except OverflowError: + raise OverflowError( + f"Value {item} is out of range for f32: " + f"[-3.4028234e+38, 3.4028234e+38]" + ) from None + raise + return cls(bytes_) - @classmethod - def _from_native_np(cls, data: _t.Iterable[object], /) -> _t.Self: - data = tuple(data) - non_float_gen = (item for item in data if not isinstance(item, float)) - non_float = next(non_float_gen, _DEFAULT) - if non_float is not _DEFAULT: - raise TypeError( - f"Cannot build f32 vector from {type(non_float).__name__}, " - "expected float." - ) - return cls(_np.fromiter(data, dtype=_np.dtype(">f4")).tobytes()) - - @classmethod - def _from_native_py(cls, data: _t.Iterable[object], /) -> _t.Self: - bytes_ = bytearray() - for item in data: - if not isinstance(item, float): - raise TypeError( - f"Cannot build f32 vector from {type(item).__name__}, " - "expected float." 
- ) - bytes_.extend(_struct.pack(">f", item)) - return cls(bytes(bytes_)) - - if _vec_rust is not None: - from_native = _from_native_rust - elif _np is not None: - from_native = _from_native_np - else: - from_native = _from_native_py - - def _to_native_rust(self) -> list[object]: - return _vec_rust.vec_f32_to_native(self.data) - - def _to_native_np(self) -> list[object]: - return _np.frombuffer(self.data, dtype=_np.dtype(">f4")).tolist() - - def _to_native_py(self) -> list[object]: - return [ - _struct.unpack(">f", self.data[i : i + self.size])[0] - for i in range(0, len(self.data), self.size) - ] - - if _vec_rust is not None: - to_native = _to_native_rust - elif _np is not None: - to_native = _to_native_np - else: - to_native = _to_native_py + def to_native(self) -> list[object]: + struct_format = f">{len(self.data) // self.size}f" + return list(_struct.unpack(struct_format, self.data)) def to_numpy(self) -> _np.ndarray: import numpy @@ -938,72 +882,29 @@ class _VecI64(_InnerVectorInt): cypher_inner_type_repr = "INTEGER NOT NULL" @classmethod - def _from_native_rust(cls, data: _t.Iterable[object], /) -> _t.Self: - return cls(_vec_rust.vec_i64_from_native(data)) - - @classmethod - def _from_native_np(cls, data: _t.Iterable[object], /) -> _t.Self: - data = tuple(data) - non_int_gen = (item for item in data if not isinstance(item, int)) - non_int = next(non_int_gen, _DEFAULT) - if non_int is not _DEFAULT: - raise TypeError( - f"Cannot build i64 vector from {type(non_int).__name__}, " - "expected int." - ) - data = _t.cast(tuple[int, ...], data) - overflow_int = tuple( - item for item in data if not _I64_MIN <= item <= _I64_MAX - ) - if overflow_int: - raise OverflowError( - f"Value {overflow_int[0]} is out of range for i64: " - f"[-{_I64_MIN}, {_I64_MAX}]" - ) - return cls(_np.fromiter(data, dtype=_np.dtype(">i8")).tobytes()) - - @classmethod - def _from_native_py(cls, data: _t.Iterable[object], /) -> _t.Self: - bytes_ = bytearray() - for item in data: - if not isinstance(item, int): - raise TypeError( - f"Cannot build i64 vector from {type(item).__name__}, " - "expected int." - ) - if not _I64_MIN <= item <= _I64_MAX: - raise OverflowError( - f"Value {item} is out of range for i64: " - f"[-{_I64_MIN}, {_I64_MAX}]" - ) - bytes_.extend(_struct.pack(">q", item)) - return cls(bytes(bytes_)) - - if _vec_rust is not None: - from_native = _from_native_rust - elif _np is not None: - from_native = _from_native_np - else: - from_native = _from_native_py - - def _to_native_rust(self) -> list[object]: - return _vec_rust.vec_i64_to_native(self.data) - - def _to_native_np(self) -> list[object]: - return _np.frombuffer(self.data, dtype=_np.dtype(">i8")).tolist() - - def _to_native_py(self) -> list[object]: - return [ - _struct.unpack(">q", self.data[i : i + self.size])[0] - for i in range(0, len(self.data), self.size) - ] + def from_native(cls, data: _t.Iterable[object], /) -> _t.Self: + if not isinstance(data, _t.Sized): + data = tuple(data) + try: + bytes_ = _struct.pack(f">{len(data)}q", *data) + except _struct.error: + for item in data: + if not isinstance(item, int): + raise TypeError( + f"Cannot build i64 vector from {type(item).__name__}, " + "expected int." 
+ ) from None + if not _I64_MIN <= item <= _I64_MAX: + raise OverflowError( + f"Value {item} is out of range for i64: " + f"[{_I64_MIN}, {_I64_MAX}]" + ) from None + raise + return cls(bytes_) - if _vec_rust is not None: - to_native = _to_native_rust - elif _np is not None: - to_native = _to_native_np - else: - to_native = _to_native_py + def to_native(self) -> list[object]: + struct_format = f">{len(self.data) // self.size}q" + return list(_struct.unpack(struct_format, self.data)) def to_numpy(self) -> _np.ndarray: import numpy @@ -1031,72 +932,29 @@ class _VecI32(_InnerVectorInt): cypher_inner_type_repr = "INTEGER32 NOT NULL" @classmethod - def _from_native_rust(cls, data: _t.Iterable[object], /) -> _t.Self: - return cls(_vec_rust.vec_i32_from_native(data)) + def from_native(cls, data: _t.Iterable[object], /) -> _t.Self: + if not isinstance(data, _t.Sized): + data = tuple(data) + try: + bytes_ = _struct.pack(f">{len(data)}i", *data) + except _struct.error: + for item in data: + if not isinstance(item, int): + raise TypeError( + f"Cannot build i32 vector from {type(item).__name__}, " + "expected int." + ) from None + if not _I32_MIN <= item <= _I32_MAX: + raise OverflowError( + f"Value {item} is out of range for i32: " + f"[{_I32_MIN}, {_I32_MAX}]" + ) from None + raise + return cls(bytes_) - @classmethod - def _from_native_np(cls, data: _t.Iterable[object], /) -> _t.Self: - data = tuple(data) - non_int_gen = (item for item in data if not isinstance(item, int)) - non_int = next(non_int_gen, _DEFAULT) - if non_int is not _DEFAULT: - raise TypeError( - f"Cannot build i32 vector from {type(non_int).__name__}, " - "expected int." - ) - data = _t.cast(tuple[int, ...], data) - overflow_int = tuple( - item for item in data if not _I32_MIN <= item <= _I32_MAX - ) - if overflow_int: - raise OverflowError( - f"Value {overflow_int[0]} is out of range for i32: " - f"[-{_I32_MIN}, {_I32_MAX}]" - ) - return cls(_np.fromiter(data, dtype=_np.dtype(">i4")).tobytes()) - - @classmethod - def _from_native_py(cls, data: _t.Iterable[object], /) -> _t.Self: - bytes_ = bytearray() - for item in data: - if not isinstance(item, int): - raise TypeError( - f"Cannot build i32 vector from {type(item).__name__}, " - "expected int." 
- ) - if not _I32_MIN <= item <= _I32_MAX: - raise OverflowError( - f"Value {item} is out of range for i32: " - f"[-{_I32_MIN}, {_I32_MAX}]" - ) - bytes_.extend(_struct.pack(">i", item)) - return cls(bytes(bytes_)) - - if _vec_rust is not None: - from_native = _from_native_rust - elif _np is not None: - from_native = _from_native_np - else: - from_native = _from_native_py - - def _to_native_rust(self) -> list[object]: - return _vec_rust.vec_i32_to_native(self.data) - - def _to_native_np(self) -> list[object]: - return _np.frombuffer(self.data, dtype=_np.dtype(">i4")).tolist() - - def _to_native_py(self) -> list[object]: - return [ - _struct.unpack(">i", self.data[i : i + self.size])[0] - for i in range(0, len(self.data), self.size) - ] - - if _vec_rust is not None: - to_native = _to_native_rust - elif _np is not None: - to_native = _to_native_np - else: - to_native = _to_native_py + def to_native(self) -> list[object]: + struct_format = f">{len(self.data) // self.size}i" + return list(_struct.unpack(struct_format, self.data)) def to_numpy(self) -> _np.ndarray: import numpy @@ -1124,72 +982,29 @@ class _VecI16(_InnerVectorInt): cypher_inner_type_repr = "INTEGER16 NOT NULL" @classmethod - def _from_native_rust(cls, data: _t.Iterable[object], /) -> _t.Self: - return cls(_vec_rust.vec_i16_from_native(data)) - - @classmethod - def _from_native_np(cls, data: _t.Iterable[object], /) -> _t.Self: - data = tuple(data) - non_int_gen = (item for item in data if not isinstance(item, int)) - non_int = next(non_int_gen, _DEFAULT) - if non_int is not _DEFAULT: - raise TypeError( - f"Cannot build i16 vector from {type(non_int).__name__}, " - "expected int." - ) - data = _t.cast(tuple[int, ...], data) - overflow_int = tuple( - item for item in data if not _I16_MIN <= item <= _I16_MAX - ) - if overflow_int: - raise OverflowError( - f"Value {overflow_int[0]} is out of range for i16: " - f"[-{_I16_MIN}, {_I16_MAX}]" - ) - return cls(_np.fromiter(data, dtype=_np.dtype(">i2")).tobytes()) - - @classmethod - def _from_native_py(cls, data: _t.Iterable[object], /) -> _t.Self: - bytes_ = bytearray() - for item in data: - if not isinstance(item, int): - raise TypeError( - f"Cannot build i16 vector from {type(item).__name__}, " - "expected int." - ) - if not _I16_MIN <= item <= _I16_MAX: - raise OverflowError( - f"Value {item} is out of range for i16: " - f"[-{_I16_MIN}, {_I16_MAX}]" - ) - bytes_.extend(_struct.pack(">h", item)) - return cls(bytes(bytes_)) - - if _vec_rust is not None: - from_native = _from_native_rust - elif _np is not None: - from_native = _from_native_np - else: - from_native = _from_native_py - - def _to_native_rust(self) -> list[object]: - return _vec_rust.vec_i16_to_native(self.data) - - def _to_native_np(self) -> list[object]: - return _np.frombuffer(self.data, dtype=_np.dtype(">i2")).tolist() - - def _to_native_py(self) -> list[object]: - return [ - _struct.unpack(">h", self.data[i : i + self.size])[0] - for i in range(0, len(self.data), self.size) - ] + def from_native(cls, data: _t.Iterable[object], /) -> _t.Self: + if not isinstance(data, _t.Sized): + data = tuple(data) + try: + bytes_ = _struct.pack(f">{len(data)}h", *data) + except _struct.error: + for item in data: + if not isinstance(item, int): + raise TypeError( + f"Cannot build i16 vector from {type(item).__name__}, " + "expected int." 
+ ) from None + if not _I16_MIN <= item <= _I16_MAX: + raise OverflowError( + f"Value {item} is out of range for i16: " + f"[{_I16_MIN}, {_I16_MAX}]" + ) from None + raise + return cls(bytes_) - if _vec_rust is not None: - to_native = _to_native_rust - elif _np is not None: - to_native = _to_native_np - else: - to_native = _to_native_py + def to_native(self) -> list[object]: + struct_format = f">{len(self.data) // self.size}h" + return list(_struct.unpack(struct_format, self.data)) def to_numpy(self) -> _np.ndarray: import numpy @@ -1217,72 +1032,29 @@ class _VecI8(_InnerVectorInt): cypher_inner_type_repr = "INTEGER8 NOT NULL" @classmethod - def _from_native_rust(cls, data: _t.Iterable[object], /) -> _t.Self: - return cls(_vec_rust.vec_i8_from_native(data)) - - @classmethod - def _from_native_np(cls, data: _t.Iterable[object], /) -> _t.Self: - data = tuple(data) - non_int_gen = (item for item in data if not isinstance(item, int)) - non_int = next(non_int_gen, _DEFAULT) - if non_int is not _DEFAULT: - raise TypeError( - f"Cannot build i8 vector from {type(non_int).__name__}, " - "expected int." - ) - data = _t.cast(tuple[int, ...], data) - overflow_int = tuple( - item for item in data if not _I8_MIN <= item <= _I8_MAX - ) - if overflow_int: - raise OverflowError( - f"Value {overflow_int[0]} is out of range for i8: " - f"[-{_I8_MIN}, {_I8_MAX}]" - ) - return cls(_np.fromiter(data, dtype=_np.dtype(">i1")).tobytes()) - - @classmethod - def _from_native_py(cls, data: _t.Iterable[object], /) -> _t.Self: - bytes_ = bytearray() - for item in data: - if not isinstance(item, int): - raise TypeError( - f"Cannot build i8 vector from {type(item).__name__}, " - "expected int." - ) - if not _I8_MIN <= item <= _I8_MAX: - raise OverflowError( - f"Value {item} is out of range for i8: " - f"[-{_I8_MIN}, {_I8_MAX}]" - ) - bytes_.extend(_struct.pack(">b", item)) - return cls(bytes(bytes_)) - - if _vec_rust is not None: - from_native = _from_native_rust - elif _np is not None: - from_native = _from_native_np - else: - from_native = _from_native_py - - def _to_native_rust(self) -> list[object]: - return _vec_rust.vec_i8_to_native(self.data) - - def _to_native_np(self) -> list[object]: - return _np.frombuffer(self.data, dtype=_np.dtype(">i1")).tolist() - - def _to_native_py(self) -> list[object]: - return [ - _struct.unpack(">b", self.data[i : i + self.size])[0] - for i in range(0, len(self.data), self.size) - ] + def from_native(cls, data: _t.Iterable[object], /) -> _t.Self: + if not isinstance(data, _t.Sized): + data = tuple(data) + try: + bytes_ = _struct.pack(f">{len(data)}b", *data) + except _struct.error: + for item in data: + if not isinstance(item, int): + raise TypeError( + f"Cannot build i8 vector from {type(item).__name__}, " + "expected int." 
+ ) from None + if not _I8_MIN <= item <= _I8_MAX: + raise OverflowError( + f"Value {item} is out of range for i8: " + f"[{_I8_MIN}, {_I8_MAX}]" + ) from None + raise + return cls(bytes_) - if _vec_rust is not None: - to_native = _to_native_rust - elif _np is not None: - to_native = _to_native_np - else: - to_native = _to_native_py + def to_native(self) -> list[object]: + struct_format = f">{len(self.data) // self.size}b" + return list(_struct.unpack(struct_format, self.data)) def to_numpy(self) -> _np.ndarray: import numpy diff --git a/tests/unit/common/vector/test_vector.py b/tests/unit/common/vector/test_vector.py index 7844517d..d7702b48 100644 --- a/tests/unit/common/vector/test_vector.py +++ b/tests/unit/common/vector/test_vector.py @@ -16,6 +16,7 @@ from __future__ import annotations +import abc import math import random import struct @@ -39,6 +40,7 @@ if t.TYPE_CHECKING: import numpy import pyarrow + from pytest_mock import MockFixture T_ENDIAN_LITERAL: t.TypeAlias = t.Literal["big", "little"] | VectorEndian T_DTYPE_LITERAL: t.TypeAlias = ( @@ -57,6 +59,7 @@ T_DTYPE_FLOAT_LITERAL: t.TypeAlias = t.Literal[ "f32", "f64", VectorDType.F32, VectorDType.F64 ] + T_EXT_LITERAL: t.TypeAlias = t.Literal["numpy", "rust", "python"] ENDIAN_LITERALS: tuple[T_ENDIAN_LITERAL, ...] = ( @@ -152,17 +155,56 @@ def _get_type_size(dtype: str) -> t.Literal[1, 2, 4, 8]: return lookup[dtype] -def _normalize_float_bytes(dtype: str, data: bytes) -> bytes: - if dtype not in {"f32", "f64"}: - raise ValueError(f"Invalid dtype {dtype}") - type_size = _get_type_size(dtype) - pack_format = _dtype_to_pack_format(dtype) - chunks = (data[i : i + type_size] for i in range(0, len(data), type_size)) - return bytes( - b - for chunk in chunks - for b in struct.pack(pack_format, struct.unpack(pack_format, chunk)[0]) - ) +class NormalizableBytes(abc.ABC): + @abc.abstractmethod + def normalized_bytes(self) -> bytes: ... + + @abc.abstractmethod + def raw_bytes(self) -> bytes: ... 
+ + +class Bytes(NormalizableBytes): + _data: bytes + + def __init__(self, data: bytes) -> None: + self._data = data + + def normalized_bytes(self) -> bytes: + return self._data + + def raw_bytes(self) -> bytes: + return self._data + + +class Float32NanPayloadBytes(NormalizableBytes): + _data: bytes + + def __init__(self, data: bytes) -> None: + self._data = data + + def normalized_bytes(self) -> bytes: + type_size = _get_type_size("f32") + pack_format = _dtype_to_pack_format("f32") + + # Python <3.14 does not preserve NaN payloads on struct pack/unpack + # for float32: + # https://github.com/python/cpython/issues/130317 + if sys.version_info >= (3, 14): + return self._data + chunks = ( + self._data[i : i + type_size] + for i in range(0, len(self._data), type_size) + ) + return bytes( + b + for chunk in chunks + for b in struct.pack( + pack_format, struct.unpack(pack_format, chunk)[0] + ) + ) + + def raw_bytes(self) -> bytes: + return self._data def _dtype_to_pack_format(dtype: str) -> str: @@ -176,20 +218,15 @@ def _dtype_to_pack_format(dtype: str) -> str: }[dtype] -def _mock_mask_extensions(mocker, used_ext): +def _mock_mask_extensions( + used_ext: T_EXT_LITERAL, mocker: MockFixture +) -> None: from neo4j.vector import ( _swap_endian_unchecked_np, _swap_endian_unchecked_py, _swap_endian_unchecked_rust, - _VecF32, - _VecF64, - _VecI8, - _VecI16, - _VecI32, - _VecI64, ) - vec_types = (_VecF64, _VecF32, _VecI64, _VecI32, _VecI16, _VecI8) match used_ext: case "numpy": if _swap_endian_unchecked_np is None: @@ -198,15 +235,6 @@ def _mock_mask_extensions(mocker, used_ext): "neo4j.vector._swap_endian_unchecked", new=_swap_endian_unchecked_np, ) - for vec_type in vec_types: - mocker.patch( - f"neo4j.vector.{vec_type.__name__}.from_native", - new=vec_type._from_native_np, - ) - mocker.patch( - f"neo4j.vector.{vec_type.__name__}.to_native", - new=vec_type._to_native_np, - ) case "rust": if _swap_endian_unchecked_rust is None: pytest.skip("rust extensions are not installed") @@ -214,37 +242,19 @@ def _mock_mask_extensions(mocker, used_ext): "neo4j.vector._swap_endian_unchecked", new=_swap_endian_unchecked_rust, ) - for vec_type in vec_types: - mocker.patch( - f"neo4j.vector.{vec_type.__name__}.from_native", - new=vec_type._from_native_rust, - ) - mocker.patch( - f"neo4j.vector.{vec_type.__name__}.to_native", - new=vec_type._to_native_rust, - ) case "python": mocker.patch( "neo4j.vector._swap_endian_unchecked", new=_swap_endian_unchecked_py, ) - for vec_type in vec_types: - mocker.patch( - f"neo4j.vector.{vec_type.__name__}.from_native", - new=vec_type._from_native_py, - ) - mocker.patch( - f"neo4j.vector.{vec_type.__name__}.to_native", - new=vec_type._to_native_py, - ) case _: raise ValueError(f"Invalid ext value {used_ext}") @pytest.mark.parametrize("ext", ("numpy", "rust", "python")) -def test_swap_endian(mocker, ext): +def test_swap_endian(mocker: MockFixture, ext: T_EXT_LITERAL) -> None: data = bytes(range(1, 17)) - _mock_mask_extensions(mocker, ext) + _mock_mask_extensions(ext, mocker) res = _swap_endian(2, data) assert isinstance(res, bytes) assert res == bytes( @@ -264,9 +274,11 @@ def test_swap_endian(mocker, ext): @pytest.mark.parametrize("ext", ("numpy", "rust", "python")) @pytest.mark.parametrize("type_size", (-1, 0, 3, 5, 7, 9, 16, 32)) -def test_swap_endian_unhandled_size(mocker, ext, type_size): +def test_swap_endian_unhandled_size( + ext: T_EXT_LITERAL, type_size: int, mocker: MockFixture +) -> None: data = bytes(i % 256 for i in range(1, abs(type_size) * 4)) - 
_mock_mask_extensions(mocker, ext) + _mock_mask_extensions(ext, mocker) with pytest.raises(ValueError, match=str(type_size)): _swap_endian(type_size, data) @@ -282,12 +294,12 @@ def test_swap_endian_unhandled_size(mocker, ext, type_size): ), pytest.param( "i8", - b"\x01", + bytes.fromhex("01"), id="i8-single", ), pytest.param( "i8", - b"\x01\x02\x03\x04", + bytes.fromhex("01020304"), id="i8-some", ), pytest.param( @@ -302,12 +314,12 @@ def test_swap_endian_unhandled_size(mocker, ext, type_size): ), pytest.param( "i16", - b"\x00\x01", + bytes.fromhex("0001"), id="i16-single", ), pytest.param( "i16", - b"\x00\x01\x00\x02", + bytes.fromhex("00010002"), id="i16-some", ), pytest.param( @@ -322,12 +334,12 @@ def test_swap_endian_unhandled_size(mocker, ext, type_size): ), pytest.param( "i32", - b"\x00\x00\x00\x01", + bytes.fromhex("00000001"), id="i32-single", ), pytest.param( "i32", - b"\x00\x00\x00\x01\x00\x00\x00\x02", + bytes.fromhex("0000000100000002"), id="i32-some", ), pytest.param( @@ -342,15 +354,12 @@ def test_swap_endian_unhandled_size(mocker, ext, type_size): ), pytest.param( "i64", - b"\x00\x00\x00\x00\x00\x00\x00\x01", + bytes.fromhex("0000000000000001"), id="i64-single", ), pytest.param( "i64", - ( - b"\x00\x00\x00\x00\x00\x00\x00\x01" - b"\x00\x00\x00\x00\x00\x00\x00\x02" - ), + bytes.fromhex("0000000000000001 0000000000000002"), id="i64-some", ), pytest.param( @@ -426,17 +435,13 @@ def nan_equals(a: list[object], b: list[object]) -> bool: @pytest.mark.parametrize("dtype", DTYPE_INT_LITERALS) @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) -@pytest.mark.parametrize("ext", ("numpy", "rust", "python")) @pytest.mark.parametrize("use_init", (False, True)) def test_from_native_int_random( dtype: T_DTYPE_INT_LITERAL, repeat: int, size: int, - ext: str, use_init: bool, - mocker: t.Any, ) -> None: - _mock_mask_extensions(mocker, ext) type_size = _get_type_size(dtype) for _ in range(repeat): data = _random_value_be_bytes(type_size, size) @@ -452,24 +457,20 @@ def test_from_native_int_random( else: v = Vector.from_native(values, dtype) expected_raw = data - if dtype.startswith("f"): - expected_raw = _normalize_float_bytes(dtype, data) + if dtype == "f32": + expected_raw = Float32NanPayloadBytes(data).normalized_bytes() assert v.raw() == expected_raw @pytest.mark.parametrize("dtype", DTYPE_FLOAT_LITERALS) @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) -@pytest.mark.parametrize("ext", ("numpy", "rust", "python")) @pytest.mark.parametrize("use_init", (False, True)) -def test_from_native_floatgst_random( +def test_from_native_float_random( dtype: T_DTYPE_FLOAT_LITERAL, repeat: int, size: int, - ext: str, use_init: bool, - mocker: t.Any, ) -> None: - _mock_mask_extensions(mocker, ext) type_size = _get_type_size(dtype) for _ in range(repeat): data = _random_value_be_bytes(type_size, size) @@ -485,156 +486,212 @@ def test_from_native_floatgst_random( else: v = Vector.from_native(values, dtype) expected_raw = data - if dtype.startswith("f"): - expected_raw = _normalize_float_bytes(dtype, data) + if dtype == "f32": + expected_raw = Float32NanPayloadBytes(data).normalized_bytes() assert v.raw() == expected_raw -SPECIAL_INT_VALUES: tuple[tuple[T_DTYPE_INT_LITERAL, int, bytes], ...] = ( +SPECIAL_INT_VALUES: tuple[ + tuple[T_DTYPE_INT_LITERAL, int, NormalizableBytes], ... 
+] = ( # (dtype, value, packed_bytes_be) # i8 - ("i8", -128, b"\x80"), - ("i8", 0, b"\x00"), - ("i8", 127, b"\x7f"), + ("i8", -128, Bytes(bytes.fromhex("80"))), + ("i8", 0, Bytes(bytes.fromhex("00"))), + ("i8", 127, Bytes(bytes.fromhex("7f"))), # i16 - ("i16", -32768, b"\x80\x00"), - ("i16", 0, b"\x00\x00"), - ("i16", 32767, b"\x7f\xff"), + ("i16", -32768, Bytes(bytes.fromhex("8000"))), + ("i16", 0, Bytes(bytes.fromhex("0000"))), + ("i16", 32767, Bytes(bytes.fromhex("7fff"))), # i32 - ("i32", -2147483648, b"\x80\x00\x00\x00"), - ("i32", 0, b"\x00\x00\x00\x00"), - ("i32", 2147483647, b"\x7f\xff\xff\xff"), + ("i32", -2147483648, Bytes(bytes.fromhex("80000000"))), + ("i32", 0, Bytes(bytes.fromhex("00000000"))), + ("i32", 2147483647, Bytes(bytes.fromhex("7fffffff"))), # i64 - ("i64", -9223372036854775808, b"\x80\x00\x00\x00\x00\x00\x00\x00"), - ("i64", 0, b"\x00\x00\x00\x00\x00\x00\x00\x00"), - ("i64", 9223372036854775807, b"\x7f\xff\xff\xff\xff\xff\xff\xff"), + ("i64", -9223372036854775808, Bytes(bytes.fromhex("8000000000000000"))), + ("i64", 0, Bytes(bytes.fromhex("0000000000000000"))), + ("i64", 9223372036854775807, Bytes(bytes.fromhex("7fffffffffffffff"))), ) SPECIAL_FLOAT_VALUES: tuple[ - tuple[T_DTYPE_FLOAT_LITERAL, float, bytes], ... + tuple[T_DTYPE_FLOAT_LITERAL, float, NormalizableBytes], ... ] = ( # (dtype, value, packed_bytes_be) # f32 # NaN - ("f32", float("nan"), b"\x7f\xc0\x00\x00"), - ("f32", float("-nan"), b"\xff\xc0\x00\x00"), ( "f32", - struct.unpack(">f", b"\x7f\xc0\x00\x11")[0], - b"\x7f\xc0\x00\x11", + float("nan"), + Bytes(bytes.fromhex("7fc00000")), + ), + ( + "f32", + float("-nan"), + Bytes(bytes.fromhex("ffc00000")), + ), + ( + "f32", + struct.unpack(">f", bytes.fromhex("7fc00011"))[0], + Bytes(bytes.fromhex("7fc00011")), ), ( "f32", - struct.unpack(">f", b"\x7f\x80\x00\x01")[0], - # Python < 3.14 does not properly preserver all NaN payload - # when calling struct.pack - _normalize_float_bytes("f32", b"\x7f\x80\x00\x01"), + struct.unpack(">f", bytes.fromhex("7f800001"))[0], + Float32NanPayloadBytes(bytes.fromhex("7f800001")), ), # ±inf - ("f32", float("inf"), b"\x7f\x80\x00\x00"), - ("f32", float("-inf"), b"\xff\x80\x00\x00"), + ( + "f32", + float("inf"), + Bytes(bytes.fromhex("7f800000")), + ), + ( + "f32", + float("-inf"), + Bytes(bytes.fromhex("ff800000")), + ), # ±0.0 - ("f32", 0.0, b"\x00\x00\x00\x00"), - ("f32", -0.0, b"\x80\x00\x00\x00"), + ( + "f32", + 0.0, + Bytes(bytes.fromhex("00000000")), + ), + ( + "f32", + -0.0, + Bytes(bytes.fromhex("80000000")), + ), # smallest normal ( "f32", - struct.unpack(">f", b"\x00\x80\x00\x00")[0], - b"\x00\x80\x00\x00", + struct.unpack(">f", bytes.fromhex("00800000"))[0], + Bytes(bytes.fromhex("00800000")), ), ( "f32", - struct.unpack(">f", b"\x80\x80\x00\x00")[0], - b"\x80\x80\x00\x00", + struct.unpack(">f", bytes.fromhex("80800000"))[0], + Bytes(bytes.fromhex("80800000")), ), # subnormal ( "f32", - struct.unpack(">f", b"\x00\x00\x00\x01")[0], - b"\x00\x00\x00\x01", + struct.unpack(">f", bytes.fromhex("00000001"))[0], + Bytes(bytes.fromhex("00000001")), ), ( "f32", - struct.unpack(">f", b"\x80\x00\x00\x01")[0], - b"\x80\x00\x00\x01", + struct.unpack(">f", bytes.fromhex("80000001"))[0], + Bytes(bytes.fromhex("80000001")), ), # largest normal ( "f32", - struct.unpack(">f", b"\x7f\x7f\xff\xff")[0], - b"\x7f\x7f\xff\xff", + struct.unpack(">f", bytes.fromhex("7f7fffff"))[0], + Bytes(bytes.fromhex("7f7fffff")), + ), + ( + "f32", + struct.unpack(">f", bytes.fromhex("ff7fffff"))[0], + Bytes(bytes.fromhex("ff7fffff")), ), + # very 
small f64 being rounded to ±0 in f32 ( "f32", - struct.unpack(">f", b"\xff\x7f\xff\xff")[0], - b"\xff\x7f\xff\xff", + struct.unpack(">d", bytes.fromhex("3686d601ad376ab9"))[0], + Bytes(bytes.fromhex("00000000")), + ), + ( + "f32", + struct.unpack(">d", bytes.fromhex("b686d601ad376ab9"))[0], + Bytes(bytes.fromhex("80000000")), ), # f64 # NaN - ("f64", float("nan"), b"\x7f\xf8\x00\x00\x00\x00\x00\x00"), - ("f64", float("-nan"), b"\xff\xf8\x00\x00\x00\x00\x00\x00"), ( "f64", - struct.unpack(">d", b"\x7f\xf8\x00\x00\x00\x00\x00\x11")[0], - b"\x7f\xf8\x00\x00\x00\x00\x00\x11", + float("nan"), + Bytes(bytes.fromhex("7ff8000000000000")), + ), + ( + "f64", + float("-nan"), + Bytes(bytes.fromhex("fff8000000000000")), + ), + ( + "f64", + struct.unpack(">d", bytes.fromhex("7ff8000000000011"))[0], + Bytes(bytes.fromhex("7ff8000000000011")), ), ( "f64", - struct.unpack(">d", b"\x7f\xf0\x00\x01\x00\x00\x00\x01")[0], - b"\x7f\xf0\x00\x01\x00\x00\x00\x01", + struct.unpack(">d", bytes.fromhex("7ff0000100000001"))[0], + Bytes(bytes.fromhex("7ff0000100000001")), ), # ±inf - ("f64", float("inf"), b"\x7f\xf0\x00\x00\x00\x00\x00\x00"), - ("f64", float("-inf"), b"\xff\xf0\x00\x00\x00\x00\x00\x00"), + ( + "f64", + float("inf"), + Bytes(bytes.fromhex("7ff0000000000000")), + ), + ( + "f64", + float("-inf"), + Bytes(bytes.fromhex("fff0000000000000")), + ), # ±0.0 - ("f64", 0.0, b"\x00\x00\x00\x00\x00\x00\x00\x00"), - ("f64", -0.0, b"\x80\x00\x00\x00\x00\x00\x00\x00"), + ( + "f64", + 0.0, + Bytes(bytes.fromhex("0000000000000000")), + ), + ( + "f64", + -0.0, + Bytes(bytes.fromhex("8000000000000000")), + ), # smallest normal ( "f64", - struct.unpack(">d", b"\x00\x10\x00\x00\x00\x00\x00\x00")[0], - b"\x00\x10\x00\x00\x00\x00\x00\x00", + struct.unpack(">d", bytes.fromhex("0010000000000000"))[0], + Bytes(bytes.fromhex("0010000000000000")), ), ( "f64", - struct.unpack(">d", b"\x80\x10\x00\x00\x00\x00\x00\x00")[0], - b"\x80\x10\x00\x00\x00\x00\x00\x00", + struct.unpack(">d", bytes.fromhex("8010000000000000"))[0], + Bytes(bytes.fromhex("8010000000000000")), ), # subnormal ( "f64", - struct.unpack(">d", b"\x00\x00\x00\x00\x00\x00\x00\x01")[0], - b"\x00\x00\x00\x00\x00\x00\x00\x01", + struct.unpack(">d", bytes.fromhex("0000000000000001"))[0], + Bytes(bytes.fromhex("0000000000000001")), ), ( "f64", - struct.unpack(">d", b"\x80\x00\x00\x00\x00\x00\x00\x01")[0], - b"\x80\x00\x00\x00\x00\x00\x00\x01", + struct.unpack(">d", bytes.fromhex("8000000000000001"))[0], + Bytes(bytes.fromhex("8000000000000001")), ), # largest normal ( "f64", - struct.unpack(">d", b"\x7f\xef\xff\xff\xff\xff\xff\xff")[0], - b"\x7f\xef\xff\xff\xff\xff\xff\xff", + struct.unpack(">d", bytes.fromhex("7fefffffffffffff"))[0], + Bytes(bytes.fromhex("7fefffffffffffff")), ), ( "f64", - struct.unpack(">d", b"\xff\xef\xff\xff\xff\xff\xff\xff")[0], - b"\xff\xef\xff\xff\xff\xff\xff\xff", + struct.unpack(">d", bytes.fromhex("ffefffffffffffff"))[0], + Bytes(bytes.fromhex("ffefffffffffffff")), ), ) SPECIAL_VALUES = SPECIAL_INT_VALUES + SPECIAL_FLOAT_VALUES -@pytest.mark.parametrize(("dtype", "value", "data_be"), SPECIAL_VALUES) -@pytest.mark.parametrize("ext", ("numpy", "rust", "python")) +@pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) def test_from_native_special_values( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], value: object, - data_be: bytes, - ext: str, - mocker: t.Any, + data_be_raw: NormalizableBytes, ) -> None: - _mock_mask_extensions(mocker, ext) + data_be = data_be_raw.normalized_bytes() if dtype in {"f32", "f64"}: 
assert isinstance(value, float) dtype_f = t.cast(t.Literal["f32", "f64"], dtype) @@ -671,14 +728,10 @@ def test_from_native_special_values( ("f64", 1), ), ) -@pytest.mark.parametrize("ext", ("numpy", "rust", "python")) def test_from_native_wrong_type( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], value: object, - ext: str, - mocker: t.Any, ) -> None: - _mock_mask_extensions(mocker, ext) with pytest.raises(TypeError) as exc: Vector.from_native([value], dtype) # type: ignore @@ -697,16 +750,22 @@ def test_from_native_wrong_type( ("i32", 2147483648), ("i64", -9223372036854775809), ("i64", 9223372036854775808), + # positive value, positive exponent overflow + ("f32", struct.unpack(">d", bytes.fromhex("47f0000020000000"))[0]), + # negative value, positive exponent overflow + ("f32", struct.unpack(">d", bytes.fromhex("c7f0000020000000"))[0]), + # no such thing as negative exponent overflow: + # very small values become 0.0 + # positive value, positive exponent, mantiassa overflow + ("f32", struct.unpack(">d", bytes.fromhex("47effffff0000000"))[0]), + # negative value, positive exponent, mantiassa overflow + ("f32", struct.unpack(">d", bytes.fromhex("c7effffff0000000"))[0]), ), ) -@pytest.mark.parametrize("ext", ("numpy", "rust", "python")) def test_from_native_overflow( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], value: object, - ext: str, - mocker: t.Any, ) -> None: - _mock_mask_extensions(mocker, ext) with pytest.raises(OverflowError) as exc: Vector.from_native([value], dtype) # type: ignore @@ -759,12 +818,13 @@ def test_to_native_random( assert nan_equals(v.to_native(), expected) -@pytest.mark.parametrize(("dtype", "value", "data_be"), SPECIAL_VALUES) +@pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) def test_to_native_special_values( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], value: object, - data_be: bytes, + data_be_raw: NormalizableBytes, ) -> None: + data_be = data_be_raw.raw_bytes() type_size = _get_type_size(dtype) pack_format = _dtype_to_pack_format(dtype) expected = [ @@ -829,14 +889,15 @@ def test_from_numpy_random( @pytest.mark.skipif(np is None, reason="numpy not installed") -@pytest.mark.parametrize(("dtype", "value", "data_be"), SPECIAL_VALUES) +@pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) @pytest.mark.parametrize("endian", ("big", "little", "native")) def test_from_numpy_special_values( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], endian: t.Literal["big", "little", "native"], value: object, - data_be: bytes, + data_be_raw: NormalizableBytes, ) -> None: + data_be = data_be_raw.raw_bytes() array = _get_numpy_array(data_be, dtype, endian) v = Vector.from_numpy(array) assert v.dtype == dtype @@ -873,7 +934,7 @@ def test_to_numpy_random( @pytest.mark.skipif(np is None, reason="numpy not installed") -@pytest.mark.parametrize(("dtype", "value", "data_be"), SPECIAL_VALUES) +@pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) @pytest.mark.parametrize( "endian", ( @@ -885,8 +946,9 @@ def test_to_numpy_special_values( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], endian: T_ENDIAN_LITERAL | None, value: object, - data_be: bytes, + data_be_raw: NormalizableBytes, ) -> None: + data_be = data_be_raw.raw_bytes() np_type = _get_numpy_dtype(dtype) v = _vector_from_data(data_be, dtype, endian) array = v.to_numpy() @@ -942,12 +1004,13 @@ def test_from_pyarrow_random( @pytest.mark.skipif(pa is None, reason="pyarrow not installed") 
-@pytest.mark.parametrize(("dtype", "value", "data_be"), SPECIAL_VALUES) +@pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) def test_from_pyarrow_special_values( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], value: object, - data_be: bytes, + data_be_raw: NormalizableBytes, ) -> None: + data_be = data_be_raw.raw_bytes() array = _get_pyarrow_array(data_be, dtype) v = Vector.from_pyarrow(array) assert v.dtype == dtype @@ -990,7 +1053,7 @@ def test_to_pyarrow_random( @pytest.mark.skipif(pa is None, reason="pyarrow not installed") -@pytest.mark.parametrize(("dtype", "value", "data_be"), SPECIAL_VALUES) +@pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) @pytest.mark.parametrize( "endian", ( @@ -1002,8 +1065,9 @@ def test_to_pyarrow_special_values( dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], endian: T_ENDIAN_LITERAL | None, value: object, - data_be: bytes, + data_be_raw: NormalizableBytes, ) -> None: + data_be = data_be_raw.raw_bytes() type_size = _get_type_size(dtype) data_ne = data_be if sys.byteorder == "little": @@ -1021,31 +1085,26 @@ def test_to_pyarrow_special_values( @pytest.mark.parametrize( - ("vector", "expected"), + "vector", ( - (Vector([], "i8"), "Vector(b'', 'i8')"), - (Vector([], "i16"), "Vector(b'', 'i16')"), - (Vector([], "i32"), "Vector(b'', 'i32')"), - (Vector([], "i64"), "Vector(b'', 'i64')"), - (Vector([], "f32"), "Vector(b'', 'f32')"), - (Vector([], "f64"), "Vector(b'', 'f64')"), + Vector([], "i8"), + Vector([], "i16"), + Vector([], "i32"), + Vector([], "i64"), + Vector([], "f32"), + Vector([], "f64"), *( - ( - Vector([value], dtype), - f"Vector({packed_bytes_be!r}, {dtype!r})", - ) - for (dtype, value, packed_bytes_be) in SPECIAL_INT_VALUES + Vector([value], dtype) + for (dtype, value, packed_bytes_be_) in SPECIAL_INT_VALUES ), *( - ( - Vector([value], dtype), - f"Vector({packed_bytes_be!r}, {dtype!r})", - ) - for (dtype, value, packed_bytes_be) in SPECIAL_FLOAT_VALUES + Vector([value], dtype) + for (dtype, value, packed_bytes_be_) in SPECIAL_FLOAT_VALUES ), ), ) -def test_vector_repr(vector: Vector, expected: str) -> None: +def test_vector_repr(vector: Vector) -> None: + expected = f"Vector({vector.raw()!r}, {vector.dtype.value!r})" assert repr(vector) == expected @@ -1079,12 +1138,16 @@ def _dtype_to_cypher_type(dtype: T_DTYPE_LITERAL) -> str: }[dtype] -def _vec_element_cypher_repr(value: t.Any) -> str: - if isinstance(value, float): +def _vec_element_cypher_repr(value: t.Any, dtype: T_DTYPE_LITERAL) -> str: + if isinstance(value, float) and dtype in {"f32", "f64"}: if math.isnan(value): return "NaN" if math.isinf(value): return "Infinity" if value > 0 else "-Infinity" + if dtype == "f32": + # account for float32 precision loss + compressed = struct.unpack(">f", struct.pack(">f", value))[0] + return repr(compressed) return repr(value) @@ -1101,7 +1164,7 @@ def _vec_element_cypher_repr(value: t.Any) -> str: ( Vector([value], dtype), ( - f"vector([{_vec_element_cypher_repr(value)}], 1, " + f"vector([{_vec_element_cypher_repr(value, dtype)}], 1, " f"{_dtype_to_cypher_type(dtype)})" ), ) @@ -1111,7 +1174,7 @@ def _vec_element_cypher_repr(value: t.Any) -> str: ( Vector([value], dtype), ( - f"vector([{_vec_element_cypher_repr(value)}], 1, " + f"vector([{_vec_element_cypher_repr(value, dtype)}], 1, " f"{_dtype_to_cypher_type(dtype)})" ), ) @@ -1135,8 +1198,9 @@ def test_vector_str_random( for _ in range(repeat): data = _random_value_be_bytes(type_size, size) v = Vector(data, dtype) - values_repr = 
( - f"[{', '.join(map(_vec_element_cypher_repr, v.to_native()))}]" + values_reprs = ( + _vec_element_cypher_repr(value, dtype) for value in v.to_native() ) + values_repr = f"[{', '.join(values_reprs)}]" expected = f"vector({values_repr}, {size}, {cypher_dtype})" assert str(v) == expected
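
For readers who want the core idea from the commit message without digging
through the diff, the following standalone sketch shows the two techniques:
packing/unpacking a whole vector with a single `struct` call by putting the
element count into the format string, and re-scanning the input only on the
error path to produce a precise TypeError/OverflowError. The helper names
(`pack_i16_vector`, `swap_endian`) are made up for illustration and are not
the driver's API; the actual implementations live in `src/neo4j/vector.py`
above.

import struct


def pack_i16_vector(values: list) -> bytes:
    # Pack all values with one call by encoding the element count in the
    # format string (">3h" instead of three separate ">h" calls).
    try:
        return struct.pack(f">{len(values)}h", *values)
    except struct.error:
        # Only on failure: re-scan the input to report the offending element.
        for item in values:
            if not isinstance(item, int):
                raise TypeError(
                    f"expected int, got {type(item).__name__}"
                ) from None
            if not -(2**15) <= item <= 2**15 - 1:
                raise OverflowError(
                    f"{item} is out of range for i16"
                ) from None
        raise


def swap_endian(type_size: int, data: bytes) -> bytes:
    # Byte swapping via one unpack/pack round trip: read the buffer as
    # little-endian integers and re-emit it big-endian.
    fmt = {2: "h", 4: "i", 8: "q"}[type_size]
    count = len(data) // type_size
    return struct.pack(
        f">{count}{fmt}", *struct.unpack(f"<{count}{fmt}", data)
    )


assert pack_i16_vector([1, 2]) == bytes.fromhex("00010002")
assert swap_endian(2, bytes.fromhex("01020304")) == bytes.fromhex("02010403")

Validating only after a failed bulk pack keeps the hot path to a single
C-level call while still reporting which element was at fault.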