diff --git a/marklogic/vectors.py b/marklogic/vectors.py new file mode 100644 index 0000000..f166f63 --- /dev/null +++ b/marklogic/vectors.py @@ -0,0 +1,45 @@ +import base64 +import struct +from typing import List + + +class VectorUtil: + """ + Supports encoding and decoding vectors using the same approach as the vec:base64-encode and vec:base64-decode + functions supported by the MarkLogic server. + """ + + @staticmethod + def base64_encode(vector: List[float]) -> str: + """ + Encodes a list of floats as a base64 string compatible with MarkLogic's vec:base64-encode. + """ + dimensions = len(vector) + # version (int32, 0) + dimensions (int32) + floats (little-endian) + buffer = struct.pack(" List[float]: + """ + Decodes a base64 string to a list of floats compatible with MarkLogic's vec:base64-decode. + """ + buffer = base64.b64decode(encoded_vector) + if len(buffer) < 8: + raise ValueError( + "Buffer is too short to contain version and dimensions." + ) + version, dimensions = struct.unpack("