From 0d9c32468544dbf5e60532528a17ad8dad844edb Mon Sep 17 00:00:00 2001 From: Phil Barber Date: Thu, 10 Jul 2025 20:08:27 -0400 Subject: [PATCH] Can now encode and decode MarkLogic internal vectors --- marklogic/vectors.py | 45 ++++++++++++++++++++++++++++++ test-app/docker-compose.yml | 4 +-- tests/test_vectors.py | 55 +++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 marklogic/vectors.py create mode 100644 tests/test_vectors.py diff --git a/marklogic/vectors.py b/marklogic/vectors.py new file mode 100644 index 0000000..f166f63 --- /dev/null +++ b/marklogic/vectors.py @@ -0,0 +1,45 @@ +import base64 +import struct +from typing import List + + +class VectorUtil: + """ + Supports encoding and decoding vectors using the same approach as the vec:base64-encode and vec:base64-decode + functions supported by the MarkLogic server. + """ + + @staticmethod + def base64_encode(vector: List[float]) -> str: + """ + Encodes a list of floats as a base64 string compatible with MarkLogic's vec:base64-encode. + """ + dimensions = len(vector) + # version (int32, 0) + dimensions (int32) + floats (little-endian) + buffer = struct.pack(" List[float]: + """ + Decodes a base64 string to a list of floats compatible with MarkLogic's vec:base64-decode. + """ + buffer = base64.b64decode(encoded_vector) + if len(buffer) < 8: + raise ValueError( + "Buffer is too short to contain version and dimensions." + ) + version, dimensions = struct.unpack("