From 520e7d385a1f2d52cc48b571805f57530bc1a794 Mon Sep 17 00:00:00 2001 From: tinder-igorsokolov Date: Wed, 7 Jan 2026 10:25:15 -0800 Subject: [PATCH 1/5] Added serialization/deserialization capabilities to the XorBinaryFuse16 --- .../src/main/java/org/fastfilter/Filter.java | 22 +- .../org/fastfilter/xor/XorBinaryFuse16.java | 75 ++++- .../org/fastfilter/xor/SerializationTest.java | 266 ++++++++++++++++++ 3 files changed, 350 insertions(+), 13 deletions(-) create mode 100644 fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java diff --git a/fastfilter/src/main/java/org/fastfilter/Filter.java b/fastfilter/src/main/java/org/fastfilter/Filter.java index 5eedbcb..05bfe43 100644 --- a/fastfilter/src/main/java/org/fastfilter/Filter.java +++ b/fastfilter/src/main/java/org/fastfilter/Filter.java @@ -1,5 +1,7 @@ package org.fastfilter; +import java.nio.ByteBuffer; + /** * An approximate membership filter. */ @@ -14,7 +16,7 @@ public interface Filter { boolean mayContain(long key); /** - * Get the number of bits in thhe filter. + * Get the number of bits in the filter. * * @return the number of bits */ @@ -65,4 +67,22 @@ default long cardinality() { return -1; } + /** + * Get the serialized size of the filter. + * + * @return the size in bytes + */ + default int getSerializedSize() { + return -1; + } + + /** + * Serializes the filter state into the provided {@code ByteBuffer}. + * + * @param buffer the byte buffer where the serialized state of the filter will be written + * @throws UnsupportedOperationException if the operation is not supported by the filter implementation + */ + default void serialize(ByteBuffer buffer) { + throw new UnsupportedOperationException(); + } } diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java index aad3c77..4d1c0fb 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java @@ -1,5 +1,7 @@ package org.fastfilter.xor; +import java.lang.reflect.Constructor; +import java.nio.ByteBuffer; import java.util.Arrays; import org.fastfilter.Filter; @@ -20,19 +22,25 @@ public class XorBinaryFuse16 implements Filter { private final short[] fingerprints; private long seed; - public XorBinaryFuse16(int segmentCount, int segmentLength) { + private XorBinaryFuse16(int segmentCount, int segmentLength, long seed, short[] fingerprints) { if (segmentLength < 0 || Integer.bitCount(segmentLength) != 1) { throw new IllegalArgumentException("Segment length needs to be a power of 2, is " + segmentLength); } if (segmentCount <= 0) { throw new IllegalArgumentException("Illegal segment count: " + segmentCount); } - this.segmentLength = segmentLength; + this.segmentCount = segmentCount; - this.segmentLengthMask = segmentLength - 1; this.segmentCountLength = segmentCount * segmentLength; - this.arrayLength = (segmentCount + ARITY - 1) * segmentLength; - this.fingerprints = new short[arrayLength]; + this.segmentLength = segmentLength; + this.segmentLengthMask = segmentLength - 1; + this.arrayLength = fingerprints.length; + this.fingerprints = fingerprints; + this.seed = seed; + } + + public XorBinaryFuse16(int segmentCount, int segmentLength) { + this(segmentCount, segmentLength, 0L, new short[(segmentCount + ARITY - 1) * segmentLength]); } public long getBitCount() { @@ -143,8 +151,7 @@ private void addAll(long[] keys) { countMask |= t2count[index]; } } - startPos = null; - if (countMask < 0) { + if (countMask < 0) { // we have a possible counter overflow continue mainloop; } @@ -210,11 +217,8 @@ private void addAll(long[] keys) { // use a new random numbers seed = Hash.randomSeed(); } - alone = null; - t2count = null; - t2hash = null; - for (int i = reverseOrderPos - 1; i >= 0; i--) { + for (int i = reverseOrderPos - 1; i >= 0; i--) { long hash = reverseOrder[i]; int found = reverseH[i]; short xor2 = fingerprint(hash); @@ -261,4 +265,51 @@ private short fingerprint(long hash) { return (short) hash; } -} \ No newline at end of file + @Override + public int getSerializedSize() { + return 2 * Integer.BYTES + Long.BYTES + Integer.BYTES + fingerprints.length * Short.BYTES; + } + + @Override + public void serialize(ByteBuffer buffer) { + if (buffer.remaining() < getSerializedSize()) { + throw new IllegalArgumentException("Buffer too small"); + } + + buffer.putInt(segmentLength); + buffer.putInt(segmentCountLength); + buffer.putLong(seed); + buffer.putInt(fingerprints.length); + for (final short fp : fingerprints) { + buffer.putShort(fp); + } + } + + public static XorBinaryFuse16 deserialize(ByteBuffer buffer) { + // Check minimum size for header (2 ints + 1 long + 1 int for length) + if (buffer.remaining() < 2 * Integer.BYTES + Long.BYTES + Integer.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final int segmentLength = buffer.getInt(); + final int segmentCountLength = buffer.getInt(); + final long seed = buffer.getLong(); + + final int len = buffer.getInt(); + + // Check if buffer has enough bytes for all fingerprints + if (buffer.remaining() < len * Short.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final short[] fingerprints = new short[len]; + for (int i = 0; i < len; i++) { + fingerprints[i] = buffer.getShort(); + } + + // Calculate segmentCount from segmentCountLength and segmentLength + final int segmentCount = segmentCountLength / segmentLength; + + return new XorBinaryFuse16(segmentCount, segmentLength, seed, fingerprints); + } +} diff --git a/fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java b/fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java new file mode 100644 index 0000000..d2113e1 --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java @@ -0,0 +1,266 @@ +package org.fastfilter.xor; + +import org.junit.Test; + +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class SerializationTest { + + @Test + public void shouldSerializeAndDeserializeEmptyFilter() { + // Arrange + final var keys = new long[]{1L, 2L, 3L, 4L, 5L}; + final var originalFilter = XorBinaryFuse16.construct(keys); + final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); + + // Act + originalFilter.serialize(buffer); + buffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + + // Assert + for (final long key : keys) { + assertTrue("Key " + key + " should be present in deserialized filter", + deserializedFilter.mayContain(key)); + } + } + + @Test + public void shouldSerializeAndDeserializeSmallFilter() { + // Arrange + final var keys = new long[]{100L, 200L, 300L, 400L, 500L, 600L, 700L, 800L, 900L, 1000L}; + final var originalFilter = XorBinaryFuse16.construct(keys); + final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); + + // Act + originalFilter.serialize(buffer); + buffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + + // Assert + for (final long key : keys) { + assertTrue("Key " + key + " should be present in deserialized filter", + deserializedFilter.mayContain(key)); + } + assertFalse("Key 50L should not be in filter", deserializedFilter.mayContain(50L)); + assertFalse("Key 1500L should not be in filter", deserializedFilter.mayContain(1500L)); + } + + @Test + public void shouldSerializeAndDeserializeLargeFilter() { + // Arrange + final int size = 10000; + final var keys = new long[size]; + for (int i = 0; i < size; i++) { + keys[i] = i * 100L; + } + final var originalFilter = XorBinaryFuse16.construct(keys); + final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); + + // Act + originalFilter.serialize(buffer); + buffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + + // Assert + for (int i = 0; i < size; i++) { + final long key = i * 100L; + assertTrue("Key " + key + " should be present in deserialized filter", + deserializedFilter.mayContain(key)); + } + // Test some keys that should not be in the filter + assertFalse("Key 1L should not be in filter", deserializedFilter.mayContain(1L)); + assertFalse("Key 50L should not be in filter", deserializedFilter.mayContain(50L)); + assertFalse("Key 99L should not be in filter", deserializedFilter.mayContain(99L)); + } + + @Test + public void shouldPreserveFilterCharacteristicsAfterDeserialization() { + // Arrange + final var keys = new long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L}; + final var originalFilter = XorBinaryFuse16.construct(keys); + final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); + + // Act + originalFilter.serialize(buffer); + buffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + + // Assert + assertEquals("Bit count should be preserved", + originalFilter.getBitCount(), deserializedFilter.getBitCount()); + assertEquals("Serialized size should be preserved", + originalFilter.getSerializedSize(), deserializedFilter.getSerializedSize()); + assertEquals("String representation should match", + originalFilter.toString(), deserializedFilter.toString()); + } + + @Test + public void shouldHandleMultipleSerializationRounds() { + // Arrange + final var keys = new long[]{10L, 20L, 30L, 40L, 50L}; + final var originalFilter = XorBinaryFuse16.construct(keys); + final var buffer1 = ByteBuffer.allocate(originalFilter.getSerializedSize()); + + // Act - First round + originalFilter.serialize(buffer1); + buffer1.flip(); + final var filter1 = XorBinaryFuse16.deserialize(buffer1); + + // Act - Second round + final var buffer2 = ByteBuffer.allocate(filter1.getSerializedSize()); + filter1.serialize(buffer2); + buffer2.flip(); + final var filter2 = XorBinaryFuse16.deserialize(buffer2); + + // Assert + for (final long key : keys) { + assertTrue("Key " + key + " should be present after first deserialization", + filter1.mayContain(key)); + assertTrue("Key " + key + " should be present after second deserialization", + filter2.mayContain(key)); + } + } + + @Test + public void shouldThrowExceptionWhenSerializeBufferTooSmall() { + // Arrange + final var keys = new long[]{1L, 2L, 3L, 4L, 5L}; + final var filter = XorBinaryFuse16.construct(keys); + final var smallBuffer = ByteBuffer.allocate(filter.getSerializedSize() - 1); + + // Act & Assert + try { + filter.serialize(smallBuffer); + fail("Should have thrown IllegalArgumentException for buffer too small"); + } catch (IllegalArgumentException e) { + assertEquals("Buffer too small", e.getMessage()); + } + } + + @Test + public void shouldThrowExceptionWhenDeserializeBufferTooSmall() { + // Arrange + final var tooSmallBuffer = ByteBuffer.allocate(10); + + // Act & Assert + try { + XorBinaryFuse16.deserialize(tooSmallBuffer); + fail("Should have thrown IllegalArgumentException for buffer too small"); + } catch (IllegalArgumentException e) { + assertEquals("Buffer too small", e.getMessage()); + } + } + + @Test + public void shouldHandleFilterWithSequentialKeys() { + // Arrange + final int size = 1000; + final var keys = new long[size]; + for (int i = 0; i < size; i++) { + keys[i] = i; + } + final var originalFilter = XorBinaryFuse16.construct(keys); + final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); + + // Act + originalFilter.serialize(buffer); + buffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + + // Assert + for (int i = 0; i < size; i++) { + assertTrue("Sequential key " + i + " should be present", + deserializedFilter.mayContain(i)); + } + assertFalse("Key outside range should not be in filter", + deserializedFilter.mayContain(size + 1000)); + } + + @Test + public void shouldHandleFilterWithRandomLargeKeys() { + // Arrange + final var keys = new long[]{ + Long.MAX_VALUE - 1, + Long.MAX_VALUE - 100, + Long.MAX_VALUE - 1000, + Long.MAX_VALUE / 2, + Long.MAX_VALUE / 3 + }; + final var originalFilter = XorBinaryFuse16.construct(keys); + final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); + + // Act + originalFilter.serialize(buffer); + buffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + + // Assert + for (final long key : keys) { + assertTrue("Large key " + key + " should be present", + deserializedFilter.mayContain(key)); + } + } + + @Test + public void shouldCorrectlyCalculateSerializedSize() { + // Arrange + final var keys = new long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L}; + final var filter = XorBinaryFuse16.construct(keys); + final int expectedSizeInBytes = filter.getSerializedSize(); + final var buffer = ByteBuffer.allocate(expectedSizeInBytes); + + // Act + filter.serialize(buffer); + + // Assert + assertEquals("Buffer position should equal serialized size", + expectedSizeInBytes, buffer.position()); + assertEquals("Buffer should have no remaining space", + 0, buffer.remaining()); + } + + @Test + public void shouldHandleExactBufferSize() { + // Arrange + final var keys = new long[]{100L, 200L, 300L}; + final var filter = XorBinaryFuse16.construct(keys); + final var exactBuffer = ByteBuffer.allocate(filter.getSerializedSize()); + + // Act + filter.serialize(exactBuffer); + exactBuffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(exactBuffer); + + // Assert + for (final long key : keys) { + assertTrue("Key " + key + " should be present with exact buffer", + deserializedFilter.mayContain(key)); + } + assertEquals("No bytes should remain in buffer", 0, exactBuffer.remaining()); + } + + @Test + public void shouldHandleLargerBufferThanNeeded() { + // Arrange + final var keys = new long[]{1L, 2L, 3L}; + final var filter = XorBinaryFuse16.construct(keys); + final var largeBuffer = ByteBuffer.allocate(filter.getSerializedSize() + 1000); + + // Act + filter.serialize(largeBuffer); + largeBuffer.flip(); + final var deserializedFilter = XorBinaryFuse16.deserialize(largeBuffer); + + // Assert + for (final long key : keys) { + assertTrue("Key " + key + " should be present with larger buffer", + deserializedFilter.mayContain(key)); + } + } +} From ec1e7a5b17da7d3313d04f63a82134e1a0f896a5 Mon Sep 17 00:00:00 2001 From: tinder-igorsokolov Date: Wed, 7 Jan 2026 13:55:12 -0800 Subject: [PATCH 2/5] small style fixes --- .../src/main/java/org/fastfilter/utils/Hash.java | 12 ++++++++---- .../java/org/fastfilter/xor/XorBinaryFuse16.java | 10 ++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fastfilter/src/main/java/org/fastfilter/utils/Hash.java b/fastfilter/src/main/java/org/fastfilter/utils/Hash.java index 6e6b02f..709c407 100644 --- a/fastfilter/src/main/java/org/fastfilter/utils/Hash.java +++ b/fastfilter/src/main/java/org/fastfilter/utils/Hash.java @@ -2,11 +2,15 @@ import java.util.Random; -public class Hash { +public final class Hash { + private Hash() { - private static Random random = new Random(); + } + + private static final Random random = new Random(); public static void setSeed(long seed) { + // shouldn't we use ThreadLocalRandom.current() instead? random.setSeed(seed); } @@ -23,7 +27,7 @@ public static long randomSeed() { } /** - * Shrink the hash to a value 0..n. Kind of like modulo, but using + * Shrink the hash to value 0..n. Kind of like modulo, but using * multiplication and shift, which are faster to compute. * * @param hash the hash @@ -37,7 +41,7 @@ public static int reduce(int hash, int n) { /** * Multiply two unsigned 64-bit values. - * See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8188044 + * See JDK-8188044 * * @param a the first value * @param b the second value diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java index 4d1c0fb..341034d 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java @@ -48,7 +48,7 @@ public long getBitCount() { } static int calculateSegmentLength(int arity, int size) { - int segmentLength; + final int segmentLength; if (arity == 3) { segmentLength = 1 << (int) Math.floor(Math.log(size) / Math.log(3.33) + 2.11); } else if (arity == 4) { @@ -61,7 +61,7 @@ static int calculateSegmentLength(int arity, int size) { } static double calculateSizeFactor(int arity, int size) { - double sizeFactor; + final double sizeFactor; if (arity == 3) { sizeFactor = Math.max(1.125, 0.875 + 0.25 * Math.log(1000000) / Math.log(size)); } else if (arity == 4) { @@ -209,12 +209,10 @@ private void addAll(long[] keys) { // if construction doesn't succeed eventually, // then there is likely a problem with the hash function // let us not crash the system: - for(int i = 0; i < fingerprints.length; i++) { - fingerprints[i] = (short)0xFFFF; - } + Arrays.fill(fingerprints, (short) 0xFFFF); return; } - // use a new random numbers + // use a new random number seed = Hash.randomSeed(); } From ea80a7da42020301342a44f7be26200857fbeda2 Mon Sep 17 00:00:00 2001 From: tinder-igorsokolov Date: Thu, 8 Jan 2026 12:02:34 -0500 Subject: [PATCH 3/5] reverting unrelated changes --- .../src/main/java/org/fastfilter/utils/Hash.java | 11 +++-------- .../java/org/fastfilter/xor/XorBinaryFuse16.java | 12 +++++++----- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/fastfilter/src/main/java/org/fastfilter/utils/Hash.java b/fastfilter/src/main/java/org/fastfilter/utils/Hash.java index 709c407..1eeb8de 100644 --- a/fastfilter/src/main/java/org/fastfilter/utils/Hash.java +++ b/fastfilter/src/main/java/org/fastfilter/utils/Hash.java @@ -2,15 +2,10 @@ import java.util.Random; -public final class Hash { - private Hash() { - - } - +public class Hash { private static final Random random = new Random(); public static void setSeed(long seed) { - // shouldn't we use ThreadLocalRandom.current() instead? random.setSeed(seed); } @@ -27,7 +22,7 @@ public static long randomSeed() { } /** - * Shrink the hash to value 0..n. Kind of like modulo, but using + * Shrink the hash to a value 0..n. Kind of like modulo, but using * multiplication and shift, which are faster to compute. * * @param hash the hash @@ -41,7 +36,7 @@ public static int reduce(int hash, int n) { /** * Multiply two unsigned 64-bit values. - * See JDK-8188044 + * See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8188044 * * @param a the first value * @param b the second value diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java index 341034d..ca8a235 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java @@ -1,9 +1,7 @@ package org.fastfilter.xor; -import java.lang.reflect.Constructor; import java.nio.ByteBuffer; import java.util.Arrays; - import org.fastfilter.Filter; import org.fastfilter.utils.Hash; @@ -61,7 +59,7 @@ static int calculateSegmentLength(int arity, int size) { } static double calculateSizeFactor(int arity, int size) { - final double sizeFactor; + double sizeFactor; if (arity == 3) { sizeFactor = Math.max(1.125, 0.875 + 0.25 * Math.log(1000000) / Math.log(size)); } else if (arity == 4) { @@ -151,7 +149,8 @@ private void addAll(long[] keys) { countMask |= t2count[index]; } } - if (countMask < 0) { + startPos = null; + if (countMask < 0) { // we have a possible counter overflow continue mainloop; } @@ -216,7 +215,10 @@ private void addAll(long[] keys) { seed = Hash.randomSeed(); } - for (int i = reverseOrderPos - 1; i >= 0; i--) { + alone = null; + t2count = null; + t2hash = null; + for (int i = reverseOrderPos - 1; i >= 0; i--) { long hash = reverseOrder[i]; int found = reverseH[i]; short xor2 = fingerprint(hash); From 69abf1a9f7db279c7ff1665d4e5b2467c3bf9823 Mon Sep 17 00:00:00 2001 From: tinder-igorsokolov Date: Thu, 8 Jan 2026 12:04:08 -0500 Subject: [PATCH 4/5] reverting unrelated changes --- fastfilter/src/main/java/org/fastfilter/utils/Hash.java | 2 +- .../src/main/java/org/fastfilter/xor/XorBinaryFuse16.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fastfilter/src/main/java/org/fastfilter/utils/Hash.java b/fastfilter/src/main/java/org/fastfilter/utils/Hash.java index 1eeb8de..57fc3fe 100644 --- a/fastfilter/src/main/java/org/fastfilter/utils/Hash.java +++ b/fastfilter/src/main/java/org/fastfilter/utils/Hash.java @@ -3,7 +3,7 @@ import java.util.Random; public class Hash { - private static final Random random = new Random(); + private static Random random = new Random(); public static void setSeed(long seed) { random.setSeed(seed); diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java index ca8a235..db49863 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse16.java @@ -46,7 +46,7 @@ public long getBitCount() { } static int calculateSegmentLength(int arity, int size) { - final int segmentLength; + int segmentLength; if (arity == 3) { segmentLength = 1 << (int) Math.floor(Math.log(size) / Math.log(3.33) + 2.11); } else if (arity == 4) { @@ -218,6 +218,7 @@ private void addAll(long[] keys) { alone = null; t2count = null; t2hash = null; + for (int i = reverseOrderPos - 1; i >= 0; i--) { long hash = reverseOrder[i]; int found = reverseH[i]; From 1cfc00bc2e515dddfd6cfb2149f0065299e8ad07 Mon Sep 17 00:00:00 2001 From: tinder-igorsokolov Date: Mon, 12 Jan 2026 10:57:27 -0500 Subject: [PATCH 5/5] added serialization / deserialization to Xor8, Xor16, XorBinaryFuse8, XorBinaryFuse32 --- .../main/java/org/fastfilter/xor/Xor16.java | 53 ++++++++ .../main/java/org/fastfilter/xor/Xor8.java | 48 +++++++ .../org/fastfilter/xor/XorBinaryFuse32.java | 64 ++++++++- .../org/fastfilter/xor/XorBinaryFuse8.java | 60 ++++++++- .../org/fastfilter/xor/SerializationTest.java | 126 +++++++++++------- 5 files changed, 294 insertions(+), 57 deletions(-) diff --git a/fastfilter/src/main/java/org/fastfilter/xor/Xor16.java b/fastfilter/src/main/java/org/fastfilter/xor/Xor16.java index bca40a6..8cdc4d8 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/Xor16.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/Xor16.java @@ -1,5 +1,7 @@ package org.fastfilter.xor; +import java.nio.ByteBuffer; + import org.fastfilter.Filter; import org.fastfilter.utils.Hash; @@ -143,4 +145,55 @@ private int fingerprint(long hash) { return (int) (hash & ((1 << BITS_PER_FINGERPRINT) - 1)); } + private Xor16(int blockLength, int bitCount, long seed, short[] fingerprints) { + this.blockLength = blockLength; + this.bitCount = bitCount; + this.seed = seed; + this.fingerprints = fingerprints; + } + + @Override + public int getSerializedSize() { + return Integer.BYTES + Long.BYTES + Integer.BYTES + fingerprints.length * Short.BYTES; + } + + @Override + public void serialize(ByteBuffer buffer) { + if (buffer.remaining() < getSerializedSize()) { + throw new IllegalArgumentException("Buffer too small"); + } + + buffer.putInt(blockLength); + buffer.putLong(seed); + buffer.putInt(fingerprints.length); + for (final short fp : fingerprints) { + buffer.putShort(fp); + } + } + + public static Xor16 deserialize(ByteBuffer buffer) { + // Check minimum size for header (1 int + 1 long + 1 int for length) + if (buffer.remaining() < Integer.BYTES + Long.BYTES + Integer.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final int blockLength = buffer.getInt(); + final long seed = buffer.getLong(); + + final int len = buffer.getInt(); + + // Check if buffer has enough bytes for all fingerprints + if (buffer.remaining() < len * Short.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final short[] fingerprints = new short[len]; + for (int i = 0; i < len; i++) { + fingerprints[i] = buffer.getShort(); + } + + final int bitCount = len * BITS_PER_FINGERPRINT; + + return new Xor16(blockLength, bitCount, seed, fingerprints); + } } diff --git a/fastfilter/src/main/java/org/fastfilter/xor/Xor8.java b/fastfilter/src/main/java/org/fastfilter/xor/Xor8.java index 86ac870..bb3b5ff 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/Xor8.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/Xor8.java @@ -1,6 +1,7 @@ package org.fastfilter.xor; import java.io.*; +import java.nio.ByteBuffer; import org.fastfilter.Filter; import org.fastfilter.utils.Hash; @@ -187,4 +188,51 @@ public Xor8(InputStream in) { } } + private Xor8(int size, long seed, byte[] fingerprints) { + this.size = size; + this.arrayLength = getArrayLength(size); + this.bitCount = arrayLength * BITS_PER_FINGERPRINT; + this.blockLength = arrayLength / HASHES; + this.seed = seed; + this.fingerprints = fingerprints; + } + + @Override + public int getSerializedSize() { + return Integer.BYTES + Long.BYTES + Integer.BYTES + fingerprints.length * Byte.BYTES; + } + + @Override + public void serialize(ByteBuffer buffer) { + if (buffer.remaining() < getSerializedSize()) { + throw new IllegalArgumentException("Buffer too small"); + } + + buffer.putInt(size); + buffer.putLong(seed); + buffer.putInt(fingerprints.length); + buffer.put(fingerprints); + } + + public static Xor8 deserialize(ByteBuffer buffer) { + // Check minimum size for header (1 int + 1 long + 1 int for length) + if (buffer.remaining() < Integer.BYTES + Long.BYTES + Integer.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final int size = buffer.getInt(); + final long seed = buffer.getLong(); + + final int len = buffer.getInt(); + + // Check if buffer has enough bytes for all fingerprints + if (buffer.remaining() < len * Byte.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final byte[] fingerprints = new byte[len]; + buffer.get(fingerprints); + + return new Xor8(size, seed, fingerprints); + } } diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse32.java b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse32.java index d3f6125..760fc3f 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse32.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse32.java @@ -1,5 +1,6 @@ package org.fastfilter.xor; +import java.nio.ByteBuffer; import java.util.Arrays; import org.fastfilter.Filter; @@ -20,19 +21,25 @@ public class XorBinaryFuse32 implements Filter { private final int[] fingerprints; private long seed; - public XorBinaryFuse32(int segmentCount, int segmentLength) { + private XorBinaryFuse32(int segmentCount, int segmentLength, long seed, int[] fingerprints) { if (segmentLength < 0 || Integer.bitCount(segmentLength) != 1) { throw new IllegalArgumentException("Segment length needs to be a power of 2, is " + segmentLength); } if (segmentCount <= 0) { throw new IllegalArgumentException("Illegal segment count: " + segmentCount); } - this.segmentLength = segmentLength; + this.segmentCount = segmentCount; - this.segmentLengthMask = segmentLength - 1; this.segmentCountLength = segmentCount * segmentLength; - this.arrayLength = (segmentCount + ARITY - 1) * segmentLength; - this.fingerprints = new int[arrayLength]; + this.segmentLength = segmentLength; + this.segmentLengthMask = segmentLength - 1; + this.arrayLength = fingerprints.length; + this.fingerprints = fingerprints; + this.seed = seed; + } + + public XorBinaryFuse32(int segmentCount, int segmentLength) { + this(segmentCount, segmentLength, 0L, new int[(segmentCount + ARITY - 1) * segmentLength]); } public long getBitCount() { @@ -261,4 +268,51 @@ private int fingerprint(long hash) { return (int) (hash ^ (hash >>> 32)); } + @Override + public int getSerializedSize() { + return 2 * Integer.BYTES + Long.BYTES + Integer.BYTES + fingerprints.length * Integer.BYTES; + } + + @Override + public void serialize(ByteBuffer buffer) { + if (buffer.remaining() < getSerializedSize()) { + throw new IllegalArgumentException("Buffer too small"); + } + + buffer.putInt(segmentLength); + buffer.putInt(segmentCountLength); + buffer.putLong(seed); + buffer.putInt(fingerprints.length); + for (final int fp : fingerprints) { + buffer.putInt(fp); + } + } + + public static XorBinaryFuse32 deserialize(ByteBuffer buffer) { + // Check minimum size for header (2 ints + 1 long + 1 int for length) + if (buffer.remaining() < 2 * Integer.BYTES + Long.BYTES + Integer.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final int segmentLength = buffer.getInt(); + final int segmentCountLength = buffer.getInt(); + final long seed = buffer.getLong(); + + final int len = buffer.getInt(); + + // Check if buffer has enough bytes for all fingerprints + if (buffer.remaining() < len * Integer.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final int[] fingerprints = new int[len]; + for (int i = 0; i < len; i++) { + fingerprints[i] = buffer.getInt(); + } + + // Calculate segmentCount from segmentCountLength and segmentLength + final int segmentCount = segmentCountLength / segmentLength; + + return new XorBinaryFuse32(segmentCount, segmentLength, seed, fingerprints); + } } diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse8.java b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse8.java index dfd5f45..ea16611 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse8.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorBinaryFuse8.java @@ -1,5 +1,6 @@ package org.fastfilter.xor; +import java.nio.ByteBuffer; import java.util.Arrays; import org.fastfilter.Filter; @@ -20,19 +21,25 @@ public class XorBinaryFuse8 implements Filter { private final byte[] fingerprints; private long seed; - public XorBinaryFuse8(int segmentCount, int segmentLength) { + private XorBinaryFuse8(int segmentCount, int segmentLength, long seed, byte[] fingerprints) { if (segmentLength < 0 || Integer.bitCount(segmentLength) != 1) { throw new IllegalArgumentException("Segment length needs to be a power of 2, is " + segmentLength); } if (segmentCount <= 0) { throw new IllegalArgumentException("Illegal segment count: " + segmentCount); } - this.segmentLength = segmentLength; + this.segmentCount = segmentCount; - this.segmentLengthMask = segmentLength - 1; this.segmentCountLength = segmentCount * segmentLength; - this.arrayLength = (segmentCount + ARITY - 1) * segmentLength; - this.fingerprints = new byte[arrayLength]; + this.segmentLength = segmentLength; + this.segmentLengthMask = segmentLength - 1; + this.arrayLength = fingerprints.length; + this.fingerprints = fingerprints; + this.seed = seed; + } + + public XorBinaryFuse8(int segmentCount, int segmentLength) { + this(segmentCount, segmentLength, 0L, new byte[(segmentCount + ARITY - 1) * segmentLength]); } public long getBitCount() { @@ -261,4 +268,47 @@ private byte fingerprint(long hash) { return (byte) hash; } + @Override + public int getSerializedSize() { + return 2 * Integer.BYTES + Long.BYTES + Integer.BYTES + fingerprints.length * Byte.BYTES; + } + + @Override + public void serialize(ByteBuffer buffer) { + if (buffer.remaining() < getSerializedSize()) { + throw new IllegalArgumentException("Buffer too small"); + } + + buffer.putInt(segmentLength); + buffer.putInt(segmentCountLength); + buffer.putLong(seed); + buffer.putInt(fingerprints.length); + buffer.put(fingerprints); + } + + public static XorBinaryFuse8 deserialize(ByteBuffer buffer) { + // Check minimum size for header (2 ints + 1 long + 1 int for length) + if (buffer.remaining() < 2 * Integer.BYTES + Long.BYTES + Integer.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final int segmentLength = buffer.getInt(); + final int segmentCountLength = buffer.getInt(); + final long seed = buffer.getLong(); + + final int len = buffer.getInt(); + + // Check if buffer has enough bytes for all fingerprints + if (buffer.remaining() < len * Byte.BYTES) { + throw new IllegalArgumentException("Buffer too small"); + } + + final byte[] fingerprints = new byte[len]; + buffer.get(fingerprints); + + // Calculate segmentCount from segmentCountLength and segmentLength + final int segmentCount = segmentCountLength / segmentLength; + + return new XorBinaryFuse8(segmentCount, segmentLength, seed, fingerprints); + } } diff --git a/fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java b/fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java index d2113e1..df6a204 100644 --- a/fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java +++ b/fastfilter/src/test/java/org/fastfilter/xor/SerializationTest.java @@ -1,54 +1,88 @@ package org.fastfilter.xor; -import org.junit.Test; - -import java.nio.ByteBuffer; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.function.Function; +import org.fastfilter.Filter; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) public class SerializationTest { + private final String filterName; + private final Function constructor; + private final Function deserializer; + + public SerializationTest(String filterName, + Function constructor, + Function deserializer) { + this.filterName = filterName; + this.constructor = constructor; + this.deserializer = deserializer; + } + + @Parameters(name = "{0}") + public static List filters() { + return List.of( + new Object[] {"Xor8", (Function) Xor8::construct, + (Function) Xor8::deserialize}, + new Object[] {"Xor16", (Function) Xor16::construct, + (Function) Xor16::deserialize}, + new Object[] {"XorBinaryFuse8", (Function) XorBinaryFuse8::construct, + (Function) XorBinaryFuse8::deserialize}, + new Object[] {"XorBinaryFuse16", (Function) XorBinaryFuse16::construct, + (Function) XorBinaryFuse16::deserialize}, + new Object[] {"XorBinaryFuse32", (Function) XorBinaryFuse32::construct, + (Function) XorBinaryFuse32::deserialize} + ); + } + @Test - public void shouldSerializeAndDeserializeEmptyFilter() { + public void shouldSerializeAndDeserializeSmallFilter() { // Arrange final var keys = new long[]{1L, 2L, 3L, 4L, 5L}; - final var originalFilter = XorBinaryFuse16.construct(keys); + final var originalFilter = constructor.apply(keys); final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); // Act originalFilter.serialize(buffer); buffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + final var deserializedFilter = deserializer.apply(buffer); // Assert for (final long key : keys) { - assertTrue("Key " + key + " should be present in deserialized filter", + assertTrue("Key " + key + " should be present in deserialized " + filterName + " filter", deserializedFilter.mayContain(key)); } } @Test - public void shouldSerializeAndDeserializeSmallFilter() { + public void shouldSerializeAndDeserializeMediumFilter() { // Arrange final var keys = new long[]{100L, 200L, 300L, 400L, 500L, 600L, 700L, 800L, 900L, 1000L}; - final var originalFilter = XorBinaryFuse16.construct(keys); + final var originalFilter = constructor.apply(keys); final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); // Act originalFilter.serialize(buffer); buffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + final var deserializedFilter = deserializer.apply(buffer); // Assert for (final long key : keys) { - assertTrue("Key " + key + " should be present in deserialized filter", + assertTrue("Key " + key + " should be present in deserialized " + filterName + " filter", deserializedFilter.mayContain(key)); } - assertFalse("Key 50L should not be in filter", deserializedFilter.mayContain(50L)); - assertFalse("Key 1500L should not be in filter", deserializedFilter.mayContain(1500L)); + assertFalse("Key 50L should not be in " + filterName + " filter", deserializedFilter.mayContain(50L)); + assertFalse("Key 1500L should not be in " + filterName + " filter", deserializedFilter.mayContain(1500L)); } @Test @@ -59,18 +93,18 @@ public void shouldSerializeAndDeserializeLargeFilter() { for (int i = 0; i < size; i++) { keys[i] = i * 100L; } - final var originalFilter = XorBinaryFuse16.construct(keys); + final var originalFilter = constructor.apply(keys); final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); // Act originalFilter.serialize(buffer); buffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + final var deserializedFilter = deserializer.apply(buffer); // Assert for (int i = 0; i < size; i++) { final long key = i * 100L; - assertTrue("Key " + key + " should be present in deserialized filter", + assertTrue("Key " + key + " should be present in deserialized " + filterName + " filter", deserializedFilter.mayContain(key)); } // Test some keys that should not be in the filter @@ -83,46 +117,44 @@ public void shouldSerializeAndDeserializeLargeFilter() { public void shouldPreserveFilterCharacteristicsAfterDeserialization() { // Arrange final var keys = new long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L}; - final var originalFilter = XorBinaryFuse16.construct(keys); + final var originalFilter = constructor.apply(keys); final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); // Act originalFilter.serialize(buffer); buffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + final var deserializedFilter = deserializer.apply(buffer); // Assert - assertEquals("Bit count should be preserved", + assertEquals("Bit count should be preserved for " + filterName, originalFilter.getBitCount(), deserializedFilter.getBitCount()); - assertEquals("Serialized size should be preserved", + assertEquals("Serialized size should be preserved for " + filterName, originalFilter.getSerializedSize(), deserializedFilter.getSerializedSize()); - assertEquals("String representation should match", - originalFilter.toString(), deserializedFilter.toString()); } @Test public void shouldHandleMultipleSerializationRounds() { // Arrange final var keys = new long[]{10L, 20L, 30L, 40L, 50L}; - final var originalFilter = XorBinaryFuse16.construct(keys); + final var originalFilter = constructor.apply(keys); final var buffer1 = ByteBuffer.allocate(originalFilter.getSerializedSize()); // Act - First round originalFilter.serialize(buffer1); buffer1.flip(); - final var filter1 = XorBinaryFuse16.deserialize(buffer1); + final var filter1 = deserializer.apply(buffer1); // Act - Second round final var buffer2 = ByteBuffer.allocate(filter1.getSerializedSize()); filter1.serialize(buffer2); buffer2.flip(); - final var filter2 = XorBinaryFuse16.deserialize(buffer2); + final var filter2 = deserializer.apply(buffer2); // Assert for (final long key : keys) { - assertTrue("Key " + key + " should be present after first deserialization", + assertTrue("Key " + key + " should be present after first deserialization of " + filterName, filter1.mayContain(key)); - assertTrue("Key " + key + " should be present after second deserialization", + assertTrue("Key " + key + " should be present after second deserialization of " + filterName, filter2.mayContain(key)); } } @@ -131,7 +163,7 @@ public void shouldHandleMultipleSerializationRounds() { public void shouldThrowExceptionWhenSerializeBufferTooSmall() { // Arrange final var keys = new long[]{1L, 2L, 3L, 4L, 5L}; - final var filter = XorBinaryFuse16.construct(keys); + final var filter = constructor.apply(keys); final var smallBuffer = ByteBuffer.allocate(filter.getSerializedSize() - 1); // Act & Assert @@ -150,7 +182,7 @@ public void shouldThrowExceptionWhenDeserializeBufferTooSmall() { // Act & Assert try { - XorBinaryFuse16.deserialize(tooSmallBuffer); + deserializer.apply(tooSmallBuffer); fail("Should have thrown IllegalArgumentException for buffer too small"); } catch (IllegalArgumentException e) { assertEquals("Buffer too small", e.getMessage()); @@ -165,20 +197,20 @@ public void shouldHandleFilterWithSequentialKeys() { for (int i = 0; i < size; i++) { keys[i] = i; } - final var originalFilter = XorBinaryFuse16.construct(keys); + final var originalFilter = constructor.apply(keys); final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); // Act originalFilter.serialize(buffer); buffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + final var deserializedFilter = deserializer.apply(buffer); // Assert for (int i = 0; i < size; i++) { - assertTrue("Sequential key " + i + " should be present", + assertTrue("Sequential key " + i + " should be present in " + filterName, deserializedFilter.mayContain(i)); } - assertFalse("Key outside range should not be in filter", + assertFalse("Key outside range should not be in " + filterName + " filter", deserializedFilter.mayContain(size + 1000)); } @@ -192,17 +224,17 @@ public void shouldHandleFilterWithRandomLargeKeys() { Long.MAX_VALUE / 2, Long.MAX_VALUE / 3 }; - final var originalFilter = XorBinaryFuse16.construct(keys); + final var originalFilter = constructor.apply(keys); final var buffer = ByteBuffer.allocate(originalFilter.getSerializedSize()); // Act originalFilter.serialize(buffer); buffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(buffer); + final var deserializedFilter = deserializer.apply(buffer); // Assert for (final long key : keys) { - assertTrue("Large key " + key + " should be present", + assertTrue("Large key " + key + " should be present in " + filterName, deserializedFilter.mayContain(key)); } } @@ -211,7 +243,7 @@ public void shouldHandleFilterWithRandomLargeKeys() { public void shouldCorrectlyCalculateSerializedSize() { // Arrange final var keys = new long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L}; - final var filter = XorBinaryFuse16.construct(keys); + final var filter = constructor.apply(keys); final int expectedSizeInBytes = filter.getSerializedSize(); final var buffer = ByteBuffer.allocate(expectedSizeInBytes); @@ -219,9 +251,9 @@ public void shouldCorrectlyCalculateSerializedSize() { filter.serialize(buffer); // Assert - assertEquals("Buffer position should equal serialized size", + assertEquals("Buffer position should equal serialized size for " + filterName, expectedSizeInBytes, buffer.position()); - assertEquals("Buffer should have no remaining space", + assertEquals("Buffer should have no remaining space for " + filterName, 0, buffer.remaining()); } @@ -229,37 +261,37 @@ public void shouldCorrectlyCalculateSerializedSize() { public void shouldHandleExactBufferSize() { // Arrange final var keys = new long[]{100L, 200L, 300L}; - final var filter = XorBinaryFuse16.construct(keys); + final var filter = constructor.apply(keys); final var exactBuffer = ByteBuffer.allocate(filter.getSerializedSize()); // Act filter.serialize(exactBuffer); exactBuffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(exactBuffer); + final var deserializedFilter = deserializer.apply(exactBuffer); // Assert for (final long key : keys) { - assertTrue("Key " + key + " should be present with exact buffer", + assertTrue("Key " + key + " should be present with exact buffer in " + filterName, deserializedFilter.mayContain(key)); } - assertEquals("No bytes should remain in buffer", 0, exactBuffer.remaining()); + assertEquals("No bytes should remain in buffer for " + filterName, 0, exactBuffer.remaining()); } @Test public void shouldHandleLargerBufferThanNeeded() { // Arrange final var keys = new long[]{1L, 2L, 3L}; - final var filter = XorBinaryFuse16.construct(keys); + final var filter = constructor.apply(keys); final var largeBuffer = ByteBuffer.allocate(filter.getSerializedSize() + 1000); // Act filter.serialize(largeBuffer); largeBuffer.flip(); - final var deserializedFilter = XorBinaryFuse16.deserialize(largeBuffer); + final var deserializedFilter = deserializer.apply(largeBuffer); // Assert for (final long key : keys) { - assertTrue("Key " + key + " should be present with larger buffer", + assertTrue("Key " + key + " should be present with larger buffer in " + filterName, deserializedFilter.mayContain(key)); } }