-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Optimizing Variant read path with lazy caching #3481
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
nssalian
wants to merge
1
commit into
apache:master
Choose a base branch
from
nssalian:variant-read-changes
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+140
−72
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,6 +36,26 @@ public final class Variant { | |
| */ | ||
| final ByteBuffer metadata; | ||
|
|
||
| /** | ||
| * Pre-computed metadata dictionary size | ||
| */ | ||
| private final int dictSize; | ||
|
|
||
| /** | ||
| * Lazy cache for metadata dictionary strings. | ||
| */ | ||
| private volatile String[] metadataCache; | ||
|
|
||
| /** | ||
| * Lazy cache for the parsed object header. | ||
| */ | ||
| private volatile VariantUtil.ObjectInfo cachedObjectInfo; | ||
|
|
||
| /** | ||
| * Lazy cache for the parsed array header. | ||
| */ | ||
| private volatile VariantUtil.ArrayInfo cachedArrayInfo; | ||
|
|
||
| /** | ||
| * The threshold to switch from linear search to binary search when looking up a field by key in | ||
| * an object. This is a performance optimization to avoid the overhead of binary search for a | ||
|
|
@@ -67,6 +87,26 @@ public Variant(ByteBuffer value, ByteBuffer metadata) { | |
| "Unsupported variant metadata version: %d", | ||
| metadata.get(metadata.position()) & VariantUtil.VERSION_MASK)); | ||
| } | ||
|
|
||
| // Pre-compute dictionary size for lazy metadata cache allocation. | ||
| int pos = this.metadata.position(); | ||
| int metaOffsetSize = ((this.metadata.get(pos) >> 6) & 0x3) + 1; | ||
| if (this.metadata.remaining() > 1) { | ||
| this.dictSize = VariantUtil.readUnsigned(this.metadata, pos + 1, metaOffsetSize); | ||
| } else { | ||
| this.dictSize = 0; | ||
|
nssalian marked this conversation as resolved.
|
||
| } | ||
| this.metadataCache = null; | ||
| } | ||
|
|
||
| /** | ||
| * Package-private constructor that shares pre-parsed metadata state from a parent Variant. | ||
| */ | ||
| Variant(ByteBuffer value, ByteBuffer metadata, String[] metadataCache, int dictSize) { | ||
| this.value = value.asReadOnlyBuffer(); | ||
| this.metadata = metadata.asReadOnlyBuffer(); | ||
| this.metadataCache = metadataCache; | ||
| this.dictSize = dictSize; | ||
| } | ||
|
|
||
| public ByteBuffer getValueBuffer() { | ||
|
|
@@ -194,7 +234,7 @@ public Type getType() { | |
| * @throws IllegalArgumentException if `getType()` does not return `Type.OBJECT` | ||
| */ | ||
| public int numObjectElements() { | ||
| return VariantUtil.getObjectInfo(value).numElements; | ||
| return objectInfo().numElements; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -206,22 +246,18 @@ public int numObjectElements() { | |
| * @throws IllegalArgumentException if `getType()` does not return `Type.OBJECT` | ||
| */ | ||
| public Variant getFieldByKey(String key) { | ||
| VariantUtil.ObjectInfo info = VariantUtil.getObjectInfo(value); | ||
| // Use linear search for a short list. Switch to binary search when the length reaches | ||
| // `BINARY_SEARCH_THRESHOLD`. | ||
| VariantUtil.ObjectInfo info = objectInfo(); | ||
| int idStart = value.position() + info.idStartOffset; | ||
| int offsetStart = value.position() + info.offsetStartOffset; | ||
| int dataStart = value.position() + info.dataStartOffset; | ||
|
|
||
| if (info.numElements < BINARY_SEARCH_THRESHOLD) { | ||
| for (int i = 0; i < info.numElements; ++i) { | ||
| ObjectField field = getFieldAtIndex( | ||
| i, | ||
| value, | ||
| metadata, | ||
| info.idSize, | ||
| info.offsetSize, | ||
| value.position() + info.idStartOffset, | ||
| value.position() + info.offsetStartOffset, | ||
| value.position() + info.dataStartOffset); | ||
| if (field.key.equals(key)) { | ||
| return field.value; | ||
| int id = VariantUtil.readUnsigned(value, idStart + info.idSize * i, info.idSize); | ||
|
nssalian marked this conversation as resolved.
|
||
| String fieldKey = getMetadataKeyCached(id); | ||
| if (fieldKey.equals(key)) { | ||
| int offset = VariantUtil.readUnsigned(value, offsetStart + info.offsetSize * i, info.offsetSize); | ||
| return childVariant(VariantUtil.slice(value, dataStart + offset)); | ||
| } | ||
| } | ||
| } else { | ||
|
|
@@ -232,22 +268,16 @@ public Variant getFieldByKey(String key) { | |
| // performance optimization, because it can properly handle the case where `low + high` | ||
| // overflows int. | ||
| int mid = (low + high) >>> 1; | ||
| ObjectField field = getFieldAtIndex( | ||
| mid, | ||
| value, | ||
| metadata, | ||
| info.idSize, | ||
| info.offsetSize, | ||
| value.position() + info.idStartOffset, | ||
| value.position() + info.offsetStartOffset, | ||
| value.position() + info.dataStartOffset); | ||
| int cmp = field.key.compareTo(key); | ||
| int midId = VariantUtil.readUnsigned(value, idStart + info.idSize * mid, info.idSize); | ||
| String midKey = getMetadataKeyCached(midId); | ||
| int cmp = midKey.compareTo(key); | ||
| if (cmp < 0) { | ||
| low = mid + 1; | ||
| } else if (cmp > 0) { | ||
| high = mid - 1; | ||
| } else { | ||
| return field.value; | ||
| int offset = VariantUtil.readUnsigned(value, offsetStart + info.offsetSize * mid, info.offsetSize); | ||
| return childVariant(VariantUtil.slice(value, dataStart + offset)); | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -275,35 +305,14 @@ public ObjectField(String key, Variant value) { | |
| * @throws IllegalArgumentException if `getType()` does not return `Type.OBJECT` | ||
| */ | ||
| public ObjectField getFieldAtIndex(int idx) { | ||
| VariantUtil.ObjectInfo info = VariantUtil.getObjectInfo(value); | ||
| // Use linear search for a short list. Switch to binary search when the length reaches | ||
| // `BINARY_SEARCH_THRESHOLD`. | ||
| ObjectField field = getFieldAtIndex( | ||
| idx, | ||
| value, | ||
| metadata, | ||
| info.idSize, | ||
| info.offsetSize, | ||
| value.position() + info.idStartOffset, | ||
| value.position() + info.offsetStartOffset, | ||
| value.position() + info.dataStartOffset); | ||
| return field; | ||
| } | ||
|
|
||
| static ObjectField getFieldAtIndex( | ||
| int index, | ||
| ByteBuffer value, | ||
| ByteBuffer metadata, | ||
| int idSize, | ||
| int offsetSize, | ||
| int idStart, | ||
| int offsetStart, | ||
| int dataStart) { | ||
| // idStart, offsetStart, and dataStart are absolute positions in the `value` buffer. | ||
| int id = VariantUtil.readUnsigned(value, idStart + idSize * index, idSize); | ||
| int offset = VariantUtil.readUnsigned(value, offsetStart + offsetSize * index, offsetSize); | ||
| String key = VariantUtil.getMetadataKey(metadata, id); | ||
| Variant v = new Variant(VariantUtil.slice(value, dataStart + offset), metadata); | ||
| VariantUtil.ObjectInfo info = objectInfo(); | ||
| int idStart = value.position() + info.idStartOffset; | ||
| int offsetStart = value.position() + info.offsetStartOffset; | ||
| int dataStart = value.position() + info.dataStartOffset; | ||
| int id = VariantUtil.readUnsigned(value, idStart + info.idSize * idx, info.idSize); | ||
| int offset = VariantUtil.readUnsigned(value, offsetStart + info.offsetSize * idx, info.offsetSize); | ||
| String key = getMetadataKeyCached(id); | ||
| Variant v = childVariant(VariantUtil.slice(value, dataStart + offset)); | ||
| return new ObjectField(key, v); | ||
| } | ||
|
|
||
|
|
@@ -312,7 +321,7 @@ static ObjectField getFieldAtIndex( | |
| * @throws IllegalArgumentException if `getType()` does not return `Type.ARRAY` | ||
| */ | ||
| public int numArrayElements() { | ||
| return VariantUtil.getArrayInfo(value).numElements; | ||
| return arrayInfo().numElements; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -324,23 +333,64 @@ public int numArrayElements() { | |
| * @throws IllegalArgumentException if `getType()` does not return `Type.ARRAY` | ||
| */ | ||
| public Variant getElementAtIndex(int index) { | ||
| VariantUtil.ArrayInfo info = VariantUtil.getArrayInfo(value); | ||
| VariantUtil.ArrayInfo info = arrayInfo(); | ||
| if (index < 0 || index >= info.numElements) { | ||
| return null; | ||
| } | ||
| return getElementAtIndex( | ||
| index, | ||
| value, | ||
| metadata, | ||
| info.offsetSize, | ||
| value.position() + info.offsetStartOffset, | ||
| value.position() + info.dataStartOffset); | ||
| int offsetStart = value.position() + info.offsetStartOffset; | ||
| int dataStart = value.position() + info.dataStartOffset; | ||
| int offset = VariantUtil.readUnsigned(value, offsetStart + info.offsetSize * index, info.offsetSize); | ||
| return childVariant(VariantUtil.slice(value, dataStart + offset)); | ||
| } | ||
|
|
||
| /** | ||
| * Creates a child Variant that shares this instance's metadata cache. | ||
| */ | ||
| private Variant childVariant(ByteBuffer childValue) { | ||
| return new Variant(childValue, metadata, metadataCache, dictSize); | ||
| } | ||
|
|
||
| private static Variant getElementAtIndex( | ||
| int index, ByteBuffer value, ByteBuffer metadata, int offsetSize, int offsetStart, int dataStart) { | ||
| // offsetStart and dataStart are absolute positions in the `value` buffer. | ||
| int offset = VariantUtil.readUnsigned(value, offsetStart + offsetSize * index, offsetSize); | ||
| return new Variant(VariantUtil.slice(value, dataStart + offset), metadata); | ||
| /** | ||
| * Returns the metadata dictionary string for the given ID, caching the result. | ||
| */ | ||
| String getMetadataKeyCached(int id) { | ||
| // Fall back to uncached lookup for out-of-range IDs | ||
| if (id < 0 || id >= dictSize) { | ||
|
nssalian marked this conversation as resolved.
|
||
| return VariantUtil.getMetadataKey(metadata, id); | ||
| } | ||
| // Demand-create shared dictionary cache | ||
| String[] cache = metadataCache; | ||
| if (cache == null) { | ||
| cache = new String[dictSize]; | ||
| metadataCache = cache; | ||
| } | ||
| if (cache[id] == null) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set cache to be metadataCache here, so if there is any race condition the same cache array is updated. If two threads are writing to the same [id], well, one lookup is wasted. The joint cache is still (probably) updated with each value |
||
| cache[id] = VariantUtil.getMetadataKey(metadata, id); | ||
| } | ||
| return cache[id]; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the cached object header, parsing it on first access. | ||
| */ | ||
| private VariantUtil.ObjectInfo objectInfo() { | ||
| VariantUtil.ObjectInfo info = cachedObjectInfo; | ||
| if (info == null) { | ||
| info = VariantUtil.getObjectInfo(value); | ||
| cachedObjectInfo = info; | ||
| } | ||
| return info; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the cached array header, parsing it on first access. | ||
| */ | ||
| private VariantUtil.ArrayInfo arrayInfo() { | ||
| VariantUtil.ArrayInfo info = cachedArrayInfo; | ||
| if (info == null) { | ||
| info = VariantUtil.getArrayInfo(value); | ||
| cachedArrayInfo = info; | ||
| } | ||
| return info; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit. add a "." at the end as some java versions' javadocs really blow up if you don't.