From 2a1dbcfd06669b36b2e76629c08f83577b3d05b3 Mon Sep 17 00:00:00 2001 From: "Claude (on behalf of Steven Schlansker)" Date: Fri, 29 May 2026 15:32:53 +0000 Subject: [PATCH] refactor(format): inline custom-codec dispatch in row codecs Replace CustomTypeEncoderRegistry$Gen, a per-registration Janino-generated class whose static encode/decode/newCollection methods the row codec called into, with one static final CustomCodec / CustomCollectionFactory field per registered codec on the row codec class itself. Fields are initialized at class load from CustomTypeEncoderRegistry; encode/decode invoke the cached instance. Per-call cost is one indirect call; the Janino compile-and-define cycle on first registration is gone. CustomTypeEncoderRegistry collapses to two ConcurrentHashMap lookups plus a singleton handler view. findCodec / findCollectionFactory are public so a row codec generated under a child classloader can reach them. Key the collection-factory codegen cache on (container, element) rather than container alone, so two SortedSet-typed sibling fields with different element types dispatch to their own factories. --- .../encoder/BaseBinaryEncoderBuilder.java | 93 +++++-- .../type/CustomTypeEncoderRegistry.java | 254 +++--------------- .../fory/format/encoder/CustomCodecTest.java | 34 +++ 3 files changed, 146 insertions(+), 235 deletions(-) diff --git a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java index 2d214e287e..a0a732e191 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/encoder/BaseBinaryEncoderBuilder.java @@ -69,6 +69,7 @@ import org.apache.fory.format.row.binary.writer.BinaryWriter; import org.apache.fory.format.type.CustomTypeEncoderRegistry; import org.apache.fory.format.type.CustomTypeHandler; +import org.apache.fory.format.type.CustomTypeRegistration; import org.apache.fory.format.type.DataTypes; import org.apache.fory.format.type.Field; import org.apache.fory.format.type.Schema; @@ -768,15 +769,19 @@ protected Expression newCollection(Expression arrayData, TypeRef typeRef) { CustomCollectionFactory customFactory = customTypeHandler.findCollectionFactory(clazz, componentClass.getRawType()); if (customFactory != null) { + Reference factoryRef = + customCollectionFactoryFieldRef(clazz, componentClass.getRawType()); collection = - new StaticInvoke( - customTypeHandler.getClass(), - "newCollection", + new Cast( + new Invoke( + factoryRef, + "newCollection", + "newCollectionObject", + TypeRef.of(Object.class), + false, + size), typeRef, - false, - new Expression.Null(typeRef, true), - new Expression.Null(componentClass, true), - size); + "newCollection"); } else if (TypeRef.of(clazz).isSupertypeOf(TypeRef.of(ArrayList.class))) { collection = new NewInstance(TypeRef.of(ArrayList.class), size); } else if (TypeRef.of(clazz).isSupertypeOf(TypeRef.of(HashSet.class))) { @@ -921,26 +926,70 @@ protected Expression deserializeForObject(Expression value, TypeRef typeRef) return new Invoke(foryRef, "deserialize", typeRef, value); } + // Static fields the row codec calls into. Initialized once at class load from + // CustomTypeEncoderRegistry; encode/decode call sites invoke the cached instance. + private final Map, Reference> customCodecFieldRefs = new HashMap<>(); + private final Map customCollectionFactoryFieldRefs = + new HashMap<>(); + + private Reference customCodecFieldRef(TypeRef fieldType) { + return customCodecFieldRefs.computeIfAbsent( + fieldType, + ft -> { + String name = ctx.newName("_customCodec"); + Expression init = + new StaticInvoke( + CustomTypeEncoderRegistry.class, + "findCodec", + "", + TypeRef.of(CustomCodec.class), + false, + false, + false, + Literal.ofClass(beanType.getRawType()), + Literal.ofClass(ft.getRawType())); + ctx.addField(true, true, ctx.type(CustomCodec.class), name, init); + return new Reference(name, TypeRef.of(CustomCodec.class)); + }); + } + + private Reference customCollectionFactoryFieldRef( + Class collectionType, Class elementType) { + return customCollectionFactoryFieldRefs.computeIfAbsent( + new CustomTypeRegistration(collectionType, elementType), + reg -> { + String name = ctx.newName("_customCollectionFactory"); + Expression init = + new StaticInvoke( + CustomTypeEncoderRegistry.class, + "findCollectionFactory", + "", + TypeRef.of(CustomCollectionFactory.class), + false, + false, + false, + Literal.ofClass(reg.getBeanType()), + Literal.ofClass(reg.getFieldType())); + ctx.addField(true, true, ctx.type(CustomCollectionFactory.class), name, init); + return new Reference(name, TypeRef.of(CustomCollectionFactory.class)); + }); + } + protected Expression customEncode(Expression inputObject, TypeRef rewrittenType) { - return new Expression.StaticInvoke( - customTypeHandler.getClass(), - "encode", - "rewrittenValue", + Reference codecRef = customCodecFieldRef(inputObject.type()); + return new Cast( + new Invoke( + codecRef, "encode", "rewrittenObject", TypeRef.of(Object.class), true, inputObject), rewrittenType, - true, - new Expression.Null(beanType, true), - inputObject); + "rewrittenValue"); } protected Expression customDecode(TypeRef typeRef, final Expression deserializedValue) { - return new Expression.StaticInvoke( - customTypeHandler.getClass(), - "decode", - "decodedValue", + Reference codecRef = customCodecFieldRef(typeRef); + return new Cast( + new Invoke( + codecRef, "decode", "decodedObject", TypeRef.of(Object.class), true, deserializedValue), typeRef, - true, - new Expression.Null(beanType, true), - new Expression.Null(typeRef, true), - deserializedValue); + "decodedValue"); } } diff --git a/java/fory-format/src/main/java/org/apache/fory/format/type/CustomTypeEncoderRegistry.java b/java/fory-format/src/main/java/org/apache/fory/format/type/CustomTypeEncoderRegistry.java index b952087970..50a3ee6846 100644 --- a/java/fory-format/src/main/java/org/apache/fory/format/type/CustomTypeEncoderRegistry.java +++ b/java/fory-format/src/main/java/org/apache/fory/format/type/CustomTypeEncoderRegistry.java @@ -19,246 +19,74 @@ package org.apache.fory.format.type; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.Map; -import java.util.function.BiConsumer; +import java.util.concurrent.ConcurrentHashMap; import org.apache.fory.annotation.Internal; -import org.apache.fory.codegen.CodegenContext; -import org.apache.fory.codegen.CompileUnit; -import org.apache.fory.codegen.Expression; -import org.apache.fory.codegen.Expression.Reference; -import org.apache.fory.codegen.JaninoUtils; import org.apache.fory.format.encoder.CustomCodec; import org.apache.fory.format.encoder.CustomCollectionFactory; -import org.apache.fory.reflect.TypeRef; -import org.apache.fory.util.unsafe.DefineClass; /** - * Keep a registry of custom codecs and collection factories. In order to deliver peak performance, - * the configuration is generated into a class with static methods to avoid the overhead of virtual - * dispatch. The method signatures match the requested bean / field or collection / element types. - * Then, we dispatch to the user code via a {@code static final} field. This should allow the JIT to - * easily inline the calls. + * Registry of custom codecs and collection factories. Generated row codecs cache the registered + * instance in a {@code static final} field, so each encode/decode call is a direct invocation. */ @Internal public class CustomTypeEncoderRegistry { - static final Map> REGISTRY = new HashMap<>(); + static final Map> REGISTRY = new ConcurrentHashMap<>(); static final Map> COLLECTION_REGISTRY = - new HashMap<>(); - private static int generation = 0; - private static CustomTypeHandler generatedHandler = CustomTypeHandler.EMPTY; + new ConcurrentHashMap<>(); - static synchronized void registerCustomCodec( + private static final CustomTypeHandler HANDLER = new RegistryHandler(); + + static void registerCustomCodec( final CustomTypeRegistration registration, final CustomCodec encoder) { REGISTRY.put(registration, encoder); - generatedHandler = null; } - static synchronized void registerCustomCollection( + static void registerCustomCollection( final Class iterableType, final Class elementType, final CustomCollectionFactory decoder) { COLLECTION_REGISTRY.put(new CustomTypeRegistration(iterableType, elementType), decoder); - generatedHandler = null; } - public static synchronized CustomTypeHandler customTypeHandler() { - if (generatedHandler != null) { - return generatedHandler; - } - generation++; - genCode(); - return generatedHandler; + /** Handler view used by codegen for type discovery; not on the runtime dispatch path. */ + public static CustomTypeHandler customTypeHandler() { + return HANDLER; } - private static void genCode() { - final String pkg = CustomTypeEncoderRegistry.class.getPackage().getName(); - final String className = "CustomTypeEncoderRegistry$Gen" + generation; - final CodegenContext ctx = new CodegenContext(pkg, new HashSet<>(), new LinkedHashSet<>()); - ctx.setClassName(className); - ctx.implementsInterfaces(CustomTypeHandler.class.getName()); - - // Copy mutable state so the generated class is immutable - ctx.addField( - true, - true, - "java.util.Map>", - "REGISTRY", - new Expression.NewInstance( - TypeRef.of(HashMap.class), - Reference.fieldRef("CustomTypeEncoderRegistry.REGISTRY", TypeRef.of(Map.class)))); - ctx.addField( - true, - true, - "java.util.Map>", - "COLLECTION_REGISTRY", - new Expression.NewInstance( - TypeRef.of(HashMap.class), - Reference.fieldRef( - "CustomTypeEncoderRegistry.COLLECTION_REGISTRY", TypeRef.of(Map.class)))); - // Dynamic lookup, not used by generated row codec - ctx.addMethod( - "public", - "findCodec", - "return CustomTypeEncoderRegistry.findCodec(REGISTRY, beanType, fieldType);", - CustomCodec.class, - Class.class, - "beanType", - Class.class, - "fieldType"); - ctx.addMethod( - "public", - "findCollectionFactory", - "return CustomTypeEncoderRegistry.findCollectionFactory(COLLECTION_REGISTRY, containerType, elementType);", - CustomCollectionFactory.class, - Class.class, - "containerType", - Class.class, - "elementType"); - // Static dispatch table for custom codecs - REGISTRY.forEach( - new BiConsumer>() { - int codecCount = 0; - - @Override - public void accept(final CustomTypeRegistration reg, final CustomCodec enc) { - codecCount++; - final String codecFieldName = - "CODEC_" - + reg.getBeanType().getSimpleName() - + "_" - + reg.getFieldType().getSimpleName() - + "_" - + codecCount; - ctx.addStaticMethod( - "encode", - "return (" - + ctx.type(enc.encodedType()) - + ")" - + codecFieldName - + ".encode(fieldValue);", - enc.encodedType().getRawType(), - reg.getBeanType(), - "bean", - reg.getFieldType(), - "fieldValue"); - ctx.addStaticMethod( - "decode", - "return (" - + reg.getFieldType().getName() - + ")" - + codecFieldName - + ".decode(encodedValue);", - reg.getFieldType(), - reg.getBeanType(), - "bean", - reg.getFieldType(), - "fieldNull", - enc.encodedType().getRawType(), - "encodedValue"); - ctx.addField( - true, - true, - CustomCodec.class.getName(), - codecFieldName, - Expression.Invoke.inlineInvoke( - new Expression.Reference("CustomTypeEncoderRegistry"), - "findCodec", - TypeRef.of(CustomCodec.class), - false, - new Expression.Reference("REGISTRY", TypeRef.of(Map.class)), - Expression.Literal.ofClass(reg.getBeanType()), - Expression.Literal.ofClass(reg.getFieldType()))); - } - }); - - // Static dispatch table for custom collection factories - COLLECTION_REGISTRY.forEach( - new BiConsumer>() { - int factoryCount = 0; - - @Override - public void accept( - final CustomTypeRegistration reg, final CustomCollectionFactory dec) { - factoryCount++; - final String factoryFieldName = - "FACTORY_" - + reg.getBeanType().getSimpleName() - + "_" - + reg.getFieldType().getSimpleName() - + "_" - + factoryCount; - ctx.addStaticMethod( - "newCollection", - "return (" - + ctx.type(reg.getBeanType()) - + ")" - + factoryFieldName - + ".newCollection(numElements);", - reg.getBeanType(), - reg.getBeanType(), - "collectionNull", - reg.getFieldType(), - "elementNull", - int.class, - "numElements"); - ctx.addField( - true, - true, - ctx.type(CustomCollectionFactory.class), - factoryFieldName, - Expression.Invoke.inlineInvoke( - new Expression.Reference("CustomTypeEncoderRegistry"), - "findCollectionFactory", - TypeRef.of(CustomCollectionFactory.class), - false, - new Expression.Reference("COLLECTION_REGISTRY", TypeRef.of(Map.class)), - Expression.Literal.ofClass(reg.getBeanType()), - Expression.Literal.ofClass(reg.getFieldType()))); - } - }); - - final CompileUnit compileUnit = new CompileUnit(pkg, className, ctx.genCode()); - final Map classByteCodes = - JaninoUtils.toBytecode(CustomTypeEncoderRegistry.class.getClassLoader(), compileUnit); - final byte[] code = classByteCodes.values().iterator().next(); - try { - generatedHandler = - (CustomTypeHandler) - DefineClass.defineClass( - compileUnit.getQualifiedClassName(), - CustomTypeEncoderRegistry.class, - CustomTypeEncoderRegistry.class.getClassLoader(), - CustomTypeEncoderRegistry.class.getProtectionDomain(), - code) - .getConstructor() - .newInstance(); - } catch (final Exception e) { - if (e instanceof RuntimeException) { - throw (RuntimeException) e; - } else { - throw new RuntimeException(e); - } - } - } - - static CustomCodec findCodec( - final Map> registry, - final Class beanType, - final Class fieldType) { - CustomCodec result = registry.get(new CustomTypeRegistration(beanType, fieldType)); + /** + * Look up a codec for {@code fieldType}, preferring a {@code beanType}-scoped registration and + * falling back to one registered against {@code Object.class}. Public so generated row codecs + * can invoke it across classloaders. + */ + public static CustomCodec findCodec(Class beanType, Class fieldType) { + CustomCodec result = REGISTRY.get(new CustomTypeRegistration(beanType, fieldType)); if (result == null) { - result = registry.get(new CustomTypeRegistration(Object.class, fieldType)); + result = REGISTRY.get(new CustomTypeRegistration(Object.class, fieldType)); } return result; } - static CustomCollectionFactory findCollectionFactory( - final Map> registry, - final Class containerType, - final Class elementType) { - return registry.get(new CustomTypeRegistration(containerType, elementType)); + /** + * Look up a collection factory by container and element type. Public for the same reason as + * {@link #findCodec}. + */ + public static CustomCollectionFactory findCollectionFactory( + Class containerType, Class elementType) { + return COLLECTION_REGISTRY.get(new CustomTypeRegistration(containerType, elementType)); + } + + private static final class RegistryHandler implements CustomTypeHandler { + @Override + @SuppressWarnings({"rawtypes", "unchecked"}) + public CustomCodec findCodec(Class beanType, Class fieldType) { + return (CustomCodec) CustomTypeEncoderRegistry.findCodec(beanType, fieldType); + } + + @Override + public CustomCollectionFactory findCollectionFactory( + Class containerType, Class elementType) { + return CustomTypeEncoderRegistry.findCollectionFactory(containerType, elementType); + } } } diff --git a/java/fory-format/src/test/java/org/apache/fory/format/encoder/CustomCodecTest.java b/java/fory-format/src/test/java/org/apache/fory/format/encoder/CustomCodecTest.java index 7f8e6d3c11..7ea0d0d36a 100644 --- a/java/fory-format/src/test/java/org/apache/fory/format/encoder/CustomCodecTest.java +++ b/java/fory-format/src/test/java/org/apache/fory/format/encoder/CustomCodecTest.java @@ -49,6 +49,8 @@ public class CustomCodecTest { Encoders.registerCustomCodec(InterceptedType.class, new InterceptedTypeEncoder()); Encoders.registerCustomCollectionFactory( SortedSet.class, UUID.class, new SortedSetOfUuidDecoder()); + Encoders.registerCustomCollectionFactory( + SortedSet.class, Long.class, new SortedSetOfLongDecoder()); } @Data @@ -97,6 +99,14 @@ public static class UuidType { public UuidType() {} } + @Data + public static class TwoSortedSetsType { + public SortedSet uuids; + public SortedSet longs; + + public TwoSortedSetsType() {} + } + @Test public void testCustomTypes() { final CustomType bean = new CustomType(); @@ -138,6 +148,23 @@ public void testUuidFields() { Assert.assertEquals(deserializedBean.f3.comparator(), UnsignedUuidComparator.INSTANCE); } + // Sibling SortedSet fields with different element types must each dispatch + // to their own factory; the codegen cache must key on (container, element). + @Test + public void testSiblingSortedSetsDifferentElementTypes() { + final TwoSortedSetsType bean = new TwoSortedSetsType(); + bean.uuids = new TreeSet<>(Arrays.asList(new UUID(7, 8), new UUID(9, 10))); + bean.longs = new TreeSet<>(Arrays.asList(1L, 2L, 3L)); + final RowEncoder encoder = Encoders.bean(TwoSortedSetsType.class); + final BinaryRow row = encoder.toRow(bean); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final TwoSortedSetsType deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean, bean); + Assert.assertEquals(deserializedBean.uuids.comparator(), UnsignedUuidComparator.INSTANCE); + Assert.assertEquals(deserializedBean.longs.comparator(), Comparator.reverseOrder()); + } + static class ZoneIdEncoder implements CustomCodec { @Override public Field getForyField(final String fieldName) { @@ -223,6 +250,13 @@ public SortedSet newCollection(final int size) { } } + static class SortedSetOfLongDecoder implements CustomCollectionFactory> { + @Override + public SortedSet newCollection(final int size) { + return new TreeSet<>(Comparator.reverseOrder()); + } + } + private enum UnsignedUuidComparator implements Comparator { INSTANCE;