Skip to content

Commit a3123ec

Browse files
committed
AVRO-3527: codegen equals and hashCode for Records
Update the compiler to generate the implementation of the `.equals()` and `.hashCode() function, instead of relying on the implementation of GenericData. This improves the performance of those functions significantly. The generated implementations are factor 10 to 20 faster for `.equals()` and a factor 5 to 10 for `.hashCode()`. Result of Perf test before the change: ``` Benchmark Mode Cnt Score Error Units SpecficTest.equals thrpt 3 12598610.194 +/- 11160265.279 ops/s SpecficTest.hashCode thrpt 3 24729446.862 +/- 29051332.794 ops/s ``` Results using generated functions: ``` Benchmark Mode Cnt Score Error Units SpecficTest.equals thrpt 3 211314296.950 +/- 104154793.126 ops/s SpecficTest.hashCode thrpt 3 180349506.632 +/- 143639246.771 ops/s ``` Signed-off-by: Steven Aerts <steven.aerts@gmail.com>
1 parent 80cebcf commit a3123ec

File tree

14 files changed

+423
-4
lines changed

14 files changed

+423
-4
lines changed

lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,9 +1314,9 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) {
13141314
case NULL:
13151315
return 0;
13161316
case STRING:
1317-
Utf8 u1 = o1 instanceof Utf8 ? (Utf8) o1 : new Utf8(o1.toString());
1318-
Utf8 u2 = o2 instanceof Utf8 ? (Utf8) o2 : new Utf8(o2.toString());
1319-
return u1.compareTo(u2);
1317+
CharSequence cs1 = o1 instanceof CharSequence ? (CharSequence) o1 : o1.toString();
1318+
CharSequence cs2 = o2 instanceof CharSequence ? (CharSequence) o2 : o2.toString();
1319+
return Utf8.compareSequences(cs1, cs2);
13201320
default:
13211321
return ((Comparable) o1).compareTo(o2);
13221322
}

lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,4 +230,28 @@ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundExcept
230230
setByteLength(in.readInt());
231231
in.readFully(bytes);
232232
}
233+
234+
public static int compareSequences(CharSequence cs1, CharSequence cs2) {
235+
if (cs1 == cs2) {
236+
return 0;
237+
}
238+
239+
if (cs1 == null || cs2 == null) {
240+
return cs1 == null ? 1 : -1;
241+
}
242+
243+
if (cs1.getClass() == cs2.getClass() && cs1 instanceof Comparable) {
244+
return ((Comparable<Object>) cs1).compareTo(cs2);
245+
}
246+
247+
for (int i = 0, len = Math.min(cs1.length(), cs2.length()); i < len; i++) {
248+
char a = cs1.charAt(i);
249+
char b = cs2.charAt(i);
250+
if (a != b) {
251+
return a - b;
252+
}
253+
}
254+
255+
return cs1.length() - cs2.length();
256+
}
233257
}

lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,6 +1166,46 @@ public static String mangle(String word, Set<String> reservedWords, boolean isMe
11661166
return SpecificData.mangle(word, reservedWords, isMethod);
11671167
}
11681168

1169+
public boolean canGenerateEqualsAndHashCode(Schema schema) {
1170+
return getUsedCustomLogicalTypeFactories(schema).isEmpty();
1171+
}
1172+
1173+
public boolean isPrimitiveType(Schema schema) {
1174+
return !isUnboxedJavaTypeNullable(schema) && getConvertedLogicalType(schema) == null;
1175+
}
1176+
1177+
public String hashCodeFor(Schema schema, String name) {
1178+
switch (javaUnbox(schema, false)) {
1179+
case "int":
1180+
return "Integer.hashCode(" + name + ")";
1181+
case "long":
1182+
return "Long.hashCode(" + name + ")";
1183+
case "float":
1184+
return "Float.hashCode(" + name + ")";
1185+
case "double":
1186+
return "Double.hashCode(" + name + ")";
1187+
case "boolean":
1188+
return "Boolean.hashCode(" + name + ")";
1189+
default:
1190+
// Hashcode of Union is expected to match ordinal
1191+
if (schema.getType() == Schema.Type.ENUM || ((schema.getType() == Schema.Type.UNION)
1192+
&& (schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.ENUM)))) {
1193+
if (schema.getType() == Schema.Type.ENUM
1194+
|| (schema.getTypes().size() == 2 && schema.getTypes().contains(NULL_SCHEMA))) {
1195+
return "(" + name + " == null ? 0 : ((java.lang.Enum) " + name + ").ordinal())";
1196+
} else {
1197+
return "(" + name + " == null ? 0 : " + name + " instanceof java.lang.Enum ? ((java.lang.Enum) " + name
1198+
+ ").ordinal() : " + name + ".hashCode())";
1199+
}
1200+
}
1201+
return "(" + name + " == null ? 0 : " + name + ".hashCode())";
1202+
}
1203+
}
1204+
1205+
public boolean ignoredField(Field field) {
1206+
return field.order() == Field.Order.IGNORE;
1207+
}
1208+
11691209
/**
11701210
* Utility for use by templates. Return schema fingerprint as a long.
11711211
*/

lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,47 @@ public class ${this.mangleTypeIdentifier($schema.getName())} extends ${this.getS
616616
}
617617
}
618618
#end
619+
#if ($this.canGenerateEqualsAndHashCode($schema))
620+
621+
@Override
622+
public int hashCode() {
623+
int result = 1;
624+
#foreach ($field in $schema.getFields())
625+
#if (!${this.ignoredField($field)})
626+
#set ($n = ${this.mangle($field.name(), $schema.isError())})
627+
result = 31 * result + ${this.hashCodeFor($field.schema(), $n)};
628+
#end
629+
#end
630+
return result;
631+
}
632+
633+
@Override
634+
public boolean equals(Object o) {
635+
if (this == o) {
636+
return true;
637+
}
638+
if (!(o instanceof ${this.mangleTypeIdentifier($schema.getName())})) {
639+
return false;
640+
}
641+
${this.mangleTypeIdentifier($schema.getName())} other = (${this.mangleTypeIdentifier($schema.getName())}) o;
642+
#foreach ($field in $schema.getFields())
643+
#if (!${this.ignoredField($field)})
644+
#set ($n = ${this.mangle($field.name(), $schema.isError())})
645+
#set ($s = $field.schema())
646+
#if (${this.isPrimitiveType($s)})
647+
if (this.$n != other.$n) {
648+
#elseif (${this.javaType($field.schema()).equals("java.lang.CharSequence")})
649+
if (Utf8.compareSequences(this.$n, other.$n) != 0) {
650+
#else
651+
if (!java.util.Objects.equals(this.$n, other.$n)) {
652+
#end
653+
return false;
654+
}
655+
#end
656+
#end
657+
return true;
658+
}
659+
#end
619660
}
620661

621662
#macro( encodeVar $indent $var $s )

lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/FieldTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
@org.apache.avro.specific.AvroGenerated
1616
public class FieldTest extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1717
private static final long serialVersionUID = 4609235620572341636L;
18+
19+
1820
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"FieldTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test various field types\",\"fields\":[{\"name\":\"number\",\"type\":\"int\",\"doc\":\"The number of the player\"},{\"name\":\"last_name\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"timestamp\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"timestampMicros\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}},{\"name\":\"timeMillis\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},{\"name\":\"timeMicros\",\"type\":{\"type\":\"long\",\"logicalType\":\"time-micros\"}}]}");
1921
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2022

@@ -659,6 +661,48 @@ public FieldTest build() {
659661
READER$.read(this, SpecificData.getDecoder(in));
660662
}
661663

664+
665+
@Override
666+
public int hashCode() {
667+
int result = 1;
668+
result = 31 * result + Integer.hashCode(number);
669+
result = 31 * result + (last_name == null ? 0 : last_name.hashCode());
670+
result = 31 * result + (timestamp == null ? 0 : timestamp.hashCode());
671+
result = 31 * result + (timestampMicros == null ? 0 : timestampMicros.hashCode());
672+
result = 31 * result + (timeMillis == null ? 0 : timeMillis.hashCode());
673+
result = 31 * result + (timeMicros == null ? 0 : timeMicros.hashCode());
674+
return result;
675+
}
676+
677+
@Override
678+
public boolean equals(Object o) {
679+
if (this == o) {
680+
return true;
681+
}
682+
if (!(o instanceof FieldTest)) {
683+
return false;
684+
}
685+
FieldTest other = (FieldTest) o;
686+
if (this.number != other.number) {
687+
return false;
688+
}
689+
if (!java.util.Objects.equals(this.last_name, other.last_name)) {
690+
return false;
691+
}
692+
if (!java.util.Objects.equals(this.timestamp, other.timestamp)) {
693+
return false;
694+
}
695+
if (!java.util.Objects.equals(this.timestampMicros, other.timestampMicros)) {
696+
return false;
697+
}
698+
if (!java.util.Objects.equals(this.timeMillis, other.timeMillis)) {
699+
return false;
700+
}
701+
if (!java.util.Objects.equals(this.timeMicros, other.timeMicros)) {
702+
return false;
703+
}
704+
return true;
705+
}
662706
}
663707

664708

lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/JSpecifyNullSafeAnnotationsFieldsTest.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,40 @@ public JSpecifyNullSafeAnnotationsFieldsTest build() {
581581
}
582582
}
583583
}
584+
585+
@Override
586+
public int hashCode() {
587+
int result = 1;
588+
result = 31 * result + (name == null ? 0 : name.hashCode());
589+
result = 31 * result + (nullable_name == null ? 0 : nullable_name.hashCode());
590+
result = 31 * result + Integer.hashCode(favorite_number);
591+
result = 31 * result + (nullable_favorite_number == null ? 0 : nullable_favorite_number.hashCode());
592+
return result;
593+
}
594+
595+
@Override
596+
public boolean equals(Object o) {
597+
if (this == o) {
598+
return true;
599+
}
600+
if (!(o instanceof JSpecifyNullSafeAnnotationsFieldsTest)) {
601+
return false;
602+
}
603+
JSpecifyNullSafeAnnotationsFieldsTest other = (JSpecifyNullSafeAnnotationsFieldsTest) o;
604+
if (!java.util.Objects.equals(this.name, other.name)) {
605+
return false;
606+
}
607+
if (!java.util.Objects.equals(this.nullable_name, other.nullable_name)) {
608+
return false;
609+
}
610+
if (this.favorite_number != other.favorite_number) {
611+
return false;
612+
}
613+
if (!java.util.Objects.equals(this.nullable_favorite_number, other.nullable_favorite_number)) {
614+
return false;
615+
}
616+
return true;
617+
}
584618
}
585619

586620

lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/JetBrainsNullSafeAnnotationsFieldsTest.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,40 @@ public JetBrainsNullSafeAnnotationsFieldsTest build() {
581581
}
582582
}
583583
}
584+
585+
@Override
586+
public int hashCode() {
587+
int result = 1;
588+
result = 31 * result + (name == null ? 0 : name.hashCode());
589+
result = 31 * result + (nullable_name == null ? 0 : nullable_name.hashCode());
590+
result = 31 * result + Integer.hashCode(favorite_number);
591+
result = 31 * result + (nullable_favorite_number == null ? 0 : nullable_favorite_number.hashCode());
592+
return result;
593+
}
594+
595+
@Override
596+
public boolean equals(Object o) {
597+
if (this == o) {
598+
return true;
599+
}
600+
if (!(o instanceof JetBrainsNullSafeAnnotationsFieldsTest)) {
601+
return false;
602+
}
603+
JetBrainsNullSafeAnnotationsFieldsTest other = (JetBrainsNullSafeAnnotationsFieldsTest) o;
604+
if (!java.util.Objects.equals(this.name, other.name)) {
605+
return false;
606+
}
607+
if (!java.util.Objects.equals(this.nullable_name, other.nullable_name)) {
608+
return false;
609+
}
610+
if (this.favorite_number != other.favorite_number) {
611+
return false;
612+
}
613+
if (!java.util.Objects.equals(this.nullable_favorite_number, other.nullable_favorite_number)) {
614+
return false;
615+
}
616+
return true;
617+
}
584618
}
585619

586620

lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/Player.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
@org.apache.avro.specific.AvroGenerated
1616
public class Player extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1717
private static final long serialVersionUID = 3865593031278745715L;
18+
19+
1820
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Player\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"選手 is Japanese for player.\",\"fields\":[{\"name\":\"number\",\"type\":\"int\",\"doc\":\"The number of the player\"},{\"name\":\"first_name\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"last_name\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"enum\",\"name\":\"Position\",\"symbols\":[\"P\",\"C\",\"B1\",\"B2\",\"B3\",\"SS\",\"LF\",\"CF\",\"RF\",\"DH\"]}}}]}");
1921
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2022

@@ -587,6 +589,40 @@ public Player build() {
587589
}
588590
}
589591
}
592+
593+
@Override
594+
public int hashCode() {
595+
int result = 1;
596+
result = 31 * result + Integer.hashCode(number);
597+
result = 31 * result + (first_name == null ? 0 : first_name.hashCode());
598+
result = 31 * result + (last_name == null ? 0 : last_name.hashCode());
599+
result = 31 * result + (position == null ? 0 : position.hashCode());
600+
return result;
601+
}
602+
603+
@Override
604+
public boolean equals(Object o) {
605+
if (this == o) {
606+
return true;
607+
}
608+
if (!(o instanceof Player)) {
609+
return false;
610+
}
611+
Player other = (Player) o;
612+
if (this.number != other.number) {
613+
return false;
614+
}
615+
if (!java.util.Objects.equals(this.first_name, other.first_name)) {
616+
return false;
617+
}
618+
if (!java.util.Objects.equals(this.last_name, other.last_name)) {
619+
return false;
620+
}
621+
if (!java.util.Objects.equals(this.position, other.position)) {
622+
return false;
623+
}
624+
return true;
625+
}
590626
}
591627

592628

lang/java/tools/src/test/compiler/output-string/avro/examples/baseball/Proto.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ public interface Callback extends Proto {
2424
*/
2525
void bar(org.apache.avro.ipc.Callback<java.lang.Void> callback) throws java.io.IOException;
2626
}
27-
}
27+
}

lang/java/tools/src/test/compiler/output/AddExtraOptionalGettersTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
@org.apache.avro.specific.AvroGenerated
1616
public class AddExtraOptionalGettersTest extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
1717
private static final long serialVersionUID = -3300987256178011215L;
18+
19+
1820
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"AddExtraOptionalGettersTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test that extra optional getters are added\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"favorite_number\",\"type\":[\"int\",\"null\"]}]}");
1921
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
2022

@@ -428,6 +430,32 @@ public AddExtraOptionalGettersTest build() {
428430
}
429431
}
430432
}
433+
434+
@Override
435+
public int hashCode() {
436+
int result = 1;
437+
result = 31 * result + (name == null ? 0 : name.hashCode());
438+
result = 31 * result + (favorite_number == null ? 0 : favorite_number.hashCode());
439+
return result;
440+
}
441+
442+
@Override
443+
public boolean equals(Object o) {
444+
if (this == o) {
445+
return true;
446+
}
447+
if (!(o instanceof AddExtraOptionalGettersTest)) {
448+
return false;
449+
}
450+
AddExtraOptionalGettersTest other = (AddExtraOptionalGettersTest) o;
451+
if (Utf8.compareSequences(this.name, other.name) != 0) {
452+
return false;
453+
}
454+
if (!java.util.Objects.equals(this.favorite_number, other.favorite_number)) {
455+
return false;
456+
}
457+
return true;
458+
}
431459
}
432460

433461

0 commit comments

Comments
 (0)