Skip to content

Commit f90caa7

Browse files
committed
[GR-69574] Hoist type check guards out of Vector API loops
PullRequest: graal/22352
2 parents 27b9be5 + aa9073d commit f90caa7

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/vector/replacements/vectorapi/VectorAPIExpansionPhase.java

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,24 @@
4747
import jdk.graal.compiler.graph.NodeUnionFind;
4848
import jdk.graal.compiler.nodes.AbstractBeginNode;
4949
import jdk.graal.compiler.nodes.ConstantNode;
50+
import jdk.graal.compiler.nodes.FixedGuardNode;
5051
import jdk.graal.compiler.nodes.FixedNode;
5152
import jdk.graal.compiler.nodes.FixedWithNextNode;
5253
import jdk.graal.compiler.nodes.FrameState;
5354
import jdk.graal.compiler.nodes.GraphState;
5455
import jdk.graal.compiler.nodes.Invoke;
56+
import jdk.graal.compiler.nodes.LogicConstantNode;
57+
import jdk.graal.compiler.nodes.LogicNode;
5558
import jdk.graal.compiler.nodes.NodeView;
59+
import jdk.graal.compiler.nodes.PiNode;
5660
import jdk.graal.compiler.nodes.ReturnNode;
5761
import jdk.graal.compiler.nodes.StructuredGraph;
5862
import jdk.graal.compiler.nodes.ValueNode;
5963
import jdk.graal.compiler.nodes.ValuePhiNode;
6064
import jdk.graal.compiler.nodes.ValueProxyNode;
6165
import jdk.graal.compiler.nodes.calc.MinMaxNode;
6266
import jdk.graal.compiler.nodes.extended.FixedValueAnchorNode;
67+
import jdk.graal.compiler.nodes.java.InstanceOfNode;
6368
import jdk.graal.compiler.nodes.java.MethodCallTargetNode;
6469
import jdk.graal.compiler.nodes.spi.CoreProviders;
6570
import jdk.graal.compiler.nodes.spi.SimplifierTool;
@@ -71,13 +76,17 @@
7176
import jdk.graal.compiler.phases.common.PostRunCanonicalizationPhase;
7277
import jdk.graal.compiler.phases.tiers.HighTierContext;
7378
import jdk.graal.compiler.replacements.nodes.MacroWithExceptionNode;
79+
import jdk.graal.compiler.serviceprovider.SpeculationReasonGroup;
7480
import jdk.graal.compiler.vector.architecture.VectorArchitecture;
7581
import jdk.graal.compiler.vector.architecture.VectorLoweringProvider;
7682
import jdk.graal.compiler.vector.nodes.simd.SimdStamp;
7783
import jdk.graal.compiler.vector.replacements.vectorapi.nodes.VectorAPIMacroNode;
7884
import jdk.graal.compiler.vector.replacements.vectorapi.nodes.VectorAPISinkNode;
85+
import jdk.vm.ci.meta.DeoptimizationReason;
7986
import jdk.vm.ci.meta.JavaKind;
87+
import jdk.vm.ci.meta.ResolvedJavaMethod;
8088
import jdk.vm.ci.meta.ResolvedJavaType;
89+
import jdk.vm.ci.meta.SpeculationLog;
8190

8291
/**
8392
* Expands {@link VectorAPIMacroNode}s to SIMD operations if they are supported by the target
@@ -140,6 +149,9 @@
140149
*/
141150
public class VectorAPIExpansionPhase extends PostRunCanonicalizationPhase<HighTierContext> {
142151

152+
private static final SpeculationReasonGroup FIXED_GUARD_HOISTING_SPECULATIONS = new SpeculationReasonGroup("VectorAPIFixedGuardHoisting", ResolvedJavaMethod.class, int.class,
153+
DeoptimizationReason.class);
154+
143155
public VectorAPIExpansionPhase(CanonicalizerPhase canonicalizer) {
144156
super(canonicalizer.copyWithCustomSimplification(new VectorAPIExpansionPhase.VectorAPISimplification()));
145157
}
@@ -258,6 +270,10 @@ protected void run(StructuredGraph graph, HighTierContext context) {
258270
return;
259271
}
260272

273+
if (graph.getSpeculationLog() != null) {
274+
speculativelyHoistGuardsThroughPhis(graph, context);
275+
}
276+
261277
/*
262278
* Canonicalize first. Needed for computing SIMD stamps, since we delay their computation to
263279
* compile time. We can't generally compute SIMD stamps at the time we build the macro nodes
@@ -1000,6 +1016,99 @@ private static void replaceComponentNodes(StructuredGraph graph, HighTierContext
10001016
graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "after adding duplicates for %s", component);
10011017
}
10021018

1019+
/*
1020+
* A limit on the width of phis that we are willing to hoist through. The exact value doesn't
1021+
* matter, but as hoisting guards through phis duplicates code, we want some limit to avoid
1022+
* explosive surprises.
1023+
*/
1024+
private static final int MAX_PHI_PREDECESSORS = 4;
1025+
1026+
/**
1027+
* Try to improve a graph shape involving loop phis that don't have precise Vector API type
1028+
* stamps. Given code like this:
1029+
*
1030+
* <pre>
1031+
* Object init = [some generic Object value];
1032+
* Object phi = init;
1033+
* loop {
1034+
* Byte128Vector v = (Byte128Vector) phi;
1035+
* Byte128Vector w = v.add(1);
1036+
* phi = w;
1037+
* }
1038+
* </pre>
1039+
*
1040+
* This method will hoist the cast through the phi, placing the guards in all phi predecessors
1041+
* that don't have a precise stamp yet (i.e., in this case, at the loop entry):
1042+
*
1043+
* <pre>
1044+
* Object init = [some generic Object value];
1045+
* Byte128Vector castInit = (Byte128Vector) init; // hoisted type check guard
1046+
* Byte128Vector phi = castInit;
1047+
* loop {
1048+
* phi = phi.add(1);
1049+
* }
1050+
* </pre>
1051+
*
1052+
* In the original code, we have a type check on every loop iteration, plus we would have to
1053+
* insert unboxing/boxing code around the SIMD add operation. In the modified code, we only have
1054+
* one type check and one unboxing before the loop, and the SIMD computation in the loop can be
1055+
* fully unboxed. Reasonably written code should not contain such patterns, but Truffle OSR
1056+
* compilations have such code shapes because OSR locals have generic object stamps.
1057+
* <p>
1058+
*
1059+
* The hoisting of the type check is guarded by a speculation, so we do not repeat this
1060+
* transformation if we ever see the hoisted guard fail.
1061+
*/
1062+
private void speculativelyHoistGuardsThroughPhis(StructuredGraph graph, HighTierContext context) {
1063+
for (VectorAPIMacroNode macro : graph.getNodes(VectorAPIMacroNode.TYPE)) {
1064+
for (ValueNode vectorInput : macro.vectorInputs()) {
1065+
if (vectorInput instanceof PiNode pi && pi.getGuard() instanceof FixedGuardNode guard && guard.canFloat()) {
1066+
if (guard.getCondition() instanceof InstanceOfNode instanceOf &&
1067+
!guard.isNegated() && // if (!(x instanceof T)) { deopt; }
1068+
instanceOf.getValue() == pi.getOriginalNode() &&
1069+
instanceOf.getCheckedStamp().nonNull() &&
1070+
instanceOf.getCheckedStamp().equals(pi.piStamp()) &&
1071+
instanceOf.getValue() instanceof ValuePhiNode phi &&
1072+
phi.valueCount() <= MAX_PHI_PREDECESSORS &&
1073+
phi.isLoopPhi() &&
1074+
VectorAPIBoxingUtils.asUnboxableVectorType(pi, context) != null) {
1075+
SpeculationLog.SpeculationReason speculationReason = FIXED_GUARD_HOISTING_SPECULATIONS.createSpeculationReason(phi.merge().stateAfter().getMethod(),
1076+
phi.merge().stateAfter().bci, guard.getReason());
1077+
if (graph.getSpeculationLog().maySpeculate(speculationReason)) {
1078+
SpeculationLog.Speculation hoistingSpeculation = graph.getSpeculationLog().speculate(speculationReason);
1079+
for (int i = 0; i < phi.valueCount(); i++) {
1080+
LogicNode newCondition = InstanceOfNode.create(instanceOf.type(), phi.valueAt(i));
1081+
if (newCondition instanceof LogicConstantNode logicConstant && logicConstant.getValue() == !guard.isNegated()) {
1082+
/*
1083+
* This phi input already has a precise stamp that doesn't need
1084+
* to be improved.
1085+
*/
1086+
continue;
1087+
}
1088+
newCondition = graph.addOrUnique(newCondition);
1089+
FixedGuardNode newGuard = graph.add(new FixedGuardNode(newCondition, guard.getReason(), guard.getAction(), hoistingSpeculation, guard.isNegated(),
1090+
guard.getNoDeoptSuccessorPosition()));
1091+
graph.addBeforeFixed(phi.merge().phiPredecessorAt(i), newGuard);
1092+
ValueNode newPi = graph.addOrUnique(PiNode.create(phi.valueAt(i), pi.piStamp(), newGuard));
1093+
if (newPi != phi.valueAt(i)) {
1094+
phi.setValueAt(i, newPi);
1095+
}
1096+
}
1097+
/*
1098+
* Improve the phi and canonicalize its usages right away. The original
1099+
* guard and its pi will fold away, and other macros using the same pi
1100+
* will now see the phi with its precise stamp. This way, we don't
1101+
* repeat the same work for other usages of the pi.
1102+
*/
1103+
phi.inferStamp();
1104+
canonicalizer.applyIncremental(graph, context, phi.usages());
1105+
}
1106+
}
1107+
}
1108+
}
1109+
}
1110+
}
1111+
10031112
public static class VectorAPISimplification implements CanonicalizerPhase.CustomSimplification {
10041113

10051114
@Override

0 commit comments

Comments
 (0)