|
47 | 47 | import jdk.graal.compiler.graph.NodeUnionFind; |
48 | 48 | import jdk.graal.compiler.nodes.AbstractBeginNode; |
49 | 49 | import jdk.graal.compiler.nodes.ConstantNode; |
| 50 | +import jdk.graal.compiler.nodes.FixedGuardNode; |
50 | 51 | import jdk.graal.compiler.nodes.FixedNode; |
51 | 52 | import jdk.graal.compiler.nodes.FixedWithNextNode; |
52 | 53 | import jdk.graal.compiler.nodes.FrameState; |
53 | 54 | import jdk.graal.compiler.nodes.GraphState; |
54 | 55 | import jdk.graal.compiler.nodes.Invoke; |
| 56 | +import jdk.graal.compiler.nodes.LogicConstantNode; |
| 57 | +import jdk.graal.compiler.nodes.LogicNode; |
55 | 58 | import jdk.graal.compiler.nodes.NodeView; |
| 59 | +import jdk.graal.compiler.nodes.PiNode; |
56 | 60 | import jdk.graal.compiler.nodes.ReturnNode; |
57 | 61 | import jdk.graal.compiler.nodes.StructuredGraph; |
58 | 62 | import jdk.graal.compiler.nodes.ValueNode; |
59 | 63 | import jdk.graal.compiler.nodes.ValuePhiNode; |
60 | 64 | import jdk.graal.compiler.nodes.ValueProxyNode; |
61 | 65 | import jdk.graal.compiler.nodes.calc.MinMaxNode; |
62 | 66 | import jdk.graal.compiler.nodes.extended.FixedValueAnchorNode; |
| 67 | +import jdk.graal.compiler.nodes.java.InstanceOfNode; |
63 | 68 | import jdk.graal.compiler.nodes.java.MethodCallTargetNode; |
64 | 69 | import jdk.graal.compiler.nodes.spi.CoreProviders; |
65 | 70 | import jdk.graal.compiler.nodes.spi.SimplifierTool; |
|
71 | 76 | import jdk.graal.compiler.phases.common.PostRunCanonicalizationPhase; |
72 | 77 | import jdk.graal.compiler.phases.tiers.HighTierContext; |
73 | 78 | import jdk.graal.compiler.replacements.nodes.MacroWithExceptionNode; |
| 79 | +import jdk.graal.compiler.serviceprovider.SpeculationReasonGroup; |
74 | 80 | import jdk.graal.compiler.vector.architecture.VectorArchitecture; |
75 | 81 | import jdk.graal.compiler.vector.architecture.VectorLoweringProvider; |
76 | 82 | import jdk.graal.compiler.vector.nodes.simd.SimdStamp; |
77 | 83 | import jdk.graal.compiler.vector.replacements.vectorapi.nodes.VectorAPIMacroNode; |
78 | 84 | import jdk.graal.compiler.vector.replacements.vectorapi.nodes.VectorAPISinkNode; |
| 85 | +import jdk.vm.ci.meta.DeoptimizationReason; |
79 | 86 | import jdk.vm.ci.meta.JavaKind; |
| 87 | +import jdk.vm.ci.meta.ResolvedJavaMethod; |
80 | 88 | import jdk.vm.ci.meta.ResolvedJavaType; |
| 89 | +import jdk.vm.ci.meta.SpeculationLog; |
81 | 90 |
|
82 | 91 | /** |
83 | 92 | * Expands {@link VectorAPIMacroNode}s to SIMD operations if they are supported by the target |
|
140 | 149 | */ |
141 | 150 | public class VectorAPIExpansionPhase extends PostRunCanonicalizationPhase<HighTierContext> { |
142 | 151 |
|
    /*
     * Speculation reason group for hoisting fixed guards through phis. Reasons in this group are
     * keyed by a ResolvedJavaMethod, an int and a DeoptimizationReason; the hoisting code in this
     * class supplies the method and bci of the phi's merge state and the deoptimization reason of
     * the guard being hoisted.
     */
| 152 | + private static final SpeculationReasonGroup FIXED_GUARD_HOISTING_SPECULATIONS = new SpeculationReasonGroup("VectorAPIFixedGuardHoisting", ResolvedJavaMethod.class, int.class, |
| 153 | + DeoptimizationReason.class); |
| 154 | + |
    /**
     * Creates the phase. The superclass is given a copy of {@code canonicalizer}, obtained through
     * {@code copyWithCustomSimplification}, with a new {@link VectorAPISimplification} installed as
     * its custom simplification.
     *
     * @param canonicalizer the canonicalizer from which the customized copy is derived
     */
143 | 155 | public VectorAPIExpansionPhase(CanonicalizerPhase canonicalizer) { |
144 | 156 | super(canonicalizer.copyWithCustomSimplification(new VectorAPIExpansionPhase.VectorAPISimplification())); |
145 | 157 | } |
@@ -258,6 +270,10 @@ protected void run(StructuredGraph graph, HighTierContext context) { |
258 | 270 | return; |
259 | 271 | } |
260 | 272 |
|
| 273 | + if (graph.getSpeculationLog() != null) { |
| 274 | + speculativelyHoistGuardsThroughPhis(graph, context); |
| 275 | + } |
| 276 | + |
261 | 277 | /* |
262 | 278 | * Canonicalize first. Needed for computing SIMD stamps, since we delay their computation to |
263 | 279 | * compile time. We can't generally compute SIMD stamps at the time we build the macro nodes |
@@ -1000,6 +1016,99 @@ private static void replaceComponentNodes(StructuredGraph graph, HighTierContext |
1000 | 1016 | graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "after adding duplicates for %s", component); |
1001 | 1017 | } |
1002 | 1018 |
|
| 1019 | + /* |
| 1020 | + * A limit on the width of phis (their number of inputs, compared against valueCount()) that we |
| 1021 | + * are willing to hoist guards through. The exact value doesn't matter, but as hoisting guards |
| 1022 | + * through phis duplicates code, we want some limit to avoid explosive surprises. |
| 1023 | + */ |
| 1024 | + private static final int MAX_PHI_PREDECESSORS = 4; |
| 1025 | + |
| 1026 | + /** |
| 1027 | + * Try to improve a graph shape involving loop phis that don't have precise Vector API type |
| 1028 | + * stamps. Given code like this: |
| 1029 | + * |
| 1030 | + * <pre> |
| 1031 | + * Object init = [some generic Object value]; |
| 1032 | + * Object phi = init; |
| 1033 | + * loop { |
| 1034 | + * Byte128Vector v = (Byte128Vector) phi; |
| 1035 | + * Byte128Vector w = v.add(1); |
| 1036 | + * phi = w; |
| 1037 | + * } |
| 1038 | + * </pre> |
| 1039 | + * |
| 1040 | + * This method will hoist the cast through the phi, placing the guards in all phi predecessors |
| 1041 | + * that don't have a precise stamp yet (i.e., in this case, at the loop entry): |
| 1042 | + * |
| 1043 | + * <pre> |
| 1044 | + * Object init = [some generic Object value]; |
| 1045 | + * Byte128Vector castInit = (Byte128Vector) init; // hoisted type check guard |
| 1046 | + * Byte128Vector phi = castInit; |
| 1047 | + * loop { |
| 1048 | + * phi = phi.add(1); |
| 1049 | + * } |
| 1050 | + * </pre> |
| 1051 | + * |
| 1052 | + * In the original code, we have a type check on every loop iteration, plus we would have to |
| 1053 | + * insert unboxing/boxing code around the SIMD add operation. In the modified code, we only have |
| 1054 | + * one type check and one unboxing before the loop, and the SIMD computation in the loop can be |
| 1055 | + * fully unboxed. Reasonably written code should not contain such patterns, but Truffle OSR |
| 1056 | + * compilations have such code shapes because OSR locals have generic object stamps. |
| 1057 | + * <p> |
| 1058 | + * |
| 1059 | + * The hoisting of the type check is guarded by a speculation, so we do not repeat this |
| 1060 | + * transformation if we ever see the hoisted guard fail. |
     * <p>
     *
     * A vector input of a {@link VectorAPIMacroNode} is only transformed if all of the following
     * hold:
     * <ul>
     * <li>it is a {@link PiNode} whose guard is a {@link FixedGuardNode} for which
     * {@code canFloat()} holds and which is not negated;</li>
     * <li>the guard's condition is an {@link InstanceOfNode} on the pi's original node, with a
     * non-null checked stamp equal to the pi's stamp;</li>
     * <li>the checked value is a loop {@link ValuePhiNode} with at most
     * {@code MAX_PHI_PREDECESSORS} values;</li>
     * <li>{@code VectorAPIBoxingUtils.asUnboxableVectorType(pi, context)} is non-null;</li>
     * <li>the speculation log still allows the speculation keyed by the method and bci of the
     * state after the phi's merge and the guard's deoptimization reason.</li>
     * </ul>
     *
     * @param graph the graph to transform. Its speculation log is dereferenced without a null
     *            check here; the call site visible in {@code run} only calls this method when the
     *            log is non-null.
     * @param context passed to the unboxable-type query and to the incremental canonicalization
     *            of the improved phi's usages
| 1061 | + */ |
| 1062 | + private void speculativelyHoistGuardsThroughPhis(StructuredGraph graph, HighTierContext context) { |
| 1063 | + for (VectorAPIMacroNode macro : graph.getNodes(VectorAPIMacroNode.TYPE)) { |
| 1064 | + for (ValueNode vectorInput : macro.vectorInputs()) { |
| 1065 | + if (vectorInput instanceof PiNode pi && pi.getGuard() instanceof FixedGuardNode guard && guard.canFloat()) { |
| 1066 | + if (guard.getCondition() instanceof InstanceOfNode instanceOf && |
| 1067 | + !guard.isNegated() && // the guard has the shape: if (!(x instanceof T)) { deopt; } |
| 1068 | + instanceOf.getValue() == pi.getOriginalNode() && |
| 1069 | + instanceOf.getCheckedStamp().nonNull() && |
| 1070 | + instanceOf.getCheckedStamp().equals(pi.piStamp()) && |
| 1071 | + instanceOf.getValue() instanceof ValuePhiNode phi && |
| 1072 | + phi.valueCount() <= MAX_PHI_PREDECESSORS && |
| 1073 | + phi.isLoopPhi() && |
| 1074 | + VectorAPIBoxingUtils.asUnboxableVectorType(pi, context) != null) { |
                    // Speculation key: (method, bci) of the state after the phi's merge, plus the guard's deopt reason.
| 1075 | + SpeculationLog.SpeculationReason speculationReason = FIXED_GUARD_HOISTING_SPECULATIONS.createSpeculationReason(phi.merge().stateAfter().getMethod(), |
| 1076 | + phi.merge().stateAfter().bci, guard.getReason()); |
| 1077 | + if (graph.getSpeculationLog().maySpeculate(speculationReason)) { |
| 1078 | + SpeculationLog.Speculation hoistingSpeculation = graph.getSpeculationLog().speculate(speculationReason); |
| 1079 | + for (int i = 0; i < phi.valueCount(); i++) { |
| 1080 | + LogicNode newCondition = InstanceOfNode.create(instanceOf.type(), phi.valueAt(i)); |
                            // NOTE(review): the guard is known to be non-negated at this point, so this only matches a constant-true check.
| 1081 | + if (newCondition instanceof LogicConstantNode logicConstant && logicConstant.getValue() == !guard.isNegated()) { |
| 1082 | + /* |
| 1083 | + * The type check on this phi input folded to a constant that satisfies |
| 1084 | + * the guard, so the input is already precise and needs no new guard. |
| 1085 | + */ |
| 1086 | + continue; |
| 1087 | + } |
| 1088 | + newCondition = graph.addOrUnique(newCondition); |
                            // The hoisted guard copies the original's reason, action, negation and position but carries the hoisting speculation.
| 1089 | + FixedGuardNode newGuard = graph.add(new FixedGuardNode(newCondition, guard.getReason(), guard.getAction(), hoistingSpeculation, guard.isNegated(), |
| 1090 | + guard.getNoDeoptSuccessorPosition())); |
                            // Insert the guard directly before the merge's i-th phi predecessor.
| 1091 | + graph.addBeforeFixed(phi.merge().phiPredecessorAt(i), newGuard); |
                            // Route the phi input through a pi with the original pi's stamp, anchored on the hoisted guard.
| 1092 | + ValueNode newPi = graph.addOrUnique(PiNode.create(phi.valueAt(i), pi.piStamp(), newGuard)); |
| 1093 | + if (newPi != phi.valueAt(i)) { |
| 1094 | + phi.setValueAt(i, newPi); |
| 1095 | + } |
| 1096 | + } |
| 1097 | + /* |
| 1098 | + * Improve the phi and canonicalize its usages right away. The original |
| 1099 | + * guard and its pi will fold away, and other macros using the same pi |
| 1100 | + * will now see the phi with its precise stamp. This way, we don't |
| 1101 | + * repeat the same work for other usages of the pi. |
| 1102 | + */ |
| 1103 | + phi.inferStamp(); |
| 1104 | + canonicalizer.applyIncremental(graph, context, phi.usages()); |
| 1105 | + } |
| 1106 | + } |
| 1107 | + } |
| 1108 | + } |
| 1109 | + } |
| 1110 | + } |
| 1111 | + |
1003 | 1112 | public static class VectorAPISimplification implements CanonicalizerPhase.CustomSimplification { |
1004 | 1113 |
|
1005 | 1114 | @Override |
|
0 commit comments