@@ -2971,7 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29712971 fixupIVUsers (Entry.first , Entry.second ,
29722972 getOrCreateVectorTripCount (nullptr ),
29732973 IVEndValues[Entry.first ], LoopMiddleBlock, State);
2974- IVEndValues[Entry.first ], LoopMiddleBlock, Plan, State);
29752974 }
29762975
29772976 for (Instruction *PI : PredicatedInstructions)
@@ -8705,13 +8704,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87058704 // directly, enabling more efficient codegen.
87068705 PhiRecipe = new VPFirstOrderRecurrencePHIRecipe (Phi, *StartV);
87078706 } else if (Legal->isCSAPhi (Phi)) {
8708- VPCSAState *State = Plan.getCSAStates ().find (Phi)->second ;
8709- VPValue *InitData = State->getVPInitData ();
8707+ VPValue *InitScalar = Plan.getOrAddLiveIn (
8708+ Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8709+
8710+ // Don't build full CSA for VF=ElementCount::getFixed(1)
8711+ bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8712+ [&](ElementCount VF) { return VF.isScalar (); }, Range);
8713+
87108714 // When the VF=getFixed(1), InitData is just InitScalar.
8711- if (!InitData)
8712- InitData = State->getVPInitScalar ();
8715+ VPValue *InitData =
8716+ IsScalarVF ? InitScalar
8717+ : getVPValueOrAddLiveIn (PoisonValue::get (Phi->getType ()));
87138718 PhiRecipe = new VPCSAHeaderPHIRecipe (Phi, InitData);
8714- State->setPhiRecipe (cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
87158719 } else {
87168720 llvm_unreachable (
87178721 " can only widen reductions, fixed-order recurrences, and CSAs here" );
@@ -8752,13 +8756,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87528756 return CSADescriptor::isCSASelect (CSA.second , SI);
87538757 });
87548758 if (CSADescIt != Legal->getCSAs ().end ()) {
8755- PHINode *CSAPhi = CSADescIt->first ;
8756- VPCSAState *State = Plan.getCSAStates ().find (CSAPhi)->second ;
8757- VPValue *VPDataPhi = State->getPhiRecipe ();
8758- auto *R = new VPCSADataUpdateRecipe (
8759- SI, {VPDataPhi, Operands[0 ], Operands[1 ], Operands[2 ]});
8760- State->setDataUpdate (R);
8761- return R;
8759+ for (VPRecipeBase &R :
8760+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
8761+ if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8762+ if (PhiR->getUnderlyingInstr () == CSADescIt->first ) {
8763+ auto *R = new VPCSADataUpdateRecipe (
8764+ SI, {PhiR, Operands[0 ], Operands[1 ], Operands[2 ]});
8765+ PhiR->setDataUpdate (R);
8766+ return R;
8767+ }
8768+ }
8769+ }
87628770 }
87638771
87648772 return new VPWidenSelectRecipe (
@@ -8773,44 +8781,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87738781 return tryToWiden (Instr, Operands, VPBB);
87748782}
87758783
8776- // / Add CSA Recipes that can occur before each instruction in the input IR
8777- // / is processed and introduced into VPlan.
8778- static void
8779- addCSAPreprocessRecipes (const LoopVectorizationLegality::CSAList &CSAs,
8780- Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8781- VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8782- VPlan &Plan, VPRecipeBuilder &Builder) {
8783-
8784- // Don't build full CSA for VF=ElementCount::getFixed(1)
8785- bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange (
8786- [&](ElementCount VF) { return VF.isScalar (); }, Range);
8787-
8788- for (const auto &CSA : CSAs) {
8789- VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8790- CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8791-
8792- // Scalar VF builds the scalar version of the loop. In that case,
8793- // no maintenance of mask nor extraction in middle block is needed.
8794- if (IsScalarVF) {
8795- VPCSAState *S = new VPCSAState (VPInitScalar);
8796- Plan.addCSAState (CSA.first , S);
8797- continue ;
8798- }
8799-
8800- VPBuilder PHB (PreheaderVPBB);
8801- auto *VPInitMask = Builder.getVPValueOrAddLiveIn (
8802- ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8803- auto *VPInitData =
8804- Builder.getVPValueOrAddLiveIn (PoisonValue::get (CSA.first ->getType ()));
8805-
8806- VPBuilder HB (HeaderVPBB);
8807- auto *VPMaskPhi = HB.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8808-
8809- auto *S = new VPCSAState (VPInitScalar, VPInitData, VPMaskPhi);
8810- Plan.addCSAState (CSA.first , S);
8811- }
8812- }
8813-
88148784// / Add CSA Recipes that must occur after each instruction in the input IR
88158785// / is processed and introduced into VPlan.
88168786static void
@@ -8823,60 +8793,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
88238793 [&](ElementCount VF) { return VF.isScalar (); }, Range))
88248794 return ;
88258795
8796+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
88268797 for (const auto &CSA : CSAs) {
8827- VPCSAState *CSAState = Plan.getCSAStates ().find (CSA.first )->second ;
8828- VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate ();
8798+ // Build the MaskPhi recipe.
8799+ auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn (
8800+ ConstantInt::getFalse (Type::getInt1Ty (CSA.first ->getContext ())));
8801+ VPBuilder B;
8802+ B.setInsertPoint (Header, Header->getFirstNonPhi ());
8803+ auto *VPMaskPhi = B.createCSAMaskPhi (VPInitMask, DL, " csa.mask.phi" );
8804+ B.clearInsertionPoint ();
88298805
8830- assert (VPDataUpdate &&
8831- " VPDataUpdate must have been introduced prior to postprocess" );
8832- assert (CSA.second .getCond () &&
8833- " CSADescriptor must know how to describe the condition" );
88348806 auto GetVPValue = [&](Value *I) {
88358807 return RecipeBuilder.getRecipe (cast<Instruction>(I))->getVPSingleValue ();
88368808 };
8837- VPValue *WidenedCond = GetVPValue (CSA. second . getCond ());
8838- VPValue *VPInitScalar = CSAState-> getVPInitScalar ( );
8809+ VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8810+ cast<VPCSAHeaderPHIRecipe>( GetVPValue (CSA. first ))-> getVPNewData () );
88398811
88408812 // The CSA optimization wants to use a condition such that when it is
88418813 // true, a new value is assigned. However, it is possible that a true lane
88428814 // in WidenedCond corresponds to selection of the initial value instead.
88438815 // In that case, we must use the negation of WidenedCond.
88448816 // i.e. select cond new_val old_val versus select cond.not old_val new_val
8817+ assert (CSA.second .getCond () &&
8818+ " CSADescriptor must know how to describe the condition" );
8819+ VPValue *WidenedCond = GetVPValue (CSA.second .getCond ());
88458820 VPValue *CondToUse = WidenedCond;
8846- VPBuilder B;
88478821 if (cast<SelectInst>(CSA.second .getAssignment ())->getTrueValue () ==
88488822 CSA.first ) {
88498823 auto *VPNotCond = B.createNot (WidenedCond, DL);
8850- VPNotCond->insertBefore (
8851- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8824+ VPNotCond->insertBefore (VPDataUpdate);
88528825 CondToUse = VPNotCond;
88538826 }
88548827
8855- auto *VPAnyActive =
8856- B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8857- VPAnyActive->insertBefore (
8858- GetVPValue (CSA.second .getAssignment ())->getDefiningRecipe ());
8828+ auto *VPAnyActive = B.createAnyActive (CondToUse, DL, " csa.cond.anyactive" );
8829+ VPAnyActive->insertBefore (VPDataUpdate);
88598830
8860- auto *VPMaskSel = B.createCSAMaskSel (CondToUse, CSAState-> getVPMaskPhi () ,
8861- VPAnyActive, DL, " csa.mask.sel" );
8831+ auto *VPMaskSel = B.createCSAMaskSel (CondToUse, VPMaskPhi, VPAnyActive, DL ,
8832+ " csa.mask.sel" );
88628833 VPMaskSel->insertAfter (VPAnyActive);
8834+
88638835 VPDataUpdate->setVPNewMaskAndVPAnyActive (VPMaskSel, VPAnyActive);
8836+ VPValue *VPInitScalar = Plan.getOrAddLiveIn (
8837+ CSA.first ->getIncomingValueForBlock (OrigLoop->getLoopPreheader ()));
8838+ SmallVector<PHINode *> PhiToFix;
8839+ for (User *U : VPDataUpdate->getUnderlyingValue ()->users ())
8840+ if (auto *Phi = dyn_cast<PHINode>(U);
8841+ Phi && Phi->getParent () == OrigLoop->getUniqueExitBlock ())
8842+ PhiToFix.emplace_back (Phi);
88648843 VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8865- new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate});
8866-
8844+ new VPCSAExtractScalarRecipe ({VPInitScalar, VPMaskSel, VPDataUpdate},
8845+ PhiToFix);
88678846 MiddleVPBB->insert (ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi ());
8868-
8869- // Update CSAState with new recipes
8870- CSAState->setExtractScalarRecipe (ExtractScalarRecipe);
8871- CSAState->setVPAnyActive (VPAnyActive);
8872-
8873- // Add live out for the CSA. We should be in LCSSA, so we are looking for
8874- // Phi users in the unique exit block of the original updated value.
8875- BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock ();
8876- assert (OrigExit && " Expected a single exit block" );
8877- for (User *U :VPDataUpdate->getUnderlyingValue ()->users ())
8878- if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent () == OrigExit)
8879- Plan.addLiveOut (Phi, ExtractScalarRecipe);
88808847 }
88818848}
88828849
@@ -9194,11 +9161,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91949161
91959162 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
91969163
9197- addCSAPreprocessRecipes (Legal->getCSAs (), OrigLoop, Plan->getPreheader (),
9198- Plan->getVectorLoopRegion ()->getEntryBasicBlock (), DL,
9199- Range, *Plan, RecipeBuilder);
9200-
9201-
92029164 // ---------------------------------------------------------------------------
92039165 // Pre-construction: record ingredients whose recipes we'll need to further
92049166 // process after constructing the initial VPlan.
0 commit comments