@@ -503,10 +503,6 @@ class InnerLoopVectorizer {
503503 // / Fix the vectorized code, taking care of header phi's, and more.
504504 void fixVectorizedLoop (VPTransformState &State);
505505
506- // / For all vectorized CSAs, replace uses of the live-out scalar from the original
507- // / loop with the scalar extracted from the vector loop.
508- void fixCSALiveOuts (VPTransformState &State, VPlan &Plan);
509-
510506 // Returns the AddedSafetyChecks flag, i.e. true if any runtime check has been added.
511507 bool areSafetyChecksAdded () { return AddedSafetyChecks; }
512508
@@ -2940,25 +2936,6 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
29402936 TargetTransformInfo::TCK_RecipThroughput);
29412937}
29422938
// For every CSA state recorded in Plan: take the underlying IR value of the
// state's data-update recipe, find the PHINode users of that value that live
// in LoopExitBlock, and give each of them an additional incoming value -- the
// scalar obtained from the state's extract-scalar recipe -- on the edge from
// LoopMiddleBlock.
//
// State is used to look up the scalar generated for the extract-scalar recipe;
// Plan supplies the CSA states to walk. The data-update recipe is asserted to
// be non-null before it is dereferenced.
2943- void InnerLoopVectorizer::fixCSALiveOuts (VPTransformState &State, VPlan &Plan) {
2944- for (const auto &CSA : Plan.getCSAStates ()) {
2945- VPCSADataUpdateRecipe *VPDataUpdate = CSA.second ->getDataUpdate ();
2946- assert (VPDataUpdate &&
2947- " VPDataUpdate must have been introduced prior to fixing live outs" );
2948- Value *V = VPDataUpdate->getUnderlyingValue ();
2949- Value *ExtractedScalar =
2950- State.get (CSA.second ->getExtractScalarRecipe (), /* NeedsScalar=*/ true );
2951- // Fix LCSSA phis: collect each PHINode user of V that sits in LoopExitBlock (the set keeps one entry per phi), then add the extracted scalar as its incoming value from LoopMiddleBlock.
2952- llvm::SmallPtrSet<PHINode *, 2 > ToFix;
2953- for (User *U : V->users ())
2954- if (auto *Phi = dyn_cast<PHINode>(U);
2955- Phi && Phi->getParent () == LoopExitBlock)
2956- ToFix.insert (Phi);
2957- for (PHINode *Phi : ToFix)
2958- Phi->addIncoming (ExtractedScalar, LoopMiddleBlock);
2959- }
2960- }
2961-
29622939void InnerLoopVectorizer::fixVectorizedLoop (VPTransformState &State) {
29632940 // Fix widened non-induction PHIs by setting up the PHI operands.
29642941 if (EnableVPlanNativePath)
@@ -2994,7 +2971,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29942971 fixupIVUsers (Entry.first , Entry.second ,
29952972 getOrCreateVectorTripCount (nullptr ),
29962973 IVEndValues[Entry.first ], LoopMiddleBlock, State);
2997- fixCSALiveOuts (State, Plan);
2974+ IVEndValues[Entry. first ], LoopMiddleBlock, Plan, State );
29982975 }
29992976
30002977 for (Instruction *PI : PredicatedInstructions)
@@ -8840,7 +8817,7 @@ static void
88408817addCSAPostprocessRecipes (VPRecipeBuilder &RecipeBuilder,
88418818 const LoopVectorizationLegality::CSAList &CSAs,
88428819 VPBasicBlock *MiddleVPBB, DebugLoc DL, VFRange &Range,
8843- VPlan &Plan) {
8820+ VPlan &Plan, Loop *OrigLoop ) {
88448821 // Don't build CSA for VF=ElementCount::getFixed(1)
88458822 if (LoopVectorizationPlanner::getDecisionAndClampRange (
88468823 [&](ElementCount VF) { return VF.isScalar (); }, Range))
@@ -8892,6 +8869,14 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
88928869 // Update CSAState with new recipes
88938870 CSAState->setExtractScalarRecipe (ExtractScalarRecipe);
88948871 CSAState->setVPAnyActive (VPAnyActive);
8872+
8873+ // Add live outs for the CSA. We should be in LCSSA, so we look for the Phi
8874+ // users of the original updated value that are in the unique exit block.
8875+ BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock ();
8876+ assert (OrigExit && " Expected a single exit block" );
8877+ for (User *U :VPDataUpdate->getUnderlyingValue ()->users ())
8878+ if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent () == OrigExit)
8879+ Plan.addLiveOut (Phi, ExtractScalarRecipe);
88958880 }
88968881}
88978882
0 commit comments