@@ -2489,6 +2489,24 @@ void VPCSAHeaderPHIRecipe::execute(VPTransformState &State) {
24892489 State.set (this , DataPhi, Part);
24902490}
24912491
2492+ InstructionCost VPCSAHeaderPHIRecipe::computeCost (ElementCount VF,
2493+ VPCostContext &Ctx) const {
2494+ if (VF.isScalar ())
2495+ return 0 ;
2496+
2497+ InstructionCost C = 0 ;
2498+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2499+ const TargetTransformInfo &TTI = Ctx.TTI ;
2500+
2501+ // FIXME: These costs should be moved into VPInstruction::computeCost. We put
2502+ // them here for now since there is no VPInstruction::computeCost support.
2503+ // CSAInitMask
2504+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VTy);
2505+ // CSAInitData
2506+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VTy);
2507+ return C;
2508+ }
2509+
24922510#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
24932511void VPCSADataUpdateRecipe::print (raw_ostream &O, const Twine &Indent,
24942512 VPSlotTracker &SlotTracker) const {
@@ -2517,6 +2535,34 @@ void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
25172535 }
25182536}
25192537
2538+ InstructionCost VPCSADataUpdateRecipe::computeCost (ElementCount VF,
2539+ VPCostContext &Ctx) const {
2540+ if (VF.isScalar ())
2541+ return 0 ;
2542+
2543+ InstructionCost C = 0 ;
2544+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2545+ auto *MaskTy = VectorType::get (IntegerType::getInt1Ty (VTy->getContext ()), VF);
2546+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2547+ const TargetTransformInfo &TTI = Ctx.TTI ;
2548+
2549+ // Data Update
2550+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2551+
2552+ // FIXME: These costs should be moved into VPInstruction::computeCost. We put
2553+ // them here for now since they are related to updating the data and there is
2554+ // no VPInstruction::computeCost support at the moment. CSAInitMask AnyActive
2555+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2556+ // vp.reduce.or
2557+ C += TTI.getArithmeticReductionCost (Instruction::Or, VTy, std::nullopt ,
2558+ CostKind);
2559+ // VPVLSel
2560+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2561+ // MaskUpdate
2562+ C += TTI.getArithmeticInstrCost (Instruction::Select, MaskTy, CostKind);
2563+ return C;
2564+ }
2565+
25202566#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
25212567void VPCSAExtractScalarRecipe::print (raw_ostream &O, const Twine &Indent,
25222568 VPSlotTracker &SlotTracker) const {
@@ -2577,6 +2623,60 @@ void VPCSAExtractScalarRecipe::execute(VPTransformState &State) {
25772623 State.set (this , ChooseFromVecOrInit, 0 , /* IsScalar=*/ true );
25782624}
25792625
2626+ InstructionCost
2627+ VPCSAExtractScalarRecipe::computeCost (ElementCount VF,
2628+ VPCostContext &Ctx) const {
2629+ if (VF.isScalar ())
2630+ return 0 ;
2631+
2632+ InstructionCost C = 0 ;
2633+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2634+ auto *Int32VTy =
2635+ VectorType::get (IntegerType::getInt32Ty (VTy->getContext ()), VF);
2636+ auto *MaskTy = VectorType::get (IntegerType::getInt1Ty (VTy->getContext ()), VF);
2637+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2638+ const TargetTransformInfo &TTI = Ctx.TTI ;
2639+
2640+ // StepVector
2641+ ArrayRef<Value *> Args;
2642+ IntrinsicCostAttributes CostAttrs (Intrinsic::stepvector, Int32VTy, Args);
2643+ C += TTI.getIntrinsicInstrCost (CostAttrs, CostKind);
2644+ // NegOneSplat
2645+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, Int32VTy);
2646+ // LastIdx
2647+ if (usesEVL ()) {
2648+ C += TTI.getMinMaxReductionCost (Intrinsic::smax, Int32VTy, FastMathFlags (),
2649+ CostKind);
2650+ } else {
2651+ // ActiveLaneIdxs
2652+ C += TTI.getArithmeticInstrCost (Instruction::Select,
2653+ MaskTy->getScalarType (), CostKind);
2654+ // MaybeLastIdx
2655+ C += TTI.getMinMaxReductionCost (Intrinsic::smax, Int32VTy, FastMathFlags (),
2656+ CostKind);
2657+ // IsLaneZeroActive
2658+ C += TTI.getArithmeticInstrCost (Instruction::ExtractElement, MaskTy,
2659+ CostKind);
2660+ // MaybeLastIdxEQZero
2661+ C += TTI.getArithmeticInstrCost (Instruction::ICmp, MaskTy->getScalarType (),
2662+ CostKind);
2663+ // And
2664+ C += TTI.getArithmeticInstrCost (Instruction::And, MaskTy->getScalarType (),
2665+ CostKind);
2666+ // LastIdx
2667+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy->getScalarType (),
2668+ CostKind);
2669+ }
2670+ // ExtractFromVec
2671+ C += TTI.getArithmeticInstrCost (Instruction::ExtractElement, VTy, CostKind);
2672+ // LastIdxGeZero
2673+ C += TTI.getArithmeticInstrCost (Instruction::ICmp, Int32VTy, CostKind);
2674+ // ChooseFromVecOrInit
2675+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy->getScalarType (),
2676+ CostKind);
2677+ return C;
2678+ }
2679+
25802680void VPBranchOnMaskRecipe::execute (VPTransformState &State) {
25812681 assert (State.Lane && " Branch on Mask works only on single instance." );
25822682
0 commit comments