@@ -12417,78 +12417,59 @@ void Optimizer::newDoNoMaskWA()
1241712417 return flagVar;
1241812418 };
1241912419
12420- // Check if condMod or dst is full write. If so, add pseudo kill for it.
12421- auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
12420+ auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
1242212421 {
1242312422 // Only NoMask Inst without predicate will call this function!
12424- // isCondMod = true: check condMod
12425- // = false: check dst
1242612423 G4_INST* I = *aII;
12427- if (I->getImplAccSrc() != nullptr || I->isSend())
12424+ G4_DstRegRegion* aDst = I->getDst();
12425+ if (!aDst || aDst->isNullReg() ||
12426+ I->getImplAccSrc() != nullptr || I->isSend() ||
12427+ !aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg())
1242812428 {
1242912429 return;
1243012430 }
12431- G4_CondMod* condMod = I->getCondMod();
12432- G4_DstRegRegion* dst = I->getDst();
12433- if (isCondMod)
12434- {
12435- if (!condMod || !condMod->getBase()->isRegVar() ||
12436- condMod->getBase()->asRegVar()->getPhyReg())
12437- {
12438- return;
12439- }
12440- }
12441- else
12442- {
12443- if (!dst || dst->isNullReg() ||
12444- !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
12445- {
12446- return;
12447- }
12448- }
1244912431
12450- G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1245112432 // Make sure dst var is not used in this inst.
1245212433 {
1245312434 G4_Operand* src0_0 = I->getSrc(0);
1245412435 G4_Operand* src0_1 = I->getSrc(1);
1245512436 G4_Operand* src0_2 = I->getSrc(2);
1245612437 G4_Operand* src0_3 = I->getSrc(3);
1245712438
12458- if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
12459- (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
12460- (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
12461- (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
12439+ if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
12440+ (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
12441+ (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
12442+ (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
1246212443 {
1246312444 return;
1246412445 }
1246512446 }
1246612447
1246712448 bool needKill = false;
12468- const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
12449+ const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
1246912450 const G4_Declare* primaryDcl = decl->getRootDeclare();
1247012451
12471- if (D ->isFlag() || isCondMod )
12452+ if (aDst ->isFlag())
1247212453 {
1247312454 // Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1247412455 // For example, "mov (1|M0) P3:uw 0"
12475- needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
12476- D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12456+ needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
12457+ aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1247712458 }
1247812459 else
1247912460 {
1248012461 if (decl->getAliasOffset() != 0 ||
12481- dst ->getRegAccess() != Direct ||
12482- dst ->getRegOff() != 0 ||
12483- dst ->getSubRegOff() != 0 ||
12484- dst ->getHorzStride() != 1 ||
12462+ aDst ->getRegAccess() != Direct ||
12463+ aDst ->getRegOff() != 0 ||
12464+ aDst ->getSubRegOff() != 0 ||
12465+ aDst ->getHorzStride() != 1 ||
1248512466 I->isPartialWrite())
1248612467 {
1248712468 return;
1248812469 }
1248912470 if (fg.isPseudoDcl(primaryDcl) ||
1249012471 primaryDcl->getRegVar()->isRegVarTransient() ||
12491- ((dst ->getTypeSize() * I->getExecSize()) ==
12472+ ((aDst ->getTypeSize() * I->getExecSize()) ==
1249212473 (primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1249312474 {
1249412475 needKill = true;
@@ -12611,7 +12592,7 @@ void Optimizer::newDoNoMaskWA()
1261112592 assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1261212593
1261312594 // add pseudoKill
12614- addPseudoKillIfFullWrite (aBB, aII, false); // dst
12595+ addPseudoKillIfFullDstWrite (aBB, aII);
1261512596
1261612597 // Create a temp that's big enough to hold data and possible gap
1261712598 // b/w data due to alignment/hw restriction.
@@ -12702,9 +12683,8 @@ void Optimizer::newDoNoMaskWA()
1270212683 return;
1270312684 }
1270412685
12705- // Add pseudo kill for dst and condMod
12706- addPseudoKillIfFullWrite(aBB, aII, false); // dst
12707- addPseudoKillIfFullWrite(aBB, aII, true); // condMod
12686+ // Add pseudo kill for dst
12687+ addPseudoKillIfFullDstWrite(aBB, aII);
1270812688
1270912689 const bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1271012690 G4_Declare* modDcl = P->getTopDcl();
@@ -13033,7 +13013,7 @@ void Optimizer::newDoNoMaskWA()
1303313013 if (!condmod && !pred)
1303413014 {
1303513015 // Add pseudo Kill
13036- addPseudoKillIfFullWrite (BB, II, false); // dst
13016+ addPseudoKillIfFullDstWrite (BB, II);
1303713017
1303813018 // case 1: no predicate, no flagModifier (common case)
1303913019 G4_Predicate* newPred = builder.createPredicate(
@@ -13233,78 +13213,58 @@ void Optimizer::doNoMaskWA()
1323313213 return flagVar;
1323413214 };
1323513215
13236- // Check if condMod or dst is full write. If so, add pseudo kill for it.
13237- auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
13216+ auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
1323813217 {
1323913218 // Only NoMask Inst without predicate will call this function!
13240- // isCondMod = true: check condMod
13241- // = false: check dst
1324213219 G4_INST* I = *aII;
13243- if (I->getImplAccSrc() != nullptr || I->isSend())
13220+ G4_DstRegRegion* aDst = I->getDst();
13221+ if (!aDst || aDst->isNullReg() ||
13222+ I->getImplAccSrc() != nullptr || I->isSend() ||
13223+ !aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg())
1324413224 {
1324513225 return;
1324613226 }
13247- G4_CondMod* condMod = I->getCondMod();
13248- G4_DstRegRegion* dst = I->getDst();
13249- if (isCondMod)
13250- {
13251- if (!condMod || !condMod->getBase()->isRegVar() ||
13252- condMod->getBase()->asRegVar()->getPhyReg())
13253- {
13254- return;
13255- }
13256- }
13257- else
13258- {
13259- if (!dst || dst->isNullReg() ||
13260- !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
13261- {
13262- return;
13263- }
13264- }
1326513227
13266- G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1326713228 // Make sure dst var is not used in this inst.
1326813229 {
1326913230 G4_Operand* src0_0 = I->getSrc(0);
1327013231 G4_Operand* src0_1 = I->getSrc(1);
1327113232 G4_Operand* src0_2 = I->getSrc(2);
1327213233 G4_Operand* src0_3 = I->getSrc(3);
1327313234
13274- if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
13275- (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
13276- (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
13277- (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
13235+ if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
13236+ (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
13237+ (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
13238+ (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
1327813239 {
1327913240 return;
1328013241 }
1328113242 }
1328213243
1328313244 bool needKill = false;
13284- const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
13245+ const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
1328513246 const G4_Declare* primaryDcl = decl->getRootDeclare();
13286-
13287- if (D->isFlag() || isCondMod)
13247+ if (aDst->isFlag())
1328813248 {
1328913249 // Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1329013250 // For example, "mov (1|M0) P3:uw 0"
13291- needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
13292- D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13251+ needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
13252+ aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1329313253 }
1329413254 else
1329513255 {
1329613256 if (decl->getAliasOffset() != 0 ||
13297- dst ->getRegAccess() != Direct ||
13298- dst ->getRegOff() != 0 ||
13299- dst ->getSubRegOff() != 0 ||
13300- dst ->getHorzStride() != 1 ||
13257+ aDst ->getRegAccess() != Direct ||
13258+ aDst ->getRegOff() != 0 ||
13259+ aDst ->getSubRegOff() != 0 ||
13260+ aDst ->getHorzStride() != 1 ||
1330113261 I->isPartialWrite())
1330213262 {
1330313263 return;
1330413264 }
1330513265 if (fg.isPseudoDcl(primaryDcl) ||
1330613266 primaryDcl->getRegVar()->isRegVarTransient() ||
13307- ((dst ->getTypeSize() * I->getExecSize()) ==
13267+ ((aDst ->getTypeSize() * I->getExecSize()) ==
1330813268 (primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1330913269 {
1331013270 needKill = true;
@@ -13407,7 +13367,7 @@ void Optimizer::doNoMaskWA()
1340713367 assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1340813368
1340913369 // add pseudoKill
13410- addPseudoKillIfFullWrite (currBB, currII, false); // dst
13370+ addPseudoKillIfFullDstWrite (currBB, currII);
1341113371
1341213372 // Create a temp that's big enough to hold data and possible gap
1341313373 // b/w data due to alignment/hw restriction.
@@ -13497,8 +13457,7 @@ void Optimizer::doNoMaskWA()
1349713457 }
1349813458
1349913459 // Add pseudo kill for dst
13500- addPseudoKillIfFullWrite(currBB, currII, false); // dst
13501- addPseudoKillIfFullWrite(currBB, currII, true); // condMod
13460+ addPseudoKillIfFullDstWrite(currBB, currII);
1350213461
1350313462 bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1350413463 G4_Declare* modDcl = P->getTopDcl();
@@ -13769,7 +13728,7 @@ void Optimizer::doNoMaskWA()
1376913728 if (!condmod && !pred)
1377013729 {
1377113730 // Add pseudo Kill
13772- addPseudoKillIfFullWrite (BB, II, false); // dst
13731+ addPseudoKillIfFullDstWrite (BB, II);
1377313732
1377413733 // case 1: no predicate, no flagModifier (common case)
1377513734 G4_Predicate* newPred = builder.createPredicate(
@@ -13867,7 +13826,7 @@ void Optimizer::doNoMaskWA()
1386713826 if (!condmod && !pred)
1386813827 {
1386913828 // Add pseudo Kill
13870- addPseudoKillIfFullWrite (BB, II, false );
13829+ addPseudoKillIfFullDstWrite (BB, II);
1387113830
1387213831 // case 1: no predicate, no flagModifier (common case)
1387313832 G4_Predicate* newPred = builder.createPredicate(
0 commit comments