@@ -12417,59 +12417,78 @@ void Optimizer::newDoNoMaskWA()
1241712417 return flagVar;
1241812418 };
1241912419
12420- auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
12420+ // Check if condMod or dst is full write. If so, add pseudo kill for it.
12421+ auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
1242112422 {
1242212423 // Only NoMask Inst without predicate will call this function!
12424+ // isCondMod = true: check condMod
12425+ // = false: check dst
1242312426 G4_INST* I = *aII;
12424- G4_DstRegRegion* aDst = I->getDst();
12425- if (!aDst || aDst->isNullReg() ||
12426- I->getImplAccSrc() != nullptr || I->isSend() ||
12427- !aDst->getBase()->isRegVar() || aDst->getBase()->asRegVar()->getPhyReg())
12427+ if (I->getImplAccSrc() != nullptr || I->isSend())
1242812428 {
1242912429 return;
1243012430 }
12431+ G4_CondMod* condMod = I->getCondMod();
12432+ G4_DstRegRegion* dst = I->getDst();
12433+ if (isCondMod)
12434+ {
12435+ if (!condMod || !condMod->getBase()->isRegVar() ||
12436+ condMod->getBase()->asRegVar()->getPhyReg())
12437+ {
12438+ return;
12439+ }
12440+ }
12441+ else
12442+ {
12443+ if (!dst || dst->isNullReg() ||
12444+ !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
12445+ {
12446+ return;
12447+ }
12448+ }
1243112449
12450+ G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1243212451 // Make sure dst var is not used in this inst.
1243312452 {
1243412453 G4_Operand* src0_0 = I->getSrc(0);
1243512454 G4_Operand* src0_1 = I->getSrc(1);
1243612455 G4_Operand* src0_2 = I->getSrc(2);
1243712456 G4_Operand* src0_3 = I->getSrc(3);
1243812457
12439- if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
12440- (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
12441- (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
12442- (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
12458+ if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
12459+ (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
12460+ (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
12461+ (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
1244312462 {
1244412463 return;
1244512464 }
1244612465 }
1244712466
1244812467 bool needKill = false;
12449- const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
12468+ const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
1245012469 const G4_Declare* primaryDcl = decl->getRootDeclare();
1245112470
12452- if (aDst ->isFlag())
12471+ if (D ->isFlag() || isCondMod )
1245312472 {
1245412473 // Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1245512474 // For example, "mov (1|M0) P3:uw 0"
12456- needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
12457- aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
12475+ needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
12476+ D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1245812477 }
1245912478 else
1246012479 {
1246112480 if (decl->getAliasOffset() != 0 ||
12462- aDst ->getRegAccess() != Direct ||
12463- aDst ->getRegOff() != 0 ||
12464- aDst ->getSubRegOff() != 0 ||
12465- aDst ->getHorzStride() != 1 ||
12481+ dst ->getRegAccess() != Direct ||
12482+ dst ->getRegOff() != 0 ||
12483+ dst ->getSubRegOff() != 0 ||
12484+ dst ->getHorzStride() != 1 ||
1246612485 I->isPartialWrite())
1246712486 {
1246812487 return;
1246912488 }
1247012489 if (fg.isPseudoDcl(primaryDcl) ||
1247112490 primaryDcl->getRegVar()->isRegVarTransient() ||
12472- ((aDst ->getTypeSize() * I->getExecSize()) ==
12491+ ((dst ->getTypeSize() * I->getExecSize()) ==
1247312492 (primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1247412493 {
1247512494 needKill = true;
@@ -12592,7 +12611,7 @@ void Optimizer::newDoNoMaskWA()
1259212611 assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1259312612
1259412613 // add pseudoKill
12595- addPseudoKillIfFullDstWrite (aBB, aII);
12614+ addPseudoKillIfFullWrite (aBB, aII, false); // dst
1259612615
1259712616 // Create a temp that's big enough to hold data and possible gap
1259812617 // b/w data due to alignment/hw restriction.
@@ -12683,8 +12702,9 @@ void Optimizer::newDoNoMaskWA()
1268312702 return;
1268412703 }
1268512704
12686- // Add pseudo kill for dst
12687- addPseudoKillIfFullDstWrite(aBB, aII);
12705+ // Add pseudo kill for dst and condMod
12706+ addPseudoKillIfFullWrite(aBB, aII, false); // dst
12707+ addPseudoKillIfFullWrite(aBB, aII, true); // condMod
1268812708
1268912709 const bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1269012710 G4_Declare* modDcl = P->getTopDcl();
@@ -13013,7 +13033,7 @@ void Optimizer::newDoNoMaskWA()
1301313033 if (!condmod && !pred)
1301413034 {
1301513035 // Add pseudo Kill
13016- addPseudoKillIfFullDstWrite (BB, II);
13036+ addPseudoKillIfFullWrite (BB, II, false); // dst
1301713037
1301813038 // case 1: no predicate, no flagModifier (common case)
1301913039 G4_Predicate* newPred = builder.createPredicate(
@@ -13213,58 +13233,78 @@ void Optimizer::doNoMaskWA()
1321313233 return flagVar;
1321413234 };
1321513235
13216- auto addPseudoKillIfFullDstWrite = [&](G4_BB* aBB, INST_LIST_ITER aII)
13236+ // Check if condMod or dst is full write. If so, add pseudo kill for it.
13237+ auto addPseudoKillIfFullWrite = [&](G4_BB* aBB, INST_LIST_ITER aII, bool isCondMod)
1321713238 {
1321813239 // Only NoMask Inst without predicate will call this function!
13240+ // isCondMod = true: check condMod
13241+ // = false: check dst
1321913242 G4_INST* I = *aII;
13220- G4_DstRegRegion* aDst = I->getDst();
13221- if (!aDst || aDst->isNullReg() ||
13222- I->getImplAccSrc() != nullptr || I->isSend() ||
13223- aDst->getBase()->asRegVar()->getPhyReg())
13243+ if (I->getImplAccSrc() != nullptr || I->isSend())
1322413244 {
1322513245 return;
1322613246 }
13247+ G4_CondMod* condMod = I->getCondMod();
13248+ G4_DstRegRegion* dst = I->getDst();
13249+ if (isCondMod)
13250+ {
13251+ if (!condMod || !condMod->getBase()->isRegVar() ||
13252+ condMod->getBase()->asRegVar()->getPhyReg())
13253+ {
13254+ return;
13255+ }
13256+ }
13257+ else
13258+ {
13259+ if (!dst || dst->isNullReg() ||
13260+ !dst->getBase()->isRegVar() || dst->getBase()->asRegVar()->getPhyReg())
13261+ {
13262+ return;
13263+ }
13264+ }
1322713265
13266+ G4_Operand* D = isCondMod ? condMod : (G4_Operand*)dst;
1322813267 // Make sure dst var is not used in this inst.
1322913268 {
1323013269 G4_Operand* src0_0 = I->getSrc(0);
1323113270 G4_Operand* src0_1 = I->getSrc(1);
1323213271 G4_Operand* src0_2 = I->getSrc(2);
1323313272 G4_Operand* src0_3 = I->getSrc(3);
1323413273
13235- if ((src0_0 && src0_0->compareOperand(aDst ) != Rel_disjoint) ||
13236- (src0_1 && src0_1->compareOperand(aDst ) != Rel_disjoint) ||
13237- (src0_2 && src0_2->compareOperand(aDst ) != Rel_disjoint) ||
13238- (src0_3 && src0_3->compareOperand(aDst ) != Rel_disjoint))
13274+ if ((src0_0 && src0_0->compareOperand(D ) != Rel_disjoint) ||
13275+ (src0_1 && src0_1->compareOperand(D ) != Rel_disjoint) ||
13276+ (src0_2 && src0_2->compareOperand(D ) != Rel_disjoint) ||
13277+ (src0_3 && src0_3->compareOperand(D ) != Rel_disjoint))
1323913278 {
1324013279 return;
1324113280 }
1324213281 }
1324313282
1324413283 bool needKill = false;
13245- const G4_Declare* decl = ((const G4_RegVar*)aDst ->getBase())->getDeclare();
13284+ const G4_Declare* decl = ((const G4_RegVar*)D ->getBase())->getDeclare();
1324613285 const G4_Declare* primaryDcl = decl->getRootDeclare();
13247- if (aDst->isFlag())
13286+
13287+ if (D->isFlag() || isCondMod)
1324813288 {
1324913289 // Using >= instead of = as dcl may be 8bits, but flag dst could be 16 bits
1325013290 // For example, "mov (1|M0) P3:uw 0"
13251- needKill = (aDst ->getRightBound() - aDst ->getLeftBound() + 1) >=
13252- aDst ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
13291+ needKill = (D ->getRightBound() - D ->getLeftBound() + 1) >=
13292+ D ->getBase()->asRegVar()->getDeclare()->getNumberFlagElements();
1325313293 }
1325413294 else
1325513295 {
1325613296 if (decl->getAliasOffset() != 0 ||
13257- aDst ->getRegAccess() != Direct ||
13258- aDst ->getRegOff() != 0 ||
13259- aDst ->getSubRegOff() != 0 ||
13260- aDst ->getHorzStride() != 1 ||
13297+ dst ->getRegAccess() != Direct ||
13298+ dst ->getRegOff() != 0 ||
13299+ dst ->getSubRegOff() != 0 ||
13300+ dst ->getHorzStride() != 1 ||
1326113301 I->isPartialWrite())
1326213302 {
1326313303 return;
1326413304 }
1326513305 if (fg.isPseudoDcl(primaryDcl) ||
1326613306 primaryDcl->getRegVar()->isRegVarTransient() ||
13267- ((aDst ->getTypeSize() * I->getExecSize()) ==
13307+ ((dst ->getTypeSize() * I->getExecSize()) ==
1326813308 (primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())))
1326913309 {
1327013310 needKill = true;
@@ -13367,7 +13407,7 @@ void Optimizer::doNoMaskWA()
1336713407 assert((dst && !dst->isNullReg()) && "ICE: expect dst to be non-null!");
1336813408
1336913409 // add pseudoKill
13370- addPseudoKillIfFullDstWrite (currBB, currII);
13410+ addPseudoKillIfFullWrite (currBB, currII, false); // dst
1337113411
1337213412 // Create a temp that's big enough to hold data and possible gap
1337313413 // b/w data due to alignment/hw restriction.
@@ -13457,7 +13497,8 @@ void Optimizer::doNoMaskWA()
1345713497 }
1345813498
1345913499 // Add pseudo kill for dst
13460- addPseudoKillIfFullDstWrite(currBB, currII);
13500+ addPseudoKillIfFullWrite(currBB, currII, false); // dst
13501+ addPseudoKillIfFullWrite(currBB, currII, true); // condMod
1346113502
1346213503 bool condModGlb = fg.globalOpndHT.isOpndGlobal(P);
1346313504 G4_Declare* modDcl = P->getTopDcl();
@@ -13728,7 +13769,7 @@ void Optimizer::doNoMaskWA()
1372813769 if (!condmod && !pred)
1372913770 {
1373013771 // Add pseudo Kill
13731- addPseudoKillIfFullDstWrite (BB, II);
13772+ addPseudoKillIfFullWrite (BB, II, false); // dst
1373213773
1373313774 // case 1: no predicate, no flagModifier (common case)
1373413775 G4_Predicate* newPred = builder.createPredicate(
@@ -13826,7 +13867,7 @@ void Optimizer::doNoMaskWA()
1382613867 if (!condmod && !pred)
1382713868 {
1382813869 // Add pseudo Kill
13829- addPseudoKillIfFullDstWrite (BB, II);
13870+ addPseudoKillIfFullWrite (BB, II, false );
1383013871
1383113872 // case 1: no predicate, no flagModifier (common case)
1383213873 G4_Predicate* newPred = builder.createPredicate(
0 commit comments