@@ -2048,10 +2048,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20482048
20492049 // need extra move for dst
20502050 if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2051- !builder.isOpndAligned (origDst, getGRFSize () ))
2051+ !builder.isOpndAligned (origDst, 32 ))
20522052 {
20532053 // macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2054- G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb, GRFALIGN );
2054+ G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb);
20552055 mulInst->setDest (tmpDst);
20562056 }
20572057 }
@@ -2090,10 +2090,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20902090 machIter = bb->insertBefore (++machIter, maclInst);
20912091
20922092 if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2093- !builder.isOpndAligned (origDst, getGRFSize () ))
2093+ !builder.isOpndAligned (origDst, 32 ))
20942094 {
20952095 // macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2096- G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb, GRFALIGN );
2096+ G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb);
20972097 maclInst->setDest (tmpDst);
20982098 }
20992099 }
@@ -2460,9 +2460,10 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
24602460// Translate MULH into
24612461// MUL acc src0 src1
24622462// MACH dst src0 src1
2463- void HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
2463+ bool HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
24642464{
24652465 G4_INST* inst = *i;
2466+ INST_LIST_ITER iter = i;
24662467 G4_ExecSize execSize = inst->getExecSize ();
24672468
24682469 int inst_opt = inst->getOption ();
@@ -2529,20 +2530,23 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25292530 execSize > 1 ? builder.getRegionStride2 () : builder.getRegionScalar (),
25302531 dst->getType ());
25312532
2533+ ++iter;
2534+
25322535 G4_INST* tmpMov = builder.createMov (execSize, dst, tmpSrc, inst->getOption (), false );
25332536 tmpMov->setPredicate (builder.duplicateOperand (inst->getPredicate ()));
25342537
2535- bb->insertAfter (i, tmpMov);
2538+ bb->insertBefore (iter, tmpMov);
2539+ // leave i just past the new mov; the caller's i-- then lands back on the mov
2540+ i = iter;
25362541
2537- // Check the new inserted mov inst
2538- i++;
2539-
2540- // Need to remove dst from uses list of mulh, and add them to movInst useList
2541- // add movInst to uselist of mulh.
2542- // Add mulh to def instruction list of movInst
2542+ /*
2543+ Need to remove dst from uses list of mulh, and add them to movInst useList
2544+ add movInst to uselist of mulh.
2545+ Add mulh to def instruction list of movInst
2546+ */
25432547 inst->transferUse (tmpMov);
25442548 inst->addDefUse (tmpMov, Opnd_src0);
2545- return ;
2549+ return true ;
25462550 }
25472551
25482552 // src1 does not support modifier
@@ -2571,6 +2575,8 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25712575 // Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Mul->Mul + Macl expanding will
25722576 // be done in expandMulPostSchedule pass.
25732577
2578+ bool newInstInserted = false ;
2579+
25742580 // sat cannot be used at all in the macro sequence
25752581 // this effectively means sat is broken for mul D D D
25762582 inst->setSaturate (g4::NOSAT);
@@ -2589,30 +2595,32 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25892595 }
25902596
25912597 INST_LIST_ITER end_iter = i;
2592- // this mul will be expanded into mul+macl in expandMulPostSchedule pass. Since expanded macl
2593- // must be grf-aligned, so need to make mul to be grf-aligned.
2598+ // check if the ACC source is aligned to mach dst
2599+ // ToDo: this should be checked by fixAcc?
25942600 G4_DstRegRegion* dst = inst->getDst ();
25952601 if (inst->getSaturate () ||
25962602 dst->getExecTypeSize () > TypeSize (Type_D) ||
2597- isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
2598- !builder.isOpndAligned (dst, getGRFSize ()))
2603+ isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
25992604 {
26002605 // add a tmp mov
2601- inst->setDest (insertMovAfter (i, dst, dst->getType (), bb, GRFALIGN ));
2606+ inst->setDest (insertMovAfter (i, dst, dst->getType (), bb));
26022607 end_iter++;
2608+ newInstInserted = true ;
26032609 }
26042610
26052611 if (execSize > builder.getNativeExecSize ())
26062612 {
26072613 auto start_iter = i;
2608- splitDWMULInst (start_iter, end_iter, bb);
2609- // start_iter points to the first half of mulh. Need double check this new inserted mulh to see if need split again
2610- i = start_iter;
2614+ splitDWMULInst (i, end_iter, bb);
2615+ newInstInserted = true ;
26112616 }
2612- else
2617+
2618+ if (newInstInserted)
26132619 {
2620+ // step i forward; the caller's i-- then brings it back to the mulh
26142621 i++;
26152622 }
2623+ return newInstInserted;
26162624 }
26172625 else
26182626 {
@@ -2626,7 +2634,7 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26262634 G4_INST* newMul = builder.createBinOp (G4_mul, execSize,
26272635 acc_dst_opnd, builder.duplicateOperand (src0), builder.duplicateOperand (src1), inst_opt, false );
26282636
2629- bb->insertBefore (i , newMul);
2637+ bb->insertBefore (iter , newMul);
26302638 inst->copyDefsTo (newMul, false );
26312639
26322640 fixMulSrc1 (std::prev (i), bb);
@@ -2673,16 +2681,10 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26732681 {
26742682 auto start_iter = std::prev (i);
26752683 splitDWMULInst (start_iter, end_iter, bb);
2676- // start_iter ponits to the first half of mul. Need to check the new inserted mul/mach instructions
2677- i = start_iter;
2678- }
2679- else
2680- {
2681- // i points to mach, and need to check the new inserted mul before mach
2682- i = std::prev (i);
2684+ i = end_iter;
26832685 }
2686+ return true ;
26842687 }
2685- return ;
26862688}
26872689
26882690//
@@ -3567,11 +3569,6 @@ void HWConformity::splitDWMULInst(INST_LIST_ITER& start, INST_LIST_ITER& end, G4
35673569 evenlySplitInst (iter, bb);
35683570 G4_INST* expand_sec_half_op = *iter;
35693571 bb->insertBefore (last_iter, expand_sec_half_op);
3570- // For the case that only one instruction needed to split, that is to say start equals to end
3571- if (start == end)
3572- {
3573- start--;
3574- }
35753572 end--;
35763573 bb->erase (iter);
35773574 }
@@ -5274,9 +5271,14 @@ void HWConformity::conformBB(G4_BB* bb)
52745271
52755272 if (inst->opcode () == G4_mulh)
52765273 {
5277- fixMULHInst (i, bb);
5278- next_iter = i;
5279- continue ;
5274+ if (fixMULHInst (i, bb))
5275+ {
5276+ // inserted mul before
5277+ // check the newly added MUL inst
5278+ i--;
5279+ next_iter = i;
5280+ continue ;
5281+ }
52805282 }
52815283
52825284#ifdef _DEBUG
@@ -7170,4 +7172,4 @@ void HWConformity::fixSrc1Region(INST_LIST_ITER it, G4_BB* bb)
71707172 G4_Operand* new_src1 = insertMovBefore (it, 1 , src1->getType (), bb);
71717173 inst->setSrc (new_src1, 1 );
71727174 }
7173- }
7175+ }
0 commit comments