@@ -10424,7 +10424,9 @@ G4_SrcRegRegion *IR_Builder::coalescePayload(
1042410424 unsigned payloadAlignment,
1042510425 uint32_t payloadWidth, // number of elements for one payload in the send.
1042610426 uint32_t srcSize, // number of elements provided by src
10427- std::initializer_list<G4_SrcRegRegion *> srcs)
10427+ std::initializer_list<G4_SrcRegRegion *> srcs,
10428+ VISA_EMask_Ctrl emask // the send's emask
10429+ )
1042810430{
1042910431 MUST_BE_TRUE (sourceAlignment != 0 && payloadAlignment != 0 ,
1043010432 " alignment mustn't be 0" );
@@ -10494,11 +10496,11 @@ G4_SrcRegRegion *IR_Builder::coalescePayload(
1049410496 auto copyRegion =
1049510497 [&] (G4_Type type) {
1049610498 uint32_t numMoves = std::max (1u , totalSize / (2 * getGRFSize ()));
10499+ auto moveMask = emask;
1049710500 unsigned MAX_SIMD = std::min (srcSize, getNativeExecSize () * (laneSize == 8 ? 1 : 2 ));
1049810501 for (unsigned i = 0 ; i < numMoves; i++) {
1049910502 auto rowOffset = i * 2 ;
10500- unsigned int instOpt =
10501- Get_Gen4_Emask (vISA_EMASK_M1_NM, MAX_SIMD);
10503+ unsigned int instOpt = Get_Gen4_Emask (moveMask, MAX_SIMD);
1050210504 G4_DstRegRegion* dstRegion =
1050310505 createDst (
1050410506 payloadDeclUD->getRegVar (),
@@ -10512,6 +10514,7 @@ G4_SrcRegRegion *IR_Builder::coalescePayload(
1051210514 type);
1051310515 createMov (MAX_SIMD,
1051410516 dstRegion, srcRegion, instOpt, true );
10517+ moveMask = Get_Next_EMask (moveMask, MAX_SIMD);
1051510518 }
1051610519 };
1051710520
0 commit comments