@@ -92,7 +92,7 @@ int IR_Builder::translateVISAQWScatterInst(
9292 unsigned int instOpt = Get_Gen4_Emask (eMask, instExSize);
9393 bool useSplitSend = useSends ();
9494
95- PayloadSource sources[2 ]; // Maximal 2 sources, optional header + offsets
95+ PayloadSource sources[2 ]; // Maximal 2 sources, offsets + src
9696 unsigned len = 0 ;
9797
9898 sources[len].opnd = addresses;
@@ -109,7 +109,11 @@ int IR_Builder::translateVISAQWScatterInst(
109109
110110 G4_SrcRegRegion *msgs[2 ] {0 , 0 };
111111 unsigned sizes[2 ] {0 , 0 };
112- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
112+ // For a send whose execution size is smaller than exSize, e.g.
113+ // "send (4) ...",
114+ // use the send's own execution size (4) as the batch size, not 8/16/32.
115+ // Thus, the batch size is min(exSize, instExSize).
116+ preparePayload (msgs, sizes, std::min (exSize, instExSize), useSplitSend, sources, len);
113117
114118 uint32_t desc = buildDescForScatter (DC_QWORD_SCATTERED_WRITE, numBlocks,
115119 execSize == EXEC_SIZE_8 ? MDC_SM2_SIMD8 : MDC_SM2_SIMD16);
@@ -1484,8 +1488,9 @@ int IR_Builder::translateVISADwordAtomicInst(
14841488 G4_SrcRegRegion *header
14851489 = createSrcRegRegion (dcl, getRegionStride1 ());
14861490 sources[len].opnd = header;
1487- sources[len].numElts = g4::SIMD8 ;
1491+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
14881492 sources[len].instOpt = InstOpt_WriteEnable;
1493+ sources[len].copyExecSize = g4::SIMD8; // header has 8 DWs
14891494 ++len;
14901495 }
14911496
@@ -1510,7 +1515,7 @@ int IR_Builder::translateVISADwordAtomicInst(
15101515
15111516 G4_SrcRegRegion *msgs[2 ] = {0 , 0 };
15121517 unsigned sizes[2 ] = {0 , 0 };
1513- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
1518+ preparePayload (msgs, sizes, std::min ( exSize, instExSize) , useSplitSend, sources, len);
15141519
15151520 SFID sfid = SFID::DP_DC1;
15161521 unsigned MD = 0 ;
@@ -1669,8 +1674,9 @@ int IR_Builder::translateVISAGather4TypedInst(
16691674 G4_SrcRegRegion *header
16701675 = createSrcRegRegion (dcl, getRegionStride1 ());
16711676 sources[len].opnd = header;
1672- sources[len].numElts = g4::SIMD8 ;
1677+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
16731678 sources[len].instOpt = InstOpt_WriteEnable;
1679+ sources[len].copyExecSize = g4::SIMD8;
16741680 ++len;
16751681 }
16761682
@@ -1755,8 +1761,9 @@ int IR_Builder::translateVISAScatter4TypedInst(
17551761 G4_SrcRegRegion *header
17561762 = createSrcRegRegion (dcl, getRegionStride1 ());
17571763 sources[len].opnd = header;
1758- sources[len].numElts = g4::SIMD8 ;
1764+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
17591765 sources[len].instOpt = InstOpt_WriteEnable;
1766+ sources[len].copyExecSize = g4::SIMD8;
17601767 ++len;
17611768 }
17621769
@@ -1877,7 +1884,7 @@ int IR_Builder::translateVISATypedAtomicInst(
18771884
18781885 G4_SrcRegRegion *msgs[2 ] = {0 , 0 };
18791886 unsigned sizes[2 ] = {0 , 0 };
1880- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
1887+ preparePayload (msgs, sizes, std::min ( exSize, instExSize) , useSplitSend, sources, len);
18811888
18821889 unsigned dstLength = dst->isNullReg () ? 0 : 1 ;
18831890
@@ -2081,8 +2088,9 @@ int IR_Builder::translateGather4Inst(
20812088 G4_SrcRegRegion *header
20822089 = createSrcRegRegion (dcl, getRegionStride1 ());
20832090 sources[len].opnd = header;
2084- sources[len].numElts = g4::SIMD8 ;
2091+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
20852092 sources[len].instOpt = InstOpt_WriteEnable;
2093+ sources[len].copyExecSize = g4::SIMD8;
20862094 ++len;
20872095 }
20882096
@@ -2093,7 +2101,7 @@ int IR_Builder::translateGather4Inst(
20932101
20942102 G4_SrcRegRegion *msgs[2 ] = {0 , 0 };
20952103 unsigned sizes[2 ] = {0 , 0 };
2096- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
2104+ preparePayload (msgs, sizes, std::min ( exSize, instExSize) , useSplitSend, sources, len);
20972105
20982106 SFID sfid = SFID::DP_DC1;
20992107
@@ -2183,8 +2191,9 @@ int IR_Builder::translateScatter4Inst(
21832191 G4_SrcRegRegion *header
21842192 = createSrcRegRegion (dcl, getRegionStride1 ());
21852193 sources[len].opnd = header;
2186- sources[len].numElts = g4::SIMD8 ;
2194+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
21872195 sources[len].instOpt = InstOpt_WriteEnable;
2196+ sources[len].copyExecSize = g4::SIMD8;
21882197 ++len;
21892198 }
21902199
@@ -2199,7 +2208,7 @@ int IR_Builder::translateScatter4Inst(
21992208
22002209 G4_SrcRegRegion *msgs[2 ] = {0 , 0 };
22012210 unsigned sizes[2 ] = {0 , 0 };
2202- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
2211+ preparePayload (msgs, sizes, std::min ( exSize, instExSize) , useSplitSend, sources, len);
22032212
22042213 SFID sfid = SFID::DP_DC1;
22052214
@@ -2373,8 +2382,9 @@ int IR_Builder::translateByteGatherInst(
23732382 G4_SrcRegRegion *header
23742383 = createSrcRegRegion (dcl, getRegionStride1 ());
23752384 sources[len].opnd = header;
2376- sources[len].numElts = g4::SIMD8 ;
2385+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
23772386 sources[len].instOpt = InstOpt_WriteEnable;
2387+ sources[len].copyExecSize = g4::SIMD8;
23782388 ++len;
23792389 }
23802390
@@ -2385,7 +2395,7 @@ int IR_Builder::translateByteGatherInst(
23852395
23862396 G4_SrcRegRegion *msgs[2 ] = {0 , 0 };
23872397 unsigned sizes[2 ] = {0 , 0 };
2388- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
2398+ preparePayload (msgs, sizes, std::min ( exSize, instExSize) , useSplitSend, sources, len);
23892399
23902400 SFID sfid = SFID::DP_DC0;
23912401
@@ -2483,8 +2493,9 @@ int IR_Builder::translateByteScatterInst(
24832493 G4_SrcRegRegion *header
24842494 = createSrcRegRegion (dcl, getRegionStride1 ());
24852495 sources[len].opnd = header;
2486- sources[len].numElts = g4::SIMD8 ;
2496+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
24872497 sources[len].instOpt = InstOpt_WriteEnable;
2498+ sources[len].copyExecSize = g4::SIMD8;
24882499 ++len;
24892500 }
24902501
@@ -2499,7 +2510,7 @@ int IR_Builder::translateByteScatterInst(
24992510
25002511 G4_SrcRegRegion *msgs[2 ] = {0 , 0 };
25012512 unsigned sizes[2 ] = {0 , 0 };
2502- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
2513+ preparePayload (msgs, sizes, std::min ( exSize, instExSize) , useSplitSend, sources, len);
25032514
25042515 SFID sfid = SFID::DP_DC0;
25052516
@@ -2652,8 +2663,9 @@ int IR_Builder::translateVISASVMBlockWriteInst(
26522663 unsigned len = 0 ;
26532664
26542665 sources[len].opnd = createSrcRegRegion (dcl, getRegionStride1 ());
2655- sources[len].numElts = g4::SIMD8 ;
2666+ sources[len].numElts = numEltPerGRF<Type_UD>() ;
26562667 sources[len].instOpt = InstOpt_WriteEnable;
2668+ sources[len].copyExecSize = g4::SIMD8; // block msg header has 8 DWs
26572669 ++len;
26582670
26592671 if (src->getElemSize () < TypeSize (Type_UD))
@@ -2824,7 +2836,7 @@ int IR_Builder::translateVISASVMScatterWriteInst(
28242836
28252837 bool useSplitSend = useSends ();
28262838
2827- PayloadSource sources[2 ]; // Maximal 2 sources, optional header + offsets
2839+ PayloadSource sources[2 ]; // Maximal 2 sources, offsets + src
28282840 unsigned len = 0 ;
28292841
28302842 sources[len].opnd = addresses;
@@ -2860,7 +2872,7 @@ int IR_Builder::translateVISASVMScatterWriteInst(
28602872 (TypeSize (srcType) != 4 ))
28612873 src->setType (*this , Type_UD);
28622874
2863- preparePayload (msgs, sizes, exSize, useSplitSend, sources, len);
2875+ preparePayload (msgs, sizes, std::min ( exSize, instExSize) , useSplitSend, sources, len);
28642876
28652877 // set the type back in case we changed it for preparePayload
28662878 src->setType (*this , srcType);
0 commit comments