@@ -168,9 +168,8 @@ G4_SendDescLdSt::G4_SendDescLdSt(
168168 G4_Operand *surf,
169169 ImmOff _immOff,
170170 LdStAttrs _attrs)
171- : G4_SendDesc(G4_SendDesc::Kind::LDST, sfid),
171+ : G4_SendDesc(G4_SendDesc::Kind::LDST, sfid, _execSize ),
172172 op(_op),
173- execSize(_execSize),
174173 //
175174 addrType(at), addrBits(_addrBits), addrDims(_addrDims),
176175 //
@@ -185,7 +184,7 @@ G4_SendDescLdSt::G4_SendDescLdSt(
// Returns the descriptor's execution size as a slot count, raised to a
// minimum of 8 (minExecSize) when the execution size is smaller.
// In this diff the removed ('-') line read the d.execSize member directly,
// while the added ('+') line obtains the same value through d.getExecSize().
185184static size_t toExecSlots (const G4_SendDescLdSt &d)
186185{
187186 int minExecSize = 8 ;
188- int execSlots = std::max ((int )d.execSize , minExecSize);
187+ int execSlots = std::max ((int )d.getExecSize () , minExecSize);
189188 return (size_t )execSlots;
190189}
191190
@@ -416,15 +415,28 @@ G4_SendDescRaw::G4_SendDescRaw(
416415 }
417416}
418417
// Constructor overload that takes no execution size.
// It does no work of its own: every argument is forwarded unchanged to the
// overload that additionally accepts a G4_ExecSize, and g4::SIMD_UNDEFINED is
// supplied for that extra parameter.
418+ G4_SendDescRaw::G4_SendDescRaw (
419+ SFID _sfid,
420+ uint32_t _desc,
421+ uint32_t _extDesc,
422+ int _src1Len,
423+ SendAccess access,
424+ G4_Operand* bti,
425+ bool isValidFuncCtrl)
426+ : G4_SendDescRaw(_sfid, _desc, _extDesc, _src1Len, access, bti,
427+ g4::SIMD_UNDEFINED, isValidFuncCtrl)
428+ {}
429+
419430G4_SendDescRaw::G4_SendDescRaw (
420431 SFID _sfid,
421432 uint32_t _desc,
422433 uint32_t _extDesc,
423434 int _src1Len,
424435 SendAccess access,
425436 G4_Operand *bti,
437+ G4_ExecSize execSize,
426438 bool isValidFuncCtrl)
427- : G4_SendDesc(G4_SendDesc::Kind::RAW, _sfid),
439+ : G4_SendDesc(G4_SendDesc::Kind::RAW, _sfid, execSize ),
428440 accessType(access), m_sti(nullptr ), m_bti(bti), funcCtrlValid(isValidFuncCtrl)
429441{
430442 desc.value = _desc;
@@ -586,6 +598,149 @@ bool G4_SendDescRaw::is16BitReturn() const
586598 return desc.layout .returnFormat == 1 ;
587599}
588600
// Reports whether this descriptor is a byte-scattered read or write.
// Returns true for:
//   - SFID::DP_DC0 with DC_BYTE_SCATTERED_READ / DC_BYTE_SCATTERED_WRITE;
//   - SFID::DP_DC1 with DC1_A64_SCATTERED_READ / DC1_A64_SCATTERED_WRITE,
//     but only when getBlockSize() == 1;
//   - SFID::DP_DC2 with DC2_A64_SCATTERED_READ / DC2_A64_SCATTERED_WRITE when
//     getBlockSize() == 1, or with DC2_BYTE_SCATTERED_READ /
//     DC2_BYTE_SCATTERED_WRITE unconditionally.
// Every other SFID or message type falls through to the final return false.
// NOTE(review): getBlockSize() is presumably the element size in bytes --
// confirm against its definition.
601+ bool G4_SendDescRaw::isByteScatterRW () const
602+ {
603+ auto funcID = getSFID ();
604+ switch (funcID) {
605+ case SFID::DP_DC0:
606+ switch (getHdcMessageType ()) {
607+ case DC_BYTE_SCATTERED_READ:
608+ case DC_BYTE_SCATTERED_WRITE:
609+ return true ;
610+ default :
611+ break ;
612+ }
613+ break ;
614+ case SFID::DP_DC1:
615+ switch (getHdcMessageType ()) {
616+ case DC1_A64_SCATTERED_READ:
617+ case DC1_A64_SCATTERED_WRITE:
618+ return (getBlockSize () == 1 );
619+ default :
620+ break ;
621+ }
622+ break ;
623+ case SFID::DP_DC2:
624+ switch (getHdcMessageType ()) {
625+ case DC2_A64_SCATTERED_READ:
626+ case DC2_A64_SCATTERED_WRITE:
627+ return (getBlockSize () == 1 );
628+ case DC2_BYTE_SCATTERED_READ:
629+ case DC2_BYTE_SCATTERED_WRITE:
630+ return true ;
631+ default :
632+ break ;
633+ }
634+ break ;
635+ default :
636+ break ;
637+ }
638+ return false ;
639+ }
640+
// Reports whether this descriptor is a dword-scattered read or write.
// Returns true for:
//   - SFID::DP_DC0 with DC_DWORD_SCATTERED_READ / DC_DWORD_SCATTERED_WRITE;
//   - SFID::DP_DC1 with DC1_A64_SCATTERED_READ / DC1_A64_SCATTERED_WRITE,
//     but only when getBlockSize() == 4;
//   - SFID::DP_DC2 with DC2_A64_SCATTERED_READ / DC2_A64_SCATTERED_WRITE,
//     but only when getBlockSize() == 4.
// Every other SFID or message type falls through to the final return false.
// NOTE(review): getBlockSize() is presumably the element size in bytes --
// confirm against its definition.
641+ bool G4_SendDescRaw::isDWScatterRW () const
642+ {
643+ auto funcID = getSFID ();
644+ switch (funcID) {
645+ case SFID::DP_DC0:
646+ switch (getHdcMessageType ()) {
647+ case DC_DWORD_SCATTERED_READ:
648+ case DC_DWORD_SCATTERED_WRITE:
649+ return true ;
650+ default :
651+ break ;
652+ }
653+ break ;
654+ case SFID::DP_DC1:
655+ switch (getHdcMessageType ()) {
656+ case DC1_A64_SCATTERED_READ:
657+ case DC1_A64_SCATTERED_WRITE:
658+ return (getBlockSize () == 4 );
659+ default :
660+ break ;
661+ }
662+ break ;
663+ case SFID::DP_DC2:
664+ switch (getHdcMessageType ()) {
665+ case DC2_A64_SCATTERED_READ:
666+ case DC2_A64_SCATTERED_WRITE:
667+ return (getBlockSize () == 4 );
668+ default :
669+ break ;
670+ }
671+ break ;
672+ default :
673+ break ;
674+ }
675+ return false ;
676+ }
677+
// Reports whether this descriptor is a qword-scattered read or write.
// Returns true for:
//   - SFID::DP_DC1 with DC1_A64_SCATTERED_READ / DC1_A64_SCATTERED_WRITE,
//     but only when getBlockSize() == 8;
//   - SFID::DP_DC2 with DC2_A64_SCATTERED_READ / DC2_A64_SCATTERED_WRITE,
//     but only when getBlockSize() == 8.
// The SFID::DP_DC0 case contains a switch with only a default label, so it
// never returns true; it just breaks out to the final return false, as does
// every other SFID or message type.
// NOTE(review): getBlockSize() is presumably the element size in bytes --
// confirm against its definition.
678+ bool G4_SendDescRaw::isQWScatterRW () const
679+ {
680+ auto funcID = getSFID ();
681+ switch (funcID) {
682+ case SFID::DP_DC0:
683+ switch (getHdcMessageType ()) {
684+ default :
685+ break ;
686+ }
687+ break ;
688+ case SFID::DP_DC1:
689+ switch (getHdcMessageType ()) {
690+ case DC1_A64_SCATTERED_READ:
691+ case DC1_A64_SCATTERED_WRITE:
692+ return (getBlockSize () == 8 );
693+ default :
694+ break ;
695+ }
696+ break ;
697+ case SFID::DP_DC2:
698+ switch (getHdcMessageType ()) {
699+ case DC2_A64_SCATTERED_READ:
700+ case DC2_A64_SCATTERED_WRITE:
701+ return (getBlockSize () == 8 );
702+ default :
703+ break ;
704+ }
705+ break ;
706+ default :
707+ break ;
708+ }
709+ return false ;
710+ }
711+
// Reports whether this descriptor is an untyped surface read or write.
// Returns true for:
//   - SFID::DP_DC1 with DC1_UNTYPED_SURFACE_READ / DC1_UNTYPED_SURFACE_WRITE /
//     DC1_A64_UNTYPED_SURFACE_READ / DC1_A64_UNTYPED_SURFACE_WRITE;
//   - SFID::DP_DC2 with DC2_UNTYPED_SURFACE_READ / DC2_UNTYPED_SURFACE_WRITE /
//     DC2_A64_UNTYPED_SURFACE_READ / DC2_A64_UNTYPED_SURFACE_WRITE.
// Every other SFID or message type falls through to the final return false.
712+ bool G4_SendDescRaw::isUntypedRW () const
713+ {
714+ auto funcID = getSFID ();
715+ switch (funcID) {
716+ case SFID::DP_DC1:
717+ switch (getHdcMessageType ()) {
718+ case DC1_UNTYPED_SURFACE_READ:
719+ case DC1_UNTYPED_SURFACE_WRITE:
720+ case DC1_A64_UNTYPED_SURFACE_READ:
721+ case DC1_A64_UNTYPED_SURFACE_WRITE:
722+ return true ;
723+ default :
724+ break ;
725+ }
726+ break ;
727+ case SFID::DP_DC2:
728+ switch (getHdcMessageType ()) {
729+ case DC2_UNTYPED_SURFACE_READ:
730+ case DC2_UNTYPED_SURFACE_WRITE:
731+ case DC2_A64_UNTYPED_SURFACE_READ:
732+ case DC2_A64_UNTYPED_SURFACE_WRITE:
733+ return true ;
734+ default :
735+ break ;
736+ }
737+ break ;
738+ default :
739+ break ;
740+ }
741+ return false ;
742+ }
743+
589744bool G4_SendDescRaw::isA64Message () const
590745{
591746 if (!isHDC ()) {
@@ -882,6 +1037,24 @@ size_t G4_SendDescRaw::getDstLenBytes() const
8821037 return 32 * getScratchRWSize (); // HWords
8831038 } else if (isOwordLoad ()) {
8841039 return 16 * getOwordsAccessed (); // OWords
1040+ #if 0
1041+ // Use macro for easy testing.
1042+ } else if (isByteScatterRW()) {
1043+ uint16_t nbytes = getBlockNum();
1044+ // assume 4 at least
1045+ nbytes = (nbytes >= 4 ? nbytes : 4);
1046+ assert(getExecSize() != g4::SIMD_UNDEFINED);
1047+ return nbytes * getExecSize();
1048+ } else if (isDWScatterRW()) {
1049+ assert(getExecSize() != g4::SIMD_UNDEFINED);
1050+ return 4 * getBlockNum() * getExecSize();
1051+ } else if (isQWScatterRW()) {
1052+ assert(getExecSize() != g4::SIMD_UNDEFINED);
1053+ return 8 * getBlockNum() * getExecSize();
1054+ } else if (isUntypedRW()) {
1055+ assert(getExecSize() != g4::SIMD_UNDEFINED);
1056+ return 4 * getEnabledChannelNum() * getExecSize();
1057+ #endif
8851058 } else {
8861059 // fallback to the raw GRF count
8871060 return ResponseLength () * (size_t )getGRFSize ();
0 commit comments