@@ -815,13 +815,16 @@ SpillManagerGRF::isUnalignedRegion (
815815
816816 bool needs32ByteAlign = useScratchMsg_;
817817
818+ auto bytePerGRF = numEltPerGRF<Type_UB>();
818819 if (needs32ByteAlign)
819820 {
820- if (regionDisp%numEltPerGRF<Type_UB>() == 0 && regionByteSize%numEltPerGRF<Type_UB>() == 0 )
821+ if (regionDisp % bytePerGRF == 0 && regionByteSize % bytePerGRF == 0 )
822+ {
821823 return
822- regionByteSize / numEltPerGRF<Type_UB>() != 1 &&
823- regionByteSize / numEltPerGRF<Type_UB>() != 2 &&
824- regionByteSize / numEltPerGRF<Type_UB>() != 4 ;
824+ regionByteSize / bytePerGRF != 1 &&
825+ regionByteSize / bytePerGRF != 2 &&
826+ regionByteSize / bytePerGRF != 4 ;
827+ }
825828 else
826829 return true ;
827830 }
@@ -834,8 +837,8 @@ SpillManagerGRF::isUnalignedRegion (
834837 // mov (16) V91(6,0)<1>:ub %retval_ub(0,0)<1;1,0>:ub {H1, Align1}
835838 // mov (16) V91(6,16)<1>:ub %retval_ub(0,16)<1;1,0>:ub {H1, Align1}
836839 G4_RegVar* var = getRegVar (region);
837- if ((var->getDeclare ()->getByteSize () > numEltPerGRF<Type_UB>() ) &&
838- (regionByteSize < numEltPerGRF<Type_UB>() || regionDisp % numEltPerGRF<Type_UB>() ))
840+ if ((var->getDeclare ()->getByteSize () > bytePerGRF ) &&
841+ (regionByteSize < bytePerGRF || regionDisp % bytePerGRF ))
839842 {
840843 return true ;
841844 }
@@ -2512,15 +2515,13 @@ SpillManagerGRF::shouldPreloadSpillRange(
25122515 isUnalignedRegion (spilledRangeRegion, execSize) ||
25132516 instContext->isPartialWriteForSpill (!parentBB->isAllLaneActive ()))
25142517 {
2515- #if 0
2516- // special check for scalar variables: no need for pre-fill if instruction is not predicated
2517- // FIXME: need to update this if we ever decide to pack scalar variables in memory
2518- if (spilledRangeRegion->getTopDcl()->getNumElems() == 1 &&
2519- instContext->getPredicate() == nullptr)
2518+ // special check for scalar variables: no need for pre-fill if instruction writes to whole variable and is not predicated
2519+ auto spilledDcl = spilledRangeRegion->getTopDcl ()->getRootDeclare ();
2520+ if (execSize == g4::SIMD1 && getTypeSize (spilledRangeRegion->getType ()) == spilledDcl->getByteSize () && !instContext->getPredicate ())
25202521 {
2522+ // ToDo: investigate why we are spilling so many scalar variables
25212523 return false ;
25222524 }
2523- #endif
25242525 return true ;
25252526 }
25262527 // No pre-load for whole and aligned region writes
0 commit comments