@@ -39,6 +39,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3939#include " common/LLVMWarningsPop.hpp"
4040#include " Probe/Assertion.h"
4141
42+ #include < utility> // std::pair, std::make_pair
43+
4244using namespace llvm ;
4345using namespace IGC ;
4446
@@ -323,8 +325,7 @@ namespace
323325 // VP_SPLIT_SIZE is at least 8 bytes (largest element size) and
324326 // must be power of 2.
325327 VP_SPLIT_SIZE = 32 , // 32 bytes (must power of 2)
326- VP_RAW_SPLIT_SIZE = 16 ,
327- VP_MAX_VECTOR_SIZE = 128 // max vector length
328+ VP_RAW_SPLIT_SIZE = 16
328329 };
329330
330331 static char ID; // Pass identification, replacement for typeid
@@ -352,7 +353,7 @@ namespace
352353 private:
353354
354355 void getOrGenScalarValues (
355- Function& F, Value* VecVal, Value** scalars, Instruction*& availBeforeInst);
356+ Function& F, Value* VecVal, ValVector& scalars, Instruction*& availBeforeInst);
356357 void replaceAllVectorUsesWithScalars (Instruction* VI,
357358 ValVector& SVals);
358359
@@ -373,9 +374,7 @@ namespace
373374 Type* ETy,
374375 uint32_t NElts,
375376 uint32_t SplitSize,
376- Type** SVTypes,
377- uint32_t * SVCounts,
378- uint32_t & Len);
377+ SmallVector<std::pair<Type*, uint32_t >, 8 >& SplitInfo);
379378
380379 private:
381380 const DataLayout* m_DL;
@@ -519,78 +518,53 @@ void VectorPreProcess::replaceAllVectorUsesWithScalars(Instruction* VI, ValVecto
519518 }
520519}
521520
522-
523521void VectorPreProcess::createSplitVectorTypes (
524522 Type* ETy,
525523 uint32_t NElts,
526524 uint32_t SplitSize,
527- Type** SVTypes,
528- uint32_t * SVCounts,
529- uint32_t & Len)
525+ SmallVector<std::pair<Type*, uint32_t >, 8 >& SplitInfo)
530526{
531527 uint32_t ebytes = (unsigned int )ETy->getPrimitiveSizeInBits () / 8 ;
532528 if (ETy->isPointerTy ())
533529 {
534530 ebytes = m_DL->getPointerTypeSize (ETy);
535531 }
536532
533+ // TODO: generalize splitting for cases where the element size is bigger than SplitSize.
537534 if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
538535 {
539536 if (ebytes > SplitSize)
540537 {
541- SVCounts[ 0 ] = NElts * ebytes / SplitSize;
542- SVTypes[ 0 ] = IntegerType::get (ETy->getContext (), SplitSize * 8 );
543- Len = 1 ;
538+ uint32_t M = NElts * ebytes / SplitSize;
539+ Type* Ty = IntegerType::get (ETy->getContext (), SplitSize * 8 );
540+ SplitInfo. push_back ( std::make_pair (Ty, M)) ;
544541 return ;
545542 }
546543 }
547544
548- IGC_ASSERT ((SplitSize % ebytes) == 0 &&
549- " Internal Error: Wrong split size!" );
550-
551- // the number of elements of a new vector
552- uint32_t E = SplitSize / ebytes;
553- // number of vectors
554- uint32_t N = NElts / E;
555- // remaining number of elements.
556- uint32_t R = NElts % E;
557-
558- int j = 0 ;
559- if (N > 0 )
560- {
561- SVCounts[0 ] = N;
562- SVTypes[0 ] = VectorType::get (ETy, E);
563- ++j;
564- }
545+ // Both SplitSize and ebytes shall be a power of 2
546+ IGC_ASSERT ((SplitSize % ebytes) == 0 && " Internal Error: Wrong split size!" );
565547
566- // Sub-vectors are
567- // 1. ebytes >=4, the remaing is a single sub-vector; or
568- // 2. ebytes < 4, the remaining is splitted into
569- // one sub-vector of multiple 4xebytes, and
570- // the remaining vector of 3|2|1 elements.
571- //
572- // Note that we keep vector 3 here so that we may convert
573- // vector3 to vector4 later when special-handling vector3.
574- if (ebytes < 4 && R > 0 )
548+ uint32_t E = SplitSize / ebytes; // split size in elements
549+ uint32_t N = NElts; // the number of elements to be split
550+ while (N > 4 )
575551 {
576- N = R / 4 ;
577- R = R % 4 ;
578- if (N > 0 )
552+ uint32_t M = N / E; // the number of subvectors for split size E
553+ if (M > 0 )
579554 {
580- SVCounts[j] = 1 ;
581- SVTypes[j] = VectorType::get (ETy, 4 * N);
582- ++j;
555+ SplitInfo.push_back (std::make_pair (VectorType::get (ETy, E), M));
583556 }
557+ // The remaining elts are ones to be split for next iteration.
558+ N = N % E;
559+ E = E / 2 ; // next split size
584560 }
585561
586- // remaining sub-vector
587- if (R > 0 )
562+ if (N > 0 )
588563 {
589- SVCounts[j] = 1 ;
590- SVTypes[j] = (R == 1 ) ? ETy : VectorType::get (ETy, R );
591- ++j ;
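564+ // A vector of 1|2|3|4 elements is emitted as-is, with no further splitting. NOTE(review): this also holds when N * ebytes > SplitSize (e.g. NElts == 4, ebytes == 8, SplitSize == 16), a case the previous code split into NElts / E sub-vectors - confirm this is intended.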
565+ Type* Ty = (N == 1 ) ? ETy : VectorType::get (ETy, N );
566+ SplitInfo. push_back ( std::make_pair (Ty, 1 )) ;
592567 }
593- Len = j;
594568}
595569
596570bool VectorPreProcess::splitStore (
@@ -602,14 +576,10 @@ bool VectorPreProcess::splitStore(
602576 Type* ETy = VTy->getElementType ();
603577 uint32_t nelts = int_cast<uint32_t >(VTy->getNumElements ());
604578
605- IGC_ASSERT (nelts <= VP_MAX_VECTOR_SIZE && " Vector length is too big!" );
606-
607- Type* tys[6 ];
608- uint32_t tycnts[6 ];
609- uint32_t len;
610- // Generate splitted loads and save them in the map
579+ // splitInfo: Keep track of all pairs of (sub-vec type, #sub-vec).
580+ SmallVector<std::pair<Type*, uint32_t >, 8 > splitInfo;
611581 bool isStoreInst = isa<StoreInst>(SI);
612-
582+ uint32_t splitSize = isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE;
613583 if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
614584 {
615585 // byte and word-aligned stores can only store a dword at a time.
@@ -623,16 +593,16 @@ bool VectorPreProcess::splitStore(
623593 m_CGCtx->m_DriverInfo .splitUnalignedVectors () ||
624594 !WI.isUniform (ASI.getInst ()))
625595 && ASI.getAlignment () < 4 ;
626- const uint32_t splitSize = needsDWordSplit ? 4 : (isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE);
627- createSplitVectorTypes (ETy, nelts, splitSize, tys, tycnts, len);
628- }
629- else
630- {
631- createSplitVectorTypes (ETy, nelts, isStoreInst ? VP_SPLIT_SIZE : VP_RAW_SPLIT_SIZE, tys, tycnts, len);
596+ if (needsDWordSplit)
597+ {
598+ splitSize = 4 ;
599+ }
632600 }
601+ createSplitVectorTypes (ETy, nelts, splitSize, splitInfo);
633602
634603 // return if no split
635- if (len == 1 && tycnts[0 ] == 1 )
604+ uint32_t len = splitInfo.size ();
605+ if (len == 1 && splitInfo[0 ].second == 1 )
636606 {
637607 return false ;
638608 }
@@ -642,19 +612,20 @@ bool VectorPreProcess::splitStore(
642612 {
643613 // Need to create splitted values.
644614 Instruction* insertBeforeInst = nullptr ;
645- Value* scalars[VP_MAX_VECTOR_SIZE] ;
615+ ValVector scalars (nelts, nullptr ) ;
646616 getOrGenScalarValues (*SI->getParent ()->getParent (),
647617 StoredVal, scalars, insertBeforeInst);
648618 insertBeforeInst = insertBeforeInst ? insertBeforeInst : SI;
649619 IRBuilder<> aBuilder (insertBeforeInst);
650620
621+ Type* Ty1 = splitInfo[0 ].first ;
651622 if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
652623 {
653- if (ETy->getPrimitiveSizeInBits () > tys[ 0 ] ->getScalarSizeInBits ())
624+ if (ETy->getPrimitiveSizeInBits () > Ty1 ->getScalarSizeInBits ())
654625 {
655626 std::vector<Value*> splitScalars;
656- const uint32_t vectorSize = (unsigned int )ETy->getPrimitiveSizeInBits () / tys[ 0 ] ->getScalarSizeInBits ();
657- Type* splitType = llvm::VectorType::get (tys[ 0 ] , vectorSize);
627+ const uint32_t vectorSize = (unsigned int )ETy->getPrimitiveSizeInBits () / Ty1 ->getScalarSizeInBits ();
628+ Type* splitType = llvm::VectorType::get (Ty1 , vectorSize);
658629 for (uint32_t i = 0 ; i < nelts; i++)
659630 {
660631 Value* splitInst = aBuilder.CreateBitCast (scalars[i], splitType);
@@ -663,7 +634,7 @@ bool VectorPreProcess::splitStore(
663634 splitScalars.push_back (aBuilder.CreateExtractElement (splitInst, j));
664635 }
665636 }
666- IGC_ASSERT (splitScalars.size () < VP_MAX_VECTOR_SIZE );
637+ scalars. resize (splitScalars.size ());
667638 for (uint32_t i = 0 ; i < splitScalars.size (); i++)
668639 {
669640 scalars[i] = splitScalars[i];
@@ -674,8 +645,10 @@ bool VectorPreProcess::splitStore(
674645 // Now generate svals
675646 for (uint32_t i = 0 , Idx = 0 ; i < len; ++i)
676647 {
677- VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
678- for (uint32_t j = 0 ; j < tycnts[i]; ++j)
648+ Type* Ty1 = splitInfo[i].first ;
649+ uint32_t len1 = splitInfo[i].second ;
650+ VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
651+ for (uint32_t j = 0 ; j < len1; ++j)
679652 {
680653 Value* subVec;
681654 if (!VTy1)
@@ -685,7 +658,7 @@ bool VectorPreProcess::splitStore(
685658 }
686659 else
687660 {
688- subVec = UndefValue::get (tys[i] );
661+ subVec = UndefValue::get (Ty1 );
689662 uint32_t n1 = int_cast<uint32_t >(VTy1->getNumElements ());
690663 for (uint32_t k = 0 ; k < n1; ++k)
691664 {
@@ -709,10 +682,12 @@ bool VectorPreProcess::splitStore(
709682
710683 for (uint32_t i = 0 , subIdx = 0 ; i < len; ++i)
711684 {
712- VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
713- for (uint32_t j = 0 ; j < tycnts[i]; ++j)
685+ Type* Ty1 = splitInfo[i].first ;
686+ uint32_t len1 = splitInfo[i].second ;
687+ VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
688+ for (uint32_t j = 0 ; j < len1; ++j)
714689 {
715- uint32_t vAlign = (uint32_t )MinAlign (Align, eOffset * EBytes);
690+ uint32_t vAlign = (uint32_t )MinAlign (Align, ( uint32_t ) eOffset * EBytes);
716691 Value* offsetAddr = ASI.CreateConstScalarGEP (svals[subIdx]->getType (), Addr, eOffset);
717692 Instruction* newST = ASI.Create (svals[subIdx], offsetAddr, vAlign, IsVolatile);
718693 eOffset += (VTy1 ? int_cast<uint32_t >(VTy1->getNumElements ()) : 1 );
@@ -757,10 +732,12 @@ bool VectorPreProcess::splitLoad(
757732 Type* ETy = VTy->getElementType ();
758733 uint32_t nelts = int_cast<uint32_t >(VTy->getNumElements ());
759734
760- Type* tys[6 ];
761- uint32_t tycnts[6 ];
762- uint32_t len;
763- // Generate splitted loads and save them in the map
735+ // Split a vector type into multiple sub-types:
736+ // 'len0' number of sub-vectors of type 'vecTy0'
737+ // 'len1' number of sub-vectors of type 'vecTy1'
738+ // ...
739+ // SplitInfo : all pairs, each of which is (sub-vector's type, #sub-vectors).
740+ SmallVector< std::pair<Type*, uint32_t >, 8 > splitInfo;
764741 uint32_t splitSize = isLdRaw ? VP_RAW_SPLIT_SIZE : VP_SPLIT_SIZE;
765742 if (IGC_IS_FLAG_ENABLED (EnableSplitUnalignedVector))
766743 {
@@ -774,11 +751,11 @@ bool VectorPreProcess::splitLoad(
774751 if ((isLdRaw || !WI.isUniform (ALI.getInst ())) && ALI.getAlignment () < 4 )
775752 splitSize = 4 ;
776753 }
777-
778- createSplitVectorTypes (ETy, nelts, splitSize, tys, tycnts, len);
754+ createSplitVectorTypes (ETy, nelts, splitSize, splitInfo);
779755
780756 // return if no split
781- if (len == 1 && tycnts[0 ] == 1 )
757+ uint32_t len = splitInfo.size ();
758+ if (len == 1 && splitInfo[0 ].second == 1 )
782759 {
783760 return false ;
784761 }
@@ -795,12 +772,14 @@ bool VectorPreProcess::splitLoad(
795772
796773 for (uint32_t i = 0 ; i < len; ++i)
797774 {
798- VectorType* VTy1 = dyn_cast<VectorType>(tys[i]);
799- for (uint32_t j = 0 ; j < tycnts[i]; ++j)
775+ Type* Ty1 = splitInfo[i].first ;
776+ uint32_t len1 = splitInfo[i].second ;
777+ VectorType* VTy1 = dyn_cast<VectorType>(Ty1);
778+ for (uint32_t j = 0 ; j < len1; ++j)
800779 {
801780 uint32_t vAlign = (uint32_t )MinAlign (Align, eOffset * EBytes);
802- Value* offsetAddr = ALI.CreateConstScalarGEP (tys[i] , Addr, eOffset);
803- Instruction* I = ALI.Create (tys[i] , offsetAddr, vAlign, IsVolatile);
781+ Value* offsetAddr = ALI.CreateConstScalarGEP (Ty1 , Addr, eOffset);
782+ Instruction* I = ALI.Create (Ty1 , offsetAddr, vAlign, IsVolatile);
804783 eOffset += (VTy1 ? int_cast<uint32_t >(VTy1->getNumElements ()) : 1 );
805784
806785 svals.push_back (I);
@@ -1081,11 +1060,11 @@ bool VectorPreProcess::splitVector3LoadStore(Instruction* Inst)
10811060}
10821061
10831062// availBeforeInst:
1084- // Used to indicate that all scalar values of VecVal are available right
1085- // before the instruction pointed to availBeforeInst.
1086- // If availBeforeInst is null, it means all scalar values are constants.
1063+ // Indicate that all scalar values of VecVal are available right before
1064+ // instruction 'availBeforeInst'. If availBeforeInst is null, it means
1065+ // all scalar values are constants.
10871066void VectorPreProcess::getOrGenScalarValues (
1088- Function& F, Value* VecVal, Value** scalars, Instruction*& availBeforeInst)
1067+ Function& F, Value* VecVal, ValVector& scalars, Instruction*& availBeforeInst)
10891068{
10901069 availBeforeInst = nullptr ;
10911070
0 commit comments