@@ -9716,7 +9716,7 @@ void EmitPass::emitReturn(llvm::ReturnInst* inst)
97169716/// Initializes the kernel for stack call by initializing the SP and FP
97179717void EmitPass::InitializeKernelStack(Function* pKernel)
97189718{
9719- m_currShader->CreateFPAndSP ();
9719+ m_currShader->InitializeStackVariables ();
97209720 auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
97219721 auto pModuleMetadata = pCtx->getModuleMetaData();
97229722
@@ -9788,39 +9788,26 @@ void EmitPass::InitializeKernelStack(Function* pKernel)
97889788uint EmitPass::emitStackArgumentLoadOrStore(std::vector<CVariable*>& Args, bool isWrite)
97899789{
97909790 uint32_t offsetS = 0;
9791- SmallVector<std::tuple<CVariable*, uint32_t, uint32_t, uint32_t>, 8> owordBlks;
9792-
9791+ SmallVector<std::tuple<CVariable*, uint32_t, uint32_t, uint32_t>, 8> dataBlks;
97939792 for (auto Arg : Args)
97949793 {
97959794 // stack offset is always oword-aligned
97969795 offsetS = int_cast<unsigned>(llvm::alignTo(offsetS, SIZE_OWORD));
97979796
97989797 // calculate block sizes for each arg
97999798 int32_t RmnBytes = Arg->GetSize();
9800- uint32_t BlkBytes = 0;
9799+ uint32_t ArgOffset = 0;
98019800 do
98029801 {
98039802 uint32_t BlkSize = 0;
9804- if (RmnBytes >= SIZE_OWORD * 8)
9805- {
9806- BlkSize = 8 * SIZE_OWORD;
9807- }
9808- else if (RmnBytes >= SIZE_OWORD * 4)
9809- {
9810- BlkSize = 4 * SIZE_OWORD;
9811- }
9812- else if (RmnBytes >= SIZE_OWORD * 2)
98139803 {
9814- BlkSize = 2 * SIZE_OWORD;
9804+ BlkSize = getBlockMsgSize(RmnBytes, m_currShader->m_Platform->getMaxBlockMsgSize(false));
9805+ IGC_ASSERT(BlkSize % SIZE_OWORD == 0);
98159806 }
9816- else
9817- {
9818- BlkSize = SIZE_OWORD;
9819- }
9820- owordBlks.push_back(std::make_tuple(Arg, offsetS, BlkSize, BlkBytes));
9807+ dataBlks.push_back(std::make_tuple(Arg, offsetS, BlkSize, ArgOffset));
98219808
98229809 offsetS += BlkSize;
9823- BlkBytes += BlkSize;
9810+ ArgOffset += BlkSize;
98249811 RmnBytes -= BlkSize;
98259812 } while (RmnBytes > 0);
98269813 }
@@ -9829,7 +9816,6 @@ uint EmitPass::emitStackArgumentLoadOrStore(std::vector<CVariable*>& Args, bool
98299816 {
98309817 // Get current SP
98319818 CVariable* pSP = m_currShader->GetSP();
9832- bool is64BitSP = (pSP->GetSize() > 4);
98339819 if (isWrite)
98349820 {
98359821 // If storing to stack, first push SP by total store bytes
@@ -9838,62 +9824,57 @@ uint EmitPass::emitStackArgumentLoadOrStore(std::vector<CVariable*>& Args, bool
98389824 }
98399825
98409826 // Load or store each OWORD block to stack
9841- for (auto& I : owordBlks )
9827+ for (auto& I : dataBlks )
98429828 {
98439829 CVariable* Arg = std::get<0>(I);
98449830 uint32_t StackOffset = std::get<1>(I);
98459831 uint32_t BlkSize = std::get<2>(I);
98469832 uint32_t ArgOffset = std::get<3>(I);
9833+ // spOffset is a negative offset from SP
9834+ int32_t spOffset = StackOffset - offsetS;
98479835
9848- // offset for each block
9849- CVariable* pStackOffset = m_currShader->ImmToVariable(StackOffset - offsetS, ISA_TYPE_D);
9850-
9851- CVariable* pTempSP = m_currShader->GetNewVariable(pSP);
9852- emitAddPointer(pTempSP, pSP, pStackOffset);
9853-
9854- if (isWrite)
9836+ if (isWrite) // Write args to stack
98559837 {
9856- // Write oword block data to stack
9857- if (is64BitSP)
9838+ {
9839+ // SP offset for each block
9840+ CVariable* pTempSP = m_currShader->GetNewVariable(pSP);
9841+ emitAddPointer(pTempSP, pSP, m_currShader->ImmToVariable(spOffset, ISA_TYPE_D));
9842+
98589843 m_encoder->OWStoreA64(Arg, pTempSP, BlkSize, ArgOffset);
9859- else
9860- m_encoder->OWStore(Arg, ESURFACE_STATELESS, nullptr, pTempSP, BlkSize, ArgOffset);
9861- m_encoder->Push();
9844+ m_encoder->Push();
9845+ }
98629846 }
9863- else
9847+ else // Read args from stack
98649848 {
9865- // Read oword block data from stack
98669849 CVariable* LdDst = Arg;
98679850 if (Arg->GetType() == ISA_TYPE_BOOL)
98689851 {
98699852 LdDst = m_currShader->GetNewVariable(numLanes(m_currShader->m_dispatchSize), ISA_TYPE_W, EALIGN_HWORD, false, 1, CName::NONE);
98709853 }
98719854
9872- ResourceDescriptor resource;
9873- resource.m_surfaceType = ESURFACE_STATELESS;
98749855 int RmnBytes = LdDst->GetSize() - ArgOffset;
98759856 bool needRmCopy = BlkSize == SIZE_OWORD && RmnBytes > 0 && RmnBytes < SIZE_OWORD;
9876- if (!needRmCopy)
98779857 {
9878- if (is64BitSP)
9858+ // SP offset for each block
9859+ CVariable* pTempSP = m_currShader->GetNewVariable(pSP);
9860+ emitAddPointer(pTempSP, pSP, m_currShader->ImmToVariable(spOffset, ISA_TYPE_D));
9861+
9862+ if (!needRmCopy)
9863+ {
98799864 m_encoder->OWLoadA64(LdDst, pTempSP, BlkSize, ArgOffset);
9865+ m_encoder->Push();
9866+ }
98809867 else
9881- m_encoder->OWLoad(LdDst, resource, pTempSP, false, BlkSize, ArgOffset);
9882- m_encoder->Push();
9883- }
9884- else
9885- {
9886- // Reading less than one oword, read one oword, then copy
9887- uint ldDstElemSize = LdDst->GetElemSize();
9888- if (ldDstElemSize > 0)
98899868 {
9890- CVariable* pTempDst = m_currShader->GetNewVariable(SIZE_OWORD / ldDstElemSize, LdDst->GetType(), m_currShader->getGRFAlignment(), true, 1, CName::NONE);
9891- if (is64BitSP)
9869+ // Reading less than one oword, read one oword, then copy
9870+ uint ldDstElemSize = LdDst->GetElemSize();
9871+ if (ldDstElemSize > 0)
9872+ {
9873+ CVariable* pTempDst = m_currShader->GetNewVariable(SIZE_OWORD / ldDstElemSize, LdDst->GetType(), m_currShader->getGRFAlignment(), true, 1, CName::NONE);
98929874 m_encoder->OWLoadA64(pTempDst, pTempSP, SIZE_OWORD);
9893- else
9894- m_encoder->OWLoad(pTempDst, resource, pTempSP, false, SIZE_OWORD);
9895- m_encoder->Push();
9896- emitVectorCopy(LdDst, pTempDst, RmnBytes / ldDstElemSize, ArgOffset, 0);
9875+ m_encoder->Push();
9876+ emitVectorCopy(LdDst, pTempDst, RmnBytes / ldDstElemSize, ArgOffset, 0);
9877+ }
98979878 }
98989879 }
98999880 if (LdDst != Arg)
@@ -10121,7 +10102,7 @@ void EmitPass::emitStackCall(llvm::CallInst* inst)
1012110102void EmitPass::emitStackFuncEntry(Function* F)
1012210103{
1012310104 m_encoder->SetDispatchSimdSize();
10124- m_currShader->CreateFPAndSP ();
10105+ m_currShader->InitializeStackVariables ();
1012510106
1012610107 if (F->hasFnAttribute("IndirectlyCalled"))
1012710108 {
@@ -16431,7 +16412,6 @@ void EmitPass::emitPushFrameToStack(unsigned& pushSize)
1643116412 if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
1643216413 {
1643316414 // Store old FP value to current FP
16434- bool is64BitAddr = (pFP->GetSize() > 4);
1643516415 CVariable* pOldFP = m_currShader->GetPrevFP();
1643616416 // If previous FP is null (for kernel frame), we initialize it to 0
1643716417 if (pOldFP == nullptr)
@@ -16440,17 +16420,10 @@ void EmitPass::emitPushFrameToStack(unsigned& pushSize)
1644016420 m_encoder->Copy(pOldFP, m_currShader->ImmToVariable(0, ISA_TYPE_UQ));
1644116421 m_encoder->Push();
1644216422 }
16443- // Align current FP to GRF
16444- CVariable* pCurrFP = m_currShader->GetNewVariable(1, ISA_TYPE_UQ, EALIGN_GRF, true, 1, "currFP");
16445- m_encoder->Copy(pCurrFP, pFP);
16446- m_encoder->Push();
1644716423 {
16448- if (is64BitAddr)
16449- m_encoder->OWStoreA64(pOldFP, pCurrFP, SIZE_OWORD, 0);
16450- else
16451- m_encoder->OWStore(pOldFP, ESURFACE_STATELESS, nullptr, pCurrFP, SIZE_OWORD, 0);
16424+ m_encoder->OWStoreA64(pOldFP, pFP, SIZE_OWORD, 0);
16425+ m_encoder->Push();
1645216426 }
16453- m_encoder->Push();
1645416427 }
1645516428}
1645616429
0 commit comments