diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp
index f5cff305322e8a..a38bca80d34c69 100644
--- a/src/coreclr/jit/async.cpp
+++ b/src/coreclr/jit/async.cpp
@@ -3050,7 +3050,8 @@ BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock*
 //------------------------------------------------------------------------
 // AsyncTransformation::CopyReturnValueOnResumption:
 //   Create IR that copies the return value from the continuation object to the
-//   right local.
+//   right local. When continuations may be reused, also clears out any GC
+//   references in the return value from the continuation afterwards.
 //
 // Parameters:
 //   call - The async call.
@@ -3149,6 +3150,96 @@ void AsyncTransformation::CopyReturnValueOnResumption(GenTreeCall*
 
         LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, storeResult));
     }
+
+    if (ReuseContinuations())
+    {
+        ClearReturnValueOnResumption(retInfo, resultOffset, storeResultBB);
+    }
+}
+
+//------------------------------------------------------------------------
+// AsyncTransformation::ClearReturnValueOnResumption:
+//   Create IR that clears out any GC references in the return value from the
+//   continuation object. This is used after the return value has been copied
+//   out to ensure that a reused continuation does not keep those references
+//   alive. Return types other than TYP_STRUCT and TYP_REF produce no IR here.
+//
+// Parameters:
+//   retInfo       - Information about the return value in the continuation.
+//   resultOffset  - Offset of the return value from the start of the continuation object.
+//   storeResultBB - Basic block to append IR to.
+//
+void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo,
+                                                       unsigned          resultOffset,
+                                                       BasicBlock*       storeResultBB)
+{
+    auto clearGCRef = [=](unsigned offset, var_types type) {
+        GenTree* base  = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF);
+        GenTree* zero  = m_compiler->gtNewZeroConNode(type);
+        GenTree* clear = StoreAtOffset(base, offset, zero, type);
+        LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, clear));
+    };
+
+    if (retInfo->Type.ReturnType == TYP_STRUCT)
+    {
+        ClassLayout* retLayout  = retInfo->Type.ReturnLayout;
+        unsigned     gcPtrCount = retLayout->GetGCPtrCount();
+        if (gcPtrCount == 0)
+        {
+            return;
+        }
+
+        // Find the first and last GC reference slots; with gcPtrCount != 0 both scans are expected to stop.
+        // A block store only needs to cover this range, since everything outside it is non-GC.
+        unsigned firstSlot = 0;
+        while (!retLayout->IsGCPtr(firstSlot))
+        {
+            firstSlot++;
+        }
+
+        unsigned lastSlot = retLayout->GetSlotCount() - 1;
+        while (!retLayout->IsGCPtr(lastSlot))
+        {
+            lastSlot--;
+        }
+
+        unsigned sliceSlotCount = lastSlot - firstSlot + 1;
+
+        // If there are few GC references, and at most half of the slice is made up of GC references,
+        // then clear the individual GC pointers instead of zeroing out the slice.
+        // Otherwise we prefer to clear the entire slice of GC references as a TYP_STRUCT store to allow
+        // the backend to use SIMD instructions.
+        if ((gcPtrCount <= 4) && ((gcPtrCount * 2) <= sliceSlotCount))
+        {
+            for (unsigned i = firstSlot; i <= lastSlot; i++)
+            {
+                if (retLayout->IsGCPtr(i))
+                {
+                    clearGCRef(resultOffset + (i * TARGET_POINTER_SIZE), retLayout->GetGCPtrType(i));
+                }
+            }
+        }
+        else
+        {
+            unsigned sliceOffset = firstSlot * TARGET_POINTER_SIZE;
+            unsigned sliceSize   = sliceSlotCount * TARGET_POINTER_SIZE;
+
+            ClassLayout* sliceLayout = retLayout->SliceLayout(m_compiler, sliceOffset, sliceSize);
+
+            GenTree*     base   = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF);
+            GenTree*     offset = m_compiler->gtNewIconNode((ssize_t)(resultOffset + sliceOffset), TYP_I_IMPL);
+            GenTree*     addr   = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, base, offset);
+            GenTreeFlags indirFlags =
+                GTF_IND_NONFAULTING | (retInfo->HeapAlignment() < retInfo->Alignment ? GTF_IND_UNALIGNED : GTF_EMPTY);
+            GenTree* zero  = m_compiler->gtNewIconNode(0);
+            GenTree* store = m_compiler->gtNewStoreValueNode(sliceLayout, addr, zero, indirFlags);
+            LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, store));
+        }
+    }
+    else if (retInfo->Type.ReturnType == TYP_REF)
+    {
+        clearGCRef(resultOffset, TYP_REF);
+    }
 }
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h
index 6e194fef03019b..fb9abf2d8be779 100644
--- a/src/coreclr/jit/async.h
+++ b/src/coreclr/jit/async.h
@@ -476,6 +476,7 @@ class AsyncTransformation
                                      const CallDefinitionInfo& callDefInfo,
                                      const ContinuationLayout& layout,
                                      BasicBlock*               storeResultBB);
+    void ClearReturnValueOnResumption(const ReturnInfo* retInfo, unsigned resultOffset, BasicBlock* storeResultBB);
 
     GenTreeIndir* LoadFromOffset(GenTree*     base,
                                  unsigned     offset,
diff --git a/src/coreclr/jit/layout.cpp b/src/coreclr/jit/layout.cpp
index ae42615b878df8..b0965462a39159 100644
--- a/src/coreclr/jit/layout.cpp
+++ b/src/coreclr/jit/layout.cpp
@@ -698,6 +698,58 @@ const SegmentList& ClassLayout::GetNonPadding(Compiler* comp)
     return *m_nonPadding;
 }
 
+//------------------------------------------------------------------------
+// SliceLayout:
+//   Slice this class layout into the specified range.
+//
+// Parameters:
+//   compiler - The compiler instance
+//   offset   - Start offset of the slice
+//   size     - Size of the slice
+//
+// Returns:
+//   Layout of size 'size': this layout when the slice covers all of it, else a custom layout for the slice
+//
+ClassLayout* ClassLayout::SliceLayout(Compiler* compiler, unsigned offset, unsigned size)
+{
+    if (offset == 0 && size == GetSize())
+    {
+        return this;
+    }
+
+    ClassLayoutBuilder builder(compiler, size);
+    INDEBUG(builder.SetName(compiler->printfAlloc("%s[%03u..%03u)", GetClassName(), offset, offset + size),
+                            compiler->printfAlloc("%s[%03u..%03u)", GetShortClassName(), offset, offset + size)));
+
+    if (((offset % TARGET_POINTER_SIZE) == 0) && ((size % TARGET_POINTER_SIZE) == 0) && HasGCPtr())
+    {
+        for (unsigned i = 0; i < size; i += TARGET_POINTER_SIZE)
+        {
+            builder.SetGCPtrType(i / TARGET_POINTER_SIZE, GetGCPtrType((offset + i) / TARGET_POINTER_SIZE));
+        }
+    }
+    else
+    {
+        assert(!HasGCPtr());
+    }
+
+    builder.AddPadding(SegmentList::Segment(0, size));
+
+    for (const SegmentList::Segment& nonPadding : GetNonPadding(compiler))
+    {
+        if ((nonPadding.End <= offset) || (nonPadding.Start >= offset + size))
+        {
+            continue;
+        }
+
+        unsigned start = nonPadding.Start <= offset ? 0 : (nonPadding.Start - offset);
+        unsigned end   = nonPadding.End >= (offset + size) ? size : (nonPadding.End - offset);
+
+        builder.RemovePadding(SegmentList::Segment(start, end));
+    }
+    return compiler->typGetCustomLayout(builder);
+}
+
 //------------------------------------------------------------------------
 // AreCompatible: check if 2 layouts are the same for copying.
 //
diff --git a/src/coreclr/jit/layout.h b/src/coreclr/jit/layout.h
index 0c79cce85f305c..42d4ce2917e2d9 100644
--- a/src/coreclr/jit/layout.h
+++ b/src/coreclr/jit/layout.h
@@ -266,6 +266,8 @@ class ClassLayout
 
     const SegmentList& GetNonPadding(Compiler* comp);
 
+    ClassLayout* SliceLayout(Compiler* compiler, unsigned offset, unsigned size);
+
     static bool AreCompatible(const ClassLayout* layout1, const ClassLayout* layout2);
 
     bool CanAssignFrom(const ClassLayout* sourceLayout);
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index b122b8de775259..a48e1198b706ea 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -1703,9 +1703,10 @@ void Lowering::SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg*
     JITDUMP("Dividing split arg [%06u] with %u registers, %u stack space into two arguments\n",
             Compiler::dspTreeID(arg), numRegs, stackSeg.Size);
 
-    ClassLayout* registersLayout = SliceLayout(callArg->GetSignatureLayout(), 0, stackSeg.Offset);
-    ClassLayout* stackLayout     = SliceLayout(callArg->GetSignatureLayout(), stackSeg.Offset,
-                                               callArg->GetSignatureLayout()->GetSize() - stackSeg.Offset);
+    ClassLayout* registersLayout = callArg->GetSignatureLayout()->SliceLayout(m_compiler, 0, stackSeg.Offset);
+    ClassLayout* stackLayout =
+        callArg->GetSignatureLayout()->SliceLayout(m_compiler, stackSeg.Offset,
+                                                   callArg->GetSignatureLayout()->GetSize() - stackSeg.Offset);
 
     GenTree* stackNode     = nullptr;
     GenTree* registersNode = nullptr;
@@ -1898,54 +1899,6 @@ void Lowering::SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg*
     DISPTREERANGE(BlockRange(), call);
 }
 
-//------------------------------------------------------------------------
-// SliceLayout:
-//   Slice a class layout into the specified range.
-//
-// Parameters:
-//   layout - The layout
-//   offset - Start offset of the slice
-//   size   - Size of the slice
-//
-// Returns:
-//   New layout of size 'size'
-//
-ClassLayout* Lowering::SliceLayout(ClassLayout* layout, unsigned offset, unsigned size)
-{
-    ClassLayoutBuilder builder(m_compiler, size);
-    INDEBUG(
-        builder.SetName(m_compiler->printfAlloc("%s[%03u..%03u)", layout->GetClassName(), offset, offset + size),
-                        m_compiler->printfAlloc("%s[%03u..%03u)", layout->GetShortClassName(), offset, offset + size)));
-
-    if (((offset % TARGET_POINTER_SIZE) == 0) && ((size % TARGET_POINTER_SIZE) == 0) && layout->HasGCPtr())
-    {
-        for (unsigned i = 0; i < size; i += TARGET_POINTER_SIZE)
-        {
-            builder.SetGCPtrType(i / TARGET_POINTER_SIZE, layout->GetGCPtrType((offset + i) / TARGET_POINTER_SIZE));
-        }
-    }
-    else
-    {
-        assert(!layout->HasGCPtr());
-    }
-
-    builder.AddPadding(SegmentList::Segment(0, size));
-
-    for (const SegmentList::Segment& nonPadding : layout->GetNonPadding(m_compiler))
-    {
-        if ((nonPadding.End <= offset) || (nonPadding.Start >= offset + size))
-        {
-            continue;
-        }
-
-        unsigned start = nonPadding.Start <= offset ? 0 : (nonPadding.Start - offset);
-        unsigned end   = nonPadding.End >= (offset + size) ? size : (nonPadding.End - offset);
-
-        builder.RemovePadding(SegmentList::Segment(start, end));
-    }
-    return m_compiler->typGetCustomLayout(builder);
-}
-
 //------------------------------------------------------------------------
 // InsertBitCastIfNecessary:
 //   Insert a bitcast if a primitive argument being passed in a register is not
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index e7c59dca277742..d20b3c39034c3a 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -217,12 +217,11 @@ class Lowering final : public Phase
     void LowerSpecialCopyArgs(GenTreeCall* call);
     void InsertSpecialCopyArg(GenTreePutArgStk* putArgStk, CORINFO_CLASS_HANDLE argType, unsigned lclNum);
 #endif // defined(TARGET_X86) && defined(FEATURE_IJW)
-    void         LowerArg(GenTreeCall* call, CallArg* callArg);
-    void         SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* callArg);
-    ClassLayout* SliceLayout(ClassLayout* layout, unsigned offset, unsigned size);
-    void         InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment);
-    void         InsertPutArgReg(GenTree** node, const ABIPassingSegment& registerSegment);
-    void         LegalizeArgPlacement(GenTreeCall* call);
+    void LowerArg(GenTreeCall* call, CallArg* callArg);
+    void SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* callArg);
+    void InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment);
+    void InsertPutArgReg(GenTree** node, const ABIPassingSegment& registerSegment);
+    void LegalizeArgPlacement(GenTreeCall* call);
 
     void InsertPInvokeCallProlog(GenTreeCall* call);
     void InsertPInvokeCallEpilog(GenTreeCall* call);