@@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
-STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
 
 namespace {
 
@@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
     return true;
   }
 
-  // If this is a load-store pair from a stack slot to a stack slot, we
-  // might be able to perform the stack-move optimization just as we do for
-  // memcpys from an alloca to an alloca.
-  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
-    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
-      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
-                                DL.getTypeStoreSize(T), BAA)) {
-        // Avoid invalidating the iterator.
-        BBI = SI->getNextNonDebugInstruction()->getIterator();
-        eraseInstruction(SI);
-        eraseInstruction(LI);
-        ++NumMemCpyInstr;
-        return true;
-      }
-    }
-  }
-
   return false;
 }
 
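The block removed above matched a full-width load/store pair between two local stack slots, the scalar analogue of the alloca-to-alloca memcpy case. A minimal sketch of the kind of IR it targeted (hypothetical module, not taken from this patch or its tests; `@take` is an assumed non-capturing reader):

```llvm
declare void @take(ptr nocapture readonly)

define void @load_store_pair() {
  %src = alloca i64, align 8
  %dest = alloca i64, align 8
  store i64 42, ptr %src, align 8
  ; Full-size copy between the two stack slots:
  %v = load i64, ptr %src, align 8
  store i64 %v, ptr %dest, align 8
  call void @take(ptr %dest)
  ret void
}
```

When performStackMoveOptzn() proved the two slots never conflicted, %dest was replaced with %src and the load/store pair was erased.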
@@ -1426,217 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   return true;
 }
 
-// Attempts to optimize the pattern whereby memory is copied from an alloca to
-// another alloca, where the two allocas don't have conflicting mod/ref. If
-// successful, the two allocas can be merged into one and the transfer can be
-// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
-// move operations in that language.
-//
-// Once we determine that the optimization is safe to perform, we replace all
-// uses of the destination alloca with the source alloca. We also "shrink wrap"
-// the lifetime markers of the single merged alloca to before the first use
-// and after the last use. Note that the "shrink wrapping" procedure is a safe
-// transformation only because we restrict the scope of this optimization to
-// allocas that aren't captured.
-bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
-                                          AllocaInst *DestAlloca,
-                                          AllocaInst *SrcAlloca, uint64_t Size,
-                                          BatchAAResults &BAA) {
-  LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
-                    << *Store << "\n");
-
-  // Make sure the two allocas are in the same address space.
-  if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
-    return false;
-  }
-
-  // 1. Check that copy is full. Calculate the static size of the allocas to be
-  // merged, bail out if we can't.
-  const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
-  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
-  if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
-    return false;
-  }
-  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
-  if (!DestSize || DestSize->isScalable() ||
-      Size != DestSize->getFixedValue()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
-    return false;
-  }
-
-  // 2-1. Check that src and dest are static allocas, which are not affected by
-  // stacksave/stackrestore.
-  if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
-      SrcAlloca->getParent() != Load->getParent() ||
-      SrcAlloca->getParent() != Store->getParent())
-    return false;
-
-  // 2-2. Check that src and dest are never captured, unescaped allocas. Also
-  // collect lifetime markers first/last users in order to shrink wrap the
-  // lifetimes, and instructions with noalias metadata to remove them.
-
-  SmallVector<Instruction *, 4> LifetimeMarkers;
-  Instruction *FirstUser = nullptr, *LastUser = nullptr;
-  SmallSet<Instruction *, 4> NoAliasInstrs;
-
-  // Recursively track the user and check whether modified alias exist.
-  auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
-    bool CanBeNull, CanBeFreed;
-    return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
-  };
-
-  auto CaptureTrackingWithModRef =
-      [&](Instruction *AI,
-          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
-    SmallVector<Instruction *, 8> Worklist;
-    Worklist.push_back(AI);
-    unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
-    Worklist.reserve(MaxUsesToExplore);
-    SmallSet<const Use *, 20> Visited;
-    while (!Worklist.empty()) {
-      Instruction *I = Worklist.back();
-      Worklist.pop_back();
-      for (const Use &U : I->uses()) {
-        if (Visited.size() >= MaxUsesToExplore) {
-          LLVM_DEBUG(
-              dbgs()
-              << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
-          return false;
-        }
-        if (!Visited.insert(&U).second)
-          continue;
-        switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
-        case UseCaptureKind::MAY_CAPTURE:
-          return false;
-        case UseCaptureKind::PASSTHROUGH:
-          // Instructions cannot have non-instruction users.
-          Worklist.push_back(cast<Instruction>(U.getUser()));
-          continue;
-        case UseCaptureKind::NO_CAPTURE: {
-          auto *UI = cast<Instruction>(U.getUser());
-          if (DestAlloca->getParent() != UI->getParent())
-            return false;
-          if (!FirstUser || UI->comesBefore(FirstUser))
-            FirstUser = UI;
-          if (!LastUser || LastUser->comesBefore(UI))
-            LastUser = UI;
-          if (UI->isLifetimeStartOrEnd()) {
-            // We note the locations of these intrinsic calls so that we can
-            // delete them later if the optimization succeeds, this is safe
-            // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
-            // conceptually fill all the bytes of the alloca with an undefined
-            // value.
-            int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
-            if (Size < 0 || Size == DestSize) {
-              LifetimeMarkers.push_back(UI);
-              continue;
-            }
-          }
-          if (UI->hasMetadata(LLVMContext::MD_noalias))
-            NoAliasInstrs.insert(UI);
-          if (!ModRefCallback(UI))
-            return false;
-        }
-        }
-      }
-    }
-    return true;
-  };
-
-  // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
-  // from the alloca to the Store.
-  ModRefInfo DestModRef = ModRefInfo::NoModRef;
-  MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
-  auto DestModRefCallback = [&](Instruction *UI) -> bool {
-    // We don't care about the store itself.
-    if (UI == Store)
-      return true;
-    ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
-    // FIXME: For multi-BB cases, we need to see reachability from it to
-    // store.
-    // Bailout if Dest may have any ModRef before Store.
-    if (UI->comesBefore(Store) && isModOrRefSet(Res))
-      return false;
-    DestModRef |= BAA.getModRefInfo(UI, DestLoc);
-
-    return true;
-  };
-
-  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
-    return false;
-
-  // 3. Check that, from after the Load to the end of the BB,
-  // 3-1. if the dest has any Mod, src has no Ref, and
-  // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
-  MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
-
-  auto SrcModRefCallback = [&](Instruction *UI) -> bool {
-    // Any ModRef before Load doesn't matter, also Load and Store can be
-    // ignored.
-    if (UI->comesBefore(Load) || UI == Load || UI == Store)
-      return true;
-    ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
-    if ((isModSet(DestModRef) && isRefSet(Res)) ||
-        (isRefSet(DestModRef) && isModSet(Res)))
-      return false;
-
-    return true;
-  };
-
-  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
-    return false;
-
-  // We can do the transformation. First, align the allocas appropriately.
-  SrcAlloca->setAlignment(
-      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
-
-  // Merge the two allocas.
-  DestAlloca->replaceAllUsesWith(SrcAlloca);
-  eraseInstruction(DestAlloca);
-
-  // Drop metadata on the source alloca.
-  SrcAlloca->dropUnknownNonDebugMetadata();
-
-  // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists.
-  if (!LifetimeMarkers.empty()) {
-    LLVMContext &C = SrcAlloca->getContext();
-    IRBuilder<> Builder(C);
-
-    ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
-    // Create a new lifetime start marker before the first user of src or alloca
-    // users.
-    Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
-    Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
-
-    // Create a new lifetime end marker after the last user of src or alloca
-    // users.
-    // FIXME: If the last user is the terminator for the bb, we can insert
-    // lifetime.end marker to the immidiate post-dominator, but currently do
-    // nothing.
-    if (!LastUser->isTerminator()) {
-      Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
-      Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
-    }
-
-    // Remove all other lifetime markers.
-    for (Instruction *I : LifetimeMarkers)
-      eraseInstruction(I);
-  }
-
-  // As this transformation can cause memory accesses that didn't previously
-  // alias to begin to alias one another, we remove !noalias metadata from any
-  // uses of either alloca. This is conservative, but more precision doesn't
-  // seem worthwhile right now.
-  for (Instruction *I : NoAliasInstrs)
-    I->setMetadata(LLVMContext::MD_noalias, nullptr);
-
-  LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
-  NumStackMove++;
-  return true;
-}
-
 /// Perform simplification of memcpy's. If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
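For context on the function deleted above: when a full-size memcpy ran between two non-escaping stack slots with compatible mod/ref, the two allocas were merged and the copy deleted. A hedged before/after sketch, as a hypothetical IR module (the type, helper functions, and sizes are assumptions, not taken from the patch or its tests):

```llvm
%T = type { [24 x i8] }

declare void @init(ptr nocapture)          ; assumed: only writes its argument
declare void @use(ptr nocapture readonly)  ; assumed: only reads its argument
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)

; Before: two 24-byte stack slots and a full-size copy between them.
define void @before() {
  %src = alloca %T, align 8
  %dest = alloca %T, align 8
  call void @llvm.lifetime.start.p0(i64 24, ptr %src)
  call void @init(ptr %src)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 24, i1 false)
  call void @use(ptr %dest)
  call void @llvm.lifetime.end.p0(i64 24, ptr %src)
  ret void
}

; After: %dest is replaced with %src, the memcpy is erased, and the
; lifetime markers are shrink-wrapped around the remaining uses.
define void @after() {
  %src = alloca %T, align 8
  call void @llvm.lifetime.start.p0(i64 24, ptr %src)
  call void @init(ptr %src)
  call void @use(ptr %src)
  call void @llvm.lifetime.end.p0(i64 24, ptr %src)
  ret void
}
```

The nocapture attributes are what make this sketch eligible: any potentially capturing use of either alloca made the transform bail out, which is also what made the lifetime shrink-wrapping safe.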
@@ -1693,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
       AnyClobber, MemoryLocation::getForSource(M), BAA);
 
-  // There are five possible optimizations we can do for memcpy:
+  // There are four possible optimizations we can do for memcpy:
   // a) memcpy-memcpy xform which exposes redundance for DSE.
   // b) call-memcpy xform for return slot optimization.
   // c) memcpy from freshly alloca'd space or space that has just started
   //    its lifetime copies undefined data, and we can therefore eliminate
   //    the memcpy in favor of the data that was already at the destination.
   // d) memcpy from a just-memset'd source can be turned into memset.
-  // e) elimination of memcpy via stack-move optimization.
   if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
     if (Instruction *MI = MD->getMemoryInst()) {
       if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
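As a quick illustration of case (d) in the comment above, a hypothetical IR fragment (not from this patch; performMemCpyToMemSetOptzn() is the function that performs the rewrite):

```llvm
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)

define void @case_d(ptr noalias %dest, ptr noalias %src) {
  ; %src is fully memset immediately before the copy...
  call void @llvm.memset.p0.i64(ptr %src, i8 0, i64 64, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 64, i1 false)
  ; ...so the copy can itself be rewritten into a memset:
  ;   call void @llvm.memset.p0.i64(ptr %dest, i8 0, i64 64, i1 false)
  ret void
}
```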
@@ -1719,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
         }
       }
       if (auto *MDep = dyn_cast<MemCpyInst>(MI))
-        if (processMemCpyMemCpyDependence(M, MDep, BAA))
-          return true;
+        return processMemCpyMemCpyDependence(M, MDep, BAA);
       if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
         if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
           LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1739,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
17391508 }
17401509 }
17411510
1742- // If the transfer is from a stack slot to a stack slot, then we may be able
1743- // to perform the stack-move optimization. See the comments in
1744- // performStackMoveOptzn() for more details.
1745- auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest ());
1746- if (!DestAlloca)
1747- return false ;
1748- auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource ());
1749- if (!SrcAlloca)
1750- return false ;
1751- ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength ());
1752- if (Len == nullptr )
1753- return false ;
1754- if (performStackMoveOptzn (M, M, DestAlloca, SrcAlloca, Len->getZExtValue (),
1755- BAA)) {
1756- // Avoid invalidating the iterator.
1757- BBI = M->getNextNonDebugInstruction ()->getIterator ();
1758- eraseInstruction (M);
1759- ++NumMemCpyInstr;
1760- return true ;
1761- }
1762-
17631511 return false ;
17641512}
17651513