Skip to content

Commit 2ea3686

Browse files
performance: Make memory resident before cpu access
Related-To: NEO-13403
Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
1 parent f3a7278 commit 2ea3686

File tree

3 files changed

+72
-19
lines changed

3 files changed

+72
-19
lines changed

shared/source/os_interface/linux/drm_memory_manager.cpp

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,15 +1631,7 @@ void *DrmMemoryManager::lockResourceImpl(GraphicsAllocation &graphicsAllocation)
16311631
return cpuPtr;
16321632
}
16331633

1634-
auto rootDeviceIndex = graphicsAllocation.getRootDeviceIndex();
1635-
auto ioctlHelper = this->getDrm(rootDeviceIndex).getIoctlHelper();
1636-
1637-
if (ioctlHelper->makeResidentBeforeLockNeeded()) {
1638-
auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.get());
1639-
auto graphicsAllocationPtr = &graphicsAllocation;
1640-
[[maybe_unused]] auto ret = memoryOperationsInterface->makeResidentWithinOsContext(getDefaultOsContext(rootDeviceIndex), ArrayRef<NEO::GraphicsAllocation *>(&graphicsAllocationPtr, 1), false, false, true) == MemoryOperationsStatus::success;
1641-
DEBUG_BREAK_IF(!ret);
1642-
}
1634+
makeAllocationResidentIfNeeded(&graphicsAllocation);
16431635

16441636
auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
16451637
if (graphicsAllocation.getAllocationType() == AllocationType::writeCombined) {
@@ -1698,6 +1690,16 @@ Drm &DrmMemoryManager::getDrm(uint32_t rootDeviceIndex) const {
16981690
return *this->executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as<Drm>();
16991691
}
17001692

1693+
// Requests residency for `allocation` in the default OS context of its root
// device, but only when the ioctl helper's makeResidentBeforeLockNeeded()
// returns true. Nothing is returned to the caller; a non-success status is
// surfaced solely through DEBUG_BREAK_IF.
//
// `allocation` is dereferenced unconditionally, so callers must pass a valid
// pointer.
void DrmMemoryManager::makeAllocationResidentIfNeeded(GraphicsAllocation *allocation) {
    const auto rootDeviceIndex = allocation->getRootDeviceIndex();

    // Early out when this driver model does not ask for residency up front.
    if (!this->getDrm(rootDeviceIndex).getIoctlHelper()->makeResidentBeforeLockNeeded()) {
        return;
    }

    auto memoryOperations = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.get();

    // Single-element view over the caller's pointer; the interface takes a list.
    [[maybe_unused]] const bool madeResident =
        memoryOperations->makeResidentWithinOsContext(getDefaultOsContext(rootDeviceIndex),
                                                      ArrayRef<NEO::GraphicsAllocation *>(&allocation, 1),
                                                      false, false, true) == MemoryOperationsStatus::success;
    DEBUG_BREAK_IF(!madeResident);
}
1702+
17011703
uint32_t DrmMemoryManager::getRootDeviceIndex(const Drm *drm) {
17021704
auto rootDeviceCount = this->executionEnvironment.rootDeviceEnvironments.size();
17031705

@@ -2676,12 +2678,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
26762678
auto canonizedGpuAddress = gmmHelper->canonize(bo->peekAddress());
26772679
auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, 1u /*num gmms*/, allocationData.type, bo.get(), nullptr, canonizedGpuAddress, alignedSize, memoryPool);
26782680

2679-
if (ioctlHelper->makeResidentBeforeLockNeeded()) {
2680-
auto memoryOperationsInterface = static_cast<DrmMemoryOperationsHandler *>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->memoryOperationsInterface.get());
2681-
GraphicsAllocation *allocationPtr = allocation.get();
2682-
[[maybe_unused]] auto ret = memoryOperationsInterface->makeResidentWithinOsContext(getDefaultOsContext(allocationData.rootDeviceIndex), ArrayRef<NEO::GraphicsAllocation *>(&allocationPtr, 1), false, false, true) == MemoryOperationsStatus::success;
2683-
DEBUG_BREAK_IF(!ret);
2684-
}
2681+
makeAllocationResidentIfNeeded(allocation.get());
26852682

26862683
[[maybe_unused]] auto retPtr = ioctlHelper->mmapFunction(*this, cpuPointer, alignedSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
26872684
DEBUG_BREAK_IF(retPtr != cpuPointer);
@@ -2826,6 +2823,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
28262823
auto alignSize = alignUp(remainingSize, MemoryConstants::pageSize64k);
28272824
auto remainingMemoryBanks = allocationData.storageInfo.memoryBanks;
28282825
auto numHandles = GraphicsAllocation::getNumHandlesForKmdSharedAllocation(allocationData.storageInfo.getNumBanks());
2826+
bool makeResidentBeforeLock = ioctlHelper->makeResidentBeforeLockNeeded();
28292827

28302828
bool useChunking = false;
28312829
uint32_t numOfChunks = 0;
@@ -2879,6 +2877,12 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
28792877
return nullptr;
28802878
}
28812879

2880+
if (makeResidentBeforeLock) {
2881+
bo->requireImmediateBinding(true);
2882+
[[maybe_unused]] auto ret = bo->bind(getDefaultOsContext(allocationData.rootDeviceIndex), 0, false);
2883+
DEBUG_BREAK_IF(ret != 0);
2884+
}
2885+
28822886
ioctlHelper->mmapFunction(*this, currentAddress, currentSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
28832887

28842888
bo->setAddress(castToUint64(currentAddress));
@@ -2902,6 +2906,12 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
29022906
allocation->storageInfo = allocationData.storageInfo;
29032907
allocation->storageInfo.isChunked = useChunking;
29042908
allocation->storageInfo.numOfChunks = numOfChunks;
2909+
2910+
if (makeResidentBeforeLock) {
2911+
auto osContext = getDefaultOsContext(allocationData.rootDeviceIndex);
2912+
allocation->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, osContext->getContextId());
2913+
}
2914+
29052915
if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
29062916
ioctlHelper->munmapFunction(*this, cpuBasePointer, totalSizeToAlloc);
29072917
for (auto bo : bos) {
@@ -3039,9 +3049,6 @@ DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandl
30393049
return nullptr;
30403050
}
30413051

3042-
[[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
3043-
DEBUG_BREAK_IF(retPtr != cpuPointer);
3044-
30453052
AllocationData allocationData = {};
30463053
allocationData.rootDeviceIndex = properties.rootDeviceIndex;
30473054
allocationData.size = size;
@@ -3058,7 +3065,14 @@ DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandl
30583065

30593066
pushSharedBufferObject(bo);
30603067

3061-
auto drmAllocation = std::make_unique<DrmAllocation>(properties.rootDeviceIndex, 1u /*num gmms*/, properties.allocationType, bo, cpuPointer, bo->peekAddress(), bo->peekSize(), memoryPool);
3068+
auto drmAllocation = std::make_unique<DrmAllocation>(properties.rootDeviceIndex, 1u /*num gmms*/, properties.allocationType, bo, nullptr, bo->peekAddress(), bo->peekSize(), memoryPool);
3069+
3070+
makeAllocationResidentIfNeeded(drmAllocation.get());
3071+
3072+
[[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
3073+
DEBUG_BREAK_IF(retPtr != cpuPointer);
3074+
3075+
drmAllocation->setCpuPtrAndGpuAddress(cpuPointer, bo->peekAddress());
30623076
drmAllocation->setMmapPtr(cpuPointer);
30633077
drmAllocation->setMmapSize(size);
30643078
drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(cpuPointer), size);

shared/source/os_interface/linux/drm_memory_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class DrmMemoryManager : public MemoryManager {
158158
void emitPinningRequest(BufferObject *bo, const AllocationData &allocationData) const;
159159
uint32_t getDefaultDrmContextId(uint32_t rootDeviceIndex) const;
160160
OsContextLinux *getDefaultOsContext(uint32_t rootDeviceIndex) const;
161+
void makeAllocationResidentIfNeeded(GraphicsAllocation *allocation);
161162

162163
StorageInfo createStorageInfoFromProperties(const AllocationProperties &properties) override;
163164
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override;

shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8540,6 +8540,44 @@ HWTEST_TEMPLATED_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmWhenRetrieveMmap
85408540
}
85418541
}
85428542

8543+
// With an ioctl helper that reports makeResidentBeforeLockNeeded() == true,
// creates a shared USM allocation through createSharedUnifiedMemoryAllocation
// and checks that it is flagged always-resident for the default engine's
// OS context.
HWTEST_TEMPLATED_F(DrmMemoryManagerWithLocalMemoryTest, givenMakeResidentBeforeLockNeededWhenCreateSharedUnifiedMemoryAllocationThenRequireImmediateBindingIsSetAndBindIsCalled) {
    // Expected ioctl counters on the DRM mock for this test.
    mock->ioctlExpected.gemCreateExt = 1;
    mock->ioctlExpected.gemMmapOffset = 1;
    mock->ioctlExpected.gemWait = 1;
    mock->ioctlExpected.gemClose = 1;

    // Helper mock configured to request residency before lock; ownership is
    // handed to the drm object below via ioctlHelper.reset().
    auto ioctlHelperMock = new MockIoctlHelper(*mock);
    ioctlHelperMock->makeResidentBeforeLockNeededResult = true;
    ioctlHelperMock->callBaseVmAdviseAtomicAttribute = false;
    ioctlHelperMock->vmAdviseAtomicAttribute = std::nullopt;

    auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(rootDeviceIndex));

    // Single system-memory region.
    std::vector<MemoryRegion> regions(1);
    regions[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0};
    drm.memoryInfo.reset(new MemoryInfo(regions, drm));
    drm.ioctlHelper.reset(ioctlHelperMock);

    // Swap in the bind-based memory operations handler for this root device.
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[rootDeviceIndex];
    rootDeviceEnvironment.memoryOperationsInterface.reset(new DrmMemoryOperationsHandlerBind(rootDeviceEnvironment, 0));

    AllocationData allocationData{};
    allocationData.rootDeviceIndex = rootDeviceIndex;
    allocationData.type = AllocationType::unifiedSharedMemory;
    allocationData.size = MemoryConstants::pageSize64k;
    allocationData.alignment = MemoryConstants::pageSize;
    allocationData.storageInfo.subDeviceBitfield = 0x1;
    allocationData.useMmapObject = true;

    auto allocation = memoryManager->createSharedUnifiedMemoryAllocation(allocationData);
    ASSERT_NE(nullptr, allocation);

    const auto contextId = device->getDefaultEngine().osContext->getContextId();
    EXPECT_TRUE(allocation->isAlwaysResident(contextId));

    memoryManager->freeGraphicsMemory(allocation);
}
8580+
85438581
HWTEST_TEMPLATED_F(DrmMemoryManagerTest, givenDrmWhenRetrieveMmapOffsetForBufferObjectIsCalledForSystemMemoryThenApplyCorrectFlags) {
85448582
mock->ioctlExpected.gemMmapOffset = 8;
85458583
BufferObject bo(rootDeviceIndex, mock, 3, 1, 1024, 0);

0 commit comments

Comments
 (0)