Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions lib/HLSL/HLOperationLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4338,9 +4338,20 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
Type *EltTy = Ty->getScalarType();
const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy());
const bool isBool = EltTy->isIntegerTy(1);
// Check for min precision types: their alloc size (from data layout padding
// like i16:32, f16:32) exceeds their primitive size. RawBufferVectorLoad
// should use the widened type (i32/f32) to match how pre-SM6.9
// RawBufferLoad handles min precision (load i32, then trunc to i16).
const bool isMinPrec = !isBool && DL.getTypeAllocSizeInBits(EltTy) >
EltTy->getPrimitiveSizeInBits();
Type *OrigEltTy = EltTy;
// Values will be loaded in memory representations.
if (isBool || (is64 && isTyped))
EltTy = Builder.getInt32Ty();
if (isBool || (is64 && isTyped) || isMinPrec) {
if (isMinPrec && EltTy->isFloatingPointTy())
EltTy = Builder.getFloatTy();
else
EltTy = Builder.getInt32Ty();
}

// Calculate load size with the scalar memory element type.
unsigned LdSize = DL.getTypeAllocSize(EltTy);
Expand Down Expand Up @@ -4454,6 +4465,16 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
retValNew = Builder.CreateICmpNE(
retValNew, Constant::getNullValue(retValNew->getType()));

// Truncate widened min precision loads back to original type.
// e.g., <3 x i32> from rawBufferVectorLoad.v3i32 -> <3 x i16>
if (isMinPrec) {
Type *TargetTy = Ty;
if (OrigEltTy->isIntegerTy())
retValNew = Builder.CreateTrunc(retValNew, TargetTy);
else
retValNew = Builder.CreateFPTrunc(retValNew, TargetTy);
}

helper.retVal->replaceAllUsesWith(retValNew);
helper.retVal = retValNew;

Expand Down Expand Up @@ -4574,6 +4595,27 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
val = Builder.CreateZExt(val, Ty);
}

// Widen min precision types to i32/f32 for RawBufferVectorStore, matching
// how pre-SM6.9 RawBufferStore handles min precision (store as i32).
if (opcode == OP::OpCode::RawBufferVectorStore) {
const DataLayout &DL =
OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
if (DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits()) {
Type *WideTy = EltTy->isFloatingPointTy() ? (Type *)Builder.getFloatTy()
: (Type *)i32Ty;
Type *WideVecTy =
Ty->isVectorTy()
? (Type *)VectorType::get(WideTy, Ty->getVectorNumElements())
: WideTy;
if (EltTy->isFloatingPointTy())
val = Builder.CreateFPExt(val, WideVecTy);
else
val = Builder.CreateSExt(val, WideVecTy);
EltTy = WideTy;
Ty = WideVecTy;
}
}

// If RawBuffer store of 64-bit value, don't set alignment to 8,
// since buffer alignment isn't known to be anything over 4.
unsigned alignValue = OP->GetAllocSizeForType(EltTy);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s

// Regression test for min precision rawBufferVectorLoad/Store.
// Min precision types should use i32/f32 vector operations (not i16/f16)
// to match how pre-SM6.9 RawBufferLoad handles min precision.

RWByteAddressBuffer g_buf : register(u0);

[numthreads(1,1,1)]
void main() {
// min16int: should load as v3i32, not v3i16
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
min16int3 vi = g_buf.Load< min16int3 >(0);
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
g_buf.Store< min16int3 >(12, vi);

// min16uint: should load as v3i32, not v3i16
// CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
min16uint3 vu = g_buf.Load< min16uint3 >(24);
// CHECK: call void @dx.op.rawBufferVectorStore.v3i32
g_buf.Store< min16uint3 >(36, vu);

// min16float: should load as v3f32, not v3f16
// CHECK: call %dx.types.ResRet.v3f32 @dx.op.rawBufferVectorLoad.v3f32
// CHECK: fptrunc <3 x float> {{.*}} to <3 x half>
min16float3 vf = g_buf.Load< min16float3 >(48);
// CHECK: fpext <3 x half> {{.*}} to <3 x float>
// CHECK: call void @dx.op.rawBufferVectorStore.v3f32
g_buf.Store< min16float3 >(60, vf);

// Verify i16/f16 vector ops are NOT used.
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
// CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
// CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
}
Loading