diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4f22a4598d..8b55de827c 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4338,9 +4338,20 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Type *EltTy = Ty->getScalarType(); const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()); const bool isBool = EltTy->isIntegerTy(1); + // Check for min precision types: their alloc size (from data layout padding + // like i16:32, f16:32) exceeds their primitive size. RawBufferVectorLoad + // should use the widened type (i32/f32) to match how pre-SM6.9 + // RawBufferLoad handles min precision (load i32, then trunc to i16). + const bool isMinPrec = !isBool && DL.getTypeAllocSizeInBits(EltTy) > + EltTy->getPrimitiveSizeInBits(); + Type *OrigEltTy = EltTy; // Values will be loaded in memory representations. - if (isBool || (is64 && isTyped)) - EltTy = Builder.getInt32Ty(); + if (isBool || (is64 && isTyped) || isMinPrec) { + if (isMinPrec && EltTy->isFloatingPointTy()) + EltTy = Builder.getFloatTy(); + else + EltTy = Builder.getInt32Ty(); + } // Calculate load size with the scalar memory element type. unsigned LdSize = DL.getTypeAllocSize(EltTy); @@ -4454,6 +4465,16 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, retValNew = Builder.CreateICmpNE( retValNew, Constant::getNullValue(retValNew->getType())); + // Truncate widened min precision loads back to original type. 
+ // e.g., <3 x i32> from rawBufferVectorLoad.v3i32 -> <3 x i16> + if (isMinPrec) { + Type *TargetTy = Ty; + if (OrigEltTy->isIntegerTy()) + retValNew = Builder.CreateTrunc(retValNew, TargetTy); + else + retValNew = Builder.CreateFPTrunc(retValNew, TargetTy); + } + helper.retVal->replaceAllUsesWith(retValNew); helper.retVal = retValNew; @@ -4574,6 +4595,27 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, val = Builder.CreateZExt(val, Ty); } + // Widen min precision types to i32/f32 for RawBufferVectorStore, matching + // how pre-SM6.9 RawBufferStore handles min precision (store as i32). + if (opcode == OP::OpCode::RawBufferVectorStore) { + const DataLayout &DL = + OP->GetModule()->GetHLModule().GetModule()->getDataLayout(); + if (DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits()) { + Type *WideTy = EltTy->isFloatingPointTy() ? (Type *)Builder.getFloatTy() + : (Type *)i32Ty; + Type *WideVecTy = + Ty->isVectorTy() + ? (Type *)VectorType::get(WideTy, Ty->getVectorNumElements()) + : WideTy; + if (EltTy->isFloatingPointTy()) + val = Builder.CreateFPExt(val, WideVecTy); + else + val = Builder.CreateSExt(val, WideVecTy); + EltTy = WideTy; + Ty = WideVecTy; + } + } + // If RawBuffer store of 64-bit value, don't set alignment to 8, // since buffer alignment isn't known to be anything over 4. unsigned alignValue = OP->GetAllocSizeForType(EltTy); diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_vector_load_store.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_vector_load_store.hlsl new file mode 100644 index 0000000000..6748357d43 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_vector_load_store.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s + +// Regression test for min precision rawBufferVectorLoad/Store. 
+// Min precision types should use i32/f32 vector operations (not i16/f16)
+// to match how pre-SM6.9 RawBufferLoad handles min precision.
+
+RWByteAddressBuffer g_buf : register(u0);
+
+[numthreads(1,1,1)]
+void main() {
+  // min16int3: load and store must both use the v3i32 overloads, not v3i16.
+  // CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
+  min16int3 vi = g_buf.Load< min16int3 >(0);
+  // CHECK: call void @dx.op.rawBufferVectorStore.v3i32
+  g_buf.Store< min16int3 >(12, vi);
+
+  // min16uint3: load and store must both use the v3i32 overloads, not v3i16.
+  // CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
+  min16uint3 vu = g_buf.Load< min16uint3 >(24);
+  // CHECK: call void @dx.op.rawBufferVectorStore.v3i32
+  g_buf.Store< min16uint3 >(36, vu);
+
+  // min16float3: load as v3f32 + fptrunc to half; fpext + store as v3f32.
+  // CHECK: call %dx.types.ResRet.v3f32 @dx.op.rawBufferVectorLoad.v3f32
+  // CHECK: fptrunc <3 x float> {{.*}} to <3 x half>
+  min16float3 vf = g_buf.Load< min16float3 >(48);
+  // CHECK: fpext <3 x half> {{.*}} to <3 x float>
+  // CHECK: call void @dx.op.rawBufferVectorStore.v3f32
+  g_buf.Store< min16float3 >(60, vf);
+
+  // Verify i16/f16 vector ops are NOT used (scans text after the last match).
+  // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
+  // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
+  // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
+  // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
+}