diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b786fee9fc..d0d6152da0 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -1510,7 +1510,7 @@ static const OP::OpCodeProperty CoreOps_OpCodeProps[] = { "SetMeshOutputCounts", OCC::SetMeshOutputCounts, "setMeshOutputCounts", - Attribute::None, + Attribute::NoDuplicate, 0, {}, {}}, // Overloads: v diff --git a/lib/HLSL/HLOperations.cpp b/lib/HLSL/HLOperations.cpp index 2cb3c489e8..9ee0be6eca 100644 --- a/lib/HLSL/HLOperations.cpp +++ b/lib/HLSL/HLOperations.cpp @@ -531,6 +531,7 @@ static AttributeSet GetHLFunctionAttributes(LLVMContext &C, case IntrinsicOp::IOP_GroupMemoryBarrier: case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync: case IntrinsicOp::IOP_AllMemoryBarrier: + case IntrinsicOp::IOP_SetMeshOutputCounts: addAttr(Attribute::NoDuplicate); break; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/SetMeshOutputCounts-noduplicate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/SetMeshOutputCounts-noduplicate.hlsl new file mode 100644 index 0000000000..cbe8a1757e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/SetMeshOutputCounts-noduplicate.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc /T ms_6_5 -E main -fcgl %s | FileCheck %s + +// Verify that SetMeshOutputCounts is emitted with the noduplicate function +// attribute during AST->IR generation. This prevents passes such as +// JumpThreading from cloning the call when its arguments are computed in a +// scalar branch (see GitHub issue #8104). 
+ +// CHECK: call void @"dx.hl.op.nd.void (i32, i32, i32)"(i32 68, +// CHECK: declare void @"dx.hl.op.nd.void (i32, i32, i32)"(i32, i32, i32) [[ATTR:#[0-9]+]] +// CHECK: attributes [[ATTR]] = { noduplicate nounwind } + +struct Payload { uint nv; uint np; }; + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void main(in payload Payload pl) { + SetMeshOutputCounts(pl.nv, pl.np); +} diff --git a/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-dxilgen.ll b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-dxilgen.ll new file mode 100644 index 0000000000..a179f7079b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-dxilgen.ll @@ -0,0 +1,57 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Verify that DXIL Op Lowering preserves the noduplicate attribute on the +; SetMeshOutputCounts intrinsic when generating the dx.op.setMeshOutputCounts +; DXIL operation. See GitHub issue #8104. + +; CHECK: call void @dx.op.setMeshOutputCounts(i32 168, +; CHECK: declare void @dx.op.setMeshOutputCounts(i32, i32, i32) [[ATTR:#[0-9]+]] +; CHECK: attributes [[ATTR]] = { noduplicate nounwind } + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.Payload = type { i32, i32 } + +; Function Attrs: nounwind +define void @main(%struct.Payload* %pl) #0 { + %1 = getelementptr inbounds %struct.Payload, %struct.Payload* %pl, i32 0, i32 1 + %2 = load i32, i32* %1, align 4 + %3 = getelementptr inbounds %struct.Payload, %struct.Payload* %pl, i32 0, i32 0 + %4 = load i32, i32* %3, align 4 + call void @"dx.hl.op.nd.void (i32, i32, i32)"(i32 68, i32 %4, i32 %2) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @"dx.hl.op.nd.void (i32, i32, i32)"(i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!pauseresume = !{!0} +!dx.version = !{!1} +!dx.valver = 
!{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !8} +!dx.entryPoints = !{!13} +!dx.fnprops = !{!14} +!dx.options = !{!15, !16} + +!0 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!1 = !{i32 1, i32 5} +!2 = !{i32 1, i32 10} +!3 = !{!"ms", i32 6, i32 5} +!4 = !{i32 0, %struct.Payload undef, !5} +!5 = !{i32 8, !6, !7} +!6 = !{i32 6, !"nv", i32 3, i32 0, i32 7, i32 5} +!7 = !{i32 6, !"np", i32 3, i32 4, i32 7, i32 5} +!8 = !{i32 1, void (%struct.Payload*)* @main, !9} +!9 = !{!10, !12} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{i32 13, !11, !11} +!13 = !{void (%struct.Payload*)* @main, !"main", null, null, null} +!14 = !{void (%struct.Payload*)* @main, i32 13, i32 1, i32 1, i32 1, i32 0, i32 0, i8 2, i32 8} +!15 = !{i32 64} +!16 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-jumpthreading.ll b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-jumpthreading.ll new file mode 100644 index 0000000000..e5defa90b2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-jumpthreading.ll @@ -0,0 +1,44 @@ +; RUN: %dxopt %s -jump-threading -S | FileCheck %s + +; Verify that the JumpThreading pass respects the noduplicate attribute on +; the dx.op.setMeshOutputCounts DXIL operation. Before the fix for GitHub +; issue #8104, SetMeshOutputCounts did not have noduplicate, and JumpThreading +; would clone the call into multiple predecessor blocks when its arguments +; were PHIs whose incoming values came from a scalar branch, causing +; "SetMeshOutputCounts cannot be called multiple times" validation +; failures. + +; The call site below carries no attributes of its own; noduplicate on the +; declaration alone must prevent JumpThreading from duplicating the call. 
+ +; CHECK-LABEL: define void @test +; CHECK: call void @dx.op.setMeshOutputCounts(i32 168, +; CHECK-NOT: call void @dx.op.setMeshOutputCounts + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +define void @test(i1 %c, i32 %a, i32 %b) { +entry: + br i1 %c, label %then, label %merge + +then: + br label %merge + +merge: + %nv = phi i32 [ %a, %then ], [ 0, %entry ] + %np = phi i32 [ %b, %then ], [ 0, %entry ] + call void @dx.op.setMeshOutputCounts(i32 168, i32 %nv, i32 %np) + br i1 %c, label %t2, label %t3 + +t2: + ret void + +t3: + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.setMeshOutputCounts(i32, i32, i32) #0 + +attributes #0 = { noduplicate nounwind } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 634b62cc6f..1403d78719 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -4156,7 +4156,7 @@ def UFI(name, **mappings): "SetMeshOutputCounts", "Mesh shader intrinsic SetMeshOutputCounts", "v", - "", + "nd", [ retvoid_param, db_dxil_param(2, "i32", "numVertices", "number of output vertices"),