From b62b3953f0eb3b73bfd1446c99d24de8d1ea404c Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Sat, 9 May 2026 19:26:32 -0500 Subject: [PATCH] Mark SetMeshOutputCounts as noduplicate A call to SetMeshOutputCounts can only appear once in generated DXIL, so it should not be legal for the compiler to duplicate that call through any optimization pass. In the reproduction case attached to the issue, the JumpThreading pass duplicates calls to SetMeshOutputCounts. This can be prevented by marking the HL and DXIL operations with noduplicate, which is a trivial change. Fixes #8104 --- lib/DXIL/DxilOperations.cpp | 2 +- lib/HLSL/HLOperations.cpp | 1 + .../SetMeshOutputCounts-noduplicate.hlsl | 18 ++++++ ...SetMeshOutputCounts-noduplicate-dxilgen.ll | 57 +++++++++++++++++++ ...hOutputCounts-noduplicate-jumpthreading.ll | 44 ++++++++++++++ utils/hct/hctdb.py | 2 +- 6 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/SetMeshOutputCounts-noduplicate.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-dxilgen.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-jumpthreading.ll diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b786fee9fc..d0d6152da0 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -1510,7 +1510,7 @@ static const OP::OpCodeProperty CoreOps_OpCodeProps[] = { "SetMeshOutputCounts", OCC::SetMeshOutputCounts, "setMeshOutputCounts", - Attribute::None, + Attribute::NoDuplicate, 0, {}, {}}, // Overloads: v diff --git a/lib/HLSL/HLOperations.cpp b/lib/HLSL/HLOperations.cpp index 2cb3c489e8..9ee0be6eca 100644 --- a/lib/HLSL/HLOperations.cpp +++ b/lib/HLSL/HLOperations.cpp @@ -531,6 +531,7 @@ static AttributeSet GetHLFunctionAttributes(LLVMContext &C, case IntrinsicOp::IOP_GroupMemoryBarrier: case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync: case
IntrinsicOp::IOP_AllMemoryBarrier: + case IntrinsicOp::IOP_SetMeshOutputCounts: addAttr(Attribute::NoDuplicate); break; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/SetMeshOutputCounts-noduplicate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/SetMeshOutputCounts-noduplicate.hlsl new file mode 100644 index 0000000000..cbe8a1757e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/SetMeshOutputCounts-noduplicate.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc /T ms_6_5 -E main -fcgl %s | FileCheck %s + +// Verify that SetMeshOutputCounts is emitted with the noduplicate function +// attribute during AST->IR generation. This prevents passes such as +// JumpThreading from cloning the call when its arguments are computed in a +// scalar branch (see GitHub issue #8104). + +// CHECK: call void @"dx.hl.op.nd.void (i32, i32, i32)"(i32 68, +// CHECK: declare void @"dx.hl.op.nd.void (i32, i32, i32)"(i32, i32, i32) [[ATTR:#[0-9]+]] +// CHECK: attributes [[ATTR]] = { noduplicate nounwind } + +struct Payload { uint nv; uint np; }; + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void main(in payload Payload pl) { + SetMeshOutputCounts(pl.nv, pl.np); +} diff --git a/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-dxilgen.ll b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-dxilgen.ll new file mode 100644 index 0000000000..a179f7079b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-dxilgen.ll @@ -0,0 +1,57 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Verify that DXIL Op Lowering preserves the noduplicate attribute on the +; SetMeshOutputCounts intrinsic when generating the dx.op.setMeshOutputCounts +; DXIL operation. See GitHub issue #8104. 
+ +; CHECK: call void @dx.op.setMeshOutputCounts(i32 168, +; CHECK: declare void @dx.op.setMeshOutputCounts(i32, i32, i32) [[ATTR:#[0-9]+]] +; CHECK: attributes [[ATTR]] = { noduplicate nounwind } + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.Payload = type { i32, i32 } + +; Function Attrs: nounwind +define void @main(%struct.Payload* %pl) #0 { + %1 = getelementptr inbounds %struct.Payload, %struct.Payload* %pl, i32 0, i32 1 + %2 = load i32, i32* %1, align 4 + %3 = getelementptr inbounds %struct.Payload, %struct.Payload* %pl, i32 0, i32 0 + %4 = load i32, i32* %3, align 4 + call void @"dx.hl.op.nd.void (i32, i32, i32)"(i32 68, i32 %4, i32 %2) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @"dx.hl.op.nd.void (i32, i32, i32)"(i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!pauseresume = !{!0} +!dx.version = !{!1} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !8} +!dx.entryPoints = !{!13} +!dx.fnprops = !{!14} +!dx.options = !{!15, !16} + +!0 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!1 = !{i32 1, i32 5} +!2 = !{i32 1, i32 10} +!3 = !{!"ms", i32 6, i32 5} +!4 = !{i32 0, %struct.Payload undef, !5} +!5 = !{i32 8, !6, !7} +!6 = !{i32 6, !"nv", i32 3, i32 0, i32 7, i32 5} +!7 = !{i32 6, !"np", i32 3, i32 4, i32 7, i32 5} +!8 = !{i32 1, void (%struct.Payload*)* @main, !9} +!9 = !{!10, !12} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{i32 13, !11, !11} +!13 = !{void (%struct.Payload*)* @main, !"main", null, null, null} +!14 = !{void (%struct.Payload*)* @main, i32 13, i32 1, i32 1, i32 1, i32 0, i32 0, i8 2, i32 8} +!15 = !{i32 64} +!16 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-jumpthreading.ll b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-jumpthreading.ll new file mode 100644 index 0000000000..e5defa90b2 
--- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/SetMeshOutputCounts-noduplicate-jumpthreading.ll @@ -0,0 +1,44 @@ +; RUN: %dxopt %s -jump-threading -S | FileCheck %s + +; Verify that the JumpThreading pass respects the noduplicate attribute on +; the dx.op.setMeshOutputCounts DXIL operation. Prior to the fix for GitHub issue #8104, +; SetMeshOutputCounts did not have noduplicate, and JumpThreading would +; clone the call into multiple predecessor blocks when its arguments were +; PHIs whose incoming values came from a scalar branch, causing +; "SetMeshOutputCounts cannot be called multiple times" validation +; failures. + +; The noduplicate attribute on the declaration (the call site carries none) +; must prevent JumpThreading from duplicating the call. + +; CHECK-LABEL: define void @test +; CHECK: call void @dx.op.setMeshOutputCounts(i32 168, +; CHECK-NOT: call void @dx.op.setMeshOutputCounts + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +define void @test(i1 %c, i32 %a, i32 %b) { +entry: + br i1 %c, label %then, label %merge + +then: + br label %merge + +merge: + %nv = phi i32 [ %a, %then ], [ 0, %entry ] + %np = phi i32 [ %b, %then ], [ 0, %entry ] + call void @dx.op.setMeshOutputCounts(i32 168, i32 %nv, i32 %np) + br i1 %c, label %t2, label %t3 + +t2: + ret void + +t3: + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.setMeshOutputCounts(i32, i32, i32) #0 + +attributes #0 = { noduplicate nounwind } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 634b62cc6f..1403d78719 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -4156,7 +4156,7 @@ def UFI(name, **mappings): "SetMeshOutputCounts", "Mesh shader intrinsic SetMeshOutputCounts", "v", - "", + "nd", [ retvoid_param, db_dxil_param(2, "i32", "numVertices", "number of output vertices"),