From f8f800ba3576197feab88e280908758f02895c60 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 20 Mar 2026 17:28:08 -0700 Subject: [PATCH] [SM6.10] Add alignment argument to LinAlg Matrix ops Load/Store/AccumulateToDescriptor Fixes #8284 --- include/dxc/DXIL/DxilInstructions.h | 15 +++++-- lib/DXIL/DxilOperations.cpp | 3 ++ lib/HLSL/HLOperationLower.cpp | 10 +++-- .../matrixaccumulatetodescriptor/nominal.hlsl | 10 ++--- .../matrixloadfromdescriptor/nominal.hlsl | 10 ++--- .../matrixstoretodescriptor/nominal.hlsl | 10 ++--- .../LinAlgMatrix/linalgmatrix-as.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-cs.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-ds.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-gs.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-hs.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-ms.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-node.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-ps.ll | 12 +++--- .../LinAlgMatrix/linalgmatrix-raytracing.ll | 42 +++++++++---------- .../LinAlgMatrix/linalgmatrix-vs.ll | 12 +++--- .../linalg/builtins/matrix-builtins-ast.hlsl | 15 ++++--- .../hlsl/linalg/builtins/stage-errors.hlsl | 6 +-- .../builtins/unavailable_pre_sm610.hlsl | 6 +-- utils/hct/gen_intrin_main.txt | 8 ++-- utils/hct/hctdb.py | 3 ++ 21 files changed, 133 insertions(+), 113 deletions(-) diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 941eab6474..c840a2fc6a 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10606,7 +10606,7 @@ struct DxilInst_LinAlgMatrixLoadFromDescriptor { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -10618,6 +10618,7 @@ struct DxilInst_LinAlgMatrixLoadFromDescriptor { arg_offset = 2, arg_stride = 3, arg_layout = 4, + arg_align = 5, }; // Accessors llvm::Value 
*get_handle() const { return Instr->getOperand(1); } @@ -10628,6 +10629,8 @@ struct DxilInst_LinAlgMatrixLoadFromDescriptor { void set_stride(llvm::Value *val) { Instr->setOperand(3, val); } llvm::Value *get_layout() const { return Instr->getOperand(4); } void set_layout(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_align() const { return Instr->getOperand(5); } + void set_align(llvm::Value *val) { Instr->setOperand(5, val); } }; /// This instruction fills a matrix with data from a groupshared array @@ -10805,7 +10808,7 @@ struct DxilInst_LinAlgMatrixStoreToDescriptor { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (7 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -10818,6 +10821,7 @@ struct DxilInst_LinAlgMatrixStoreToDescriptor { arg_offset = 3, arg_stride = 4, arg_layout = 5, + arg_align = 6, }; // Accessors llvm::Value *get_matrix() const { return Instr->getOperand(1); } @@ -10830,6 +10834,8 @@ struct DxilInst_LinAlgMatrixStoreToDescriptor { void set_stride(llvm::Value *val) { Instr->setOperand(4, val); } llvm::Value *get_layout() const { return Instr->getOperand(5); } void set_layout(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_align() const { return Instr->getOperand(6); } + void set_align(llvm::Value *val) { Instr->setOperand(6, val); } }; /// This instruction stores a matrix to groupshared memory @@ -11042,7 +11048,7 @@ struct DxilInst_LinAlgMatrixAccumulateToDescriptor { // Validation support bool isAllowed() const { return true; } bool isArgumentListValid() const { - if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + if (7 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; return true; } @@ -11055,6 +11061,7 @@ struct DxilInst_LinAlgMatrixAccumulateToDescriptor { arg_offset = 3, arg_stride = 4, arg_layout = 5, + arg_align = 6, }; // Accessors llvm::Value 
*get_matrix() const { return Instr->getOperand(1); } @@ -11067,6 +11074,8 @@ struct DxilInst_LinAlgMatrixAccumulateToDescriptor { void set_stride(llvm::Value *val) { Instr->setOperand(4, val); } llvm::Value *get_layout() const { return Instr->getOperand(5); } void set_layout(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_align() const { return Instr->getOperand(6); } + void set_align(llvm::Value *val) { Instr->setOperand(6, val); } }; /// This instruction accumulates a matrix to groupshared memory diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index ffff4eccd9..424a9a0fb7 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -6603,6 +6603,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); A(pI32); A(pI32); + A(pI32); break; case OpCode::LinAlgMatrixLoadFromMemory: A(EXT(0)); @@ -6644,6 +6645,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); A(pI32); A(pI32); + A(pI32); break; case OpCode::LinAlgMatrixStoreToMemory: A(pV); @@ -6694,6 +6696,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pI32); A(pI32); A(pI32); + A(pI32); break; case OpCode::LinAlgMatrixAccumulateToMemory: A(pV); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 6d718257d4..6cd2f83ad7 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6943,12 +6943,13 @@ Value *TranslateLinAlgMatrixAccumStoreToDescriptor( Value *Offset = CI->getArgOperand(3); Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Align = CI->getArgOperand(6); Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Matrix->getType()); - return Builder.CreateCall(DxilFunc, - {OpArg, Matrix, ResHandle, Offset, Stride, Layout}); + return Builder.CreateCall( + DxilFunc, {OpArg, Matrix, ResHandle, Offset, Stride, Layout, Align}); } Value 
*TranslateLinAlgMatVecMul(CallInst *CI, IntrinsicOp IOP, @@ -7024,12 +7025,13 @@ Value *TranslateLinAlgMatrixLoadFromDescriptor( Value *Offset = CI->getArgOperand(3); Value *Stride = CI->getArgOperand(4); Value *Layout = CI->getArgOperand(5); + Value *Align = CI->getArgOperand(6); Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); Function *DxilFunc = HlslOp->GetOpFunc(OpCode, MatrixType); - Value *Matrix = - Builder.CreateCall(DxilFunc, {OpArg, ResHandle, Offset, Stride, Layout}); + Value *Matrix = Builder.CreateCall( + DxilFunc, {OpArg, ResHandle, Offset, Stride, Layout, Align}); Builder.CreateStore(Matrix, MatrixPtr); return nullptr; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl index de0ebd0123..39de2ed2c9 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl @@ -9,11 +9,11 @@ void main() { // CHECK-LABEL: define void @main() // CHECK: call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U1S2(i32 -2147483621, - // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 5, i32 5, i32 5) - // CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 5, i32 5, i32 5, i32 4) + // CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) - // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32)" - // CHECK2-SAME: (i32 419, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 5, i32 5, i32 5) + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32, i32)" + // 
CHECK2-SAME: (i32 419, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 5, i32 5, i32 5, i32 4) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat; - __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, outbuf, 5, 5, 5); + __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, outbuf, 5, 5, 5, 4); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl index 4a7e5f30ea..a58ac98117 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl @@ -9,11 +9,11 @@ void main() { // CHECK-LABEL: define void @main() // CHECK: %{{.*}} = call %dx.types.LinAlgMatrixC1M1N1U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC1M1N1U0S0 - // CHECK-SAME: (i32 -2147483634, %dx.types.Handle %{{.*}}, i32 0, i32 0, i32 0) - // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + // CHECK-SAME: (i32 -2147483634, %dx.types.Handle %{{.*}}, i32 0, i32 0, i32 0, i32 4) + // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) - // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC1M1N1U0S0*, %dx.types.Handle, i32, i32, i32) - // CHECK2-SAME: "(i32 410, %dx.types.LinAlgMatrixC1M1N1U0S0* %mat, %dx.types.Handle {{.*}}, i32 0, i32 0, i32 0) + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC1M1N1U0S0*, %dx.types.Handle, i32, i32, i32, i32) + // CHECK2-SAME: "(i32 410, %dx.types.LinAlgMatrixC1M1N1U0S0* %mat, %dx.types.Handle {{.*}}, i32 0, i32 0, i32 0, i32 4) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 1, 1, 0, 0)]] mat; - __builtin_LinAlg_MatrixLoadFromDescriptor(mat, inbuf, 0, 0, 0); + __builtin_LinAlg_MatrixLoadFromDescriptor(mat, inbuf, 0, 0, 0, 4); } diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl index 049ec1fe5e..84018dce7b 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl @@ -9,11 +9,11 @@ void main() { // CHECK-LABEL: define void @main() // CHECK: call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U1S2(i32 -2147483628, - // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 1, i32 1, i32 0) - // CHECK-SAME: ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle %{{.*}}, i32 1, i32 1, i32 0, i32 4) + // CHECK-SAME: ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) - // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32) - // CHECK2-SAME: "(i32 413, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 1, i32 1, i32 0) + // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32, i32) + // CHECK2-SAME: "(i32 413, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 1, i32 1, i32 0, i32 4) __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1; - __builtin_LinAlg_MatrixStoreToDescriptor(mat1, outbuf, 1, 1, 0); + __builtin_LinAlg_MatrixStoreToDescriptor(mat1, outbuf, 1, 1, 0, 4); } diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll index 8295d09ba7..8094da77a0 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-as.ll @@ -30,13 +30,13 @@ define void 
@mainAS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -76,7 +76,7 @@ define void @mainAS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - 
call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -100,16 +100,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 
@dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll index 68e4bf24a6..d01ff35ae5 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-cs.ll @@ -29,13 +29,13 @@ define void @mainCS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call 
%dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -75,7 +75,7 @@ define void @mainCS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -96,16 +96,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, 
%dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll index 6f29147319..b6b28b479a 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ds.ll @@ -45,13 +45,13 @@ define void @MainDS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; 
LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -91,7 +91,7 @@ define void @MainDS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; 
dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -120,16 +120,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll 
b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll index a56a3d1e0b..8586c38cf1 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-gs.ll @@ -45,13 +45,13 @@ define void @MainGS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -91,7 +91,7 @@ define void 
@MainGS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -119,16 +119,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, 
%dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll index 6dcc3accfb..ddb811ba1a 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-hs.ll @@ -45,13 +45,13 @@ define void @MainHS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call 
%dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -91,7 +91,7 @@ define void @MainHS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -125,16 +125,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 
@dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll index 9f04cb0d46..1d7cbca8a5 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ms.ll @@ -30,13 +30,13 @@ define void @mainMeS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; 
dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -76,7 +76,7 @@ define void @mainMeS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void 
@dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -103,16 +103,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare 
%dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll index c77999c15c..aeb89e9b2c 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-node.ll @@ -46,13 +46,13 @@ define void @mainNS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = 
call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -92,7 +92,7 @@ define void @mainNS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -113,16 +113,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void 
@dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll index 7b8072e0ce..4c9cc9057f 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-ps.ll @@ -44,13 +44,13 @@ define void @mainPS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; 
LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -90,7 +90,7 @@ define void @mainPS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* 
@"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -116,16 +116,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll index 14588f4b7f..5d7243d5ba 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-raytracing.ll @@ -135,9 +135,9 @@ 
define void @"\01?MainRG@@YAXXZ"() #0 { ; Built-ins allowed in all stages ; %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) @@ -153,7 +153,7 @@ define void @"\01?MainRG@@YAXXZ"() #0 { %v12 = call
%dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627,
%dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -169,9 +169,9 @@ define void @"\01?MainIS@@YAXXZ"() #0 { ; Built-ins allowed in all stages ; %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() %v6 = call <4 x i32>
@dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) @@ -187,7 +187,7 @@ define void @"\01?MainIS@@YAXXZ"() #0 { %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x
float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -203,9 +203,9 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a ; Built-ins allowed in all stages ; %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2
@dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) @@ -221,7 +221,7 @@ define void @"\01?MainCL@@YAXUAttribs@@@Z"(%struct.Attribs* noalias nocapture %a %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x
float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -237,9 +237,9 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal ; Built-ins allowed in all stages ; %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ;
LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) @@ -255,7 +255,7 @@ define void @"\01?MainAH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628,
%dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -271,9 +271,9 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal ; Built-ins allowed in all stages ; %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ;
LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) @@ -289,7 +289,7 @@ define void @"\01?MainCH@@YAXURayPayload@@UAttribs@@@Z"(%struct.RayPayload* noal %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; 
LinAlgMatrixSetElement(matrix,threadLocalIndex,value) - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -305,9 +305,9 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt ; Built-ins allowed in all stages ; %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, 
%dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) %v5 = call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626) ; LinAlgMatrixQueryAccumulatorLayout() %v6 = call <4 x i32> @dx.op.linAlgMatVecMul.v4i32.mC4M5N4U0S2.v4i32(i32 -2147483623, %dx.types.LinAlgMatrixC4M5N4U0S2 %v4, <4 x i32> , i32 1) ; LinAlgMatVecMul(matrix,inputVector,interpretation) @@ -323,7 +323,7 @@ define void @"\01?MainMS@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapt %v12 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483625, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8) ; LinAlgMatrixMultiply(matrixA,matrixB) %v13 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiplyAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2.mC4M5N4U2S2(i32 -2147483637, 
%dx.types.LinAlgMatrixC4M5N4U0S2 %v9, %dx.types.LinAlgMatrixC4M4N5U1S2 %v8, %dx.types.LinAlgMatrixC4M5N4U2S2 %v12) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC) %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) %v15 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromMemory.mC4M5N4U0S2.f32(i32 -2147483633, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout) call void @dx.op.linAlgMatrixStoreToMemory.mC4M5N4U0S2.f32(i32 -2147483627, %dx.types.LinAlgMatrixC4M5N4U0S2 %v15, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout) @@ -338,16 +338,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 
@dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll index cacff8b532..1130019c50 100644 --- a/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll +++ b/tools/clang/test/LitDXILValidation/LinAlgMatrix/linalgmatrix-vs.ll @@ -45,13 +45,13 @@ define void @mainVS() { %v1 = call %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32 -2147483624, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.LinAlgMatrixC4M4N5U1S2 undef) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) ; 
dx.op.linAlgMatrixAccumulateToDescriptor - call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout) + call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32 -2147483621, %dx.types.LinAlgMatrixC4M5N4U0S2 undef, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixLength %v2 = call i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32 -2147483632, %dx.types.LinAlgMatrixC4M5N4U0S2 undef) ; LinAlgMatrixLength(matrix) ; dx.op.linAlgMatrixLoadFromDescriptor - %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout) + %v3 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32 -2147483634, %dx.types.Handle %handle, i32 5, i32 5, i32 5, i32 4) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) ; dx.op.linAlgMatrixOuterProduct %v4 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32 -2147483619, <4 x i32> , <4 x i32> ) ; LinAlgMatrixOuterProduct(vectorA,vectorB) @@ -91,7 +91,7 @@ define void @mainVS() { %v14 = call %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixSetElement.mC4M5N4U0S2.mC4M5N4U0S2.i32(i32 -2147483629, %dx.types.LinAlgMatrixC4M5N4U0S2 %v9, i32 1, i32 1) ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value) ; dx.op.linAlgMatrixStoreToDescriptor - call void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout) + call void 
@dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32 -2147483628, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, %dx.types.Handle %handle, i32 1, i32 2, i32 3, i32 4) ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) ; dx.op.linAlgMatrixAccumulateToMemory call void @dx.op.linAlgMatrixAccumulateToMemory.mC4M5N4U0S2.f32(i32 -2147483620, %dx.types.LinAlgMatrixC4M5N4U0S2 %v14, float addrspace(3)* getelementptr inbounds ([64 x float], [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA", i32 0, i32 0), i32 0, i32 0, i32 0) ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout) @@ -117,16 +117,16 @@ declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixMultiply.mC4M5N4U2S2 declare %dx.types.LinAlgMatrixC4M5N4U2S2 @dx.op.linAlgMatrixAccumulate.mC4M5N4U2S2.mC4M5N4U0S2.mC4M4N5U1S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.LinAlgMatrixC4M4N5U1S2) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixStoreToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind -declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32) #0 +declare void @dx.op.linAlgMatrixAccumulateToDescriptor.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare i32 @dx.op.linAlgMatrixLength.mC4M5N4U0S2(i32, %dx.types.LinAlgMatrixC4M5N4U0S2) #0 ; Function Attrs: nounwind -declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32) #0 +declare %dx.types.LinAlgMatrixC4M5N4U0S2 @dx.op.linAlgMatrixLoadFromDescriptor.mC4M5N4U0S2(i32, %dx.types.Handle, i32, i32, i32, i32) #0 ; Function Attrs: nounwind declare %dx.types.LinAlgMatrixC4M5N4U0S2 
@dx.op.linAlgMatrixOuterProduct.mC4M5N4U0S2.v4i32.v4i32(i32, <4 x i32>, <4 x i32>) #0 diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl index 72af30e90d..625d75f3e1 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl @@ -34,15 +34,16 @@ void main() { // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" __builtin_LinAlg_MatrixAccumulate(mat1, mat2, mat3); -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' // CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 419 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, Buf, 1, 2, 3); + __builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, Buf, 1, 2, 3, 4); // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixAccumulateToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' @@ -85,15 +86,16 @@ void main() { // CHECK-NEXT: AvailabilityAttr {{.*}} 
Implicit 6.10 0 0 "" __builtin_LinAlg_MatrixLength(mat1); -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromDescriptor 'void (__builtin_LinAlgMatrix & {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromDescriptor 'void (__builtin_LinAlgMatrix & {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix &&__restrict {{.*}}' // CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 410 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, Buf, 0, 0, 0); + __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, Buf, 0, 0, 0, 4); // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLoadFromMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}' @@ -146,15 +148,16 @@ void main() { // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" __builtin_LinAlg_MatrixSetElement(mat2, mat1, 1, 1); -// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int)' extern +// CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToDescriptor 'void (__builtin_LinAlgMatrix {{.*}}, RWByteAddressBuffer, unsigned int, unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} matrix 
'__builtin_LinAlgMatrix {{.*}}' // CHECK-NEXT: ParmVarDecl {{.*}} buf 'RWByteAddressBuffer' // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int' // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int' // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int' // CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 413 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.10 0 0 "" - __builtin_LinAlg_MatrixStoreToDescriptor(mat1, Buf, 1, 2, 3); + __builtin_LinAlg_MatrixStoreToDescriptor(mat1, Buf, 1, 2, 3, 4); // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixStoreToMemory 'void (__builtin_LinAlgMatrix {{.*}}, float const __attribute__((address_space(3))) (&)[64], unsigned int, unsigned int, unsigned int)' extern // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}' diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl index c9ebd7adf8..c5a98e1f10 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/stage-errors.hlsl @@ -55,7 +55,7 @@ void CallFunction() #endif #ifdef MATRIX_STORE_TO_DESCRIPTOR - #define DO_FUNC __builtin_LinAlg_MatrixStoreToDescriptor(mat1, buf, 1, 2, 3); + #define DO_FUNC __builtin_LinAlg_MatrixStoreToDescriptor(mat1, buf, 1, 2, 3, 4); #endif #ifdef MATRIX_LENGTH @@ -81,9 +81,9 @@ void CallFunction() // The builtins below are allowed in all stages, if they raise an error // then the test will fail with "saw unexpected diagnostic" uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout(); - __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, buf, 5, 5, 5); + __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, buf, 5, 5, 5, 4); __builtin_LinAlg_MatrixOuterProduct(mat1, vecA, vecB); - __builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, buf, 1, 2, 3); + 
__builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, buf, 1, 2, 3, 4); __builtin_LinAlg_MatrixVectorMultiply(vecA, mat1, vecB, 1); __builtin_LinAlg_MatrixVectorMultiplyAdd(vecA, mat1, vecB, 2, vecC, 3); diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl index b128fbbe0c..deddeab7d1 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/unavailable_pre_sm610.hlsl @@ -22,7 +22,7 @@ void main() { __builtin_LinAlg_MatrixAccumulate(mat2, mat, mat); // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixAccumulateToDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, Buf, 9, 8, 7); + __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, Buf, 9, 8, 7, 4); // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixGetCoordinate potentially used by ''main'' requires shader model 6.10 or greater}} uint2 coord = __builtin_LinAlg_MatrixGetCoordinate(mat, 0); @@ -34,7 +34,7 @@ void main() { __builtin_LinAlg_MatrixLength(mat); // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixLoadFromDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixLoadFromDescriptor(mat, Buf, 1, 1, 1); + __builtin_LinAlg_MatrixLoadFromDescriptor(mat, Buf, 1, 1, 1, 4); // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixMatrixMultiply potentially used by ''main'' requires shader model 6.10 or greater}} __builtin_LinAlg_MatrixMatrixMultiply(mat2, mat, mat); @@ -52,7 +52,7 @@ void main() { __builtin_LinAlg_MatrixSetElement(mat, mat, 1, 1); // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixStoreToDescriptor potentially used by ''main'' requires shader model 6.10 or greater}} - __builtin_LinAlg_MatrixStoreToDescriptor(mat, Buf, 1, 1, 1); + __builtin_LinAlg_MatrixStoreToDescriptor(mat, 
Buf, 1, 1, 1, 4); // expected-error@+1{{intrinsic __builtin_LinAlg_MatrixVectorMultiply potentially used by ''main'' requires shader model 6.10 or greater}} __builtin_LinAlg_MatrixVectorMultiply(result, mat, vec1, 1); diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 78b3b7c5eb..70efad7803 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -399,14 +399,14 @@ void [[min_sm=6.10]] __builtin_VectorAccumulate(in LinAlg InputVector, in RWB void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(out LinAlgMatrix ret, in numeric value); void [[min_sm=6.10]] __builtin_LinAlg_CopyConvertMatrix(out LinAlgMatrix ret, in LinAlgMatrix source, in bool transpose); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in ByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in ByteAddressBuffer buf, in uint offset, in uint stride, in uint layout, in uint align); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromDescriptor(out LinAlgMatrix ret, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout, in uint align); void [[min_sm=6.10]] __builtin_LinAlg_MatrixLoadFromMemory(out LinAlgMatrix ret, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixLength(in LinAlgMatrix matrix); uint<2> [[min_sm=6.10]] __builtin_LinAlg_MatrixGetCoordinate(in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixGetElement(out numeric ret, in LinAlgMatrix matrix, in uint threadLocalIndex); void [[min_sm=6.10]] __builtin_LinAlg_MatrixSetElement(out LinAlgMatrix ret, in LinAlgMatrix matrix, in uint threadLocalIndex, in numeric 
value); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout, in uint align); void [[min_sm=6.10]] __builtin_LinAlg_MatrixStoreToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); uint [[min_sm=6.10]] __builtin_LinAlg_MatrixQueryAccumulatorLayout(); void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiply(out LinAlgMatrix matrixC, in LinAlgMatrix matrixA, in LinAlgMatrix matrixB); @@ -414,7 +414,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(out LinAlgM void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulate(out LinAlgMatrix matrixC, in LinAlgMatrix matrixLHS, in LinAlgMatrix matrixRHS); void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiply(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp); void [[min_sm=6.10]] __builtin_LinAlg_MatrixVectorMultiplyAdd(out numeric<> ret, in LinAlgMatrix mat, in numeric<> input, in uint input_interp, in numeric<> bias, in uint bias_interp); -void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout); +void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatrix matrix, in RWByteAddressBuffer buf, in uint offset, in uint stride, in uint layout, in uint align); void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric<> vecA, in numeric<> vecB); diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 
e88834aa62..b55601d11b 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -6404,6 +6404,7 @@ def populate_ExperimentalOps(self): "number of bytes between the start of each row or column", ), db_dxil_param(5, "i32", "layout", "memory layout of matrix elements"), + db_dxil_param(6, "i32", "align", "alignment of matrix elements"), ], ) @@ -6505,6 +6506,7 @@ def populate_ExperimentalOps(self): "number of bytes between the start of each row or column", ), db_dxil_param(6, "i32", "layout", "memory layout of matrix elements"), + db_dxil_param(7, "i32", "align", "alignment of matrix elements"), ], ) @@ -6622,6 +6624,7 @@ def populate_ExperimentalOps(self): "number of bytes between the start of each row or column", ), db_dxil_param(6, "i32", "layout", "memory layout of matrix elements"), + db_dxil_param(7, "i32", "align", "alignment of matrix elements"), ], )