Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/DXIL.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3096,8 +3096,8 @@ ID Name Description
2147483676 LinAlgMatrixAccumulateToMemory accumulates a matrix to groupshared memory
2147483677 LinAlgMatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix
2147483678 LinAlgConvert Convert vector components from one interpretation to another
2147483679 ReservedE0 reserved
2147483680 ReservedE1 reserved
2147483679 LinAlgVectorAccumulateToDescriptor Accumulates given vector to the buffer at the given offset
2147483680 ReservedE0 reserved
2147483681 DebugBreak triggers a breakpoint if a debugger is attached
2147483682 IsDebuggerPresent returns true if a debugger is attached
========== ======================================== ===================================================================================================================
Expand Down
17 changes: 11 additions & 6 deletions include/dxc/DXIL/DxilConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,7 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps;
// Enumeration for ExperimentalOps DXIL operations
enum class OpCode : unsigned {
//
ReservedE0 = 31, // reserved
ReservedE1 = 32, // reserved
ReservedE0 = 32, // reserved

// Debugging
DebugBreak = 33, // triggers a breakpoint if a debugger is attached
Expand Down Expand Up @@ -597,6 +596,8 @@ enum class OpCode : unsigned {
LinAlgMatrixStoreToDescriptor =
20, // stores a matrix to a RWByteAddressBuffer
LinAlgMatrixStoreToMemory = 21, // stores a matrix to groupshared memory
LinAlgVectorAccumulateToDescriptor =
31, // Accumulates given vector to the buffer at the given offset

// No-op
ExperimentalNop = 0, // nop does nothing
Expand Down Expand Up @@ -1355,10 +1356,13 @@ enum class OpCode : unsigned {
// LinAlgConvert = 0x8000001E, 2147483678U, -2147483618
EXP_OPCODE(ExperimentalOps, LinAlgConvert), // Convert vector components from
// one interpretation to another
// ReservedE0 = 0x8000001F, 2147483679U, -2147483617
// LinAlgVectorAccumulateToDescriptor = 0x8000001F, 2147483679U, -2147483617
EXP_OPCODE(
ExperimentalOps,
LinAlgVectorAccumulateToDescriptor), // Accumulates given vector to the
// buffer at the given offset
// ReservedE0 = 0x80000020, 2147483680U, -2147483616
EXP_OPCODE(ExperimentalOps, ReservedE0), // reserved
// ReservedE1 = 0x80000020, 2147483680U, -2147483616
EXP_OPCODE(ExperimentalOps, ReservedE1), // reserved
// DebugBreak = 0x80000021, 2147483681U, -2147483615
EXP_OPCODE(ExperimentalOps,
DebugBreak), // triggers a breakpoint if a debugger is attached
Expand Down Expand Up @@ -1544,6 +1548,7 @@ enum class OpCodeClass : unsigned {
LinAlgMatrixSetElement,
LinAlgMatrixStoreToDescriptor,
LinAlgMatrixStoreToMemory,
LinAlgVectorAccumulateToDescriptor,

// Mesh shader instructions
EmitIndices,
Expand Down Expand Up @@ -1730,7 +1735,7 @@ enum class OpCodeClass : unsigned {
NodeOutputIsValid,
OutputComplete,

NumOpClasses = 222, // exclusive last value of enumeration
NumOpClasses = 223, // exclusive last value of enumeration
};
// OPCODECLASS-ENUM:END

Expand Down
34 changes: 34 additions & 0 deletions include/dxc/DXIL/DxilInstructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -10960,6 +10960,40 @@ struct DxilInst_LinAlgConvert {
void set_outputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); }
};

/// This instruction Accumulates given vector to the buffer at the given offset
struct DxilInst_LinAlgVectorAccumulateToDescriptor {
llvm::Instruction *Instr;
// Construction and identification
DxilInst_LinAlgVectorAccumulateToDescriptor(llvm::Instruction *pInstr)
: Instr(pInstr) {}
operator bool() const {
return hlsl::OP::IsDxilOpFuncCallInst(
Instr, hlsl::OP::OpCode::LinAlgVectorAccumulateToDescriptor);
}
// Validation support
bool isAllowed() const { return true; }
bool isArgumentListValid() const {
if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands())
return false;
return true;
}
// Metadata
bool requiresUniformInputs() const { return false; }
// Operand indexes
enum OperandIdx {
arg_inputVector = 1,
arg_handle = 2,
arg_offset = 3,
};
// Accessors
llvm::Value *get_inputVector() const { return Instr->getOperand(1); }
void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); }
llvm::Value *get_handle() const { return Instr->getOperand(2); }
void set_handle(llvm::Value *val) { Instr->setOperand(2, val); }
llvm::Value *get_offset() const { return Instr->getOperand(3); }
void set_offset(llvm::Value *val) { Instr->setOperand(3, val); }
};

/// This instruction triggers a breakpoint if a debugger is attached
struct DxilInst_DebugBreak {
llvm::Instruction *Instr;
Expand Down
3 changes: 2 additions & 1 deletion include/dxc/HlslIntrinsicOp.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ enum class IntrinsicOp {
IOP___builtin_LinAlg_MatrixStoreToMemory = 410,
IOP___builtin_LinAlg_MatrixVectorMultiply = 418,
IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 419,
IOP___builtin_LinAlg_VectorAccumulateToDescriptor = 423,
IOP_abort = 102,
IOP_abs = 103,
IOP_acos = 104,
Expand Down Expand Up @@ -429,7 +430,7 @@ enum class IntrinsicOp {
IOP_usign = 355,
MOP_InterlockedUMax = 356,
MOP_InterlockedUMin = 357,
Num_Intrinsics = 423,
Num_Intrinsics = 424,
};
inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) {
switch (opcode) {
Expand Down
34 changes: 19 additions & 15 deletions lib/DXIL/DxilOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2984,6 +2984,14 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = {
2,
{{0x400}, {0x400}},
{{0xe7}, {0xe7}}}, // Overloads: <hfdwil,<hfdwil
{OC::LinAlgVectorAccumulateToDescriptor,
"LinAlgVectorAccumulateToDescriptor",
OCC::LinAlgVectorAccumulateToDescriptor,
"linAlgVectorAccumulateToDescriptor",
Attribute::None,
1,
{{0x400}},
{{0xe7}}}, // Overloads: <hfwidl

{OC::ReservedE0,
"ReservedE0",
Expand All @@ -2993,14 +3001,6 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = {
0,
{},
{}}, // Overloads: v
{OC::ReservedE1,
"ReservedE1",
OCC::Reserved,
"reserved",
Attribute::None,
0,
{},
{}}, // Overloads: v

// Debugging
{OC::DebugBreak,
Expand Down Expand Up @@ -3956,11 +3956,12 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
// LinAlgMatVecMulAdd=2147483674,
// LinAlgMatrixAccumulateToDescriptor=2147483675,
// LinAlgMatrixOuterProduct=2147483677, LinAlgConvert=2147483678,
// DebugBreak=2147483681, IsDebuggerPresent=2147483682
// LinAlgVectorAccumulateToDescriptor=2147483679, DebugBreak=2147483681,
// IsDebuggerPresent=2147483682
if (op == 2147483648 || (2147483652 <= op && op <= 2147483653) ||
(2147483656 <= op && op <= 2147483657) || op == 2147483662 ||
op == 2147483670 || (2147483673 <= op && op <= 2147483675) ||
(2147483677 <= op && op <= 2147483678) ||
(2147483677 <= op && op <= 2147483679) ||
(2147483681 <= op && op <= 2147483682)) {
major = 6;
minor = 10;
Expand Down Expand Up @@ -6683,13 +6684,16 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
A(pI32);
A(pI32);
break;

//
case OpCode::ReservedE0:
case OpCode::LinAlgVectorAccumulateToDescriptor:
A(pV);
A(pI32);
A(pETy);
A(pRes);
A(pI32);
break;
case OpCode::ReservedE1:

//
case OpCode::ReservedE0:
A(pV);
A(pI32);
break;
Expand Down Expand Up @@ -6882,6 +6886,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
case OpCode::LinAlgMatrixGetCoordinate:
case OpCode::LinAlgMatrixStoreToDescriptor:
case OpCode::LinAlgMatrixAccumulateToDescriptor:
case OpCode::LinAlgVectorAccumulateToDescriptor:
if (FT->getNumParams() <= 1)
return nullptr;
return FT->getParamType(1);
Expand Down Expand Up @@ -7009,7 +7014,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
case OpCode::ClusterID:
case OpCode::LinAlgMatrixQueryAccumulatorLayout:
case OpCode::ReservedE0:
case OpCode::ReservedE1:
case OpCode::DebugBreak:
case OpCode::IsDebuggerPresent:
return Type::getVoidTy(Ctx);
Expand Down
29 changes: 29 additions & 0 deletions lib/HLSL/HLOperationLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7163,6 +7163,31 @@ Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
return nullptr;
}

// Lowers a vector-accumulate HL intrinsic call into a call of the DXIL
// operation selected by OpCode, overloaded on the type of the input vector.
//
// The HL call's argument operands 1..3 (input vector, destination buffer,
// offset into that buffer) are forwarded unchanged after the opcode constant.
// IOP, ObjHelper and Translated are not used by this lowering.
//
// Returns the newly created DXIL call, inserted before CI.
Value *TranslateLinAlgVectorAccumulate(CallInst *CI, IntrinsicOp IOP,
                                       OP::OpCode OpCode,
                                       HLOperationLowerHelper &Helper,
                                       HLObjectOperationLowerHelper *ObjHelper,
                                       bool &Translated) {
  hlsl::OP &DxilOps = Helper.hlslOP;
  IRBuilder<> Builder(CI);

  // First DXIL argument: the opcode as a 32-bit constant.
  Constant *OpCodeArg = DxilOps.GetU32Const(static_cast<unsigned>(OpCode));

  // Vector whose components are accumulated.
  Value *Vec = CI->getArgOperand(1);

  // Destination buffer and the offset within it.
  Value *DestBuffer = CI->getArgOperand(2);
  Value *DestOffset = CI->getArgOperand(3);

  // The DXIL function is overloaded on the input vector type.
  Function *OpFunc = DxilOps.GetOpFunc(OpCode, Vec->getType());

  Value *Args[] = {OpCodeArg, Vec, DestBuffer, DestOffset};
  return Builder.CreateCall(OpFunc, Args);
}

} // namespace

// Lower table.
Expand Down Expand Up @@ -7957,6 +7982,10 @@ constexpr IntrinsicLower gLowerTable[] = {

{IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert,
DXIL::OpCode::LinAlgConvert},
{IntrinsicOp::IOP___builtin_LinAlg_VectorAccumulateToDescriptor,
TranslateLinAlgVectorAccumulate,
DXIL::OpCode::LinAlgVectorAccumulateToDescriptor},

};
constexpr size_t NumLowerTableEntries =
sizeof(gLowerTable) / sizeof(gLowerTable[0]);
Expand Down
25 changes: 23 additions & 2 deletions tools/clang/lib/Headers/hlsl/dx/linalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -506,9 +506,23 @@ typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value,
Multiply(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
vector<InputElTy, K> Vec) {
vector<OutputElTy, M> Result;
__builtin_LinAlg_MatrixVectorMultiply(
Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value, Vec,
__detail::TypeTraits<InputElTy>::CompType);
return Result;
}

template <typename OutputElTy, typename InputElTy, ComponentEnum InputInterp,
SIZE_TYPE M, SIZE_TYPE K, SIZE_TYPE VecK, ComponentEnum MatrixDT>
typename hlsl::enable_if<
InterpretedVector<InputElTy, VecK, InputInterp>::Size == K,
vector<OutputElTy, M> >::type
Multiply(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
InterpretedVector<InputElTy, VecK, InputInterp> InterpVec) {
vector<OutputElTy, M> Result;
__builtin_LinAlg_MatrixVectorMultiply(Result, MatrixA.__handle,
hlsl::is_signed<OutputElTy>::value, Vec,
MatrixDT);
hlsl::is_signed<OutputElTy>::value,
InterpVec.Data, InputInterp);
return Result;
}

Expand Down Expand Up @@ -650,6 +664,13 @@ OuterProduct(vector<InputElTy, M> VecA, vector<InputElTy, N> VecB) {
return Result;
}

// Accumulates the M-element vector Vec to the buffer Res at StartOffset by
// forwarding all three arguments, in that order, to
// __builtin_LinAlg_VectorAccumulateToDescriptor. Returns nothing.
// Participates in overload resolution only when InputElTy satisfies
// hlsl::is_arithmetic.
// NOTE(review): the unit of StartOffset is not visible here (presumably
// bytes, since Res is a RWByteAddressBuffer) - confirm.
template <typename InputElTy, SIZE_TYPE M>
typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value, void>::type
InterlockedAccumulate(vector<InputElTy, M> Vec, RWByteAddressBuffer Res,
uint StartOffset) {
__builtin_LinAlg_VectorAccumulateToDescriptor(Vec, Res, StartOffset);
}

} // namespace linalg

} // namespace dx
Expand Down
104 changes: 104 additions & 0 deletions tools/clang/test/CodeGenDXIL/hlsl/linalg/linalg-mat-vec-mul.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// ITY represents a type that may be an interpreted type
// NTY must be an unpacked native type
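// PTY is the packed 8-bit interpretation; the run lines below use I8, U8, F8_E4M3FN and F8_E5M2
// CTY is the element type of the input that gets converted to PTY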

// Two simple initial tests
// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F32 -DITY=F32 -DPTY=I8 -DCTY=I32 %s | FileCheck %s -Dntype=float -Dnty=f32 -Dnen=9 -Dnsg=true -Ditype=float -Dity=f32 -Dien=9 -Dctype=i32 -Dcty=i32 -Dcen=4 -Dpen=19
// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=I32 -DITY=F16 -DPTY=F8_E4M3FN -DCTY=F16 %s | FileCheck %s -Dntype=i32 -Dnty=i32 -Dnen=4 -Dnsg=true -Ditype=half -Dity=f16 -Dien=8 -Dctype=half -Dcty=f16 -Dcen=8 -Dpen=21

// More exhaustive run through of all types verifying the dimension matching
// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=U64 -DITY=I16 -DPTY=F8_E4M3FN -DCTY=F64 %s | FileCheck %s -Dntype=i64 -Dnty=i64 -Dnen=7 -Dnsg=false -Ditype=i16 -Dity=i16 -Dien=2 -Dctype=double -Dcty=f64 -Dcen=10 -Dpen=21
// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F16 -DITY=U32 -DPTY=F8_E5M2 -DCTY=F32 %s | FileCheck %s -Dntype=half -Dnty=f16 -Dnen=8 -Dnsg=true -Ditype=i32 -Dity=i32 -Dien=5 -Dctype=float -Dcty=f32 -Dcen=9 -Dpen=22
// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F32 -DITY=I64 -DPTY=I8 -DCTY=I64 %s | FileCheck %s -Dntype=float -Dnty=f32 -Dnen=9 -Dnsg=true -Ditype=i64 -Dity=i64 -Dien=6 -Dctype=i64 -Dcty=i64 -Dcen=6 -Dpen=19
// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F64 -DITY=F32 -DPTY=U8 -DCTY=F32 %s | FileCheck %s -Dntype=double -Dnty=f64 -Dnen=10 -Dnsg=true -Ditype=float -Dity=f32 -Dien=9 -Dctype=float -Dcty=f32 -Dcen=9 -Dpen=20
// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=I16 -DITY=F64 -DPTY=F8_E4M3FN -DCTY=U32 %s | FileCheck %s -Dntype=i16 -Dnty=i16 -Dnen=2 -Dnsg=true -Ditype=double -Dity=f64 -Dien=10 -Dctype=i32 -Dcty=i32 -Dcen=5 -Dpen=21


#include <dx/linalg.h>
using namespace dx::linalg;

ByteAddressBuffer Buf;
RWByteAddressBuffer OutBuf;


using nType = __detail::ComponentTypeTraits<ComponentType::NTY>::Type;
using iType = __detail::ComponentTypeTraits<ComponentType::ITY>::Type;
using cType = __detail::ComponentTypeTraits<ComponentType::CTY>::Type;

// CHECK: %dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 = type { i8* }
// CHECK: %dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 = type { i8* }
// CHECK: %dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 = type { i8* }

// Basic test using unpacked types and native vectors
// CHECK-LABEL: define void @"\01?NativeTest
export void NativeTest(vector<nType, 4> Input) {
// Multiplies an 8x4 thread-scope A matrix of ITY components by the native
// 4-element nType input vector, then accumulates the 8-element result into
// OutBuf with 47 as the start offset.

typedef Matrix<ComponentType::ITY, 8, 4, MatrixUse::A, MatrixScope::Thread> MatrixTy;

// Column-major load from Buf; the expected IR passes 24 through unchanged and
// ends with layout value 1.
// NOTE(review): presumably 24 is the start offset and the third argument the
// stride - confirm against Matrix::Load.
// CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}})
// CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 11, i32 0 })
// CHECK: [[lmtx:%.*]] = call %dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC[[ien]]M8N4U0S0(i32 -2147483634, %dx.types.Handle [[buf]], i32 24, i32 {{[0-9]*}}, i32 1{{.*}}
MatrixTy Mat = MatrixTy::Load<MatrixLayout::ColMajor>(Buf, 24, 8 * sizeof(iType));

// Native-vector overload: the interpretation operand in the expected IR is the
// enum value of nType (nen).
// CHECK: [[ret:%.*]] = call <8 x [[ntype]]> @dx.op.linAlgMatVecMul.v8[[nty]].mC[[ien]]M8N4U0S0.v4[[nty]](i32 -2147483623, %dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 [[lmtx]], i1 [[nsg]], <4 x [[ntype]]> %Input, i32 [[nen]])
vector<nType, 8> OutVec = Multiply<nType>(Mat, Input);

// The multiply result is accumulated through the read-write buffer handle.
// CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}})
// CHECK: [[rwbuf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 })
// CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v8[[nty]](i32 -2147483617, <8 x [[ntype]]> [[ret]], %dx.types.Handle [[rwbuf]], i32 47)
InterlockedAccumulate(OutVec, OutBuf, 47);
}

// Check matrix with interpreted input vector
// CHECK-LABEL: define void @"\01?InterpretedTest
export void InterpretedTest(vector<iType, 32> Input) {
// Multiplies a 24x32 thread-scope A matrix of ITY components by an
// interpreted 32-element vector, then accumulates the 24-element result into
// OutBuf with 62 as the start offset.

typedef Matrix<ComponentType::ITY, 24, 32, MatrixUse::A, MatrixScope::Thread> MatrixTy;

// Build an interpreted vector from the native input, tagged with the ITY
// interpretation (the expected IR below passes %Input through unchanged)
InterpretedVector<iType, 32, ComponentType::ITY> IVec = MakeInterpretedVector<ComponentType::ITY>(Input);

// Row-major load from Buf; the expected IR passes 184 through unchanged and
// ends with layout value 0.
// CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}})
// CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 11, i32 0 })
// CHECK: [[lmtx:%.*]] = call %dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC[[ien]]M24N32U0S0(i32 -2147483634, %dx.types.Handle [[buf]], i32 184, i32 {{[0-9]*}}, i32 0{{.*}}
MatrixTy Mat = MatrixTy::Load<MatrixLayout::RowMajor>(Buf, 184, 24 * sizeof(iType));

// Interpreted-vector overload: the interpretation operand in the expected IR
// is the vector's own interpretation (ien).
// CHECK: [[ret:%.*]] = call <24 x [[ntype]]> @dx.op.linAlgMatVecMul.v24[[nty]].mC[[ien]]M24N32U0S0.v32[[ity]](i32 -2147483623, %dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 [[lmtx]], i1 [[nsg]], <32 x [[itype]]> %Input, i32 [[ien]])
vector<nType, 24> OutVec = Multiply<nType>(Mat, IVec);

// The multiply result is accumulated through the read-write buffer handle.
// CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}})
// CHECK: [[rwbuf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 })
// CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v24[[nty]](i32 -2147483617, <24 x [[ntype]]> [[ret]], %dx.types.Handle [[rwbuf]], i32 62)
InterlockedAccumulate(OutVec, OutBuf, 62);

}

// Check matrix with packed type interpreted input vector
// CHECK-LABEL: define void @"\01?PackedInterpretedTest
export void PackedInterpretedTest(vector<cType, 32> Input) {
// Converts the 32-element CTY input into a packed PTY interpreted vector,
// multiplies a 124x32 thread-scope A matrix of ITY components by it, then
// accumulates the 124-element result into OutBuf with 162 as the start offset.

typedef Matrix<ComponentType::ITY, 124, 32, MatrixUse::A, MatrixScope::Thread> MatrixTy;

// Convert the 32 CTY input elements to the PTY interpretation, held in an
// interpreted vector of 8 uints
// CHECK: [[ivec:%.*]] = call <8 x i32> @dx.op.linAlgConvert.v8i32.v32[[cty]](i32 -2147483618, <32 x [[ctype]]> %Input, i32 [[cen]], i32 [[pen]])
InterpretedVector<uint, 8, ComponentType::PTY> IVec = Convert<ComponentEnum::PTY, ComponentEnum::CTY>(Input);

// Row-major load from Buf; the expected IR passes 184 through unchanged and
// ends with layout value 0.
// CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}})
// CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 11, i32 0 })
// CHECK: [[lmtx:%.*]] = call %dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC[[ien]]M124N32U0S0(i32 -2147483634, %dx.types.Handle [[buf]], i32 184, i32 {{[0-9]*}}, i32 0{{.*}}
MatrixTy Mat = MatrixTy::Load<MatrixLayout::RowMajor>(Buf, 184, 124 * sizeof(iType));

// The multiply consumes the converted vector; the interpretation operand in
// the expected IR is the packed interpretation (pen).
// CHECK: [[ret:%.*]] = call <124 x [[ntype]]> @dx.op.linAlgMatVecMul.v124[[nty]].mC[[ien]]M124N32U0S0.v8i32(i32 -2147483623, %dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 [[lmtx]], i1 [[nsg]], <8 x i32> [[ivec]], i32 [[pen]])
vector<nType, 124> OutVec = Multiply<nType>(Mat, IVec);

// The multiply result is accumulated through the read-write buffer handle.
// CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}})
// CHECK: [[rwbuf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 })
// CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v124[[nty]](i32 -2147483617, <124 x [[ntype]]> [[ret]], %dx.types.Handle [[rwbuf]], i32 162)
InterlockedAccumulate(OutVec, OutBuf, 162);
}

// CHECK-LABEL: !dx.targetTypes
// CHECK-SAME: = !{[[md0:[!][0-9]*]], [[md1:[!][0-9]*]], [[md2:[!][0-9]*]]
// CHECK: [[md0]] = !{%dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 undef, i32 [[ien]], i32 8, i32 4, i32 0, i32 0}
// CHECK: [[md1]] = !{%dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 undef, i32 [[ien]], i32 24, i32 32, i32 0, i32 0}
// CHECK: [[md2]] = !{%dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 undef, i32 [[ien]], i32 124, i32 32, i32 0, i32 0}
Loading
Loading