diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 8891e7e056..89ad4c5661 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3096,8 +3096,8 @@ ID Name Description 2147483676 LinAlgMatrixAccumulateToMemory accumulates a matrix to groupshared memory 2147483677 LinAlgMatrixOuterProduct Outer products an M sized vector and a N sized vector producing an MxN matrix 2147483678 LinAlgConvert Convert vector components from one interpretation to another -2147483679 ReservedE0 reserved -2147483680 ReservedE1 reserved +2147483679 LinAlgVectorAccumulateToDescriptor Accumulates given vector to the buffer at the given offset +2147483680 ReservedE0 reserved 2147483681 DebugBreak triggers a breakpoint if a debugger is attached 2147483682 IsDebuggerPresent returns true if a debugger is attached ========== ======================================== =================================================================================================================== diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index a535b8d768..4705b90c55 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -533,8 +533,7 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps; // Enumeration for ExperimentalOps DXIL operations enum class OpCode : unsigned { // - ReservedE0 = 31, // reserved - ReservedE1 = 32, // reserved + ReservedE0 = 32, // reserved // Debugging DebugBreak = 33, // triggers a breakpoint if a debugger is attached @@ -597,6 +596,8 @@ enum class OpCode : unsigned { LinAlgMatrixStoreToDescriptor = 20, // stores a matrix to a RWByteAddressBuffer LinAlgMatrixStoreToMemory = 21, // stores a matrix to groupshared memory + LinAlgVectorAccumulateToDescriptor = + 31, // Accumulates given vector to the buffer at the given offset // No-op ExperimentalNop = 0, // nop does nothing @@ -1355,10 +1356,13 @@ enum class OpCode : unsigned { // LinAlgConvert = 0x8000001E, 2147483678U, -2147483618 EXP_OPCODE(ExperimentalOps, 
LinAlgConvert), // Convert vector components from // one interpretation to another - // ReservedE0 = 0x8000001F, 2147483679U, -2147483617 + // LinAlgVectorAccumulateToDescriptor = 0x8000001F, 2147483679U, -2147483617 + EXP_OPCODE( + ExperimentalOps, + LinAlgVectorAccumulateToDescriptor), // Accumulates given vector to the + // buffer at the given offset + // ReservedE0 = 0x80000020, 2147483680U, -2147483616 EXP_OPCODE(ExperimentalOps, ReservedE0), // reserved - // ReservedE1 = 0x80000020, 2147483680U, -2147483616 - EXP_OPCODE(ExperimentalOps, ReservedE1), // reserved // DebugBreak = 0x80000021, 2147483681U, -2147483615 EXP_OPCODE(ExperimentalOps, DebugBreak), // triggers a breakpoint if a debugger is attached @@ -1544,6 +1548,7 @@ enum class OpCodeClass : unsigned { LinAlgMatrixSetElement, LinAlgMatrixStoreToDescriptor, LinAlgMatrixStoreToMemory, + LinAlgVectorAccumulateToDescriptor, // Mesh shader instructions EmitIndices, @@ -1730,7 +1735,7 @@ enum class OpCodeClass : unsigned { NodeOutputIsValid, OutputComplete, - NumOpClasses = 222, // exclusive last value of enumeration + NumOpClasses = 223, // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 511f5b476f..d18f04d06a 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -10960,6 +10960,40 @@ struct DxilInst_LinAlgConvert { void set_outputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); } }; +/// This instruction Accumulates given vector to the buffer at the given offset +struct DxilInst_LinAlgVectorAccumulateToDescriptor { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_LinAlgVectorAccumulateToDescriptor(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::LinAlgVectorAccumulateToDescriptor); + } + // Validation support + bool isAllowed() 
const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector = 1, + arg_handle = 2, + arg_offset = 3, + }; + // Accessors + llvm::Value *get_inputVector() const { return Instr->getOperand(1); } + void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_handle() const { return Instr->getOperand(2); } + void set_handle(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_offset() const { return Instr->getOperand(3); } + void set_offset(llvm::Value *val) { Instr->setOperand(3, val); } +}; + /// This instruction triggers a breakpoint if a debugger is attached struct DxilInst_DebugBreak { llvm::Instruction *Instr; diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 03e439b6b0..f70013a006 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -132,6 +132,7 @@ enum class IntrinsicOp { IOP___builtin_LinAlg_MatrixStoreToMemory = 410, IOP___builtin_LinAlg_MatrixVectorMultiply = 418, IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 419, + IOP___builtin_LinAlg_VectorAccumulateToDescriptor = 423, IOP_abort = 102, IOP_abs = 103, IOP_acos = 104, @@ -429,7 +430,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 423, + Num_Intrinsics = 424, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b786fee9fc..7bd78ff137 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2984,6 +2984,14 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = { 2, {{0x400}, {0x400}}, {{0xe7}, {0xe7}}}, // Overloads: getNumParams() <= 1) return nullptr; return FT->getParamType(1); @@ 
-7009,7 +7014,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::ClusterID:
   case OpCode::LinAlgMatrixQueryAccumulatorLayout:
   case OpCode::ReservedE0:
-  case OpCode::ReservedE1:
   case OpCode::DebugBreak:
   case OpCode::IsDebuggerPresent:
     return Type::getVoidTy(Ctx);
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
index abf0ad86be..ab3300d1e2 100644
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -7163,6 +7163,31 @@ Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
   return nullptr;
 }
 
+// Lowers the VectorAccumulateToDescriptor builtin to its DXIL op call:
+// (opcode, input vector, destination buffer, offset).
+Value *TranslateLinAlgVectorAccumulate(CallInst *CI, IntrinsicOp IOP,
+                                       OP::OpCode OpCode,
+                                       HLOperationLowerHelper &Helper,
+                                       HLObjectOperationLowerHelper *ObjHelper,
+                                       bool &Translated) {
+  hlsl::OP *HlslOp = &Helper.hlslOP;
+  IRBuilder<> Builder(CI);
+
+  Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode));
+
+  // Builtin arguments start at call operand 1: the vector to accumulate,
+  // then the destination buffer and the offset into it.
+  Value *InputVector = CI->getArgOperand(1);
+  Value *DestBuffer = CI->getArgOperand(2);
+  Value *DestOffset = CI->getArgOperand(3);
+
+  // The DXIL op is overloaded on the input vector type.
+  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType());
+
+  return Builder.CreateCall(DxilFunc,
+                            {OpArg, InputVector, DestBuffer, DestOffset});
+}
+
 } // namespace
 
 // Lower table.
@@ -7957,6 +7982,10 @@ constexpr IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert, DXIL::OpCode::LinAlgConvert}, + {IntrinsicOp::IOP___builtin_LinAlg_VectorAccumulateToDescriptor, + TranslateLinAlgVectorAccumulate, + DXIL::OpCode::LinAlgVectorAccumulateToDescriptor}, + }; constexpr size_t NumLowerTableEntries = sizeof(gLowerTable) / sizeof(gLowerTable[0]); diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 29e0d55ee6..4e337460f9 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -506,9 +506,23 @@ typename hlsl::enable_if::value, Multiply(Matrix MatrixA, vector Vec) { vector Result; + __builtin_LinAlg_MatrixVectorMultiply( + Result, MatrixA.__handle, hlsl::is_signed::value, Vec, + __detail::TypeTraits::CompType); + return Result; +} + +template +typename hlsl::enable_if< + InterpretedVector::Size == K, + vector >::type +Multiply(Matrix MatrixA, + InterpretedVector InterpVec) { + vector Result; __builtin_LinAlg_MatrixVectorMultiply(Result, MatrixA.__handle, - hlsl::is_signed::value, Vec, - MatrixDT); + hlsl::is_signed::value, + InterpVec.Data, InputInterp); return Result; } @@ -650,6 +664,13 @@ OuterProduct(vector VecA, vector VecB) { return Result; } +template +typename hlsl::enable_if::value, void>::type +InterlockedAccumulate(vector Vec, RWByteAddressBuffer Res, + uint StartOffset) { + __builtin_LinAlg_VectorAccumulateToDescriptor(Vec, Res, StartOffset); +} + } // namespace linalg } // namespace dx diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/linalg-mat-vec-mul.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/linalg-mat-vec-mul.hlsl new file mode 100644 index 0000000000..c1213d2b18 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/linalg-mat-vec-mul.hlsl @@ -0,0 +1,104 @@ +// ITY represents a type that may be an interpreted type +// NTY must be an unpacked native type +// PTY is a packed type 
either PackedS8x32 or PackedU8x32 + +// Two simple initial tests +// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F32 -DITY=F32 -DPTY=I8 -DCTY=I32 %s | FileCheck %s -Dntype=float -Dnty=f32 -Dnen=9 -Dnsg=true -Ditype=float -Dity=f32 -Dien=9 -Dctype=i32 -Dcty=i32 -Dcen=4 -Dpen=19 +// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=I32 -DITY=F16 -DPTY=F8_E4M3FN -DCTY=F16 %s | FileCheck %s -Dntype=i32 -Dnty=i32 -Dnen=4 -Dnsg=true -Ditype=half -Dity=f16 -Dien=8 -Dctype=half -Dcty=f16 -Dcen=8 -Dpen=21 + +// More exhaustive run through of all types verifying the dimension matching +// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=U64 -DITY=I16 -DPTY=F8_E4M3FN -DCTY=F64 %s | FileCheck %s -Dntype=i64 -Dnty=i64 -Dnen=7 -Dnsg=false -Ditype=i16 -Dity=i16 -Dien=2 -Dctype=double -Dcty=f64 -Dcen=10 -Dpen=21 +// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F16 -DITY=U32 -DPTY=F8_E5M2 -DCTY=F32 %s | FileCheck %s -Dntype=half -Dnty=f16 -Dnen=8 -Dnsg=true -Ditype=i32 -Dity=i32 -Dien=5 -Dctype=float -Dcty=f32 -Dcen=9 -Dpen=22 +// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F32 -DITY=I64 -DPTY=I8 -DCTY=I64 %s | FileCheck %s -Dntype=float -Dnty=f32 -Dnen=9 -Dnsg=true -Ditype=i64 -Dity=i64 -Dien=6 -Dctype=i64 -Dcty=i64 -Dcen=6 -Dpen=19 +// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=F64 -DITY=F32 -DPTY=U8 -DCTY=F32 %s | FileCheck %s -Dntype=double -Dnty=f64 -Dnen=10 -Dnsg=true -Ditype=float -Dity=f32 -Dien=9 -Dctype=float -Dcty=f32 -Dcen=9 -Dpen=20 +// RUN: %dxc -HV 202x -I %hlsl_headers -T lib_6_10 -enable-16bit-types -DNTY=I16 -DITY=F64 -DPTY=F8_E4M3FN -DCTY=U32 %s | FileCheck %s -Dntype=i16 -Dnty=i16 -Dnen=2 -Dnsg=true -Ditype=double -Dity=f64 -Dien=10 -Dctype=i32 -Dcty=i32 -Dcen=5 -Dpen=21 + + +#include +using namespace dx::linalg; + +ByteAddressBuffer Buf; +RWByteAddressBuffer OutBuf; + + +using nType = 
__detail::ComponentTypeTraits::Type; +using iType = __detail::ComponentTypeTraits::Type; +using cType = __detail::ComponentTypeTraits::Type; + +// CHECK: %dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 = type { i8* } +// CHECK: %dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 = type { i8* } +// CHECK: %dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 = type { i8* } + +// Basic test using unpacked types and native vectors +// CHECK-LABEL: define void @"\01?NativeTest +export void NativeTest(vector Input) { + + typedef Matrix MatrixTy; + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}}) + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 11, i32 0 }) + // CHECK: [[lmtx:%.*]] = call %dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC[[ien]]M8N4U0S0(i32 -2147483634, %dx.types.Handle [[buf]], i32 24, i32 {{[0-9]*}}, i32 1{{.*}} + MatrixTy Mat = MatrixTy::Load(Buf, 24, 8 * sizeof(iType)); + + // CHECK: [[ret:%.*]] = call <8 x [[ntype]]> @dx.op.linAlgMatVecMul.v8[[nty]].mC[[ien]]M8N4U0S0.v4[[nty]](i32 -2147483623, %dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 [[lmtx]], i1 [[nsg]], <4 x [[ntype]]> %Input, i32 [[nen]]) + vector OutVec = Multiply(Mat, Input); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}}) + // CHECK: [[rwbuf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v8[[nty]](i32 -2147483617, <8 x [[ntype]]> [[ret]], %dx.types.Handle [[rwbuf]], i32 47) + InterlockedAccumulate(OutVec, OutBuf, 47); +} + +// Check matrix with interpreted input vector +// CHECK-LABEL: define void @"\01?InterpretedTest +export void InterpretedTest(vector Input) { + + typedef Matrix MatrixTy; + + // Create 
interpreted vector for uints containing 8-bit integers + InterpretedVector IVec = MakeInterpretedVector(Input); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}}) + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 11, i32 0 }) + // CHECK: [[lmtx:%.*]] = call %dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC[[ien]]M24N32U0S0(i32 -2147483634, %dx.types.Handle [[buf]], i32 184, i32 {{[0-9]*}}, i32 0{{.*}} + MatrixTy Mat = MatrixTy::Load(Buf, 184, 24 * sizeof(iType)); + + // CHECK: [[ret:%.*]] = call <24 x [[ntype]]> @dx.op.linAlgMatVecMul.v24[[nty]].mC[[ien]]M24N32U0S0.v32[[ity]](i32 -2147483623, %dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 [[lmtx]], i1 [[nsg]], <32 x [[itype]]> %Input, i32 [[ien]]) + vector OutVec = Multiply(Mat, IVec); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}}) + // CHECK: [[rwbuf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v24[[nty]](i32 -2147483617, <24 x [[ntype]]> [[ret]], %dx.types.Handle [[rwbuf]], i32 62) + InterlockedAccumulate(OutVec, OutBuf, 62); + +} + +// Check matrix with packed type interpreted input vector +// CHECK-LABEL: define void @"\01?PackedInterpretedTest +export void PackedInterpretedTest(vector Input) { + + typedef Matrix MatrixTy; + + // Create interpreted vector for uints containing 8-bit integers + // CHECK: [[ivec:%.*]] = call <8 x i32> @dx.op.linAlgConvert.v8i32.v32[[cty]](i32 -2147483618, <32 x [[ctype]]> %Input, i32 [[cen]], i32 [[pen]]) + InterpretedVector IVec = Convert(Input); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, 
%dx.types.Handle %{{.*}}) + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 11, i32 0 }) + // CHECK: [[lmtx:%.*]] = call %dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC[[ien]]M124N32U0S0(i32 -2147483634, %dx.types.Handle [[buf]], i32 184, i32 {{[0-9]*}}, i32 0{{.*}} + MatrixTy Mat = MatrixTy::Load(Buf, 184, 124 * sizeof(iType)); + + // CHECK: [[ret:%.*]] = call <124 x [[ntype]]> @dx.op.linAlgMatVecMul.v124[[nty]].mC[[ien]]M124N32U0S0.v8i32(i32 -2147483623, %dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 [[lmtx]], i1 [[nsg]], <8 x i32> [[ivec]], i32 [[pen]]) + vector OutVec = Multiply(Mat, IVec); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %{{.*}}) + // CHECK: [[rwbuf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.linAlgVectorAccumulateToDescriptor.v124[[nty]](i32 -2147483617, <124 x [[ntype]]> [[ret]], %dx.types.Handle [[rwbuf]], i32 162) + InterlockedAccumulate(OutVec, OutBuf, 162); +} + +// CHECK-LABEL: !dx.targetTypes +// CHECK-SAME: = !{[[md0:[!][0-9]*]], [[md1:[!][0-9]*]], [[md2:[!][0-9]*]] +// CHECK: [[md0]] = !{%dx.types.LinAlgMatrixC[[ien]]M8N4U0S0 undef, i32 [[ien]], i32 8, i32 4, i32 0, i32 0} +// CHECK: [[md1]] = !{%dx.types.LinAlgMatrixC[[ien]]M24N32U0S0 undef, i32 [[ien]], i32 24, i32 32, i32 0, i32 0} +// CHECK: [[md2]] = !{%dx.types.LinAlgMatrixC[[ien]]M124N32U0S0 undef, i32 [[ien]], i32 124, i32 32, i32 0, i32 0} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 2768e1b65d..e3c836921e 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -410,6 +410,7 @@ void [[min_sm=6.10]] __builtin_LinAlg_MatrixAccumulateToDescriptor(in LinAlgMatr void [[min_sm=6.10]] 
__builtin_LinAlg_MatrixAccumulateToMemory(in LinAlgMatrix matrix, groupshared numeric[] memory, in uint offset, in uint stride, in uint layout); void [[min_sm=6.10]] __builtin_LinAlg_MatrixOuterProduct(out LinAlgMatrix ret, in numeric vecA, in numeric vecB); void [[min_sm=6.10]] __builtin_LinAlg_Convert(out numeric ret, in numeric vec, in uint input_interp, in uint output_interp); +void [[min_sm=6.10]] __builtin_LinAlg_VectorAccumulateToDescriptor(in numeric<> InputVector, in RWByteAddressBuffer buf, in uint offset); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 634b62cc6f..318398ec12 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1163,7 +1163,8 @@ def populate_categories_and_models_ExperimentalOps(self): for i in insts( "LinAlgMatrixQueryAccumulatorLayout,LinAlgMatrixLoadFromDescriptor," + "LinAlgMatrixAccumulateToDescriptor,LinAlgMatVecMul," - + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct,LinAlgConvert" + + "LinAlgMatVecMulAdd,LinAlgMatrixOuterProduct,LinAlgConvert," + + "LinAlgVectorAccumulateToDescriptor" ): i.category = "Linear Algebra Operations" i.shader_model = experimental_sm @@ -6590,7 +6591,21 @@ def populate_ExperimentalOps(self): ], ) - op_table.reserve_dxil_op_range("ReservedE", 2) + add_dxil_op( + "LinAlgVectorAccumulateToDescriptor", + "LinAlgVectorAccumulateToDescriptor", + "Accumulates given vector to the buffer at the given offset", + "