llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-directx Author: Justin Bogner (bogner) <details> <summary>Changes</summary> This introduces `@<!-- -->llvm.dx.resource.store.rawbuffer` and generalizes the buffer store docs under DirectX/DXILResources. Fixes #<!-- -->106188 --- Patch is 28.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121282.diff 7 Files Affected: - (modified) llvm/docs/DirectX/DXILResources.rst (+99-15) - (modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+4) - (modified) llvm/lib/Target/DirectX/DXIL.td (+20) - (modified) llvm/lib/Target/DirectX/DXILOpLowering.cpp (+56-26) - (added) llvm/test/CodeGen/DirectX/BufferStore-sm61.ll (+126) - (added) llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll (+20) - (added) llvm/test/CodeGen/DirectX/RawBufferStore.ll (+144) ``````````diff diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst index 857d29e48363b0..c350303c39f796 100644 --- a/llvm/docs/DirectX/DXILResources.rst +++ b/llvm/docs/DirectX/DXILResources.rst @@ -491,26 +491,28 @@ Examples: i32 %byte_offset, i32 0) -Texture and Typed Buffer Stores -------------------------------- +Stores +------ -*relevant types: Textures and TypedBuffer* +*relevant types: Textures and Buffer* -The `TextureStore`_ and `BufferStore`_ DXIL operations always write all four -32-bit components to a texture or a typed buffer. While both operations include -a mask parameter, it is specified that the mask must cover all components when -used with these types. +The `TextureStore`_, `BufferStore`_, and `RawBufferStore`_ DXIL operations +write four components to a texture or a buffer. These include a mask argument +that is used when fewer than 4 components are written, but notably this only +takes on the contiguous x, xy, xyz, and xyzw values. -The store operations that we define as intrinsics behave similarly, and will -only accept writes to the whole of the contained type. 
This differs from the -loads above, but this makes sense to do from a semantics preserving point of -view. Thus, texture and buffer stores may only operate on 4-element vectors of -types that are 32-bits or fewer, such as ``<4 x i32>``, ``<4 x float>``, and -``<4 x half>``, and 2 element vectors of 64-bit types like ``<2 x double>`` and -``<2 x i64>``. +We define the LLVM store intrinsics to accept vectors when storing multiple +components rather than using `undef` and a mask, but otherwise match the DXIL +ops fairly closely. -.. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore .. _TextureStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#texturestore +.. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore +.. _RawBufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#rawbufferstore + +For TypedBuffer, we only need one coordinate, and we must always write a vector +since partial writes aren't possible. Similarly to the load operations +described above, we handle 64-bit types specially and only handle 2-element +vectors rather than 4. Examples: @@ -548,3 +550,85 @@ Examples: target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data) call void @llvm.dx.resource.store.typedbuffer.tdx.Buffer_v2f64_1_0_0t( target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data) + +For RawBuffer, we need two indices and we accept scalars and vectors of 4 or +fewer elements. Note that we do allow vectors of 4 64-bit elements here. + +Examples: + +..
list-table:: ``@llvm.dx.resource.store.rawbuffer`` + :header-rows: 1 + + * - Argument + - + - Type + - Description + * - Return value + - + - ``void`` + - + * - ``%buffer`` + - 0 + - ``target(dx.RawBuffer, ...)`` + - The buffer to store into + * - ``%index`` + - 1 + - ``i32`` + - Index into the buffer + * - ``%offset`` + - 2 + - ``i32`` + - Byte offset into structured buffer elements + * - ``%data`` + - 3 + - Scalar or vector + - The data to store + +Examples: + +.. code-block:: llvm + + ; float + call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f32_1_0_0t.f32( + target("dx.RawBuffer", float, 1, 0, 0) %buffer, + i32 %index, i32 %offset, float %data) + call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.f32( + target("dx.RawBuffer", i8, 1, 0, 0) %buffer, + i32 %index, i32 0, float %data) + + ; float4 + call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f32_1_0_0t.v4f32( + target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer, + i32 %index, i32 0, <4 x float> %data) + call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f32( + target("dx.RawBuffer", i8, 1, 0, 0) %buffer, + i32 %index, i32 0, <4 x float> %data) + + ; struct S0 { float4 f; int4 i; } + call void @llvm.dx.resource.store.rawbuffer.v4f32( + target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer, + i32 %index, i32 0, <4 x float> %data0) + call void @llvm.dx.resource.store.rawbuffer.v4i32( + target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer, + i32 %index, i32 16, <4 x i32> %data1) + + ; struct Q { float4 f; int3 i; } + ; struct R { int z; S x; } + call void @llvm.dx.resource.store.rawbuffer.i32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) + %buffer, + i32 %index, i32 0, i32 %data0) + call void @llvm.dx.resource.store.rawbuffer.v4f32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) + %buffer, + i32 %index, i32 4, <4 x float> %data1) + call void @llvm.dx.resource.store.rawbuffer.v3f16(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) + %buffer, + i32 %index, i32 20, <3 x half> %data2) + + ; byteaddressbuf.Store<int64_t4> + call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f64( + target("dx.RawBuffer", i8, 1, 0, 0) %buffer, + i32 %offset, i32 0, <4 x double> %data) + diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 3b1d1a88e01a8b..18a1d09a14aaf1 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -40,6 +40,10 @@ def int_dx_resource_load_rawbuffer : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; +def int_dx_resource_store_rawbuffer + : DefaultAttrsIntrinsic< + [], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_any_ty], + [IntrWriteMem]>; def int_dx_resource_updatecounter : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 62b5b704e99eb2..6fdd83c4dc8778 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -909,6 +909,26 @@ def RawBufferLoad : DXILOp<139, rawBufferLoad> { let stages = [Stages<DXIL1_2, [all_stages]>]; } +def RawBufferStore : DXILOp<140, rawBufferStore> { + let Doc = "writes to a RWByteAddressBuffer or RWStructuredBuffer"; + // Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask, Alignment + let arguments = [ + HandleTy, Int32Ty, Int32Ty, OverloadTy, OverloadTy, OverloadTy, OverloadTy, + Int8Ty, Int32Ty + ]; + let result = VoidTy; + let overloads = [ + Overloads<DXIL1_2, + [ResRetHalfTy, ResRetFloatTy, ResRetInt16Ty, ResRetInt32Ty]>, + Overloads<DXIL1_3, + [ + ResRetHalfTy, ResRetFloatTy, ResRetDoubleTy, ResRetInt16Ty, + ResRetInt32Ty, ResRetInt64Ty + ]> + ]; + let stages = [Stages<DXIL1_2, [all_stages]>]; +} + def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> { let Doc = "signed dot product of 4 x i8 
vectors packed into i32, with " "accumulate to i32"; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index f43815bf211664..0c245c1a43d31c 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -616,7 +616,10 @@ class OpLowerer { return false; } - [[nodiscard]] bool lowerTypedBufferStore(Function &F) { + [[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) { + Triple TT(Triple(M.getTargetTriple())); + VersionTuple DXILVersion = TT.getDXILVersion(); + const DataLayout &DL = F.getDataLayout(); IRBuilder<> &IRB = OpBuilder.getIRB(); Type *Int8Ty = IRB.getInt8Ty(); Type *Int32Ty = IRB.getInt32Ty(); @@ -627,51 +630,75 @@ class OpLowerer { Value *Handle = createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); Value *Index0 = CI->getArgOperand(1); - Value *Index1 = UndefValue::get(Int32Ty); - // For typed stores, the mask must always cover all four elements. - Constant *Mask = ConstantInt::get(Int8Ty, 0xF); + Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty); + + Value *Data = CI->getArgOperand(IsRaw ? 3 : 2); + Type *DataTy = Data->getType(); + Type *ScalarTy = DataTy->getScalarType(); - Value *Data = CI->getArgOperand(2); - auto *DataTy = dyn_cast<FixedVectorType>(Data->getType()); - if (!DataTy || DataTy->getNumElements() != 4) + uint64_t NumElements = + DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy); + Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements)); + + // TODO: check that we only have vector or scalar... 
+ if (!IsRaw && NumElements != 4) return make_error<StringError>( "typedBufferStore data must be a vector of 4 elements", inconvertibleErrorCode()); + else if (NumElements > 4) + return make_error<StringError>( + "rawBufferStore data must have at most 4 elements", + inconvertibleErrorCode()); - // Since we're post-scalarizer, we likely have a vector that's constructed - // solely for the argument of the store. If so, just use the scalar values - // from before they're inserted into the temporary. std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr}; - auto *IEI = dyn_cast<InsertElementInst>(Data); - while (IEI) { - auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2)); - if (!IndexOp) - break; - size_t IndexVal = IndexOp->getZExtValue(); - assert(IndexVal < 4 && "Too many elements for buffer store"); - DataElements[IndexVal] = IEI->getOperand(1); - IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); + if (DataTy == ScalarTy) + DataElements[0] = Data; + else { + // Since we're post-scalarizer, if we see a vector here it's likely + // constructed solely for the argument of the store. Just use the scalar + // values from before they're inserted into the temporary. + auto *IEI = dyn_cast<InsertElementInst>(Data); + while (IEI) { + auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2)); + if (!IndexOp) + break; + size_t IndexVal = IndexOp->getZExtValue(); + assert(IndexVal < 4 && "Too many elements for buffer store"); + DataElements[IndexVal] = IEI->getOperand(1); + IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); + } } // If for some reason we weren't able to forward the arguments from the - // scalarizer artifact, then we need to actually extract elements from the - // vector. - for (int I = 0, E = 4; I != E; ++I) + // scalarizer artifact, then we may need to actually extract elements from + // the vector. 
+ for (int I = 0, E = NumElements; I < E; ++I) if (DataElements[I] == nullptr) DataElements[I] = IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I)); + // For any elements beyond the length of the vector, fill up with undef. + for (int I = NumElements, E = 4; I < E; ++I) + if (DataElements[I] == nullptr) + DataElements[I] = UndefValue::get(ScalarTy); - std::array<Value *, 8> Args{ + dxil::OpCode Op = OpCode::BufferStore; + SmallVector<Value *, 9> Args{ Handle, Index0, Index1, DataElements[0], DataElements[1], DataElements[2], DataElements[3], Mask}; + if (IsRaw && DXILVersion >= VersionTuple(1, 2)) { + Op = OpCode::RawBufferStore; + // RawBufferStore requires the alignment + Args.push_back( + ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value())); + } Expected<CallInst *> OpCall = - OpBuilder.tryCreateOp(OpCode::BufferStore, Args, CI->getName()); + OpBuilder.tryCreateOp(Op, Args, CI->getName()); if (Error E = OpCall.takeError()) return E; CI->eraseFromParent(); // Clean up any leftover `insertelement`s - IEI = dyn_cast<InsertElementInst>(Data); + auto *IEI = dyn_cast<InsertElementInst>(Data); while (IEI && IEI->use_empty()) { InsertElementInst *Tmp = IEI; IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); @@ -776,11 +803,14 @@ class OpLowerer { HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true); break; case Intrinsic::dx_resource_store_typedbuffer: - HasErrors |= lowerTypedBufferStore(F); + HasErrors |= lowerBufferStore(F, /*IsRaw=*/false); break; case Intrinsic::dx_resource_load_rawbuffer: HasErrors |= lowerRawBufferLoad(F); break; + case Intrinsic::dx_resource_store_rawbuffer: + HasErrors |= lowerBufferStore(F, /*IsRaw=*/true); + break; case Intrinsic::dx_resource_updatecounter: HasErrors |= lowerUpdateCounter(F); break; diff --git a/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll b/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll new file mode 100644 index 00000000000000..1916cdf3744555 --- /dev/null +++ 
b/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll @@ -0,0 +1,126 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s +; Before SM6.2 ByteAddressBuffer and StructuredBuffer lower to bufferStore. + +target triple = "dxil-pc-shadermodel6.1-compute" + +; CHECK-LABEL: define void @storef32_struct +define void @storef32_struct(i32 %index, float %data) { + %buffer = call target("dx.RawBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float %data, float undef, float undef, float undef, i8 1) + call void @llvm.dx.resource.store.rawbuffer.f32( + target("dx.RawBuffer", float, 1, 0, 0) %buffer, + i32 %index, i32 0, float %data) + + ret void +} + +; CHECK-LABEL: define void @storef32_byte +define void @storef32_byte(i32 %offset, float %data) { + %buffer = call target("dx.RawBuffer", i8, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, float %data, float undef, float undef, float undef, i8 1) + call void @llvm.dx.resource.store.rawbuffer.f32( + target("dx.RawBuffer", i8, 1, 0, 0) %buffer, + i32 %offset, i32 0, float %data) + + ret void +} + +; CHECK-LABEL: define void @storev4f32_struct +define void @storev4f32_struct(i32 %index, <4 x float> %data) { + %buffer = call target("dx.RawBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0 + ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1 + ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2 + ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle 
%{{.*}}, i32 %index, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15) + call void @llvm.dx.resource.store.rawbuffer.v4f32( + target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer, + i32 %index, i32 0, <4 x float> %data) + + ret void +} + +; CHECK-LABEL: define void @storev4f32_byte +define void @storev4f32_byte(i32 %offset, <4 x float> %data) { + %buffer = call target("dx.RawBuffer", i8, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0 + ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1 + ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2 + ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15) + call void @llvm.dx.resource.store.rawbuffer.v4f32( + target("dx.RawBuffer", i8, 1, 0, 0) %buffer, + i32 %offset, i32 0, <4 x float> %data) + + ret void +} + +; CHECK-LABEL: define void @storeelements +define void @storeelements(i32 %index, <4 x float> %data0, <4 x i32> %data1) { + %buffer = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data0, i32 0 + ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data0, i32 1 + ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data0, i32 2 + ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data0, i32 3 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15) + call void @llvm.dx.resource.store.rawbuffer.v4f32( + target("dx.RawBuffer", {<4 x 
float>, <4 x i32>}, 1, 0, 0) %buffer, + i32 %index, i32 0, <4 x float> %data0) + + ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x i32> %data1, i32 0 + ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x i32> %data1, i32 1 + ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x i32> %data1, i32 2 + ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x i32> %data1, i32 3 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 16, i32 [[DATA1_0]], i32 [[DATA1_1]], i32 [[DATA1_2]], i32 [[DATA1_3]], i8 15) + call void @llvm.dx.resource.store.rawbuffer.v4i32( + target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) %buffer, + i32 %index, i32 16, <4 x i32> %data1) + + ret void +} + +; CHECK-LABEL: define void @storenested +define void @storenested(i32 %index, i32 %data0, <4 x float> %data1, <3 x half> %data2) { + %buffer = call + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, i32 %data0, i32 undef, i32 undef, i32 undef, i8 1) + call void @llvm.dx.resource.store.rawbuffer.i32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer, + i32 %index, i32 0, i32 %data0) + + ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x float> %data1, i32 0 + ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x float> %data1, i32 1 + ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x float> %data1, i32 2 + ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x float> %data1, i32 3 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 4, float [[DATA1_0]], float [[DATA1_1]], float [[DATA1_2]], float [[DATA1_3]], i8 15) + call void @llvm.dx.resource.store.rawbuffer.v4f32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer, + i32 %index, i32 4, <4 x float> %data1) + + ; CHECK: [[DATA2_0:%.*]] = extractelement <3 x half> 
%data2, i32 0 + ; CHECK: [[DATA2_1:%.*]] = extractelement <3 x half> %data2, i32 1 + ; CHECK: [[DATA2_2:%.*]] = extractelement <3 x half> %data2, i32 2 + ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 20, half [[DATA2_0]], half [[DATA2_1]], half [[DATA2_2]], half undef, i8 7) + call void @llvm.dx.resource.store.rawbuffer.v3f16( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer, + i32 %index, i32 20, <3 x half> %data2) + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll b/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll new file mode 100644 index 00000000000000..a883a0bbc29fdf... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/121282 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits