llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-directx

Author: Justin Bogner (bogner)

<details>
<summary>Changes</summary>

This introduces `@<!-- -->llvm.dx.resource.store.rawbuffer` and generalizes the 
buffer store docs under DirectX/DXILResources.

Fixes #<!-- -->106188

---

Patch is 28.64 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/121282.diff


7 Files Affected:

- (modified) llvm/docs/DirectX/DXILResources.rst (+99-15) 
- (modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+4) 
- (modified) llvm/lib/Target/DirectX/DXIL.td (+20) 
- (modified) llvm/lib/Target/DirectX/DXILOpLowering.cpp (+56-26) 
- (added) llvm/test/CodeGen/DirectX/BufferStore-sm61.ll (+126) 
- (added) llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll (+20) 
- (added) llvm/test/CodeGen/DirectX/RawBufferStore.ll (+144) 


``````````diff
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
index 857d29e48363b0..c350303c39f796 100644
--- a/llvm/docs/DirectX/DXILResources.rst
+++ b/llvm/docs/DirectX/DXILResources.rst
@@ -491,26 +491,28 @@ Examples:
            i32 %byte_offset,
            i32 0)
 
-Texture and Typed Buffer Stores
--------------------------------
+Stores
+------
 
-*relevant types: Textures and TypedBuffer*
+*relevant types: Textures and Buffer*
 
-The `TextureStore`_ and `BufferStore`_ DXIL operations always write all four
-32-bit components to a texture or a typed buffer. While both operations include
-a mask parameter, it is specified that the mask must cover all components when
-used with these types.
+The `TextureStore`_, `BufferStore`_, and `RawBufferStore`_ DXIL operations
+write four components to a texture or a buffer. These include a mask argument
+that is used when fewer than 4 components are written, but notably this only
+takes on the contiguous x, xy, xyz, and xyzw values.
 
-The store operations that we define as intrinsics behave similarly, and will
-only accept writes to the whole of the contained type. This differs from the
-loads above, but this makes sense to do from a semantics preserving point of
-view. Thus, texture and buffer stores may only operate on 4-element vectors of
-types that are 32-bits or fewer, such as ``<4 x i32>``, ``<4 x float>``, and
-``<4 x half>``, and 2 element vectors of 64-bit types like ``<2 x double>`` and
-``<2 x i64>``.
+We define the LLVM store intrinsics to accept vectors when storing multiple
+components rather than using `undef` and a mask, but otherwise match the DXIL
+ops fairly closely.
 
-.. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
 .. _TextureStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#texturestore
+.. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
+.. _RawBufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#rawbufferstore
+
+For TypedBuffer, we only need one coordinate, and we must always write a vector
+since partial writes aren't possible. Similarly to the load operations
+described above, we handle 64-bit types specially and only handle 2-element
+vectors rather than 4.
 
 Examples:
 
@@ -548,3 +550,85 @@ Examples:
        target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data)
    call void @llvm.dx.resource.store.typedbuffer.tdx.Buffer_v2f64_1_0_0t(
        target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data)
+
+For RawBuffer, we need two indices and we accept scalars and vectors of 4 or
+fewer elements. Note that we do allow vectors of 4 64-bit elements here.
+
+Examples:
+
+.. list-table:: ``@llvm.dx.resource.store.rawbuffer``
+   :header-rows: 1
+
+   * - Argument
+     -
+     - Type
+     - Description
+   * - Return value
+     -
+     - ``void``
+     -
+   * - ``%buffer``
+     - 0
+     - ``target(dx.RawBuffer, ...)``
+     - The buffer to store into
+   * - ``%index``
+     - 1
+     - ``i32``
+     - Index into the buffer
+   * - ``%offset``
+     - 2
+     - ``i32``
+     - Byte offset into structured buffer elements
+   * - ``%data``
+     - 3
+     - Scalar or vector
+     - The data to store
+
+Examples:
+
+.. code-block:: llvm
+
+   ; float
+   call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f32_1_0_0t.f32(
+       target("dx.RawBuffer", float, 1, 0, 0) %buffer,
+       i32 %index, i32 %offset, float %data)
+   call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.f32(
+       target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+       i32 %index, i32 0, float %data)
+
+   ; float4
+   call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f32_1_0_0t.v4f32(
+       target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer,
+       i32 %index, i32 0, <4 x float> %data)
+   call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f32(
+       target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+       i32 %index, i32 0, <4 x float> %data)
+
+   ; struct S0 { float4 f; int4 i; }
+   call void @llvm.dx.resource.store.rawbuffer.v4f32(
+       target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
+       i32 %index, i32 0, <4 x float> %data0)
+   call void @llvm.dx.resource.store.rawbuffer.v4i32(
+       target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
+       i32 %index, i32 16, <4 x i32> %data1)
+
+   ; struct Q { float4 f; half3 h; }
+   ; struct R { int z; Q x; }
+   call void @llvm.dx.resource.store.rawbuffer.i32(
+       target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+           %buffer,
+       i32 %index, i32 0, i32 %data0)
+   call void @llvm.dx.resource.store.rawbuffer.v4f32(
+       target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+           %buffer,
+       i32 %index, i32 4, <4 x float> %data1)
+   call void @llvm.dx.resource.store.rawbuffer.v3f16(
+       target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+           %buffer,
+       i32 %index, i32 20, <3 x half> %data2)
+
+   ; byteaddressbuf.Store<double4>
+   call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f64(
+       target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+       i32 %offset, i32 0, <4 x double> %data)
+
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 3b1d1a88e01a8b..18a1d09a14aaf1 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -40,6 +40,10 @@ def int_dx_resource_load_rawbuffer
     : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
                             [llvm_any_ty, llvm_i32_ty, llvm_i32_ty],
                             [IntrReadMem]>;
+def int_dx_resource_store_rawbuffer
+    : DefaultAttrsIntrinsic<
+          [], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_any_ty],
+          [IntrWriteMem]>;
 
 def int_dx_resource_updatecounter
     : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 62b5b704e99eb2..6fdd83c4dc8778 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -909,6 +909,26 @@ def RawBufferLoad : DXILOp<139, rawBufferLoad> {
   let stages = [Stages<DXIL1_2, [all_stages]>];
 }
 
+def RawBufferStore : DXILOp<140, rawBufferStore> {
+  let Doc = "writes to a RWByteAddressBuffer or RWStructuredBuffer";
+  // Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask, Alignment
+  let arguments = [
+    HandleTy, Int32Ty, Int32Ty, OverloadTy, OverloadTy, OverloadTy, OverloadTy,
+    Int8Ty, Int32Ty
+  ];
+  let result = VoidTy;
+  let overloads = [
+    Overloads<DXIL1_2,
+              [ResRetHalfTy, ResRetFloatTy, ResRetInt16Ty, ResRetInt32Ty]>,
+    Overloads<DXIL1_3,
+              [
+                ResRetHalfTy, ResRetFloatTy, ResRetDoubleTy, ResRetInt16Ty,
+                ResRetInt32Ty, ResRetInt64Ty
+              ]>
+  ];
+  let stages = [Stages<DXIL1_2, [all_stages]>];
+}
+
 def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
   let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
             "accumulate to i32";
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index f43815bf211664..0c245c1a43d31c 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -616,7 +616,10 @@ class OpLowerer {
     return false;
   }
 
-  [[nodiscard]] bool lowerTypedBufferStore(Function &F) {
+  [[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) {
+    Triple TT(Triple(M.getTargetTriple()));
+    VersionTuple DXILVersion = TT.getDXILVersion();
+    const DataLayout &DL = F.getDataLayout();
     IRBuilder<> &IRB = OpBuilder.getIRB();
     Type *Int8Ty = IRB.getInt8Ty();
     Type *Int32Ty = IRB.getInt32Ty();
@@ -627,51 +630,75 @@ class OpLowerer {
       Value *Handle =
           createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
       Value *Index0 = CI->getArgOperand(1);
-      Value *Index1 = UndefValue::get(Int32Ty);
-      // For typed stores, the mask must always cover all four elements.
-      Constant *Mask = ConstantInt::get(Int8Ty, 0xF);
+      Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty);
+
+      Value *Data = CI->getArgOperand(IsRaw ? 3 : 2);
+      Type *DataTy = Data->getType();
+      Type *ScalarTy = DataTy->getScalarType();
 
-      Value *Data = CI->getArgOperand(2);
-      auto *DataTy = dyn_cast<FixedVectorType>(Data->getType());
-      if (!DataTy || DataTy->getNumElements() != 4)
+      uint64_t NumElements =
+          DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy);
+      Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements));
+
+      // TODO: check that we only have vector or scalar...
+      if (!IsRaw && NumElements != 4)
         return make_error<StringError>(
             "typedBufferStore data must be a vector of 4 elements",
             inconvertibleErrorCode());
+      else if (NumElements > 4)
+        return make_error<StringError>(
+            "rawBufferStore data must have at most 4 elements",
+            inconvertibleErrorCode());
 
-      // Since we're post-scalarizer, we likely have a vector that's constructed
-      // solely for the argument of the store. If so, just use the scalar values
-      // from before they're inserted into the temporary.
       std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr};
-      auto *IEI = dyn_cast<InsertElementInst>(Data);
-      while (IEI) {
-        auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2));
-        if (!IndexOp)
-          break;
-        size_t IndexVal = IndexOp->getZExtValue();
-        assert(IndexVal < 4 && "Too many elements for buffer store");
-        DataElements[IndexVal] = IEI->getOperand(1);
-        IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
+      if (DataTy == ScalarTy)
+        DataElements[0] = Data;
+      else {
+        // Since we're post-scalarizer, if we see a vector here it's likely
+        // constructed solely for the argument of the store. Just use the scalar
+        // values from before they're inserted into the temporary.
+        auto *IEI = dyn_cast<InsertElementInst>(Data);
+        while (IEI) {
+          auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2));
+          if (!IndexOp)
+            break;
+          size_t IndexVal = IndexOp->getZExtValue();
+          assert(IndexVal < 4 && "Too many elements for buffer store");
+          DataElements[IndexVal] = IEI->getOperand(1);
+          IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
+        }
       }
 
       // If for some reason we weren't able to forward the arguments from the
-      // scalarizer artifact, then we need to actually extract elements from the
-      // vector.
-      for (int I = 0, E = 4; I != E; ++I)
+      // scalarizer artifact, then we may need to actually extract elements from
+      // the vector.
+      for (int I = 0, E = NumElements; I < E; ++I)
         if (DataElements[I] == nullptr)
           DataElements[I] =
               IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I));
+      // For any elements beyond the length of the vector, fill up with undef.
+      for (int I = NumElements, E = 4; I < E; ++I)
+        if (DataElements[I] == nullptr)
+          DataElements[I] = UndefValue::get(ScalarTy);
 
-      std::array<Value *, 8> Args{
+      dxil::OpCode Op = OpCode::BufferStore;
+      SmallVector<Value *, 9> Args{
           Handle,          Index0,          Index1,          DataElements[0],
           DataElements[1], DataElements[2], DataElements[3], Mask};
+      if (IsRaw && DXILVersion >= VersionTuple(1, 2)) {
+        Op = OpCode::RawBufferStore;
+        // RawBufferStore requires the alignment
+        Args.push_back(
+            ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value()));
+      }
       Expected<CallInst *> OpCall =
-          OpBuilder.tryCreateOp(OpCode::BufferStore, Args, CI->getName());
+          OpBuilder.tryCreateOp(Op, Args, CI->getName());
       if (Error E = OpCall.takeError())
         return E;
 
       CI->eraseFromParent();
       // Clean up any leftover `insertelement`s
-      IEI = dyn_cast<InsertElementInst>(Data);
+      auto *IEI = dyn_cast<InsertElementInst>(Data);
       while (IEI && IEI->use_empty()) {
         InsertElementInst *Tmp = IEI;
         IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
@@ -776,11 +803,14 @@ class OpLowerer {
         HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
         break;
       case Intrinsic::dx_resource_store_typedbuffer:
-        HasErrors |= lowerTypedBufferStore(F);
+        HasErrors |= lowerBufferStore(F, /*IsRaw=*/false);
         break;
       case Intrinsic::dx_resource_load_rawbuffer:
         HasErrors |= lowerRawBufferLoad(F);
         break;
+      case Intrinsic::dx_resource_store_rawbuffer:
+        HasErrors |= lowerBufferStore(F, /*IsRaw=*/true);
+        break;
       case Intrinsic::dx_resource_updatecounter:
         HasErrors |= lowerUpdateCounter(F);
         break;
diff --git a/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll b/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll
new file mode 100644
index 00000000000000..1916cdf3744555
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll
@@ -0,0 +1,126 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+; Before SM6.2 ByteAddressBuffer and StructuredBuffer lower to bufferStore.
+
+target triple = "dxil-pc-shadermodel6.1-compute"
+
+; CHECK-LABEL: define void @storef32_struct
+define void @storef32_struct(i32 %index, float %data) {
+  %buffer = call target("dx.RawBuffer", float, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0_0(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float %data, float undef, float undef, float undef, i8 1)
+  call void @llvm.dx.resource.store.rawbuffer.f32(
+      target("dx.RawBuffer", float, 1, 0, 0) %buffer,
+      i32 %index, i32 0, float %data)
+
+  ret void
+}
+
+; CHECK-LABEL: define void @storef32_byte
+define void @storef32_byte(i32 %offset, float %data) {
+  %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, float %data, float undef, float undef, float undef, i8 1)
+  call void @llvm.dx.resource.store.rawbuffer.f32(
+      target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+      i32 %offset, i32 0, float %data)
+
+  ret void
+}
+
+; CHECK-LABEL: define void @storev4f32_struct
+define void @storev4f32_struct(i32 %index, <4 x float> %data) {
+  %buffer = call target("dx.RawBuffer", <4 x float>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f32_0_0_0(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0
+  ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1
+  ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2
+  ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15)
+  call void @llvm.dx.resource.store.rawbuffer.v4f32(
+      target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer,
+      i32 %index, i32 0, <4 x float> %data)
+
+  ret void
+}
+
+; CHECK-LABEL: define void @storev4f32_byte
+define void @storev4f32_byte(i32 %offset, <4 x float> %data) {
+  %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0
+  ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1
+  ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2
+  ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15)
+  call void @llvm.dx.resource.store.rawbuffer.v4f32(
+      target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+      i32 %offset, i32 0, <4 x float> %data)
+
+  ret void
+}
+
+; CHECK-LABEL: define void @storeelements
+define void @storeelements(i32 %index, <4 x float> %data0, <4 x i32> %data1) {
+  %buffer = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0_0(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data0, i32 0
+  ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data0, i32 1
+  ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data0, i32 2
+  ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data0, i32 3
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
+  call void @llvm.dx.resource.store.rawbuffer.v4f32(
+      target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) %buffer,
+      i32 %index, i32 0, <4 x float> %data0)
+
+  ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x i32> %data1, i32 0
+  ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x i32> %data1, i32 1
+  ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x i32> %data1, i32 2
+  ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x i32> %data1, i32 3
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 16, i32 [[DATA1_0]], i32 [[DATA1_1]], i32 [[DATA1_2]], i32 [[DATA1_3]], i8 15)
+  call void @llvm.dx.resource.store.rawbuffer.v4i32(
+      target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) %buffer,
+      i32 %index, i32 16, <4 x i32> %data1)
+
+  ret void
+}
+
+; CHECK-LABEL: define void @storenested
+define void @storenested(i32 %index, i32 %data0, <4 x float> %data1, <3 x 
half> %data2) {
+  %buffer = call
+      target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, i32 %data0, i32 undef, i32 undef, i32 undef, i8 1)
+  call void @llvm.dx.resource.store.rawbuffer.i32(
+      target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) 
%buffer,
+      i32 %index, i32 0, i32 %data0)
+
+  ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x float> %data1, i32 0
+  ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x float> %data1, i32 1
+  ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x float> %data1, i32 2
+  ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x float> %data1, i32 3
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 4, float [[DATA1_0]], float [[DATA1_1]], float [[DATA1_2]], float [[DATA1_3]], i8 15)
+  call void @llvm.dx.resource.store.rawbuffer.v4f32(
+      target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) 
%buffer,
+      i32 %index, i32 4, <4 x float> %data1)
+
+  ; CHECK: [[DATA2_0:%.*]] = extractelement <3 x half> %data2, i32 0
+  ; CHECK: [[DATA2_1:%.*]] = extractelement <3 x half> %data2, i32 1
+  ; CHECK: [[DATA2_2:%.*]] = extractelement <3 x half> %data2, i32 2
+  ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 20, half [[DATA2_0]], half [[DATA2_1]], half [[DATA2_2]], half undef, i8 7)
+  call void @llvm.dx.resource.store.rawbuffer.v3f16(
+      target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) 
%buffer,
+      i32 %index, i32 20, <3 x half> %data2)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll b/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll
new file mode 100644
index 00000000000000..a883a0bbc29fdf...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/121282
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to