From: Marek Olšák <[email protected]>

---
 src/amd/common/ac_llvm_build.c                |  5 +++--
 src/amd/common/ac_llvm_build.h                |  1 +
 src/amd/common/ac_nir_to_llvm.c               |  2 +-
 .../radeonsi/si_compute_prim_discard.c        | 21 ++++++++-----------
 .../drivers/radeonsi/si_shader_tgsi_mem.c     |  2 +-
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2a1a133c392..894d01ca036 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1187,30 +1187,31 @@ ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
 }
 
 void
 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
                             LLVMValueRef rsrc,
                             LLVMValueRef data,
                             LLVMValueRef vindex,
                             LLVMValueRef voffset,
                             unsigned num_channels,
                             bool glc,
+                            bool slc,
                             bool writeonly_memory)
 {
        if (HAVE_LLVM >= 0x800) {
                ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
                                                   voffset, NULL, num_channels,
-                                                  ctx->f32, glc, false,
+                                                  ctx->f32, glc, slc,
                                                   writeonly_memory, true, true);
        } else {
                ac_build_llvm7_buffer_store_common(ctx, rsrc, data, vindex, voffset,
-                                                  num_channels, glc, false,
+                                                  num_channels, glc, slc,
                                                   writeonly_memory, true);
        }
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
  * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
  * or v4i32 (num_channels=3,4).
  */
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 5ed9a112457..bbdb01184e6 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -275,20 +275,21 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            bool swizzle_enable_hint);
 
 void
 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
                             LLVMValueRef rsrc,
                             LLVMValueRef data,
                             LLVMValueRef vindex,
                             LLVMValueRef voffset,
                             unsigned num_channels,
                             bool glc,
+                            bool slc,
                             bool writeonly_memory);
 
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef rsrc,
                     int num_channels,
                     LLVMValueRef vindex,
                     LLVMValueRef voffset,
                     LLVMValueRef soffset,
                     unsigned inst_offset,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 833b1e54abc..7e2e8c30306 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2532,21 +2532,21 @@ static void visit_image_store(struct ac_nir_context *ctx,
 
                if (src_channels == 3)
                        src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
 
                vindex = LLVMBuildExtractElement(ctx->ac.builder,
                                                 get_src(ctx, instr->src[1]),
                                                 ctx->ac.i32_0, "");
 
                ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
                                             ctx->ac.i32_0, src_channels,
-                                            args.cache_policy & ac_glc,
+                                            args.cache_policy & ac_glc, false,
                                             writeonly_memory);
        } else {
                args.opcode = ac_image_store;
                args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
                get_image_coords(ctx, instr, &args, dim, is_array);
                args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
                args.dim = get_ac_image_dim(&ctx->ac, dim, is_array);
                args.dmask = 15;
 
                ac_build_image_opcode(&ctx->ac, &args);
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 362c63c2e44..3bed818d5ad 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -849,32 +849,29 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
                         * multiple subdraws, the first primitive ID is > 0
                         * for subsequent subdraws. Each subdraw uses a different
                         * portion of the output index buffer. Offset the store
                         * vindex by the first primitive ID to get the correct
                         * store address for the subdraw.
                         */
                        start = LLVMBuildAdd(builder, start, vertex_counter, "");
                }
 
                /* Write indices for accepted primitives. */
-               LLVMValueRef buf_args[] = {
-                       ac_to_float(&ctx->ac, ac_build_expand_to_vec4(&ctx->ac,
-                                               ac_build_gather_values(&ctx->ac, index, 3), 3)),
-                       output_indexbuf,
-                       LLVMBuildAdd(builder, start, prim_index, ""),
-                       ctx->i32_0, /* voffset */
-                       ctx->i1true, /* glc */
-                       LLVMConstInt(ctx->i1, INDEX_STORES_USE_SLC, 0),
-               };
-               ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
-                                  ctx->voidt, buf_args, 6,
-                                  ac_get_store_intr_attribs(true));
+               LLVMValueRef vindex = LLVMBuildAdd(builder, start, prim_index, "");
+               LLVMValueRef vdata = ac_build_gather_values(&ctx->ac, index, 3);
+
+               if (!ac_has_vec3_support(ctx->ac.chip_class, true))
+                       vdata = ac_build_expand_to_vec4(&ctx->ac, vdata, 3);
+
+               ac_build_buffer_store_format(&ctx->ac, output_indexbuf, vdata,
+                                            vindex, ctx->i32_0, 3, true,
+                                            INDEX_STORES_USE_SLC, true);
        }
        lp_build_endif(&if_accepted);
 
        LLVMBuildRetVoid(builder);
 }
 
 /* Return false if the shader isn't ready. */
 static bool si_shader_select_prim_discard_cs(struct si_context *sctx,
                                             const struct pipe_draw_info *info,
                                             bool primitive_restart)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index cc634f495ef..f4a988f90fa 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -728,21 +728,21 @@ static void store_emit(
                return;
        }
 
        if (target == TGSI_TEXTURE_BUFFER) {
                unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
 
                ac_build_buffer_store_format(&ctx->ac, args.resource,
                                             ac_build_gather_values(&ctx->ac, chans, num_channels),
                                             vindex, ctx->i32_0 /* voffset */,
                                             num_channels,
-                                            !!(args.cache_policy & ac_glc),
+                                            !!(args.cache_policy & ac_glc), false,
                                             writeonly_memory);
        } else {
                args.opcode = ac_image_store;
                args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4);
                args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
                args.attributes = ac_get_store_intr_attribs(writeonly_memory);
                args.dmask = 0xf;
 
                emit_data->output[emit_data->chan] =
                        ac_build_image_opcode(&ctx->ac, &args);
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to