Re: [Mesa-dev] [PATCH 19/19] radeonsi: don't declare LDS in PS when ds_bpermute is used

Nicolai Hähnle Tue, 04 Oct 2016 02:47:08 -0700

Reviewed-by: Nicolai Hähnle <[email protected]>


On 02.10.2016 23:09, Marek Olšák wrote:

From: Marek Olšák <[email protected]>

I guess this change is not strictly needed, because dead code elimination
removes the unused LDS declaration anyway.
---
 src/gallium/drivers/radeonsi/si_pipe.c   | 3 +++
 src/gallium/drivers/radeonsi/si_pipe.h   | 1 +
 src/gallium/drivers/radeonsi/si_shader.c | 7 +++----
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 43d6377..2aa679c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -810,20 +810,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
                (sscreen->b.chip_class == VI &&
                 sscreen->b.info.pfp_fw_version >= 121 &&
                 sscreen->b.info.me_fw_version >= 87) ||
                (sscreen->b.chip_class == CIK &&
                 sscreen->b.info.pfp_fw_version >= 211 &&
                 sscreen->b.info.me_fw_version >= 173) ||
                (sscreen->b.chip_class == SI &&
                 sscreen->b.info.pfp_fw_version >= 121 &&
                 sscreen->b.info.me_fw_version >= 87);

+       sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
+                                  sscreen->b.chip_class >= VI;
+
        sscreen->b.has_cp_dma = true;
        sscreen->b.has_streamout = true;
        pipe_mutex_init(sscreen->shader_parts_mutex);
        sscreen->use_monolithic_shaders =
                HAVE_LLVM < 0x0308 ||
                (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;

        sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
                                            SI_CONTEXT_INV_VMEM_L1 |
                                            SI_CONTEXT_INV_GLOBAL_L2;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 558e185..3cefee7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -76,20 +76,21 @@
 struct si_compute;
 struct hash_table;
 struct u_suballocator;

 struct si_screen {
        struct r600_common_screen       b;
        unsigned                        gs_table_depth;
        unsigned                        tess_offchip_block_dw_size;
        bool                            has_distributed_tess;
        bool                            has_draw_indirect_multi;
+       bool                            has_ds_bpermute;

        /* Whether shaders are monolithic (1-part) or separate (3-part). */
        bool                            use_monolithic_shaders;
        bool                            record_llvm_ir;

        pipe_mutex                      shader_parts_mutex;
        struct si_shader_part           *vs_prologs;
        struct si_shader_part           *vs_epilogs;
        struct si_shader_part           *tcs_epilogs;
        struct si_shader_part           *ps_prologs;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 7844ebd..30bf093 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5002,43 +5002,41 @@ static void si_llvm_emit_ddxy(
        const struct lp_build_tgsi_action *action,
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        unsigned opcode = emit_data->info->opcode;
        LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
        int idx;
        unsigned mask;
-       bool has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
-                              ctx->screen->b.chip_class >= VI;

        thread_id = get_thread_id(ctx);

        if (opcode == TGSI_OPCODE_DDX_FINE)
                mask = TID_MASK_LEFT;
        else if (opcode == TGSI_OPCODE_DDY_FINE)
                mask = TID_MASK_TOP;
        else
                mask = TID_MASK_TOP_LEFT;

        tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
                                lp_build_const_int32(gallivm, mask), "");

        /* for DDX we want to next X pixel, DDY next Y pixel. */
        idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 
: 2;
        trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
                                  lp_build_const_int32(gallivm, idx), "");

        val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, 
"");

-       if (has_ds_bpermute) {
+       if (ctx->screen->has_ds_bpermute) {
                args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
                                       lp_build_const_int32(gallivm, 4), "");
                args[1] = val;
                tl = lp_build_intrinsic(gallivm->builder,
                                        "llvm.amdgcn.ds.bpermute", ctx->i32,
                                        args, 2, LLVMReadNoneAttribute);

                args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
                                       lp_build_const_int32(gallivm, 4), "");
                trbl = lp_build_intrinsic(gallivm->builder,
@@ -5731,21 +5729,22 @@ static void create_function(struct si_shader_context *ctx)
        for (i = 0; i <= last_sgpr; ++i)
                shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 
4;

        /* Unused fragment shader inputs are eliminated by the compiler,
         * so we don't know yet how many there will be.
         */
        if (ctx->type != PIPE_SHADER_FRAGMENT)
                for (; i < num_params; ++i)
                        shader->info.num_input_vgprs += 
llvm_get_type_size(params[i]) / 4;

-       if (bld_base->info &&
+       if (!ctx->screen->has_ds_bpermute &&
+           bld_base->info &&
            (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
                ctx->lds =
                        LLVMAddGlobalInAddressSpace(gallivm->module,
                                                    LLVMArrayType(ctx->i32, 64),
                                                    "ddxy_lds",

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 19/19] radeonsi: don't declare LDS in PS when ds_bpermute is used

Reply via email to