One minor comment on patch #8; apart from that, the series is:

Reviewed-by: Nicolai Hähnle <[email protected]>

On 28.11.2016 12:17, Marek Olšák wrote:
From: Marek Olšák <[email protected]>

The compiler doesn't shrink s_load_dwordx8, so we always wasted 4 SGPRs.
Also, the extraction of the descriptor created some really ugly asm code
with lots of VALU bitwise ops and v_readfirstlane.

Totals from *affected* shaders:
SGPRS: 13880 -> 13253 (-4.52 %)
VGPRS: 15200 -> 15088 (-0.74 %)
Code Size: 499864 -> 459816 (-8.01 %) bytes
Max Waves: 1554 -> 1564 (0.64 %)
---
 src/gallium/drivers/radeonsi/si_shader.c | 94 +++++++++++++++-----------------
 1 file changed, 43 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f4c6e9c..bb57e78 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3107,30 +3107,30 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
  */
 static LLVMValueRef get_buffer_size(
        struct lp_build_tgsi_context *bld_base,
        LLVMValueRef descriptor)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef size =
                LLVMBuildExtractElement(builder, descriptor,
-                                       lp_build_const_int32(gallivm, 6), "");
+                                       lp_build_const_int32(gallivm, 2), "");

        if (ctx->screen->b.chip_class >= VI) {
                /* On VI, the descriptor contains the size in bytes,
                 * but TXQ must return the size in elements.
                 * The stride is always non-zero for resources using TXQ.
                 */
                LLVMValueRef stride =
                        LLVMBuildExtractElement(builder, descriptor,
-                                               lp_build_const_int32(gallivm, 5), 
"");
+                                               lp_build_const_int32(gallivm, 1), 
"");
                stride = LLVMBuildLShr(builder, stride,
                                       lp_build_const_int32(gallivm, 16), "");
                stride = LLVMBuildAnd(builder, stride,
                                      lp_build_const_int32(gallivm, 0x3FFF), 
"");

                size = LLVMBuildUDiv(builder, size, stride, "");
        }

        return size;
 }
@@ -3271,20 +3271,26 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
                LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
                LLVMValueRef i32_C = LLVMConstInt(ctx->i32, 
C_008F28_COMPRESSION_EN, 0);
                LLVMValueRef tmp;

                tmp = LLVMBuildExtractElement(builder, rsrc, i32_6, "");
                tmp = LLVMBuildAnd(builder, tmp, i32_C, "");
                return LLVMBuildInsertElement(builder, rsrc, tmp, i32_6, "");
        }
 }

+static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
+{
+       return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
+                              CONST_ADDR_SPACE);
+}
+
 /**
  * Load the resource descriptor for \p image.
  */
 static void
 image_fetch_rsrc(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *image,
        bool is_store, unsigned target,
        LLVMValueRef *rsrc)
 {
@@ -3312,20 +3318,33 @@ image_fetch_rsrc(
                 *    and if the index used to select an individual element is
                 *    negative or greater than or equal to the size of the
                 *    array, the results of the operation are undefined but may
                 *    not lead to termination.
                 */
                index = get_bounded_indirect_index(ctx, &image->Indirect,
                                                   image->Register.Index,
                                                   SI_NUM_IMAGES);
        }

+       if (target == TGSI_TEXTURE_BUFFER) {
+               LLVMBuilderRef builder = ctx->gallivm.builder;
+
+               rsrc_ptr = LLVMBuildPointerCast(builder, rsrc_ptr,
+                                               const_array(ctx->v4i32, 0), "");
+               index = LLVMBuildMul(builder, index,
+                                    LLVMConstInt(ctx->i32, 2, 0), "");
+               index = LLVMBuildAdd(builder, index,
+                                    LLVMConstInt(ctx->i32, 1, 0), "");
+               *rsrc = build_indexed_load_const(ctx, rsrc_ptr, index);
+               return;
+       }
+
        tmp = build_indexed_load_const(ctx, rsrc_ptr, index);
        if (dcc_off)
                tmp = force_dcc_off(ctx, tmp);
        *rsrc = tmp;
 }

 static LLVMValueRef image_fetch_coords(
                struct lp_build_tgsi_context *bld_base,
                const struct tgsi_full_instruction *inst,
                unsigned src)
@@ -3387,39 +3406,20 @@ static void image_append_args(
        }

        /* HAVE_LLVM >= 0x0400 */
        emit_data->args[emit_data->arg_count++] = glc;
        emit_data->args[emit_data->arg_count++] = slc;
        emit_data->args[emit_data->arg_count++] = lwe;
        emit_data->args[emit_data->arg_count++] = da;
 }

 /**
- * Given a 256 bit resource, extract the top half (which stores the buffer
- * resource in the case of textures and images).
- */
-static LLVMValueRef extract_rsrc_top_half(
-               struct si_shader_context *ctx,
-               LLVMValueRef rsrc)
-{
-       struct gallivm_state *gallivm = &ctx->gallivm;
-       struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
-       LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
-
-       rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, "");
-       rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, 
"");
-       rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, "");
-
-       return rsrc;
-}
-
-/**
  * Append the resource and indexing arguments for buffer intrinsics.
  *
  * \param rsrc the v4i32 buffer resource
  * \param index index into the buffer (stride-based)
  * \param offset byte offset into the buffer
  */
 static void buffer_append_args(
                struct si_shader_context *ctx,
                struct lp_build_emit_data *emit_data,
                LLVMValueRef rsrc,
@@ -3466,21 +3466,20 @@ static void load_fetch_args(

                buffer_append_args(ctx, emit_data, rsrc, 
bld_base->uint_bld.zero,
                                   offset, false);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
                LLVMValueRef coords;

                image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
                coords = image_fetch_coords(bld_base, inst, 1);

                if (target == TGSI_TEXTURE_BUFFER) {
-                       rsrc = extract_rsrc_top_half(ctx, rsrc);
                        buffer_append_args(ctx, emit_data, rsrc, coords,
                                        bld_base->uint_bld.zero, false);
                } else {
                        emit_data->args[0] = coords;
                        emit_data->args[1] = rsrc;
                        emit_data->args[2] = lp_build_const_int32(gallivm, 15); 
/* dmask */
                        emit_data->arg_count = 3;

                        image_append_args(ctx, emit_data, target, false);
                }
@@ -3674,22 +3673,20 @@ static void store_fetch_args(
                buffer_append_args(ctx, emit_data, rsrc, 
bld_base->uint_bld.zero,
                                   offset, false);
        } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
                unsigned target = inst->Memory.Texture;
                LLVMValueRef coords;

                coords = image_fetch_coords(bld_base, inst, 0);

                if (target == TGSI_TEXTURE_BUFFER) {
                        image_fetch_rsrc(bld_base, &memory, true, target, 
&rsrc);
-
-                       rsrc = extract_rsrc_top_half(ctx, rsrc);
                        buffer_append_args(ctx, emit_data, rsrc, coords,
                                        bld_base->uint_bld.zero, false);
                } else {
                        emit_data->args[1] = coords;
                        image_fetch_rsrc(bld_base, &memory, true, target,
                                         &emit_data->args[2]);
                        emit_data->args[3] = lp_build_const_int32(gallivm, 15); 
/* dmask */
                        emit_data->arg_count = 4;

                        image_append_args(ctx, emit_data, target, false);
@@ -3878,21 +3875,20 @@ static void atomic_fetch_args(
                buffer_append_args(ctx, emit_data, rsrc, 
bld_base->uint_bld.zero,
                                   offset, true);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
                unsigned target = inst->Memory.Texture;
                LLVMValueRef coords;

                image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
                coords = image_fetch_coords(bld_base, inst, 1);

                if (target == TGSI_TEXTURE_BUFFER) {
-                       rsrc = extract_rsrc_top_half(ctx, rsrc);
                        buffer_append_args(ctx, emit_data, rsrc, coords,
                                           bld_base->uint_bld.zero, true);
                } else {
                        emit_data->args[emit_data->arg_count++] = coords;
                        emit_data->args[emit_data->arg_count++] = rsrc;

                        image_append_args(ctx, emit_data, target, true);
                }
        }
 }
@@ -4122,45 +4118,47 @@ static void set_tex_fetch_args(struct si_shader_context *ctx,
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe 
*/
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe 
*/

        emit_data->arg_count = num_args;
 }

 static const struct lp_build_tgsi_action tex_action;

 enum desc_type {
        DESC_IMAGE,
+       DESC_BUFFER,
        DESC_FMASK,
-       DESC_SAMPLER
+       DESC_SAMPLER,
 };

-static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
-{
-       return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
-                              CONST_ADDR_SPACE);
-}
-
 /**
  * Load an image view, fmask view. or sampler state descriptor.
  */
 static LLVMValueRef load_sampler_desc_custom(struct si_shader_context *ctx,
                                             LLVMValueRef list, LLVMValueRef 
index,
                                             enum desc_type type)
 {
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;

        switch (type) {
        case DESC_IMAGE:
                /* The image is at [0:7]. */
                index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), 
"");
                break;
+       case DESC_BUFFER:
+               /* The buffer is in [4:7]. */
+               index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), 
"");
+               index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), 
"");
+               list = LLVMBuildPointerCast(builder, list,
+                                           const_array(ctx->v4i32, 0), "");
+               break;
        case DESC_FMASK:
                /* The FMASK is at [8:15]. */
                index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), 
"");
                index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), 
"");
                break;
        case DESC_SAMPLER:
                /* The sampler state is at [12:15]. */
                index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), 
"");
                index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), 
"");
                list = LLVMBuildPointerCast(builder, list,
@@ -4228,56 +4226,57 @@ static void tex_fetch_ptrs(
                const struct tgsi_full_src_register *reg = 
&emit_data->inst->Src[sampler_src];

                index = get_bounded_indirect_index(ctx,
                                                   &reg->Indirect,
                                                   reg->Register.Index,
                                                   SI_NUM_SAMPLERS);
        } else {
                index = LLVMConstInt(ctx->i32, sampler_index, 0);
        }

-       *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
+       if (target == TGSI_TEXTURE_BUFFER)
+               *res_ptr = load_sampler_desc(ctx, index, DESC_BUFFER);
+       else
+               *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);
+
+       if (samp_ptr)
+               *samp_ptr = NULL;
+       if (fmask_ptr)
+               *fmask_ptr = NULL;

        if (target == TGSI_TEXTURE_2D_MSAA ||
            target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-               if (samp_ptr)
-                       *samp_ptr = NULL;
                if (fmask_ptr)
                        *fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);
-       } else {
+       } else if (target != TGSI_TEXTURE_BUFFER) {
                if (samp_ptr) {
                        *samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
                        *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, 
*samp_ptr);
                }
-               if (fmask_ptr)
-                       *fmask_ptr = NULL;
        }
 }

 static void txq_fetch_args(
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
-       LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned target = inst->Texture.Texture;
        LLVMValueRef res_ptr;
        LLVMValueRef address;

        tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL);

        if (target == TGSI_TEXTURE_BUFFER) {
                /* Read the size from the buffer descriptor directly. */
-               LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, 
"");
-               emit_data->args[0] = get_buffer_size(bld_base, res);
+               emit_data->args[0] = get_buffer_size(bld_base, res_ptr);
                return;
        }

        /* Textures - set the mip level. */
        address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);

        set_tex_fetch_args(ctx, emit_data, TGSI_OPCODE_TXQ, target, res_ptr,
                           NULL, &address, 1, 0xf);
 }

@@ -4331,30 +4330,23 @@ static void tex_fetch_args(
        unsigned count = 0;
        unsigned chan;
        unsigned num_deriv_channels = 0;
        bool has_offset = inst->Texture.NumOffsets > 0;
        LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
        unsigned dmask = 0xf;

        tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);

        if (target == TGSI_TEXTURE_BUFFER) {
-               LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
-
-               /* Bitcast and truncate v8i32 to v16i8. */
-               LLVMValueRef res = res_ptr;
-               res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
-               res = LLVMBuildExtractElement(gallivm->builder, res, 
bld_base->uint_bld.one, "");
-               res = LLVMBuildBitCast(gallivm->builder, res, ctx->v16i8, "");
-
                emit_data->dst_type = ctx->v4f32;
-               emit_data->args[0] = res;
+               emit_data->args[0] = LLVMBuildBitCast(gallivm->builder, res_ptr,
+                                                     ctx->v16i8, "");
                emit_data->args[1] = bld_base->uint_bld.zero;
                emit_data->args[2] = lp_build_emit_fetch(bld_base, 
emit_data->inst, 0, TGSI_CHAN_X);
                emit_data->arg_count = 3;
                return;
        }

        /* Fetch and project texture coordinates */
        coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 
TGSI_CHAN_W);
        for (chan = 0; chan < 3; chan++ ) {
                coords[chan] = lp_build_emit_fetch(bld_base,

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to