On 01/10/2018 05:11 PM, Marek Olšák wrote:
On Mon, Jan 8, 2018 at 10:31 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:


On 01/06/2018 12:12 PM, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

SGPRS: 2170102 -> 2158430 (-0.54 %)
VGPRS: 1645656 -> 1641516 (-0.25 %)
Spilled SGPRs: 9078 -> 8810 (-2.95 %)
Spilled VGPRs: 130 -> 114 (-12.31 %)
Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread
Code Size: 52094872 -> 52692540 (1.15 %) bytes


These numbers are quite nice, great work! I think it's something I would
like to implement for RADV.

Just one minor nitpick below.

Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>


---
   src/amd/common/ac_llvm_build.c                    |  13 +++
   src/amd/common/ac_llvm_build.h                    |   5 +
   src/gallium/drivers/radeonsi/si_descriptors.c     |  10 +-
   src/gallium/drivers/radeonsi/si_shader.c          | 115
+++++++++++++---------
   src/gallium/drivers/radeonsi/si_shader.h          |  23 ++++-
   src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |   6 +-
   6 files changed, 122 insertions(+), 50 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c
b/src/amd/common/ac_llvm_build.c
index ed00d20..02d1b39 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -57,20 +57,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
LLVMContextRef context,
         ctx->context = context;
         ctx->module = NULL;
         ctx->builder = NULL;
         ctx->voidt = LLVMVoidTypeInContext(ctx->context);
         ctx->i1 = LLVMInt1TypeInContext(ctx->context);
         ctx->i8 = LLVMInt8TypeInContext(ctx->context);
         ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
         ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
         ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
+       ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
         ctx->f16 = LLVMHalfTypeInContext(ctx->context);
         ctx->f32 = LLVMFloatTypeInContext(ctx->context);
         ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
         ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
         ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
         ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
@@ -128,21 +129,24 @@ unsigned
   ac_get_type_size(LLVMTypeRef type)
   {
         LLVMTypeKind kind = LLVMGetTypeKind(type);
         switch (kind) {
         case LLVMIntegerTypeKind:
                 return LLVMGetIntTypeWidth(type) / 8;
         case LLVMFloatTypeKind:
                 return 4;
         case LLVMDoubleTypeKind:
+               return 8;
         case LLVMPointerTypeKind:
+               if (LLVMGetPointerAddressSpace(type) ==
AC_CONST_32BIT_ADDR_SPACE)
+                       return 4;
                 return 8;
         case LLVMVectorTypeKind:
                 return LLVMGetVectorSize(type) *
                        ac_get_type_size(LLVMGetElementType(type));
         case LLVMArrayTypeKind:
                 return LLVMGetArrayLength(type) *
                        ac_get_type_size(LLVMGetElementType(type));
         default:
                 assert(0);
                 return 0;
@@ -2035,10 +2039,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context
*ctx,
                                                            LLVMIntEQ,
src0,
                                                            ctx->i32_0,
""),
                                LLVMConstInt(ctx->i32, -1, 0), lsb, "");
   }
     LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
   {
         return LLVMPointerType(LLVMArrayType(elem_type, 0),
                                AC_CONST_ADDR_SPACE);
   }
+
+LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
+{
+       if (!HAVE_32BIT_POINTERS)
+               return ac_array_in_const_addr_space(elem_type);
+
+       return LLVMPointerType(LLVMArrayType(elem_type, 0),
+                              AC_CONST_32BIT_ADDR_SPACE);
+}
diff --git a/src/amd/common/ac_llvm_build.h
b/src/amd/common/ac_llvm_build.h
index b1c4737..5235664 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -27,36 +27,40 @@
     #include <stdbool.h>
   #include <llvm-c/TargetMachine.h>
     #include "amd_family.h"
     #ifdef __cplusplus
   extern "C" {
   #endif
   +#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0600)
+
   enum {
         AC_CONST_ADDR_SPACE = 2, /* CONST is the only address space that
selects SMEM loads */
         AC_LOCAL_ADDR_SPACE = 3,
+       AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer
type has 32 bits */
   };
     struct ac_llvm_context {
         LLVMContextRef context;
         LLVMModuleRef module;
         LLVMBuilderRef builder;
         LLVMTypeRef voidt;
         LLVMTypeRef i1;
         LLVMTypeRef i8;
         LLVMTypeRef i16;
         LLVMTypeRef i32;
         LLVMTypeRef i64;
+       LLVMTypeRef intptr;
         LLVMTypeRef f16;
         LLVMTypeRef f32;
         LLVMTypeRef f64;
         LLVMTypeRef v2i16;
         LLVMTypeRef v2i32;
         LLVMTypeRef v3i32;
         LLVMTypeRef v4i32;
         LLVMTypeRef v2f32;
         LLVMTypeRef v4f32;
         LLVMTypeRef v8i32;
@@ -331,16 +335,17 @@ void ac_declare_lds_as_pointer(struct
ac_llvm_context *ac);
   LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
                          LLVMValueRef dw_addr);
   void ac_lds_store(struct ac_llvm_context *ctx,
                   LLVMValueRef dw_addr, LLVMValueRef value);
     LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
                          LLVMTypeRef dst_type,
                          LLVMValueRef src0);
     LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
+LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
     #ifdef __cplusplus
   }
   #endif
     #endif
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
b/src/gallium/drivers/radeonsi/si_descriptors.c
index b372090..810169d 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1996,31 +1996,35 @@ void si_shader_change_notify(struct si_context
*sctx)
         } else {
                 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
         }
   }
     static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
                                         struct si_descriptors *desc,
                                         unsigned sh_base,
                                         unsigned pointer_count)
   {
-       radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
+       radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count *
(HAVE_32BIT_POINTERS ? 1 : 2), 0));
         radeon_emit(cs, (sh_base + desc->shader_userdata_offset -
SI_SH_REG_OFFSET) >> 2);
   }
     static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
                                         struct si_descriptors *desc)
   {
         uint64_t va = desc->gpu_address;
         radeon_emit(cs, va);
-       radeon_emit(cs, va >> 32);
+
+       if (HAVE_32BIT_POINTERS)
+               assert(va <= 0xffffffff);
+       else
+               radeon_emit(cs, va >> 32);
   }
     static void si_emit_shader_pointer(struct si_context *sctx,
                                    struct si_descriptors *desc,
                                    unsigned sh_base)
   {
         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
         si_emit_shader_pointer_head(cs, desc, sh_base, 1);
         si_emit_shader_pointer_body(cs, desc);
@@ -2553,22 +2557,24 @@ void si_all_resident_buffers_begin_new_cs(struct
si_context *sctx)
         sctx->b.num_resident_handles += num_resident_tex_handles +
                                         num_resident_img_handles;
   }
     /* INIT/DEINIT/UPLOAD */
     void si_init_all_descriptors(struct si_context *sctx)
   {
         int i;
   +#if !HAVE_32BIT_POINTERS
         STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
         STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
+#endif
         for (i = 0; i < SI_NUM_SHADERS; i++) {
                 bool gfx9_tcs = false;
                 bool gfx9_gs = false;
                 unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
SI_NUM_SAMPLERS;
                 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
SI_NUM_CONST_BUFFERS;
                 struct si_descriptors *desc;
                 if (sctx->b.chip_class >= GFX9) {
                         gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index 760e742..57336ff 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3144,26 +3144,32 @@ si_insert_input_ret_float(struct si_shader_context
*ctx, LLVMValueRef ret,
   {
         LLVMBuilderRef builder = ctx->ac.builder;
         LLVMValueRef p = LLVMGetParam(ctx->main_fn, param);
         return LLVMBuildInsertValue(builder, ret,
                                     ac_to_float(&ctx->ac, p),
                                     return_index, "");
   }
     static LLVMValueRef
-si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef
ret,
-                            unsigned param, unsigned return_index)
+si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
+                   unsigned param, unsigned return_index)
   {
         LLVMBuilderRef builder = ctx->ac.builder;
         LLVMValueRef ptr, lo, hi;
   +     if (HAVE_32BIT_POINTERS) {
+               ptr = LLVMGetParam(ctx->main_fn, param);
+               ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
+               return LLVMBuildInsertValue(builder, ret, ptr,
return_index, "");
+       }
+
         ptr = LLVMGetParam(ctx->main_fn, param);
         ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
         ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
         lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
         hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
         ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
         return LLVMBuildInsertValue(builder, ret, hi, return_index + 1,
"");
   }
     /* This only writes the tessellation factor levels. */
@@ -3265,75 +3271,76 @@ static void si_llvm_emit_tcs_epilogue(struct
ac_shader_abi *abi,
   /* Pass TCS inputs from LS to TCS on GFX9. */
   static void si_set_ls_return_value_for_tcs(struct si_shader_context
*ctx)
   {
         LLVMValueRef ret = ctx->return_value;
         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset,
2);
         ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info,
3);
         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset,
4);
         ret = si_insert_input_ret(ctx, ret,
ctx->param_merged_scratch_offset, 5);
   -     ret = si_insert_input_ptr_as_2xi32(ctx, ret,
ctx->param_rw_buffers,
-                                          8 + SI_SGPR_RW_BUFFERS);
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret,
-               ctx->param_bindless_samplers_and_images,
-               8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
+       ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers,
+                                 8 + SI_SGPR_RW_BUFFERS);
+       ret = si_insert_input_ptr(ctx, ret,
+                                 ctx->param_bindless_samplers_and_images,
+                                 8 +
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
         ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
                                   8 + SI_SGPR_VS_STATE_BITS);
         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
                                   8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
         ret = si_insert_input_ret(ctx, ret,
ctx->param_tcs_out_lds_offsets,
                                   8 + GFX9_SGPR_TCS_OUT_OFFSETS);
         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
                                   8 + GFX9_SGPR_TCS_OUT_LAYOUT);
         ret = si_insert_input_ret(ctx, ret,
ctx->param_tcs_offchip_addr_base64k,
                                   8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
         ret = si_insert_input_ret(ctx, ret,
ctx->param_tcs_factor_addr_base64k,
                                   8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
   -     unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
-                                          8 +
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
-                                          8 +
GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
+       unsigned desc_param = ctx->param_tcs_factor_addr_base64k +
+                             (HAVE_32BIT_POINTERS ? 1 : 2);
+       ret = si_insert_input_ptr(ctx, ret, desc_param,
+                                 8 +
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
+       ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
+                                 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
         unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
         ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
                                    ac_to_float(&ctx->ac,
ctx->abi.tcs_patch_id),
                                    vgpr++, "");
         ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
                                    ac_to_float(&ctx->ac,
ctx->abi.tcs_rel_ids),
                                    vgpr++, "");
         ctx->return_value = ret;
   }
     /* Pass GS inputs from ES to GS on GFX9. */
   static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
   {
         LLVMValueRef ret = ctx->return_value;
         ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
         ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info,
3);
         ret = si_insert_input_ret(ctx, ret,
ctx->param_merged_scratch_offset, 5);
   -     ret = si_insert_input_ptr_as_2xi32(ctx, ret,
ctx->param_rw_buffers,
-                                          8 + SI_SGPR_RW_BUFFERS);
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret,
-               ctx->param_bindless_samplers_and_images,
-               8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
+       ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers,
+                                 8 + SI_SGPR_RW_BUFFERS);
+       ret = si_insert_input_ptr(ctx, ret,
+                                 ctx->param_bindless_samplers_and_images,
+                                 8 +
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
         unsigned desc_param = ctx->param_vs_state_bits + 1;
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
-                                          8 +
GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
-                                          8 +
GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
+       ret = si_insert_input_ptr(ctx, ret, desc_param,
+                                 8 +
GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
+       ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
+                                 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
         unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
         for (unsigned i = 0; i < 5; i++) {
                 unsigned param = ctx->param_gs_vtx01_offset + i;
                 ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
         }
         ctx->return_value = ret;
   }
     static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
@@ -4424,46 +4431,46 @@ static void declare_per_stage_desc_pointers(struct
si_shader_context *ctx,
         LLVMTypeRef const_shader_buf_type;
         if (ctx->shader->selector->info.const_buffers_declared == 1 &&
             ctx->shader->selector->info.shader_buffers_declared == 0)
                 const_shader_buf_type = ctx->f32;
         else
                 const_shader_buf_type = ctx->v4i32;
         unsigned const_and_shader_buffers =
                 add_arg(fninfo, ARG_SGPR,
-
ac_array_in_const_addr_space(const_shader_buf_type));
+
ac_array_in_const32_addr_space(const_shader_buf_type));
         unsigned samplers_and_images =
                 add_arg(fninfo, ARG_SGPR,
-                       ac_array_in_const_addr_space(ctx->v8i32));
+                       ac_array_in_const32_addr_space(ctx->v8i32));
         if (assign_params) {
                 ctx->param_const_and_shader_buffers =
const_and_shader_buffers;
                 ctx->param_samplers_and_images = samplers_and_images;
         }
   }
     static void declare_global_desc_pointers(struct si_shader_context
*ctx,
                                          struct si_function_info *fninfo)
   {
         ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
-               ac_array_in_const_addr_space(ctx->v4i32));
+               ac_array_in_const32_addr_space(ctx->v4i32));
         ctx->param_bindless_samplers_and_images = add_arg(fninfo,
ARG_SGPR,
-               ac_array_in_const_addr_space(ctx->v8i32));
+               ac_array_in_const32_addr_space(ctx->v8i32));
   }
     static void declare_vs_specific_input_sgprs(struct si_shader_context
*ctx,
                                             struct si_function_info
*fninfo)
   {
         ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
-               ac_array_in_const_addr_space(ctx->v4i32));
+               ac_array_in_const32_addr_space(ctx->v4i32));
         add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
         add_arg_assign(fninfo, ARG_SGPR, ctx->i32,
&ctx->abi.start_instance);
         add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
         ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
   }
     static void declare_vs_input_vgprs(struct si_shader_context *ctx,
                                    struct si_function_info *fninfo,
                                    unsigned *num_prolog_vgprs)
   {
@@ -4617,21 +4624,22 @@ static void create_function(struct
si_shader_context *ctx)
                 declare_global_desc_pointers(ctx, &fninfo);
                 declare_per_stage_desc_pointers(ctx, &fninfo,
                                                 ctx->type ==
PIPE_SHADER_VERTEX);
                 declare_vs_specific_input_sgprs(ctx, &fninfo);
                 ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR,
ctx->i32);
                 ctx->param_tcs_out_lds_offsets = add_arg(&fninfo,
ARG_SGPR, ctx->i32);
                 ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR,
ctx->i32);
                 ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo,
ARG_SGPR, ctx->i32);
                 ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo,
ARG_SGPR, ctx->i32);
-               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+               if (!HAVE_32BIT_POINTERS)
+                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
                 declare_per_stage_desc_pointers(ctx, &fninfo,
                                                 ctx->type ==
PIPE_SHADER_TESS_CTRL);
                 /* VGPRs (first TCS, then VS) */
                 add_arg_assign(&fninfo, ARG_VGPR, ctx->i32,
&ctx->abi.tcs_patch_id);
                 add_arg_assign(&fninfo, ARG_VGPR, ctx->i32,
&ctx->abi.tcs_rel_ids);
                 if (ctx->type == PIPE_SHADER_VERTEX) {
                         declare_vs_input_vgprs(ctx, &fninfo,
@@ -4673,21 +4681,22 @@ static void create_function(struct
si_shader_context *ctx)
                                                  ctx->type ==
PIPE_SHADER_TESS_EVAL));
                 if (ctx->type == PIPE_SHADER_VERTEX) {
                         declare_vs_specific_input_sgprs(ctx, &fninfo);
                 } else {
                         /* TESS_EVAL (and also GEOMETRY):
                          * Declare as many input SGPRs as the VS has. */
                         ctx->param_tcs_offchip_layout = add_arg(&fninfo,
ARG_SGPR, ctx->i32);
                         ctx->param_tcs_offchip_addr_base64k =
add_arg(&fninfo, ARG_SGPR, ctx->i32);
                         add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
                         add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+                       if (!HAVE_32BIT_POINTERS)
+                               add_arg(&fninfo, ARG_SGPR, ctx->i32); /*
unused */
                         ctx->param_vs_state_bits = add_arg(&fninfo,
ARG_SGPR, ctx->i32); /* unused */
                 }
                 declare_per_stage_desc_pointers(ctx, &fninfo,
                                                 ctx->type ==
PIPE_SHADER_GEOMETRY);
                 /* VGPRs (first GS, then VS/TES) */
                 ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR,
ctx->i32);
                 ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR,
ctx->i32);
                 add_arg_assign(&fninfo, ARG_VGPR, ctx->i32,
&ctx->abi.gs_prim_id);
@@ -6385,20 +6394,25 @@ static void si_build_wrapper_function(struct
si_shader_context *ctx,
         num_out = 0;
         num_out_sgpr = 0;
         for (unsigned i = 0; i < fninfo.num_params; ++i) {
                 LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
                 LLVMTypeRef param_type = LLVMTypeOf(param);
                 LLVMTypeRef out_type = i < fninfo.num_sgpr_params ?
ctx->i32 : ctx->f32;
                 unsigned size = ac_get_type_size(param_type) / 4;
                 if (size == 1) {
+                       if (LLVMGetTypeKind(param_type) ==
LLVMPointerTypeKind) {
+                               param = LLVMBuildPtrToInt(builder, param,
ctx->i32, "");
+                               param_type = ctx->i32;
+                       }
+
                         if (param_type != out_type)
                                 param = LLVMBuildBitCast(builder, param,
out_type, "");
                         out[num_out++] = param;
                 } else {
                         LLVMTypeRef vector_type = LLVMVectorType(out_type,
size);
                         if (LLVMGetTypeKind(param_type) ==
LLVMPointerTypeKind) {
                                 param = LLVMBuildPtrToInt(builder, param,
ctx->i64, "");
                                 param_type = ctx->i64;
                         }
@@ -6460,22 +6474,28 @@ static void si_build_wrapper_function(struct
si_shader_context *ctx,
                         assert(out_idx + param_size <= (is_sgpr ?
num_out_sgpr : num_out));
                         assert(is_sgpr || out_idx >= num_out_sgpr);
                         if (param_size == 1)
                                 arg = out[out_idx];
                         else
                                 arg =
lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size);
                         if (LLVMTypeOf(arg) != param_type) {
                                 if (LLVMGetTypeKind(param_type) ==
LLVMPointerTypeKind) {
-                                       arg = LLVMBuildBitCast(builder,
arg, ctx->i64, "");
-                                       arg = LLVMBuildIntToPtr(builder,
arg, param_type, "");
+                                       if
(LLVMGetPointerAddressSpace(param_type) ==
+                                           AC_CONST_32BIT_ADDR_SPACE) {
+                                               arg =
LLVMBuildBitCast(builder, arg, ctx->i32, "");
+                                               arg =
LLVMBuildIntToPtr(builder, arg, param_type, "");
+                                       } else {
+                                               arg =
LLVMBuildBitCast(builder, arg, ctx->i64, "");
+                                               arg =
LLVMBuildIntToPtr(builder, arg, param_type, "");
+                                       }
                                 } else {
                                         arg = LLVMBuildBitCast(builder,
arg, param_type, "");
                                 }
                         }
                         in[param_idx] = arg;
                         out_idx += param_size;
                 }
                 ret = LLVMBuildCall(builder, parts[part], in, num_params,
"");
@@ -6934,23 +6954,30 @@ out:
     static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context
*ctx)
   {
         LLVMValueRef ptr[2], list;
         bool is_merged_shader =
                 ctx->screen->info.chip_class >= GFX9 &&
                 (ctx->type == PIPE_SHADER_TESS_CTRL ||
                  ctx->type == PIPE_SHADER_GEOMETRY ||
                  ctx->shader->key.as_ls || ctx->shader->key.as_es);
   +     if (HAVE_32BIT_POINTERS) {
+               ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8
: 0) + SI_SGPR_RW_BUFFERS);
+               list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
+
ac_array_in_const32_addr_space(ctx->v4i32), "");
+               return list;
+       }
+
         /* Get the pointer to rw buffers. */
         ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) +
SI_SGPR_RW_BUFFERS);
-       ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) +
SI_SGPR_RW_BUFFERS_HI);
+       ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) +
SI_SGPR_RW_BUFFERS + 1);


I think SI_SGPR_RW_BUFFERS_HI makes more sense here.

Yeah but I can't use the _HI definition here because it's not defined
if HAVE_32BIT_POINTERS.

Right.


Marek

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to