Module: Mesa
Branch: main
Commit: 1728111c29123e03bc31e79a6f564806bbce8821
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1728111c29123e03bc31e79a6f564806bbce8821

Author: Qiang Yu <[email protected]>
Date:   Mon Aug  7 14:28:50 2023 +0800

radeonsi: extract si_get_ps_prolog_args to be shared with aco

Reviewed-by: Marek Olšák <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24989>

---

 src/gallium/drivers/radeonsi/si_shader.c          |  34 ++++
 src/gallium/drivers/radeonsi/si_shader_internal.h |   3 +-
 src/gallium/drivers/radeonsi/si_shader_llvm.c     |  11 --
 src/gallium/drivers/radeonsi/si_shader_llvm_ps.c  | 212 ++++++++--------------
 4 files changed, 115 insertions(+), 145 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5d53a6b3cd3..bbafd08d26d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3662,3 +3662,37 @@ void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
    args->ac.start_instance = input_sgprs[user_sgpr_base + 
SI_SGPR_START_INSTANCE];
    args->ac.base_vertex = input_sgprs[user_sgpr_base + SI_SGPR_BASE_VERTEX];
 }
+
+void si_get_ps_prolog_args(struct si_shader_args *args,
+                           const union si_shader_part_key *key)
+{
+   memset(args, 0, sizeof(*args));
+
+   const unsigned num_input_sgprs = key->ps_prolog.num_input_sgprs;
+
+   struct ac_arg input_sgprs[num_input_sgprs];
+   for (unsigned i = 0; i < num_input_sgprs; i++)
+      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, input_sgprs + i);
+
+   args->internal_bindings = input_sgprs[SI_SGPR_INTERNAL_BINDINGS];
+   /* Use the absolute location of the input. */
+   args->ac.prim_mask = input_sgprs[SI_PS_NUM_USER_SGPR];
+
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_FLOAT, &args->ac.persp_sample);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_FLOAT, &args->ac.persp_center);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_FLOAT, 
&args->ac.persp_centroid);
+   /* skip PERSP_PULL_MODEL */
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_FLOAT, 
&args->ac.linear_sample);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_FLOAT, 
&args->ac.linear_center);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_FLOAT, 
&args->ac.linear_centroid);
+   /* skip LINE_STIPPLE_TEX */
+
+   /* POS_X|Y|Z|W_FLOAT */
+   for (unsigned i = args->ac.num_vgprs_used; i < 
key->ps_prolog.face_vgpr_index; i++)
+      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
+
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.front_face);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.ancillary);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, 
&args->ac.sample_coverage);
+   ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.pos_fixed_pt);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index d5bf2915fbd..854ac377b15 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -156,6 +156,8 @@ void si_get_tcs_epilog_args(enum amd_gfx_level gfx_level,
 void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
                            struct si_shader_args *args,
                            const union si_shader_part_key *key);
+void si_get_ps_prolog_args(struct si_shader_args *args,
+                           const union si_shader_part_key *key);
 
 /* gfx10_shader_ngg.c */
 unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
@@ -199,7 +201,6 @@ LLVMValueRef si_insert_input_ret_float(struct 
si_shader_context *ctx, LLVMValueR
                                        struct ac_arg param, unsigned 
return_index);
 LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef 
ret,
                                  struct ac_arg param, unsigned return_index);
-LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx);
 LLVMValueRef si_prolog_get_internal_binding_slot(struct si_shader_context 
*ctx, unsigned slot);
 LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg 
param, unsigned rshift,
                              unsigned bitwidth);
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 2e8f389a66e..ab198267394 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -290,17 +290,6 @@ LLVMValueRef si_insert_input_ptr(struct si_shader_context 
*ctx, LLVMValueRef ret
    return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
 }
 
-LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx)
-{
-   LLVMValueRef ptr[2], list;
-   bool merged_shader = si_is_merged_shader(ctx->shader);
-
-   ptr[0] = LLVMGetParam(ctx->main_fn.value, (merged_shader ? 8 : 0) + 
SI_SGPR_INTERNAL_BINDINGS);
-   list =
-      LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], 
ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
-   return list;
-}
-
 LLVMValueRef si_prolog_get_internal_binding_slot(struct si_shader_context 
*ctx, unsigned slot)
 {
    LLVMValueRef list = LLVMBuildIntToPtr(
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
index 2874d171593..ae89ff039e9 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
@@ -58,9 +58,6 @@ static void interp_fs_color(struct si_shader_context *ctx, 
unsigned input_index,
    bool interp = interp_param != NULL;
 
    if (interp) {
-      interp_param =
-         LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");
-
       i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, 
ctx->ac.i32_0, "");
       j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, 
ctx->ac.i32_1, "");
    }
@@ -428,24 +425,20 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
    ctx->return_value = ret;
 }
 
-static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
-                                         LLVMValueRef param_internal_bindings,
-                                         struct ac_arg param_pos_fixed_pt)
+static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx)
 {
    LLVMBuilderRef builder = ctx->ac.builder;
-   LLVMValueRef slot, desc, offset, row, bit, address[2];
+   LLVMValueRef desc, offset, row, bit, address[2];
 
    /* Use the fixed-point gl_FragCoord input.
     * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
     * per coordinate to get the repeating effect.
     */
-   address[0] = si_unpack_param(ctx, param_pos_fixed_pt, 0, 5);
-   address[1] = si_unpack_param(ctx, param_pos_fixed_pt, 16, 5);
+   address[0] = si_unpack_param(ctx, ctx->args->ac.pos_fixed_pt, 0, 5);
+   address[1] = si_unpack_param(ctx, ctx->args->ac.pos_fixed_pt, 16, 5);
 
    /* Load the buffer descriptor. */
-   slot = LLVMConstInt(ctx->ac.i32, SI_PS_CONST_POLY_STIPPLE, 0);
-
-   desc = ac_build_load_to_sgpr(&ctx->ac, (struct ac_llvm_pointer) { .t = 
ctx->ac.v4i32, .v = param_internal_bindings }, slot);
+   desc = si_prolog_get_internal_binding_slot(ctx, SI_PS_CONST_POLY_STIPPLE);
 
    /* The stipple pattern is 32x32, each row has 32 bits. */
    offset = LLVMBuildMul(builder, address[1], LLVMConstInt(ctx->ac.i32, 4, 0), 
"");
@@ -456,6 +449,25 @@ static void si_llvm_emit_polygon_stipple(struct 
si_shader_context *ctx,
    ac_build_kill_if_false(&ctx->ac, bit);
 }
 
+static LLVMValueRef insert_ret_of_arg(struct si_shader_context *ctx, 
LLVMValueRef ret,
+                                      LLVMValueRef data, unsigned arg_index)
+{
+   unsigned base = ctx->args->ac.args[arg_index].file == AC_ARG_VGPR ?
+      ctx->args->ac.num_sgprs_used : 0;
+   unsigned index = base + ctx->args->ac.args[arg_index].offset;
+
+   if (ctx->args->ac.args[arg_index].size == 1) {
+      return LLVMBuildInsertValue(ctx->ac.builder, ret, data, index, "");
+   } else {
+      assert(ctx->args->ac.args[arg_index].size == 2);
+      LLVMValueRef tmp = LLVMBuildExtractElement(ctx->ac.builder, data, 
ctx->ac.i32_0, "");
+      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, index, "");
+      tmp = LLVMBuildExtractElement(ctx->ac.builder, data, ctx->ac.i32_1, "");
+      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, index + 1, "");
+      return ret;
+   }
+}
+
 /**
  * Build the pixel shader prolog function. This handles:
  * - two-side color selection and interpolation
@@ -468,204 +480,138 @@ static void si_llvm_emit_polygon_stipple(struct 
si_shader_context *ctx,
  */
 void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union 
si_shader_part_key *key)
 {
-   LLVMValueRef ret, func;
-   int num_returns, i, num_color_channels;
-
-   memset(ctx->args, 0, sizeof(*ctx->args));
+   struct si_shader_args *args = ctx->args;
+   si_get_ps_prolog_args(args, key);
 
-   /* Declare inputs. */
+   /* Declare outputs (same as inputs + add colors if needed) */
    LLVMTypeRef return_types[AC_MAX_ARGS];
-   num_returns = 0;
-   num_color_channels = util_bitcount(key->ps_prolog.colors_read);
-   assert(key->ps_prolog.num_input_sgprs + key->ps_prolog.num_input_vgprs + 
num_color_channels <=
-          AC_MAX_ARGS);
-   for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) {
-      ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
-      return_types[num_returns++] = ctx->ac.i32;
-   }
+   int num_returns = 0;
 
-   struct ac_arg pos_fixed_pt;
-   struct ac_arg ancillary;
-   struct ac_arg param_sample_mask;
-   for (i = 0; i < key->ps_prolog.num_input_vgprs; i++) {
-      struct ac_arg *arg = NULL;
-      if (i == key->ps_prolog.ancillary_vgpr_index) {
-         arg = &ancillary;
-      } else if (i == key->ps_prolog.sample_coverage_vgpr_index) {
-         arg = &param_sample_mask;
-      } else if (i == key->ps_prolog.num_input_vgprs - 1) {
-         /* POS_FIXED_PT is always last. */
-         arg = &pos_fixed_pt;
-      }
-      ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, arg);
-      return_types[num_returns++] = ctx->ac.f32;
-   }
+   for (int i = 0; i < args->ac.num_sgprs_used; i++)
+      return_types[num_returns++] = ctx->ac.i32;
 
-   /* Declare outputs (same as inputs + add colors if needed) */
-   for (i = 0; i < num_color_channels; i++)
+   unsigned num_color_channels = util_bitcount(key->ps_prolog.colors_read);
+   unsigned num_output_vgprs = args->ac.num_vgprs_used + num_color_channels;
+   for (int i = 0; i < num_output_vgprs; i++)
       return_types[num_returns++] = ctx->ac.f32;
 
    /* Create the function. */
    si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0);
-   func = ctx->main_fn.value;
+   LLVMValueRef func = ctx->main_fn.value;
 
    /* Copy inputs to outputs. This should be no-op, as the registers match,
     * but it will prevent the compiler from overwriting them unintentionally.
     */
-   ret = ctx->return_value;
-   for (i = 0; i < ctx->args->ac.arg_count; i++) {
+   LLVMValueRef ret = ctx->return_value;
+   for (int i = 0; i < args->ac.arg_count; i++) {
       LLVMValueRef p = LLVMGetParam(func, i);
-      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
+      ret = insert_ret_of_arg(ctx, ret, p, i);
    }
 
    /* Polygon stippling. */
-   if (key->ps_prolog.states.poly_stipple) {
-      LLVMValueRef list = si_prolog_get_internal_bindings(ctx);
-
-      si_llvm_emit_polygon_stipple(ctx, list, pos_fixed_pt);
-   }
+   if (key->ps_prolog.states.poly_stipple)
+      si_llvm_emit_polygon_stipple(ctx);
 
    if (key->ps_prolog.states.bc_optimize_for_persp ||
        key->ps_prolog.states.bc_optimize_for_linear) {
-      unsigned i, base = key->ps_prolog.num_input_sgprs;
-      LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
+      LLVMValueRef center, centroid, tmp;
 
       /* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
        * The hw doesn't compute CENTROID if the whole wave only
        * contains fully-covered quads.
-       *
-       * PRIM_MASK is after user SGPRs.
        */
-      bc_optimize = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
+      LLVMValueRef bc_optimize = ac_get_arg(&ctx->ac, args->ac.prim_mask);
       bc_optimize =
          LLVMBuildLShr(ctx->ac.builder, bc_optimize, LLVMConstInt(ctx->ac.i32, 
31, 0), "");
       bc_optimize = LLVMBuildTrunc(ctx->ac.builder, bc_optimize, ctx->ac.i1, 
"");
 
       if (key->ps_prolog.states.bc_optimize_for_persp) {
-         /* Read PERSP_CENTER. */
-         for (i = 0; i < 2; i++)
-            center[i] = LLVMGetParam(func, base + 2 + i);
-         /* Read PERSP_CENTROID. */
-         for (i = 0; i < 2; i++)
-            centroid[i] = LLVMGetParam(func, base + 4 + i);
+         center = ac_get_arg(&ctx->ac, args->ac.persp_center);
+         centroid = ac_get_arg(&ctx->ac, args->ac.persp_centroid);
          /* Select PERSP_CENTROID. */
-         for (i = 0; i < 2; i++) {
-            tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center[i], 
centroid[i], "");
-            ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, base + 4 + 
i, "");
-         }
+         tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, 
"");
+         ret = insert_ret_of_arg(ctx, ret, tmp, 
args->ac.persp_centroid.arg_index);
       }
       if (key->ps_prolog.states.bc_optimize_for_linear) {
-         /* Read LINEAR_CENTER. */
-         for (i = 0; i < 2; i++)
-            center[i] = LLVMGetParam(func, base + 8 + i);
-         /* Read LINEAR_CENTROID. */
-         for (i = 0; i < 2; i++)
-            centroid[i] = LLVMGetParam(func, base + 10 + i);
-         /* Select LINEAR_CENTROID. */
-         for (i = 0; i < 2; i++) {
-            tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center[i], 
centroid[i], "");
-            ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, base + 10 + 
i, "");
-         }
+         center = ac_get_arg(&ctx->ac, args->ac.linear_center);
+         centroid = ac_get_arg(&ctx->ac, args->ac.linear_centroid);
+         /* Select LINEAR_CENTROID. */
+         tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, 
"");
+         ret = insert_ret_of_arg(ctx, ret, tmp, 
args->ac.linear_centroid.arg_index);
       }
    }
 
    /* Force per-sample interpolation. */
    if (key->ps_prolog.states.force_persp_sample_interp) {
-      unsigned i, base = key->ps_prolog.num_input_sgprs;
-      LLVMValueRef persp_sample[2];
-
-      /* Read PERSP_SAMPLE. */
-      for (i = 0; i < 2; i++)
-         persp_sample[i] = LLVMGetParam(func, base + i);
+      LLVMValueRef persp_sample = ac_get_arg(&ctx->ac, args->ac.persp_sample);
       /* Overwrite PERSP_CENTER. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_sample[i], 
base + 2 + i, "");
+      ret = insert_ret_of_arg(ctx, ret, persp_sample, 
args->ac.persp_center.arg_index);
       /* Overwrite PERSP_CENTROID. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_sample[i], 
base + 4 + i, "");
+      ret = insert_ret_of_arg(ctx, ret, persp_sample, 
args->ac.persp_centroid.arg_index);
    }
    if (key->ps_prolog.states.force_linear_sample_interp) {
-      unsigned i, base = key->ps_prolog.num_input_sgprs;
-      LLVMValueRef linear_sample[2];
-
-      /* Read LINEAR_SAMPLE. */
-      for (i = 0; i < 2; i++)
-         linear_sample[i] = LLVMGetParam(func, base + 6 + i);
+      LLVMValueRef linear_sample = ac_get_arg(&ctx->ac, 
args->ac.linear_sample);
       /* Overwrite LINEAR_CENTER. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], 
base + 8 + i, "");
+      ret = insert_ret_of_arg(ctx, ret, linear_sample, 
args->ac.linear_center.arg_index);
       /* Overwrite LINEAR_CENTROID. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], 
base + 10 + i, "");
+      ret = insert_ret_of_arg(ctx, ret, linear_sample, 
args->ac.linear_centroid.arg_index);
    }
 
    /* Force center interpolation. */
    if (key->ps_prolog.states.force_persp_center_interp) {
-      unsigned i, base = key->ps_prolog.num_input_sgprs;
-      LLVMValueRef persp_center[2];
-
-      /* Read PERSP_CENTER. */
-      for (i = 0; i < 2; i++)
-         persp_center[i] = LLVMGetParam(func, base + 2 + i);
+      LLVMValueRef persp_center = ac_get_arg(&ctx->ac, args->ac.persp_center);
       /* Overwrite PERSP_SAMPLE. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_center[i], 
base + i, "");
+      ret = insert_ret_of_arg(ctx, ret, persp_center, 
args->ac.persp_sample.arg_index);
       /* Overwrite PERSP_CENTROID. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_center[i], 
base + 4 + i, "");
+      ret = insert_ret_of_arg(ctx, ret, persp_center, 
args->ac.persp_centroid.arg_index);
    }
    if (key->ps_prolog.states.force_linear_center_interp) {
-      unsigned i, base = key->ps_prolog.num_input_sgprs;
-      LLVMValueRef linear_center[2];
-
-      /* Read LINEAR_CENTER. */
-      for (i = 0; i < 2; i++)
-         linear_center[i] = LLVMGetParam(func, base + 8 + i);
+      LLVMValueRef linear_center = ac_get_arg(&ctx->ac, 
args->ac.linear_center);
       /* Overwrite LINEAR_SAMPLE. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_center[i], 
base + 6 + i, "");
+      ret = insert_ret_of_arg(ctx, ret, linear_center, 
args->ac.linear_sample.arg_index);
       /* Overwrite LINEAR_CENTROID. */
-      for (i = 0; i < 2; i++)
-         ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_center[i], 
base + 10 + i, "");
+      ret = insert_ret_of_arg(ctx, ret, linear_center, 
args->ac.linear_centroid.arg_index);
    }
 
    /* Interpolate colors. */
    unsigned color_out_idx = 0;
-   for (i = 0; i < 2; i++) {
+   unsigned num_input_gprs = args->ac.num_sgprs_used + args->ac.num_vgprs_used;
+   for (int i = 0; i < 2; i++) {
       unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
-      unsigned face_vgpr = key->ps_prolog.num_input_sgprs + 
key->ps_prolog.face_vgpr_index;
-      LLVMValueRef interp[2], color[4];
-      LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
 
       if (!writemask)
          continue;
 
       /* If the interpolation qualifier is not CONSTANT (-1). */
+      LLVMValueRef interp_ij = NULL;
       if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
-         unsigned interp_vgpr =
-            key->ps_prolog.num_input_sgprs + 
key->ps_prolog.color_interp_vgpr_index[i];
+         unsigned index =
+            args->ac.num_sgprs_used + 
key->ps_prolog.color_interp_vgpr_index[i];
 
          /* Get the (i,j) updated by bc_optimize handling. */
-         interp[0] = LLVMBuildExtractValue(ctx->ac.builder, ret, interp_vgpr, 
"");
-         interp[1] = LLVMBuildExtractValue(ctx->ac.builder, ret, interp_vgpr + 
1, "");
+         LLVMValueRef interp[2] = {
+            LLVMBuildExtractValue(ctx->ac.builder, ret, index, ""),
+            LLVMBuildExtractValue(ctx->ac.builder, ret, index + 1, ""),
+         };
          interp_ij = ac_build_gather_values(&ctx->ac, interp, 2);
       }
 
-      /* Use the absolute location of the input. */
-      prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
+      LLVMValueRef prim_mask = ac_get_arg(&ctx->ac, args->ac.prim_mask);
 
+      LLVMValueRef face = NULL;
       if (key->ps_prolog.states.color_two_side) {
-         face = LLVMGetParam(func, face_vgpr);
+         face = ac_get_arg(&ctx->ac, args->ac.front_face);
          face = ac_to_integer(&ctx->ac, face);
       }
 
+      LLVMValueRef color[4];
       interp_fs_color(ctx, key->ps_prolog.color_attr_index[i], i, 
key->ps_prolog.num_interp_inputs,
                       key->ps_prolog.colors_read, interp_ij, prim_mask, face, 
color);
 
       while (writemask) {
          unsigned chan = u_bit_scan(&writemask);
          ret = LLVMBuildInsertValue(ctx->ac.builder, ret, color[chan],
-                                    ctx->args->ac.arg_count + color_out_idx++, 
"");
+                                    num_input_gprs + color_out_idx++, "");
       }
    }
 
@@ -688,8 +634,8 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, 
union si_shader_part
    if (key->ps_prolog.states.samplemask_log_ps_iter) {
       uint32_t ps_iter_mask =
          ac_get_ps_iter_mask(1 << 
key->ps_prolog.states.samplemask_log_ps_iter);
-      LLVMValueRef sampleid = si_unpack_param(ctx, ancillary, 8, 4);
-      LLVMValueRef samplemask = ac_get_arg(&ctx->ac, param_sample_mask);
+      LLVMValueRef sampleid = si_unpack_param(ctx, args->ac.ancillary, 8, 4);
+      LLVMValueRef samplemask = ac_get_arg(&ctx->ac, args->ac.sample_coverage);
 
       samplemask = ac_to_integer(&ctx->ac, samplemask);
       samplemask =
@@ -699,7 +645,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, 
union si_shader_part
                       "");
       samplemask = ac_to_float(&ctx->ac, samplemask);
 
-      ret = LLVMBuildInsertValue(ctx->ac.builder, ret, samplemask, 
param_sample_mask.arg_index, "");
+      ret = insert_ret_of_arg(ctx, ret, samplemask, 
args->ac.sample_coverage.arg_index);
    }
 
    /* Tell LLVM to insert WQM instruction sequence when needed. */

Reply via email to