Related bugs:
https://bugs.freedesktop.org/show_bug.cgi?id=99349
https://bugs.freedesktop.org/show_bug.cgi?id=50338

1. Allocate ctx.temp_reg and a limited number of registers
(R600_TEMP_REG_RESERVED=10) that are given out via r600_get_temp() before
the temporaries of the TGSI are allocated. That makes it possible for
tgsi_split_constants() to allocate registers inside the proper GPR range,
so that r600_asm.c:check_and_set_bank_swizzle doesn't fail.

2. Move the test for the register use limit (124) to after the optimization
in r600_pipe_shader_create(). Add a test for a hard limit of 191 in
r600_shader_from_tgsi(), though, to avoid interference with reserved values.
---
 src/gallium/drivers/r600/r600_shader.c | 52 +++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index bdaf28ced2..d550f4cd7f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -83,6 +83,13 @@ The compiler must issue the source argument to slots z, y, 
and x
       face_gpr.w = SampleID
 */
 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
+
+/* Number of GPRs reserved before the temporaries in order to work around
+   problems with shaders that request too many temporaries that can be
+   optimized away in the sb pass.
+*/
+#define R600_TEMP_REG_RESERVED 10
+
 static int r600_shader_from_tgsi(struct r600_context *rctx,
                                 struct r600_pipe_shader *pipeshader,
                                 union r600_shader_key key);
@@ -216,6 +223,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                }
        }
 
+       if (shader->shader.bc.ngpr > 124) {
+               r = -ENOMEM;
+               R600_ERR("Shader GPR limit exceeded - shader requires %d 
registers.\n",
+                        shader->shader.bc.ngpr);
+               goto error;
+       }
+
        if (shader->gs_copy_shader) {
                if (dump) {
                        // dump copy shader
@@ -322,6 +336,7 @@ struct r600_shader_ctx {
        unsigned                                type;
        unsigned                                file_offset[TGSI_FILE_COUNT];
        unsigned                                temp_reg;
+       unsigned                                temp_reg_highmem;
        const struct r600_shader_tgsi_instruction       *inst_info;
        struct r600_bytecode                    *bc;
        struct r600_shader                      *shader;
@@ -814,7 +829,11 @@ static inline int get_address_file_reg(struct 
r600_shader_ctx *ctx, int index)
 
 static int r600_get_temp(struct r600_shader_ctx *ctx)
 {
-       return ctx->temp_reg + ctx->max_driver_temp_used++;
+       if (ctx->max_driver_temp_used < R600_TEMP_REG_RESERVED)
+               return ctx->temp_reg + ctx->max_driver_temp_used++;
+       else
+               return ctx->temp_reg_highmem + ctx->max_driver_temp_used++ -
+                      R600_TEMP_REG_RESERVED;
 }
 
 static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
@@ -2213,6 +2232,8 @@ static int generate_gs_copy_shader(struct r600_context 
*rctx,
                r600_bytecode_add_vtx(ctx.bc, &vtx);
        }
        ctx.temp_reg = i + 1;
+       ctx.temp_reg_highmem = ctx.temp_reg + R600_TEMP_REG_RESERVED;
+
        for (ring = 3; ring >= 0; --ring) {
                bool enabled = false;
                for (i = 0; i < so->num_outputs; i++) {
@@ -3065,8 +3086,11 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
        ctx.file_offset[TGSI_FILE_OUTPUT] =
                        ctx.file_offset[TGSI_FILE_INPUT] +
                        ctx.info.file_max[TGSI_FILE_INPUT] + 1;
-       ctx.file_offset[TGSI_FILE_TEMPORARY] = 
ctx.file_offset[TGSI_FILE_OUTPUT] +
-                                               
ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+        ctx.temp_reg = ctx.file_offset[TGSI_FILE_OUTPUT] +
+                       ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+        ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.temp_reg + 
R600_TEMP_REG_RESERVED;
 
        /* Outside the GPR range. This will be translated to one of the
         * kcache banks later. */
@@ -3081,19 +3105,19 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
        if (ctx.type == PIPE_SHADER_TESS_CTRL) {
                ctx.tess_input_info = ctx.bc->ar_reg + 3;
                ctx.tess_output_info = ctx.bc->ar_reg + 4;
-               ctx.temp_reg = ctx.bc->ar_reg + 5;
+               ctx.temp_reg_highmem = ctx.bc->ar_reg + 5;
        } else if (ctx.type == PIPE_SHADER_TESS_EVAL) {
                ctx.tess_input_info = 0;
                ctx.tess_output_info = ctx.bc->ar_reg + 3;
-               ctx.temp_reg = ctx.bc->ar_reg + 4;
+               ctx.temp_reg_highmem = ctx.bc->ar_reg + 4;
        } else if (ctx.type == PIPE_SHADER_GEOMETRY) {
                ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
                ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
                ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
                ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
-               ctx.temp_reg = ctx.bc->ar_reg + 7;
+               ctx.temp_reg_highmem = ctx.bc->ar_reg + 7;
        } else {
-               ctx.temp_reg = ctx.bc->ar_reg + 3;
+               ctx.temp_reg_highmem = ctx.bc->ar_reg + 3;
        }
 
        shader->max_arrays = 0;
@@ -3656,9 +3680,17 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
        }
 
        /* check GPR limit - we have 124 = 128 - 4
-        * (4 are reserved as alu clause temporary registers) */
-       if (ctx.bc->ngpr > 124) {
-               R600_ERR("GPR limit exceeded - shader requires %d registers\n", 
ctx.bc->ngpr);
+        * (4 are reserved as alu clause temporary registers)
+        * Use this as a soft limit since the sb optimization pass
+        * might reduce this number. */
+       if (ctx.bc->ngpr > 124)
+               fprintf(stderr, "Warning: GPR limit exceeded prior to 
optimization"
+                           " - shader requires %d registers\n", ctx.bc->ngpr);
+
+    /* Set a hard limit for register usage */
+       if (ctx.bc->ngpr > 191) {
+               R600_ERR("GPR limit exceeded - shader requires %d registers\n",
+                        ctx.bc->ngpr);
                r = -ENOMEM;
                goto out_err;
        }
-- 
2.13.0

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to