From: Marek Olšák <[email protected]>

---
 src/gallium/drivers/radeonsi/si_debug.c  |  4 ++++
 src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h |  1 +
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 9d0c0c5..038c8b4 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -632,20 +632,24 @@ static void si_print_annotated_shader(struct si_shader *shader,
         * Buffer size / 4 is the upper bound of the instruction count.
         */
        unsigned num_inst = 0;
        struct si_shader_inst *instructions =
                calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
 
        if (shader->prolog) {
                si_add_split_disasm(shader->prolog->binary.disasm_string,
                                    start_addr, &num_inst, instructions);
        }
+       if (shader->previous_stage) {
+               si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+                                   start_addr, &num_inst, instructions);
+       }
        si_add_split_disasm(shader->binary.disasm_string,
                            start_addr, &num_inst, instructions);
        if (shader->epilog) {
                si_add_split_disasm(shader->epilog->binary.disasm_string,
                                    start_addr, &num_inst, instructions);
        }
 
        fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
                si_get_shader_name(shader, shader->selector->type));
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f6cd313..ffec302 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6141,38 +6141,43 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
                }
        }
 }
 
 static unsigned si_get_shader_binary_size(struct si_shader *shader)
 {
        unsigned size = shader->binary.code_size;
 
        if (shader->prolog)
                size += shader->prolog->binary.code_size;
+       if (shader->previous_stage)
+               size += shader->previous_stage->binary.code_size;
        if (shader->epilog)
                size += shader->epilog->binary.code_size;
        return size;
 }
 
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 {
        const struct ac_shader_binary *prolog =
                shader->prolog ? &shader->prolog->binary : NULL;
+       const struct ac_shader_binary *previous_stage =
+               shader->previous_stage ? &shader->previous_stage->binary : NULL;
        const struct ac_shader_binary *epilog =
                shader->epilog ? &shader->epilog->binary : NULL;
        const struct ac_shader_binary *mainb = &shader->binary;
        unsigned bo_size = si_get_shader_binary_size(shader) +
                           (!epilog ? mainb->rodata_size : 0);
        unsigned char *ptr;
 
        assert(!prolog || !prolog->rodata_size);
-       assert((!prolog && !epilog) || !mainb->rodata_size);
+       assert(!previous_stage || !previous_stage->rodata_size);
+       assert((!prolog && !previous_stage && !epilog) || !mainb->rodata_size);
        assert(!epilog || !epilog->rodata_size);
 
        /* GFX9 can fetch at most 128 bytes past the end of the shader.
         * Prevent VM faults.
         */
        if (sscreen->b.chip_class >= GFX9)
                bo_size += 128;
 
        r600_resource_reference(&shader->bo, NULL);
        shader->bo = (struct r600_resource*)
@@ -6183,20 +6188,25 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
                return -ENOMEM;
 
        /* Upload. */
        ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
                                        PIPE_TRANSFER_READ_WRITE);
 
        if (prolog) {
                util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
                ptr += prolog->code_size;
        }
+       if (previous_stage) {
+               util_memcpy_cpu_to_le32(ptr, previous_stage->code,
+                                       previous_stage->code_size);
+               ptr += previous_stage->code_size;
+       }
 
        util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
        ptr += mainb->code_size;
 
        if (epilog)
                util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
        else if (mainb->rodata_size > 0)
                util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);
 
        sscreen->b.ws->buffer_unmap(shader->bo->buf);
@@ -6390,20 +6400,23 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
        }
 
        if (!check_debug_option ||
            (r600_can_dump_shader(&sscreen->b, processor) &&
             !(sscreen->b.debug_flags & DBG_NO_ASM))) {
                fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
 
                if (shader->prolog)
                        si_shader_dump_disassembly(&shader->prolog->binary,
                                                   debug, "prolog", file);
+               if (shader->previous_stage)
+                       si_shader_dump_disassembly(&shader->previous_stage->binary,
+                                                  debug, "previous stage", file);
 
                si_shader_dump_disassembly(&shader->binary, debug, "main", file);
 
                if (shader->epilog)
                        si_shader_dump_disassembly(&shader->epilog->binary,
                                                   debug, "epilog", file);
                fprintf(file, "\n");
        }
 
        si_shader_dump_stats(sscreen, shader, debug, processor, file,
@@ -8702,20 +8715,40 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                        break;
                }
 
                /* Update SGPR and VGPR counts. */
                if (shader->prolog) {
                        shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
                                                        shader->prolog->config.num_sgprs);
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
                                                        shader->prolog->config.num_vgprs);
                }
+               if (shader->previous_stage) {
+                       shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+                                                       shader->previous_stage->config.num_sgprs);
+                       shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+                                                       shader->previous_stage->config.num_vgprs);
+                       shader->config.spilled_sgprs =
+                               MAX2(shader->config.spilled_sgprs,
+                                    shader->previous_stage->config.spilled_sgprs);
+                       shader->config.spilled_vgprs =
+                               MAX2(shader->config.spilled_vgprs,
+                                    shader->previous_stage->config.spilled_vgprs);
+                       shader->config.private_mem_vgprs =
+                               MAX2(shader->config.private_mem_vgprs,
+                                    shader->previous_stage->config.private_mem_vgprs);
+                       shader->config.scratch_bytes_per_wave =
+                               MAX2(shader->config.scratch_bytes_per_wave,
+                                    shader->previous_stage->config.scratch_bytes_per_wave);
+                       shader->info.uses_instanceid |=
+                               shader->previous_stage->info.uses_instanceid;
+               }
                if (shader->epilog) {
                        shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
                                                        shader->epilog->config.num_sgprs);
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
                                                        shader->epilog->config.num_vgprs);
                }
        }
 
        si_fix_resource_usage(sscreen, shader);
        si_shader_dump(sscreen, shader, debug, sel->info.processor,
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 582c427..92293c4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -510,20 +510,21 @@ struct si_shader_info {
        ubyte                   nr_param_exports;
 };
 
 struct si_shader {
        struct si_compiler_ctx_state    compiler_ctx_state;
 
        struct si_shader_selector       *selector;
        struct si_shader                *next_variant;
 
        struct si_shader_part           *prolog;
+       struct si_shader                *previous_stage; /* for GFX9 */
        struct si_shader_part           *epilog;
 
        struct si_pm4_state             *pm4;
        struct r600_resource            *bo;
        struct r600_resource            *scratch_bo;
        struct si_shader_key            key;
        struct util_queue_fence         optimized_ready;
        bool                            compilation_failed;
        bool                            is_monolithic;
        bool                            is_optimized;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to