From: Nicolai Hähnle <[email protected]>

By keeping track of fewer generics, everything can fit into 64 bits.
---
 src/gallium/drivers/radeonsi/si_shader.c        | 32 +++++++------------------
 src/gallium/drivers/radeonsi/si_shader.h        |  6 +----
 src/gallium/drivers/radeonsi/si_state_shaders.c | 22 +++--------------
 3 files changed, 13 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c12c8ea..837cc1c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -141,43 +141,36 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
        case TGSI_SEMANTIC_CLIPDIST:
                assert(index <= 1);
                return 2 + index;
        case TGSI_SEMANTIC_GENERIC:
                if (index < SI_MAX_IO_GENERIC)
                        return 4 + index;
 
                assert(!"invalid generic index");
                return 0;
 
-       default:
-               assert(!"invalid semantic name");
-               return 0;
-       }
-}
-
-unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index)
-{
-       switch (name) {
        case TGSI_SEMANTIC_FOG:
-               return 0;
+               return SI_MAX_IO_GENERIC + 4;
        case TGSI_SEMANTIC_LAYER:
-               return 1;
+               return SI_MAX_IO_GENERIC + 5;
        case TGSI_SEMANTIC_VIEWPORT_INDEX:
-               return 2;
+               return SI_MAX_IO_GENERIC + 6;
        case TGSI_SEMANTIC_PRIMID:
-               return 3;
+               return SI_MAX_IO_GENERIC + 7;
        case TGSI_SEMANTIC_COLOR: /* these alias */
        case TGSI_SEMANTIC_BCOLOR:
-               return 4 + index;
+               assert(index < 2);
+               return SI_MAX_IO_GENERIC + 8 + index;
        case TGSI_SEMANTIC_TEXCOORD:
                assert(index < 8);
-               return 6 + index;
+               assert(SI_MAX_IO_GENERIC + 10 + index < 64);
+               return SI_MAX_IO_GENERIC + 10 + index;
        default:
                assert(!"invalid semantic name");
                return 0;
        }
 }
 
 /**
  * Get the value of a shader input parameter and extract a bitfield.
  */
 static LLVMValueRef unpack_param(struct si_shader_context *ctx,
@@ -2291,30 +2284,24 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
                case TGSI_SEMANTIC_POSITION: /* ignore these */
                case TGSI_SEMANTIC_PSIZE:
                case TGSI_SEMANTIC_CLIPVERTEX:
                case TGSI_SEMANTIC_EDGEFLAG:
                        break;
                case TGSI_SEMANTIC_GENERIC:
                        /* don't process indices the function can't handle */
                        if (semantic_index >= SI_MAX_IO_GENERIC)
                                break;
                        /* fall through */
-               case TGSI_SEMANTIC_CLIPDIST:
+               default:
                        if (shader->key.opt.hw_vs.kill_outputs &
                            (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
                                export_param = false;
-                       break;
-               default:
-                       if (shader->key.opt.hw_vs.kill_outputs2 &
-                           (1u << si_shader_io_get_unique_index2(semantic_name, semantic_index)))
-                               export_param = false;
-                       break;
                }
 
                if (outputs[i].vertex_stream[0] != 0 &&
                    outputs[i].vertex_stream[1] != 0 &&
                    outputs[i].vertex_stream[2] != 0 &&
                    outputs[i].vertex_stream[3] != 0)
                        export_param = false;
 
 handle_semantic:
                /* Select the correct target */
@@ -7152,21 +7139,20 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
 
        default:
                assert(0);
        }
 
        if ((processor == PIPE_SHADER_GEOMETRY ||
             processor == PIPE_SHADER_TESS_EVAL ||
             processor == PIPE_SHADER_VERTEX) &&
            !key->as_es && !key->as_ls) {
                fprintf(f, "  opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
-               fprintf(f, "  opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
                fprintf(f, "  opt.hw_vs.clip_disable = %u\n", key->opt.hw_vs.clip_disable);
        }
 }
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
                               LLVMTargetMachineRef tm)
 {
        struct lp_build_tgsi_context *bld_base;
        struct lp_build_tgsi_action tmpl = {};
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 3075900..1627de3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -354,25 +354,23 @@ struct si_shader_selector {
        unsigned        db_shader_control;
        /* Set 0xf or 0x0 (4 bits) per each written output.
         * ANDed with spi_shader_col_format.
         */
        unsigned        colors_written_4bit;
 
        /* CS parameters */
        unsigned local_size;
 
        uint64_t        outputs_written;        /* "get_unique_index" bits */
-       uint32_t        patch_outputs_written;  /* "get_unique_index" bits */
-       uint32_t        outputs_written2;       /* "get_unique_index2" bits */
+       uint32_t        patch_outputs_written;  /* "get_unique_index_patch" bits */
 
        uint64_t        inputs_read;            /* "get_unique_index" bits */
-       uint32_t        inputs_read2;           /* "get_unique_index2" bits */
 };
 
 /* Valid shader configurations:
  *
  * API shaders       VS | TCS | TES | GS |pass| PS
  * are compiled as:     |     |     |    |thru|
  *                      |     |     |    |    |
  * Only VS & PS:     VS |     |     |    |    | PS
  * GFX6 - with GS:   ES |     |     | GS | VS | PS
  *      - with tess: LS | HS  | VS  |    |    | PS
@@ -498,21 +496,20 @@ struct si_shader_key {
                uint8_t         vs_fix_fetch[SI_MAX_ATTRIBS];
                uint64_t        ff_tcs_inputs_to_copy; /* for fixed-func TCS */
                /* When PS needs PrimID and GS is disabled. */
                unsigned        vs_export_prim_id:1;
        } mono;
 
        /* Optimization flags for asynchronous compilation only. */
        struct {
                struct {
                        uint64_t        kill_outputs; /* "get_unique_index" bits */
-                       uint32_t        kill_outputs2; /* "get_unique_index2" bits */
                        unsigned        clip_disable:1;
                } hw_vs; /* HW VS (it can be VS, TES, GS) */
 
                /* For shaders where monolithic variants have better code.
                 *
                 * This is a flag that has no effect on code generation,
                 * but forces monolithic shaders to be used as soon as
                 * possible, because it's in the "opt" group.
                 */
                unsigned        prefer_mono:1;
@@ -597,21 +594,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                           LLVMTargetMachineRef tm,
                           struct si_shader *shader,
                           bool is_monolithic,
                           struct pipe_debug_callback *debug);
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
-unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
                    struct pipe_debug_callback *debug, unsigned processor,
                    FILE *f, bool check_debug_option);
 void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
                                      unsigned *lds_size);
 void si_shader_apply_scratch_relocs(struct si_shader *shader,
                                    uint64_t scratch_va);
 void si_shader_binary_read_config(struct ac_shader_binary *binary,
                                  struct si_shader_config *conf,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6020bec..5da6014 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1224,36 +1224,31 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
                        ps_colormask &= ps->colors_written_4bit;
 
                ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
                              (!ps_colormask &&
                               !ps_modifies_zs &&
                               !ps->info.writes_memory);
        }
 
        /* Find out which VS outputs aren't used by the PS. */
        uint64_t outputs_written = vs->outputs_written;
-       uint32_t outputs_written2 = vs->outputs_written2;
        uint64_t inputs_read = 0;
-       uint32_t inputs_read2 = 0;
 
        outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
 
        if (!ps_disabled) {
                inputs_read = ps->inputs_read;
-               inputs_read2 = ps->inputs_read2;
        }
 
        uint64_t linked = outputs_written & inputs_read;
-       uint32_t linked2 = outputs_written2 & inputs_read2;
 
        key->opt.hw_vs.kill_outputs = ~linked & outputs_written;
-       key->opt.hw_vs.kill_outputs2 = ~linked2 & outputs_written2;
 }
 
 /* Compute the key for the hw shader variant */
 static inline void si_shader_selector_key(struct pipe_context *ctx,
                                          struct si_shader_selector *sel,
                                          struct si_shader_key *key)
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
        memset(key, 0, sizeof(*key));
@@ -1839,32 +1834,29 @@ void si_init_shader_selector_async(void *job, int thread_index)
                                unsigned name = sel->info.output_semantic_name[i];
                                unsigned index = sel->info.output_semantic_index[i];
                                unsigned id;
 
                                switch (name) {
                                case TGSI_SEMANTIC_GENERIC:
                                        /* don't process indices the function can't handle */
                                        if (index >= SI_MAX_IO_GENERIC)
                                                break;
                                        /* fall through */
-                               case TGSI_SEMANTIC_CLIPDIST:
+                               default:
                                        id = si_shader_io_get_unique_index(name, index);
                                        sel->outputs_written &= ~(1ull << id);
                                        break;
                                case TGSI_SEMANTIC_POSITION: /* ignore these */
                                case TGSI_SEMANTIC_PSIZE:
                                case TGSI_SEMANTIC_CLIPVERTEX:
                                case TGSI_SEMANTIC_EDGEFLAG:
                                        break;
-                               default:
-                                       id = si_shader_io_get_unique_index2(name, index);
-                                       sel->outputs_written2 &= ~(1u << id);
                                }
                        }
                }
        }
 
        /* Pre-compilation. */
        if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
                struct si_shader_ctx_state state = {sel};
                struct si_shader_key key;
 
@@ -1996,32 +1988,27 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                        case TGSI_SEMANTIC_PATCH:
                                sel->patch_outputs_written |=
                                        1llu << si_shader_io_get_unique_index_patch(name, index);
                                break;
 
                        case TGSI_SEMANTIC_GENERIC:
                                /* don't process indices the function can't handle */
                                if (index >= SI_MAX_IO_GENERIC)
                                        break;
                                /* fall through */
-                       case TGSI_SEMANTIC_POSITION:
-                       case TGSI_SEMANTIC_PSIZE:
-                       case TGSI_SEMANTIC_CLIPDIST:
+                       default:
                                sel->outputs_written |=
                                        1llu << si_shader_io_get_unique_index(name, index);
                                break;
                        case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
                        case TGSI_SEMANTIC_EDGEFLAG:
                                break;
-                       default:
-                               sel->outputs_written2 |=
-                                       1u << si_shader_io_get_unique_index2(name, index);
                        }
                }
                sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
 
                /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
                 * conflicts, i.e. each vertex will start at a different bank.
                 */
                if (sctx->b.chip_class >= GFX9)
                        sel->esgs_itemsize += 4;
                break;
@@ -2030,29 +2017,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                for (i = 0; i < sel->info.num_inputs; i++) {
                        unsigned name = sel->info.input_semantic_name[i];
                        unsigned index = sel->info.input_semantic_index[i];
 
                        switch (name) {
                        case TGSI_SEMANTIC_GENERIC:
                                /* don't process indices the function can't handle */
                                if (index >= SI_MAX_IO_GENERIC)
                                        break;
                                /* fall through */
-                       case TGSI_SEMANTIC_CLIPDIST:
+                       default:
                                sel->inputs_read |=
                                        1llu << si_shader_io_get_unique_index(name, index);
                                break;
                        case TGSI_SEMANTIC_PCOORD: /* ignore this */
                                break;
-                       default:
-                               sel->inputs_read2 |=
-                                       1u << si_shader_io_get_unique_index2(name, index);
                        }
                }
 
                for (i = 0; i < 8; i++)
                        if (sel->info.colors_written & (1 << i))
                                sel->colors_written_4bit |= 0xf << (4 * i);
 
                for (i = 0; i < sel->info.num_inputs; i++) {
                        if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
                                int index = sel->info.input_semantic_index[i];
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to