---
 src/amd/common/ac_nir_to_llvm.c          | 11 +++++-----
 src/amd/common/ac_shader_abi.h           |  4 ++++
 src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++-------------
 3 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2ae656693f..36f471dcc7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3902,46 +3902,45 @@ static LLVMValueRef visit_interp(struct 
nir_to_llvm_context *ctx,
                                                              
LLVMConstInt(ctx->ac.i32, 2, false),
                                                              llvm_chan, 
attr_number,
                                                              ctx->prim_mask);
                }
        }
        return build_varying_gather_values(&ctx->ac, result, 
instr->num_components,
                                           
instr->variables[0]->var->data.location_frac);
 }
 
 static void
-visit_emit_vertex(struct nir_to_llvm_context *ctx,
-                 const nir_intrinsic_instr *instr)
+visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef 
*addrs)
 {
        LLVMValueRef gs_next_vertex;
        LLVMValueRef can_emit;
        int idx;
+       struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
 
-       assert(instr->const_index[0] == 0);
        /* Write vertex attribute values to GSVS ring */
        gs_next_vertex = LLVMBuildLoad(ctx->builder,
                                       ctx->gs_next_vertex,
                                       "");
 
        /* If this thread has already emitted the declared maximum number of
         * vertices, kill it: excessive vertex emissions are not supposed to
         * have any effect, and GS threads have no externally observable
         * effects other than emitting vertices.
         */
        can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
                                 LLVMConstInt(ctx->ac.i32, 
ctx->gs_max_out_vertices, false), "");
        ac_build_kill_if_false(&ctx->ac, can_emit);
 
        /* loop num outputs */
        idx = 0;
        for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
-               LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
+               LLVMValueRef *out_ptr = &addrs[i * 4];
                int length = 4;
                int slot = idx;
                int slot_inc = 1;
 
                if (!(ctx->output_mask & (1ull << i)))
                        continue;
 
                if (i == VARYING_SLOT_CLIP_DIST0) {
                        /* pack clip and cull into a single set of slots */
                        length = ctx->num_output_clips + ctx->num_output_culls;
@@ -4160,21 +4159,22 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_var_atomic_exchange:
        case nir_intrinsic_var_atomic_comp_swap:
                result = visit_var_atomic(ctx->nctx, instr);
                break;
        case nir_intrinsic_interp_var_at_centroid:
        case nir_intrinsic_interp_var_at_sample:
        case nir_intrinsic_interp_var_at_offset:
                result = visit_interp(ctx->nctx, instr);
                break;
        case nir_intrinsic_emit_vertex:
-               visit_emit_vertex(ctx->nctx, instr);
+               assert(instr->const_index[0] == 0);
+               ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
                break;
        case nir_intrinsic_end_primitive:
                visit_end_primitive(ctx->nctx, instr);
                break;
        case nir_intrinsic_load_tess_coord:
                result = visit_load_tess_coord(ctx->nctx, instr);
                break;
        case nir_intrinsic_load_patch_vertices_in:
                result = LLVMConstInt(ctx->ac.i32, 
ctx->nctx->options->key.tcs.input_vertices, false);
                break;
@@ -6490,20 +6490,21 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
                ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
                                              
ac_nir_get_max_workgroup_size(ctx.options->chip_class,
                                                                            
shaders[i]));
        }
 
        create_function(&ctx, shaders[shader_count - 1]->info.stage, 
shader_count >= 2,
                        shader_count >= 2 ? shaders[shader_count - 
2]->info.stage  : MESA_SHADER_VERTEX);
 
        ctx.abi.inputs = &ctx.inputs[0];
        ctx.abi.emit_outputs = handle_shader_outputs_post;
+       ctx.abi.emit_vertex = visit_emit_vertex;
        ctx.abi.load_ssbo = radv_load_ssbo;
        ctx.abi.load_sampler_desc = radv_get_sampler_desc;
        ctx.abi.clamp_shadow_reference = false;
 
        if (shader_count >= 2)
                ac_init_exec_full_mask(&ctx.ac);
 
        if (ctx.ac.chip_class == GFX9 &&
            shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
                ac_nir_fixup_ls_hs_input_vgprs(&ctx);
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 14517d5570..27586d0212 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -51,20 +51,24 @@ struct ac_shader_abi {
         *
         * Currently only used for NIR shaders; indexed by variables'
         * driver_location.
         */
        LLVMValueRef *inputs;
 
        void (*emit_outputs)(struct ac_shader_abi *abi,
                             unsigned max_outputs,
                             LLVMValueRef *addrs);
 
+       void (*emit_vertex)(struct ac_shader_abi *abi,
+                           unsigned stream,
+                           LLVMValueRef *addrs);
+
        LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
 
        /**
         * Load the descriptor for the given buffer.
         *
         * \param buffer the buffer as presented in NIR: this is the descriptor
         *               in Vulkan, and the buffer index in OpenGL/Gallium
         * \param write whether buffer contents will be written
         */
        LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index d234e08071..47ca64fdea 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4031,39 +4031,35 @@ static unsigned si_llvm_get_stream(struct 
lp_build_tgsi_context *bld_base,
        unsigned stream;
 
        assert(src0.File == TGSI_FILE_IMMEDIATE);
 
        imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
        stream = LLVMConstIntGetZExtValue(imm) & 0x3;
        return stream;
 }
 
 /* Emit one vertex from the geometry shader */
-static void si_llvm_emit_vertex(
-       const struct lp_build_tgsi_action *action,
-       struct lp_build_tgsi_context *bld_base,
-       struct lp_build_emit_data *emit_data)
+static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
+                               unsigned stream,
+                               LLVMValueRef *addrs)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct lp_build_context *uint = &bld_base->uint_bld;
+       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+       struct tgsi_shader_info *info = &ctx->shader->selector->info;
+       struct lp_build_context *uint = &ctx->bld_base.uint_bld;
        struct si_shader *shader = ctx->shader;
-       struct tgsi_shader_info *info = &shader->selector->info;
        struct lp_build_if_state if_state;
        LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
                                            ctx->param_gs2vs_offset);
        LLVMValueRef gs_next_vertex;
        LLVMValueRef can_emit;
        unsigned chan, offset;
        int i;
-       unsigned stream;
-
-       stream = si_llvm_get_stream(bld_base, emit_data);
 
        /* Write vertex attribute values to GSVS ring */
        gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
                                       ctx->gs_next_vertex[stream],
                                       "");
 
        /* If this thread has already emitted the declared maximum number of
         * vertices, skip the write: excessive vertex emissions are not
         * supposed to have any effect.
         *
@@ -4077,28 +4073,26 @@ static void si_llvm_emit_vertex(
 
        bool use_kill = !info->writes_memory;
        if (use_kill) {
                ac_build_kill_if_false(&ctx->ac, can_emit);
        } else {
                lp_build_if(&if_state, &ctx->gallivm, can_emit);
        }
 
        offset = 0;
        for (i = 0; i < info->num_outputs; i++) {
-               LLVMValueRef *out_ptr = ctx->outputs[i];
-
                for (chan = 0; chan < 4; chan++) {
                        if (!(info->output_usagemask[i] & (1 << chan)) ||
                            ((info->output_streams[i] >> (2 * chan)) & 3) != 
stream)
                                continue;
 
-                       LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, 
out_ptr[chan], "");
+                       LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, 
addrs[4 * i + chan], "");
                        LLVMValueRef voffset =
                                LLVMConstInt(ctx->i32, offset *
                                             
shader->selector->gs_max_out_vertices, 0);
                        offset++;
 
                        voffset = lp_build_add(uint, voffset, gs_next_vertex);
                        voffset = lp_build_mul_imm(uint, voffset, 4);
 
                        out_val = ac_to_integer(&ctx->ac, out_val);
 
@@ -4115,20 +4109,32 @@ static void si_llvm_emit_vertex(
 
        LLVMBuildStore(ctx->ac.builder, gs_next_vertex, 
ctx->gs_next_vertex[stream]);
 
        /* Signal vertex emission */
        ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | 
(stream << 8),
                         si_get_gs_wave_id(ctx));
        if (!use_kill)
                lp_build_endif(&if_state);
 }
 
+/* Emit one vertex from the geometry shader */
+static void si_tgsi_emit_vertex(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       unsigned stream = si_llvm_get_stream(bld_base, emit_data);
+
+       si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
+}
+
 /* Cut one primitive from the geometry shader */
 static void si_llvm_emit_primitive(
        const struct lp_build_tgsi_action *action,
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        unsigned stream;
 
        /* Signal primitive cut */
@@ -5634,21 +5640,21 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
        bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
        bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
        bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
        bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
        bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = 
"llvm.amdgcn.readfirstlane";
        bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
        bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = 
"llvm.amdgcn.readlane";
        bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = 
read_invoc_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
 
-       bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
+       bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
        bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
        bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
 static void si_optimize_vs_outputs(struct si_shader_context *ctx)
 {
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
 
        if ((ctx->type != PIPE_SHADER_VERTEX &&
@@ -5748,20 +5754,21 @@ static bool si_compile_tgsi_main(struct 
si_shader_context *ctx,
                bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
                if (shader->key.as_es)
                        bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
                else {
                        ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
                        bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                }
                break;
        case PIPE_SHADER_GEOMETRY:
                bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
+               ctx->abi.emit_vertex = si_llvm_emit_vertex;
                bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
                break;
        case PIPE_SHADER_FRAGMENT:
                ctx->load_input = declare_input_fs;
                ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
                bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                break;
        case PIPE_SHADER_COMPUTE:
                break;
        default:
-- 
2.14.3

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to