From: Marek Olšák <[email protected]>

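Callers now pass an i1 "keep this thread alive" condition instead of building
the float kill value themselves. This matches a planned LLVM intrinsic and
will also work nicely with llvm.amdgcn.wqm.vote.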
---
 src/amd/common/ac_llvm_build.c                    | 19 +++------
 src/amd/common/ac_llvm_build.h                    |  2 +-
 src/amd/common/ac_nir_to_llvm.c                   | 16 ++------
 src/gallium/drivers/radeonsi/si_shader.c          | 48 +++++++++++------------
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 42 ++++++++------------
 5 files changed, 49 insertions(+), 78 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 949f181..752c42e 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1398,34 +1398,27 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
                                           v2f16, args, 2,
                                           AC_FUNC_ATTR_READNONE);
                return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
        }
 
        return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
                                  AC_FUNC_ATTR_READNONE |
                                  AC_FUNC_ATTR_LEGACY);
 }
 
-/**
- * KILL, AKA discard in GLSL.
- *
- * \param value  kill if value < 0.0 or value == NULL.
- */
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value)
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
-       if (value) {
-               ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
-                                  &value, 1, AC_FUNC_ATTR_LEGACY);
-       } else {
-               ac_build_intrinsic(ctx, "llvm.AMDGPU.kilp", ctx->voidt,
-                                  NULL, 0, AC_FUNC_ATTR_LEGACY);
-       }
+       LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
+                                            LLVMConstReal(ctx->f32, 1),
+                                            LLVMConstReal(ctx->f32, -1), "");
+       ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
+                          &value, 1, AC_FUNC_ATTR_LEGACY);
 }
 
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
                          LLVMValueRef offset, LLVMValueRef width,
                          bool is_signed)
 {
        LLVMValueRef args[] = {
                input,
                offset,
                width,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f0b5875..b721782 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -258,21 +258,21 @@ struct ac_image_args {
        LLVMValueRef addr;
        unsigned dmask;
        bool unorm;
        bool da;
 };
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                                   struct ac_image_args *a);
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
                                    LLVMValueRef args[2]);
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value);
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
                          LLVMValueRef offset, LLVMValueRef width,
                          bool is_signed);
 
 void ac_get_image_intr_name(const char *base_name,
                            LLVMTypeRef data_type,
                            LLVMTypeRef coords_type,
                            LLVMTypeRef rsrc_type,
                            char *out_name, unsigned out_len);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3ba3ebf..c1490a5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3614,28 +3614,24 @@ static void emit_barrier(struct nir_to_llvm_context *ctx)
        }
        ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
                           ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
 }
 
 static void emit_discard_if(struct ac_nir_context *ctx,
                            const nir_intrinsic_instr *instr)
 {
        LLVMValueRef cond;
 
-       cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
+       cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
                             get_src(ctx, instr->src[0]),
                             ctx->ac.i32_0, "");
-
-       cond = LLVMBuildSelect(ctx->ac.builder, cond,
-                              LLVMConstReal(ctx->ac.f32, -1.0f),
-                              ctx->ac.f32_0, "");
-       ac_build_kill(&ctx->ac, cond);
+       ac_build_kill_if_false(&ctx->ac, cond);
 }
 
 static LLVMValueRef
 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
 {
        LLVMValueRef result;
        LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
        result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
                              LLVMConstInt(ctx->i32, 0xfc0, false), "");
 
@@ -3856,41 +3852,37 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
                }
        }
        return ac_build_gather_values(&ctx->ac, result, 2);
 }
 
 static void
 visit_emit_vertex(struct nir_to_llvm_context *ctx,
                  const nir_intrinsic_instr *instr)
 {
        LLVMValueRef gs_next_vertex;
-       LLVMValueRef can_emit, kill;
+       LLVMValueRef can_emit;
        int idx;
 
        assert(instr->const_index[0] == 0);
        /* Write vertex attribute values to GSVS ring */
        gs_next_vertex = LLVMBuildLoad(ctx->builder,
                                       ctx->gs_next_vertex,
                                       "");
 
        /* If this thread has already emitted the declared maximum number of
         * vertices, kill it: excessive vertex emissions are not supposed to
         * have any effect, and GS threads have no externally observable
         * effects other than emitting vertices.
         */
        can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
                                 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
-
-       kill = LLVMBuildSelect(ctx->builder, can_emit,
-                              LLVMConstReal(ctx->f32, 1.0f),
-                              LLVMConstReal(ctx->f32, -1.0f), "");
-       ac_build_kill(&ctx->ac, kill);
+       ac_build_kill_if_false(&ctx->ac, can_emit);
 
        /* loop num outputs */
        idx = 0;
        for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
                LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
                int length = 4;
                int slot = idx;
                int slot_inc = 1;
 
                if (!(ctx->output_mask & (1ull << i)))
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c3fe13d..8abacac 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2265,36 +2265,38 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                break;
        }
 }
 
 static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
                          LLVMValueRef alpha)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
+               static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
+                       [PIPE_FUNC_LESS] = LLVMRealOLT,
+                       [PIPE_FUNC_EQUAL] = LLVMRealOEQ,
+                       [PIPE_FUNC_LEQUAL] = LLVMRealOLE,
+                       [PIPE_FUNC_GREATER] = LLVMRealOGT,
+                       [PIPE_FUNC_NOTEQUAL] = LLVMRealONE,
+                       [PIPE_FUNC_GEQUAL] = LLVMRealOGE,
+               };
+               LLVMRealPredicate cond = cond_map[ctx->shader->key.part.ps.epilog.alpha_func];
+               assert(cond);
+
                LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn,
                                SI_PARAM_ALPHA_REF);
-
                LLVMValueRef alpha_pass =
-                       lp_build_cmp(&bld_base->base,
-                                    ctx->shader->key.part.ps.epilog.alpha_func,
-                                    alpha, alpha_ref);
-               LLVMValueRef arg =
-                       lp_build_select(&bld_base->base,
-                                       alpha_pass,
-                                       LLVMConstReal(ctx->f32, 1.0f),
-                                       LLVMConstReal(ctx->f32, -1.0f));
-
-               ac_build_kill(&ctx->ac, arg);
+                       LLVMBuildFCmp(ctx->ac.builder, cond, alpha, alpha_ref, "");
+               ac_build_kill_if_false(&ctx->ac, alpha_pass);
        } else {
-               ac_build_kill(&ctx->ac, NULL);
+               ac_build_kill_if_false(&ctx->ac, LLVMConstInt(ctx->i1, 0, 0));
        }
 }
 
 static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
                                                  LLVMValueRef alpha,
                                                  unsigned samplemask_param)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef coverage;
 
@@ -3566,21 +3568,21 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
        LLVMBuilderRef builder = ctx->ac.builder;
        unsigned i, j, first_vgpr, vgpr;
 
        LLVMValueRef color[8][4] = {};
        LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
        LLVMValueRef ret;
 
        if (ctx->postponed_kill)
-               ac_build_kill(&ctx->ac, LLVMBuildLoad(builder, ctx->postponed_kill, ""));
+               ac_build_kill_if_false(&ctx->ac, LLVMBuildLoad(builder, ctx->postponed_kill, ""));
 
        /* Read the output values. */
        for (i = 0; i < info->num_outputs; i++) {
                unsigned semantic_name = info->output_semantic_name[i];
                unsigned semantic_index = info->output_semantic_index[i];
 
                switch (semantic_name) {
                case TGSI_SEMANTIC_COLOR:
                        assert(semantic_index < 8);
                        for (j = 0; j < 4; j++) {
@@ -4049,21 +4051,21 @@ static void si_llvm_emit_vertex(
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *uint = &bld_base->uint_bld;
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
        struct lp_build_if_state if_state;
        LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
                                            ctx->param_gs2vs_offset);
        LLVMValueRef gs_next_vertex;
-       LLVMValueRef can_emit, kill;
+       LLVMValueRef can_emit;
        unsigned chan, offset;
        int i;
        unsigned stream;
 
        stream = si_llvm_get_stream(bld_base, emit_data);
 
        /* Write vertex attribute values to GSVS ring */
        gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
                                       ctx->gs_next_vertex[stream],
                                       "");
@@ -4075,25 +4077,21 @@ static void si_llvm_emit_vertex(
         * If the shader has no writes to memory, kill it instead. This skips
         * further memory loads and may allow LLVM to skip to the end
         * altogether.
         */
        can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
                                 LLVMConstInt(ctx->i32,
                                              shader->selector->gs_max_out_vertices, 0), "");
 
        bool use_kill = !info->writes_memory;
        if (use_kill) {
-               kill = lp_build_select(&bld_base->base, can_emit,
-                                      LLVMConstReal(ctx->f32, 1.0f),
-                                      LLVMConstReal(ctx->f32, -1.0f));
-
-               ac_build_kill(&ctx->ac, kill);
+               ac_build_kill_if_false(&ctx->ac, can_emit);
        } else {
                lp_build_if(&if_state, &ctx->gallivm, can_emit);
        }
 
        offset = 0;
        for (i = 0; i < info->num_outputs; i++) {
                LLVMValueRef *out_ptr = ctx->outputs[i];
 
                for (chan = 0; chan < 4; chan++) {
                        if (!(info->output_usagemask[i] & (1 << chan)) ||
@@ -4874,25 +4872,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
        slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
        desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot);
 
        /* The stipple pattern is 32x32, each row has 32 bits. */
        offset = LLVMBuildMul(builder, address[1],
                              LLVMConstInt(ctx->i32, 4, 0), "");
        row = buffer_load_const(ctx, desc, offset);
        row = ac_to_integer(&ctx->ac, row);
        bit = LLVMBuildLShr(builder, row, address[0], "");
        bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
-
-       /* The intrinsic kills the thread if arg < 0. */
-       bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
-                             LLVMConstReal(ctx->f32, -1), "");
-       ac_build_kill(&ctx->ac, bit);
+       ac_build_kill_if_false(&ctx->ac, bit);
 }
 
 void si_shader_binary_read_config(struct ac_shader_binary *binary,
                                  struct si_shader_config *conf,
                                  unsigned symbol_offset)
 {
        unsigned i;
        const unsigned char *config =
                ac_shader_binary_config_start(binary, symbol_offset);
        bool really_needs_scratch = false;
@@ -5847,22 +5841,24 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                int i;
                for (i = 0; i < 4; i++) {
                        ctx->gs_next_vertex[i] =
                                lp_build_alloca(&ctx->gallivm,
                                                ctx->i32, "");
                }
        }
 
        if (ctx->type == PIPE_SHADER_FRAGMENT && sel->info.uses_kill &&
            ctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) {
-               /* This is initialized to 0.0 = not kill. */
-               ctx->postponed_kill = lp_build_alloca(&ctx->gallivm, ctx->f32, "");
+               ctx->postponed_kill = lp_build_alloca_undef(&ctx->gallivm, ctx->i1, "");
+               /* true = don't kill. */
+               LLVMBuildStore(ctx->ac.builder, LLVMConstInt(ctx->i1, 1, 0),
+                              ctx->postponed_kill);
        }
 
        if (sel->tokens) {
                if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
                        fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
                        return false;
                }
        } else {
                if (!si_nir_build_llvm(ctx, sel->nir)) {
                        fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index fc705c3..ad7a42f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -32,67 +32,57 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
                               struct lp_build_emit_data *emit_data)
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = ctx->ac.builder;
        unsigned i;
        LLVMValueRef conds[TGSI_NUM_CHANNELS];
 
        for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
                LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
-               conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
+               conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value,
                                        ctx->ac.f32_0, "");
        }
 
-       /* Or the conditions together */
+       /* And the conditions; UGE (not OGE) so a NaN channel doesn't kill. */
        for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
-               conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
+               conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
        }
 
        emit_data->dst_type = ctx->voidt;
        emit_data->arg_count = 1;
-       emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
-                                       LLVMConstReal(ctx->f32, -1.0f),
-                                       ctx->ac.f32_0, "");
+       emit_data->args[0] = conds[0];
 }
 
 static void kil_emit(const struct lp_build_tgsi_action *action,
                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = ctx->ac.builder;
+       LLVMValueRef visible;
+
+       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
+               visible = emit_data->args[0];
+       } else {
+               assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
+               visible = LLVMConstInt(ctx->i1, false, 0);
+       }
 
        if (ctx->postponed_kill) {
-               if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
-                       LLVMValueRef val;
-
-                       /* Take the minimum kill value. This is the same as OR
-                        * between 2 kill values. If the value is negative,
-                        * the pixel will be killed.
-                        */
-                       val = LLVMBuildLoad(builder, ctx->postponed_kill, "");
-                       val = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
-                                                       val, emit_data->args[0]);
-                       LLVMBuildStore(builder, val, ctx->postponed_kill);
-               } else {
-                       LLVMBuildStore(builder,
-                                      LLVMConstReal(ctx->f32, -1),
-                                      ctx->postponed_kill);
-               }
+               LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
+               mask = LLVMBuildAnd(builder, mask, visible, "");
+               LLVMBuildStore(builder, mask, ctx->postponed_kill);
                return;
        }
 
-       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF)
-               ac_build_kill(&ctx->ac, emit_data->args[0]);
-       else
-               ac_build_kill(&ctx->ac, NULL);
+       ac_build_kill_if_false(&ctx->ac, visible);
 }
 
 static void emit_icmp(const struct lp_build_tgsi_action *action,
                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
 {
        unsigned pred;
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        switch (emit_data->inst->Instruction.Opcode) {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to