D3D behaviour and Vulkan are different. The workaround can
be enabled with RADV_DEBUG=correctderivsafterkill.

When enabled, this fixes rendering issues with DXVK. For
now, I don't want to force-enable this behaviour for DXVK
because having per-app workarounds is a bad idea.

We are probably going to draft a little extension that
explicitely enable this when needed.

Signed-off-by: Samuel Pitoiset <[email protected]>
---
 src/amd/vulkan/radv_debug.h       |  1 +
 src/amd/vulkan/radv_device.c      |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c | 36 +++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_shader.c      |  2 ++
 src/amd/vulkan/radv_shader.h      |  2 ++
 5 files changed, 42 insertions(+)

diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 9dda9b6b0c..d63f6538d1 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -45,6 +45,7 @@ enum {
        RADV_DEBUG_PREOPTIR          = 0x8000,
        RADV_DEBUG_NO_DYNAMIC_BOUNDS = 0x10000,
        RADV_DEBUG_NO_OUT_OF_ORDER   = 0x20000,
+       RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL = 0x40000,
 };
 
 enum {
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index a7f4a5ab7b..d01f20617f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -391,6 +391,7 @@ static const struct debug_control radv_debug_options[] = {
        {"preoptir", RADV_DEBUG_PREOPTIR},
        {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
        {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
+       {"correctderivsafterkill", RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL},
        {NULL, 0}
 };
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index e2d241e495..60ee408871 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -85,6 +85,8 @@ struct radv_shader_context {
        LLVMValueRef persp_sample, persp_center, persp_centroid;
        LLVMValueRef linear_sample, linear_center, linear_centroid;
 
+       LLVMValueRef postponed_kill;
+
        gl_shader_stage stage;
 
        LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
@@ -1486,6 +1488,22 @@ load_gs_input(struct ac_shader_abi *abi,
 static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
 {
        struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+       if (ctx->shader_info->fs.force_correct_derivs_after_kill) {
+               /* LLVM 6.0 can kill immediately while maintaining WQM. */
+               if (HAVE_LLVM >= 0x0600) {
+                       ac_build_kill_if_false(&ctx->ac,
+                                              ac_build_wqm_vote(&ctx->ac,
+                                                                visible));
+               }
+
+               LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder,
+                                                 ctx->postponed_kill, "");
+               mask = LLVMBuildAnd(ctx->ac.builder, mask, visible, "");
+               LLVMBuildStore(ctx->ac.builder, mask, ctx->postponed_kill);
+               return;
+       }
+
        ac_build_kill_if_false(&ctx->ac, visible);
 }
 
@@ -2776,6 +2794,12 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
        LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
        struct ac_export_args color_args[8];
 
+       if (ctx->postponed_kill) {
+               ac_build_kill_if_false(&ctx->ac,
+                                      LLVMBuildLoad(ctx->ac.builder,
+                                                    ctx->postponed_kill, ""));
+       }
+
        for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
                LLVMValueRef values[4];
 
@@ -3075,6 +3099,18 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
        create_function(&ctx, shaders[shader_count - 1]->info.stage, 
shader_count >= 2,
                        shader_count >= 2 ? shaders[shader_count - 
2]->info.stage  : MESA_SHADER_VERTEX);
 
+       if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
+           shader_info->info.ps.uses_derivatives &&
+           shader_info->info.ps.uses_kill &&
+           options->force_correct_derivs_after_kill) {
+               ctx.postponed_kill =
+                       ac_build_alloca_undef(&ctx.ac, ctx.ac.i1, "");
+               /* true = don't kill. */
+               LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, 
ctx.postponed_kill);
+
+               shader_info->fs.force_correct_derivs_after_kill = true;
+       }
+
        ctx.abi.inputs = &ctx.inputs[0];
        ctx.abi.emit_outputs = handle_shader_outputs_post;
        ctx.abi.emit_vertex = visit_emit_vertex;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 27b3fbed16..9bafd53b98 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -486,6 +486,8 @@ shader_variant_create(struct radv_device *device,
                                 device->instance->debug_flags & 
RADV_DEBUG_PREOPTIR;
        options->record_llvm_ir = device->keep_shader_info;
        options->tess_offchip_block_dw_size = 
device->tess_offchip_block_dw_size;
+       options->force_correct_derivs_after_kill =
+               device->instance->debug_flags & 
RADV_DEBUG_CORRECT_DERIVS_AFTER_KILL;
 
        if (options->supports_spill)
                tm_options |= AC_TM_SUPPORTS_SPILL;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index f8826f41ef..fd190dc537 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -108,6 +108,7 @@ struct radv_nir_compiler_options {
        bool dump_shader;
        bool dump_preoptir;
        bool record_llvm_ir;
+       bool force_correct_derivs_after_kill;
        enum radeon_family family;
        enum chip_class chip_class;
        uint32_t tess_offchip_block_dw_size;
@@ -224,6 +225,7 @@ struct radv_shader_variant_info {
                        uint32_t flat_shaded_mask;
                        bool can_discard;
                        bool early_fragment_test;
+                       bool force_correct_derivs_after_kill;
                } fs;
                struct {
                        unsigned block_size[3];
-- 
2.17.0

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to