From: Marek Olšák <[email protected]>
Always use the monolithic variant for merged ES-GS on GFX9, in addition to
the non-monolithic variant.
---
src/gallium/drivers/radeonsi/si_shader.h | 10 +++++++++-
src/gallium/drivers/radeonsi/si_state_shaders.c | 19 +++++++++++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index e0227e4..4fb79e6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -438,26 +438,34 @@ struct si_shader_key {
unsigned as_ls:1; /* local shader, which precedes TCS */
/* Flags for monolithic compilation only. */
struct {
/* One byte for every input: SI_FIX_FETCH_* enums. */
uint8_t vs_fix_fetch[SI_MAX_ATTRIBS];
uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
} mono;
/* Optimization flags for asynchronous compilation only. */
- union {
+ struct {
struct {
uint64_t kill_outputs; /* "get_unique_index" bits */
uint32_t kill_outputs2; /* "get_unique_index2" bits */
unsigned clip_disable:1;
} hw_vs; /* HW VS (it can be VS, TES, GS) */
+
+ /* For shaders where monolithic variants have better code.
+ *
+ * This is a flag that has no effect on code generation,
+ * but forces monolithic shaders to be used as soon as
+ * possible, because it's in the "opt" group.
+ */
+ unsigned prefer_mono:1;
} opt;
};
struct si_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
unsigned spilled_sgprs;
unsigned spilled_vgprs;
unsigned private_mem_vgprs;
unsigned lds_size;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6bb3f50..22bf3cf 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1271,20 +1271,39 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
break;
case PIPE_SHADER_GEOMETRY:
if (sctx->b.chip_class >= GFX9) {
if (sctx->tes_shader.cso) {
key->part.gs.es = sctx->tes_shader.cso;
} else {
si_shader_selector_key_vs(sctx,
sctx->vs_shader.cso,
key,
&key->part.gs.vs_prolog);
key->part.gs.es = sctx->vs_shader.cso;
}
+
+ /* Merged ES-GS can have unbalanced wave usage.
+ *
+ * ES threads are per-vertex, while GS threads are
+ * per-primitive. So without any amplification, there
+ * are fewer GS threads than ES threads, which can result
+ * in empty (no-op) GS waves. With too much amplification,
+ * there are more GS threads than ES threads, which
+ * can result in empty (no-op) ES waves.
+ *
+ * Non-monolithic shaders are implemented by setting EXEC
+ * at the beginning of shader parts, and don't jump to
+ * the end if EXEC is 0.
+ *
+ * Monolithic shaders use conditional blocks, so they can
+ * jump and skip empty waves of ES or GS. So set this to
+ * always use optimized variants, which are monolithic.
+ */
+ key->opt.prefer_mono = 1;
}
key->part.gs.prolog.tri_strip_adj_fix =
sctx->gs_tri_strip_adj_fix;
break;
case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct si_state_blend *blend = sctx->queued.named.blend;
if
(sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
sel->info.colors_written == 0x1)
key->part.ps.epilog.last_cbuf =
MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
--
2.7.4
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev