The implementation of these opcodes in the generator assumes that their
arguments are packed, and it generates register regions based on that
assumption. While this expectation is reasonable for 32-bit, when we
load 16-bit elements from UBOs we get them with a stride of 2 that we
then need to pack with a stride of 1. Copy propagation can see through this
and rewrite ddx/ddy operands to use the original, strided register, breaking
the implementation in the generator.
---
.../compiler/brw_fs_copy_propagation.cpp | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index 58d5080b4e9..c01d4ec4a4f 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -361,6 +361,20 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned
stride,
return true;
}
+static bool
+instruction_requires_packed_data(fs_inst *inst)
+{
+ switch (inst->opcode) {
+ case FS_OPCODE_DDX_FINE:
+ case FS_OPCODE_DDX_COARSE:
+ case FS_OPCODE_DDY_FINE:
+ case FS_OPCODE_DDY_COARSE:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
@@ -407,6 +421,13 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg,
acp_entry *entry)
inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
return false;
+ /* Some instructions implemented in the generator backend, such as
+ * derivatives, assume that their operands are packed so we can't
+ * generally propagate strided regions to them.
+ */
+ if (instruction_requires_packed_data(inst) && entry->src.stride > 1)
+ return false;
+
/* Bail if the result of composing both strides would exceed the
* hardware limit.
*/
--
2.17.1
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev