Render Target Message payloads for 16-bit values fit in only one
register.
From Intel PRM vol07, page 249 "Render Target Messages" / "Message
Data Payloads"
"The half precision Render Target Write messages have data payloads
that can pack a full SIMD16 payload into 1 register instead of
two. The half-precision packed format is used for RGBA and Source
0 Alpha, but Source Depth data payload is always supplied in full
precision."
So when 16-bit data is uploaded to the payload, it will use 1 register
regardless of whether it is SIMD16 or SIMD8.
This change implies that we need to replicate the approach in the
copy propagation of the load_payload operations.
v2: By default 16-bit sources should be packed (Jason Ekstrand)
Include changes in copy_propagation of load_payload (Chema Casanova)
---
src/intel/compiler/brw_fs.cpp | 5 ++++-
src/intel/compiler/brw_fs_copy_propagation.cpp | 4 ++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index b695508823..b1e548fd93 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3485,7 +3485,10 @@ fs_visitor::lower_load_payload()
for (uint8_t i = inst->header_size; i < inst->sources; i++) {
if (inst->src[i].file != BAD_FILE)
ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]);
- dst = offset(dst, ibld, 1);
+ if (type_sz(inst->src[i].type) == 2)
+ dst = byte_offset(dst, REG_SIZE);
+ else
+ dst = offset(dst, ibld, 1);
}
inst->remove(block);
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index d4d01d783c..470eaeec4f 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -800,7 +800,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx,
bblock_t *block,
int offset = 0;
for (int i = 0; i < inst->sources; i++) {
int effective_width = i < inst->header_size ? 8 : inst->exec_size;
- assert(effective_width * type_sz(inst->src[i].type) % REG_SIZE ==
0);
+ assert(effective_width * MAX2(4, type_sz(inst->src[i].type)) %
REG_SIZE == 0);
const unsigned size_written = effective_width *
type_sz(inst->src[i].type);
if (inst->src[i].file == VGRF) {
@@ -816,7 +816,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx,
bblock_t *block,
ralloc_free(entry);
}
}
- offset += size_written;
+ offset += type_sz(inst->src[i].type) == 2 ? REG_SIZE :
size_written;
}
}
}
--
2.14.3
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev