https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64731
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Assignee|unassigned at gcc dot gnu.org |rguenth at gcc dot gnu.org
Status|NEW |ASSIGNED
--- Comment #7 from Richard Biener <rguenth at gcc dot gnu.org> ---
Yes, forwprop splits the vector loads:
_5 = &MEM[(double4 *)a_11(D) + ivtmp.14_22 * 1];
_1 = BIT_FIELD_REF <MEM[(double4 *)_5], 128, 128>;
_25 = BIT_FIELD_REF <MEM[(double4 *)_5], 128, 0>;
_14 = &MEM[(double4 *)b_12(D) + ivtmp.14_22 * 1];
_2 = BIT_FIELD_REF <MEM[(double4 *)_14], 128, 128>;
_17 = BIT_FIELD_REF <MEM[(double4 *)_14], 128, 0>;
_24 = _17 + _25;
_3 = _1 + _2;
but not the store from the CTOR:
_7 = {_24, _3};
MEM[(double4 *)a_11(D) + ivtmp.14_22 * 1] = _7;
forwprop would also split that, but we have
else if (code == CONSTRUCTOR
&& VECTOR_TYPE_P (TREE_TYPE (rhs))
&& TYPE_MODE (TREE_TYPE (rhs)) == BLKmode
&& CONSTRUCTOR_NELTS (rhs) > 0
&& (!VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs,
0)->value))
|| (TYPE_MODE (TREE_TYPE (CONSTRUCTOR_ELT (rhs,
0)->value))
!= BLKmode)))
{
/* Rewrite stores of a single-use vector constructors
to component-wise stores if the mode isn't supported. */
use_operand_p use_p;
gimple *use_stmt;
if (single_imm_use (lhs, &use_p, &use_stmt)
&& gimple_store_p (use_stmt)
&& !gimple_has_volatile_ops (use_stmt)
&& !stmt_can_throw_internal (fun, use_stmt)
&& is_gimple_assign (use_stmt)
&& (TREE_CODE (gimple_assign_lhs (use_stmt))
!= TARGET_MEM_REF))
and in this case there's a TARGET_MEM_REF on the LHS, so the condition fails
and the store is not split. With -fno-ivopts we get
.L2:
movslq %ecx, %rax
addl $4, %ecx
salq $3, %rax
leaq (%rdi,%rax), %rdx
addq %rsi, %rax
movapd 16(%rax), %xmm0
movapd (%rdx), %xmm1
addpd 16(%rdx), %xmm0
addpd (%rax), %xmm1
movaps %xmm0, 16(%rdx)
movaps %xmm1, (%rdx)
subl $1, %r8d
jne .L2
We could use the same trick as optimize_vector_load: instead of using the
TARGET_MEM_REF as the memory reference itself, use it only for address
generation.