This patch improves the RTL expansion of casts (VIEW_CONVERT_EXPR)
from complex numbers to two-component vectors with the same inner
type. Currently, expand spills the complex number to memory to
perform this conversion. With the patch below we now convert the
V_C_E into the equivalent of (v2x){__real__ z,__imag__ z}, using
the backend's vec_init_optab.
The motivating example from the Bugzilla PR is:
typedef double v2df __attribute__((vector_size(16)));
v2df foo (_Complex double x)
{
return *(v2df *)&x;
}
Currently, with -O2 GCC implements this by spilling to memory:
foo: movsd %xmm0, -24(%rsp)
movsd %xmm1, -16(%rsp)
movupd -24(%rsp), %xmm0
ret
With this enhancement to RTL expansion, we now generate:
foo: unpcklpd %xmm1, %xmm0
ret
The improvement with -m32 -msse2 -O2 is even more pronounced.
From: subl $28, %esp
movsd 32(%esp), %xmm1
movsd 40(%esp), %xmm2
movsd %xmm1, (%esp)
movsd %xmm2, 8(%esp)
movupd (%esp), %xmm0
addl $28, %esp
ret
To: movq 4(%esp), %xmm0
movhpd 12(%esp), %xmm0
ret
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures. Ok for mainline?
2026-06-30 Roger Sayle <[email protected]>
gcc/ChangeLog
PR target/99668
* expr.cc (expand_expr_real_1) <case VIEW_CONVERT_EXPR>: Use
vec_init_optab to convert a complex number into a two-component
vector with the same inner type.
gcc/testsuite/ChangeLog
PR target/99668
* gcc.target/i386/pr99668.c: New test case.
Thanks in advance,
Roger
--
diff --git a/gcc/expr.cc b/gcc/expr.cc
index de73215ccc6..7cf8eb3186b 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -12783,6 +12783,33 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
return extract_bit_field (op0, TYPE_PRECISION (type), 0,
TYPE_UNSIGNED (type), NULL_RTX,
mode, mode, false, NULL);
+ /* If source is a complex number and destination is a
+ two-component vector with same inner type, use vector
+ initialization. */
+ else if (COMPLEX_MODE_P (GET_MODE (op0))
+ && VECTOR_MODE_P (mode)
+ && known_eq (GET_MODE_NUNITS (mode), 2)
+ && GET_MODE_INNER (mode) == GET_MODE_INNER (GET_MODE (op0))
+ && convert_optab_handler (vec_init_optab, mode,
+ GET_MODE_INNER (mode))
+ != CODE_FOR_nothing)
+ {
+ if (!target)
+ target = gen_reg_rtx (mode);
+ enum insn_code icode = convert_optab_handler (vec_init_optab, mode,
+ GET_MODE_INNER (mode));
+ rtx rpart = read_complex_part (op0, false);
+ rtx ipart = read_complex_part (op0, true);
+ if (!REG_P (rpart) && !CONSTANT_P (rpart))
+ rpart = force_reg (GET_MODE_INNER (mode), rpart);
+ if (!REG_P (ipart) && !CONSTANT_P (ipart))
+ ipart = force_reg (GET_MODE_INNER (mode), ipart);
+ rtvec vec = rtvec_alloc (2);
+ RTVEC_ELT (vec, 0) = rpart;
+ RTVEC_ELT (vec, 1) = ipart;
+ emit_insn (GEN_FCN (icode) (target, gen_rtx_PARALLEL (mode, vec)));
+ return target;
+ }
/* As a last resort, spill op0 to memory, and reload it in a
different mode. */
else if (!MEM_P (op0))
diff --git a/gcc/testsuite/gcc.target/i386/pr99668.c b/gcc/testsuite/gcc.target/i386/pr99668.c
new file mode 100644
index 00000000000..90bac695083
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99668.c
@@ -0,0 +1,12 @@
+/* PR target/99668 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+v2df foo (_Complex double x)
+{
+ return *(v2df *)&x;
+}
+
+/* { dg-final { scan-assembler-not "\\tmovsd" } } */
+/* { dg-final { scan-assembler-not "movupd" } } */