[PATCH] PR target/99668: Improved complex to vector RTL expansion.

Roger Sayle Tue, 30 Jun 2026 14:42:09 -0700

This patch improves the RTL expansion of casts (VIEW_CONVERT_EXPR)
from complex numbers to two-component vectors with the same inner
type.  Currently, expand spills the complex number to memory to
perform this conversion.  With the patch below we now convert the
VIEW_CONVERT_EXPR into the equivalent of (v2x){__real__ z, __imag__ z},
using the backend's vec_init_optab.


The motivating example from the Bugzilla PR is:

typedef double v2df __attribute__((vector_size(16)));
v2df foo (_Complex double x)
{
  return *(v2df *)&x;
}

Currently, with -O2 GCC implements this by spilling to memory:

foo:    movsd   %xmm0, -24(%rsp)
        movsd   %xmm1, -16(%rsp)
        movupd  -24(%rsp), %xmm0
        ret

With this enhancement to RTL expansion, we now generate:

foo:    unpcklpd        %xmm1, %xmm0
        ret

The improvement with -m32 -msse2 -O2 is even more pronounced.

From:   subl    $28, %esp
        movsd   32(%esp), %xmm1
        movsd   40(%esp), %xmm2
        movsd   %xmm1, (%esp)
        movsd   %xmm2, 8(%esp)
        movupd  (%esp), %xmm0
        addl    $28, %esp
        ret

To:     movq    4(%esp), %xmm0
        movhpd  12(%esp), %xmm0
        ret


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2026-06-30  Roger Sayle  <[email protected]>

gcc/ChangeLog
        PR target/99668
        * expr.cc (expand_expr_real_1) <case VIEW_CONVERT_EXPR>: Use
        vec_init_optab to convert a complex number into a two-component
        vector with the same inner type.

gcc/testsuite/ChangeLog
        PR target/99668
        * gcc.target/i386/pr99668.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/expr.cc b/gcc/expr.cc
index de73215ccc6..7cf8eb3186b 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -12783,6 +12783,33 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
        return extract_bit_field (op0, TYPE_PRECISION (type), 0,
                                  TYPE_UNSIGNED (type), NULL_RTX,
                                  mode, mode, false, NULL);
+      /* If source is a complex number and destination is a
+        two-component vector with same inner type, use vector
+        initialization.  */
+      else if (COMPLEX_MODE_P (GET_MODE (op0))
+              && VECTOR_MODE_P (mode)
+              && known_eq (GET_MODE_NUNITS (mode), 2)
+              && GET_MODE_INNER (mode) == GET_MODE_INNER (GET_MODE (op0))
+              && convert_optab_handler (vec_init_optab, mode,
+                                        GET_MODE_INNER (mode))
+                 != CODE_FOR_nothing)
+       {
+         if (!target)
+           target = gen_reg_rtx (mode);
+         enum insn_code icode = convert_optab_handler (vec_init_optab, mode,
+                                                       GET_MODE_INNER (mode));
+         rtx rpart = read_complex_part (op0, false);
+         rtx ipart = read_complex_part (op0, true);
+         if (!REG_P (rpart)  && !CONSTANT_P (rpart))
+           rpart = force_reg (GET_MODE_INNER (mode), rpart);
+         if (!REG_P (ipart)  && !CONSTANT_P (ipart))
+           ipart = force_reg (GET_MODE_INNER (mode), ipart);
+         rtvec vec = rtvec_alloc (2);
+         RTVEC_ELT (vec, 0) = rpart;
+         RTVEC_ELT (vec, 1) = ipart;
+         emit_insn (GEN_FCN (icode) (target, gen_rtx_PARALLEL (mode, vec)));
+         return target;
+       }
       /* As a last resort, spill op0 to memory, and reload it in a
         different mode.  */
       else if (!MEM_P (op0))
diff --git a/gcc/testsuite/gcc.target/i386/pr99668.c b/gcc/testsuite/gcc.target/i386/pr99668.c
new file mode 100644
index 00000000000..90bac695083
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99668.c
@@ -0,0 +1,12 @@
+/* PR target/99668 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+v2df foo (_Complex double x)
+{
+  return *(v2df *)&x;
+}
+
+/* { dg-final { scan-assembler-not "\\tmovsd" } } */
+/* { dg-final { scan-assembler-not "movupd" } } */

[PATCH] PR target/99668: Improved complex to vector RTL expansion.

Reply via email to