[gcc r16-2305] x86: Convert MMX integer loads from constant vector pool

H.J. Lu via Gcc-cvs Wed, 16 Jul 2025 11:30:30 -0700

https://gcc.gnu.org/g:11f73c82f178beb9f3f29cbfe2e0a5e592e40b69


commit r16-2305-g11f73c82f178beb9f3f29cbfe2e0a5e592e40b69
Author: Uros Bizjak <ubiz...@gmail.com>
Date:   Tue Jul 15 05:05:10 2025 +0800

    x86: Convert MMX integer loads from constant vector pool
    
    For MMX 16-bit, 32-bit and 64-bit constant vector loads from constant
    vector pool:
    
    (insn 6 2 7 2 (set (reg:V1SI 5 di)
            (mem/u/c:V1SI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S4 A32])) "pr121062-2.c":10:3 2036 {*movv1si_internal}
         (expr_list:REG_EQUAL (const_vector:V1SI [
                    (const_int -1 [0xffffffffffffffff])
                ])
            (nil)))
    
    we can convert it to
    
    (insn 12 2 7 2 (set (reg:SI 5 di)
            (const_int -1 [0xffffffffffffffff])) "pr121062-2.c":10:3 100 {*movsi_internal}
         (nil))
    
    Co-Developed-by: H.J. Lu <hjl.to...@gmail.com>
    
    gcc/
    
            PR target/121062
            * config/i386/i386.cc (ix86_convert_const_vector_to_integer):
            Handle E_V1SImode and E_V1DImode.
            * config/i386/mmx.md (V_16_32_64): Add V1SI, V2BF and V1DI.
            (mmxinsnmode): Add V1DI and V1SI.
            Add V_16_32_64 splitter for constant vector loads from constant
            vector pool.
            (V_16_32_64:*mov<mode>_imm): Moved after V_16_32_64 splitter.
            Replace lowpart_subreg with adjust_address.
    
    gcc/testsuite/
    
            PR target/121062
            * gcc.target/i386/pr121062-1.c: New test.
            * gcc.target/i386/pr121062-2.c: Likewise.
            * gcc.target/i386/pr121062-3a.c: Likewise.
            * gcc.target/i386/pr121062-3b.c: Likewise.
            * gcc.target/i386/pr121062-3c.c: Likewise.
            * gcc.target/i386/pr121062-4.c: Likewise.
            * gcc.target/i386/pr121062-5.c: Likewise.
            * gcc.target/i386/pr121062-6.c: Likewise.
            * gcc.target/i386/pr121062-7.c: Likewise.

Diff:
---
 gcc/config/i386/i386.cc                     |  4 ++
 gcc/config/i386/mmx.md                      | 60 ++++++++++++++++++++---------
 gcc/testsuite/gcc.target/i386/pr121062-1.c  | 34 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr121062-2.c  | 14 +++++++
 gcc/testsuite/gcc.target/i386/pr121062-3a.c | 23 +++++++++++
 gcc/testsuite/gcc.target/i386/pr121062-3b.c |  6 +++
 gcc/testsuite/gcc.target/i386/pr121062-3c.c |  6 +++
 gcc/testsuite/gcc.target/i386/pr121062-4.c  | 14 +++++++
 gcc/testsuite/gcc.target/i386/pr121062-5.c  | 13 +++++++
 gcc/testsuite/gcc.target/i386/pr121062-6.c  | 13 +++++++
 gcc/testsuite/gcc.target/i386/pr121062-7.c  | 13 +++++++
 11 files changed, 181 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d45ffb1892f0..49bd3939eb4e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16703,6 +16703,10 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
          val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
        }
       break;
+    case E_V1SImode:
+    case E_V1DImode:
+      op = CONST_VECTOR_ELT (op, 0);
+      return INTVAL (op);
     case E_V2HFmode:
     case E_V2BFmode:
     case E_V4HFmode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 29a8cb599a7e..1f9799344b64 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -81,12 +81,13 @@
 ;; 4-byte and 2-byte QImode vector modes
 (define_mode_iterator VI1_16_32 [V4QI V2QI])
 
-;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
+;; All 2-byte, 4-byte and 8-byte vector modes.
 (define_mode_iterator V_16_32_64
-   [V2QI V4QI V2HI V2HF
+   [V2QI V4QI V2HI V1SI V2HF V2BF
     (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
     (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
-    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
+    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")
+    (V1DI "TARGET_64BIT")])
 
 ;; V2S* modes
 (define_mode_iterator V2FI [V2SF V2SI])
@@ -107,6 +108,7 @@
   [(V8QI "DI") (V4QI "SI") (V2QI "HI")
    (V4HI "DI") (V2HI "SI")
    (V2SI "DI")
+   (V1DI "DI") (V1SI "SI")
    (V4HF "DI") (V2HF "SI")
    (V4BF "DI") (V2BF "SI")
    (V2SF "DI")])
@@ -407,22 +409,6 @@
           ]
           (symbol_ref "true")))])
 
-;; 16-bit, 32-bit and 64-bit constant vector stores.  After reload,
-;; convert them to immediate integer stores.
-(define_insn_and_split "*mov<mode>_imm"
-  [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
-       (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
-  ""
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0) (match_dup 1))]
-{
-  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
-                                                           <MODE>mode);
-  operands[1] = GEN_INT (val);
-  operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
-})
-
 ;; For TARGET_64BIT we always round up to 8 bytes.
 (define_insn "*push<mode>2_rex64"
   [(set (match_operand:V_32 0 "push_operand" "=X,X")
@@ -588,6 +574,42 @@
           ]
           (symbol_ref "true")))])
 
+(define_split
+  [(set (match_operand:V_16_32_64 0 "general_reg_operand")
+       (match_operand:V_16_32_64 1 "memory_operand"))]
+  "reload_completed
+   && SYMBOL_REF_P (XEXP (operands[1], 0))
+   && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx op1 = avoid_constant_pool_reference (operands[1]);
+
+  if (!CONST_VECTOR_P (op1))
+    FAIL;
+
+  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op1, <MODE>mode);
+
+  operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
+  operands[1] = GEN_INT (val);
+})
+
+;; 16-bit, 32-bit and 64-bit constant vector stores.  After reload,
+;; convert them to immediate integer stores.
+(define_insn_and_split "*mov<mode>_imm"
+  [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
+       (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx op1 = operands[1];
+  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op1, <MODE>mode);
+
+  operands[0] = adjust_address (operands[0], <mmxinsnmode>mode, 0);
+  operands[1] = GEN_INT (val);
+})
+
 ;; We always round up to UNITS_PER_WORD bytes.
 (define_insn "*pushv2qi2"
   [(set (match_operand:V2QI 0 "push_operand" "=X,X")
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-1.c b/gcc/testsuite/gcc.target/i386/pr121062-1.c
new file mode 100644
index 000000000000..799f8562c9f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+
+extern union {
+  int i;
+  float f;
+} int_as_float_u;
+
+extern int render_result_from_bake_w;
+extern int render_result_from_bake_h_seed_pass;
+extern float *render_result_from_bake_h_primitive;
+extern float *render_result_from_bake_h_seed;
+
+float
+int_as_float(int i)
+{
+  int_as_float_u.i = i;
+  return int_as_float_u.f;
+}
+
+void
+render_result_from_bake_h(int tx)
+{
+  while (render_result_from_bake_w) {
+    for (; tx < render_result_from_bake_w; tx++)
+      render_result_from_bake_h_primitive[1] =
+          render_result_from_bake_h_primitive[2] = int_as_float(-1);
+    if (render_result_from_bake_h_seed_pass) {
+      *render_result_from_bake_h_seed = 0;
+    }
+  }
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, %r\[a-z0-9\]+" 2 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-2.c b/gcc/testsuite/gcc.target/i386/pr121062-2.c
new file mode 100644
index 000000000000..723d68a40031
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -fno-dce -mtune=generic" } */
+
+typedef int __attribute__((__vector_size__ (4))) S;
+extern void bar (S);
+
+void
+foo ()
+{
+  bar ((S){-1});
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1, \\(%esp\\)" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1, %edi" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-3a.c b/gcc/testsuite/gcc.target/i386/pr121062-3a.c
new file mode 100644
index 000000000000..effd4ff53673
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-3a.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-O2 -march=x86-64 -fpic" } */
+
+typedef struct {
+  struct {
+    unsigned short lo4;
+    unsigned short lo3;
+    unsigned short lo2;
+    unsigned short lo1;
+  } i;
+} BID_BINARY80LDOUBLE;
+extern BID_BINARY80LDOUBLE __bid64_to_binary80_x_out;
+void
+__bid64_to_binary80 (void)
+{
+  __bid64_to_binary80_x_out.i.lo4
+    = __bid64_to_binary80_x_out.i.lo3
+    = __bid64_to_binary80_x_out.i.lo2
+    = __bid64_to_binary80_x_out.i.lo1 = 65535;
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+%xmm\[0-9\]+, " 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, \\(%(e|r)\[a-z0-9\]+\\)" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-3b.c b/gcc/testsuite/gcc.target/i386/pr121062-3b.c
new file mode 100644
index 000000000000..eb89b5da0914
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-3b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { fpic && lp64 } } } */
+/* { dg-options "-O2 -march=x86-64 -fno-pic -mcmodel=large" } */
+
+#include "pr121062-3a.c"
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, \\(%r\[a-z0-9\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-3c.c b/gcc/testsuite/gcc.target/i386/pr121062-3c.c
new file mode 100644
index 000000000000..4c07029c4f54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-3c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { fpic && lp64 } } } */
+/* { dg-options "-O2 -march=x86-64 -fpic -mcmodel=large" } */
+
+#include "pr121062-3a.c"
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1, \\(%r\[a-z0-9\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-4.c b/gcc/testsuite/gcc.target/i386/pr121062-4.c
new file mode 100644
index 000000000000..77a0c2e90bb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+typedef long long int __attribute__((__vector_size__ (8))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){0x12345678badbeefULL};
+}
+
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+%xmm\[0-9\]+, " 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movabsq\[ \\t\]+\\\$81985529250168559, %r\[a-z0-9\]+" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-5.c b/gcc/testsuite/gcc.target/i386/pr121062-5.c
new file mode 100644
index 000000000000..22c09a6bfec7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+typedef int __attribute__((__vector_size__ (4))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){0x12345678};
+}
+
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$305419896, \\(%(e|r)\[a-z0-9\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-6.c b/gcc/testsuite/gcc.target/i386/pr121062-6.c
new file mode 100644
index 000000000000..780b496b504e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -fno-dce -mtune=generic" } */
+
+typedef int __attribute__((__vector_size__ (8))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){0x12345678,0xbadbeefULL};
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+%xmm\[0-9\]+, " 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movabsq\[ \\t\]+\\\$841538639400031864, %r\[a-z0-9\]+" 1 { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121062-7.c b/gcc/testsuite/gcc.target/i386/pr121062-7.c
new file mode 100644
index 000000000000..f1834f8e173b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121062-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+typedef __bf16 __attribute__((__vector_size__ (4))) S;
+
+void
+foo (S *c)
+{
+  *c = (S){-0.1, 2.1};
+}
+
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$1074183629, \\(%(e|r)\[a-z0-9\]+\\)" 1 } } */

[gcc r16-2305] x86: Convert MMX integer loads from constant vector pool

Reply via email to