.. in ix86_expand_vector_move and
ix86_convert_const_wide_int_to_broadcast(called by the former).

ix86_expand_vector_move is called by emit_move_insn which is used by
many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
when there's explict usage of xmm7/xmm15/xmm31.

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
for both w/and w/o --with-cpu=native --with-arch=native.

Ok for trunk?

gcc/ChangeLog:

        PR target/104704
        * config/i386/i386-expand.cc
        (ix86_convert_const_wide_int_to_broadcast): Replace
        ix86_gen_scratch_sse_rtx with gen_reg_rtx.
        (ix86_expand_vector_move): Ditto.
        * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
        corresponding splitter after it.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
        * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovda64.
        * gcc.target/i386/pr100865-12b.c: Ditto.
        * gcc.target/i386/pr100865-8b.c: Ditto.
        * gcc.target/i386/pr100865-9b.c: Ditto.
        * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
        * gcc.target/i386/pr82942-1.c: Ditto.
        * gcc.target/i386/pr82990-1.c: Ditto.
        * gcc.target/i386/pr82990-3.c: Ditto.
        * gcc.target/i386/pr82990-5.c: Ditto.
---
 gcc/config/i386/i386-expand.cc               |  6 +--
 gcc/config/i386/sse.md                       | 41 +++++++++++++++-----
 gcc/testsuite/gcc.target/i386/incoming-11.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-11b.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-12b.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-8b.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-9b.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr82941-1.c    |  3 +-
 gcc/testsuite/gcc.target/i386/pr82942-1.c    |  3 +-
 gcc/testsuite/gcc.target/i386/pr82990-1.c    |  3 +-
 gcc/testsuite/gcc.target/i386/pr82990-3.c    |  3 +-
 gcc/testsuite/gcc.target/i386/pr82990-5.c    |  3 +-
 12 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index faa0191c6dd..75a28cdd89d 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode 
mode, rtx op)
   machine_mode vector_mode;
   if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
     gcc_unreachable ();
-  rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
+  rtx target = gen_reg_rtx (vector_mode);
   bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
                                               target,
                                               GEN_INT (val_broadcast));
@@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
       if (!register_operand (op0, mode)
          && !register_operand (op1, mode))
        {
-         rtx scratch = ix86_gen_scratch_sse_rtx (mode);
+         rtx scratch = gen_reg_rtx (mode);
          emit_move_insn (scratch, op1);
          op1 = scratch;
        }
@@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
       && !register_operand (op0, mode)
       && !register_operand (op1, mode))
     {
-      rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
+      rtx tmp = gen_reg_rtx (GET_MODE (op0));
       emit_move_insn (tmp, op1);
       emit_move_insn (op0, tmp);
       return;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3066ea3734a..d124545aa5d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
    (set_attr "mode" "V4SF")])
 
 (define_insn "*vec_dupv4si"
-  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
+  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x,v")
        (vec_duplicate:V4SI
-         (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
+         (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
   "TARGET_SSE"
   "@
    %vpshufd\t{$0, %1, %0|%0, %1, 0}
    vbroadcastss\t{%1, %0|%0, %1}
-   shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "sse2,avx,noavx")
-   (set_attr "type" "sselog1,ssemov,sselog1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")])
+   shufps\t{$0, %0, %0|%0, %0, 0}
+   #"
+  [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
+   (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
+   (set_attr "length_immediate" "1,0,1,1")
+   (set_attr "prefix_extra" "0,1,*,0")
+   (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
+   (set_attr "mode" "TI,V4SF,V4SF,TI")
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "3")
+             (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+          ]
+          (symbol_ref "true")))])
+
+(define_split
+  [(set (match_operand:V4SI 0 "sse_reg_operand")
+       (vec_duplicate:V4SI
+         (match_operand:SI 1 "general_reg_operand")))]
+  "TARGET_SSE && reload_completed
+   /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
+      available, because then we can broadcast from GPRs directly.  */
+   && !TARGET_AVX512VL"
+  [(const_int 0)]
+{
+  emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
+                               CONST0_RTX (V4SImode),
+                               gen_lowpart (SImode, operands[1])));
+  emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
+  DONE;
+})
 
 (define_insn "*vec_dupv2di"
   [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,x")
diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c 
b/gcc/testsuite/gcc.target/i386/incoming-11.c
index 4b822684b88..a830c96f7d1 100644
--- a/gcc/testsuite/gcc.target/i386/incoming-11.c
+++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
@@ -15,4 +15,4 @@ void f()
        for (i = 0; i < 100; i++) q[i] = 1;
 }
 
-/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
+/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c 
b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
index 7e458e85cdd..fe7736c318c 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
@@ -5,4 +5,4 @@
 
 /* { dg-final { scan-assembler-times "movabsq" 1 } } */
 /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, 
%xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } 
*/
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 
} } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c 
b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
index dee0cfb016a..c9acfc7088f 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
@@ -5,4 +5,4 @@
 
 /* { dg-final { scan-assembler-times "movabsq" 1 } } */
 /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, 
%xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } 
*/
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 
} } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c 
b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
index 4b7dd7cee3e..fa474c98a37 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
@@ -4,4 +4,4 @@
 #include "pr100865-8a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, 
%xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } 
*/
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 
} } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c 
b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
index a315dde7c52..0714c3c9d6a 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
@@ -4,4 +4,4 @@
 #include "pr100865-9a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, 
%xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } 
*/
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 
} } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c 
b/gcc/testsuite/gcc.target/i386/pr82941-1.c
index c3be2f5b797..d7e530d5116 100644
--- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
@@ -11,5 +11,4 @@ pr82941 ()
   z = y;
 }
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c 
b/gcc/testsuite/gcc.target/i386/pr82942-1.c
index 29ead049a67..9cdf81a9d60 100644
--- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
@@ -3,5 +3,4 @@
 
 #include "pr82941-1.c"
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c 
b/gcc/testsuite/gcc.target/i386/pr82990-1.c
index bbf580fea77..ff1d6d40eb2 100644
--- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
@@ -11,5 +11,4 @@ pr82941 ()
   z = y;
 }
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c 
b/gcc/testsuite/gcc.target/i386/pr82990-3.c
index 89ddb20adb3..201fa98d8d4 100644
--- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
@@ -3,5 +3,4 @@
 
 #include "pr82941-1.c"
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c 
b/gcc/testsuite/gcc.target/i386/pr82990-5.c
index b9da0e706b1..008217af0b8 100644
--- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
@@ -11,5 +11,4 @@ pr82941 ()
   z = y;
 }
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1  } } */
-- 
2.18.1

Reply via email to