https://gcc.gnu.org/g:ab214ef734bfc3dcffcf79ff9e1dd651c2b40566

commit r15-3314-gab214ef734bfc3dcffcf79ff9e1dd651c2b40566
Author: liuhongt <hongtao....@intel.com>
Date:   Thu Aug 29 11:39:20 2024 +0800

    Check avx upper register for parallel.
    
    For function arguments/return values in BLK mode, the value is put in a
    parallel with an expr_list, and the expr_list contains the real mode
    and registers.
    The current ix86_check_avx_upper_register only checks SSE_REG_P on the
    rtx it is given, and so fails to handle that case.  This patch extends
    the check to each subrtx.
    
    gcc/ChangeLog:
    
            PR target/116512
            * config/i386/i386.cc (ix86_check_avx_upper_register): Iterate
            subrtx to scan for avx upper register.
            (ix86_check_avx_upper_stores): Inline old
            ix86_check_avx_upper_register.
            (ix86_avx_u128_mode_needed): Ditto, and replace
            FOR_EACH_SUBRTX with call to new
            ix86_check_avx_upper_register.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr116512.c: New test.

Diff:
---
 gcc/config/i386/i386.cc                  | 36 ++++++++++++++++++++------------
 gcc/testsuite/gcc.target/i386/pr116512.c | 26 +++++++++++++++++++++++
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index a1f65d41fdd5..546c964d2a47 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14882,9 +14882,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn)
 static bool
 ix86_check_avx_upper_register (const_rtx exp)
 {
-  return (SSE_REG_P (exp)
-         && !EXT_REX_SSE_REG_P (exp)
-         && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
+  /* construct_container may return a parallel with expr_list
+     which contains the real reg and mode  */
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
+    {
+      const_rtx x = *iter;
+      if (SSE_REG_P (x)
+         && !EXT_REX_SSE_REG_P (x)
+         && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
+       return true;
+    }
+
+  return false;
 }
 
 /* Check if a 256bit or 512bit AVX register is referenced in stores.   */
@@ -14892,7 +14902,9 @@ ix86_check_avx_upper_register (const_rtx exp)
 static void
 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
 {
-  if (ix86_check_avx_upper_register (dest))
+  if (SSE_REG_P (dest)
+      && !EXT_REX_SSE_REG_P (dest)
+      && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
     {
       bool *used = (bool *) data;
       *used = true;
@@ -14951,14 +14963,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
       return AVX_U128_CLEAN;
     }
 
-  subrtx_iterator::array_type array;
-
   rtx set = single_set (insn);
   if (set)
     {
       rtx dest = SET_DEST (set);
       rtx src = SET_SRC (set);
-      if (ix86_check_avx_upper_register (dest))
+      if (SSE_REG_P (dest)
+         && !EXT_REX_SSE_REG_P (dest)
+         && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
        {
          /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
             source isn't zero.  */
@@ -14969,9 +14981,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
        }
       else
        {
-         FOR_EACH_SUBRTX (iter, array, src, NONCONST)
-           if (ix86_check_avx_upper_register (*iter))
-             return AVX_U128_DIRTY;
+         if (ix86_check_avx_upper_register (src))
+           return AVX_U128_DIRTY;
        }
 
       /* This isn't YMM/ZMM load/store.  */
@@ -14982,9 +14993,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
      Hardware changes state only when a 256bit register is written to,
      but we need to prevent the compiler from moving optimal insertion
      point above eventual read from 256bit or 512 bit register.  */
-  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
-    if (ix86_check_avx_upper_register (*iter))
-      return AVX_U128_DIRTY;
+  if (ix86_check_avx_upper_register (PATTERN (insn)))
+    return AVX_U128_DIRTY;
 
   return AVX_U128_ANY;
 }
diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c
new file mode 100644
index 000000000000..c2bc6c91b648
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116512.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+
+struct B {
+  union {
+    __m512 f;
+    __m512i s;
+  };
+};
+
+struct B foo(int n) {
+  struct B res;
+  res.s = _mm512_set1_epi32(n);
+
+  return res;
+}
+
+__m512i bar(int n) {
+  struct B res;
+  res.s = _mm512_set1_epi32(n);
+
+  return res.s;
+}

Reply via email to