https://gcc.gnu.org/g:a3f03891065cb9691f6e9cebce4d4542deb92a35

commit r15-2217-ga3f03891065cb9691f6e9cebce4d4542deb92a35
Author: liuhongt <hongtao....@intel.com>
Date:   Mon Jul 22 11:36:59 2024 +0800

    Relax ix86_hardreg_mov_ok after split1.
    
    ix86_hardreg_mov_ok is added by r11-5066-gbe39636d9f68c4
    
    >    The solution proposed here is to have the x86 backend/recog prevent
    >    early RTL passes composing instructions (that set likely_spilled hard
    >    registers) that they (combine) can't simplify, until after reload.
    >    We allow sets from pseudo registers, immediate constants and memory
    >    accesses, but anything more complicated is performed via a temporary
    >    pseudo.  Not only does this simplify things for the register allocator,
    >    but any remaining register-to-register moves are easily cleaned up
    >    by the late optimization passes after reload, such as peephole2 and
    >    cprop_hardreg.
    
    The restriction is mainly for rtl optimization passes before pass_combine.
    
    But split1 splits
    
    ```
    (insn 17 13 18 2 (set (reg/i:V4SI 20 xmm0)
            (vec_merge:V4SI (const_vector:V4SI [
                        (const_int -1 [0xffffffffffffffff]) repeated x4
                    ])
                (const_vector:V4SI [
                        (const_int 0 [0]) repeated x4
                    ])
                (unspec:QI [
                        (reg:V4SF 106)
                        (reg:V4SF 102)
                        (const_int 0 [0])
                    ] UNSPEC_PCMP))) "/app/example.cpp":20:1 2929 
{*avx_cmpv4sf3_1}
         (expr_list:REG_DEAD (reg:V4SF 102)
            (expr_list:REG_DEAD (reg:V4SF 106)
                (nil))))
    ```
    
    into:
    ```
    (insn 23 13 24 2 (set (reg:V4SF 107)
            (unspec:V4SF [
                    (reg:V4SF 106)
                    (reg:V4SF 102)
                    (const_int 0 [0])
                ] UNSPEC_PCMP)) "/app/example.cpp":20:1 -1
         (nil))
    (insn 24 23 18 2 (set (reg/i:V4SI 20 xmm0)
            (subreg:V4SI (reg:V4SF 107) 0)) "/app/example.cpp":20:1 -1
         (nil))
    ```
    
    There're many splitters generating MOV insn with SUBREG and would have
    same problem.
    Instead of changing those splitters one by one, the patch relaxes
    ix86_hard_mov_ok to allow mov subreg to hard register after
    split1. ix86_pre_reload_split () is used to replace
    !reload_completed && ira_in_progress.
    
    gcc/ChangeLog:
    
            * config/i386/i386.cc (ix86_hardreg_mov_ok): Relax mov subreg
            to hard register after split1.
    
    gcc/testsuite/ChangeLog:
    
            * g++.target/i386/pr115982.C: New test.

Diff:
---
 gcc/config/i386/i386.cc                  |  5 ++---
 gcc/testsuite/g++.target/i386/pr115982.C | 11 +++++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 9c2ebe74fc92..77c441893b40 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20212,7 +20212,7 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
 }
 
 /* Return true if a set of DST by the expression SRC should be allowed.
-   This prevents complex sets of likely_spilled hard regs before reload.  */
+   This prevents complex sets of likely_spilled hard regs before split1.  */
 
 bool
 ix86_hardreg_mov_ok (rtx dst, rtx src)
@@ -20224,8 +20224,7 @@ ix86_hardreg_mov_ok (rtx dst, rtx src)
           ? standard_sse_constant_p (src, GET_MODE (dst))
           : x86_64_immediate_operand (src, GET_MODE (dst)))
       && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
-      && !reload_completed
-      && !lra_in_progress)
+      && ix86_pre_reload_split ())
     return false;
   return true;
 }
diff --git a/gcc/testsuite/g++.target/i386/pr115982.C 
b/gcc/testsuite/g++.target/i386/pr115982.C
new file mode 100644
index 000000000000..4b91618405d5
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr115982.C
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+
+typedef float VF __attribute__((__vector_size__(16)));
+typedef int VI __attribute__((__vector_size__(16)));
+
+VI
+foo (VF x)
+{
+  return !x;
+}

Reply via email to