https://gcc.gnu.org/g:9ebc9fbdddfe1ec85355b068354315a4da8e1ca0

commit r15-3602-g9ebc9fbdddfe1ec85355b068354315a4da8e1ca0
Author: Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org>
Date:   Thu Sep 12 13:29:43 2024 +0200

    s390: Fix strict_low_part generation
    
    In s390_expand_insv(), if generating code for ICM et al. src is a MEM
    and gen_lowpart might force src into a register such that we end up with
    patterns which do not match anymore.  Use adjust_address() instead in
    order to preserve a MEM.
    
    Furthermore, it is not straight forward to enforce a subreg.  For
    example, in case of a paradoxical subreg, gen_lowpart() may return a
    register.  In order to compensate this, s390_gen_lowpart_subreg() emits
    a reference to a pseudo which does not coincide with its definition
    which is wrong.  Additionally, if dest is a paradoxical subreg, then do
    not try to emit a strict_low_part since it could mean that dest was not
    initialized even though this might be fixed up later by init-regs.
    
    Splitter for insn *get_tp_64, *zero_extendhisi2_31,
    *zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload.
    Thus, operands[0] is a hard register and gen_lowpart (m, operands[0])
    just returns the hard register for mode m which is fine to use as an
    argument for strict_low_part, i.e., we do not need to enforce subregs
    here since after reload subregs are supposed to be eliminated anyway.
    
    This fixes gcc.dg/torture/pr111821.c.
    
    gcc/ChangeLog:
    
            * config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove.
            * config/s390/s390.cc (s390_gen_lowpart_subreg): Remove.
            (s390_expand_insv): Use adjust_address() and emit a
            strict_low_part only in case of a natural subreg.
            * config/s390/s390.md: Use gen_lowpart() instead of
            s390_gen_lowpart_subreg().

Diff:
---
 gcc/config/s390/s390-protos.h |  1 -
 gcc/config/s390/s390.cc       | 47 +++++++++++++------------------------------
 gcc/config/s390/s390.md       | 13 ++++++------
 3 files changed, 20 insertions(+), 41 deletions(-)

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index b4646ccb6060..e7ac59d17daa 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
 extern bool s390_return_addr_from_memory(void);
-extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
 extern tree s390_valid_target_attribute_tree (tree args,
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 47e1d5adfd99..c1649ca49bd1 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -516,31 +516,6 @@ s390_return_addr_from_memory ()
   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
 }
 
-/* Generate a SUBREG for the MODE lowpart of EXPR.
-
-   In contrast to gen_lowpart it will always return a SUBREG
-   expression.  This is useful to generate STRICT_LOW_PART
-   expressions.  */
-rtx
-s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
-{
-  rtx lowpart = gen_lowpart (mode, expr);
-
-  /* There might be no SUBREG in case it could be applied to the hard
-     REG rtx or it could be folded with a paradoxical subreg.  Bring
-     it back.  */
-  if (!SUBREG_P (lowpart))
-    {
-      machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
-      gcc_assert (REG_P (lowpart));
-      lowpart = gen_lowpart_SUBREG (mode,
-                                   gen_rtx_REG (reg_mode,
-                                                REGNO (lowpart)));
-    }
-
-  return lowpart;
-}
-
 /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
 bool
 s390_fma_allowed_p (machine_mode mode)
@@ -7112,15 +7087,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
       /* Emit a strict_low_part pattern if possible.  */
       if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
        {
-         rtx low_dest = s390_gen_lowpart_subreg (smode, dest);
-         rtx low_src = gen_lowpart (smode, src);
-
-         switch (smode)
+         rtx low_dest = gen_lowpart (smode, dest);
+         if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest))
            {
-           case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); 
return true;
-           case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); 
return true;
-           case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); 
return true;
-           default: break;
+             poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode);
+             rtx low_src = adjust_address (src, smode, offset);
+             switch (smode)
+               {
+               case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src));
+                              return true;
+               case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src));
+                              return true;
+               case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src));
+                              return true;
+               default: break;
+               }
            }
        }
 
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 3d5759d62521..592cf62d9623 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1974,12 +1974,11 @@
   "TARGET_ZARCH"
   "#"
   "&& reload_completed"
-  [(set (match_dup 2) (match_dup 4))
+  [(set (match_dup 2) (match_dup 3))
    (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
-   (set (strict_low_part (match_dup 3)) (match_dup 5))]
+   (set (strict_low_part (match_dup 2)) (match_dup 4))]
   "operands[2] = gen_lowpart (SImode, operands[0]);
-   operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]);
-   s390_split_access_reg (operands[1], &operands[5], &operands[4]);")
+   s390_split_access_reg (operands[1], &operands[4], &operands[3]);")
 
 ; Splitters for storing TLS pointer to %a0:DI.
 
@@ -5068,7 +5067,7 @@
    (parallel
     [(set (strict_low_part (match_dup 2)) (match_dup 1))
      (clobber (reg:CC CC_REGNUM))])]
-  "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);")
+  "operands[2] = gen_lowpart (HImode, operands[0]);")
 
 (define_insn_and_split "*zero_extendqisi2_31"
   [(set (match_operand:SI 0 "register_operand" "=&d")
@@ -5078,7 +5077,7 @@
   "&& reload_completed"
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
-  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
+  "operands[2] = gen_lowpart (QImode, operands[0]);")
 
 ;
 ; zero_extendqihi2 instruction pattern(s).
@@ -5110,7 +5109,7 @@
   "&& reload_completed"
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
-  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
+  "operands[2] = gen_lowpart (QImode, operands[0]);")
 
 ;
 ; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander

Reply via email to