https://gcc.gnu.org/g:7051fa5fa4eaa24785a64072490c1e0c65039915

commit r12-10729-g7051fa5fa4eaa24785a64072490c1e0c65039915
Author: Stefan Schulze Frielinghaus <stefa...@gcc.gnu.org>
Date:   Fri Sep 27 12:45:42 2024 +0200

    s390: Fix TF to FPRX2 conversion [PR115860]
    
    Currently subregs originating from *tf_to_fprx2_0 and *tf_to_fprx2_1
    survive register allocation.  This in turn leads to wrong register
    renaming.  Keeping the current approach would mean we need two insns for
    *tf_to_fprx2_0 and *tf_to_fprx2_1, respectively.  Something along the
    lines
    
    (define_insn "*tf_to_fprx2_0"
      [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "=f") 0)
            (unspec:DF [(match_operand:TF 1 "general_operand" "v")]
                       UNSPEC_TF_TO_FPRX2_0))]
      "TARGET_VXE"
      "#")
    
    (define_insn "*tf_to_fprx2_0"
      [(set (match_operand:DF 0 "nonimmediate_operand" "=f")
            (unspec:DF [(match_operand:TF 1 "general_operand" "v")]
                       UNSPEC_TF_TO_FPRX2_0))]
      "TARGET_VXE"
      "vpdi\t%v0,%v1,%v0,1"
      [(set_attr "op_type" "VRR")])
    
    and similar for *tf_to_fprx2_1.  Note, pre register allocation operand 0
    has mode FPRX2 and afterwards DF once subregs have been eliminated.
    
    Since we always copy a whole vector register into a floating-point
    register pair, another way to fix this is to merge *tf_to_fprx2_0 and
    *tf_to_fprx2_1 into a single insn which means we don't have to use
    subregs at all.  The downside of this is that the assembler template
    contains two instructions, now.  The upside is that we don't have to
    come up with some artificial insn before RA which might be more
    readable/maintainable.  That is implemented by this patch.
    
    In commit r11-4872-ge627cda5686592, the output operand specifier %V was
    introduced which is used in tf_to_fprx2 only, now.  Instead of coming up
    with its counterpart %F for floating-point registers, which would also
    only be used in tf_to_fprx2, I print the operands directly.  This
    renders %V unused which is why it is removed by this patch.
    
    gcc/ChangeLog:
    
            PR target/115860
            * config/s390/s390.cc (print_operand): Remove operand specifier
            %V.
            * config/s390/s390.md (UNSPEC_TF_TO_FPRX2): New.
            * config/s390/vector.md (*tf_to_fprx2_0): Remove.
            (*tf_to_fprx2_1): Remove.
            (tf_to_fprx2): New.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/s390/vector/long-double-asm-abi.c: Adapt
            scan-assembler directive.
            * gcc.target/s390/vector/long-double-to-i64.c: Adapt
            scan-assembler directive.
            * gcc.target/s390/pr115860-1.c: New test.
    
    (cherry picked from commit 46c2538435dfc50dd5c67c4e03ce387d1f6ebe9b)

Diff:
---
 gcc/config/s390/s390.cc                            |  5 +-
 gcc/config/s390/s390.md                            |  2 +
 gcc/config/s390/vector.md                          | 75 ++++++++++++----------
 gcc/testsuite/gcc.target/s390/pr115860-1.c         | 26 ++++++++
 .../gcc.target/s390/vector/long-double-asm-abi.c   |  2 +-
 .../gcc.target/s390/vector/long-double-to-i64.c    |  2 -
 6 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 2be3a873b891..a8f804ffe4f7 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -8028,7 +8028,6 @@ print_operand_address (FILE *file, rtx addr)
         CONST_VECTOR: Generate a bitmask for vgbm instruction.
     'x': print integer X as if it's an unsigned halfword.
     'v': print register number as vector register (v1 instead of f1).
-    'V': print the second word of a TFmode operand as vector register.
 */
 
 void
@@ -8221,13 +8220,13 @@ print_operand (FILE *file, rtx x, int code)
     case REG:
       /* Print FP regs as fx instead of vx when they are accessed
         through non-vector mode.  */
-      if ((code == 'v' || code == 'V')
+      if (code == 'v'
          || VECTOR_NOFP_REG_P (x)
          || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
          || (VECTOR_REG_P (x)
              && (GET_MODE_SIZE (GET_MODE (x)) /
                  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
-       fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
+       fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
       else
        fprintf (file, "%s", reg_names[REGNO (x)]);
       break;
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 2fc799695377..335aff9884e2 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -244,6 +244,8 @@
 
    UNSPEC_VEC_ELTSWAP
 
+   UNSPEC_TF_TO_FPRX2
+
    UNSPEC_NNPA_VCLFNHS_V8HI
    UNSPEC_NNPA_VCLFNLS_V8HI
    UNSPEC_NNPA_VCRNFS_V8HI
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 75912280c23d..ac3816a6f5c6 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -900,36 +900,45 @@
   "vmrlg\t%0,%1,%2";
   [(set_attr "op_type" "VRR")])
 
-
-(define_insn "*tf_to_fprx2_0"
-  [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 0)
-       (subreg:DF (match_operand:TF    1 "general_operand"       "v") 0))]
-  "TARGET_VXE"
-  ; M4 == 1 corresponds to %v0[0] = %v1[0]; %v0[1] = %v0[1];
-  "vpdi\t%v0,%v1,%v0,1"
-  [(set_attr "op_type" "VRR")])
-
-(define_insn "*tf_to_fprx2_1"
-  [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 8)
-       (subreg:DF (match_operand:TF    1 "general_operand"       "v") 8))]
+(define_insn "tf_to_fprx2"
+  [(set (match_operand:FPRX2             0 "register_operand" "=f,f ,f")
+       (unspec:FPRX2 [(match_operand:TF 1 "general_operand"   "v,AR,AT")]
+                     UNSPEC_TF_TO_FPRX2))]
   "TARGET_VXE"
-  ; M4 == 5 corresponds to %V0[0] = %v1[1]; %V0[1] = %V0[1];
-  "vpdi\t%V0,%v1,%V0,5"
-  [(set_attr "op_type" "VRR")])
-
-(define_insn_and_split "tf_to_fprx2"
-  [(set (match_operand:FPRX2            0 "nonimmediate_operand" "=f,f")
-       (subreg:FPRX2 (match_operand:TF 1 "general_operand"       "v,AR") 0))]
-  "TARGET_VXE"
-  "#"
-  "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))"
-  [(set (match_dup 2) (match_dup 3))
-   (set (match_dup 4) (match_dup 5))]
 {
-  operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0);
-  operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0);
-  operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8);
-  operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
+  char buf[64];
+  const char *reg_pair = reg_names[REGNO (operands[0]) + 1];
+  switch (which_alternative)
+    {
+    case 0:
+      if (REGNO (operands[0]) == REGNO (operands[1]))
+       {
+         reg_pair += 2;  // get rid of prefix %f
+         snprintf (buf, sizeof (buf), "vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
+         output_asm_insn (buf, operands);
+         return "";
+       }
+      else
+       {
+         reg_pair += 2;  // get rid of prefix %f
+         snprintf (buf, sizeof (buf), "ldr\t%%f0,%%f1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
+         output_asm_insn (buf, operands);
+         return "";
+       }
+    case 1:
+      {
+       snprintf (buf, sizeof (buf), "ld\t%%f0,%%1;ld\t%%%s,8+%%1", reg_pair);
+       output_asm_insn (buf, operands);
+       return "";
+      }
+    case 2:
+      {
+       snprintf (buf, sizeof (buf), "ldy\t%%f0,%%1;ldy\t%%%s,8+%%1", reg_pair);
+       output_asm_insn (buf, operands);
+       return "";
+      }
+    default: gcc_unreachable ();
+    }
 })
 
 
@@ -2548,9 +2557,8 @@
 ; There is no instruction for rounding an extended BFP operand in a VR into
 ; a signed integer, therefore copy it into a FPR pair first.
 (define_expand "fix_trunctf<mode>2_vr"
-  [(set (subreg:DF (match_dup 2) 0)
-       (subreg:DF (match_operand:TF 1 "register_operand" "") 0))
-   (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8))
+  [(set (match_dup 2)
+       (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] UNSPEC_TF_TO_FPRX2))
    (parallel [(set (match_operand:GPR 0 "register_operand" "")
                   (fix:GPR (match_dup 2)))
              (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND)
@@ -2582,9 +2590,8 @@
 ; There is no instruction for rounding an extended BFP operand in a VR into
 ; an unsigned integer, therefore copy it into a FPR pair first.
 (define_expand "fixuns_trunctf<mode>2_vr"
-  [(set (subreg:DF (match_dup 2) 0)
-       (subreg:DF (match_operand:TF 1 "register_operand" "") 0))
-   (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8))
+  [(set (match_dup 2)
+       (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] UNSPEC_TF_TO_FPRX2))
    (parallel [(set (match_operand:GPR 0 "register_operand" "")
                   (unsigned_fix:GPR (match_dup 2)))
              (unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND)
diff --git a/gcc/testsuite/gcc.target/s390/pr115860-1.c b/gcc/testsuite/gcc.target/s390/pr115860-1.c
new file mode 100644
index 000000000000..abcddeaed95d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr115860-1.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-options "-O2 -march=z14 -mzarch" } */
+
+__attribute__ ((noipa))
+long long trunctf (long double x)
+{
+  /* Ensure via ++x that x is in a register.  */
+  ++x;
+  return x;
+}
+
+__attribute__ ((noipa))
+long long trunctf_from_mem (long double x)
+{
+  return x;
+}
+
+int main (void)
+{
+  if (trunctf (0x7ffffffffffffffeLL) != 0x7fffffffffffffffLL)
+    __builtin_abort ();
+  if (trunctf_from_mem (0x7fffffffffffffffLL) != 0x7fffffffffffffffLL)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c
index f9f2d1286e2d..ac3576801f09 100644
--- a/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c
+++ b/gcc/testsuite/gcc.target/s390/vector/long-double-asm-abi.c
@@ -14,7 +14,7 @@ xsqrt (long double x)
 
 /* Check that the generated code is very small and straightforward.  In
    particular, there must be no unnecessary copying and no stack frame.  */
-/* { dg-final { scan-assembler {\n\tld\t[^\n]*\n\tld\t[^\n]*\n(#[^\n]*\n)*\tsqxbr\t.*\n(#[^\n]*\n)*\tstd\t[^\n]*\n\tstd\t[^\n]*\n\tbr\t%r14\n} } } */
+/* { dg-final { scan-assembler {\n\tld\t[^\n]*;ld\t[^\n]*\n(#[^\n]*\n)*\tsqxbr\t.*\n(#[^\n]*\n)*\tstd\t[^\n]*\n\tstd\t[^\n]*\n\tbr\t%r14\n} } } */
 
 int
 main (void)
diff --git a/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c b/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c
index 2dbbb5d1c03e..ed47fd9b8589 100644
--- a/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c
+++ b/gcc/testsuite/gcc.target/s390/vector/long-double-to-i64.c
@@ -10,8 +10,6 @@ long_double_to_i64 (long double x)
   return x;
 }
 
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,1\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,5\n} 1 } } */
 /* { dg-final { scan-assembler-times {\n\tcgxbr\t} 1 } } */
 
 int

Reply via email to