[gcc r15-7290] s390: Fix up *vec_cmpgt{,u}<mode><mode>_nocc_emu splitters [PR118696]

Jakub Jelinek via Gcc-cvs Thu, 30 Jan 2025 09:31:54 -0800

https://gcc.gnu.org/g:6a6df260c7cdbf8f40c1245a3c930293a20bf8c0


commit r15-7290-g6a6df260c7cdbf8f40c1245a3c930293a20bf8c0
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Thu Jan 30 18:30:10 2025 +0100

    s390: Fix up *vec_cmpgt{,u}<mode><mode>_nocc_emu splitters [PR118696]
    
    The following testcase is miscompiled on s390x-linux with e.g. -march=z13
    (both -O0 and -O2) starting with r15-7053.
    The problem is in the splitters which emulate TImode/V1TImode GT and GTU
    comparisons.
    For GT we want to do
    (ior (gt (hi op1) (hi op2))
         (and (eq (hi op1) (hi op2)) (gtu (lo op1) (lo op2))))
    and for GTU similarly except for gtu instead of gt in there.
    Now, the splitter emulation is using V2DImode comparisons where on s390x
    the hi part is in the first element of the vector, lo part in the second,
    and for the gtu case it swaps the elements of the vector.
    So, we get the right result in the first element of the result vector.
    But vrepg was then broadcasting the second element of the result vector
    rather than the first, and the value of the second element of the vector
    is instead
    (ior (gt (lo op1) (lo op2))
         (and (eq (lo op1) (lo op2)) (gtu (hi op1) (hi op2))))
    so something not really usable for the emulated comparison.
    
    The following patch fixes that.  The testcase tries to test behavior of
    double-word smin/smax/umin/umax with various cases of the halves of both
    operands (one that is sometimes EQ, sometimes GT, sometimes LT, sometimes
    GTU, sometimes LTU).
    
    2025-01-30  Jakub Jelinek  <ja...@redhat.com>
                Stefan Schulze Frielinghaus  <stefa...@gcc.gnu.org>
    
            PR target/118696
            * config/s390/vector.md (*vec_cmpgt<mode><mode>_nocc_emu,
            *vec_cmpgtu<mode><mode>_nocc_emu): Duplicate the first rather than
            second V2DImode element.
    
            * gcc.dg/pr118696.c: New test.
            * gcc.target/s390/vector/pr118696.c: New test.
            * gcc.target/s390/vector/vec-abs-emu.c: Expect vrepg with 0 as last
            operand rather than 1.
            * gcc.target/s390/vector/vec-max-emu.c: Likewise.
            * gcc.target/s390/vector/vec-min-emu.c: Likewise.

Diff:
---
 gcc/config/s390/vector.md                          |   4 +-
 gcc/testsuite/gcc.dg/pr118696.c                    | 131 +++++++++++++++++++++
 gcc/testsuite/gcc.target/s390/vector/pr118696.c    |   5 +
 gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c |   2 +-
 gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c |   2 +-
 gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c |   2 +-
 6 files changed, 141 insertions(+), 5 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 547e6a282816..e29255fe1116 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -2166,7 +2166,7 @@
        (vec_duplicate:V2DI
         (vec_select:DI
          (match_dup 4)
-         (parallel [(const_int 1)]))))
+         (parallel [(const_int 0)]))))
    (set (match_dup 0)
        (subreg:<MODE> (match_dup 4) 0))]
 {
@@ -2198,7 +2198,7 @@
        (vec_duplicate:V2DI
         (vec_select:DI
          (match_dup 4)
-         (parallel [(const_int 1)]))))
+         (parallel [(const_int 0)]))))
    (set (match_dup 0)
        (subreg:<MODE> (match_dup 4) 0))]
 {
diff --git a/gcc/testsuite/gcc.dg/pr118696.c b/gcc/testsuite/gcc.dg/pr118696.c
new file mode 100644
index 000000000000..092af6f94173
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr118696.c
@@ -0,0 +1,131 @@
+/* PR target/118696 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#if __CHAR_BIT__ == 8
+#if __SIZEOF_INT128__ == 16 && __SIZEOF_LONG_LONG__ == 8
+#define D __int128
+#define S long long
+#define M 0x8000000000000000ULL
+#define C 64
+#elif __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4
+#define D long long
+#define S int
+#define M 0x80000000U
+#define C 32
+#endif
+#endif
+
+#ifdef D
+static inline D
+combine (unsigned S x, unsigned S y)
+{
+  return (unsigned D) x << C | y;
+}
+
+__attribute__((noipa)) D
+smin (D x, D y)
+{
+  return x < y ? x : y;
+}
+
+__attribute__((noipa)) D
+smax (D x, D y)
+{
+  return x > y ? x : y;
+}
+
+__attribute__((noipa)) unsigned D
+umin (unsigned D x, unsigned D y)
+{
+  return x < y ? x : y;
+}
+
+__attribute__((noipa)) unsigned D
+umax (unsigned D x, unsigned D y)
+{
+  return x > y ? x : y;
+}
+#endif
+
+int
+main ()
+{
+#ifdef D
+  unsigned S vals[] = {
+    0, 12, 42, M, M | 12, M | 42
+  };
+  unsigned char expected[] = {
+    4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,
+    3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,4,3,3,3,3,3,3,
+    3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,4,3,
+    3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
+    0,0,0,4,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,
+    3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,4,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,4,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,4,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,4,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,4,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,4,3,3,3,3,3,3,3,3,3,3,3,3,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,4,3,3,3,3,3,3,3,
+    3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,4,3,3,
+    3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+    0,0,4,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+    0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,3,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,3,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,3,3,3,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    4,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,4,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,4,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
+  };
+  int m = 0;
+  for (int i = 0; i < 6; ++i)
+    for (int j = 0; j < 6; ++j)
+      for (int k = 0; k < 6; ++k)
+       for (int l = 0; l < 6; ++l)
+         {
+           D a = combine (vals[i], vals[j]);
+           D b = combine (vals[k], vals[l]);
+           int r = 0;
+           r |= (smin (a, b) == a) * 1;
+           r |= (smin (a, b) == b) * 2;
+           r |= (smax (a, b) == a) * 4;
+           r |= (smax (a, b) == b) * 8;
+           r |= (umin (a, b) == a) * 16;
+           r |= (umin (a, b) == b) * 32;
+           r |= (umax (a, b) == a) * 64;
+           r |= (umax (a, b) == b) * 128;
+           switch (r)
+             {
+             case 102: r = 0; break;
+             case 105: r = 1; break;
+             case 150: r = 2; break;
+             case 153: r = 3; break;
+             case 255: r = 4; break;
+             default: __builtin_abort ();
+             }
+           if (r != expected[m++])
+             __builtin_abort ();
+         }
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/pr118696.c b/gcc/testsuite/gcc.target/s390/vector/pr118696.c
new file mode 100644
index 000000000000..1f67bf5c47fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pr118696.c
@@ -0,0 +1,5 @@
+/* PR target/118696 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mzarch -march=z13" } */
+
+#include "../../../gcc.dg/pr118696.c"
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c b/gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c
index 8c1038a13d75..e0dd222e62c7 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-abs-emu.c
@@ -18,7 +18,7 @@ typedef __attribute__ ((vector_size (16))) signed __int128 v1ti;
 **     vchg    %v[0-9]+,%v[0-9]+,%v[0-9]+
 **     vn      %v[0-9]+,%v[0-9]+,%v[0-9]+
 **     vo      %v[0-9]+,%v[0-9]+,%v[0-9]+
-**     vrepg   %v[0-9]+,%v[0-9]+,1
+**     vrepg   %v[0-9]+,%v[0-9]+,0
 **     vsq     %v[0-9]+,%v[0-9]+,%v[0-9]+
 **     vsel    %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
 **     br      %r14
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c b/gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c
index 1c4451b19c1d..12c7e76edc19 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-max-emu.c
@@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16))) signed __int128 v1ti;
 **     vchg    %v[0-9]+,%v[0-9]+,%v[0-9]+
 **     vn      %v[0-9]+,%v[0-9]+,%v[0-9]+
 **     vo      %v[0-9]+,%v[0-9]+,%v[0-9]+
-**     vrepg   %v[0-9]+,%v[0-9]+,1
+**     vrepg   %v[0-9]+,%v[0-9]+,0
 **     vsel    %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
 **     br      %r14
 */
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c b/gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c
index 89f4dff542d2..a9bcba39e224 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-min-emu.c
@@ -17,7 +17,7 @@ typedef __attribute__ ((vector_size (16))) signed __int128 v1ti;
 **     vchg    %v[0-9]+,%v[0-9]+,%v[0-9]+
 **     vn      %v[0-9]+,%v[0-9]+,%v[0-9]+
 **     vo      %v[0-9]+,%v[0-9]+,%v[0-9]+
-**     vrepg   %v[0-9]+,%v[0-9]+,1
+**     vrepg   %v[0-9]+,%v[0-9]+,0
 **     vsel    %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
 **     br      %r14
 */

[gcc r15-7290] s390: Fix up *vec_cmpgt{,u}<mode><mode>_nocc_emu splitters [PR118696]

Reply via email to