https://gcc.gnu.org/g:6b730eebd44aae4dbdd3699b8c864e0bc7e78188

commit 6b730eebd44aae4dbdd3699b8c864e0bc7e78188
Author: Jeff Law <[email protected]>
Date:   Fri Oct 3 08:41:53 2025 -0600

    [RISC-V][PR rtl-optimization/121937] Don't call neg_poly_int_rtx with a 
vector mode
    
    Fun little bug.
    
    We're asked to simplify this:
    
    (vec_select:HI (if_then_else:V2HI (unspec:V2BI [
                    (const_vector:V2BI [
                            (const_int 0 [0])
                            (const_int 1 [0x1])
                        ])
                    (const_int 2 [0x2])
                    (const_int 0 [0]) repeated x3
                    (reg:SI 66 vl)
                    (reg:SI 67 vtype)
                ] UNSPEC_VPREDICATE)
            (const_vector:V2HI [
                    (const_int 0 [0])
                    (const_int -1 [0xffffffffffffffff])
                ])
            (const_vector:V2HI [
                    (const_int -1 [0xffffffffffffffff])
                    (const_int 0 [0])
                ]))
        (parallel [
                (const_int 0 [0])
            ]))
    
    That triggers some fairly obscure code in combine which realizes the arms 
are
    STORE_FLAG_VALUE computable.  So we ask for a simplified conditional of the
    condition against (const_int 0):
    
    3610      return simplify_context ().simplify_gen_relational (code, mode, 
op_mode,
    (gdb) p debug_rtx (op0)
    (unspec:V2BI [
            (const_vector:V2BI [
                    (const_int 0 [0])
                    (const_int 1 [0x1])
                ])
            (const_int 2 [0x2])
            (const_int 0 [0]) repeated x3
            (reg:SI 66 vl)
            (reg:SI 67 vtype)
        ] UNSPEC_VPREDICATE)
    $50 = void
    (gdb) p debug_rtx (op1)
    (const_int 0 [0])
    
    CODE will be EQ.  So that eventually we'll try that as a simplification 
using
    MINUS with those two operands.
    
    That ultimately lands us in simplify_binary_operation_1 which (of course) 
tries
    to simplify x - 0 to x.  But that fails because we test (const_int 0) 
against
    CONST0_RTX (V2BI) which, of course, is false.
    
    We then stumble down into this code:
    
          /* Don't let a relocatable value get a negative coeff.  */
          if (poly_int_rtx_p (op1) && GET_MODE (op0) != VOIDmode)
            return simplify_gen_binary (PLUS, mode,
                                        op0,
                                        neg_poly_int_rtx (mode, op1));
    
    Where MODE is V2BI.  That's not a scalar mode and we try to get the 
precision
    of V2BI in the bowels of neg_poly_int_rtx, which looks like:
    
            return GET_MODE_PRECISION (as_a <scalar_mode> (x.second));
    
    Where x.second is the mode, V2BI.  Since V2BI is not a scalar mode it blows 
up
    as seen in the BZ.
    
    The immediate and direct fix is to guard that code with a check that we've 
got
    a scalar mode.
    
    I looked at passing a more suitable zero node as well as improving the 
checks
    to simplify x - 0 -> x for this case.  While the RTL does simplify in the
    expected ways, nothing really comes out of the RTL simplification (ie, the
    final assembly code is the same).  So I decided against including those 
hacks
    (they really didn't feel all that clean to me).  There's just not a 
compelling
    reason for them.
    
    Anyway, bootstrapped and regression tested on x86_64.  Verified it fixes the
    riscv fault and doesn't regress riscv64-elf and riscv32-elf. Bootstrap on 
riscv
    native targets will fire up overnight.
    
            PR rtl-optimization/121937
    
    gcc/
            * simplify-rtx.cc (simplify_context::simplify_binary_operation_1): 
Make
            sure we've got a scalar_int_mode before calling neg_poly_int_rtx.
    
    gcc/testsuite/
            * gcc.target/riscv/pr121937.c: New test.
    
    (cherry picked from commit f864e4b54a13420f37dc3710aeb9f8a6f9e63b9c)

Diff:
---
 gcc/simplify-rtx.cc                       |  4 +-
 gcc/testsuite/gcc.target/riscv/pr121937.c | 66 +++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index b5562bf6e683..166e44b30fc9 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3412,7 +3412,9 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
        return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode));
 
       /* Don't let a relocatable value get a negative coeff.  */
-      if (poly_int_rtx_p (op1) && GET_MODE (op0) != VOIDmode)
+      if (is_a <scalar_int_mode> (mode)
+         && poly_int_rtx_p (op1)
+         && GET_MODE (op0) != VOIDmode)
        return simplify_gen_binary (PLUS, mode,
                                    op0,
                                    neg_poly_int_rtx (mode, op1));
diff --git a/gcc/testsuite/gcc.target/riscv/pr121937.c 
b/gcc/testsuite/gcc.target/riscv/pr121937.c
new file mode 100644
index 000000000000..3c0389c09346
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr121937.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w -march=rv64gcv -mabi=lp64d" { target rv64 } } */
+/* { dg-additional-options "-w -march=rv32gcv -mabi=ilp32" { target rv32 } } */
+
+#include <stdint-gcc.h>
+#define BS_VEC(type, num) type __attribute__((vector_size(num * sizeof(type))))
+typedef int16_t int16;
+typedef uint16_t uint16;
+typedef int32_t int32;
+typedef uint64_t uint64;
+int32_t g_69, g_539;
+int32_t *g_68;
+void func_59(int32_t p_60) {
+  BS_VEC(uint64, 2) BS_VAR_4;
+  BS_VEC(int16, 8) BS_VAR_6;
+  uint64 *LOCAL_CHECKSUM;
+  int32_t *l_108 = &g_69;
+  int64_t l_829 = 10;
+  int32_t l_844 = -1;
+  for (; g_69;) {
+    int32_t l_924;
+    if (p_60 * 2u) {
+    BS_LABEL_0:
+      *LOCAL_CHECKSUM ^= BS_VAR_4[3];
+      for (l_924 = 3; l_924; l_924 -= 1) {
+        BS_VEC(uint64, 8)
+        BS_TEMP_600 = -__builtin_convertvector(BS_VAR_6, BS_VEC(uint64, 8));
+        BS_VEC(uint64, 8)
+        BS_TEMP_601 = __builtin_convertvector((BS_VEC(int32, 8)){p_60},
+                                              BS_VEC(uint64, 8));
+        BS_VAR_4[356358257141730375] =
+            __builtin_convertvector(
+                __builtin_shufflevector((BS_VEC(uint16, 2))0,
+                                        (BS_VEC(uint16, 2))0, 1, 3, 0, 1, 2, 0,
+                                        0, 2, 0, 0, 1, 2, 3, 3, 3, 2),
+                BS_VEC(uint64, 16))[BS_VAR_6[4]] >
+            (BS_VEC(uint64, 8)){0, BS_TEMP_600[1] ? BS_TEMP_601[1]
+                                                  : 0}[l_829 != 0];
+      }
+    }
+    if (*l_108)
+      *g_68 |= g_539;
+    __asm goto("" : : : : BS_LABEL_0);
+    BS_VEC(int16, 4)
+    BS_TEMP_681 = __builtin_shufflevector(
+        (BS_VEC(int16, 2))__builtin_shufflevector(
+            __builtin_convertvector(
+                __builtin_shufflevector(BS_VAR_6, BS_VAR_6, 8, 6, 5, 8, 1, 3, 
6,
+                                        2, 0, 1, 2, 5, 8, 6, 5, 1, 5, 0, 3, 5,
+                                        8, 2, 2, 4, 6, 0, 6, 4, 3, 3, 1, 2),
+                BS_VEC(uint16, 32)),
+            __builtin_convertvector((BS_VEC(int32, 32)){}, BS_VEC(uint16, 32)),
+            42, 52) -
+            __builtin_convertvector((BS_VEC(int32, 2)){l_844},
+                                    BS_VEC(uint16, 2)) *
+                ~0,
+        ~(0 < __builtin_shufflevector(
+                  __builtin_convertvector((BS_VEC(int32, 16)){p_60},
+                                          BS_VEC(uint16, 16)),
+                  (BS_VEC(uint16, 16)){20489, 3, 2, 4}, 19, 6)),
+        1, 2, 0, 3);
+    BS_VAR_6[0] =
+        BS_TEMP_681[0] ^ BS_TEMP_681[1] ^ BS_TEMP_681[2] ^ BS_TEMP_681[3];
+  }
+}
+

Reply via email to