https://gcc.gnu.org/g:e7287cbbb208b676991096dd9081ff8a61c49781

commit r15-7743-ge7287cbbb208b676991096dd9081ff8a61c49781
Author: Pan Li <pan2...@intel.com>
Date:   Sat Feb 22 19:34:52 2025 +0800

    RISC-V: Fix bug for expand_const_vector interleave [PR118931]
    
    This patch would like to fix one bug when expanding const vector for the
    interleave case.  For example, we have:
    
    base1 = 151
    step = 121
    
    For vec_series, we generate the vector in the form v[i] = base + i * step.
    The vec_series then produces the result below for HImode, and we can see
    that the result overflows into the high 8 bits of each HImode element.
    
    v1.b = {151, 255, 7,  0, 119,  0, 231,  0, 87,  1, 199,  1, 55,   2, 167,   2}
    
    That is, we expect v1.b to be:
    
    v1.b = {151, 0, 7,  0, 119,  0, 231,  0, 87,  0, 199,  0, 55,   0, 167,   0}
    
    After that it will perform the IOR with v2 for base2 (i.e. the other series).
    
    v2.b =  {0,  17, 0, 33,   0, 49,   0, 65,  0, 81,   0, 97,  0, 113,   0, 129}
    
    Unfortunately, base1 + i * step1 in HImode may overflow into the high
    8 bits, and those high 8 bits will pollute v2 and result in an incorrect
    value in the const_vector.
    
    This patch performs an overflow-to-smode check before the optimized
    interleave code generation.  If the series overflows, or for VLA, it
    will fall back to the default merge approach.
    
    The below test suites are passed for this patch.
    * The rv64gcv fully regression test.
    
            PR target/118931
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-v.cc (expand_const_vector): Add overflow to
            smode check and clean up highest bits if overflow.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/base/pr118931-run-1.c: New test.
    
    Signed-off-by: Pan Li <pan2...@intel.com>

Diff:
---
 gcc/config/riscv/riscv-v.cc                        | 36 +++++++++++++++++-----
 .../gcc.target/riscv/rvv/base/pr118931-run-1.c     | 19 ++++++++++++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 7cc15f3d53c1..287eb3e54cf7 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1489,22 +1489,44 @@ expand_const_vector (rtx target, rtx src)
 
                  EEW = 32, { 2, 4, ... }.
 
-            This only works as long as the larger type does not overflow
-            as we can't guarantee a zero value for each second element
-            of the sequence with smaller EEW.
-            ??? For now we assume that no overflow happens with positive
-            steps and forbid negative steps altogether.  */
+            Both the series1 and series2 may overflow before taking the IOR
+            to generate the final result.  However, only series1 matters
+            because the series2 will shift before IOR, thus the overflow
+            bits will never pollute the final result.
+
+            For now we forbid the negative steps and overflow, and they
+            will fall back to the default merge way to generate the
+            const_vector.  */
+
          unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
          scalar_int_mode new_smode;
          machine_mode new_mode;
          poly_uint64 new_nunits
            = exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+
+         poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+         bool overflow_smode_p = false;
+
+         if (!step1.is_constant ())
+           overflow_smode_p = true;
+         else
+           {
+             int elem_count = XVECLEN (src, 0);
+             uint64_t step1_val = step1.to_constant ();
+             uint64_t base1_val = base1_poly.to_constant ();
+             uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+
+             if ((elem_val >> builder.inner_bits_size ()) != 0)
+               overflow_smode_p = true;
+           }
+
          if (known_ge (step1, 0) && known_ge (step2, 0)
              && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
-             && get_vector_mode (new_smode, new_nunits).exists (&new_mode))
+             && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+             && !overflow_smode_p)
            {
              rtx tmp1 = gen_reg_rtx (new_mode);
-             base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode);
+             base1 = gen_int_mode (base1_poly, new_smode);
              expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
 
              if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
new file mode 100644
index 000000000000..84c63b5040e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fwhole-program -mrvv-vector-bits=zvl" } */
+
+long long m;
+char f = 151;
+char h = 103;
+unsigned char a = 109;
+
+int main() {
+  for (char l = 0; l < 255 - 241; l += h - 102)
+    a *= f;
+
+  m = a;
+
+  if (m != 29)
+    __builtin_abort ();
+
+  return 0;
+}

Reply via email to