https://gcc.gnu.org/g:f043ef2b6a59088b16a269b55f09023f76c92e32

commit r15-8021-gf043ef2b6a59088b16a269b55f09023f76c92e32
Author: Robin Dapp <rd...@ventanamicro.com>
Date:   Tue Feb 25 12:55:08 2025 +0100

    RISC-V: Adjust LMUL when using maximum SEW [PR117955].
    
    When merging two vsetvls that both only demand "SEW >= ..." we
    use their maximum SEW and keep the LMUL.  That may lead to invalid
    vector configurations like
      e64, mf4.
    As we make sure that the SEW requirements overlap we can use the SEW
    and LMUL of the configuration with the larger SEW.
    
    Ma Jin already touched this merge rule some weeks ago and fixed the
    ratio calculation (r15-6873).  Calculating the ratio from an invalid
    SEW/LMUL combination led to an overflow in the ratio variable, though.
    I'd argue the proper fix is to update SEW and LMUL, keeping the ratio
    as before.  This "breaks" bug-10.c but its check only checked for a
    workaround anyway so I turned it into a run test.
    
    Ma Jin helped minify the PR's test and provided a larger test case for
    bug-10.
    
            PR target/117955
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-vsetvl.cc: Use LMUL/ratio from vsetvl with
            larger SEW.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/base/bug-10.c: Convert to run test.
            * gcc.target/riscv/rvv/base/bug-10-2.c: New test.
            * gcc.target/riscv/rvv/base/pr117955.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc                   |  8 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c | 93 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c   | 33 +++++++-
 gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c | 26 ++++++
 4 files changed, 154 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 82284624a242..f0165f7b8c8c 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1729,9 +1729,11 @@ private:
   }
   inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
   {
-    int max_sew = MAX (prev.get_sew (), next.get_sew ());
-    prev.set_sew (max_sew);
-    prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
+    bool prev_sew_larger = prev.get_sew () >= next.get_sew ();
+    const vsetvl_info from = prev_sew_larger ? prev : next;
+    prev.set_sew (from.get_sew ());
+    prev.set_vlmul (from.get_vlmul ());
+    prev.set_ratio (from.get_ratio ());
     use_min_of_max_sew (prev, next);
   }
   inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
new file mode 100644
index 000000000000..fe3a1efb8d86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
@@ -0,0 +1,93 @@
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2" } */
+
+#include <riscv_vector.h>
+
+int8_t a[1];
+uint16_t b[1];
+float c[1], n[1];
+uint16_t d[1];
+uint8_t e[1];
+uint16_t f[1];
+_Float16 g[1], k[1], m[1], p[1];
+uint16_t i[1];
+int8_t j[1];
+uint8_t o[1];
+uint32_t l[1];
+uint16_t q[1];
+uint32_t r[1];
+uint32_t s[1];
+int16_t t[1];
+int main()
+{
+  int u = 25;
+  int8_t *v = a;
+  uint32_t *w;
+  uint16_t *aa = b;
+  float *ab = c, *as = n;
+  uint32_t *ad;
+  uint16_t *ah = f;
+  _Float16 *ai = g, *aj = k, *an = m, *au = p;
+  int32_t *ak;
+  int16_t *al;
+  uint16_t *am = i;
+  int8_t *ao = j;
+  uint8_t *ap = o;
+  uint32_t *aq = l;
+  uint16_t *ar = q;
+  uint32_t *at = r;
+  uint32_t *av = s;
+  int32_t *ax;
+  int16_t *ay = t;
+  for (size_t az; u; u -= az)
+  {
+    az = __riscv_vsetvl_e32m8(u);
+    vint8m2_t ba = __riscv_vle8_v_i8m2(v, az);
+    vbool4_t bb = __riscv_vmseq_vx_i8m2_b4(ba, 1, az);
+    vuint16m4_t bc = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 2, az);
+    vuint32m8_t bd = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 1, az);
+    vuint32m8_t be = __riscv_vluxei16_v_u32m8_m(bb, w, bc, az);
+    vuint16m4_t bf;
+    __riscv_vsuxei16_v_u32m8_m(bb, aq, bf, be, az);
+    vuint8m2_t bg = __riscv_vsll_vx_u8m2(__riscv_vid_v_u8m2(az), 1, az);
+    vuint16m4_t bh = __riscv_vloxei8_v_u16m4(aa, bg, az);
+    vfloat16m4_t bi;
+    vuint16m4_t bj = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 1, az);
+    vint16m4_t bk = __riscv_vloxei32_v_i16m4_m(bb, al, bd, az);
+    __riscv_vsse16_v_u16m4(ar, 2, bh, az);
+    vuint16m4_t bl = __riscv_vloxei16_v_u16m4(d, bj, az);
+    vfloat16m4_t bm = __riscv_vle16_v_f16m4(ai, az);
+    vuint16m4_t bn = __riscv_vlse16_v_u16m4(ah, 2, az);
+    vint32m8_t bo = __riscv_vle32_v_i32m8_m(bb, ak, az);
+    vfloat16m1_t bp = __riscv_vle16_v_f16m1(aj, az);
+    vuint16m4_t bq = __riscv_vrgatherei16_vv_u16m4(bl, bn, az);
+    __riscv_vse16_v_u16m4(am, bq, az);
+    vfloat16m1_t br = __riscv_vfredusum_vs_f16m4_f16m1_m(bb, bm, bp, az);
+    vuint8m2_t bs;
+    vuint32m8_t bt;
+    __riscv_vse16_v_f16m1(an, br, az);
+    vfloat32m8_t bu = __riscv_vloxei8_v_f32m8(ab, bs, az);
+    __riscv_vse16_v_i16m4(ay, bk, az);
+    bi = __riscv_vfmv_s_f_f16m4(1, az);
+    __riscv_vse16_v_f16m4(au, bi, az);
+    vuint16m4_t bw = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 0, az);
+    vuint32m8_t by = __riscv_vle32_v_u32m8_m(bb, ad, az);
+    bt = __riscv_vmv_s_x_u32m8(3090659, az);
+    __riscv_vse32_v_u32m8(at, bt, az);
+    vuint8m2_t bz = __riscv_vloxei16_v_u8m2(e, bw, az);
+    __riscv_vse32_v_u32m8(av, by, az);
+    vint8m2_t cd;
+    __riscv_vse8_v_i8m2(ao, cd, az);
+    __riscv_vsse32_v_i32m8_m(bb, ax, 4, bo, az);
+    __riscv_vse32_v_f32m8(as, bu, az);
+    vuint16m4_t cf;
+    __riscv_vsoxei16_v_u32m8(aq, cf, be, az);
+    vuint8m2_t cg = __riscv_vmulhu_vx_u8m2(bz, 0, az);
+    vuint32m8_t ch = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 0, az);
+    __riscv_vsoxei32_v_u8m2(ap, ch, cg, az);
+  }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
index af3a8610d63f..60fdfc419e6a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
@@ -1,14 +1,41 @@
-/* { dg-do compile { target { rv64 } } } */
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
 /* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2 --param=vsetvl-strategy=optim -fno-schedule-insns  -fno-schedule-insns2 -fno-schedule-fusion " } */
 
 #include <riscv_vector.h>
 
 void
-foo (uint8_t *ptr, vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t vl)
+__attribute__ ((noipa))
+foo (vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t vl)
 {
   *v1 = __riscv_vfmv_s_f_f16m4 (1, vl);
   *v2 = __riscv_vmv_s_x_u32m8 (2963090659u, vl);
   *v3 = __riscv_vsll_vx_u8m2 (__riscv_vid_v_u8m2 (vl), 2, vl);
 }
 
-/* { dg-final { scan-assembler-not {vsetvli.*zero,zero} } }*/
+int
+main ()
+{
+  vfloat16m4_t v1;
+  vuint32m8_t v2;
+  vuint8m2_t v3;
+  int vl = 4;
+  foo (&v1, &v2, &v3, vl);
+
+  _Float16 val1 = ((_Float16 *)&v1)[0];
+  if (val1 - 1.0000f > 0.00001f)
+    __builtin_abort ();
+
+  uint32_t val2 = ((uint32_t *)&v2)[0];
+  if (val2 != 2963090659u)
+    __builtin_abort ();
+
+  for (int i = 0; i < vl; i++)
+    {
+      uint8_t val = ((uint8_t *)&v3)[i];
+      if (val != i << 2)
+        __builtin_abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
new file mode 100644
index 000000000000..81e3a6ed8eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3" } */
+
+#include <riscv_vector.h>
+
+_Float16 a (uint64_t);
+int8_t b () {
+  int c = 100;
+  double *d;
+  _Float16 *e;
+  for (size_t f;; c -= f)
+    {
+      f = c;
+      __riscv_vsll_vx_u8mf8 (__riscv_vid_v_u8mf8 (f), 2, f);
+      vfloat16mf4_t g;
+      a (1);
+      g = __riscv_vfmv_s_f_f16mf4 (2, f);
+      vfloat64m1_t i = __riscv_vfmv_s_f_f64m1 (30491, f);
+      vuint16mf4_t j;
+      __riscv_vsoxei16_v_f16mf4 (e, j, g, f);
+      vuint8mf8_t k = __riscv_vsll_vx_u8mf8 (__riscv_vid_v_u8mf8 (f), 3, f);
+      __riscv_vsoxei8_v_f64m1 (d, k, i, f);
+    }
+}
+
+/* { dg-final { scan-assembler-not "e64,mf4" } } */

Reply via email to