[PATCH v2] RISC-V: Adjust LMUL when using maximum SEW [PR117955].

Robin Dapp Wed, 05 Mar 2025 06:40:09 -0800

Hi,

When merging two vsetvls that both only demand "SEW >= ..." we
use their maximum SEW and keep the LMUL.  That may lead to invalid
vector configurations like e64, mf4.
As we make sure that the SEW requirements overlap, we can instead use
the SEW and LMUL of the configuration with the larger SEW.


Ma Jin already touched this merge rule some weeks ago and fixed the
ratio calculation (r15-6873).  Calculating the ratio from an invalid
SEW/LMUL combination led to an overflow in the ratio variable, though.
I'd argue the proper fix is to update SEW and LMUL, keeping the ratio
as before.  This "breaks" bug-10.c, but its check only checked for a
workaround anyway, so I turned it into a run test.

For V2, Ma Jin helped minify the PR's test and provided a larger test
case for bug-10.

Regtested on rv64gcv_zvl512b.

Regards
Robin

        PR target/117955

gcc/ChangeLog:

        * config/riscv/riscv-vsetvl.cc: Use LMUL/ratio from vsetvl with
        larger SEW.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/base/bug-10.c: Convert to run test.
        * gcc.target/riscv/rvv/base/bug-10-2.c: New test.
        * gcc.target/riscv/rvv/base/pr117955.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc              |  8 +-
.../gcc.target/riscv/rvv/base/bug-10-2.c      | 92 +++++++++++++++++++
.../gcc.target/riscv/rvv/base/bug-10.c        | 32 ++++++-
.../gcc.target/riscv/rvv/base/pr117955.c      | 26 ++++++
4 files changed, 152 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 82284624a24..f0165f7b8c8 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1729,9 +1729,11 @@ private:
  }
  inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
-    int max_sew = MAX (prev.get_sew (), next.get_sew ());
-    prev.set_sew (max_sew);
-    prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
+    bool prev_sew_larger = prev.get_sew () >= next.get_sew ();
+    const vsetvl_info from = prev_sew_larger ? prev : next;
+    prev.set_sew (from.get_sew ());
+    prev.set_vlmul (from.get_vlmul ());
+    prev.set_ratio (from.get_ratio ());
    use_min_of_max_sew (prev, next);
  }
  inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
new file mode 100644
index 00000000000..a7d3b3ccf66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
@@ -0,0 +1,92 @@
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v } */
+/* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2" } */
+
+#include <riscv_vector.h>
+
+int8_t a[1];
+uint16_t b[1];
+float c[1], n[1];
+uint16_t d[1];
+uint8_t e[1];
+uint16_t f[1];
+_Float16 g[1], k[1], m[1], p[1];
+uint16_t i[1];
+int8_t j[1];
+uint8_t o[1];
+uint32_t l[1];
+uint16_t q[1];
+uint32_t r[1];
+uint32_t s[1];
+int16_t t[1];
+int main()
+{
+  int u = 25;
+  int8_t *v = a;
+  uint32_t *w;
+  uint16_t *aa = b;
+  float *ab = c, *as = n;
+  uint32_t *ad;
+  uint16_t *ah = f;
+  _Float16 *ai = g, *aj = k, *an = m, *au = p;
+  int32_t *ak;
+  int16_t *al;
+  uint16_t *am = i;
+  int8_t *ao = j;
+  uint8_t *ap = o;
+  uint32_t *aq = l;
+  uint16_t *ar = q;
+  uint32_t *at = r;
+  uint32_t *av = s;
+  int32_t *ax;
+  int16_t *ay = t;
+  for (size_t az; u; u -= az)
+  {
+    az = __riscv_vsetvl_e32m8(u);
+    vint8m2_t ba = __riscv_vle8_v_i8m2(v, az);
+    vbool4_t bb = __riscv_vmseq_vx_i8m2_b4(ba, 1, az);
+    vuint16m4_t bc = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 2, az);
+    vuint32m8_t bd = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 1, az);
+    vuint32m8_t be = __riscv_vluxei16_v_u32m8_m(bb, w, bc, az);
+    vuint16m4_t bf;
+    __riscv_vsuxei16_v_u32m8_m(bb, aq, bf, be, az);
+    vuint8m2_t bg = __riscv_vsll_vx_u8m2(__riscv_vid_v_u8m2(az), 1, az);
+    vuint16m4_t bh = __riscv_vloxei8_v_u16m4(aa, bg, az);
+    vfloat16m4_t bi;
+    vuint16m4_t bj = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 1, az);
+    vint16m4_t bk = __riscv_vloxei32_v_i16m4_m(bb, al, bd, az);
+    __riscv_vsse16_v_u16m4(ar, 2, bh, az);
+    vuint16m4_t bl = __riscv_vloxei16_v_u16m4(d, bj, az);
+    vfloat16m4_t bm = __riscv_vle16_v_f16m4(ai, az);
+    vuint16m4_t bn = __riscv_vlse16_v_u16m4(ah, 2, az);
+    vint32m8_t bo = __riscv_vle32_v_i32m8_m(bb, ak, az);
+    vfloat16m1_t bp = __riscv_vle16_v_f16m1(aj, az);
+    vuint16m4_t bq = __riscv_vrgatherei16_vv_u16m4(bl, bn, az);
+    __riscv_vse16_v_u16m4(am, bq, az);
+    vfloat16m1_t br = __riscv_vfredusum_vs_f16m4_f16m1_m(bb, bm, bp, az);
+    vuint8m2_t bs;
+    vuint32m8_t bt;
+    __riscv_vse16_v_f16m1(an, br, az);
+    vfloat32m8_t bu = __riscv_vloxei8_v_f32m8(ab, bs, az);
+    __riscv_vse16_v_i16m4(ay, bk, az);
+    bi = __riscv_vfmv_s_f_f16m4(1, az);
+    __riscv_vse16_v_f16m4(au, bi, az);
+    vuint16m4_t bw = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 0, az);
+    vuint32m8_t by = __riscv_vle32_v_u32m8_m(bb, ad, az);
+    bt = __riscv_vmv_s_x_u32m8(3090659, az);
+    __riscv_vse32_v_u32m8(at, bt, az);
+    vuint8m2_t bz = __riscv_vloxei16_v_u8m2(e, bw, az);
+    __riscv_vse32_v_u32m8(av, by, az);
+    vint8m2_t cd;
+    __riscv_vse8_v_i8m2(ao, cd, az);
+    __riscv_vsse32_v_i32m8_m(bb, ax, 4, bo, az);
+    __riscv_vse32_v_f32m8(as, bu, az);
+    vuint16m4_t cf;
+    __riscv_vsoxei16_v_u32m8(aq, cf, be, az);
+    vuint8m2_t cg = __riscv_vmulhu_vx_u8m2(bz, 0, az);
+    vuint32m8_t ch = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 0, az);
+    __riscv_vsoxei32_v_u8m2(ap, ch, cg, az);
+  }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
index af3a8610d63..5f7490e8a3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
@@ -1,14 +1,40 @@
-/* { dg-do compile { target { rv64 } } } */
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v } */
/* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2 --param=vsetvl-strategy=optim 
-fno-schedule-insns  -fno-schedule-insns2 -fno-schedule-fusion " } */

#include <riscv_vector.h>

void
-foo (uint8_t *ptr, vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t 
vl)
+__attribute__ ((noipa))
+foo (vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t vl)
{
  *v1 = __riscv_vfmv_s_f_f16m4 (1, vl);
  *v2 = __riscv_vmv_s_x_u32m8 (2963090659u, vl);
  *v3 = __riscv_vsll_vx_u8m2 (__riscv_vid_v_u8m2 (vl), 2, vl);
}

-/* { dg-final { scan-assembler-not {vsetvli.*zero,zero} } }*/
+int
+main ()
+{
+  vfloat16m4_t v1;
+  vuint32m8_t v2;
+  vuint8m2_t v3;
+  int vl = 4;
+  foo (&v1, &v2, &v3, vl);
+
+  _Float16 val1 = ((_Float16 *)&v1)[0];
+  if (val1 - 1.0000f > 0.00001f)
+    __builtin_abort ();
+
+  uint32_t val2 = ((uint32_t *)&v2)[0];
+  if (val2 != 2963090659u)
+    __builtin_abort ();
+
+  for (int i = 0; i < vl; i++)
+    {
+      uint8_t val = ((uint8_t *)&v3)[i];
+      if (val != i << 2)
+        __builtin_abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
new file mode 100644
index 00000000000..81e3a6ed8eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3" } */
+
+#include <riscv_vector.h>
+
+_Float16 a (uint64_t);
+int8_t b () {
+  int c = 100;
+  double *d;
+  _Float16 *e;
+  for (size_t f;; c -= f)
+    {
+      f = c;
+      __riscv_vsll_vx_u8mf8 (__riscv_vid_v_u8mf8 (f), 2, f);
+      vfloat16mf4_t g;
+      a (1);
+      g = __riscv_vfmv_s_f_f16mf4 (2, f);
+      vfloat64m1_t i = __riscv_vfmv_s_f_f64m1 (30491, f);
+      vuint16mf4_t j;
+      __riscv_vsoxei16_v_f16mf4 (e, j, g, f);
+      vuint8mf8_t k = __riscv_vsll_vx_u8mf8 (__riscv_vid_v_u8mf8 (f), 3, f);
+      __riscv_vsoxei8_v_f64m1 (d, k, i, f);
+    }
+}
+
+/* { dg-final { scan-assembler-not "e64,mf4" } } */
--
2.48.1

[PATCH v2] RISC-V: Adjust LMUL when using maximum SEW [PR117955].

Reply via email to