As I remember, I once tried to use a larger LMUL in VLS modes than in VLA
modes, but it could cause an ICE. I don't remember the exact scenario now...
That's why I originally used MAX_LMUL to control both VLA modes and VLS
modes.



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2025-08-14 15:17
To: gcc-patches; kito.cheng; palmer; jeffreyalaw; rdapp; juzhe.zhong; pan2.li; 
vineetg; patrick
CC: Kito Cheng
Subject: [PATCH] RISC-V: Allow VLS types using up to LMUL 8
We used to apply -mrvv-max-lmul= to limit VLS code gen, auto vectorizer,
and builtin string function expansion. But I think the VLS code gen part doesn't
need this limit, since it only happens when the user explicitly writes vector
types.
 
For example, int32x8_t under -mrvv-max-lmul=m1 with VLEN=128 would be split into
two int32x4_t, which generates more instructions and runs slower.
 
In this patch, I changed -mrvv-max-lmul= to only affect auto vectorization and
builtin string function expansion. Actually, the option's help text already
says it only controls the LMUL used by auto-vectorization, so I believe this
change makes sense :)
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (vls_mode_valid_p): New argument
allow_up_to_lmul_8.
* config/riscv/riscv-v.cc (autovectorize_vector_modes): Set
allow_up_to_lmul_8 to false.
(vls_mode_valid_p): Add new argument allow_up_to_lmul_8, and use
it to determine whether to allow LMUL 8.
 
gcc/testsuite/ChangeLog:
 
* gcc.target.riscv/rvv/vls-type-rvv-max-lmul.c: New test.
---
gcc/config/riscv/riscv-protos.h               |  2 +-
gcc/config/riscv/riscv-v.cc                   | 31 ++++++++++---------
.../riscv/rvv/vls-type-rvv-max-lmul.c         | 12 +++++++
3 files changed, 29 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 539321ff95b..045ee09b23f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -763,7 +763,7 @@ opt_machine_mode vectorize_related_mode (machine_mode, 
scalar_mode,
unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool);
bool cmp_lmul_le_one (machine_mode);
bool cmp_lmul_gt_one (machine_mode);
-bool vls_mode_valid_p (machine_mode);
+bool vls_mode_valid_p (machine_mode, bool allow_up_to_lmul_8 = true);
bool vlmax_avl_type_p (rtx_insn *);
bool has_vl_op (rtx_insn *);
bool tail_agnostic_p (rtx_insn *);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c9c83282732..3484f6442e7 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2910,7 +2910,7 @@ autovectorize_vector_modes (vector_modes *modes, bool)
     machine_mode mode;
     while (size > 0 && get_vector_mode (QImode, size).exists (&mode))
      {
- if (vls_mode_valid_p (mode))
+ if (vls_mode_valid_p (mode, /* allow_up_to_lmul_8 */ false))
  modes->safe_push (mode);
i++;
@@ -5027,26 +5027,27 @@ cmp_lmul_gt_one (machine_mode mode)
    Then we can have the condition for VLS mode in fixed-vlmax, aka:
      PRECISION (VLSmode) < VLEN / (64 / PRECISION(VLS_inner_mode)).  */
bool
-vls_mode_valid_p (machine_mode vls_mode)
+vls_mode_valid_p (machine_mode vls_mode, bool allow_up_to_lmul_8)
{
   if (!TARGET_VECTOR || TARGET_XTHEADVECTOR)
     return false;
   if (rvv_vector_bits == RVV_VECTOR_BITS_SCALABLE)
     {
-      if (GET_MODE_CLASS (vls_mode) != MODE_VECTOR_BOOL
-   && !ordered_p (TARGET_MAX_LMUL * BITS_PER_RISCV_VECTOR,
- GET_MODE_PRECISION (vls_mode)))
- /* We enable VLS modes which are aligned with TARGET_MAX_LMUL and
-    BITS_PER_RISCV_VECTOR.
-
-    e.g. When TARGET_MAX_LMUL = 1 and BITS_PER_RISCV_VECTOR = (128,128).
-    We enable VLS modes have fixed size <= 128bit.  Since ordered_p is
-    false between VLA modes with size = (128, 128) bits and VLS mode
-    with size = 128 bits, we will end up with multiple ICEs in
-    middle-end generic codes.  */
- return false;
-      return true;
+      if (GET_MODE_CLASS (vls_mode) != MODE_VECTOR_BOOL)
+ return true;
+      if (allow_up_to_lmul_8)
+ return true;
+      /* We enable VLS modes which are aligned with TARGET_MAX_LMUL and
+ BITS_PER_RISCV_VECTOR.
+
+ e.g. When TARGET_MAX_LMUL = 1 and BITS_PER_RISCV_VECTOR = (128,128).
+ We enable VLS modes have fixed size <= 128bit.  Since ordered_p is
+ false between VLA modes with size = (128, 128) bits and VLS mode
+ with size = 128 bits, we will end up with multiple ICEs in
+ middle-end generic codes.  */
+      return !ordered_p (TARGET_MAX_LMUL * BITS_PER_RISCV_VECTOR,
+ GET_MODE_PRECISION (vls_mode));
     }
   if (rvv_vector_bits == RVV_VECTOR_BITS_ZVL)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c
new file mode 100644
index 00000000000..5d52f7798d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls-type-rvv-max-lmul.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m1 
-fdump-tree-optimized" } */
+
+typedef long long int64x8_t __attribute__((vector_size(64)));
+
+int64x8_t foo(int64x8_t a, int64x8_t b)
+{
+    return a + b;
+}
+/* Make sure we can use up to LMUL 4 to process int64x8_t at once rather than
+   break that into 4 LMUL 1 operations.  */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e64,m4,t[au],m[au]} } } */
-- 
2.34.1
 
 

Reply via email to