I experimented with this patch, which removes a vfmv when a
floating-point operand can be loaded directly from memory with a zero-stride
vlse.
In terms of benchmarks, I measured the following reductions in icount:
* 503.bwaves: -4.0%
* 538.imagick: -3.3%
* 549.fotonik3d: -0.34%
However, the icount for 507.cactuBSSN increased by 0.43%. In addition,
measurements on the BPI board show that the patch actually increases
execution times by 5 to 11%.
This may still be beneficial for some uarchs but would have to be
tunable, wouldn't it?
Is it worth proceeding with this?
--
PA
From 521a1e7ac7843f2b1c1fd60e0f8768fc537f2210 Mon Sep 17 00:00:00 2001
From: Paul-Antoine Arras <[email protected]>
Date: Tue, 23 Sep 2025 19:29:59 +0200
Subject: [PATCH] RISC-V: Allow FP strided broadcast from memory [PR121451]
This enables floating-point memory operands to be directly broadcast via a
zero-stride load.
Before this patch, we have two instructions, e.g.:
flw fa5,0(a2)
vfmv.v.f v3,fa5
After, we get only one:
vlse32.v v3,0(a2),zero
PR target/121451
gcc/ChangeLog:
* config/riscv/riscv-selftests.cc (BROADCAST_TEST,
run_broadcast_selftests): Adjust broadcast self test.
* config/riscv/riscv-v.cc (can_be_broadcast_p): Return false for float
memory operand.
(strided_broadcast_p): Return true for float memory operand.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust
check-function-bodies.
* gcc.target/riscv/rvv/base/scalar_move-6.c: Likewise.
* gcc.target/riscv/rvv/base/scalar_move-7.c: Likewise.
* gcc.target/riscv/rvv/base/scalar_move-8.c: Likewise.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f32.c: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f64.c: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f32.c: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f64.c: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse.h: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse_data.h: New test.
* gcc.target/riscv/rvv/autovec/strided/fma_vlse_run.h: New test.
---
gcc/config/riscv/riscv-selftests.cc | 12 +-
gcc/config/riscv/riscv-v.cc | 7 +-
.../rvv/autovec/strided/fma_vlse-1-f16.c | 10 +
.../rvv/autovec/strided/fma_vlse-1-f32.c | 10 +
.../rvv/autovec/strided/fma_vlse-1-f64.c | 10 +
.../rvv/autovec/strided/fma_vlse-run-1-f16.c | 13 ++
.../rvv/autovec/strided/fma_vlse-run-1-f32.c | 13 ++
.../rvv/autovec/strided/fma_vlse-run-1-f64.c | 13 ++
.../riscv/rvv/autovec/strided/fma_vlse.h | 20 ++
.../riscv/rvv/autovec/strided/fma_vlse_data.h | 211 ++++++++++++++++++
.../riscv/rvv/autovec/strided/fma_vlse_run.h | 32 +++
.../gcc.target/riscv/rvv/base/scalar_move-5.c | 2 +-
.../gcc.target/riscv/rvv/base/scalar_move-6.c | 4 +-
.../gcc.target/riscv/rvv/base/scalar_move-7.c | 4 +-
.../gcc.target/riscv/rvv/base/scalar_move-8.c | 4 +-
15 files changed, 347 insertions(+), 18 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f16.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f32.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f64.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f16.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f32.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f64.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_data.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_run.h
diff --git gcc/config/riscv/riscv-selftests.cc gcc/config/riscv/riscv-selftests.cc
index 9ca1ffee394..fb2e2b97edb 100644
--- gcc/config/riscv/riscv-selftests.cc
+++ gcc/config/riscv/riscv-selftests.cc
@@ -328,7 +328,7 @@ run_broadcast_selftests (void)
machine_mode mode;
#define BROADCAST_TEST(MODE_CLASS) \
- FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) \
+ FOR_EACH_MODE_IN_CLASS (mode, MODE_CLASS) \
{ \
if (riscv_v_ext_vector_mode_p (mode)) \
{ \
@@ -342,13 +342,9 @@ run_broadcast_selftests (void)
expand_vector_broadcast (mode, mem); \
insn = get_last_insn (); \
src = SET_SRC (PATTERN (insn)); \
- if (strided_load_broadcast_p ()) \
- { \
- ASSERT_TRUE (MEM_P (XEXP (src, 0))); \
- ASSERT_TRUE ( \
- rtx_equal_p (src, \
- gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); \
- } \
+ ASSERT_TRUE (MEM_P (XEXP (src, 0))); \
+ ASSERT_TRUE ( \
+ rtx_equal_p (src, gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); \
end_sequence (); \
/* Test vmv.v.x or vfmv.v.f. */ \
start_sequence (); \
diff --git gcc/config/riscv/riscv-v.cc gcc/config/riscv/riscv-v.cc
index b30a95d0e3f..c118a9bd087 100644
--- gcc/config/riscv/riscv-v.cc
+++ gcc/config/riscv/riscv-v.cc
@@ -6088,9 +6088,7 @@ can_be_broadcast_p (rtx op)
/* Same for float, just that we can always handle 64-bit doubles
even on !TARGET_64BIT. We have ruled out 16-bit HF already
above. */
- if (FLOAT_MODE_P (mode)
- && (memory_operand (op, mode) || CONSTANT_P (op))
- && can_create_pseudo_p ())
+ if (FLOAT_MODE_P (mode) && CONSTANT_P (op) && can_create_pseudo_p ())
return true;
/* After excluding all the cases we cannot handle the register types
@@ -6124,6 +6122,9 @@ strided_broadcast_p (rtx op)
if (!TARGET_ZVFH && mode == HFmode)
return true;
+ if (FLOAT_MODE_P (mode) && memory_operand (op, mode))
+ return true;
+
return false;
}
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f16.c
new file mode 100644
index 00000000000..4e303666287
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f16.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d" } */
+
+#include "fma_vlse.h"
+
+DEF_FMA_VLSE_FORM_1(_Float16)
+
+/* { dg-final { scan-assembler-times {vlse16.v} 1 } } */
+/* { dg-final { scan-assembler-not {flh} } } */
+/* { dg-final { scan-assembler-not {vfmv.v.f} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f32.c
new file mode 100644
index 00000000000..0d7f329b4b0
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f32.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d" } */
+
+#include "fma_vlse.h"
+
+DEF_FMA_VLSE_FORM_1(float)
+
+/* { dg-final { scan-assembler-times {vlse32.v} 1 } } */
+/* { dg-final { scan-assembler-not {flw} } } */
+/* { dg-final { scan-assembler-not {vfmv.v.f} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f64.c gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f64.c
new file mode 100644
index 00000000000..63991f892ea
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-1-f64.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d" } */
+
+#include "fma_vlse.h"
+
+DEF_FMA_VLSE_FORM_1(double)
+
+/* { dg-final { scan-assembler-times {vlse64.v} 1 } } */
+/* { dg-final { scan-assembler-not {fld} } } */
+/* { dg-final { scan-assembler-not {vfmv.v.f} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f16.c
new file mode 100644
index 00000000000..5a86bdc82ac
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f16.c
@@ -0,0 +1,13 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh" } */
+
+#include "fma_vlse.h"
+
+#define T _Float16
+
+DEF_FMA_VLSE_FORM_1_WRAP (T)
+
+#define TEST_RUN(T, out, in, f, n) RUN_FMA_VLSE_FORM_1(T, out, in, f, n)
+#define LIMIT -32768
+
+#include "fma_vlse_run.h"
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f32.c
new file mode 100644
index 00000000000..a3ab385d467
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f32.c
@@ -0,0 +1,13 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv" } */
+
+#include "fma_vlse.h"
+
+#define T float
+
+DEF_FMA_VLSE_FORM_1_WRAP (T)
+
+#define TEST_RUN(T, out, in, f, n) RUN_FMA_VLSE_FORM_1(T, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "fma_vlse_run.h"
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f64.c gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f64.c
new file mode 100644
index 00000000000..7ce640ba7ff
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse-run-1-f64.c
@@ -0,0 +1,13 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv" } */
+
+#include "fma_vlse.h"
+
+#define T double
+
+DEF_FMA_VLSE_FORM_1_WRAP (T)
+
+#define TEST_RUN(T, out, in, f, n) RUN_FMA_VLSE_FORM_1(T, out, in, f, n)
+#define LIMIT -9223372036854775808ull
+
+#include "fma_vlse_run.h"
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse.h gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse.h
new file mode 100644
index 00000000000..063feefa113
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse.h
@@ -0,0 +1,20 @@
+#ifndef HAVE_DEFINED_FMA_VLSE_H
+#define HAVE_DEFINED_FMA_VLSE_H
+
+#define DEF_FMA_VLSE_FORM_1(T) \
+ void __attribute__ ((noinline)) vec_fma_vlse_##T##_form_1 (T *restrict out, \
+ T *restrict in, \
+ T *restrict f, \
+ unsigned n) \
+ { \
+ for (unsigned i = 0; i < n; i++) \
+ { \
+ out[i] += *f * in[i]; /* Scalar *f is the same for every element.  */ \
+ } \
+ }
+
+#define DEF_FMA_VLSE_FORM_1_WRAP(T) DEF_FMA_VLSE_FORM_1(T)
+#define RUN_FMA_VLSE_FORM_1(T, out, in, f, n) \
+ vec_fma_vlse_##T##_form_1 (out, in, f, n)
+
+#endif
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_data.h gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_data.h
new file mode 100644
index 00000000000..f635ff8fdca
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_data.h
@@ -0,0 +1,211 @@
+#ifndef HAVE_DEFINED_FMA_VLSE_DATA_H
+#define HAVE_DEFINED_FMA_VLSE_DATA_H
+
+#define N 16
+
+#define TEST_FMA_VLSE_DATA(T) test_##T##_data
+#define TEST_FMA_VLSE_DATA_WRAP(T) TEST_FMA_VLSE_DATA(T)
+
+
+_Float16 TEST_FMA_VLSE_DATA(_Float16)[][4][N] =
+{
+ {
+ { 0.30f16 },
+ {
+ 1.48f16, 1.48f16, 1.48f16, 1.48f16,
+ 0.80f16, 0.80f16, 0.80f16, 0.80f16,
+ 0.62f16, 0.62f16, 0.62f16, 0.62f16,
+ 1.18f16, 1.18f16, 1.18f16, 1.18f16,
+ },
+ {
+ 1.25f16, 1.25f16, 1.25f16, 1.25f16,
+ 1.89f16, 1.89f16, 1.89f16, 1.89f16,
+ 1.57f16, 1.57f16, 1.57f16, 1.57f16,
+ 1.21f16, 1.21f16, 1.21f16, 1.21f16,
+ },
+ {
+ 1.85f16, 1.85f16, 1.85f16, 1.85f16,
+ 1.37f16, 1.37f16, 1.37f16, 1.37f16,
+ 1.09f16, 1.09f16, 1.09f16, 1.09f16,
+ 1.54f16, 1.54f16, 1.54f16, 1.54f16,
+ }
+ },
+ {
+ { -0.505f16 },
+ {
+ -2.38f16, -2.38f16, -2.38f16, -2.38f16,
+ -2.06f16, -2.06f16, -2.06f16, -2.06f16,
+ -1.69f16, -1.69f16, -1.69f16, -1.69f16,
+ -1.1f16, -1.1f16, -1.1f16, -1.1f16,
+ },
+ {
+ -1.77f16, -1.77f16, -1.77f16, -1.77f16,
+ -1.6f16, -1.6f16, -1.6f16, -1.6f16,
+ -1.f16, -1.f16, -1.f16, -1.f16,
+ -1.23f16, -1.23f16, -1.23f16, -1.23f16,
+ },
+ {
+ -1.49f16, -1.49f16, -1.49f16, -1.49f16,
+ -1.25f16, -1.25f16, -1.25f16, -1.25f16,
+ -1.18f16, -1.18f16, -1.18f16, -1.18f16,
+ -0.479f16, -0.479f16, -0.479f16, -0.479f16,
+ }
+ },
+ {
+ { 4.95e-04f16 },
+ {
+ 1.4266e-05f16, 1.4266e-05f16, 1.4266e-05f16, 1.4266e-05f16,
+ 1.8129e-05f16, 1.8129e-05f16, 1.8129e-05f16, 1.8129e-05f16,
+ -8.4710e-06f16, -8.4710e-06f16, -8.4710e-06f16, -8.4710e-06f16,
+ 3.7876e-05f16, 3.7876e-05f16, 3.7876e-05f16, 3.7876e-05f16,
+ },
+ {
+ 2.2808e-02f16, 2.2808e-02f16, 2.2808e-02f16, 2.2808e-02f16,
+ 3.9633e-02f16, 3.9633e-02f16, 3.9633e-02f16, 3.9633e-02f16,
+ 9.9657e-02f16, 9.9657e-02f16, 9.9657e-02f16, 9.9657e-02f16,
+ 7.7189e-02f16, 7.7189e-02f16, 7.7189e-02f16, 7.7189e-02f16,
+ },
+ {
+ 2.5547e-05f16, 2.5547e-05f16, 2.5547e-05f16, 2.5547e-05f16,
+ 3.7732e-05f16, 3.7732e-05f16, 3.7732e-05f16, 3.7732e-05f16,
+ 4.0820e-05f16, 4.0820e-05f16, 4.0820e-05f16, 4.0820e-05f16,
+ 7.6054e-05f16, 7.6054e-05f16, 7.6054e-05f16, 7.6054e-05f16,
+ }
+ },
+};
+
+float TEST_FMA_VLSE_DATA(float)[][4][N] =
+{
+ {
+ { 43.71f },
+ {
+ -410.28f, -410.28f, -410.28f, -410.28f,
+ -276.91f, -276.91f, -276.91f, -276.91f,
+ -103.38f, -103.38f, -103.38f, -103.38f,
+ -378.24f, -378.24f, -378.24f, -378.24f,
+ },
+ {
+ 9.56f, 9.56f, 9.56f, 9.56f,
+ 6.39f, 6.39f, 6.39f, 6.39f,
+ 2.40f, 2.40f, 2.40f, 2.40f,
+ 8.80f, 8.80f, 8.80f, 8.80f,
+ },
+ {
+ 7.59f, 7.59f, 7.59f, 7.59f,
+ 2.40f, 2.40f, 2.40f, 2.40f,
+ 1.52f, 1.52f, 1.52f, 1.52f,
+ 6.41f, 6.41f, 6.41f, 6.41f,
+ }
+ },
+ {
+ { 2.04f },
+ {
+ -110.22f, -110.22f, -110.22f, -110.22f,
+ -25.13f, -25.13f, -25.13f, -25.13f,
+ -108.18f, -108.18f, -108.18f, -108.18f,
+ -107.14f, -107.14f, -107.14f, -107.14f,
+ },
+ {
+ 64.82f, 64.82f, 64.82f, 64.82f,
+ 31.65f, 31.65f, 31.65f, 31.65f,
+ 87.32f, 87.32f, 87.32f, 87.32f,
+ 58.70f, 58.70f, 58.70f, 58.70f,
+ },
+ {
+ 22.01f, 22.01f, 22.01f, 22.01f,
+ 39.44f, 39.44f, 39.44f, 39.44f,
+ 69.95f, 69.95f, 69.95f, 69.95f,
+ 12.61f, 12.61f, 12.61f, 12.61f,
+ }
+ },
+ {
+ { 20.35f },
+ {
+ 881.43f, 881.43f, 881.43f, 881.43f,
+ 3300.17f, 3300.17f, 3300.17f, 3300.17f,
+ 5217.85f, 5217.85f, 5217.85f, 5217.85f,
+ 66.57f, 66.57f, 66.57f, 66.57f,
+ },
+ {
+ 64.82f, 64.82f, 64.82f, 64.82f,
+ 31.65f, 31.65f, 31.65f, 31.65f,
+ 87.32f, 87.32f, 87.32f, 87.32f,
+ 58.70f, 58.70f, 58.70f, 58.70f,
+ },
+ {
+ 2200.52f, 2200.52f, 2200.52f, 2200.52f,
+ 3944.25f, 3944.25f, 3944.25f, 3944.25f,
+ 6994.81f, 6994.81f, 6994.81f, 6994.81f,
+ 1261.12f, 1261.12f, 1261.12f, 1261.12f,
+ }
+ },
+};
+
+double TEST_FMA_VLSE_DATA(double)[][4][N] =
+{
+ {
+ { 1.16e+12 },
+ {
+ 1.8757e+45, 1.8757e+45, 1.8757e+45, 1.8757e+45,
+ 7.5140e+45, 7.5140e+45, 7.5140e+45, 7.5140e+45,
+ 8.2069e+45, 8.2069e+45, 8.2069e+45, 8.2069e+45,
+ 4.9456e+45, 4.9456e+45, 4.9456e+45, 4.9456e+45,
+ },
+ {
+ 9.0242e+32, 9.0242e+32, 9.0242e+32, 9.0242e+32,
+ 3.6908e+32, 3.6908e+32, 3.6908e+32, 3.6908e+32,
+ 3.9202e+32, 3.9202e+32, 3.9202e+32, 3.9202e+32,
+ 5.0276e+32, 5.0276e+32, 5.0276e+32, 5.0276e+32,
+ },
+ {
+ 2.9201e+45, 2.9201e+45, 2.9201e+45, 2.9201e+45,
+ 7.9411e+45, 7.9411e+45, 7.9411e+45, 7.9411e+45,
+ 8.6606e+45, 8.6606e+45, 8.6606e+45, 8.6606e+45,
+ 5.5275e+45, 5.5275e+45, 5.5275e+45, 5.5275e+45,
+ }
+ },
+ {
+ { -7.29e+23 },
+ {
+ -6.4993e+65, -6.4993e+65, -6.4993e+65, -6.4993e+65,
+ -4.6760e+65, -4.6760e+65, -4.6760e+65, -4.6760e+65,
+ -8.1564e+65, -8.1564e+65, -8.1564e+65, -8.1564e+65,
+ -8.2899e+65, -8.2899e+65, -8.2899e+65, -8.2899e+65,
+ },
+ {
+ -7.7764e+41, -7.7764e+41, -7.7764e+41, -7.7764e+41,
+ -1.9756e+41, -1.9756e+41, -1.9756e+41, -1.9756e+41,
+ -4.8980e+41, -4.8980e+41, -4.8980e+41, -4.8980e+41,
+ -8.1062e+41, -8.1062e+41, -8.1062e+41, -8.1062e+41,
+ },
+ {
+ -8.2928e+64, -8.2928e+64, -8.2928e+64, -8.2928e+64,
+ -3.2356e+65, -3.2356e+65, -3.2356e+65, -3.2356e+65,
+ -4.5850e+65, -4.5850e+65, -4.5850e+65, -4.5850e+65,
+ -2.3794e+65, -2.3794e+65, -2.3794e+65, -2.3794e+65,
+ }
+ },
+ {
+ { 2.02e-03 },
+ {
+ -1.2191e-35, -1.2191e-35, -1.2191e-35, -1.2191e-35,
+ -1.0471e-36, -1.0471e-36, -1.0471e-36, -1.0471e-36,
+ -9.7582e-36, -9.7582e-36, -9.7582e-36, -9.7582e-36,
+ -2.2097e-36, -2.2097e-36, -2.2097e-36, -2.2097e-36,
+ },
+ {
+ 9.7703e-33, 9.7703e-33, 9.7703e-33, 9.7703e-33,
+ 4.1632e-33, 4.1632e-33, 4.1632e-33, 4.1632e-33,
+ 8.1964e-33, 8.1964e-33, 8.1964e-33, 8.1964e-33,
+ 4.7314e-33, 4.7314e-33, 4.7314e-33, 4.7314e-33,
+ },
+ {
+ 7.5586e-36, 7.5586e-36, 7.5586e-36, 7.5586e-36,
+ 7.3684e-36, 7.3684e-36, 7.3684e-36, 7.3684e-36,
+ 6.8101e-36, 6.8101e-36, 6.8101e-36, 6.8101e-36,
+ 7.3543e-36, 7.3543e-36, 7.3543e-36, 7.3543e-36,
+ }
+ },
+};
+
+#endif
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_run.h gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_run.h
new file mode 100644
index 00000000000..dc18987686f
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/fma_vlse_run.h
@@ -0,0 +1,32 @@
+#ifndef HAVE_DEFINED_FMA_VLSE_RUN_H
+#define HAVE_DEFINED_FMA_VLSE_RUN_H
+
+#include <assert.h>
+
+#define N 512
+
+int main () /* Driver: fill inputs, run TEST_RUN once, check against a scalar recomputation.  */
+{
+ T f[N]; /* Only f[0] is read by the check below (through *f).  */
+ T in[N];
+ T out[N];
+ T out2[N]; /* Same initial values as out[]; serves as the reference addend.  */
+
+ for (int i = 0; i < N; i++)
+ {
+ f[i] = LIMIT + i % 8723;
+ in[i] = LIMIT + i & 1964; /* Parses as (LIMIT + i) & 1964: + binds tighter than &.  */
+ out[i] = LIMIT + i & 628;
+ out2[i] = LIMIT + i & 628;
+ asm volatile ("" ::: "memory"); /* Compiler memory barrier after each iteration.  */
+ }
+
+ TEST_RUN (T, out, in, f, N); /* T, LIMIT and TEST_RUN are supplied by the including file.  */
+
+ for (int i = 0; i < N; i++)
+ assert (out[i] == *f * in[i] + out2[i]); /* Exact equality, no tolerance.  */
+
+ return 0;
+}
+
+#endif
diff --git gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
index 4f6785ace68..a5cd07aaa19 100644
--- gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
+++ gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
@@ -21,7 +21,7 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** fld\tfa[0-9]+,\s*100\(a0\)
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
** vfmv\.s\.f\tv[0-9]+,\s*fa[0-9]+
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
diff --git gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
index a8c9263c4df..c5d190b41fd 100644
--- gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
+++ gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
@@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** fld\tfa[0-9]+,\s*100\(a0\)
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vfmv\.s\.f\tv[0-9]+,\s*fa[0-9]+
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
** ret
*/
diff --git gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
index cf53aca5c62..b470dd7a9dc 100644
--- gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
+++ gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
@@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** fld\tfa[0-9]+,\s*100\(a0\)
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
** ret
*/
diff --git gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
index fd3b7c57510..eeff5af9e3d 100644
--- gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
+++ gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
@@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** fld\tfa[0-9]+,\s*100\(a0\)
+** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
+** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
** ret
*/
--
2.39.5