This patch runs counter to the ABI spec, which states that vxrm is not preserved across calls and is volatile upon function entry [1]; a function therefore cannot rely on the value of vxrm set by its caller. vxrm does not play the same role in the calling convention that frm does. (I won't get into the rationale in this email, but the rationale isn't especially important: we should follow the ABI.)
[1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/3a79e936eec5491078b1133ac943f91ef5fd75fd/riscv-cc.adoc?plain=1#L119-L120 On Fri, Feb 7, 2025 at 12:21 AM <pan2...@intel.com> wrote: > > From: Pan Li <pan2...@intel.com> > > Inspired by PR118103, the VXRM register should be treated almost the > same as the FRM register, aka cooperatively-managed global register. > Thus, add the VXRM to global_regs to avoid the elimination by the > late-combine pass. > > For example as below code: > > 21 │ > 22 │ void compute () > 23 │ { > 24 │ size_t vl = __riscv_vsetvl_e16m1 (N); > 25 │ vuint16m1_t va = __riscv_vle16_v_u16m1 (a, vl); > 26 │ vuint16m1_t vb = __riscv_vle16_v_u16m1 (b, vl); > 27 │ vuint16m1_t vc = __riscv_vaaddu_vv_u16m1 (va, vb, > __RISCV_VXRM_RDN, vl); > 28 │ > 29 │ __riscv_vse16_v_u16m1 (c, vc, vl); > 30 │ } > 31 │ > 32 │ int main () > 33 │ { > 34 │ initialize (); > 35 │ compute(); > 36 │ > 37 │ return 0; > 38 │ } > > After compile with -march=rv64gcv -O3, we will have: > > 30 │ compute: > 31 │ csrwi vxrm,2 > 32 │ lui a3,%hi(a) > 33 │ lui a4,%hi(b) > 34 │ addi a4,a4,%lo(b) > 35 │ vsetivli zero,4,e16,m1,ta,ma > 36 │ addi a3,a3,%lo(a) > 37 │ vle16.v v2,0(a4) > 38 │ vle16.v v1,0(a3) > 39 │ lui a4,%hi(c) > 40 │ addi a4,a4,%lo(c) > 41 │ vaaddu.vv v1,v1,v2 > 42 │ vse16.v v1,0(a4) > 43 │ ret > 44 │ .size compute, .-compute > 45 │ .section .text.startup,"ax",@progbits > 46 │ .align 1 > 47 │ .globl main > 48 │ .type main, @function > 49 │ main: > | // csrwi vxrm,2 deleted after inline > 50 │ addi sp,sp,-16 > 51 │ sd ra,8(sp) > 52 │ call initialize > 53 │ lui a3,%hi(a) > 54 │ lui a4,%hi(b) > 55 │ vsetivli zero,4,e16,m1,ta,ma > 56 │ addi a4,a4,%lo(b) > 57 │ addi a3,a3,%lo(a) > 58 │ vle16.v v2,0(a4) > 59 │ vle16.v v1,0(a3) > 60 │ lui a4,%hi(c) > 61 │ addi a4,a4,%lo(c) > 62 │ li a0,0 > 63 │ vaaddu.vv v1,v1,v2 > > The below test suites are passed for this patch. > * The rv64gcv fully regression test. 
> > PR target/118103 > > gcc/ChangeLog: > > * config/riscv/riscv.cc (riscv_conditional_register_usage): Add > the VXRM as the global_regs. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/pr118103-2.c: New test. > * gcc.target/riscv/rvv/base/pr118103-run-2.c: New test. > > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/config/riscv/riscv.cc | 4 +- > .../gcc.target/riscv/rvv/base/pr118103-2.c | 40 +++++++++++++++++ > .../riscv/rvv/base/pr118103-run-2.c | 44 +++++++++++++++++++ > 3 files changed, 87 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-2.c > > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 439cc12f93d..819e1538741 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -10900,7 +10900,9 @@ riscv_conditional_register_usage (void) > call_used_regs[regno] = 1; > } > > - if (!TARGET_VECTOR) > + if (TARGET_VECTOR) > + global_regs[VXRM_REGNUM] = 1; > + else > { > for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++) > fixed_regs[regno] = call_used_regs[regno] = 1; > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-2.c > new file mode 100644 > index 00000000000..d6e3aa09077 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-2.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d" } */ > + > +#include "riscv_vector.h" > + > +#define N 4 > +uint16_t a[N]; > +uint16_t b[N]; > +uint16_t c[N]; > + > +void initialize () > +{ > + uint16_t tmp_0[N] = { 0xfff, 3213, 238, 275, }; > + > + for (int i = 0; i < N; ++i) > + a[i] = b[i] = tmp_0[i]; > + > + for (int i = 0; i < N; ++i) > + c[i] = 0; > +} > + > +void compute () > +{ > + size_t vl = __riscv_vsetvl_e16m1 (N); > + vuint16m1_t va = __riscv_vle16_v_u16m1 (a, vl); > + vuint16m1_t vb = 
__riscv_vle16_v_u16m1 (b, vl); > + vuint16m1_t vc = __riscv_vaaddu_vv_u16m1 (va, vb, __RISCV_VXRM_RDN, vl); > + > + __riscv_vse16_v_u16m1 (c, vc, vl); > +} > + > +int main () > +{ > + initialize (); > + compute(); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times {csrwi\s+vxrm,\s*[01234]} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-2.c > new file mode 100644 > index 00000000000..89150d4f6b5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-2.c > @@ -0,0 +1,44 @@ > +/* { dg-do run { target { riscv_v } } } */ > +/* { dg-options "-O3" } */ > + > +#include "riscv_vector.h" > + > +#define N 4 > +uint16_t a[N]; > +uint16_t b[N]; > +uint16_t c[N]; > + > +void initialize () { > + uint16_t tmp_0[N] = { 0xfff, 3213, 238, 275, }; > + uint16_t tmp_1[N] = { 0x2, 823, 39, 9, }; > + > + for (int i = 0; i < N; ++i) > + { > + a[i] = tmp_0[i]; > + b[i] = tmp_1[i]; > + } > + > + for (int i = 0; i < N; ++i) > + c[i] = 0; > +} > + > +void compute () > +{ > + size_t vl = __riscv_vsetvl_e16m1 (N); > + vuint16m1_t va = __riscv_vle16_v_u16m1 (a, vl); > + vuint16m1_t vb = __riscv_vle16_v_u16m1 (b, vl); > + vuint16m1_t vc = __riscv_vaaddu_vv_u16m1 (va, vb, __RISCV_VXRM_RDN, vl); > + > + __riscv_vse16_v_u16m1 (c, vc, vl); > +} > + > +int main () > +{ > + initialize (); > + compute (); > + > + if (c[0] != 2048) > + __builtin_abort (); > + > + return 0; > +} > -- > 2.43.0 >