https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114639
--- Comment #19 from Li Pan <pan2.li at intel dot com> ---
Thanks Juzhe. Here is another example:
---------------------------------
#include <riscv_vector.h>
extern size_t get_new_vl ();
/* Return the sum of the first two elements of C (c[0] + c[1]).
   Marked noinline so that a caller has to make a real call to obtain
   the value instead of having this body inlined.  */
size_t
__attribute__((noinline))
get_vl (size_t *c)
{
size_t vl = c[0] + c[1];
return vl;
}
/* Reproducer: return the mask produced by the vector-scalar "not equal"
   intrinsic on A and B.  The vector-length argument is the result of the
   get_vl (c) call, which appears as an argument of the intrinsic call
   itself.  */
vbool64_t
test_fail_2 (vuint64m1_t a, unsigned long b, size_t *c)
{
return __riscv_vmsne_vx_u64m1_b64 (a, b, get_vl (c));
}
-------------------------------------------------------
test_fail_2:
addi sp,sp,-16
sd ra,8(sp)
sd s0,0(sp)
csrr t0,vlenb
sub sp,sp,t0
vs1r.v v1,0(sp)
sub sp,sp,t0
vs1r.v v2,0(sp)
sub sp,sp,t0
vs1r.v v3,0(sp)
sub sp,sp,t0
vs1r.v v4,0(sp)
sub sp,sp,t0
vs1r.v v5,0(sp)
sub sp,sp,t0
vs1r.v v6,0(sp)
sub sp,sp,t0
vs1r.v v7,0(sp)
sub sp,sp,t0
vs1r.v v24,0(sp)
sub sp,sp,t0
vs1r.v v25,0(sp)
sub sp,sp,t0
vs1r.v v26,0(sp)
sub sp,sp,t0
vs1r.v v27,0(sp)
sub sp,sp,t0
vs1r.v v28,0(sp)
sub sp,sp,t0
vs1r.v v29,0(sp)
sub sp,sp,t0
vs1r.v v30,0(sp)
sub sp,sp,t0
vs1r.v v31,0(sp)
csrr t0,vlenb
sub sp,sp,t0
vs1r.v v8,0(sp)
mv s0,a0
mv a0,a1
call get_vl
vl1re64.v v8,0(sp)
vsetvli zero,a0,e64,m1,ta,ma
vmsne.vx v0,v8,s0
csrr t0,vlenb
add sp,sp,t0
csrr t0,vlenb
vl1re64.v v31,0(sp)
add sp,sp,t0
vl1re64.v v30,0(sp)
add sp,sp,t0
vl1re64.v v29,0(sp)
add sp,sp,t0
vl1re64.v v28,0(sp)
...
As I understand it, these callee-saved vector registers do not need to be
saved if the function body doesn't clobber them. Only the clobbered registers
need to be saved to and restored from the stack.
However, this is more of an optimization than a correctness issue; we can
consider improving it in GCC 15 if my understanding is correct.