https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114639

--- Comment #19 from Li Pan <pan2.li at intel dot com> ---
Thanks Juzhe.  Here is another example

---------------------------------
#include <riscv_vector.h>

/* Declared but not called anywhere in the snippet shown here.  */
extern size_t get_new_vl ();

/* Return the sum of the first two elements of C.  C must point to at
   least two readable size_t values.  The noinline attribute keeps this
   as a real out-of-line call.  */
size_t
__attribute__((noinline))
get_vl (size_t *c)
{
  return c[0] + c[1];
}

/* Reproducer: invoke the vmsne.vx intrinsic on vector A and scalar B,
   taking the vector length from a call to get_vl (c).  A is still needed
   after get_vl returns, so it has to survive that function call.  */
vbool64_t
test_fail_2 (vuint64m1_t a, unsigned long b, size_t *c)
{
  return __riscv_vmsne_vx_u64m1_b64 (a, b, get_vl (c));
}
-------------------------------------------------------

test_fail_2:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        csrr    t0,vlenb
        sub     sp,sp,t0
        vs1r.v  v1,0(sp)
        sub     sp,sp,t0
        vs1r.v  v2,0(sp)
        sub     sp,sp,t0
        vs1r.v  v3,0(sp)
        sub     sp,sp,t0
        vs1r.v  v4,0(sp)
        sub     sp,sp,t0
        vs1r.v  v5,0(sp)
        sub     sp,sp,t0
        vs1r.v  v6,0(sp)
        sub     sp,sp,t0
        vs1r.v  v7,0(sp)
        sub     sp,sp,t0
        vs1r.v  v24,0(sp)
        sub     sp,sp,t0
        vs1r.v  v25,0(sp)
        sub     sp,sp,t0
        vs1r.v  v26,0(sp)
        sub     sp,sp,t0
        vs1r.v  v27,0(sp)
        sub     sp,sp,t0
        vs1r.v  v28,0(sp)
        sub     sp,sp,t0
        vs1r.v  v29,0(sp)
        sub     sp,sp,t0
        vs1r.v  v30,0(sp)
        sub     sp,sp,t0
        vs1r.v  v31,0(sp)
        csrr    t0,vlenb
        sub     sp,sp,t0
        vs1r.v  v8,0(sp)
        mv      s0,a0
        mv      a0,a1
        call    get_vl
        vl1re64.v       v8,0(sp)
        vsetvli zero,a0,e64,m1,ta,ma
        vmsne.vx        v0,v8,s0
        csrr    t0,vlenb
        add     sp,sp,t0
        csrr    t0,vlenb
        vl1re64.v       v31,0(sp)
        add     sp,sp,t0
        vl1re64.v       v30,0(sp)
        add     sp,sp,t0
        vl1re64.v       v29,0(sp)
        add     sp,sp,t0
        vl1re64.v       v28,0(sp)
        ...

As I understand it, these callee-saved vector registers do not need to be
saved and restored if the function body does not clobber them.  Only the
clobbered registers need to be stored to and reloaded from the stack.

However, this is really an optimization, so we can consider improving it
in GCC-15 if my understanding is correct.

Reply via email to