Hi everyone,
I am trying to set "vlen" after the add and multiply operations, as shown in the following code:
➜
// Globals used as the operands of both the add and the multiply below.
vf32 x3,x4;
// Reproducer: adds and multiplies the same pair of globals (x3, x4), then
// sets vlen, then stores both results into `output`. The vlen write is meant
// to happen after the add and the multiply. `input` is not used.
void foo1(float16_t* input, float16_t* output, int vlen){
    vf32 add = x3 + x4;
    vf32 mul = x3 * x4;
    __builtin_riscv_vlen(vlen);  //<---- intended to take effect only after the add and multiply above
    // NOTE(review): storevf is presumably a vector store to the given address -- confirm.
    storevf(&output[0], add);
    storevf(&output[4], mul);
}
But after compilation, the "vlen" write is reordered ahead of the add and multiply:
➜
foo1:
    lui     a5,%hi(.LANCHOR0)
    addi    a5,a5,%lo(.LANCHOR0)
    addi    a4,a5,64
    vfld    v0,a5
    vfld    v1,a4
    csrw    vlen,a2  //<----
    vfadd   v2,v0,v1
    addi    a5,a1,8
    vfmul   v0,v0,v1
    vfst    v2,a1
    vfst    v0,a5
    ret
I have also tried adding compiler barriers, as shown in the following two variants:
➜
// Empty volatile asm statement with a "memory" clobber, used as a compiler barrier.
// NOTE(review): a "memory" clobber constrains memory accesses; it presumably does
// not pin arithmetic that lives purely in registers -- confirm against the GCC docs.
#define barrier() __asm__ __volatile__("": : :"memory")
// Globals used as the operands of both the add and the multiply below.
vf32 x3,x4;
// Variant of the reproducer that brackets the vlen write with barrier() calls
// in an attempt to keep it after the add and the multiply. `input` is not used.
void foo1(float16_t* input, float16_t* output, int vlen){
    vf32 add = x3 + x4;
    vf32 mul = x3 * x4;
    barrier();  // intended to keep the add/multiply above the vlen write
    __builtin_riscv_vlen(vlen);
    barrier();  // intended to keep the stores below the vlen write
    storevf(&output[0], add);
    storevf(&output[4], mul);
}
➜
// Globals used as the operands of both the add and the multiply below.
vf32 x3,x4;
// Variant of the reproducer that writes the vlen CSR with a hand-written
// volatile inline-asm statement (with a "memory" clobber) instead of the
// builtin. `input` is not used.
void foo1(float16_t* input, float16_t* output, int vlen){
    vf32 add = x3 + x4;
    vf32 mul = x3 * x4;
    // Emits "csrw vlen,<operand>"; the asm has no outputs and takes vlen as its only input.
    __asm__ __volatile__ ("csrw\tvlen,%0" : : "rJ"(vlen) : "memory");
    storevf(&output[0], add);
    storevf(&output[4], mul);
}
Both approaches produce the same incorrect assembly.
=======
However, if I change the code so that the add and the multiply use different operands:
➜
// Operands of the multiply below.
vf32 x1,x2;
// Operands of the add below.
vf32 x3,x4;
// Variant of the reproducer in which the add and the multiply read different
// globals (x3 + x4 versus x1 * x2). The vlen write is again meant to happen
// after both operations and before the stores. `input` is not used.
void foo1(float16_t* input, float16_t* output, int vlen){
    vf32 add = x3 + x4;
    vf32 mul = x1 * x2;
    __builtin_riscv_vlen(vlen);  // intended to take effect only after the add and multiply above
    storevf(&output[0], add);
    storevf(&output[4], mul);
}
then the generated assembly is correct:
➜
foo1:
    lui     a5,%hi(.LANCHOR0)
    addi    a5,a5,%lo(.LANCHOR0)
    addi    a0,a5,64
    addi    a3,a5,128
    addi    a4,a5,192
    vfld    v1,a5
    vfld    v3,a0
    vfld    v0,a3
    vfld    v2,a4
    vfadd   v1,v1,v3
    vfmul   v0,v0,v2
    csrw    vlen,a2  //<----
    addi    a5,a1,8
    vfst    v1,a1
    vfst    v0,a5
    ret

Is there any other way to write the code, or any GCC compilation option, to deal
with this issue?
Any suggestion would be appreciated. Thank you very much!

Best,
Jin

Reply via email to