https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119547

            Bug ID: 119547
           Summary: RISC-V: VSETVL mistakenly modified other data
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: zhijin.zeng at spacemit dot com
  Target Milestone: ---

This code is extracted from opencv/modules/core/src/convert_scale.simd.hpp.

-march=rv64gcv -mabi=lp64d --param logical-op-non-short-circuit=1


```
#include <riscv_vector.h>

// Vector aliases: each v_* name refers to the `m2` RVV vector type of the
// corresponding element type.
using v_uint8 = vuint8m2_t;
using v_int8 = vint8m2_t;
using v_uint16 = vuint16m2_t;
using v_int16 = vint16m2_t;
using v_uint32 = vuint32m2_t;
using v_int32 = vint32m2_t;
using v_uint64 = vuint64m2_t;
using v_int64 = vint64m2_t;
using v_float32 = vfloat32m2_t;
using v_float64 = vfloat64m2_t;

// Short scalar aliases used by the functions below.
using uchar = unsigned char;
using schar = signed char;
using ushort = unsigned short;
using uint = unsigned int;
using uint64 = unsigned long int;
using int64 = long int;

// Extent of the region processed by cvt_64f: `width` bounds the inner
// (per-element) loops and `height` bounds the outer (per-row) loop.
struct Size {
  int width;
  int height;
};

// Per-vector-type traits. Only the specializations below are defined; each
// provides:
//   vlanes()    - the value returned by the matching __riscv_vsetvlmax_* call
//   lane_type   - the scalar element type
//   max_nlanes  - a compile-time constant (the same expression for every
//                 specialization of a given element width)
template <class T>
struct VTraits;

// 32-bit signed integer vectors (m1/m2/m4/m8); max_nlanes = 1024/32*2 = 64.
template <> struct VTraits<vint32m1_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e32m1(); } using lane_type = int32_t; static const int
max_nlanes = 1024/32*2; };
template <> struct VTraits<vint32m2_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e32m2(); } using lane_type = int32_t; static const int
max_nlanes = 1024/32*2; };
template <> struct VTraits<vint32m4_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e32m4(); } using lane_type = int32_t; static const int
max_nlanes = 1024/32*2; };
template <> struct VTraits<vint32m8_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e32m8(); } using lane_type = int32_t; static const int
max_nlanes = 1024/32*2; };

// 64-bit floating-point vectors (m1/m2/m4/m8); max_nlanes = 1024/64*2 = 32.
template <> struct VTraits<vfloat64m1_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e64m1(); } using lane_type = double; static const int
max_nlanes = 1024/64*2; };
template <> struct VTraits<vfloat64m2_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e64m2(); } using lane_type = double; static const int
max_nlanes = 1024/64*2; };
template <> struct VTraits<vfloat64m4_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e64m4(); } using lane_type = double; static const int
max_nlanes = 1024/64*2; };
template <> struct VTraits<vfloat64m8_t> { static inline int vlanes() { return
__riscv_vsetvlmax_e64m8(); } using lane_type = double; static const int
max_nlanes = 1024/64*2; };

// Returns a v_float64 produced by __riscv_vfmv_v_f_f64m2 from the scalar `v`,
// using VTraits<v_float64>::vlanes() as the vector length argument.
static inline v_float64 v_setall_f64(double v) { return
__riscv_vfmv_v_f_f64m2(v, VTraits<v_float64>::vlanes()); }
// Thin forwarder to v_setall_f64.
static inline v_float64 vx_setall_f64(double v) { return v_setall_f64(v); }

// Loads signed 8-bit elements from `ptr` with __riscv_vle8_v_i8mf2 and applies
// __riscv_vwcvt_x twice to obtain a v_int32. Every intrinsic call uses
// VTraits<v_int32>::vlanes() as its vector length argument, so that many bytes
// are read from `ptr`.
inline v_int32 v_load_expand_q(const schar* ptr)
{
    return __riscv_vwcvt_x(__riscv_vwcvt_x(__riscv_vle8_v_i8mf2(ptr,
VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes()),
VTraits<v_int32>::vlanes());
}

// Thin forwarder to v_load_expand_q.
static inline v_int32 vx_load_expand_q(const schar * ptr) { return
v_load_expand_q(ptr); }

// Converts `a` with __riscv_vfwcvt_f (vector length VTraits<v_int32>::vlanes())
// and returns part 0 of the result, selected with __riscv_vget_f64m2.
// See v_cvt_f64_high for part 1.
inline v_float64 v_cvt_f64(const v_int32& a)
{
    return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_int32>::vlanes()),
0);
}

// Same conversion as v_cvt_f64, but returns part 1 of the __riscv_vfwcvt_f
// result instead of part 0.
inline v_float64 v_cvt_f64_high(const v_int32& a)
{
    return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_int32>::vlanes()),
1);
}

// Stores the vector `a` to `ptr` with __riscv_vse64, using
// VTraits<v_float64>::vlanes() as the vector length argument.
inline void v_store(double* ptr, const v_float64& a) { __riscv_vse64(ptr, a,
VTraits<v_float64>::vlanes()); }

// Stores two vectors back to back: `a` at `ptr`, and `b` starting
// VTraits<v_float64>::vlanes() doubles after `ptr`.
static inline void v_store_pair_as(double* ptr, const v_float64& a, const
v_float64& b)
{
    v_store(ptr, a);
    v_store(ptr + VTraits<v_float64>::vlanes(), b);
}

// Loads signed 8-bit data from `ptr` as one v_int32 (via vx_load_expand_q) and
// writes its two double-precision parts to the output references:
// `a` receives v_cvt_f64(v0), `b` receives v_cvt_f64_high(v0).
static inline void vx_load_pair_as(const schar* ptr, v_float64& a, v_float64&
b)
{
    v_int32 v0 = vx_load_expand_q(ptr);
    a = v_cvt_f64(v0);
    b = v_cvt_f64_high(v0);
}

// Multiply-add wrapper around __riscv_vfmacc_vv_f64m2. `c` is passed as the
// first (accumulator) operand and `a`, `b` as the multiplicands, i.e. the
// result is a*b + c per the vfmacc definition in the RVV intrinsics reference.
inline v_float64 v_fma(const v_float64& a, const v_float64& b, const v_float64&
c)
{
    return __riscv_vfmacc_vv_f64m2(c, a, b, VTraits<v_float64>::vlanes());
}

// Converts the double `v` to `_Tp` with an ordinary value conversion.
// Note: despite the name, this reduced version performs no clamping.
template<typename _Tp>
static inline _Tp saturate_cast(double v)
{
    return static_cast<_Tp>(v);
}

// Row-by-row scale-and-shift conversion: every destination element becomes
// src[j]*a + b. Most of each row is handled by a vector loop, the remainder
// by a scalar loop. Kept out of line via __attribute__((noinline)).
//
//   src, dst     - first element of the source / destination rows
//   sstep, dstep - row strides; divided by the element size before being used
//                  as pointer increments (presumably supplied in bytes --
//                  confirm against the caller)
//   size         - width (elements per row) and height (number of rows)
//   a, b         - scale and shift
template<typename _Ts, typename _Td> __attribute__((noinline)) void
cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
         Size size, double a, double b )
{

    // Broadcast the scalar coefficients once, outside the loops.
    v_float64 va = vx_setall_f64(a), vb = vx_setall_f64(b);
    // Elements consumed per vector iteration: two v_float64 vectors.
    const int VECSZ = VTraits<v_float64>::vlanes()*2;

    // Convert the strides into element counts for pointer arithmetic.
    sstep /= sizeof(src[0]);
    dstep /= sizeof(dst[0]);

    for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
    {
        int j = 0;


        // Vector loop: VECSZ elements per iteration.
        for( ; j < size.width; j += VECSZ )
        {
            if( j > size.width - VECSZ )
            {
                // Fewer than VECSZ elements remain. If nothing has been
                // processed yet in this row (j == 0) or src and dst are the
                // same pointer, leave the rest to the scalar loop below.
                if( j == 0 || src == (_Ts*)dst )
                    break;
                // Otherwise step back so the final vector chunk ends exactly
                // at size.width; some elements are computed a second time.
                j = size.width - VECSZ;
            }
            v_float64 v0, v1;
            vx_load_pair_as(src + j, v0, v1);
            // v0*va + vb and v1*va + vb (see v_fma).
            v0 = v_fma(v0, va, vb);
            v1 = v_fma(v1, va, vb);
            v_store_pair_as(dst + j, v0, v1);
        }

        // Scalar loop for whatever the vector loop did not cover.
        for( ; j < size.width; j++ )
            dst[j] = saturate_cast<_Td>(src[j]*a + b);
    }
}

void cvtScale8s64f( const uchar* src_, size_t sstep, const uchar*, size_t,
uchar* dst_, size_t dstep, Size size, void* scale_) { const schar* src = (const
schar*)src_; double* dst = (double*)dst_; double* scale = (double*)scale_;
cvt_64f(src, sstep, dst, dstep, size, (double)scale[0], (double)scale[1]); }
```

asm code:
```
.L14:
        add     t6,t5,a4
        ble     a5,t4,.L10
        beq     a5,zero,.L7
        ble     t1,a5,.L41
        subw    a4,s3,a5
        subw    s5,t1,a5
        slli    s4,a5,3
        add     t6,a0,a5
        bgtu    a4,s2,.L53
.L15:
        mv      a5,t6
        add     a4,a2,s4
        vsetvli a7,zero,e8,mf2,ta,ma    // this vsetvli overwrites a7, which holds the loop exit condition
.L19:
        lb      t3,0(a5)
        addi    a5,a5,1
        addi    a4,a4,8
        fcvt.d.w        fa5,t3
        fmadd.d fa5,fa0,fa5,fa1
        fsd     fa5,-8(a4)
        bne     a5,a7,.L19              // loop exit condition
        addiw   t2,t2,1
        bne     s0,t2,.L54
.L40:
        ld      s1,64(sp)
        ld      s2,56(sp)
        ld      s3,48(sp)
        ld      s4,40(sp)
        ld      s5,32(sp)
        ld      s6,24(sp)
.L38:
```

Reply via email to