https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119547
            Bug ID: 119547
           Summary: RISC-V: VSETVL mistakenly modified other data
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: zhijin.zeng at spacemit dot com
  Target Milestone: ---

This code is extracted from opencv/modules/core/src/convert_scale.simd.hpp and
compiled with:

-march=rv64gcv -mabi=lp64d --param logical-op-non-short-circuit=1

```
#include <riscv_vector.h>

using v_uint8 = vuint8m2_t;
using v_int8 = vint8m2_t;
using v_uint16 = vuint16m2_t;
using v_int16 = vint16m2_t;
using v_uint32 = vuint32m2_t;
using v_int32 = vint32m2_t;
using v_uint64 = vuint64m2_t;
using v_int64 = vint64m2_t;
using v_float32 = vfloat32m2_t;
using v_float64 = vfloat64m2_t;
using uchar = unsigned char;
using schar = signed char;
using ushort = unsigned short;
using uint = unsigned int;
using uint64 = unsigned long int;
using int64 = long int;

struct Size
{
    int width;
    int height;
};

template <class T> struct VTraits;

template <> struct VTraits<vint32m1_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e32m1(); }
    using lane_type = int32_t;
    static const int max_nlanes = 1024/32*2;
};
template <> struct VTraits<vint32m2_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e32m2(); }
    using lane_type = int32_t;
    static const int max_nlanes = 1024/32*2;
};
template <> struct VTraits<vint32m4_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e32m4(); }
    using lane_type = int32_t;
    static const int max_nlanes = 1024/32*2;
};
template <> struct VTraits<vint32m8_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e32m8(); }
    using lane_type = int32_t;
    static const int max_nlanes = 1024/32*2;
};
template <> struct VTraits<vfloat64m1_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e64m1(); }
    using lane_type = double;
    static const int max_nlanes = 1024/64*2;
};
template <> struct VTraits<vfloat64m2_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e64m2(); }
    using lane_type = double;
    static const int max_nlanes = 1024/64*2;
};
template <> struct VTraits<vfloat64m4_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e64m4(); }
    using lane_type = double;
    static const int max_nlanes = 1024/64*2;
};
template <> struct VTraits<vfloat64m8_t>
{
    static inline int vlanes() { return __riscv_vsetvlmax_e64m8(); }
    using lane_type = double;
    static const int max_nlanes = 1024/64*2;
};

static inline v_float64 v_setall_f64(double v)
{
    return __riscv_vfmv_v_f_f64m2(v, VTraits<v_float64>::vlanes());
}

static inline v_float64 vx_setall_f64(double v) { return v_setall_f64(v); }

inline v_int32 v_load_expand_q(const schar* ptr)
{
    return __riscv_vwcvt_x(__riscv_vwcvt_x(__riscv_vle8_v_i8mf2(ptr, VTraits<v_int32>::vlanes()),
                                           VTraits<v_int32>::vlanes()),
                           VTraits<v_int32>::vlanes());
}

static inline v_int32 vx_load_expand_q(const schar * ptr) { return v_load_expand_q(ptr); }

inline v_float64 v_cvt_f64(const v_int32& a)
{
    return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_int32>::vlanes()), 0);
}

inline v_float64 v_cvt_f64_high(const v_int32& a)
{
    return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_int32>::vlanes()), 1);
}

inline void v_store(double* ptr, const v_float64& a)
{
    __riscv_vse64(ptr, a, VTraits<v_float64>::vlanes());
}

static inline void v_store_pair_as(double* ptr, const v_float64& a, const v_float64& b)
{
    v_store(ptr, a);
    v_store(ptr + VTraits<v_float64>::vlanes(), b);
}

static inline void vx_load_pair_as(const schar* ptr, v_float64& a, v_float64& b)
{
    v_int32 v0 = vx_load_expand_q(ptr);
    a = v_cvt_f64(v0);
    b = v_cvt_f64_high(v0);
}

inline v_float64 v_fma(const v_float64& a, const v_float64& b, const v_float64& c)
{
    return __riscv_vfmacc_vv_f64m2(c, a, b, VTraits<v_float64>::vlanes());
}

template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }

template<typename _Ts, typename _Td> __attribute__((noinline))
void cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
              Size size, double a, double b )
{
    v_float64 va = vx_setall_f64(a), vb = vx_setall_f64(b);
    const int VECSZ = VTraits<v_float64>::vlanes()*2;
    sstep /= sizeof(src[0]);
    dstep /= sizeof(dst[0]);

    for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
    {
        int j = 0;

        for( ; j < size.width; j += VECSZ )
        {
            if( j > size.width - VECSZ )
            {
                if( j == 0 || src == (_Ts*)dst )
                    break;
                j = size.width - VECSZ;
            }
            v_float64 v0, v1;
            vx_load_pair_as(src + j, v0, v1);
            v0 = v_fma(v0, va, vb);
            v1 = v_fma(v1, va, vb);
            v_store_pair_as(dst + j, v0, v1);
        }

        for( ; j < size.width; j++ )
            dst[j] = saturate_cast<_Td>(src[j]*a + b);
    }
}

void cvtScale8s64f( const uchar* src_, size_t sstep, const uchar*, size_t,
                    uchar* dst_, size_t dstep, Size size, void* scale_ )
{
    const schar* src = (const schar*)src_;
    double* dst = (double*)dst_;
    double* scale = (double*)scale_;
    cvt_64f(src, sstep, dst, dstep, size, (double)scale[0], (double)scale[1]);
}
```

asm code:

```
.L14:
        add     t6,t5,a4
        ble     a5,t4,.L10
        beq     a5,zero,.L7
        ble     t1,a5,.L41
        subw    a4,s3,a5
        subw    s5,t1,a5
        slli    s4,a5,3
        add     t6,a0,a5
        bgtu    a4,s2,.L53
.L15:
        mv      a5,t6
        add     a4,a2,s4
        vsetvli a7,zero,e8,mf2,ta,ma    // this vsetvli modifies a7, which holds the loop exit condition
.L19:
        lb      t3,0(a5)
        addi    a5,a5,1
        addi    a4,a4,8
        fcvt.d.w fa5,t3
        fmadd.d fa5,fa0,fa5,fa1
        fsd     fa5,-8(a4)
        bne     a5,a7,.L19              // loop exit condition
        addiw   t2,t2,1
        bne     s0,t2,.L54
.L40:
        ld      s1,64(sp)
        ld      s2,56(sp)
        ld      s3,48(sp)
        ld      s4,40(sp)
        ld      s5,32(sp)
        ld      s6,24(sp)
.L38:
```
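
For reference: `vsetvli rd, x0, vtypei` with rd != x0 sets vl to VLMAX for the
requested SEW/LMUL and writes that VLMAX into rd. In the asm above the
destination is a7, while the scalar tail loop at .L19 still expects a7 to hold
its end address for the `bne a5,a7,.L19` test, so the bound is replaced by
VLMAX. Below is a minimal sketch (not part of the original testcase, using the
same intrinsic family as the reproducer) that only shows the VLMAX value such a
vsetvli writes into its destination register; the printed value depends on VLEN:

```
#include <riscv_vector.h>
#include <cstdio>

int main()
{
    // __riscv_vsetvlmax_e8mf2() compiles to a "vsetvli rd,zero,e8,mf2,ta,ma"
    // form: with rs1 == x0 and rd != x0 the instruction sets vl to VLMAX for
    // SEW=8 / LMUL=1/2 and writes that VLMAX into rd.  If rd is allocated to
    // a register that still holds live scalar data (a7 in the asm above),
    // that data is silently overwritten.
    size_t vlmax = __riscv_vsetvlmax_e8mf2();
    std::printf("VLMAX for e8/mf2 = %zu\n", vlmax);   // e.g. 8 when VLEN=128
    return 0;
}
```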