Soumya AR <soum...@nvidia.com> writes:
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 06bd3e4bb2c..119a0e53853 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -5088,6 +5088,21 @@
>  ;; - FTSSEL
>  ;; -------------------------------------------------------------------------
>  
> +(define_expand "ldexp<mode>3"
> +  [(set (match_operand:GPF_HF 0 "register_operand")
> +	(unspec:GPF_HF
> +	  [(match_dup 3)
> +	   (const_int SVE_RELAXED_GP)

Sorry for only noticing now, but: this should be SVE_STRICT_GP instead
of SVE_RELAXED_GP, since we don't want to allow other lanes to be made
active later.
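That is, the whole expander would presumably read as follows (only the
strictness constant changes, everything else stays as-is):

(define_expand "ldexp<mode>3"
  [(set (match_operand:GPF_HF 0 "register_operand")
	(unspec:GPF_HF
	  [(match_dup 3)
	   (const_int SVE_STRICT_GP)
	   (match_operand:GPF_HF 1 "register_operand")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand")]
	  UNSPEC_COND_FSCALE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode,
				     GET_MODE_UNIT_SIZE (<MODE>mode));
  }
)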
> +	   (match_operand:GPF_HF 1 "register_operand")
> +	   (match_operand:<V_INT_EQUIV> 2 "register_operand")]
> +	  UNSPEC_COND_FSCALE))]
> +  "TARGET_SVE"
> +  {
> +    operands[3] = aarch64_ptrue_reg (<VPRED>mode,
> +				     GET_MODE_UNIT_SIZE (<MODE>mode));
> +  }
> +)
> +
>  ;; Unpredicated floating-point binary operations that take an integer as
>  ;; their second operand.
>  (define_insn "@aarch64_sve_<optab><mode>"
> @@ -5103,17 +5118,17 @@
>  ;; Predicated floating-point binary operations that take an integer
>  ;; as their second operand.
>  (define_insn "@aarch64_pred_<optab><mode>"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -	(unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_FULL_F_SCALAR 0 "register_operand")
> +	(unspec:SVE_FULL_F_SCALAR
>  	  [(match_operand:<VPRED> 1 "register_operand")
>  	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
> -	   (match_operand:SVE_FULL_F 2 "register_operand")
> +	   (match_operand:SVE_FULL_F_SCALAR 2 "register_operand")
>  	   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
>  	  SVE_COND_FP_BINARY_INT))]
>    "TARGET_SVE"
>    {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
> -     [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> -     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +     [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
> +     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%Z0, %Z2\;<sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
>    }
>  )
> 
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 8269b0cdcd9..4153c72954e 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -452,6 +452,9 @@
>  ;; All fully-packed SVE floating-point vector modes.
>  (define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
> 
> +;; Fully-packed SVE floating-point vector modes and their scalar equivalents.
> +(define_mode_iterator SVE_FULL_F_SCALAR [SVE_FULL_F GPF_HF])
> +
>  ;; Fully-packed SVE integer vector modes that have 8-bit or 16-bit elements.
>  (define_mode_iterator SVE_FULL_BHI [VNx16QI VNx8HI])
> 
> @@ -2302,7 +2305,8 @@
>  			       (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
>  			       (V8QI "VNx8BI") (V16QI "VNx16BI")
>  			       (V4HI "VNx4BI") (V8HI "VNx8BI") (V2SI "VNx2BI")
> -			       (V4SI "VNx4BI") (V2DI "VNx2BI") (V1DI "VNx2BI")])
> +			       (V4SI "VNx4BI") (V2DI "VNx2BI") (V1DI "VNx2BI")
> +			       (HF "VNx8BI") (SF "VNx4BI") (DF "VNx2BI")])
> 
>  ;; ...and again in lower case.
>  (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index c3d0efc0f2c..09b7844d094 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -441,7 +441,7 @@ DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
>  DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
> 
>  /* FP scales.  */
> -DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
> +DEF_INTERNAL_FLT_FLOATN_FN (LDEXP, ECF_CONST, ldexp, binary)
> 
>  /* Ternary math functions.  */
>  DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fscale.c b/gcc/testsuite/gcc.target/aarch64/sve/fscale.c
> new file mode 100644
> index 00000000000..2c32d410f6b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fscale.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-Ofast" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** test_ldexpf16:
> +**	...
> +**	ptrue	p[0-7]\.b, vl2

It would be more robust to capture the register using:

**	ptrue	(p[0-7])\.b, vl2

> +**	...
> +**	fscale	z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h

and then match it here using:

**	fscale	z[0-9]+\.h, \1/m, z[0-9]+\.h, z[0-9]+\.h

Same for the other tests.
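I.e., the first body check would end up as something like this (just
the quoted regexps with the capture applied; the other two checks
follow the same pattern):

/*
** test_ldexpf16:
**	...
**	ptrue	(p[0-7])\.b, vl2
**	...
**	fscale	z[0-9]+\.h, \1/m, z[0-9]+\.h, z[0-9]+\.h
**	ret
*/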
OK with those changes if they work (no need for another review unless
you'd prefer one).

Thanks,
Richard

> +**	ret
> +*/
> +_Float16
> +test_ldexpf16 (_Float16 x, int i)
> +{
> +  return __builtin_ldexpf16 (x, i);
> +}
> +
> +/*
> +** test_ldexpf:
> +**	...
> +**	ptrue	p[0-7]\.b, vl4
> +**	...
> +**	fscale	z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s
> +**	ret
> +*/
> +float
> +test_ldexpf (float x, int i)
> +{
> +  return __builtin_ldexpf (x, i);
> +}
> +
> +/*
> +** test_ldexp:
> +**	...
> +**	ptrue	p[0-7]\.b, vl8
> +**	...
> +**	fscale	z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d
> +**	ret
> +*/
> +double
> +test_ldexp (double x, int i)
> +{
> +  return __builtin_ldexp (x, i);
> +}
> +