Introduce support for a new set of NEON square-root intrinsics for half, single, and double precision.

modified: gcc/config/arm/arm-builtins.cc
  1. Define the df_UP macro to map to E_DFmode.
  2. Add CODE_FOR_neon_vsqrtsf and CODE_FOR_neon_vsqrtdf constants that
     reference the underlying VFP sqrt RTL patterns (sqrtsf2 and sqrtdf2).

modified: gcc/config/arm/arm_vfp_builtins.def
  1. Replace the single-mode entry for vsqrt with a unified VAR3 entry
     that supports hf, sf, and df modes.

These modifications enable the use of __builtin_neon_vsqrt{hf,sf,df} in
user code and ensure the correct mode is selected for each precision
variant.

Signed-off-by: Ayan Shafqat <ayan.x.shaf...@gmail.com>
Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
---
 gcc/config/arm/arm-builtins.cc      | 3 +++
 gcc/config/arm/arm_vfp_builtins.def | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index c56ab5db985..acc86c7e8a1 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -694,6 +694,7 @@ arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define hi_UP E_HImode
 #define void_UP E_VOIDmode
 #define sf_UP E_SFmode
+#define df_UP E_DFmode
 #define UP(X) X##_UP
 
 typedef struct {
@@ -710,6 +711,8 @@ constexpr insn_code CODE_FOR_neon_usdotv8qi = CODE_FOR_neon_usdotv2siv8qi;
 constexpr insn_code CODE_FOR_neon_sdotv16qi = CODE_FOR_neon_sdotv4siv16qi;
 constexpr insn_code CODE_FOR_neon_udotv16qi = CODE_FOR_neon_udotv4siv16qi;
 constexpr insn_code CODE_FOR_neon_usdotv16qi = CODE_FOR_neon_usdotv4siv16qi;
+constexpr insn_code CODE_FOR_neon_vsqrtsf = CODE_FOR_sqrtsf2;
+constexpr insn_code CODE_FOR_neon_vsqrtdf = CODE_FOR_sqrtdf2;
 
 #define CF(N,X) CODE_FOR_neon_##N##X
 
diff --git a/gcc/config/arm/arm_vfp_builtins.def b/gcc/config/arm/arm_vfp_builtins.def
index 1fbf71e728e..8cafd72b565 100644
--- a/gcc/config/arm/arm_vfp_builtins.def
+++ b/gcc/config/arm/arm_vfp_builtins.def
@@ -40,7 +40,7 @@ VAR1 (UNOP, vrndm, hf)
 VAR1 (UNOP, vrndn, hf)
 VAR1 (UNOP, vrndp, hf)
 VAR1 (UNOP, vrndx, hf)
-VAR1 (UNOP, vsqrt, hf)
+VAR3 (UNOP, vsqrt, hf, sf, df)
 
 VAR2 (BINOP, vcvths_n, hf, si)
 VAR2 (BINOP, vcvthu_n, hf, si)
-- 
2.43.0