https://gcc.gnu.org/g:294e5424f294cdbd79aa5dcd755f4c825fa814c9
commit r15-6228-g294e5424f294cdbd79aa5dcd755f4c825fa814c9
Author: Christophe Lyon <christophe.l...@linaro.org>
Date:   Fri Oct 11 15:37:03 2024 +0000

    arm: [MVE intrinsics] rework vstr_scatter_shifted_offset

    Implement vstr?q_scatter_shifted_offset intrinsics using the MVE
    builtins framework.

    We use the same approach as the previous patch, and we now have four
    sets of patterns:
    - vector scatter stores with shifted offset (non-truncating)
    - predicated vector scatter stores with shifted offset (non-truncating)
    - truncating vector scatter stores with shifted offset
    - predicated truncating vector scatter stores with shifted offset

    Note that the truncating patterns do not use an iterator since there
    is only one such variant: V4SI to V4HI.

    We need to introduce new iterators:
    - MVE_VLD_ST_scatter_shifted, same as MVE_VLD_ST_scatter without V16QI
    - MVE_scatter_shift to map the mode to the shift amount

    gcc/ChangeLog:

            * config/arm/arm-builtins.cc (arm_strss_qualifiers)
            (arm_strsu_qualifiers, arm_strsu_p_qualifiers)
            (arm_strss_p_qualifiers): Delete.
            * config/arm/arm-mve-builtins-base.cc (class vstrq_scatter_impl):
            Add support for shifted version.
            (vstrdq_scatter_shifted, vstrhq_scatter_shifted)
            (vstrwq_scatter_shifted): New.
            * config/arm/arm-mve-builtins-base.def (vstrhq_scatter_shifted)
            (vstrwq_scatter_shifted, vstrdq_scatter_shifted): New.
            * config/arm/arm-mve-builtins-base.h (vstrhq_scatter_shifted)
            (vstrwq_scatter_shifted, vstrdq_scatter_shifted): New.
            * config/arm/arm_mve.h (vstrhq_scatter_shifted_offset): Delete.
            (vstrhq_scatter_shifted_offset_p): Delete.
            (vstrdq_scatter_shifted_offset_p): Delete.
            (vstrdq_scatter_shifted_offset): Delete.
            (vstrwq_scatter_shifted_offset_p): Delete.
            (vstrwq_scatter_shifted_offset): Delete.
            (vstrhq_scatter_shifted_offset_s32): Delete.
            (vstrhq_scatter_shifted_offset_s16): Delete.
            (vstrhq_scatter_shifted_offset_u32): Delete.
            (vstrhq_scatter_shifted_offset_u16): Delete.
            (vstrhq_scatter_shifted_offset_p_s32): Delete.
(vstrhq_scatter_shifted_offset_p_s16): Delete. (vstrhq_scatter_shifted_offset_p_u32): Delete. (vstrhq_scatter_shifted_offset_p_u16): Delete. (vstrdq_scatter_shifted_offset_p_s64): Delete. (vstrdq_scatter_shifted_offset_p_u64): Delete. (vstrdq_scatter_shifted_offset_s64): Delete. (vstrdq_scatter_shifted_offset_u64): Delete. (vstrhq_scatter_shifted_offset_f16): Delete. (vstrhq_scatter_shifted_offset_p_f16): Delete. (vstrwq_scatter_shifted_offset_f32): Delete. (vstrwq_scatter_shifted_offset_p_f32): Delete. (vstrwq_scatter_shifted_offset_p_s32): Delete. (vstrwq_scatter_shifted_offset_p_u32): Delete. (vstrwq_scatter_shifted_offset_s32): Delete. (vstrwq_scatter_shifted_offset_u32): Delete. (__arm_vstrhq_scatter_shifted_offset_s32): Delete. (__arm_vstrhq_scatter_shifted_offset_s16): Delete. (__arm_vstrhq_scatter_shifted_offset_u32): Delete. (__arm_vstrhq_scatter_shifted_offset_u16): Delete. (__arm_vstrhq_scatter_shifted_offset_p_s32): Delete. (__arm_vstrhq_scatter_shifted_offset_p_s16): Delete. (__arm_vstrhq_scatter_shifted_offset_p_u32): Delete. (__arm_vstrhq_scatter_shifted_offset_p_u16): Delete. (__arm_vstrdq_scatter_shifted_offset_p_s64): Delete. (__arm_vstrdq_scatter_shifted_offset_p_u64): Delete. (__arm_vstrdq_scatter_shifted_offset_s64): Delete. (__arm_vstrdq_scatter_shifted_offset_u64): Delete. (__arm_vstrwq_scatter_shifted_offset_p_s32): Delete. (__arm_vstrwq_scatter_shifted_offset_p_u32): Delete. (__arm_vstrwq_scatter_shifted_offset_s32): Delete. (__arm_vstrwq_scatter_shifted_offset_u32): Delete. (__arm_vstrhq_scatter_shifted_offset_f16): Delete. (__arm_vstrhq_scatter_shifted_offset_p_f16): Delete. (__arm_vstrwq_scatter_shifted_offset_f32): Delete. (__arm_vstrwq_scatter_shifted_offset_p_f32): Delete. (__arm_vstrhq_scatter_shifted_offset): Delete. (__arm_vstrhq_scatter_shifted_offset_p): Delete. (__arm_vstrdq_scatter_shifted_offset_p): Delete. (__arm_vstrdq_scatter_shifted_offset): Delete. (__arm_vstrwq_scatter_shifted_offset_p): Delete. 
(__arm_vstrwq_scatter_shifted_offset): Delete. * config/arm/arm_mve_builtins.def (vstrhq_scatter_shifted_offset_p_u) (vstrhq_scatter_shifted_offset_u) (vstrhq_scatter_shifted_offset_p_s) (vstrhq_scatter_shifted_offset_s, vstrdq_scatter_shifted_offset_s) (vstrhq_scatter_shifted_offset_f, vstrwq_scatter_shifted_offset_f) (vstrwq_scatter_shifted_offset_s) (vstrdq_scatter_shifted_offset_p_s) (vstrhq_scatter_shifted_offset_p_f) (vstrwq_scatter_shifted_offset_p_f) (vstrwq_scatter_shifted_offset_p_s) (vstrdq_scatter_shifted_offset_u, vstrwq_scatter_shifted_offset_u) (vstrdq_scatter_shifted_offset_p_u) (vstrwq_scatter_shifted_offset_p_u): Delete. * config/arm/iterators.md (MVE_VLD_ST_scatter_shifted): New. (MVE_scatter_shift): New. (supf): Remove VSTRHQSSO_S, VSTRHQSSO_U, VSTRDQSSO_S, VSTRDQSSO_U, VSTRWQSSO_U, VSTRWQSSO_S. (VSTRHSSOQ, VSTRDSSOQ, VSTRWSSOQ): Delete. * config/arm/mve.md (mve_vstrhq_scatter_shifted_offset_p_<supf><mode>): Delete. (mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn): Delete. (mve_vstrhq_scatter_shifted_offset_<supf><mode>): Delete. (mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn): Delete. (mve_vstrdq_scatter_shifted_offset_p_<supf>v2di): Delete. (mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn): Delete. (mve_vstrdq_scatter_shifted_offset_<supf>v2di): Delete. (mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn): Delete. (mve_vstrhq_scatter_shifted_offset_fv8hf): Delete. (mve_vstrhq_scatter_shifted_offset_fv8hf_insn): Delete. (mve_vstrhq_scatter_shifted_offset_p_fv8hf): Delete. (mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn): Delete. (mve_vstrwq_scatter_shifted_offset_fv4sf): Delete. (mve_vstrwq_scatter_shifted_offset_fv4sf_insn): Delete. (mve_vstrwq_scatter_shifted_offset_p_fv4sf): Delete. (mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn): Delete. (mve_vstrwq_scatter_shifted_offset_p_<supf>v4si): Delete. (mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn): Delete. (mve_vstrwq_scatter_shifted_offset_<supf>v4si): Delete. 
(mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn): Delete. (@mve_vstrq_scatter_shifted_offset_<mode>): New. (@mve_vstrq_scatter_shifted_offset_p_<mode>): New. (mve_vstrq_truncate_scatter_shifted_offset_v4si): New. (mve_vstrq_truncate_scatter_shifted_offset_p_v4si): New. * config/arm/unspecs.md (VSTRDQSSO_S, VSTRDQSSO_U, VSTRWQSSO_S) (VSTRWQSSO_U, VSTRHQSSO_F, VSTRWQSSO_F, VSTRHQSSO_S, VSTRHQSSO_U): Delete. (VSTRSSOQ, VSTRSSOQ_P, VSTRSSOQ_TRUNC, VSTRSSOQ_TRUNC_P): New. Diff: --- gcc/config/arm/arm-builtins.cc | 24 -- gcc/config/arm/arm-mve-builtins-base.cc | 37 ++- gcc/config/arm/arm-mve-builtins-base.def | 5 + gcc/config/arm/arm-mve-builtins-base.h | 3 + gcc/config/arm/arm_mve.h | 426 ------------------------------- gcc/config/arm/arm_mve_builtins.def | 16 -- gcc/config/arm/iterators.md | 10 +- gcc/config/arm/mve.md | 356 +++++--------------------- gcc/config/arm/unspecs.md | 12 +- 9 files changed, 103 insertions(+), 786 deletions(-) diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index 71b49e453bd2..5c9b1d80747d 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -610,18 +610,6 @@ arm_quadop_unone_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUADOP_UNONE_UNONE_UNONE_NONE_PRED_QUALIFIERS \ (arm_quadop_unone_unone_unone_none_pred_qualifiers) -static enum arm_type_qualifiers -arm_strss_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_pointer, qualifier_unsigned, - qualifier_none}; -#define STRSS_QUALIFIERS (arm_strss_qualifiers) - -static enum arm_type_qualifiers -arm_strsu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_pointer, qualifier_unsigned, - qualifier_unsigned}; -#define STRSU_QUALIFIERS (arm_strsu_qualifiers) - static enum arm_type_qualifiers arm_strsbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_unsigned, qualifier_immediate, qualifier_none}; @@ -633,18 +621,6 @@ arm_strsbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] 
qualifier_unsigned}; #define STRSBU_QUALIFIERS (arm_strsbu_qualifiers) -static enum arm_type_qualifiers -arm_strsu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_pointer, qualifier_unsigned, - qualifier_unsigned, qualifier_predicate}; -#define STRSU_P_QUALIFIERS (arm_strsu_p_qualifiers) - -static enum arm_type_qualifiers -arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_pointer, qualifier_unsigned, - qualifier_none, qualifier_predicate}; -#define STRSS_P_QUALIFIERS (arm_strss_p_qualifiers) - static enum arm_type_qualifiers arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_unsigned, qualifier_immediate, diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index 855115c009fb..40ac09af62b7 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -245,6 +245,16 @@ class vstrq_scatter_impl : public store_truncating public: using store_truncating::store_truncating; + CONSTEXPR vstrq_scatter_impl (bool shifted, + scalar_mode to_int_mode, + opt_scalar_mode to_float_mode) + : store_truncating (to_int_mode, to_float_mode), + m_shifted (shifted) + {} + + /* Shifted offset (true) or plain offset (false). */ + bool m_shifted; + rtx expand (function_expander &e) const override { insn_code icode; @@ -255,15 +265,23 @@ public: case PRED_none: icode = (e.vector_mode (0) == memory_mode /* Non-truncating store case. */ - ? code_for_mve_vstrq_scatter_offset (memory_mode) + ? (m_shifted + ? code_for_mve_vstrq_scatter_shifted_offset (memory_mode) + : code_for_mve_vstrq_scatter_offset (memory_mode)) /* Truncating store case. */ - : code_for_mve_vstrq_truncate_scatter_offset (memory_mode)); + : (m_shifted + ? CODE_FOR_mve_vstrq_truncate_scatter_shifted_offset_v4si + : code_for_mve_vstrq_truncate_scatter_offset (memory_mode))); break; case PRED_p: icode = (e.vector_mode (0) == memory_mode - ? 
code_for_mve_vstrq_scatter_offset_p (memory_mode) - : code_for_mve_vstrq_truncate_scatter_offset_p (memory_mode)); + ? (m_shifted + ? code_for_mve_vstrq_scatter_shifted_offset_p (memory_mode) + : code_for_mve_vstrq_scatter_offset_p (memory_mode)) + : (m_shifted + ? CODE_FOR_mve_vstrq_truncate_scatter_shifted_offset_p_v4si + : code_for_mve_vstrq_truncate_scatter_offset_p (memory_mode))); break; default: @@ -1241,12 +1259,15 @@ FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ) FUNCTION_ONLY_N_NO_F (vsriq, VSRIQ) FUNCTION (vst1q, vst1_impl,) FUNCTION (vstrbq, vstrq_impl, (QImode, opt_scalar_mode ())) -FUNCTION (vstrbq_scatter, vstrq_scatter_impl, (QImode, opt_scalar_mode ())) -FUNCTION (vstrdq_scatter, vstrq_scatter_impl, (DImode, opt_scalar_mode ())) +FUNCTION (vstrbq_scatter, vstrq_scatter_impl, (false, QImode, opt_scalar_mode ())) +FUNCTION (vstrdq_scatter, vstrq_scatter_impl, (false, DImode, opt_scalar_mode ())) +FUNCTION (vstrdq_scatter_shifted, vstrq_scatter_impl, (true, DImode, opt_scalar_mode ())) FUNCTION (vstrhq, vstrq_impl, (HImode, HFmode)) -FUNCTION (vstrhq_scatter, vstrq_scatter_impl, (HImode, HFmode)) +FUNCTION (vstrhq_scatter, vstrq_scatter_impl, (false, HImode, HFmode)) +FUNCTION (vstrhq_scatter_shifted, vstrq_scatter_impl, (true, HImode, HFmode)) FUNCTION (vstrwq, vstrq_impl, (SImode, SFmode)) -FUNCTION (vstrwq_scatter, vstrq_scatter_impl, (SImode, SFmode)) +FUNCTION (vstrwq_scatter, vstrq_scatter_impl, (false, SImode, SFmode)) +FUNCTION (vstrwq_scatter_shifted, vstrq_scatter_impl, (true, SImode, SFmode)) FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ) FUNCTION (vuninitializedq, vuninitializedq_impl,) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index 30b576f01ed8..d4c28be904ba 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -174,9 +174,12 @@ DEF_MVE_FUNCTION (vstrbq, store, all_integer, p_or_none) DEF_MVE_FUNCTION (vstrbq_scatter, store_scatter_offset, 
all_integer, p_or_none) DEF_MVE_FUNCTION (vstrhq, store, integer_16_32, p_or_none) DEF_MVE_FUNCTION (vstrhq_scatter, store_scatter_offset, integer_16_32, p_or_none) +DEF_MVE_FUNCTION (vstrhq_scatter_shifted, store_scatter_offset, integer_16_32, p_or_none) DEF_MVE_FUNCTION (vstrwq, store, integer_32, p_or_none) DEF_MVE_FUNCTION (vstrwq_scatter, store_scatter_offset, integer_32, p_or_none) +DEF_MVE_FUNCTION (vstrwq_scatter_shifted, store_scatter_offset, integer_32, p_or_none) DEF_MVE_FUNCTION (vstrdq_scatter, store_scatter_offset, integer_64, p_or_none) +DEF_MVE_FUNCTION (vstrdq_scatter_shifted, store_scatter_offset, integer_64, p_or_none) DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none) DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none) #undef REQUIRES_FLOAT @@ -247,8 +250,10 @@ DEF_MVE_FUNCTION (vrndxq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vst1q, store, all_float, p_or_none) DEF_MVE_FUNCTION (vstrhq, store, float_16, p_or_none) DEF_MVE_FUNCTION (vstrhq_scatter, store_scatter_offset, float_16, p_or_none) +DEF_MVE_FUNCTION (vstrhq_scatter_shifted, store_scatter_offset, float_16, p_or_none) DEF_MVE_FUNCTION (vstrwq, store, float_32, p_or_none) DEF_MVE_FUNCTION (vstrwq_scatter, store_scatter_offset, float_32, p_or_none) +DEF_MVE_FUNCTION (vstrwq_scatter_shifted, store_scatter_offset, float_32, p_or_none) DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_float, mx_or_none) DEF_MVE_FUNCTION (vuninitializedq, inherent, all_float, none) #undef REQUIRES_FLOAT diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index 6ff3b149bc65..096e707bb91c 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -208,10 +208,13 @@ extern const function_base *const vst1q; extern const function_base *const vstrbq; extern const function_base *const vstrbq_scatter; extern const function_base *const vstrdq_scatter; +extern const function_base *const 
vstrdq_scatter_shifted; extern const function_base *const vstrhq; extern const function_base *const vstrhq_scatter; +extern const function_base *const vstrhq_scatter_shifted; extern const function_base *const vstrwq; extern const function_base *const vstrwq_scatter; +extern const function_base *const vstrwq_scatter_shifted; extern const function_base *const vsubq; extern const function_base *const vuninitializedq; diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 7946734651d7..cf2cde49fefe 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -62,14 +62,8 @@ #define vldrwq_gather_offset_z(__base, __offset, __p) __arm_vldrwq_gather_offset_z(__base, __offset, __p) #define vldrwq_gather_shifted_offset(__base, __offset) __arm_vldrwq_gather_shifted_offset(__base, __offset) #define vldrwq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p) -#define vstrhq_scatter_shifted_offset(__base, __offset, __value) __arm_vstrhq_scatter_shifted_offset(__base, __offset, __value) -#define vstrhq_scatter_shifted_offset_p(__base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p(__base, __offset, __value, __p) #define vstrdq_scatter_base_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p(__addr, __offset, __value, __p) #define vstrdq_scatter_base(__addr, __offset, __value) __arm_vstrdq_scatter_base(__addr, __offset, __value) -#define vstrdq_scatter_shifted_offset_p(__base, __offset, __value, __p) __arm_vstrdq_scatter_shifted_offset_p(__base, __offset, __value, __p) -#define vstrdq_scatter_shifted_offset(__base, __offset, __value) __arm_vstrdq_scatter_shifted_offset(__base, __offset, __value) -#define vstrwq_scatter_shifted_offset_p(__base, __offset, __value, __p) __arm_vstrwq_scatter_shifted_offset_p(__base, __offset, __value, __p) -#define vstrwq_scatter_shifted_offset(__base, __offset, __value) __arm_vstrwq_scatter_shifted_offset(__base, __offset, __value) #define 
vuninitializedq(__v) __arm_vuninitializedq(__v) #define vstrdq_scatter_base_wb(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb(__addr, __offset, __value) #define vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p) @@ -157,32 +151,12 @@ #define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) #define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) #define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) -#define vstrhq_scatter_shifted_offset_s32( __base, __offset, __value) __arm_vstrhq_scatter_shifted_offset_s32( __base, __offset, __value) -#define vstrhq_scatter_shifted_offset_s16( __base, __offset, __value) __arm_vstrhq_scatter_shifted_offset_s16( __base, __offset, __value) -#define vstrhq_scatter_shifted_offset_u32( __base, __offset, __value) __arm_vstrhq_scatter_shifted_offset_u32( __base, __offset, __value) -#define vstrhq_scatter_shifted_offset_u16( __base, __offset, __value) __arm_vstrhq_scatter_shifted_offset_u16( __base, __offset, __value) -#define vstrhq_scatter_shifted_offset_p_s32( __base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_s32( __base, __offset, __value, __p) -#define vstrhq_scatter_shifted_offset_p_s16( __base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_s16( __base, __offset, __value, __p) -#define vstrhq_scatter_shifted_offset_p_u32( __base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_u32( __base, __offset, __value, __p) -#define vstrhq_scatter_shifted_offset_p_u16( __base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_u16( __base, __offset, __value, __p) #define vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p) 
#define vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p) #define vstrdq_scatter_base_s64(__addr, __offset, __value) __arm_vstrdq_scatter_base_s64(__addr, __offset, __value) #define vstrdq_scatter_base_u64(__addr, __offset, __value) __arm_vstrdq_scatter_base_u64(__addr, __offset, __value) -#define vstrdq_scatter_shifted_offset_p_s64(__base, __offset, __value, __p) __arm_vstrdq_scatter_shifted_offset_p_s64(__base, __offset, __value, __p) -#define vstrdq_scatter_shifted_offset_p_u64(__base, __offset, __value, __p) __arm_vstrdq_scatter_shifted_offset_p_u64(__base, __offset, __value, __p) -#define vstrdq_scatter_shifted_offset_s64(__base, __offset, __value) __arm_vstrdq_scatter_shifted_offset_s64(__base, __offset, __value) -#define vstrdq_scatter_shifted_offset_u64(__base, __offset, __value) __arm_vstrdq_scatter_shifted_offset_u64(__base, __offset, __value) -#define vstrhq_scatter_shifted_offset_f16(__base, __offset, __value) __arm_vstrhq_scatter_shifted_offset_f16(__base, __offset, __value) -#define vstrhq_scatter_shifted_offset_p_f16(__base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_f16(__base, __offset, __value, __p) #define vstrwq_scatter_base_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_f32(__addr, __offset, __value) #define vstrwq_scatter_base_p_f32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_p_f32(__addr, __offset, __value, __p) -#define vstrwq_scatter_shifted_offset_f32(__base, __offset, __value) __arm_vstrwq_scatter_shifted_offset_f32(__base, __offset, __value) -#define vstrwq_scatter_shifted_offset_p_f32(__base, __offset, __value, __p) __arm_vstrwq_scatter_shifted_offset_p_f32(__base, __offset, __value, __p) -#define vstrwq_scatter_shifted_offset_p_s32(__base, __offset, __value, __p) __arm_vstrwq_scatter_shifted_offset_p_s32(__base, __offset, __value, __p) -#define vstrwq_scatter_shifted_offset_p_u32(__base, __offset, __value, __p) 
__arm_vstrwq_scatter_shifted_offset_p_u32(__base, __offset, __value, __p) -#define vstrwq_scatter_shifted_offset_s32(__base, __offset, __value) __arm_vstrwq_scatter_shifted_offset_s32(__base, __offset, __value) -#define vstrwq_scatter_shifted_offset_u32(__base, __offset, __value) __arm_vstrwq_scatter_shifted_offset_u32(__base, __offset, __value) #define vuninitializedq_u8(void) __arm_vuninitializedq_u8(void) #define vuninitializedq_u16(void) __arm_vuninitializedq_u16(void) #define vuninitializedq_u32(void) __arm_vuninitializedq_u32(void) @@ -741,62 +715,6 @@ __arm_vldrwq_gather_shifted_offset_z_u32 (uint32_t const * __base, uint32x4_t __ return __builtin_mve_vldrwq_gather_shifted_offset_z_uv4si ((__builtin_neon_si *) __base, __offset, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_s32 (int16_t * __base, uint32x4_t __offset, int32x4_t __value) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_sv4si ((__builtin_neon_hi *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_s16 (int16_t * __base, uint16x8_t __offset, int16x8_t __value) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_sv8hi ((__builtin_neon_hi *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_u32 (uint16_t * __base, uint32x4_t __offset, uint32x4_t __value) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_uv4si ((__builtin_neon_hi *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_u16 (uint16_t * __base, uint16x8_t __offset, uint16x8_t __value) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_uv8hi ((__builtin_neon_hi *) __base, 
__offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p_s32 (int16_t * __base, uint32x4_t __offset, int32x4_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_p_sv4si ((__builtin_neon_hi *) __base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p_s16 (int16_t * __base, uint16x8_t __offset, int16x8_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_p_sv8hi ((__builtin_neon_hi *) __base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p_u32 (uint16_t * __base, uint32x4_t __offset, uint32x4_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_p_uv4si ((__builtin_neon_hi *) __base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p_u16 (uint16_t * __base, uint16x8_t __offset, uint16x8_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_p_uv8hi ((__builtin_neon_hi *) __base, __offset, __value, __p); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrdq_scatter_base_p_s64 (uint64x2_t __addr, const int __offset, int64x2_t __value, mve_pred16_t __p) @@ -825,62 +743,6 @@ __arm_vstrdq_scatter_base_u64 (uint64x2_t __addr, const int __offset, uint64x2_t __builtin_mve_vstrdq_scatter_base_uv2di (__addr, __offset, __value); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset_p_s64 (int64_t * __base, uint64x2_t __offset, int64x2_t __value, 
mve_pred16_t __p) -{ - __builtin_mve_vstrdq_scatter_shifted_offset_p_sv2di ((__builtin_neon_di *) __base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset_p_u64 (uint64_t * __base, uint64x2_t __offset, uint64x2_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrdq_scatter_shifted_offset_p_uv2di ((__builtin_neon_di *) __base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset_s64 (int64_t * __base, uint64x2_t __offset, int64x2_t __value) -{ - __builtin_mve_vstrdq_scatter_shifted_offset_sv2di ((__builtin_neon_di *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset_u64 (uint64_t * __base, uint64x2_t __offset, uint64x2_t __value) -{ - __builtin_mve_vstrdq_scatter_shifted_offset_uv2di ((__builtin_neon_di *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_p_s32 (int32_t * __base, uint32x4_t __offset, int32x4_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrwq_scatter_shifted_offset_p_sv4si ((__builtin_neon_si *) __base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_p_u32 (uint32_t * __base, uint32x4_t __offset, uint32x4_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrwq_scatter_shifted_offset_p_uv4si ((__builtin_neon_si *) __base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_s32 (int32_t * __base, uint32x4_t __offset, int32x4_t 
__value) -{ - __builtin_mve_vstrwq_scatter_shifted_offset_sv4si ((__builtin_neon_si *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_u32 (uint32_t * __base, uint32x4_t __offset, uint32x4_t __value) -{ - __builtin_mve_vstrwq_scatter_shifted_offset_uv4si ((__builtin_neon_si *) __base, __offset, __value); -} - __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) @@ -1517,20 +1379,6 @@ __arm_vldrwq_gather_shifted_offset_z_f32 (float32_t const * __base, uint32x4_t _ return __builtin_mve_vldrwq_gather_shifted_offset_z_fv4sf ((__builtin_neon_si *) __base, __offset, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_f16 (float16_t * __base, uint16x8_t __offset, float16x8_t __value) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_fv8hf ((__builtin_neon_hi *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p_f16 (float16_t * __base, uint16x8_t __offset, float16x8_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrhq_scatter_shifted_offset_p_fv8hf ((__builtin_neon_hi *) __base, __offset, __value, __p); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_f32 (uint32x4_t __addr, const int __offset, float32x4_t __value) @@ -1545,20 +1393,6 @@ __arm_vstrwq_scatter_base_p_f32 (uint32x4_t __addr, const int __offset, float32x __builtin_mve_vstrwq_scatter_base_p_fv4sf (__addr, __offset, __value, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-__arm_vstrwq_scatter_shifted_offset_f32 (float32_t * __base, uint32x4_t __offset, float32x4_t __value) -{ - __builtin_mve_vstrwq_scatter_shifted_offset_fv4sf ((__builtin_neon_si *) __base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_p_f32 (float32_t * __base, uint32x4_t __offset, float32x4_t __value, mve_pred16_t __p) -{ - __builtin_mve_vstrwq_scatter_shifted_offset_p_fv4sf ((__builtin_neon_si *) __base, __offset, __value, __p); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) @@ -2061,62 +1895,6 @@ __arm_vldrwq_gather_shifted_offset_z (uint32_t const * __base, uint32x4_t __offs return __arm_vldrwq_gather_shifted_offset_z_u32 (__base, __offset, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset (int16_t * __base, uint32x4_t __offset, int32x4_t __value) -{ - __arm_vstrhq_scatter_shifted_offset_s32 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset (int16_t * __base, uint16x8_t __offset, int16x8_t __value) -{ - __arm_vstrhq_scatter_shifted_offset_s16 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset (uint16_t * __base, uint32x4_t __offset, uint32x4_t __value) -{ - __arm_vstrhq_scatter_shifted_offset_u32 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset (uint16_t * __base, uint16x8_t __offset, uint16x8_t __value) -{ - 
__arm_vstrhq_scatter_shifted_offset_u16 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p (int16_t * __base, uint32x4_t __offset, int32x4_t __value, mve_pred16_t __p) -{ - __arm_vstrhq_scatter_shifted_offset_p_s32 (__base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p (int16_t * __base, uint16x8_t __offset, int16x8_t __value, mve_pred16_t __p) -{ - __arm_vstrhq_scatter_shifted_offset_p_s16 (__base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p (uint16_t * __base, uint32x4_t __offset, uint32x4_t __value, mve_pred16_t __p) -{ - __arm_vstrhq_scatter_shifted_offset_p_u32 (__base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p (uint16_t * __base, uint16x8_t __offset, uint16x8_t __value, mve_pred16_t __p) -{ - __arm_vstrhq_scatter_shifted_offset_p_u16 (__base, __offset, __value, __p); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrdq_scatter_base_p (uint64x2_t __addr, const int __offset, int64x2_t __value, mve_pred16_t __p) @@ -2145,62 +1923,6 @@ __arm_vstrdq_scatter_base (uint64x2_t __addr, const int __offset, uint64x2_t __v __arm_vstrdq_scatter_base_u64 (__addr, __offset, __value); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset_p (int64_t * __base, uint64x2_t __offset, int64x2_t __value, mve_pred16_t __p) -{ - __arm_vstrdq_scatter_shifted_offset_p_s64 (__base, __offset, __value, __p); -} - 
-__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset_p (uint64_t * __base, uint64x2_t __offset, uint64x2_t __value, mve_pred16_t __p) -{ - __arm_vstrdq_scatter_shifted_offset_p_u64 (__base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset (int64_t * __base, uint64x2_t __offset, int64x2_t __value) -{ - __arm_vstrdq_scatter_shifted_offset_s64 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrdq_scatter_shifted_offset (uint64_t * __base, uint64x2_t __offset, uint64x2_t __value) -{ - __arm_vstrdq_scatter_shifted_offset_u64 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_p (int32_t * __base, uint32x4_t __offset, int32x4_t __value, mve_pred16_t __p) -{ - __arm_vstrwq_scatter_shifted_offset_p_s32 (__base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_p (uint32_t * __base, uint32x4_t __offset, uint32x4_t __value, mve_pred16_t __p) -{ - __arm_vstrwq_scatter_shifted_offset_p_u32 (__base, __offset, __value, __p); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset (int32_t * __base, uint32x4_t __offset, int32x4_t __value) -{ - __arm_vstrwq_scatter_shifted_offset_s32 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset (uint32_t * __base, uint32x4_t __offset, uint32x4_t __value) -{ - 
__arm_vstrwq_scatter_shifted_offset_u32 (__base, __offset, __value); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrdq_scatter_base_wb (uint64x2_t * __addr, const int __offset, int64x2_t __value) @@ -2567,20 +2289,6 @@ __arm_vldrwq_gather_shifted_offset_z (float32_t const * __base, uint32x4_t __off return __arm_vldrwq_gather_shifted_offset_z_f32 (__base, __offset, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset (float16_t * __base, uint16x8_t __offset, float16x8_t __value) -{ - __arm_vstrhq_scatter_shifted_offset_f16 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrhq_scatter_shifted_offset_p (float16_t * __base, uint16x8_t __offset, float16x8_t __value, mve_pred16_t __p) -{ - __arm_vstrhq_scatter_shifted_offset_p_f16 (__base, __offset, __value, __p); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base (uint32x4_t __addr, const int __offset, float32x4_t __value) @@ -2595,20 +2303,6 @@ __arm_vstrwq_scatter_base_p (uint32x4_t __addr, const int __offset, float32x4_t __arm_vstrwq_scatter_base_p_f32 (__addr, __offset, __value, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset (float32_t * __base, uint32x4_t __offset, float32x4_t __value) -{ - __arm_vstrwq_scatter_shifted_offset_f32 (__base, __offset, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vstrwq_scatter_shifted_offset_p (float32_t * __base, uint32x4_t __offset, float32x4_t __value, mve_pred16_t __p) -{ - __arm_vstrwq_scatter_shifted_offset_p_f32 (__base, __offset, __value, __p); -} - 
__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, float32x4_t __value) @@ -3105,42 +2799,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x2_t]: __arm_vst2q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x2_t)), \ int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x2_t]: __arm_vst2q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x2_t)));}) -#define __arm_vstrhq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_f16 (__ARM_mve_coerce_f16_ptr(p0, 
float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3));}) - -#define __arm_vstrhq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)), \ - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vstrhq_scatter_shifted_offset_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, float16x8_t)));}) - -#define __arm_vstrhq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), 
__ARM_mve_coerce(__p2, int16x8_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)), \ - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vstrhq_scatter_shifted_offset_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, float16x8_t)));}) - -#define __arm_vstrhq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), 
__ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3));}) - #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ @@ -3153,34 +2811,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32(p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \ int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_p_f32(p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));}) -#define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_shifted_offset_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)), \ - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_shifted_offset_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t)));}) - -#define __arm_vstrwq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( 
(int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \ - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));}) - -#define __arm_vstrwq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \ - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t), p3));}) - -#define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_shifted_offset_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __p1, 
__ARM_mve_coerce(__p2, uint32x4_t)), \ - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_shifted_offset_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __p1, __ARM_mve_coerce(__p2, float32x4_t)));}) - #define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \ @@ -3331,22 +2961,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x2_t]: __arm_vst2q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x2_t)));}) -#define __arm_vstrhq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define __arm_vstrhq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - 
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));}) - #define __arm_vstrdq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \ @@ -3357,34 +2971,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \ int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));}) -#define __arm_vstrhq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, 
int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));}) - -#define __arm_vstrhq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t 
*), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define __arm_vstrwq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_shifted_offset_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t)), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t)));}) - -#define __arm_vstrwq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_shifted_offset_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - #define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \ @@ -3493,18 +3079,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \ int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));}) -#define __arm_vstrdq_scatter_shifted_offset_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: 
__arm_vstrdq_scatter_shifted_offset_p_s64 (__ARM_mve_coerce_s64_ptr(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \ - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_p_u64 (__ARM_mve_coerce_u64_ptr(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));}) - -#define __arm_vstrdq_scatter_shifted_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int64_t_ptr][__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_shifted_offset_s64 (__ARM_mve_coerce_s64_ptr(__p0, int64_t *), p1, __ARM_mve_coerce(__p2, int64x2_t)), \ - int (*)[__ARM_mve_type_uint64_t_ptr][__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_shifted_offset_u64 (__ARM_mve_coerce_u64_ptr(__p0, uint64_t *), p1, __ARM_mve_coerce(__p2, uint64x2_t)));}) - #endif /* __cplusplus */ #endif /* __ARM_FEATURE_MVE */ #endif /* _GCC_ARM_MVE_H. */ diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 167d272916c6..dc0618d56929 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -713,28 +713,12 @@ VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di) VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di) VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si) VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si) -VAR2 (STRSU_P, vstrhq_scatter_shifted_offset_p_u, v8hi, v4si) -VAR2 (STRSU, vstrhq_scatter_shifted_offset_u, v8hi, v4si) -VAR2 (STRSS_P, vstrhq_scatter_shifted_offset_p_s, v8hi, v4si) -VAR2 (STRSS, vstrhq_scatter_shifted_offset_s, v8hi, v4si) VAR1 (STRSBS, vstrdq_scatter_base_s, v2di) VAR1 (STRSBS, vstrwq_scatter_base_f, v4sf) VAR1 (STRSBS_P, vstrdq_scatter_base_p_s, v2di) VAR1 (STRSBS_P, vstrwq_scatter_base_p_f, v4sf) VAR1 (STRSBU, vstrdq_scatter_base_u, v2di) VAR1 (STRSBU_P, vstrdq_scatter_base_p_u, v2di) -VAR1 (STRSS, vstrdq_scatter_shifted_offset_s, 
v2di) -VAR1 (STRSS, vstrhq_scatter_shifted_offset_f, v8hf) -VAR1 (STRSS, vstrwq_scatter_shifted_offset_f, v4sf) -VAR1 (STRSS, vstrwq_scatter_shifted_offset_s, v4si) -VAR1 (STRSS_P, vstrdq_scatter_shifted_offset_p_s, v2di) -VAR1 (STRSS_P, vstrhq_scatter_shifted_offset_p_f, v8hf) -VAR1 (STRSS_P, vstrwq_scatter_shifted_offset_p_f, v4sf) -VAR1 (STRSS_P, vstrwq_scatter_shifted_offset_p_s, v4si) -VAR1 (STRSU, vstrdq_scatter_shifted_offset_u, v2di) -VAR1 (STRSU, vstrwq_scatter_shifted_offset_u, v4si) -VAR1 (STRSU_P, vstrdq_scatter_shifted_offset_p_u, v2di) -VAR1 (STRSU_P, vstrwq_scatter_shifted_offset_p_u, v4si) VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si) VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di) VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index f046225584a8..814f25cb6d3a 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -274,6 +274,7 @@ (define_mode_iterator MVE_vecs [V16QI V8HI V4SI V2DI V8HF V4SF V2DF]) (define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF]) (define_mode_iterator MVE_VLD_ST_scatter [V16QI V8HI V4SI V8HF V4SF V2DI]) +(define_mode_iterator MVE_VLD_ST_scatter_shifted [V8HI V4SI V8HF V4SF V2DI]) (define_mode_iterator MVE_0 [V8HF V4SF]) (define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI]) (define_mode_iterator MVE_3 [V16QI V8HI]) @@ -292,6 +293,7 @@ (define_mode_attr MVE_wide_n_sz_elem [(V8QI "16") (V4QI "32") (V4HI "32")]) (define_mode_attr MVE_wide_n_VPRED [(V8QI "V8BI") (V4QI "V4BI") (V4HI "V4BI")]) (define_mode_attr MVE_scatter_offset [(V16QI "V16QI") (V8HI "V8HI") (V4SI "V4SI") (V8HF "V8HI") (V4SF "V4SI") (V2DI "V2DI")]) +(define_mode_attr MVE_scatter_shift [(V8HI "1") (V4SI "2") (V8HF "1") (V4SF "2") (V2DI "3")]) ;;---------------------------------------------------------------------------- ;; Code iterators @@ -2532,11 +2534,8 @@ (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s") (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U 
"u") (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") - (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRDQSB_S "s") (VSTRDQSB_U "u") - (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") - (VSTRWQSSO_U "u") - (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") + (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s") (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u") @@ -2947,10 +2946,7 @@ (define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U]) (define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U]) (define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U]) -(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U]) (define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U]) -(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) -(define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) (define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) (define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) (define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index b2fb2d878c49..ea85804e7393 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -4072,68 +4072,80 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_shifted_offset_<supf>v4si")) (set_attr "length" "8")]) +;; Vector scatter stores with shifted offset ;; -;; [vstrhq_scatter_shifted_offset_p_s vstrhq_scatter_shifted_offset_p_u] +;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u] +;; [vstrhq_scatter_shifted_offset_f] +;; [vstrwq_scatter_shifted_offset_s vstrwq_scatter_shifted_offset_u] +;; [vstrwq_scatter_shifted_offset_f] +;; [vstrdq_scatter_shifted_offset_s vstrdq_scatter_shifted_offset_u] +(define_insn "@mve_vstrq_scatter_shifted_offset_<mode>" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:<MVE_scatter_offset> 1 "s_register_operand" "w") + 
(match_operand:MVE_VLD_ST_scatter_shifted 2 "s_register_operand" "w")] + VSTRSSOQ))] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vstr<MVE_elem_ch>.<V_sz_elem>\t%q2, [%0, %q1, uxtw #<MVE_scatter_shift>]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_shifted_offset_<mode>")) + (set_attr "length" "4")]) + +;; Truncating vector scatter stores with shifted offset ;; -(define_expand "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>" - [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") - (match_operand:MVE_5 1 "s_register_operand") - (match_operand:MVE_5 2 "s_register_operand") - (match_operand:<MVE_VPRED> 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] +;; [vstrhq_scatter_shifted_offset_s32 vstrhq_scatter_shifted_offset_u32] +(define_insn "mve_vstrq_truncate_scatter_shifted_offset_v4si" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (truncate:V4HI + (match_operand:V4SI 2 "s_register_operand" "w"))] + VSTRSSOQ_TRUNC))] "TARGET_HAVE_MVE" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn (ind, operands[1], - operands[2], - operands[3])); - DONE; -}) + "vstrh.32\t%q2, [%0, %q1, uxtw #1]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_truncate_scatter_shifted_offset_v4si")) + (set_attr "length" "4")]) -(define_insn "mve_vstrhq_scatter_shifted_offset_p_<supf><mode>_insn" +;; Predicated vector scatter stores with shifted offset +;; +;; [vstrhq_scatter_shifted_offset_p_s vstrhq_scatter_shifted_offset_p_u] +;; [vstrhq_scatter_shifted_offset_p_f] +;; [vstrwq_scatter_shifted_offset_p_s vstrwq_scatter_shifted_offset_p_u] +;; [vstrwq_scatter_shifted_offset_p_f] +;; [vstrdq_scatter_shifted_offset_p_s vstrdq_scatter_shifted_offset_p_u] +;; 
+(define_insn "@mve_vstrq_scatter_shifted_offset_p_<mode>" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:SI 0 "register_operand" "r") - (match_operand:MVE_5 1 "s_register_operand" "w") - (match_operand:MVE_5 2 "s_register_operand" "w") + (match_operand:<MVE_scatter_offset> 1 "s_register_operand" "w") + (match_operand:MVE_VLD_ST_scatter_shifted 2 "s_register_operand" "w") (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] - VSTRHSSOQ))] - "TARGET_HAVE_MVE" - "vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn")) + VSTRSSOQ_P))] + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vpst\;vstr<MVE_elem_ch>t.<V_sz_elem>\t%q2, [%0, %q1, uxtw #<MVE_scatter_shift>]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_shifted_offset_<mode>")) (set_attr "length" "8")]) +;; Predicated truncating vector scatter stores with shifted offset ;; -;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u] -;; -(define_expand "mve_vstrhq_scatter_shifted_offset_<supf><mode>" - [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") - (match_operand:MVE_5 1 "s_register_operand") - (match_operand:MVE_5 2 "s_register_operand") - (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] - "TARGET_HAVE_MVE" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn (ind, operands[1], - operands[2])); - DONE; -}) - -(define_insn "mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn" +;; [vstrhq_scatter_shifted_offset_p_s32 vstrhq_scatter_shifted_offset_p_u32] +(define_insn "mve_vstrq_truncate_scatter_shifted_offset_p_v4si" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:SI 0 "register_operand" "r") - (match_operand:MVE_5 1 "s_register_operand" "w") - (match_operand:MVE_5 2 "s_register_operand" 
"w")] - VSTRHSSOQ))] + (match_operand:V4SI 1 "s_register_operand" "w") + (truncate:V4HI + (match_operand:V4SI 2 "s_register_operand" "w")) + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] + VSTRSSOQ_TRUNC_P))] "TARGET_HAVE_MVE" - "vstrh.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn")) - (set_attr "length" "4")]) + "vpst\;vstrht.32\t%q2, [%0, %q1, uxtw #1]" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_truncate_scatter_shifted_offset_v4si")) + (set_attr "length" "8")]) ;; ;; [vstrdq_scatter_base_p_s vstrdq_scatter_base_p_u] @@ -4182,131 +4194,6 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_<supf>v2di")) (set_attr "length" "4")]) -;; -;; [vstrdq_scatter_shifted_offset_p_s vstrdq_scatter_shifted_offset_p_u] -;; -(define_expand "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di" - [(match_operand:V2DI 0 "mve_scatter_memory") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:V2DI 2 "s_register_operand") - (match_operand:V2QI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] - "TARGET_HAVE_MVE" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn (ind, operands[1], - operands[2], - operands[3])); - DONE; -}) - -(define_insn "mve_vstrdq_scatter_shifted_offset_p_<supf>v2di_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:V2QI 3 "vpr_register_operand" "Up")] - VSTRDSSOQ))] - "TARGET_HAVE_MVE" - "vpst\;vstrdt.64\t%q2, [%0, %q1, uxtw #3]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn")) - (set_attr "length" "8")]) - -;; -;; [vstrdq_scatter_shifted_offset_s 
vstrdq_scatter_shifted_offset_u] -;; -(define_expand "mve_vstrdq_scatter_shifted_offset_<supf>v2di" - [(match_operand:V2DI 0 "mve_scatter_memory") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:V2DI 2 "s_register_operand") - (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] - "TARGET_HAVE_MVE" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn (ind, operands[1], - operands[2])); - DONE; -}) - -(define_insn "mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w")] - VSTRDSSOQ))] - "TARGET_HAVE_MVE" - "vstrd.64\t%q2, [%0, %q1, uxtw #3]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_shifted_offset_<supf>v2di_insn")) - (set_attr "length" "4")]) - -;; -;; [vstrhq_scatter_shifted_offset_f] -;; -(define_expand "mve_vstrhq_scatter_shifted_offset_fv8hf" - [(match_operand:V8HI 0 "memory_operand" "=Us") - (match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w") - (unspec:V4SI [(const_int 0)] VSTRHQSSO_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn (gen_mve_vstrhq_scatter_shifted_offset_fv8hf_insn (ind, operands[1], - operands[2])); - DONE; -}) - -(define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")] - VSTRHQSSO_F))] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vstrh.16\t%q2, [%0, %q1, uxtw #1]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_fv8hf_insn")) - (set_attr "length" "4")]) - -;; -;; 
[vstrhq_scatter_shifted_offset_p_f] -;; -(define_expand "mve_vstrhq_scatter_shifted_offset_p_fv8hf" - [(match_operand:V8HI 0 "memory_operand" "=Us") - (match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:V8BI 3 "vpr_register_operand" "Up") - (unspec:V4SI [(const_int 0)] VSTRHQSSO_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn (ind, operands[1], - operands[2], - operands[3])); - DONE; -}) - -(define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:V8BI 3 "vpr_register_operand" "Up")] - VSTRHQSSO_F))] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vpst\;vstrht.16\t%q2, [%0, %q1, uxtw #1]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_scatter_shifted_offset_fv8hf_insn")) - (set_attr "length" "8")]) - ;; ;; [vstrwq_scatter_base_f] ;; @@ -4354,131 +4241,6 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_fv4sf")) (set_attr "length" "8")]) -;; -;; [vstrwq_scatter_shifted_offset_f] -;; -(define_expand "mve_vstrwq_scatter_shifted_offset_fv4sf" - [(match_operand:V4SI 0 "mve_scatter_memory") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:V4SF 2 "s_register_operand") - (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn (gen_mve_vstrwq_scatter_shifted_offset_fv4sf_insn (ind, operands[1], - operands[2])); - DONE; -}) - -(define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V4SI 
1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w")] - VSTRWQSSO_F))] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vstrw.32\t%q2, [%0, %q1, uxtw #2]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_fv4sf_insn")) - (set_attr "length" "8")]) - -;; -;; [vstrwq_scatter_shifted_offset_p_f] -;; -(define_expand "mve_vstrwq_scatter_shifted_offset_p_fv4sf" - [(match_operand:V4SI 0 "mve_scatter_memory") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:V4SF 2 "s_register_operand") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn (ind, operands[1], - operands[2], - operands[3])); - DONE; -}) - -(define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:V4BI 3 "vpr_register_operand" "Up")] - VSTRWQSSO_F))] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_fv4sf_insn")) - (set_attr "length" "8")]) - -;; -;; [vstrwq_scatter_shifted_offset_p_s vstrwq_scatter_shifted_offset_p_u] -;; -(define_expand "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si" - [(match_operand:V4SI 0 "mve_scatter_memory") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:V4SI 2 "s_register_operand") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] - "TARGET_HAVE_MVE" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn (ind, 
operands[1], - operands[2], - operands[3])); - DONE; -}) - -(define_insn "mve_vstrwq_scatter_shifted_offset_p_<supf>v4si_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:V4BI 3 "vpr_register_operand" "Up")] - VSTRWSSOQ))] - "TARGET_HAVE_MVE" - "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn")) - (set_attr "length" "8")]) - -;; -;; [vstrwq_scatter_shifted_offset_s vstrwq_scatter_shifted_offset_u] -;; -(define_expand "mve_vstrwq_scatter_shifted_offset_<supf>v4si" - [(match_operand:V4SI 0 "mve_scatter_memory") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:V4SI 2 "s_register_operand") - (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] - "TARGET_HAVE_MVE" -{ - rtx ind = XEXP (operands[0], 0); - gcc_assert (REG_P (ind)); - emit_insn ( - gen_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn (ind, operands[1], - operands[2])); - DONE; -}) - -(define_insn "mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn" - [(set (mem:BLK (scratch)) - (unspec:BLK - [(match_operand:SI 0 "register_operand" "r") - (match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w")] - VSTRWSSOQ))] - "TARGET_HAVE_MVE" - "vstrw.32\t%q2, [%0, %q1, uxtw #2]" - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_shifted_offset_<supf>v4si_insn")) - (set_attr "length" "4")]) - ;; ;; ;; [vddupq_u_insn, vidupq_u_insn] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 5315db580f3d..11d85273b567 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -1185,13 +1185,11 @@ VSTRQ_TRUNC_P VSTRDQSB_S VSTRDQSB_U - VSTRDQSSO_S - VSTRDQSSO_U - VSTRWQSSO_S - VSTRWQSSO_U - VSTRHQSSO_F + VSTRSSOQ + VSTRSSOQ_P + VSTRSSOQ_TRUNC + VSTRSSOQ_TRUNC_P 
VSTRWQSB_F - VSTRWQSSO_F VDDUPQ VDDUPQ_M VDWDUPQ @@ -1231,8 +1229,6 @@ VST2Q VSHLCQ_M_U VSHLCQ_M_S - VSTRHQSSO_S - VSTRHQSSO_U VSTRHQ_S SRSHRL SRSHR