https://gcc.gnu.org/g:1775a7280a230776927897147f1b07964cf5cfc7
commit r15-7016-g1775a7280a230776927897147f1b07964cf5cfc7 Author: Tamar Christina <tamar.christ...@arm.com> Date: Sat Jan 18 11:12:38 2025 +0000 Revert "AArch64: Use standard names for saturating arithmetic" This reverts commit 5f5833a4107ddfbcd87651bf140151de043f4c36. Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 12 - gcc/config/aarch64/aarch64-simd-builtins.def | 8 +- gcc/config/aarch64/aarch64-simd.md | 207 +--------------- gcc/config/aarch64/arm_neon.h | 96 ++++---- gcc/config/aarch64/iterators.md | 4 - .../saturating_arithmetic_autovect.inc | 58 ----- .../saturating_arithmetic_autovect_1.c | 79 ------ .../saturating_arithmetic_autovect_2.c | 79 ------ .../saturating_arithmetic_autovect_3.c | 75 ------ .../saturating_arithmetic_autovect_4.c | 77 ------ .../aarch64/saturating-arithmetic-signed.c | 270 --------------------- .../gcc.target/aarch64/saturating_arithmetic.inc | 39 --- .../gcc.target/aarch64/saturating_arithmetic_1.c | 36 --- .../gcc.target/aarch64/saturating_arithmetic_2.c | 36 --- .../gcc.target/aarch64/saturating_arithmetic_3.c | 30 --- .../gcc.target/aarch64/saturating_arithmetic_4.c | 30 --- .../gcc.target/aarch64/scalar_intrinsics.c | 32 +-- 17 files changed, 72 insertions(+), 1096 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 6d5479c2e449..86eebc168859 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -5039,18 +5039,6 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt, new_stmt = gimple_build_assign (gimple_call_lhs (stmt), LSHIFT_EXPR, args[0], args[1]); break; - /* lower saturating add/sub neon builtins to gimple. */ - BUILTIN_VSDQ_I (BINOP, ssadd, 3, DEFAULT) - BUILTIN_VSDQ_I (BINOPU, usadd, 3, DEFAULT) - new_stmt = gimple_build_call_internal (IFN_SAT_ADD, 2, args[0], args[1]); - gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); - break; - BUILTIN_VSDQ_I (BINOP, sssub, 3, DEFAULT) - BUILTIN_VSDQ_I (BINOPU, ussub, 3, DEFAULT) - new_stmt = gimple_build_call_internal (IFN_SAT_SUB, 2, args[0], args[1]); - gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); - break; - BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, DEFAULT) BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, DEFAULT) { diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 6cc45b18a723..286272a33118 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -71,10 +71,10 @@ BUILTIN_VSDQ_I (BINOP, sqrshl, 0, DEFAULT) BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0, DEFAULT) /* Implemented by aarch64_<su_optab><optab><mode>. */ - BUILTIN_VSDQ_I (BINOP, ssadd, 3, DEFAULT) - BUILTIN_VSDQ_I (BINOPU, usadd, 3, DEFAULT) - BUILTIN_VSDQ_I (BINOP, sssub, 3, DEFAULT) - BUILTIN_VSDQ_I (BINOPU, ussub, 3, DEFAULT) + BUILTIN_VSDQ_I (BINOP, sqadd, 0, DEFAULT) + BUILTIN_VSDQ_I (BINOPU, uqadd, 0, DEFAULT) + BUILTIN_VSDQ_I (BINOP, sqsub, 0, DEFAULT) + BUILTIN_VSDQ_I (BINOPU, uqsub, 0, DEFAULT) /* Implemented by aarch64_<sur>qadd<mode>. */ BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0, DEFAULT) BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0, DEFAULT) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e2afe87e5130..eeb626f129a8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5162,214 +5162,15 @@ ) ;; <su>q<addsub> -(define_insn "<su_optab>s<addsub><mode>3<vczle><vczbe>" - [(set (match_operand:VSDQ_I_QI_HI 0 "register_operand" "=w") - (BINQOPS:VSDQ_I_QI_HI - (match_operand:VSDQ_I_QI_HI 1 "register_operand" "w") - (match_operand:VSDQ_I_QI_HI 2 "register_operand" "w")))] +(define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:VSDQ_I 2 "register_operand" "w")))] "TARGET_SIMD" "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" [(set_attr "type" "neon_q<addsub><q>")] ) -(define_expand "<su_optab>s<addsub><mode>3" - [(parallel - [(set (match_operand:GPI 0 "register_operand") - (SBINQOPS:GPI (match_operand:GPI 1 "register_operand") - (match_operand:GPI 2 "aarch64_plus_operand"))) - (clobber (scratch:GPI)) - (clobber (reg:CC CC_REGNUM))])] -) - -;; Introducing a temporary GP reg allows signed saturating arithmetic with GPR -;; operands to be calculated without the use of costly transfers to and from FP -;; registers. For example, saturating addition usually uses three FMOVs: -;; -;; fmov d0, x0 -;; fmov d1, x1 -;; sqadd d0, d0, d1 -;; fmov x0, d0 -;; -;; Using a temporary register results in three cheaper instructions being used -;; in place of the three FMOVs, which calculate the saturating limit accounting -;; for the signedness of operand2: -;; -;; asr x2, x1, 63 -;; adds x0, x0, x1 -;; eor x2, x2, 0x8000000000000000 -;; csinv x0, x0, x2, vc -;; -;; If operand2 is a constant value, the temporary register can be used to store -;; the saturating limit without the need for asr, xor to calculate said limit. - -(define_insn_and_split "aarch64_<su_optab>s<addsub><mode>3<vczle><vczbe>" - [(set (match_operand:GPI 0 "register_operand") - (SBINQOPS:GPI (match_operand:GPI 1 "register_operand") - (match_operand:GPI 2 "aarch64_plus_operand"))) - (clobber (match_scratch:GPI 3)) - (clobber (reg:CC CC_REGNUM))] - "" - {@ [ cons: =0, 1 , 2 , =3 ; attrs: type , arch , length ] - [ w , w , w , X ; neon_q<addsub><q> , simd , 4 ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype> - [ r , r , JIr , &r ; * , * , 8 ] # - } - "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))" - [(set (match_dup 0) - (if_then_else:GPI - (match_dup 4) - (match_dup 5) - (match_dup 6)))] - { - if (REG_P (operands[2])) - { - rtx shift_constant = gen_int_mode (GET_MODE_BITSIZE (<MODE>mode) - 1, - <MODE>mode); - auto limit = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1); - rtx limit_constant = gen_int_mode (limit, <MODE>mode); - emit_insn (gen_ashr<mode>3 (operands[3], operands[2], shift_constant)); - emit_insn (gen_xor<mode>3 (operands[3], operands[3], limit_constant)); - - switch (<CODE>) - { - case SS_MINUS: - emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], - operands[2])); - break; - case SS_PLUS: - emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1], - operands[2])); - break; - default: - gcc_unreachable (); - } - - rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM); - switch (<CODE>) - { - case SS_PLUS: - operands[4] = gen_rtx_NE (<MODE>mode, ccin, const0_rtx); - operands[5] = gen_rtx_NOT (<MODE>mode, operands[3]); - operands[6] = operands[0]; - break; - case SS_MINUS: - operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx); - operands[5] = operands[0]; - operands[6] = operands[3]; - break; - default: - gcc_unreachable (); - } - } - else - { - auto imm = INTVAL (operands[2]); - rtx neg_imm = gen_int_mode (-imm, <MODE>mode); - wide_int limit; - - switch (<CODE>) - { - case SS_MINUS: - emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1], - operands[2], neg_imm)); - limit = imm >= 0 ? wi::min_value (<MODE>mode, SIGNED) - : wi::max_value (<MODE>mode, SIGNED); - break; - case SS_PLUS: - emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1], - neg_imm, operands[2])); - limit = imm >= 0 ? wi::max_value (<MODE>mode, SIGNED) - : wi::min_value (<MODE>mode, SIGNED); - break; - default: - gcc_unreachable (); - } - - rtx sat_limit = immed_wide_int_const (limit, <MODE>mode); - emit_insn (gen_rtx_SET (operands[3], sat_limit)); - - rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM); - operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx); - operands[5] = operands[0]; - operands[6] = operands[3]; - } - } -) - -;; Unsigned saturating arithmetic with GPR operands can be optimised similarly -;; to the signed case, albeit without the need for a temporary register as the -;; saturating limit can be inferred from the <addsub> code. This applies only -;; to SImode and DImode. - -(define_insn_and_split "<su_optab>s<addsub><mode>3<vczle><vczbe>" - [(set (match_operand:GPI 0 "register_operand") - (UBINQOPS:GPI (match_operand:GPI 1 "register_operand") - (match_operand:GPI 2 "aarch64_plus_operand"))) - (clobber (reg:CC CC_REGNUM))] - "" - {@ [ cons: =0, 1 , 2 ; attrs: type , arch , length ] - [ w , w , w ; neon_q<addsub><q> , simd , 4 ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype> - [ r , r , JIr ; * , * , 8 ] # - } - "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))" - [(set (match_dup 0) - (if_then_else:GPI - (match_dup 3) - (match_dup 0) - (match_dup 4)))] - { - - if (REG_P (operands[2])) - { - switch (<CODE>) - { - case US_MINUS: - emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], - operands[2])); - break; - case US_PLUS: - emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1], - operands[2])); - break; - default: - gcc_unreachable (); - } - } - else - { - auto imm = UINTVAL (operands[2]); - rtx neg_imm = gen_int_mode (-imm, <MODE>mode); - switch (<CODE>) - { - case US_MINUS: - emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1], - operands[2], neg_imm)); - break; - case US_PLUS: - emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1], - neg_imm, operands[2])); - break; - default: - gcc_unreachable (); - } - } - - rtx ccin = gen_rtx_REG (CCmode, CC_REGNUM); - switch (<CODE>) - { - case US_PLUS: - operands[3] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx); - operands[4] = gen_int_mode (-1, <MODE>mode); - break; - case US_MINUS: - operands[3] = gen_rtx_GEU (<MODE>mode, ccin, const0_rtx); - operands[4] = const0_rtx; - break; - default: - gcc_unreachable (); - } - } -) - ;; suqadd and usqadd (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 4899acead9b7..33594cb65d28 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -1864,35 +1864,35 @@ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t) __builtin_aarch64_ssaddv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t) __builtin_aarch64_ssaddv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t) __builtin_aarch64_ssaddv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t) {__builtin_aarch64_ssadddi (__a[0], __b[0])}; + return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_u8 (uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_usaddv8qi_uuu (__a, __b); + return __builtin_aarch64_uqaddv8qi_uuu (__a, __b); } __extension__ extern __inline int8x8_t @@ -2151,189 +2151,189 @@ __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_usaddv4hi_uuu (__a, __b); + return __builtin_aarch64_uqaddv4hi_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_u32 (uint32x2_t __a, uint32x2_t __b) { - return __builtin_aarch64_usaddv2si_uuu (__a, __b); + return __builtin_aarch64_uqaddv2si_uuu (__a, __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) {__builtin_aarch64_usadddi_uuu (__a[0], __b[0])}; + return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])}; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_ssaddv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_ssaddv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_ssaddv4si (__a, __b); + return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t) __builtin_aarch64_ssaddv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_usaddv16qi_uuu (__a, __b); + return __builtin_aarch64_uqaddv16qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_aarch64_usaddv8hi_uuu (__a, __b); + return __builtin_aarch64_uqaddv8hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_aarch64_usaddv4si_uuu (__a, __b); + return __builtin_aarch64_uqaddv4si_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) { - return __builtin_aarch64_usaddv2di_uuu (__a, __b); + return __builtin_aarch64_uqaddv2di_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t) __builtin_aarch64_sssubv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t) __builtin_aarch64_sssubv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t) __builtin_aarch64_sssubv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t) {__builtin_aarch64_sssubdi (__a[0], __b[0])}; + return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_u8 (uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_ussubv8qi_uuu (__a, __b); + return __builtin_aarch64_uqsubv8qi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_u16 (uint16x4_t __a, uint16x4_t __b) { - return __builtin_aarch64_ussubv4hi_uuu (__a, __b); + return __builtin_aarch64_uqsubv4hi_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_u32 (uint32x2_t __a, uint32x2_t __b) { - return __builtin_aarch64_ussubv2si_uuu (__a, __b); + return __builtin_aarch64_uqsubv2si_uuu (__a, __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsub_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) {__builtin_aarch64_ussubdi_uuu (__a[0], __b[0])}; + return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])}; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_sssubv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_sssubv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_sssubv4si (__a, __b); + return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t) __builtin_aarch64_sssubv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_ussubv16qi_uuu (__a, __b); + return __builtin_aarch64_uqsubv16qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_aarch64_ussubv8hi_uuu (__a, __b); + return __builtin_aarch64_uqsubv8hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_aarch64_ussubv4si_uuu (__a, __b); + return __builtin_aarch64_uqsubv4si_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) { - return __builtin_aarch64_ussubv2di_uuu (__a, __b); + return __builtin_aarch64_uqsubv2di_uuu (__a, __b); } __extension__ extern __inline int8x8_t @@ -17543,56 +17543,56 @@ __extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddb_s8 (int8_t __a, int8_t __b) { - return (int8_t) __builtin_aarch64_ssaddqi (__a, __b); + return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddh_s16 (int16_t __a, int16_t __b) { - return (int16_t) __builtin_aarch64_ssaddhi (__a, __b); + return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadds_s32 (int32_t __a, int32_t __b) { - return (int32_t) __builtin_aarch64_ssaddsi (__a, __b); + return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); } __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddd_s64 (int64_t __a, int64_t __b) { - return __builtin_aarch64_ssadddi (__a, __b); + return __builtin_aarch64_sqadddi (__a, __b); } __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddb_u8 (uint8_t __a, uint8_t __b) { - return (uint8_t) __builtin_aarch64_usaddqi_uuu (__a, __b); + return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddh_u16 (uint16_t __a, uint16_t __b) { - return (uint16_t) __builtin_aarch64_usaddhi_uuu (__a, __b); + return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqadds_u32 (uint32_t __a, uint32_t __b) { - return (uint32_t) __builtin_aarch64_usaddsi_uuu (__a, __b); + return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqaddd_u64 (uint64_t __a, uint64_t __b) { - return __builtin_aarch64_usadddi_uuu (__a, __b); + return __builtin_aarch64_uqadddi_uuu (__a, __b); } /* vqdmlal */ @@ -19242,56 +19242,56 @@ __extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubb_s8 (int8_t __a, int8_t __b) { - return (int8_t) __builtin_aarch64_sssubqi (__a, __b); + return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); } __extension__ extern __inline int16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubh_s16 (int16_t __a, int16_t __b) { - return (int16_t) __builtin_aarch64_sssubhi (__a, __b); + return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); } __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubs_s32 (int32_t __a, int32_t __b) { - return (int32_t) __builtin_aarch64_sssubsi (__a, __b); + return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); } __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubd_s64 (int64_t __a, int64_t __b) { - return __builtin_aarch64_sssubdi (__a, __b); + return __builtin_aarch64_sqsubdi (__a, __b); } __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubb_u8 (uint8_t __a, uint8_t __b) { - return (uint8_t) __builtin_aarch64_ussubqi_uuu (__a, __b); + return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubh_u16 (uint16_t __a, uint16_t __b) { - return (uint16_t) __builtin_aarch64_ussubhi_uuu (__a, __b); + return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubs_u32 (uint32_t __a, uint32_t __b) { - return (uint32_t) __builtin_aarch64_ussubsi_uuu (__a, __b); + return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqsubd_u64 (uint64_t __a, uint64_t __b) { - return __builtin_aarch64_ussubdi_uuu (__a, __b); + return __builtin_aarch64_uqsubdi_uuu (__a, __b); } /* vqtbl2 */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 2f7aa489ae8b..ff0f34dd0430 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -95,10 +95,6 @@ ;; integer modes; 64-bit scalar integer mode. (define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI]) -;; Advanced SIMD and scalar, 64 & 128-bit container; 8 and 16-bit scalar -;; integer modes. -(define_mode_iterator VSDQ_I_QI_HI [VDQ_I HI QI]) - ;; Double vector modes. (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF V4BF]) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect.inc deleted file mode 100644 index 1fadfd587555..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect.inc +++ /dev/null @@ -1,58 +0,0 @@ -/* Template file for vector saturating arithmetic validation. - - This file defines saturating addition and subtraction functions for a given - scalar type, testing the auto-vectorization of these two operators. This - type, along with the corresponding minimum and maximum values for that type, - must be defined by any test file which includes this template file. */ - -#ifndef SAT_ARIT_AUTOVEC_INC -#define SAT_ARIT_AUTOVEC_INC - -#include <limits.h> -#include <arm_neon.h> - -#ifndef UT -#define UT unsigned int -#define VT uint32x4_t -#define UMAX UINT_MAX -#define UMIN 0 -#endif - - -UT uadd_lane (UT a, VT b) -{ - UT sum = a + b[0]; - return sum < a ? UMAX : sum; -} - -void uaddq (UT *out, UT *a, UT *b, int n) -{ - for (int i = 0; i < n; i++) - { - UT sum = a[i] + b[i]; - out[i] = sum < a[i] ? UMAX : sum; - } -} - -void uaddq2 (UT *out, UT *a, UT *b, int n) -{ - for (int i = 0; i < n; i++) - { - UT sum; - if (!__builtin_add_overflow(a[i], b[i], &sum)) - out[i] = sum; - else - out[i] = UMAX; - } -} - -void usubq (UT *out, UT *a, UT *b, int n) -{ - for (int i = 0; i < n; i++) - { - UT sum = a[i] - b[i]; - out[i] = sum > a[i] ? UMIN : sum; - } -} - -#endif \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_1.c deleted file mode 100644 index 2b72be7b0d7c..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_1.c +++ /dev/null @@ -1,79 +0,0 @@ -/* { dg-do assemble { target { aarch64*-*-* } } } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ -/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - -/* -** uadd_lane: { xfail *-*-* } -** dup\tv([0-9]+).8b, w0 -** uqadd\tb([0-9]+), (?:b\1, b0|b0, b\1) -** umov\tw0, v\2.b\[0\] -** ret -*/ -/* -** uaddq: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.16b, (?:v\1.16b, v\2.16b|v\2.16b, v\1.16b) -** ... -** ldr\td([0-9]+), .* -** ldr\td([0-9]+), .* -** uqadd\tv[0-9]+.8b, (?:v\3.8b, v\4.8b|v\4.8b, v\3.8b) -** ... -** ldr\tb([0-9]+), .* -** ldr\tb([0-9]+), .* -** uqadd\tb[0-9]+, (?:b\5, b\6|b\6, b\5) -** ... -** ldr\tb([0-9]+), .* -** ldr\tb([0-9]+), .* -** uqadd\tb[0-9]+, (?:b\7, b\8|b\8, b\7) -** ... -*/ -/* -** uaddq2: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.16b, (?:v\1.16b, v\2.16b|v\2.16b, v\1.16b) -** ... -** ldr\td([0-9]+), .* -** ldr\td([0-9]+), .* -** uqadd\tv[0-9]+.8b, (?:v\3.8b, v\4.8b|v\4.8b, v\3.8b) -** ... -** ldr\tb([0-9]+), .* -** ldr\tb([0-9]+), .* -** uqadd\tb[0-9]+, (?:b\5, b\6|b\6, b\5) -** ... -** uqadd\tb([0-9]+), (?:b[0-9]+, b\7|b\7, b[0-9]+) -** ... -*/ -/* -** usubq: { xfail *-*-* } -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqsub\tv[0-9]+.16b, v\1.16b, v\2.16b -** ... -** ldr\td([0-9]+), .* -** ldr\td([0-9]+), .* -** uqsub\tv[0-9]+.8b, v\3.8b, v\4.8b -** ... -** ldr\tb([0-9]+), .* -** ldr\tb([0-9]+), .* -** uqsub\tb[0-9]+, b\5, b\6 -** ... -** ldr\tb([0-9]+), .* -** ldr\tb([0-9]+), .* -** uqsub\tb[0-9]+, b\7, b\8 -** ... -*/ - -#include <limits.h> -#include <arm_neon.h> - -#define UT unsigned char -#define VT uint8x8_t -#define UMAX UCHAR_MAX -#define UMIN 0 - -#include "saturating_arithmetic_autovect.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_2.c deleted file mode 100644 index 0640361498f0..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_2.c +++ /dev/null @@ -1,79 +0,0 @@ -/* { dg-do assemble { target { aarch64*-*-* } } } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ -/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - -/* -** uadd_lane: { xfail *-*-* } -** dup\tv([0-9]+).4h, w0 -** uqadd\th([0-9]+), (?:h\1, h0|h0, h\1) -** umov\tw0, v\2.h\[0\] -** ret -*/ -/* -** uaddq: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.8h, (?:v\1.8h, v\2.8h|v\2.8h, v\1.8h) -** ... -** ldr\td([0-9]+), .* -** ldr\td([0-9]+), .* -** uqadd\tv[0-9]+.4h, (?:v\3.4h, v\4.4h|v\4.4h, v\3.4h) -** ... -** ldr\th([0-9]+), .* -** ldr\th([0-9]+), .* -** uqadd\th[0-9]+, (?:h\5, h\6|h\6, h\5) -** ... -** ldr\th([0-9]+), .* -** ldr\th([0-9]+), .* -** uqadd\th[0-9]+, (?:h\7, h\8|h\8, h\7) -** ... -*/ -/* -** uaddq2: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.8h, (?:v\1.8h, v\2.8h|v\2.8h, v\1.8h) -** ... -** ldr\td([0-9]+), .* -** ldr\td([0-9]+), .* -** uqadd\tv[0-9]+.4h, (?:v\3.4h, v\4.4h|v\4.4h, v\3.4h) -** ... -** ldr\th([0-9]+), .* -** ldr\th([0-9]+), .* -** uqadd\th[0-9]+, (?:h\5, h\6|h\6, h\5) -** ... -** uqadd\th([0-9]+), (?:h[0-9]+, h\7|h\7, h[0-9]+) -** ... -*/ -/* -** usubq: { xfail *-*-* } -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqsub\tv[0-9]+.8h, v\1.8h, v\2.8h -** ... -** ldr\td([0-9]+), .* -** ldr\td([0-9]+), .* -** uqsub\tv[0-9]+.4h, v\3.4h, v\4.4h -** ... -** ldr\th([0-9]+), .* -** ldr\th([0-9]+), .* -** uqsub\th[0-9]+, h\5, h\6 -** ... -** ldr\th([0-9]+), .* -** ldr\th([0-9]+), .* -** uqsub\th[0-9]+, h\7, h\8 -** ... -*/ - -#include <limits.h> -#include <arm_neon.h> - -#define UT unsigned short -#define VT uint16x4_t -#define UMAX USHRT_MAX -#define UMIN 0 - -#include "saturating_arithmetic_autovect.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_3.c deleted file mode 100644 index ea6e0c78d786..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_3.c +++ /dev/null @@ -1,75 +0,0 @@ -/* { dg-do assemble { target { aarch64*-*-* } } } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ -/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - -/* -** uadd_lane: -** fmov\tw([0-9]+), s0 -** adds\tw([0-9]+), (?:w\1, w0|w0, w\1) -** csinv\tw\2, w\2, wzr, cc -** ret -*/ -/* -** uaddq: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.4s, (?:v\1.4s, v\2.4s|v\2.4s, v\1.4s) -** ... -** ldr\tw([0-9]+), .* -** ldr\tw([0-9]+), .* -** adds\tw([0-9]+), (?:w\3, w\4|w\4, w\3) -** csinv\tw\5, w\5, wzr, cc -** ... -** ldr\tw([0-9]+), .* -** ldr\tw([0-9]+), .* -** adds\tw([0-9]+), (?:w\6, w\7|w\7, w\6) -** csinv\tw\8, w\8, wzr, cc -** ... -*/ -/* -** uaddq2: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.4s, (?:v\1.4s, v\2.4s|v\2.4s, v\1.4s) -** ... -** ldr\tw([0-9]+), .* -** ldr\tw([0-9]+), .* -** adds\tw([0-9]+), (?:w\3, w\4|w\4, w\3) -** csinv\tw\5, w\5, wzr, cc -** ... -** ldr\tw([0-9]+), .* -** ldr\tw([0-9]+), .* -** adds\tw([0-9]+), (?:w\6, w\7|w\7, w\6) -** csinv\tw\8, w\8, wzr, cc -** ... -*/ -/* -** usubq: { xfail *-*-* } -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqsub\tv[0-9]+.4s, v\1.4s, v\2.4s -** ... -** ldr\tw([0-9]+), .* -** ldr\tw([0-9]+), .* -** subs\tw([0-9]+), w\3, w\4 -** csel\tw\5, w\5, wzr, cs -** ... -** ldr\tw([0-9]+), .* -** ldr\tw([0-9]+), .* -** subs\tw([0-9]+), w\6, w\7 -** csel\tw\8, w\8, wzr, cs -** ... -*/ - -#include <limits.h> -#include <arm_neon.h> - -#define UT unsigned int -#define VT uint32x2_t -#define UMAX UINT_MAX -#define UMIN 0 - -#include "saturating_arithmetic_autovect.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_4.c deleted file mode 100644 index 01390637b5ca..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/saturating_arithmetic_autovect_4.c +++ /dev/null @@ -1,77 +0,0 @@ -/* { dg-do assemble { target { aarch64*-*-* } } } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ -/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - -/* -** uadd_lane: -** ... -** (?:fmov|ldr)\tx([0-9]+), .* -** ... -** adds\tx([0-9]+), (?:x\1, x0|x0, x\1) -** csinv\tx\2, x\2, xzr, cc -** ret -*/ -/* -** uaddq: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.2d, (?:v\1.2d, v\2.2d|v\2.2d, v\1.2d) -** ... -** ldr\tx([0-9]+), .* -** ldr\tx([0-9]+), .* -** adds\tx([0-9]+), (?:x\3, x\4|x\4, x\3) -** csinv\tx\5, x\5, xzr, cc -** ... -** ldr\tx([0-9]+), .* -** ldr\tx([0-9]+), .* -** adds\tx([0-9]+), (?:x\6, x\7|x\7, x\6) -** csinv\tx\8, x\8, xzr, cc -** ... -*/ -/* -** uaddq2: -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqadd\tv[0-9]+.2d, (?:v\1.2d, v\2.2d|v\2.2d, v\1.2d) -** ... -** ldr\tx([0-9]+), .* -** ldr\tx([0-9]+), .* -** adds\tx([0-9]+), (?:x\3, x\4|x\4, x\3) -** csinv\tx\5, x\5, xzr, cc -** ... -** ldr\tx([0-9]+), .* -** ldr\tx([0-9]+), .* -** adds\tx([0-9]+), (?:x\6, x\7|x\7, x\6) -** csinv\tx\8, x\8, xzr, cc -** ... -*/ -/* -** usubq: { xfail *-*-* } -** ... -** ldr\tq([0-9]+), .* -** ldr\tq([0-9]+), .* -** uqsub\tv[0-9]+.2d, v\1.2d, v\2.2d -** ... -** ldr\tx([0-9]+), .* -** ldr\tx([0-9]+), .* -** subs\tx([0-9]+), x\3, x\4 -** csel\tx\5, x\5, xzr, cs -** ... -** ldr\tx([0-9]+), .* -** ldr\tx([0-9]+), .* -** subs\tx([0-9]+), x\6, x\7 -** csel\tx\8, x\8, xzr, cs -** ... -*/ - -#include <limits.h> -#include <arm_neon.h> - -#define UT unsigned long -#define VT uint64x2_t -#define UMAX ULONG_MAX -#define UMIN 0 - -#include "saturating_arithmetic_autovect.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/saturating-arithmetic-signed.c b/gcc/testsuite/gcc.target/aarch64/saturating-arithmetic-signed.c deleted file mode 100644 index de652bf1d9e6..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/saturating-arithmetic-signed.c +++ /dev/null @@ -1,270 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 --save-temps -mearly-ra=none -fno-schedule-insns2" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -#include <limits.h> -#include <stdbool.h> -#include <stdint.h> - -/* -** sadd32: -** asr w([0-9]+), w1, 31 -** eor w\1, w\1, -2147483648 -** adds w([0-9]+), (?:w0, w1|w1, w0) -** csinv w0, w\2, w\1, vc -** ret -*/ -int32_t __attribute__((noipa)) -sadd32 (int32_t __a, int32_t __b) -{ - int32_t sum; - bool overflow = __builtin_add_overflow (__a, __b, &sum); - return !overflow ? sum : __a < 0 ? INT_MIN : INT_MAX; -} - -/* -** sadd32_imm: -** adds w([0-9]+), w0, #67 -** mov w([0-9]+), 2147483647 -** csel w0, w\1, w\2, vc -** ret -*/ -int32_t __attribute__((noipa)) -sadd32_imm (int32_t __a) -{ - int32_t sum; - bool overflow = __builtin_add_overflow (__a, 67, &sum); - return !overflow ? sum : __a < 0 ? INT_MIN : INT_MAX; -} - -/* -** sadd32_imm2: -** subs w([0-9]+), w0, 67 -** mov w([0-9]+), -2147483648 -** csel w0, w\1, w\2, vc -** ret -*/ -int32_t __attribute__((noipa)) -sadd32_imm2 (int32_t __a) -{ - int32_t sum; - bool overflow = __builtin_add_overflow (__a, -67, &sum); - return !overflow ? sum : __a < 0 ? INT_MIN : INT_MAX; -} - -/* -** ssub32: -** asr w([0-9]+), w1, 31 -** eor w\1, w\1, -2147483648 -** subs w([0-9]+), w0, w1 -** csel w0, w\2, w\1, vc -** ret -*/ -int32_t __attribute__((noipa)) -ssub32 (int32_t __a, int32_t __b) -{ - int32_t result; - bool overflow = __builtin_sub_overflow (__a, __b, &result); - return !overflow ? result : __a < 0 ? INT_MIN : INT_MAX; -} - -/* -** ssub32_imm: -** subs w([0-9]+), w0, 67 -** mov w([0-9]+), -2147483648 -** csel w0, w\1, w\2, vc -** ret -*/ -int32_t __attribute__((noipa)) -ssub32_imm (int32_t __a) -{ - int32_t result; - bool overflow = __builtin_sub_overflow (__a, 67, &result); - return !overflow ? result : __a < 0 ? INT_MIN : INT_MAX; -} - -/* -** ssub32_imm2: -** adds w([0-9]+), w0, #67 -** mov w([0-9]+), 2147483647 -** csel w0, w\1, w\2, vc -** ret -*/ -int32_t __attribute__((noipa)) -ssub32_imm2 (int32_t __a) -{ - int32_t result; - bool overflow = __builtin_sub_overflow (__a, -67, &result); - return !overflow ? result : __a < 0 ? INT_MIN : INT_MAX; -} - -/* -** sadd64: -** asr x([0-9]+), x1, 63 -** eor x\1, x\1, -9223372036854775808 -** adds x([0-9]+), (?:x0, x1|x1, x0) -** csinv x0, x\2, x\1, vc -** ret -*/ -int64_t __attribute__((noipa)) -sadd64 (int64_t __a, int64_t __b) -{ - int64_t sum; - bool overflow = __builtin_add_overflow (__a, __b, &sum); - return !overflow ? sum : __a < 0 ? LONG_MIN : LONG_MAX; -} - -/* -** sadd64_imm: -** adds x([0-9]+), x0, #67 -** mov x([0-9]+), 9223372036854775807 -** csel x0, x\1, x\2, vc -** ret -*/ -int64_t __attribute__((noipa)) -sadd64_imm (int64_t __a) -{ - int64_t sum; - bool overflow = __builtin_add_overflow (__a, (int64_t)67, &sum); - return !overflow ? sum : __a < 0 ? LONG_MIN : LONG_MAX; -} - -/* -** sadd64_imm2: -** subs x([0-9]+), x0, 67 -** mov x([0-9]+), -9223372036854775808 -** csel x0, x\1, x\2, vc -** ret -*/ -int64_t __attribute__((noipa)) -sadd64_imm2 (int64_t __a) -{ - int64_t sum; - bool overflow = __builtin_add_overflow (__a, (int64_t)-67, &sum); - return !overflow ? sum : __a < 0 ? LONG_MIN : LONG_MAX; -} - -/* -** ssub64: -** asr x([0-9]+), x1, 63 -** eor x\1, x\1, -9223372036854775808 -** subs x([0-9]+), x0, x1 -** csel x0, x\2, x\1, vc -** ret -*/ -int64_t __attribute__((noipa)) -ssub64 (int64_t __a, int64_t __b) -{ - int64_t result; - bool overflow = __builtin_sub_overflow (__a, __b, &result); - return !overflow ? result : __a < 0 ? LONG_MIN : LONG_MAX; -} - -/* -** ssub64_imm: -** subs x([0-9]+), x0, 67 -** mov x([0-9]+), -9223372036854775808 -** csel x0, x\1, x\2, vc -** ret -*/ -int64_t __attribute__((noipa)) -ssub64_imm (int64_t __a) -{ - int64_t result; - bool overflow = __builtin_sub_overflow (__a, (int64_t) 67, &result); - return !overflow ? result : __a < 0 ? LONG_MIN : LONG_MAX; -} - -/* -** ssub64_imm2: -** adds x([0-9]+), x0, #67 -** mov x([0-9]+), 9223372036854775807 -** csel x0, x\1, x\2, vc -** ret -*/ -int64_t __attribute__((noipa)) -ssub64_imm2 (int64_t __a) -{ - int64_t result; - bool overflow = __builtin_sub_overflow (__a, (int64_t) -67, &result); - return !overflow ? result : __a < 0 ? LONG_MIN : LONG_MAX; -} - -int -main (void) -{ - /* Addition: - SAT_ADD(x, +ve), non-saturating - SAT_ADD(x, +ve), saturating - SAT_ADD(x, immediate +ve) - SAT_ADD(x, immediate -ve) - SAT_ADD(x, -ve), non-saturating - SAT_ADD(x, -ve), saturating - - Subtraction: - SAT_SUB(x, +ve), non-saturating - SAT_SUB(x, +ve), saturating - SAT_SUB(x, immediate +ve) - SAT_SUB(x, immediate -ve) - SAT_SUB(x, -ve), non-saturating */ - - int32_t a = 4; - int32_t b = 70; - int32_t c = 2147483647; - int32_t d = (int32_t) -2147483648; - - if (sadd32 (a, b) != (a + b)) - __builtin_abort (); - if (sadd32 (a, c) != c) - __builtin_abort (); - if (sadd32_imm (a) != (a + 67)) - __builtin_abort (); - if (sadd32_imm2 (a) != (a - 67)) - __builtin_abort (); - if (sadd32 (a, -b) != (a - b)) - __builtin_abort (); - if (sadd32 (a, d) != (d + 4)) - __builtin_abort (); - - if (ssub32 (a, b) != (a - b)) - __builtin_abort (); - if (ssub32 (-a, c) != d) - __builtin_abort (); - if (ssub32_imm (a) != (a - 67)) - __builtin_abort (); - if (ssub32_imm2 (a) != (a + 67)) - __builtin_abort (); - if (ssub32 (a, -b) != (a + b)) - __builtin_abort (); - - int64_t a_64 = a; - int64_t b_64 = b; - int64_t c_64 = (int64_t) 9223372036854775807; - int64_t d_64 = (int64_t) 0x8000000000000000; - - if (sadd64 (a_64, b_64) != (a_64 + b_64)) - __builtin_abort (); - if (sadd64 (a_64, c_64) != c_64) - __builtin_abort (); - if (sadd64_imm (a_64) != (a_64 + 67)) - __builtin_abort (); - if (sadd64_imm2 (a_64) != (a_64 - 67)) - __builtin_abort (); - if (sadd64 (a_64, -b_64) != (a_64 - b_64)) - __builtin_abort (); - if (sadd64 (a_64, d_64) != (d_64 + 4)) - __builtin_abort (); - - if (ssub64 (a_64, b_64) != (a_64 - b_64)) - __builtin_abort (); - if (ssub64 (-a_64, c_64) != d_64) - __builtin_abort (); - if (ssub64_imm (a_64) != (a_64 - 67)) - __builtin_abort (); - if (ssub64_imm2 (a_64) != (a_64 + 67)) - __builtin_abort (); - if (ssub64 (a_64, -b_64) != (a_64 + b_64)) - __builtin_abort (); - - return 0; -} \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic.inc b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic.inc deleted file mode 100644 index e979d5354057..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic.inc +++ /dev/null @@ -1,39 +0,0 @@ -/* Template file for scalar saturating arithmetic validation. - - This file defines scalar saturating addition and subtraction functions for a - given type. This type, along with the corresponding minimum and maximum - values for that type, must be defined by any test file which includes this - template file. */ - -#ifndef SAT_ARIT_INC -#define SAT_ARIT_INC - -#include <limits.h> - -#ifndef UT -#define UT unsigned int -#define UMAX UINT_MAX -#define UMIN 0 -#endif - -UT uadd (UT a, UT b) -{ - UT sum = a + b; - return sum < a ? UMAX : sum; -} - -UT uadd2 (UT a, UT b) -{ - UT c; - if (!__builtin_add_overflow(a, b, &c)) - return c; - return UMAX; -} - -UT usub (UT a, UT b) -{ - UT sum = a - b; - return sum > a ? UMIN : sum; -} - -#endif \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c deleted file mode 100644 index 2ac0c376d126..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c +++ /dev/null @@ -1,36 +0,0 @@ -/* { dg-do-compile } */ -/* { dg-options "-O2 --save-temps -fno-schedule-insns2" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -/* -** uadd: -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 -** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2) -** umov w0, v\3.b\[0\] -** ret -*/ -/* -** uadd2: -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 -** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2) -** umov w0, v\3.b\[0\] -** ret -*/ -/* -** usub: { xfail *-*-* } -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 -** uqsub b([0-9]+), b\1, b\2 -** umov w0, v\3.b\[0\] -** ret -*/ - -#include <limits.h> - -#define UT unsigned char -#define UMAX UCHAR_MAX -#define UMIN 0 - -#include "saturating_arithmetic.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c deleted file mode 100644 index 2a55aa9f2218..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c +++ /dev/null @@ -1,36 +0,0 @@ -/* { dg-do-compile } */ -/* { dg-options "-O2 --save-temps -fno-schedule-insns2" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -/* -** uadd: -** dup v([0-9]+).4h, w1 -** dup v([0-9]+).4h, w0 -** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2) -** umov w0, v\3.h\[0\] -** ret -*/ -/* -** uadd2: -** dup v([0-9]+).4h, w1 -** dup v([0-9]+).4h, w0 -** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2) -** umov w0, v\3.h\[0\] -** ret -*/ -/* -** usub: { xfail *-*-* } -** dup v([0-9]+).4h, w1 -** dup v([0-9]+).4h, w0 -** uqsub h([0-9]+), h\1, h\2 -** umov w0, v\3.h\[0\] -** ret -*/ - -#include <limits.h> - -#define UT unsigned short -#define UMAX USHRT_MAX -#define UMIN 0 - -#include "saturating_arithmetic.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_3.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_3.c deleted file mode 100644 index 215172545196..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_3.c +++ /dev/null @@ -1,30 +0,0 @@ -/* { dg-do compile { target { aarch64*-*-* } } } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -/* -** uadd: -** adds\tw([0-9]+), w([0-9]+), w([0-9]+) -** csinv\tw\1, w\1, wzr, cc -** ret -*/ -/* -** uadd2: -** adds\tw([0-9]+), w([0-9]+), w([0-9]+) -** csinv\tw\1, w\1, wzr, cc -** ret -*/ -/* -** usub: -** subs\tw([0-9]+), w([0-9]+), w([0-9]+) -** csel\tw\1, w\1, wzr, cs -** ret -*/ - -#include <limits.h> - -#define UT unsigned int -#define UMAX UINT_MAX -#define UMIN 0 - -#include "saturating_arithmetic.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_4.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_4.c deleted file mode 100644 index 363d0a79a730..000000000000 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_4.c +++ /dev/null @@ -1,30 +0,0 @@ -/* { dg-do compile { target { aarch64*-*-* } } } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ - -/* -** uadd: -** adds\tx([0-9]+), x([0-9]+), x([0-9]+) -** csinv\tx\1, x\1, xzr, cc -** ret -*/ -/* -** uadd2: -** adds\tx([0-9]+), x([0-9]+), x([0-9]+) -** csinv\tx\1, x\1, xzr, cc -** ret -*/ -/* -** usub: -** subs\tx([0-9]+), x([0-9]+), x([0-9]+) -** csel\tx\1, x\1, xzr, cs -** ret -*/ - -#include <limits.h> - -#define UT unsigned long -#define UMAX ULONG_MAX -#define UMIN 0 - -#include "saturating_arithmetic.inc" \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c index dcf9dc777ade..c2e13b651e96 100644 --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c @@ -318,33 +318,33 @@ test_vpaddd_u64 (uint64x2_t a) /* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */ uint64_t -test_vqaddd_u64 (uint64x1_t a, uint64x1_t b) +test_vqaddd_u64 (uint64_t a, uint64_t b) { - return vqaddd_u64 (a[0], b[0]); + return vqaddd_u64 (a, b); } /* { dg-final { scan-assembler-times "\\tuqadd\\ts\[0-9\]+" 1 } } */ uint32_t -test_vqadds_u32 (uint32x4_t a, uint32x4_t b) +test_vqadds_u32 (uint32_t a, uint32_t b) { - return vqadds_u32 (a[0], b[0]); + return vqadds_u32 (a, b); } /* { dg-final { scan-assembler-times "\\tuqadd\\th\[0-9\]+" 1 } } */ uint16_t -test_vqaddh_u16 (uint16x8_t a, uint16x8_t b) +test_vqaddh_u16 (uint16_t a, uint16_t b) { - return vqaddh_u16 (a[0], b[0]); + return vqaddh_u16 (a, b); } /* { dg-final { scan-assembler-times "\\tuqadd\\tb\[0-9\]+" 1 } } */ uint8_t -test_vqaddb_u8 (uint8x16_t a, uint8x16_t b) +test_vqaddb_u8 (uint8_t a, uint8_t b) { - return vqaddb_u8 (a[0], b[0]); + return vqaddb_u8 (a, b); } /* { dg-final { scan-assembler-times "\\tsqadd\\td\[0-9\]+" 1 } } */ @@ -761,33 +761,33 @@ test_vsubd_s64_2 (int64_t a, int64_t b) /* { dg-final { scan-assembler-times "\\tuqsub\\td\[0-9\]+" 1 } } */ uint64_t -test_vqsubd_u64 (uint64x1_t a, uint64x1_t b) +test_vqsubd_u64 (uint64_t a, uint64_t b) { - return vqsubd_u64 (a[0], b[0]); + return vqsubd_u64 (a, b); } /* { dg-final { scan-assembler-times "\\tuqsub\\ts\[0-9\]+" 1 } } */ uint32_t -test_vqsubs_u32 (uint32x4_t a, uint32x4_t b) +test_vqsubs_u32 (uint32_t a, uint32_t b) { - return vqsubs_u32 (a[0], b[0]); + return vqsubs_u32 (a, b); } /* { dg-final { scan-assembler-times "\\tuqsub\\th\[0-9\]+" 1 } } */ uint16_t -test_vqsubh_u16 (uint16x8_t a, uint16x8_t b) +test_vqsubh_u16 (uint16_t a, uint16_t b) { - return vqsubh_u16 (a[0], b[0]); + return vqsubh_u16 (a, b); } /* { dg-final { scan-assembler-times "\\tuqsub\\tb\[0-9\]+" 1 } } */ uint8_t -test_vqsubb_u8 (uint8x16_t a, uint8x16_t b) +test_vqsubb_u8 (uint8_t a, uint8_t b) { - return vqsubb_u8 (a[0], b[0]); + return vqsubb_u8 (a, b); } /* { dg-final { scan-assembler-times "\\tsqsub\\td\[0-9\]+" 1 } } */