Richard Henderson <[email protected]> writes:
> * config/aarch64/aarch64-protos.h, config/aarch64/aarch64.c
> (aarch64_sve_prepare_conditional_op): Remove.
> * config/aarch64/aarch64-sve.md (cond_<SVE_INT_BINARY><SVE_I>):
> Allow aarch64_simd_reg_or_zero as select operand; remove
> the aarch64_sve_prepare_conditional_op call.
> (cond_<SVE_INT_BINARY_SD><SVE_SDI>): Likewise.
> (cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.
> (*cond_<SVE_INT_BINARY><SVE_I>_z): New pattern.
> (*cond_<SVE_INT_BINARY_SD><SVE_SDI>_z): New pattern.
> (*cond_<SVE_COND_FP_BINARY><SVE_F>_z): New pattern.
> (*cond_<SVE_INT_BINARY><SVE_I>_any): New pattern.
> (*cond_<SVE_INT_BINARY_SD><SVE_SDI>_any): New pattern.
> (*cond_<SVE_COND_FP_BINARY><SVE_F>_any): New pattern
> and splitters to match all of the *_any patterns.
> * config/aarch64/predicates.md (aarch64_sve_any_binary_operator): New.
> ---
> gcc/config/aarch64/aarch64-protos.h | 1 -
> gcc/config/aarch64/aarch64.c | 54 ----------
> gcc/config/aarch64/aarch64-sve.md | 154 ++++++++++++++++++++++++----
> gcc/config/aarch64/predicates.md | 3 +
> 4 files changed, 136 insertions(+), 76 deletions(-)
OK, thanks.
Richard
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h
> b/gcc/config/aarch64/aarch64-protos.h
> index 87c6ae20278..514ddc457ca 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -513,7 +513,6 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool,
> scalar_mode, RTX_CODE);
> void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
> bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
> void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
> -void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
> #endif /* RTX_CODE */
>
> void aarch64_init_builtins (void);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3af7e98e166..d75d45f4b8b 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -16058,60 +16058,6 @@ aarch64_expand_sve_vcond (machine_mode data_mode,
> machine_mode cmp_mode,
> emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
> }
>
> -/* Prepare a cond_<optab><mode> operation that has the operands
> - given by OPERANDS, where:
> -
> - - operand 0 is the destination
> - - operand 1 is a predicate
> - - operands 2 to NOPS - 2 are the operands to an operation that is
> - performed for active lanes
> - - operand NOPS - 1 specifies the values to use for inactive lanes.
> -
> - COMMUTATIVE_P is true if operands 2 and 3 are commutative. In that case,
> - no pattern is provided for a tie between operands 3 and NOPS - 1. */
> -
> -void
> -aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
> - bool commutative_p)
> -{
> - /* We can do the operation directly if the "else" value matches one
> - of the other inputs. */
> - for (unsigned int i = 2; i < nops - 1; ++i)
> - if (rtx_equal_p (operands[i], operands[nops - 1]))
> - {
> - if (i == 3 && commutative_p)
> - std::swap (operands[2], operands[3]);
> - return;
> - }
> -
> - /* If the "else" value is different from the other operands, we have
> - the choice of doing a SEL on the output or a SEL on an input.
> - Neither choice is better in all cases, but one advantage of
> - selecting the input is that it can avoid a move when the output
> - needs to be distinct from the inputs. E.g. if operand N maps to
> - register N, selecting the output would give:
> -
> - MOVPRFX Z0.S, Z2.S
> - ADD Z0.S, P1/M, Z0.S, Z3.S
> - SEL Z0.S, P1, Z0.S, Z4.S
> -
> - whereas selecting the input avoids the MOVPRFX:
> -
> - SEL Z0.S, P1, Z2.S, Z4.S
> - ADD Z0.S, P1/M, Z0.S, Z3.S.
> -
> - ??? Matching the other input can produce
> -
> - MOVPRFX Z4.S, P1/M, Z2.S
> - ADD Z4.S, P1/M, Z4.S, Z3.S
> - */
> - machine_mode mode = GET_MODE (operands[0]);
> - rtx temp = gen_reg_rtx (mode);
> - rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
> - emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
> - operands[2] = operands[nops - 1] = temp;
> -}
> -
> /* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
> true. However due to issues with register allocation it is preferable
> to avoid tieing integer scalar and FP scalar modes. Executing integer
> diff --git a/gcc/config/aarch64/aarch64-sve.md
> b/gcc/config/aarch64/aarch64-sve.md
> index db16affc093..b16d0455159 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -1817,13 +1817,10 @@
> (SVE_INT_BINARY:SVE_I
> (match_operand:SVE_I 2 "register_operand")
> (match_operand:SVE_I 3 "register_operand"))
> - (match_operand:SVE_I 4 "register_operand")]
> + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
> UNSPEC_SEL))]
> "TARGET_SVE"
> -{
> - bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
> - aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
> -})
> +)
>
> (define_expand "cond_<optab><mode>"
> [(set (match_operand:SVE_SDI 0 "register_operand")
> @@ -1832,19 +1829,12 @@
> (SVE_INT_BINARY_SD:SVE_SDI
> (match_operand:SVE_SDI 2 "register_operand")
> (match_operand:SVE_SDI 3 "register_operand"))
> - (match_operand:SVE_SDI 4 "register_operand")]
> + (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
> UNSPEC_SEL))]
> "TARGET_SVE"
> -{
> - bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
> - aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
> -})
> -
> -;; Predicated integer operations.
> -;; All other things being equal, prefer the patterns for which the
> -;; destination matches the select input, as that gives us the most
> -;; freedom to swap the other operands.
> +)
>
> +;; Predicated integer operations with select matching the output operand.
> (define_insn "*cond_<optab><mode>_0"
> [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
> (unspec:SVE_I
> @@ -1945,6 +1935,87 @@
> [(set_attr "movprfx" "*,yes")]
> )
>
> +;; Predicated integer operations with select matching zero.
> +(define_insn "*cond_<optab><mode>_z"
> + [(set (match_operand:SVE_I 0 "register_operand" "=&w")
> + (unspec:SVE_I
> + [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + (SVE_INT_BINARY:SVE_I
> + (match_operand:SVE_I 2 "register_operand" "w")
> + (match_operand:SVE_I 3 "register_operand" "w"))
> + (match_operand:SVE_I 4 "aarch64_simd_imm_zero")]
> + UNSPEC_SEL))]
> + "TARGET_SVE"
> + "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> + [(set_attr "movprfx" "yes")]
> +)
> +
> +(define_insn "*cond_<optab><mode>_z"
> + [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
> + (unspec:SVE_SDI
> + [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + (SVE_INT_BINARY_SD:SVE_SDI
> + (match_operand:SVE_SDI 2 "register_operand" "w")
> + (match_operand:SVE_SDI 3 "register_operand" "w"))
> + (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")]
> + UNSPEC_SEL))]
> + "TARGET_SVE"
> + "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> + [(set_attr "movprfx" "yes")]
> +)
> +
> +;; Synthetic predications with select unmatched.
> +(define_insn "*cond_<optab><mode>_any"
> + [(set (match_operand:SVE_I 0 "register_operand" "=&w")
> + (unspec:SVE_I
> + [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + (SVE_INT_BINARY:SVE_I
> + (match_operand:SVE_I 2 "register_operand" "w")
> + (match_operand:SVE_I 3 "register_operand" "w"))
> + (match_operand:SVE_I 4 "register_operand" "w")]
> + UNSPEC_SEL))]
> + "TARGET_SVE"
> + "#"
> +)
> +
> +(define_insn "*cond_<optab><mode>_any"
> + [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
> + (unspec:SVE_SDI
> + [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + (SVE_INT_BINARY_SD:SVE_SDI
> + (match_operand:SVE_SDI 2 "register_operand" "w")
> + (match_operand:SVE_SDI 3 "register_operand" "w"))
> + (match_operand:SVE_SDI 4 "register_operand" "w")]
> + UNSPEC_SEL))]
> + "TARGET_SVE"
> + "#"
> +)
> +
> +(define_split
> + [(set (match_operand:SVE_I 0 "register_operand")
> + (unspec:SVE_I
> + [(match_operand:<VPRED> 1 "register_operand")
> + (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator"
> + [(match_operand:SVE_I 2 "register_operand")
> + (match_operand:SVE_I 3 "register_operand")])
> + (match_operand:SVE_I 4 "register_operand")]
> + UNSPEC_SEL))]
> + "TARGET_SVE && reload_completed
> + && !(rtx_equal_p (operands[0], operands[4])
> + || rtx_equal_p (operands[2], operands[4])
> + || rtx_equal_p (operands[3], operands[4]))"
> + ; Not matchable by any one insn or movprfx insn. We need a separate select.
> + [(set (match_dup 0)
> + (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)]
> + UNSPEC_SEL))
> + (set (match_dup 0)
> + (unspec:SVE_I
> + [(match_dup 1)
> + (match_op_dup 5 [(match_dup 0) (match_dup 3)])
> + (match_dup 0)]
> + UNSPEC_SEL))]
> +)
> +
> ;; Set operand 0 to the last active element in operand 3, or to tied
> ;; operand 1 if no elements are active.
> (define_insn "fold_extract_last_<mode>"
> @@ -2731,12 +2802,10 @@
> [(match_operand:SVE_F 2 "register_operand")
> (match_operand:SVE_F 3 "register_operand")]
> SVE_COND_FP_BINARY)
> - (match_operand:SVE_F 4 "register_operand")]
> + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
> UNSPEC_SEL))]
> "TARGET_SVE"
> -{
> - aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
> -})
> +)
>
> ;; Predicated floating-point operations with select matching output.
> (define_insn "*cond_<optab><mode>_0"
> @@ -2744,8 +2813,7 @@
> (unspec:SVE_F
> [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
> (unspec:SVE_F
> - [(match_dup 1)
> - (match_operand:SVE_F 2 "register_operand" "0, w, w")
> + [(match_operand:SVE_F 2 "register_operand" "0, w, w")
> (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
> SVE_COND_FP_BINARY)
> (match_dup 0)]
> @@ -2794,6 +2862,50 @@
> [(set_attr "movprfx" "*,yes")]
> )
>
> +;; Predicated floating-point operations with select matching zero.
> +(define_insn "*cond_<optab><mode>_z"
> + [(set (match_operand:SVE_F 0 "register_operand" "=&w")
> + (unspec:SVE_F
> + [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + (unspec:SVE_F
> + [(match_operand:SVE_F 2 "register_operand" "w")
> + (match_operand:SVE_F 3 "register_operand" "w")]
> + SVE_COND_FP_BINARY)
> + (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
> + UNSPEC_SEL))]
> + "TARGET_SVE"
> + "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> + [(set_attr "movprfx" "yes")]
> +)
> +
> +;; Synthetic predication of floating-point operations with select unmatched.
> +(define_insn_and_split "*cond_<optab><mode>_any"
> + [(set (match_operand:SVE_F 0 "register_operand" "=&w")
> + (unspec:SVE_F
> + [(match_operand:<VPRED> 1 "register_operand" "Upl")
> + (unspec:SVE_F
> + [(match_operand:SVE_F 2 "register_operand" "w")
> + (match_operand:SVE_F 3 "register_operand" "w")]
> + SVE_COND_FP_BINARY)
> + (match_operand:SVE_F 4 "register_operand" "w")]
> + UNSPEC_SEL))]
> + "TARGET_SVE"
> + "#"
> + "&& reload_completed
> + && !(rtx_equal_p (operands[0], operands[4])
> + || rtx_equal_p (operands[2], operands[4])
> + || rtx_equal_p (operands[3], operands[4]))"
> + ; Not matchable by any one insn or movprfx insn. We need a separate select.
> + [(set (match_dup 0)
> + (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL))
> + (set (match_dup 0)
> + (unspec:SVE_F
> + [(match_dup 1)
> + (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY)
> + (match_dup 0)]
> + UNSPEC_SEL))]
> +)
> +
> ;; Shift an SVE vector left and insert a scalar into element 0.
> (define_insn "vec_shl_insert_<mode>"
> [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
> diff --git a/gcc/config/aarch64/predicates.md
> b/gcc/config/aarch64/predicates.md
> index 7aec76d681f..4acbc218a8d 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -625,3 +625,6 @@
> ;; A special predicate that doesn't match a particular mode.
> (define_special_predicate "aarch64_any_register_operand"
> (match_code "reg"))
> +
> +(define_predicate "aarch64_sve_any_binary_operator"
> + (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))