Re: [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select

Richard Sandiford Mon, 02 Jul 2018 04:58:20 -0700

Richard Henderson <[email protected]> writes:
>       * config/aarch64/aarch64-protos.h, config/aarch64/aarch64.c
>       (aarch64_sve_prepare_conditional_op): Remove.
>       * config/aarch64/aarch64-sve.md (cond_<SVE_INT_BINARY><SVE_I>):
>       Allow aarch64_simd_reg_or_zero as select operand; remove
>       the aarch64_sve_prepare_conditional_op call.
>       (cond_<SVE_INT_BINARY_SD><SVE_SDI>): Likewise.
>       (cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.
>       (*cond_<SVE_INT_BINARY><SVE_I>_z): New pattern.
>       (*cond_<SVE_INT_BINARY_SD><SVE_SDI>_z): New pattern.
>       (*cond_<SVE_COND_FP_BINARY><SVE_F>_z): New pattern.
>       (*cond_<SVE_INT_BINARY><SVE_I>_any): New pattern.
>       (*cond_<SVE_INT_BINARY_SD><SVE_SDI>_any): New pattern.
>       (*cond_<SVE_COND_FP_BINARY><SVE_F>_any): New pattern,
>       and splitters to match all of the *_any patterns.
>       * config/aarch64/predicates.md (aarch64_sve_any_binary_operator): New.
> ---
>  gcc/config/aarch64/aarch64-protos.h |   1 -
>  gcc/config/aarch64/aarch64.c        |  54 ----------
>  gcc/config/aarch64/aarch64-sve.md   | 154 ++++++++++++++++++++++++----
>  gcc/config/aarch64/predicates.md    |   3 +
>  4 files changed, 136 insertions(+), 76 deletions(-)


OK, thanks.

Richard

>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 87c6ae20278..514ddc457ca 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -513,7 +513,6 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
>  void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
>  bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
>  void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
> -void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
>  #endif /* RTX_CODE */
>  
>  void aarch64_init_builtins (void);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3af7e98e166..d75d45f4b8b 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -16058,60 +16058,6 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
>    emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
>  }
>  
> -/* Prepare a cond_<optab><mode> operation that has the operands
> -   given by OPERANDS, where:
> -
> -   - operand 0 is the destination
> -   - operand 1 is a predicate
> -   - operands 2 to NOPS - 2 are the operands to an operation that is
> -     performed for active lanes
> -   - operand NOPS - 1 specifies the values to use for inactive lanes.
> -
> -   COMMUTATIVE_P is true if operands 2 and 3 are commutative.  In that case,
> -   no pattern is provided for a tie between operands 3 and NOPS - 1.  */
> -
> -void
> -aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
> -                                 bool commutative_p)
> -{
> -  /* We can do the operation directly if the "else" value matches one
> -     of the other inputs.  */
> -  for (unsigned int i = 2; i < nops - 1; ++i)
> -    if (rtx_equal_p (operands[i], operands[nops - 1]))
> -      {
> -     if (i == 3 && commutative_p)
> -       std::swap (operands[2], operands[3]);
> -     return;
> -      }
> -
> -  /* If the "else" value is different from the other operands, we have
> -     the choice of doing a SEL on the output or a SEL on an input.
> -     Neither choice is better in all cases, but one advantage of
> -     selecting the input is that it can avoid a move when the output
> -     needs to be distinct from the inputs.  E.g. if operand N maps to
> -     register N, selecting the output would give:
> -
> -     MOVPRFX Z0.S, Z2.S
> -     ADD Z0.S, P1/M, Z0.S, Z3.S
> -     SEL Z0.S, P1, Z0.S, Z4.S
> -
> -     whereas selecting the input avoids the MOVPRFX:
> -
> -     SEL Z0.S, P1, Z2.S, Z4.S
> -     ADD Z0.S, P1/M, Z0.S, Z3.S.
> -
> -     ??? Matching the other input can produce
> -
> -     MOVPRFX Z4.S, P1/M, Z2.S
> -     ADD Z4.S, P1/M, Z4.S, Z3.S
> -   */
> -  machine_mode mode = GET_MODE (operands[0]);
> -  rtx temp = gen_reg_rtx (mode);
> -  rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
> -  emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
> -  operands[2] = operands[nops - 1] = temp;
> -}
> -
>  /* Implement TARGET_MODES_TIEABLE_P.  In principle we should always return
>     true.  However due to issues with register allocation it is preferable
>     to avoid tieing integer scalar and FP scalar modes.  Executing integer
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index db16affc093..b16d0455159 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -1817,13 +1817,10 @@
>          (SVE_INT_BINARY:SVE_I
>            (match_operand:SVE_I 2 "register_operand")
>            (match_operand:SVE_I 3 "register_operand"))
> -        (match_operand:SVE_I 4 "register_operand")]
> +        (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE"
> -{
> -  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
> -  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
> -})
> +)
>  
>  (define_expand "cond_<optab><mode>"
>    [(set (match_operand:SVE_SDI 0 "register_operand")
> @@ -1832,19 +1829,12 @@
>          (SVE_INT_BINARY_SD:SVE_SDI
>            (match_operand:SVE_SDI 2 "register_operand")
>            (match_operand:SVE_SDI 3 "register_operand"))
> -        (match_operand:SVE_SDI 4 "register_operand")]
> +        (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE"
> -{
> -  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
> -  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
> -})
> -
> -;; Predicated integer operations.
> -;; All other things being equal, prefer the patterns for which the
> -;; destination matches the select input, as that gives us the most
> -;; freedom to swap the other operands.
> +)
>  
> +;; Predicated integer operations with select matching the output operand.
>  (define_insn "*cond_<optab><mode>_0"
>    [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
>       (unspec:SVE_I
> @@ -1945,6 +1935,87 @@
>    [(set_attr "movprfx" "*,yes")]
>  )
>  
> +;; Predicated integer operations with select matching zero.
> +(define_insn "*cond_<optab><mode>_z"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=&w")
> +     (unspec:SVE_I
> +       [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +        (SVE_INT_BINARY:SVE_I
> +          (match_operand:SVE_I 2 "register_operand" "w")
> +          (match_operand:SVE_I 3 "register_operand" "w"))
> +        (match_operand:SVE_I 4 "aarch64_simd_imm_zero")]
> +       UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "yes")]
> +)
> +
> +(define_insn "*cond_<optab><mode>_z"
> +  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
> +     (unspec:SVE_SDI
> +       [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +        (SVE_INT_BINARY_SD:SVE_SDI
> +          (match_operand:SVE_SDI 2 "register_operand" "w")
> +          (match_operand:SVE_SDI 3 "register_operand" "w"))
> +        (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")]
> +       UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "yes")]
> +)
> +
> +;; Synthetic predication of integer operations with select unmatched.
> +(define_insn "*cond_<optab><mode>_any"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=&w")
> +     (unspec:SVE_I
> +       [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +        (SVE_INT_BINARY:SVE_I
> +          (match_operand:SVE_I 2 "register_operand" "w")
> +          (match_operand:SVE_I 3 "register_operand" "w"))
> +        (match_operand:SVE_I 4 "register_operand"   "w")]
> +       UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "#"
> +)
> +
> +(define_insn "*cond_<optab><mode>_any"
> +  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
> +     (unspec:SVE_SDI
> +       [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +        (SVE_INT_BINARY_SD:SVE_SDI
> +          (match_operand:SVE_SDI 2 "register_operand" "w")
> +          (match_operand:SVE_SDI 3 "register_operand" "w"))
> +        (match_operand:SVE_SDI 4 "register_operand"   "w")]
> +       UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "#"
> +)
> +
> +(define_split
> +  [(set (match_operand:SVE_I 0 "register_operand")
> +     (unspec:SVE_I
> +       [(match_operand:<VPRED> 1 "register_operand")
> +        (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator"
> +          [(match_operand:SVE_I 2 "register_operand")
> +           (match_operand:SVE_I 3 "register_operand")])
> +        (match_operand:SVE_I 4 "register_operand")]
> +       UNSPEC_SEL))]
> +  "TARGET_SVE && reload_completed
> +   && !(rtx_equal_p (operands[0], operands[4])
> +        || rtx_equal_p (operands[2], operands[4])
> +        || rtx_equal_p (operands[3], operands[4]))"
> +  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
> +  [(set (match_dup 0)
> +     (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)]
> +                      UNSPEC_SEL))
> +   (set (match_dup 0)
> +     (unspec:SVE_I
> +       [(match_dup 1)
> +        (match_op_dup 5 [(match_dup 0) (match_dup 3)])
> +           (match_dup 0)]
> +       UNSPEC_SEL))]
> +)
> +
>  ;; Set operand 0 to the last active element in operand 3, or to tied
>  ;; operand 1 if no elements are active.
>  (define_insn "fold_extract_last_<mode>"
> @@ -2731,12 +2802,10 @@
>            [(match_operand:SVE_F 2 "register_operand")
>             (match_operand:SVE_F 3 "register_operand")]
>            SVE_COND_FP_BINARY)
> -        (match_operand:SVE_F 4 "register_operand")]
> +        (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE"
> -{
> -  aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
> -})
> +)
>  
>  ;; Predicated floating-point operations with select matching output.
>  (define_insn "*cond_<optab><mode>_0"
> @@ -2744,8 +2813,7 @@
>       (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>          (unspec:SVE_F
> -          [(match_dup 1)
> -           (match_operand:SVE_F 2 "register_operand" "0, w, w")
> +          [(match_operand:SVE_F 2 "register_operand" "0, w, w")
>             (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
>            SVE_COND_FP_BINARY)
>          (match_dup 0)]
> @@ -2794,6 +2862,50 @@
>    [(set_attr "movprfx" "*,yes")]
>  )
>  
> +;; Predicated floating-point operations with select matching zero.
> +(define_insn "*cond_<optab><mode>_z"
> +  [(set (match_operand:SVE_F 0 "register_operand" "=&w")
> +     (unspec:SVE_F
> +       [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +        (unspec:SVE_F
> +          [(match_operand:SVE_F 2 "register_operand" "w")
> +           (match_operand:SVE_F 3 "register_operand" "w")]
> +          SVE_COND_FP_BINARY)
> +        (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
> +       UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "yes")]
> +)
> +
> +;; Synthetic predication of floating-point operations with select unmatched.
> +(define_insn_and_split "*cond_<optab><mode>_any"
> +  [(set (match_operand:SVE_F 0 "register_operand" "=&w")
> +     (unspec:SVE_F
> +       [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +        (unspec:SVE_F
> +          [(match_operand:SVE_F 2 "register_operand" "w")
> +           (match_operand:SVE_F 3 "register_operand" "w")]
> +          SVE_COND_FP_BINARY)
> +        (match_operand:SVE_F 4 "register_operand" "w")]
> +       UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& reload_completed
> +   && !(rtx_equal_p (operands[0], operands[4])
> +        || rtx_equal_p (operands[2], operands[4])
> +        || rtx_equal_p (operands[3], operands[4]))"
> +  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
> +  [(set (match_dup 0)
> +     (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL))
> +   (set (match_dup 0)
> +     (unspec:SVE_F
> +       [(match_dup 1)
> +        (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY)
> +           (match_dup 0)]
> +       UNSPEC_SEL))]
> +)
> +
>  ;; Shift an SVE vector left and insert a scalar into element 0.
>  (define_insn "vec_shl_insert_<mode>"
>    [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index 7aec76d681f..4acbc218a8d 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -625,3 +625,6 @@
>  ;; A special predicate that doesn't match a particular mode.
>  (define_special_predicate "aarch64_any_register_operand"
>    (match_code "reg"))
> +
> +(define_predicate "aarch64_sve_any_binary_operator"
> +  (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))

Re: [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select

Reply via email to