On Fri, May 8, 2020 at 3:02 PM Richard Biener <rguent...@suse.de> wrote:
>
>
> Currently we fail to optimize those which are used when MIN/MAX_EXPR
> cannot be used for FP values but the target has IEEE conforming
> implementations.
>
> This patch adds support for fmin/fmax detection to phiopt and
> makes the named patterns available in the x86 backend.  It also
> removes some superfluous restrictions from the SLP vectorizer
> to make SLP vectorizing IFN_FMIN/FMAX work.
>
> This also unblocks the fix for PR94865, restoring
> gcc.target/i386/pr54855-[89].c operation.
>
> Bootstrap & regtest running on x86_64-unknown-linux-gnu.
>
> The x86 portion did simplify a bit, is it still OK?  I verified
> vectorization works for SSE, AVX and AVX512 and 3dnow (it doesn't
> work for v2sf w/o -m3dnow even though "3dnow-in-sse" would work)

Ugh, please don't use named patterns for MMX. The mmx_ prefix is there
to *prevent* autovectorization. %mm registers are aliased inito x87
register set and EMMS insn is needed to switch back to x87 mode.

"3dnow-in-sse" would not work because in SSE mode, we leave two lanes
in XMM register unused. They should be zero, until they are not. There
can be many side effects from vector operations with effectively
undefined lanes (div-by-zero, exceptions, etc...), so we decided to
skip V2SF for mmx-with-sse.

Uros.

> Thanks,
> Richard.
>
> 2020-05-08  Richard Biener  <rguent...@suse.de>
>
>         PR tree-optimization/88540
>         * tree-ssa-phiopt.c: Include internal-fn.h and gimple-match.h.
>         (minmax_replacement): Parse IFN_FMIN/FMAX as MIN/MAX_EXPR.
>         Code-generate IFN_FMIN/FMAX if honoring NaNs or signed zeros.
>         * tree-vect-slp.c (vect_build_slp_tree_1): Remove superfluous
>         tests on calls.
>
>         * config/i386/i386.md (*ieee_s<ieee_maxmin><mode>3): Rename as
>         (f<ieee_maxmin><mode>3): this.
>         * config/i386/mmx.md (mmx_<code>v2sf3): Adjust.
>         (mmx_ieee_<ieee_maxmin>v2sf3): Rename as
>         (f<ieee_maxmin>v2sf3): this.
>         * config/i386/sse.md
>         (<code><mode>3<mask_name><round_saeonly_name>): Adjust.
>         (ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>):
>         Rename as
>         (f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>): this.
>
>         * gcc.target/i386/pr88540-1.c: New testcase.

OK with the renamed pattern, as described inline.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.md                   |  2 +-
>  gcc/config/i386/mmx.md                    |  4 +-
>  gcc/config/i386/sse.md                    |  4 +-
>  gcc/testsuite/gcc.target/i386/pr88540-1.c | 21 ++++++++++
>  gcc/tree-ssa-phiopt.c                     | 66 
> +++++++++++++++++++++++++------
>  gcc/tree-vect-slp.c                       |  3 --
>  6 files changed, 80 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr88540-1.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 8bfc9cb0b71..f1496a98456 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -18443,7 +18443,7 @@
>  ;; Their operands are not commutative, and thus they may be used in the
>  ;; presence of -0.0 and NaN.
>
> -(define_insn "*ieee_s<ieee_maxmin><mode>3"
> +(define_insn "f<ieee_maxmin><mode>3"
>    [(set (match_operand:MODEF 0 "register_operand" "=x,v")
>         (unspec:MODEF
>           [(match_operand:MODEF 1 "register_operand" "0,v")
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 472f90f9bc1..422d858f25d 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -301,7 +301,7 @@
>    if (!flag_finite_math_only || flag_signed_zeros)
>      {
>        operands[1] = force_reg (V2SFmode, operands[1]);
> -      emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
> +      emit_insn (gen_f<maxmin_float>v2sf3
>                  (operands[0], operands[1], operands[2]));
>        DONE;
>      }
> @@ -331,7 +331,7 @@
>  ;; Their operands are not commutative, and thus they may be used in the
>  ;; presence of -0.0 and NaN.
>
> -(define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
> +(define_insn "f<ieee_maxmin>v2sf3"

Please name this pattern "mmx_f<ieee_maxmin>v2sf" and update the caller above.

>    [(set (match_operand:V2SF 0 "register_operand" "=y")
>          (unspec:V2SF
>           [(match_operand:V2SF 1 "register_operand" "0")
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 7a7ecd4be87..ac350711527 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -2191,7 +2191,7 @@
>    if (!flag_finite_math_only || flag_signed_zeros)
>      {
>        operands[1] = force_reg (<MODE>mode, operands[1]);
> -      emit_insn 
> (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
> +      emit_insn (gen_f<maxmin_float><mode>3<mask_name><round_saeonly_name>
>                  (operands[0], operands[1], operands[2]
>                   <mask_operand_arg34>
>                   <round_saeonly_mask_arg3>));
> @@ -2229,7 +2229,7 @@
>  ;; Their operands are not commutative, and thus they may be used in the
>  ;; presence of -0.0 and NaN.
>
> -(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
> +(define_insn "f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
>    [(set (match_operand:VF 0 "register_operand" "=x,v")
>         (unspec:VF
>           [(match_operand:VF 1 "register_operand" "0,v")
> diff --git a/gcc/testsuite/gcc.target/i386/pr88540-1.c 
> b/gcc/testsuite/gcc.target/i386/pr88540-1.c
> new file mode 100644
> index 00000000000..a66b89ef66c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr88540-1.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msse2" } */
> +
> +void test1(double* __restrict d1, double* __restrict d2, double* __restrict 
> d3)
> +{
> +  for (int n = 0; n < 2; ++n)
> +    {
> +      d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
> +    }
> +}
> +
> +void test2(double* __restrict d1, double* __restrict d2, double* __restrict 
> d3)
> +{
> +  for (int n = 0; n < 2; ++n)
> +    {
> +      d3[n] = d1[n] > d2[n] ? d1[n] : d2[n];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler "minpd" } } */
> +/* { dg-final { scan-assembler "maxpd" } } */
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index b1e0dce93d8..7ceda4a37ab 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-inline.h"
>  #include "case-cfn-macros.h"
>  #include "tree-eh.h"
> +#include "internal-fn.h"
> +#include "gimple-match.h"
>
>  static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
>  static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
> @@ -1364,7 +1366,6 @@ minmax_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>  {
>    tree result, type, rhs;
>    gcond *cond;
> -  gassign *new_stmt;
>    edge true_edge, false_edge;
>    enum tree_code cmp, minmax, ass_code;
>    tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
> @@ -1373,7 +1374,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>    type = TREE_TYPE (PHI_RESULT (phi));
>
>    /* The optimization may be unsafe due to NaNs.  */
> -  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
> +  if ((HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
> +      && (!direct_internal_fn_supported_p (IFN_FMIN, type, OPTIMIZE_FOR_BOTH)
> +         || !direct_internal_fn_supported_p (IFN_FMAX, type,
> +                                             OPTIMIZE_FOR_BOTH)))
>      return false;
>
>    cond = as_a <gcond *> (last_stmt (cond_bb));
> @@ -1530,16 +1534,44 @@ minmax_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>        gimple *assign = last_and_only_stmt (middle_bb);
>        tree lhs, op0, op1, bound;
>
> -      if (!assign
> -         || gimple_code (assign) != GIMPLE_ASSIGN)
> +      if (!assign)
>         return false;
>
> -      lhs = gimple_assign_lhs (assign);
> -      ass_code = gimple_assign_rhs_code (assign);
> -      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
> +      if (is_gimple_assign (assign))
> +       {
> +         lhs = gimple_assign_lhs (assign);
> +         ass_code = gimple_assign_rhs_code (assign);
> +         if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
> +           return false;
> +         op0 = gimple_assign_rhs1 (assign);
> +         op1 = gimple_assign_rhs2 (assign);
> +       }
> +      else if (is_gimple_call (assign))
> +       {
> +         lhs = gimple_call_lhs (assign);
> +         if (!lhs)
> +           return false;
> +         combined_fn cfn = gimple_call_combined_fn (assign);
> +         switch (cfn)
> +           {
> +           CASE_CFN_FMIN:
> +           CASE_CFN_FMIN_FN:
> +             ass_code = MIN_EXPR;
> +             op0 = gimple_call_arg (assign, 0);
> +             op1 = gimple_call_arg (assign, 1);
> +             break;
> +           CASE_CFN_FMAX:
> +           CASE_CFN_FMAX_FN:
> +             ass_code = MAX_EXPR;
> +             op0 = gimple_call_arg (assign, 0);
> +             op1 = gimple_call_arg (assign, 1);
> +             break;
> +           default:
> +             return false;
> +           }
> +       }
> +      else
>         return false;
> -      op0 = gimple_assign_rhs1 (assign);
> -      op1 = gimple_assign_rhs2 (assign);
>
>        if (true_edge->src == middle_bb)
>         {
> @@ -1574,8 +1606,9 @@ minmax_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>                 return false;
>
>               /* We need BOUND <= LARGER.  */
> -             if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
> -                                                 bound, larger)))
> +             if (bound != larger
> +                 && !integer_nonzerop (fold_build2 (LE_EXPR, 
> boolean_type_node,
> +                                                    bound, larger)))
>                 return false;
>             }
>           else if (operand_equal_for_phi_arg_p (arg_false, smaller)
> @@ -1698,7 +1731,16 @@ minmax_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>      result = make_ssa_name (TREE_TYPE (result));
>
>    /* Emit the statement to compute min/max.  */
> -  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
> +  gimple *new_stmt;
> +  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
> +    {
> +      new_stmt = gimple_build_call_internal (minmax == MAX_EXPR
> +                                            ? IFN_FMAX : IFN_FMIN,
> +                                            2, arg0, arg1);
> +      gimple_call_set_lhs (new_stmt, result);
> +    }
> +  else
> +    new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
>    gsi = gsi_last_bb (cond_bb);
>    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
>
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index c097840b09a..b1dcaec3d9f 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -849,9 +849,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
> *swap,
>           else if ((gimple_call_internal_p (call_stmt)
>                     && (!vectorizable_internal_fn_p
>                         (gimple_call_internal_fn (call_stmt))))
> -                  || gimple_call_tail_p (call_stmt)
> -                  || gimple_call_noreturn_p (call_stmt)
> -                  || !gimple_call_nothrow_p (call_stmt)
>                    || gimple_call_chain (call_stmt))
>             {
>               if (dump_enabled_p ())
> --
> 2.13.7

Reply via email to