Hi Richard,
On 18/07/18 16:27, Richard Sandiford wrote:
Thanks for doing this.
Kyrill Tkachov <kyrylo.tkac...@foss.arm.com> writes:
+ calc = build_call_expr_internal_loc (input_location, ifn, type,
+ 2, mvar, convert (type, val));
(indentation looks off)
diff --git a/gcc/testsuite/gfortran.dg/max_fmaxl_aarch64.f90
b/gcc/testsuite/gfortran.dg/max_fmaxl_aarch64.f90
new file mode 100644
index
0000000000000000000000000000000000000000..8c8ea063e5d0718dc829c1f5574c5b46040e6786
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/max_fmaxl_aarch64.f90
@@ -0,0 +1,9 @@
+! { dg-do compile { target aarch64*-*-* } }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine fool (a, b, c, d, e, f, g, h)
+ real (kind=16) :: a, b, c, d, e, f, g, h
+ a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+! { dg-final { scan-tree-dump-times "__builtin_fmaxl " 7 "optimized" } }
diff --git a/gcc/testsuite/gfortran.dg/min_fminl_aarch64.f90
b/gcc/testsuite/gfortran.dg/min_fminl_aarch64.f90
new file mode 100644
index
0000000000000000000000000000000000000000..92368917fb48e0c468a16d080ab3a9ac842e01a7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/min_fminl_aarch64.f90
@@ -0,0 +1,9 @@
+! { dg-do compile { target aarch64*-*-* } }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine fool (a, b, c, d, e, f, g, h)
+ real (kind=16) :: a, b, c, d, e, f, g, h
+ a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+! { dg-final { scan-tree-dump-times "__builtin_fminl " 7 "optimized" } }
Do these still pass? I wouldn't have expected us to use __builtin_fmin*
and __builtin_fmax* now.
It would be good to have tests that we use ".FMIN" and ".FMAX" for kind=4
and kind=8 on AArch64, since that's really the end goal here.
Doh, yes. I had spotted that myself after I had sent out the patch.
I've fixed that and the indentation issue in this small revision.
Given Janne's comments I will commit this tomorrow if there are no objections.
This patch should be a conservative improvement. If the Fortran folks decide
to sacrifice the more predictable NaN handling in favour of more optimisation
leeway by using MIN/MAX_EXPR unconditionally we can do that as a follow-up.
Thanks for the help,
Kyrill
2018-07-18 Kyrylo Tkachov <kyrylo.tkac...@arm.com>
* trans-intrinsic.c: (gfc_conv_intrinsic_minmax): Emit MIN_MAX_EXPR
or IFN_FMIN/FMAX sequence to calculate the min/max when possible.
2018-07-18 Kyrylo Tkachov <kyrylo.tkac...@arm.com>
* gfortran.dg/max_fmax_aarch64.f90: New test.
* gfortran.dg/min_fmin_aarch64.f90: Likewise.
* gfortran.dg/minmax_integer.f90: Likewise.
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index d306e3a5a6209c1621d91f99ffc366acecd9c3d0..c9b5479740c3f98f906132fda5c252274c4b6edd 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
#include "trans.h"
#include "stringpool.h"
#include "fold-const.h"
+#include "internal-fn.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "toplev.h" /* For rest_of_decl_compilation. */
@@ -3874,14 +3875,15 @@ gfc_conv_intrinsic_ttynam (gfc_se * se, gfc_expr * expr)
minmax (a1, a2, a3, ...)
{
mvar = a1;
- if (a2 .op. mvar || isnan (mvar))
- mvar = a2;
- if (a3 .op. mvar || isnan (mvar))
- mvar = a3;
+ mvar = COMP (mvar, a2)
+ mvar = COMP (mvar, a3)
...
- return mvar
+ return mvar;
}
- */
+ Where COMP is MIN/MAX_EXPR for integral types or when we don't
+ care about NaNs, or IFN_FMIN/MAX when the target has support for
+ fast NaN-honouring min/max. When neither holds expand a sequence
+ of explicit comparisons. */
/* TODO: Mismatching types can occur when specific names are used.
These should be handled during resolution. */
@@ -3891,7 +3893,6 @@ gfc_conv_intrinsic_minmax (gfc_se * se, gfc_expr * expr, enum tree_code op)
tree tmp;
tree mvar;
tree val;
- tree thencase;
tree *args;
tree type;
gfc_actual_arglist *argexpr;
@@ -3912,55 +3913,77 @@ gfc_conv_intrinsic_minmax (gfc_se * se, gfc_expr * expr, enum tree_code op)
mvar = gfc_create_var (type, "M");
gfc_add_modify (&se->pre, mvar, args[0]);
- for (i = 1, argexpr = argexpr->next; i < nargs; i++)
- {
- tree cond, isnan;
+ internal_fn ifn = op == GT_EXPR ? IFN_FMAX : IFN_FMIN;
+
+ for (i = 1, argexpr = argexpr->next; i < nargs; i++, argexpr = argexpr->next)
+ {
+ tree cond = NULL_TREE;
val = args[i];
/* Handle absent optional arguments by ignoring the comparison. */
if (argexpr->expr->expr_type == EXPR_VARIABLE
&& argexpr->expr->symtree->n.sym->attr.optional
&& TREE_CODE (val) == INDIRECT_REF)
- cond = fold_build2_loc (input_location,
+ {
+ cond = fold_build2_loc (input_location,
NE_EXPR, logical_type_node,
TREE_OPERAND (val, 0),
build_int_cst (TREE_TYPE (TREE_OPERAND (val, 0)), 0));
- else
- {
- cond = NULL_TREE;
-
+ }
+ else if (!VAR_P (val) && !TREE_CONSTANT (val))
/* Only evaluate the argument once. */
- if (!VAR_P (val) && !TREE_CONSTANT (val))
- val = gfc_evaluate_now (val, &se->pre);
- }
+ val = gfc_evaluate_now (val, &se->pre);
- thencase = build2_v (MODIFY_EXPR, mvar, convert (type, val));
+ tree calc;
+ /* If we dealing with integral types or we don't care about NaNs
+ just do a MIN/MAX_EXPR. */
+ if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type))
+ {
+
+ tree_code code = op == GT_EXPR ? MAX_EXPR : MIN_EXPR;
+ calc = fold_build2_loc (input_location, code, type,
+ convert (type, val), mvar);
+ tmp = build2_v (MODIFY_EXPR, mvar, calc);
- tmp = fold_build2_loc (input_location, op, logical_type_node,
- convert (type, val), mvar);
+ }
+ /* If we care about NaNs and we have internal functions available for
+ fmin/fmax to perform the comparison, use those. */
+ else if (SCALAR_FLOAT_TYPE_P (type)
+ && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED))
+ {
+ calc = build_call_expr_internal_loc (input_location, ifn, type,
+ 2, mvar, convert (type, val));
+ tmp = build2_v (MODIFY_EXPR, mvar, calc);
- /* FIXME: When the IEEE_ARITHMETIC module is implemented, the call to
- __builtin_isnan might be made dependent on that module being loaded,
- to help performance of programs that don't rely on IEEE semantics. */
- if (FLOAT_TYPE_P (TREE_TYPE (mvar)))
+ }
+ /* Otherwise expand to:
+ mvar = a1;
+ if (a2 .op. mvar || isnan (mvar))
+ mvar = a2;
+ if (a3 .op. mvar || isnan (mvar))
+ mvar = a3;
+ ... */
+ else
{
- isnan = build_call_expr_loc (input_location,
- builtin_decl_explicit (BUILT_IN_ISNAN),
- 1, mvar);
+ tree isnan = build_call_expr_loc (input_location,
+ builtin_decl_explicit (BUILT_IN_ISNAN),
+ 1, mvar);
+ tmp = fold_build2_loc (input_location, op, logical_type_node,
+ convert (type, val), mvar);
+
tmp = fold_build2_loc (input_location, TRUTH_OR_EXPR,
- logical_type_node, tmp,
- fold_convert (logical_type_node, isnan));
+ logical_type_node, tmp,
+ fold_convert (logical_type_node, isnan));
+ tmp = build3_v (COND_EXPR, tmp,
+ build2_v (MODIFY_EXPR, mvar, convert (type, val)),
+ build_empty_stmt (input_location));
}
- tmp = build3_v (COND_EXPR, tmp, thencase,
- build_empty_stmt (input_location));
if (cond != NULL_TREE)
tmp = build3_v (COND_EXPR, cond, tmp,
build_empty_stmt (input_location));
-
gfc_add_expr_to_block (&se->pre, tmp);
- argexpr = argexpr->next;
}
se->expr = mvar;
}
diff --git a/gcc/testsuite/gfortran.dg/max_fmax_aarch64.f90 b/gcc/testsuite/gfortran.dg/max_fmax_aarch64.f90
new file mode 100644
index 0000000000000000000000000000000000000000..b818241a1f9aa7018efaf300cfecb70f413b7573
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/max_fmax_aarch64.f90
@@ -0,0 +1,15 @@
+! { dg-do compile { target aarch64*-*-* } }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine foo (a, b, c, d, e, f, g, h)
+ real (kind=8) :: a, b, c, d, e, f, g, h
+ a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+subroutine foof (a, b, c, d, e, f, g, h)
+ real (kind=4) :: a, b, c, d, e, f, g, h
+ a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+
+! { dg-final { scan-tree-dump-times "\.FMAX " 14 "optimized" } }
diff --git a/gcc/testsuite/gfortran.dg/min_fmin_aarch64.f90 b/gcc/testsuite/gfortran.dg/min_fmin_aarch64.f90
new file mode 100644
index 0000000000000000000000000000000000000000..009869b497df7737089971e00c01e1c29c0a3032
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/min_fmin_aarch64.f90
@@ -0,0 +1,15 @@
+! { dg-do compile { target aarch64*-*-* } }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine foo (a, b, c, d, e, f, g, h)
+ real (kind=8) :: a, b, c, d, e, f, g, h
+ a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+
+subroutine foof (a, b, c, d, e, f, g, h)
+ real (kind=4) :: a, b, c, d, e, f, g, h
+ a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+! { dg-final { scan-tree-dump-times "\.FMIN " 14 "optimized" } }
diff --git a/gcc/testsuite/gfortran.dg/minmax_integer.f90 b/gcc/testsuite/gfortran.dg/minmax_integer.f90
new file mode 100644
index 0000000000000000000000000000000000000000..5b6be38c7055ce4e8620cf75ec7d8a182436b24f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmax_integer.f90
@@ -0,0 +1,15 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-optimized" }
+
+subroutine foo (a, b, c, d, e, f, g, h)
+ integer (kind=4) :: a, b, c, d, e, f, g, h
+ a = min (a, b, c, d, e, f, g, h)
+end subroutine
+
+subroutine foof (a, b, c, d, e, f, g, h)
+ integer (kind=4) :: a, b, c, d, e, f, g, h
+ a = max (a, b, c, d, e, f, g, h)
+end subroutine
+
+! { dg-final { scan-tree-dump-times "MIN_EXPR" 7 "optimized" } }
+! { dg-final { scan-tree-dump-times "MAX_EXPR" 7 "optimized" } }