https://gcc.gnu.org/g:b696e5de16f5f0e1403a03e27e0a2e159a37cf83

commit b696e5de16f5f0e1403a03e27e0a2e159a37cf83
Author: Mikael Morin <mik...@gcc.gnu.org>
Date:   Thu Aug 8 13:44:16 2024 +0200

    fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]
    
    Enable the generation of inline code for MINLOC/MAXLOC when argument
    ARRAY is of integral type, DIM is a constant, and MASK is scalar (only
    absent MASK or rank 1 ARRAY were inlined before).
    
    Scalar masks are implemented with a wrapping condition around the code
    one would generate if MASK wasn't present, so they are easy to support
    once inline code without MASK is working.
    
    With this change, there are both expressions evaluated inside the nested
    loop (ARRAY, and in the future MASK if non-scalar) and expressions evaluated
    outside of it (MASK if scalar).  Both have to advance the scalarization
    chain passed in argument SE to gfc_conv_intrinsic_minmaxloc as they are
    evaluated, but expressions evaluated from within the nested loop
    additionally have to advance the nested scalarization chain of the reduction
    loop.  This is normally handled transparently through the inheritance that
    is defined when initializing gfc_se structs, but there has to be some
    variable to inherit from, and there is a single one, SE.  This variable is
    kept as base for out of nested loop expressions (scalar MASK), and this
    change introduces a new variable to hold the current advance of the nested
    loop scalarization chain and serve as inheritance base to evaluate nested
    loop expressions (just ARRAY for now, additionally non-scalar MASK later).
    
            PR fortran/90608
    
    gcc/fortran/ChangeLog:
    
            * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE
            if MASK is scalar.
            (walk_inline_intrinsic_minmaxloc): Append to the scalarization chain
            a scalar element for MASK if it's present.
            (gfc_conv_intrinsic_minmaxloc): Use a local gfc_se struct to serve
            as base for all the expressions evaluated in the nested loop.  To
            evaluate MASK in a nested loop, enable usage of the scalarizer and
            set the current scalarization chain element to use to that of the
            original passed in SE argument.  And use the nested loop from the
            scalarizer instead of the local loop in that case.
    
    gcc/testsuite/ChangeLog:
    
            * gfortran.dg/maxloc_bounds_8.f90: Accept the error message
            generated by the scalarizer in case the MAXLOC intrinsic call is
            implemented through inline code.
            * gfortran.dg/minmaxloc_20.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc                |  27 ++--
 gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90 |   4 +-
 gcc/testsuite/gfortran.dg/minmaxloc_20.f90    | 182 ++++++++++++++++++++++++++
 3 files changed, 201 insertions(+), 12 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index dedb49b4a64e..cd6aca51f218 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5479,6 +5479,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_ss *maskss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
+  gfc_se nested_se;
   gfc_se *base_se;
   gfc_expr *arrayexpr;
   gfc_expr *maskexpr;
@@ -5616,7 +5617,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_block_to_block (&se->pre, &backse.post);
 
   if (nested_loop)
-    base_se = se;
+    {
+      gfc_init_se (&nested_se, se);
+      base_se = &nested_se;
+    }
   else
     {
       /* Walk the arguments.  */
@@ -5706,7 +5710,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   if (nested_loop)
     {
-      ploop = enter_nested_loop (se);
+      ploop = enter_nested_loop (&nested_se);
       ploop->temp_dim = 1;
     }
   else
@@ -6063,21 +6067,19 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
     {
       tree ifmask;
 
-      gcc_assert (!nested_loop);
-
-      gfc_init_se (&maskse, NULL);
+      gfc_init_se (&maskse, nested_loop ? se : nullptr);
       gfc_conv_expr_val (&maskse, maskexpr);
       gfc_add_block_to_block (&se->pre, &maskse.pre);
       gfc_init_block (&block);
-      gfc_add_block_to_block (&block, &loop.pre);
-      gfc_add_block_to_block (&block, &loop.post);
+      gfc_add_block_to_block (&block, &ploop->pre);
+      gfc_add_block_to_block (&block, &ploop->post);
       tmp = gfc_finish_block (&block);
 
       /* For the else part of the scalar mask, just initialize
         the pos variable the same way as above.  */
 
       gfc_init_block (&elseblock);
-      for (int i = 0; i < loop.dimen; i++)
+      for (int i = 0; i < ploop->dimen; i++)
        gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node);
       elsetmp = gfc_finish_block (&elseblock);
       ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask);
@@ -11857,9 +11859,11 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
 
   gfc_actual_arglist *array_arg = expr->value.function.actual;
   gfc_actual_arglist *dim_arg = array_arg->next;
+  gfc_actual_arglist *mask_arg = dim_arg->next;
 
   gfc_expr *array = array_arg->expr;
   gfc_expr *dim = dim_arg->expr;
+  gfc_expr *mask = mask_arg->expr;
 
   if (dim == nullptr)
     return gfc_get_array_ss (ss, expr, 1, GFC_SS_INTRINSIC);
@@ -11877,7 +11881,10 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
   gfc_ss *tail = nest_loop_dimension (tmp_ss, dim_val);
   tail->next = ss;
 
-  return array_ss;
+  if (mask)
+    tmp_ss = gfc_get_scalar_ss (tmp_ss, mask);
+
+  return tmp_ss;
 }
 
 
@@ -12038,7 +12045,7 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
        if (array->ts.type != BT_INTEGER)
          return false;
 
-       if (mask == nullptr)
+       if (mask == nullptr || mask->rank == 0)
          return true;
 
        return false;
diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90 
b/gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90
index 4ec113716953..ace7d43054ca 100644
--- a/gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90
+++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90
@@ -1,6 +1,6 @@
 ! { dg-do run }
 ! { dg-options "-fbounds-check" }
-! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic in 
dimension 1: is 3, should be 2" }
+! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic in 
dimension 1: is 3, should be 2|Array bound mismatch for dimension 1 of array 
'res' .3/2." }
 program main
   integer(kind=4), allocatable :: f(:,:)
   logical, allocatable :: m(:,:)
@@ -12,5 +12,5 @@ program main
   res = maxloc(f,dim=1,mask=.true.)
   write(line,fmt='(80I1)') res
 end program main
-! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic in dimension 1: is 3, should be 2" }
+! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic in dimension 1: is 3, should be 2|Array bound mismatch for 
dimension 1 of array 'res' .3/2." }
 
diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_20.f90 
b/gcc/testsuite/gfortran.dg/minmaxloc_20.f90
new file mode 100644
index 000000000000..f87d66a4ed38
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmaxloc_20.f90
@@ -0,0 +1,182 @@
+! { dg-do compile }
+! { dg-additional-options "-O -fdump-tree-original" }
+! { dg-final { scan-tree-dump-not "gfortran_\[sm\]?minloc" "original" } }
+! { dg-final { scan-tree-dump-not "gfortran_\[sm\]?maxloc" "original" } }
+!
+! PR fortran/90608
+! Check that all MINLOC and MAXLOC calls are inlined with optimizations,
+! when ARRAY is of integral type, DIM is a constant, and MASK is a scalar.
+
+subroutine check_maxloc
+  implicit none
+  integer, parameter :: data60(*) = (/ 2, 5, 4, 6, 0, 9, 3, 5, 4, 4,  &
+                                       1, 7, 3, 2, 1, 2, 5, 4, 6, 0,  &
+                                       9, 3, 5, 4, 4, 1, 7, 3, 2, 1,  &
+                                       2, 5, 4, 6, 0, 9, 3, 5, 4, 4,  &
+                                       1, 7, 3, 2, 1, 2, 5, 4, 6, 0,  &
+                                       9, 3, 5, 4, 4, 1, 7, 3, 2, 1  /)
+  integer, parameter :: data1(*) = (/ 2, 3, 2, 3,  &
+                                      1, 2, 3, 2,  &
+                                      3, 1, 2, 3,  &
+                                      2, 3, 1, 2,  &
+                                      3, 2, 3, 1  /)
+  integer, parameter :: data2(*) = (/ 2, 1, 2,  &
+                                      3, 2, 3,  &
+                                      4, 3, 4,  &
+                                      2, 1, 2,  &
+                                      1, 2, 1  /)
+  integer, parameter :: data3(*) = (/ 5, 1, 5,  &
+                                      1, 2, 1,  &
+                                      2, 1, 2,  &
+                                      3, 2, 3  /)
+  call check_int_const_shape_rank_3_true_mask
+  call check_int_const_shape_rank_3_false_mask
+  call check_int_alloc_rank_3_true_mask
+  call check_int_alloc_rank_3_false_mask
+contains
+  subroutine check_int_const_shape_rank_3_true_mask()
+    integer :: a(3,4,5)
+    integer, allocatable :: r(:,:)
+    a = reshape(data60, shape(a))
+    r = maxloc(a, dim = 1, mask = .true.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 21
+    if (any(r /= reshape(data1, (/ 4, 5 /)))) stop 22
+    r = maxloc(a, dim = 2, mask = .true.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 23
+    if (any(r /= reshape(data2, (/ 3, 5 /)))) stop 24
+    r = maxloc(a, dim = 3, mask = .true.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 25
+    if (any(r /= reshape(data3, (/ 3, 4 /)))) stop 26
+  end subroutine
+  subroutine check_int_const_shape_rank_3_false_mask()
+    integer :: a(3,4,5)
+    integer, allocatable :: r(:,:)
+    a = reshape(data60, shape(a))
+    r = maxloc(a, dim = 1, mask = .false.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 31
+    if (any(r /= 0)) stop 32
+    r = maxloc(a, dim = 2, mask = .false.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 33
+    if (any(r /= 0)) stop 34
+    r = maxloc(a, dim = 3, mask = .false.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 35
+    if (any(r /= 0)) stop 36
+  end subroutine
+  subroutine check_int_alloc_rank_3_true_mask()
+    integer, allocatable :: a(:,:,:)
+    integer, allocatable :: r(:,:)
+    allocate(a(3,4,5))
+    a(:,:,:) = reshape(data60, shape(a))
+    r = maxloc(a, dim = 1, mask = .true.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 81
+    if (any(r /= reshape(data1, (/ 4, 5 /)))) stop 82
+    r = maxloc(a, dim = 2, mask = .true.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 83
+    if (any(r /= reshape(data2, (/ 3, 5 /)))) stop 84
+    r = maxloc(a, dim = 3, mask = .true.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 85
+    if (any(r /= reshape(data3, (/ 3, 4 /)))) stop 86
+  end subroutine
+  subroutine check_int_alloc_rank_3_false_mask()
+    integer, allocatable :: a(:,:,:)
+    integer, allocatable :: r(:,:)
+    allocate(a(3,4,5))
+    a(:,:,:) = reshape(data60, shape(a))
+    r = maxloc(a, dim = 1, mask = .false.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 91
+    if (any(r /= 0)) stop 92
+    r = maxloc(a, dim = 2, mask = .false.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 93
+    if (any(r /= 0)) stop 94
+    r = maxloc(a, dim = 3, mask = .false.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 95
+    if (any(r /= 0)) stop 96
+  end subroutine
+end subroutine
+
+subroutine check_minloc
+  implicit none
+  integer, parameter :: data60(*) = (/ 7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+                                       8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+                                       0, 6, 4, 5, 5, 8, 2, 6, 7, 8,  &
+                                       7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+                                       8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+                                       0, 6, 4, 5, 5, 8, 2, 6, 7, 8  /)
+  integer, parameter :: data1(*) = (/ 2, 3, 2, 3,  &
+                                      1, 2, 3, 2,  &
+                                      3, 1, 2, 3,  &
+                                      2, 3, 1, 2,  &
+                                      3, 2, 3, 1  /)
+  integer, parameter :: data2(*) = (/ 2, 1, 2,  &
+                                      3, 2, 3,  &
+                                      4, 3, 4,  &
+                                      2, 1, 2,  &
+                                      1, 2, 1  /)
+  integer, parameter :: data3(*) = (/ 5, 1, 5,  &
+                                      1, 2, 1,  &
+                                      2, 1, 2,  &
+                                      3, 2, 3  /)
+  call check_int_const_shape_rank_3_true_mask
+  call check_int_const_shape_rank_3_false_mask
+  call check_int_alloc_rank_3_true_mask
+  call check_int_alloc_rank_3_false_mask
+contains
+  subroutine check_int_const_shape_rank_3_true_mask()
+    integer :: a(3,4,5)
+    integer, allocatable :: r(:,:)
+    a = reshape(data60, shape(a))
+    r = minloc(a, dim = 1, mask = .true.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 21
+    if (any(r /= reshape(data1, (/ 4, 5 /)))) stop 22
+    r = minloc(a, dim = 2, mask = .true.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 23
+    if (any(r /= reshape(data2, (/ 3, 5 /)))) stop 24
+    r = minloc(a, dim = 3, mask = .true.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 25
+    if (any(r /= reshape(data3, (/ 3, 4 /)))) stop 26
+  end subroutine
+  subroutine check_int_const_shape_rank_3_false_mask()
+    integer :: a(3,4,5)
+    integer, allocatable :: r(:,:)
+    a = reshape(data60, shape(a))
+    r = minloc(a, dim = 1, mask = .false.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 31
+    if (any(r /= 0)) stop 32
+    r = minloc(a, dim = 2, mask = .false.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 33
+    if (any(r /= 0)) stop 34
+    r = minloc(a, dim = 3, mask = .false.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 35
+    if (any(r /= 0)) stop 36
+  end subroutine
+  subroutine check_int_alloc_rank_3_true_mask()
+    integer, allocatable :: a(:,:,:)
+    integer, allocatable :: r(:,:)
+    allocate(a(3,4,5))
+    a(:,:,:) = reshape(data60, shape(a))
+    r = minloc(a, dim = 1, mask = .true.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 81
+    if (any(r /= reshape(data1, (/ 4, 5 /)))) stop 82
+    r = minloc(a, dim = 2, mask = .true.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 83
+    if (any(r /= reshape(data2, (/ 3, 5 /)))) stop 84
+    r = minloc(a, dim = 3, mask = .true.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 85
+    if (any(r /= reshape(data3, (/ 3, 4 /)))) stop 86
+  end subroutine
+  subroutine check_int_alloc_rank_3_false_mask()
+    integer, allocatable :: a(:,:,:)
+    integer, allocatable :: r(:,:)
+    allocate(a(3,4,5))
+    a(:,:,:) = reshape(data60, shape(a))
+    r = minloc(a, dim = 1, mask = .false.)
+    if (any(shape(r) /= (/ 4, 5 /))) stop 91
+    if (any(r /= 0)) stop 92
+    r = minloc(a, dim = 2, mask = .false.)
+    if (any(shape(r) /= (/ 3, 5 /))) stop 93
+    if (any(r /= 0)) stop 94
+    r = minloc(a, dim = 3, mask = .false.)
+    if (any(shape(r) /= (/ 3, 4 /))) stop 95
+    if (any(r /= 0)) stop 96
+  end subroutine
+end subroutine

Reply via email to