[gcc r15-4245] i386: Fix some patterns's mem attribute.

2024-10-10 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:9f2f108a8a68c7b7b2de5350439a8ab8e17a54da

commit r15-4245-g9f2f108a8a68c7b7b2de5350439a8ab8e17a54da
Author: Hu, Lin1 
Date:   Wed Oct 9 10:20:05 2024 +0800

i386: Fix some patterns's mem attribute.

Hi, all

This is another patch to modify some patterns' type attr from ssemov to
ssemov2.

Some ssemov patterns' mem attr should be load when their operand 2 is a
memory operand.

Bootstrapped and regtested on x86-64-linux-pc, OK for trunk?

BRs,
Lin

gcc/ChangeLog:

* config/i386/sse.md
(sse_movhlps): Change type attr from ssemov to ssemov2.
(sse_loadhps): Ditto.
(*vec_concat): Ditto.
(vec_setv2df_0): Ditto.
(sse_loadlps): Change attr from ssemov to ssemov2 except for 2, 3.
(sse2_loadhps): Change attr from ssemov to ssemov2 except for 0, 1.
(sse2_loadlpd): Change attr from ssemov to ssemov2 except for 0, 1,
2.
(sse2_movsd_): Change attr from ssemov to ssemov2 except for 
5.
(vec_concatv2df): Change attr from ssemov to ssemov2 except for 0, 
1,
2.
(*vec_concat): Change attr from ssemov to ssemov2 for 3, 4.
(vec_concatv2di): Change attr from ssemov to ssemov2 except for 0, 
1,
2, 3, 4, 5.

Diff:
---
 gcc/config/i386/sse.md | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ccef3e063eca..a45b50ad7324 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10995,7 +10995,7 @@
vmovlps\t{%H2, %1, %0|%0, %1, %H2}
%vmovhps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
@@ -11557,7 +11557,7 @@
vmovlhps\t{%2, %1, %0|%0, %1, %2}
%vmovlps\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
 
@@ -11610,7 +11610,7 @@
vmovlps\t{%2, %1, %0|%0, %1, %q2}
%vmovlps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
+   (set_attr "type" "sseshuf,sseshuf,ssemov2,ssemov2,ssemov")
(set (attr "length_immediate")
  (if_then_else (eq_attr "alternative" "0,1")
   (const_string "1")
@@ -11766,7 +11766,7 @@
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
   [(set_attr "isa" "noavx,avx,noavx,avx")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
 
@@ -12214,7 +12214,7 @@
movlpd\t{%2, %0|%0, %2}
vmovlpd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "mode" "DF")])
 
 (define_expand "vec_set"
@@ -14665,7 +14665,7 @@
#
#"
   [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
-   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
+   (set_attr "type" "ssemov2,ssemov2,sselog,sselog,ssemov,fmov,imov")
(set (attr "prefix_data16")
  (if_then_else (eq_attr "alternative" "0")
   (const_string "1")
@@ -14735,6 +14735,8 @@
  (const_string "fmov")
(eq_attr "alternative" "10")
  (const_string "imov")
+   (eq_attr "alternative" "0,1,2")
+ (const_string "ssemov2")
   ]
   (const_string "ssemov")))
(set (attr "prefix_data16")
@@ -14787,7 +14789,7 @@
  (if_then_else
(eq_attr "alternative" "5")
(const_string "sselog")
-   (const_string "ssemov")))
+   (const_string "ssemov2")))
(set (attr "prefix_data16")
  (if_then_else
(and (eq_attr "alternative" "2,4")
@@ -14859,7 +14861,7 @@
  (if_then_else
(eq_attr "alternative" "0,1,2")
(const_string "sselog")
-   (const_string "ssemov")))
+   (const_string "ssemov2")))
(set (attr "prefix_data16")
(if_then_else (eq_attr "alternative" "3")
  (const_string "1")
@@ -21545,7 +21547,7 @@
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
   [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
-   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
+   (set_attr "type" "sselog,sselog,ssemov,ssemov2,ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
(set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
 
@@ -21653,7 +21655,7 @@
  (if_then_else
(eq_attr "alternative" "0,1,2,3,4,5")
(const_string "sselog")
-   (const_string "ssemov")))

[gcc r15-4247] libstdc++: Fix some test failures with -fno-char8_t

2024-10-10 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:cb0988a659cef6324887018b9066c5f81b558832

commit r15-4247-gcb0988a659cef6324887018b9066c5f81b558832
Author: Jonathan Wakely 
Date:   Wed Oct 9 14:24:19 2024 +0100

libstdc++: Fix some test failures with -fno-char8_t

libstdc++-v3/ChangeLog:

* testsuite/20_util/duration/io.cc [!__cpp_lib_char8_t]: Define
char8_t as a typedef for unsigned char.
* testsuite/std/format/parse_ctx_neg.cc: Skip for -fno-char8_t.

Diff:
---
 libstdc++-v3/testsuite/20_util/duration/io.cc  | 10 --
 libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/20_util/duration/io.cc 
b/libstdc++-v3/testsuite/20_util/duration/io.cc
index 383fb60afe2a..0117673dbdc0 100644
--- a/libstdc++-v3/testsuite/20_util/duration/io.cc
+++ b/libstdc++-v3/testsuite/20_util/duration/io.cc
@@ -5,6 +5,10 @@
 #include 
 #include 
 
+#ifndef __cpp_lib_char8_t
+using char8_t = unsigned char; // Prevent errors if -fno-char8_t is used.
+#endif
+
 void
 test01()
 {
@@ -173,12 +177,14 @@ test_format()
 
 #if __cplusplus > 202002L
   static_assert( ! std::formattable, char> );
-  static_assert( ! std::formattable, char> );
   static_assert( ! std::formattable, char> );
   static_assert( ! std::formattable, char> );
-  static_assert( ! std::formattable, wchar_t> );
   static_assert( ! std::formattable, wchar_t> 
);
   static_assert( ! std::formattable, wchar_t> 
);
+#ifdef __cpp_lib_char8_t
+  static_assert( ! std::formattable, char> );
+  static_assert( ! std::formattable, wchar_t> );
+#endif
 #endif
 }
 
diff --git a/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc 
b/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc
index d6a4366d7d0b..f19107c886fc 100644
--- a/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc
+++ b/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc
@@ -1,4 +1,5 @@
 // { dg-do compile { target c++26 } }
+// { dg-skip-if "" { *-*-* } { "-fno-char8_t" } }
 
 #include 


[gcc r15-4246] Fix possible wrong-code with masked store-lanes

2024-10-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:36b9c5e6f3301d3d0165f578d020dcd350cd516d

commit r15-4246-g36b9c5e6f3301d3d0165f578d020dcd350cd516d
Author: Richard Biener 
Date:   Thu Oct 10 14:00:11 2024 +0200

Fix possible wrong-code with masked store-lanes

When we're doing masked store-lanes one mask element applies to all
loads of one struct element.  This requires uniform masks for all
of the SLP lanes, something we already compute into STMT_VINFO_SLP_VECT_ONLY
but fail to check when doing SLP store-lanes.  The following corrects
this.  The following also adjusts the store-lane heuristic to properly
check for masked or non-masked optab support.

* tree-vect-slp.cc (vect_slp_prefer_store_lanes_p): Allow
passing in of vectype, pass in whether the stores are masked
and query the correct optab.
(vect_build_slp_instance): Guard store-lanes query with
! STMT_VINFO_SLP_VECT_ONLY, guaranteeing a uniform mask.

Diff:
---
 gcc/tree-vect-slp.cc | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 96f1992cfbff..3024b87a1f83 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3504,17 +3504,22 @@ vect_match_slp_patterns (slp_instance instance, 
vec_info *vinfo,
 }
 
 /* STMT_INFO is a store group of size GROUP_SIZE that we are considering
-   splitting into two, with the first split group having size NEW_GROUP_SIZE.
+   vectorizing with VECTYPE that might be NULL.  MASKED_P indicates whether
+   the stores are masked.
Return true if we could use IFN_STORE_LANES instead and if that appears
to be the better approach.  */
 
 static bool
 vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
+  tree vectype, bool masked_p,
   unsigned int group_size,
   unsigned int new_group_size)
 {
-  tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
-  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+  if (!vectype)
+{
+  tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
+  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+}
   if (!vectype)
 return false;
   /* Allow the split if one of the two new groups would operate on full
@@ -3528,7 +3533,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, 
stmt_vec_info stmt_info,
   if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
   || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))
 return false;
-  return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST;
+  return vect_store_lanes_supported (vectype, group_size, masked_p) != 
IFN_LAST;
 }
 
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
@@ -3973,6 +3978,10 @@ vect_build_slp_instance (vec_info *vinfo,
   else if (is_a <loop_vec_info> (vinfo)
   && (group_size != 1 && i < group_size))
{
+ gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
+ bool masked_p = call
+ && gimple_call_internal_p (call)
+ && internal_fn_mask_index (gimple_call_internal_fn (call)) != -1;
  /* There are targets that cannot do even/odd interleaving schemes
 so they absolutely need to use load/store-lanes.  For now
 force single-lane SLP for them - they would be happy with
@@ -3987,9 +3996,10 @@ vect_build_slp_instance (vec_info *vinfo,
  bool want_store_lanes
= (! STMT_VINFO_GATHER_SCATTER_P (stmt_info)
   && ! STMT_VINFO_STRIDED_P (stmt_info)
+  && ! STMT_VINFO_SLP_VECT_ONLY (stmt_info)
   && compare_step_with_zero (vinfo, stmt_info) > 0
-  && vect_slp_prefer_store_lanes_p (vinfo, stmt_info,
-group_size, 1));
+  && vect_slp_prefer_store_lanes_p (vinfo, stmt_info, NULL_TREE,
+masked_p, group_size, 1));
  if (want_store_lanes || force_single_lane)
i = 1;
 
@@ -4074,14 +4084,14 @@ vect_build_slp_instance (vec_info *vinfo,
 
  /* Now re-assess whether we want store lanes in case the
 discovery ended up producing all single-lane RHSs.  */
- if (rhs_common_nlanes == 1
+ if (! want_store_lanes
+ && rhs_common_nlanes == 1
  && ! STMT_VINFO_GATHER_SCATTER_P (stmt_info)
  && ! STMT_VINFO_STRIDED_P (stmt_info)
+ && ! STMT_VINFO_SLP_VECT_ONLY (stmt_info)
  && compare_step_with_zero (vinfo, stmt_info) > 0
  && (vect_store_lanes_supported (SLP_TREE_VECTYPE (rhs_nodes[0]),
- group_size,
- SLP_TREE_CHILDREN
-  

[gcc r15-4237] tree-optimization/117060 - fix oversight in vect_build_slp_tree_1

2024-10-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:7ce2229d54d575d788b016f941aafd0464ea77f7

commit r15-4237-g7ce2229d54d575d788b016f941aafd0464ea77f7
Author: Richard Biener 
Date:   Thu Oct 10 14:15:13 2024 +0200

tree-optimization/117060 - fix oversight in vect_build_slp_tree_1

We are failing to match call vs. non-call when dealing with matching
loads or stores.

PR tree-optimization/117060
* tree-vect-slp.cc (vect_build_slp_tree_1): When comparing
calls also fail if the first isn't a call.

* gfortran.dg/pr117060.f90: New testcase.

Diff:
---
 gcc/testsuite/gfortran.dg/pr117060.f90 | 21 +
 gcc/tree-vect-slp.cc   |  5 +++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/pr117060.f90 
b/gcc/testsuite/gfortran.dg/pr117060.f90
new file mode 100644
index ..50004e1aaf3d
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr117060.f90
@@ -0,0 +1,21 @@
+! { dg-do compile }
+! { dg-options "-O2" }
+
+subroutine foo (out)
+
+implicit none
+
+real:: out(*)
+integer :: i,k
+real:: a(100)
+real:: b(100)
+
+k = 0
+do i = 1, 10
+  k = k + 1
+  out(k) = a(i)
+  k = k + 1
+  out(k) = sqrt((a(3*i)-b(4))**2 + (a(3*i+1)-b(4+1))**2)
+end do
+
+end subroutine
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8b53b0fdb16d..9bf6ae4ec8e0 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1367,8 +1367,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
  && first_stmt_code != CFN_MASK_LOAD
  && first_stmt_code != CFN_MASK_STORE)
{
- if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
-  call_stmt))
+ if (!is_a <gcall *> (stmts[0]->stmt)
+ || !compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
+ call_stmt))
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,


[gcc r15-4235] Allow SLP store of mixed external and constant

2024-10-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:b3221b838ee7ae7848e7194603acb18294b3

commit r15-4235-gb3221b838ee7ae7848e7194603acb18294b3
Author: Richard Biener 
Date:   Wed Oct 9 15:31:59 2024 +0200

Allow SLP store of mixed external and constant

vect_build_slp_tree_1 rejected this during SLP discovery because it
ran into the rhs code comparison code for stores.  The following
skips that completely for loads and stores as those are handled
later anyway.

This needs a heuristic adjustment in vect_get_and_check_slp_defs
to avoid fallout with regard to BB vectorization and splitting
of a store group vs. demoting one operand to external.

gcc.dg/Wstringop-overflow-47.c needs adjustment given we now have
vast improvements for code generation.  gcc.dg/strlenopt-32.c
needs adjustment because the strlen pass doesn't handle

  _11 = {0, b_6(D)};
  __builtin_memcpy (&a, "foo.bar", 8);
  MEM  [(char *)&a + 3B] = _11;
  _9 = strlen (&a);

I have opened PR117057 for this.

* tree-vect-slp.cc (vect_build_slp_tree_1): Do not compare
RHS codes for loads or stores.
(vect_get_and_check_slp_defs): Only demote operand to external
in case there is more than one operand.

* gcc.dg/vect/slp-57.c: New testcase.
* gcc.dg/Wstringop-overflow-47.c: Adjust.
* gcc.dg/strlenopt-32.c: XFAIL parts.

Diff:
---
 gcc/testsuite/gcc.dg/Wstringop-overflow-47.c |  6 +++---
 gcc/testsuite/gcc.dg/strlenopt-32.c  |  3 ++-
 gcc/testsuite/gcc.dg/vect/slp-57.c   | 14 ++
 gcc/tree-vect-slp.cc | 24 +++-
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c 
b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c
index 9fb78e55046f..aa5402a060f3 100644
--- a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c
@@ -31,15 +31,15 @@ void nowarn_c32 (char c)
 
 void warn_c32 (char c)
 {
-  extern char warn_a32[32];   // { dg-message "at offset (32|1) into 
destination object 'warn_a32' of size 32" "pr97027" }
+  extern char warn_a32[32];   // { dg-message "at offset (32|1|17) into 
destination object 'warn_a32' of size 32" "pr97027" }
 
   void *p = warn_a32 + 1;
-  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|32 bytes) into 
a region of size (0|31)" "pr97027" }
+  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|16 bytes|32 
bytes) into a region of size (0|15|31)" "pr97027" }
 
   /* Verify a local variable too. */
   char a32[32];
   p = a32 + 1;
-  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|32 bytes) into 
a region of size (0|31)" "pr97027" }
+  *(C32*)p = (C32){ c };  // { dg-warning "writing (1 byte|16 bytes|32 
bytes) into a region of size (0|15|31)" "pr97027" }
   sink (p);
 }
 
diff --git a/gcc/testsuite/gcc.dg/strlenopt-32.c 
b/gcc/testsuite/gcc.dg/strlenopt-32.c
index 4220314fb3f0..c53168570fdb 100644
--- a/gcc/testsuite/gcc.dg/strlenopt-32.c
+++ b/gcc/testsuite/gcc.dg/strlenopt-32.c
@@ -190,4 +190,5 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" { xfail 
vect_slp_v2qi_store_unalign } } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen1" { target 
vect_slp_v2qi_store_unalign } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-57.c 
b/gcc/testsuite/gcc.dg/vect/slp-57.c
new file mode 100644
index ..a35c4ef62030
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-57.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int a[1024];
+void foo (int x)
+{
+  for (int i = 0; i < 1024; i += 2)
+{
+  a[i] = x;
+  a[i+1] = 1;
+}
+}
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9bb765e2cbac..8b53b0fdb16d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -905,7 +905,8 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
}
 
  if (is_a <bb_vec_info> (vinfo)
- && !oprnd_info->any_pattern)
+ && !oprnd_info->any_pattern
+ && number_of_oprnds > 1)
{
  /* Now for commutative ops we should see whether we can
 make the other operand matching.  */
@@ -1305,10 +1306,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
  /* Mismatch.  */
  continue;
}
- if (first_stmt_code != rhs_code
+ if (!ldst_p
+ && first_stmt_code != rhs_code
  && alt_stmt_code == ERROR_MARK)
alt_stmt_code = rhs_code;
- if ((first_stmt_code != rhs_code
+ if ((!ldst_p
+  && first_stmt_code != rhs_code
   

[gcc] Created branch 'mikael/heads/inline_minmaxloc_v333' in namespace 'refs/users'

2024-10-10 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/inline_minmaxloc_v333' was created in namespace 
'refs/users' pointing to:

 128c217eee0c... fortran: Evaluate once BACK argument of MINLOC/MAXLOC with 


[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:d1f2bdaa6449030e8577c3904967bc43eef6b773

commit d1f2bdaa6449030e8577c3904967bc43eef6b773
Author: Mikael Morin 
Date:   Fri Nov 17 19:04:19 2023 +0100

fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]

Enable generation of inline code for the MINLOC and MAXLOC intrinsics,
if the ARRAY argument is of integral type and of any rank (only the rank 1
case was previously inlined), the DIM argument is a constant value and there
is no MASK argument.

The restriction to integral ARRAY and absent MASK limits the scope of
the change to the cases where we generate single loop inline code.

This change uses the existing scalarizer support for reductions, that is
arrays used in scalarization loops, where each element uses a nested
scalarization loop to calculate its value.  The nested loop (and
respectively the nested scalarization chain) is created while walking the
MINLOC/MAXLOC expression, it's setup automatically by the outer scalarizer,
and gfc_conv_intrinsic_minmaxloc is changed to use it as a replacement for
the local loop variable (respectively ARRAY scalarization chain) used in the
non-reduction case (i.e. when DIM is absent).

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return true
if DIM is constant, ARRAY is integral and MASK is absent.
(walk_inline_intrinsic_minmaxloc): If DIM is present, walk ARRAY and
move the dimension corresponding to DIM to a nested chain, keeping
the rest of the dimensions as the returned scalarization chain.
(gfc_conv_intrinsic_minmaxloc): When inside the scalarization loops,
proceed with inline code generation if DIM is present.  If DIM is
present, skip result array creation and final initialization from
individual result local variables.  If DIM is present and ARRAY has
rank greater than 1, use the nested loop initialized by the
scalarizer instead of the local one, use 1 as scalarization
dimension, and evaluate ARRAY using the inherited scalarization
chain instead of creating a local one by walking the expression.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_1.f90: Also accept the error message
generated by the scalarizer in case the function call is implemented
through inline code.
* gfortran.dg/maxloc_bounds_2.f90: Likewise.
* gfortran.dg/maxloc_bounds_3.f90: Likewise.
* gfortran.dg/minmaxloc_19.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 227 ++
 gcc/testsuite/gfortran.dg/maxloc_bounds_1.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_2.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_3.f90 |   4 +-
 gcc/testsuite/gfortran.dg/minmaxloc_19.f90| 182 +
 5 files changed, 343 insertions(+), 78 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a282ae1c0903..dedb49b4a64e 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5472,12 +5472,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   tree lab1, lab2;
   tree b_if, b_else;
   tree back;
-  gfc_loopinfo loop;
-  gfc_actual_arglist *actual;
-  gfc_ss *arrayss;
-  gfc_ss *maskss;
+  gfc_loopinfo loop, *ploop;
+  gfc_actual_arglist *actual, *array_arg, *dim_arg, *mask_arg, *kind_arg;
+  gfc_actual_arglist *back_arg;
+  gfc_ss *arrayss = nullptr;
+  gfc_ss *maskss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
+  gfc_se *base_se;
   gfc_expr *arrayexpr;
   gfc_expr *maskexpr;
   gfc_expr *backexpr;
@@ -5489,6 +5491,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   bool optional_mask;
 
   actual = expr->value.function.actual;
+  array_arg = actual;
+  dim_arg = array_arg->next;
+  mask_arg = dim_arg->next;
+  kind_arg = mask_arg->next;
+  back_arg = kind_arg->next;
+
+  bool dim_present = dim_arg->expr != nullptr;
+  bool nested_loop = dim_present && expr->rank > 0;
 
   /* The last argument, BACK, is passed by value. Ensure that
  by setting its name to %VAL. */
@@ -5502,11 +5512,15 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 {
   if (se->ss->info->useflags)
{
- /* The inline implementation of MINLOC/MAXLOC has been generated
-before, out of the scalarization loop; now we can just use the
-result.  */
- gfc_conv_tmp_array_ref (se);
- return;
+ if (!dim_present || !gfc_inline_intrinsic_function_p (expr))
+   {
+ /* The code generating and initializing the result array has been
+generated already before the scalari

[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:25e8f732e34d5a41df821c43f4372567fac7b787

commit 25e8f732e34d5a41df821c43f4372567fac7b787
Author: Mikael Morin 
Date:   Thu Aug 8 12:23:16 2024 +0200

fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]

Enable generation of inline MINLOC/MAXLOC code in the cases where DIM is a
constant, and either ARRAY is of floating point or MASK is an array.  Those
cases are the remaining bits to fully support inlining of non-CHARACTER
MINLOC/MAXLOC with DIM.  They are treated together because they generate
similar code, the NANs for REAL types being handled a bit like a second
level of masking.  These are the cases for which we generate two loops.

This change affects the code generating the second loop, that was
previously accessible only in cases ARRAY had rank 1.

The main changes are in gfc_conv_intrinsic_minmaxloc the replacement of the
locally initialized scalarization loop with the one provided and previously
initialized by the scalarizer.  Same goes for the locally initialized MASK
scalarizer chain.

As this is enabling the code generating a second loop in a context of
reduction and nested loops, care is taken not to advance parent
scalarization chains twice.

The scalarization chain element(s) for an array MASK are inserted in the
chain at a different place from that of a scalar MASK.  This is done on
purpose to match the code consuming the chains which are in different places
for scalar and array MASK.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE
for MINLOC/MAXLOC with constant DIM and non-scalar MASK.
(walk_inline_intrinsic_minmaxloc): Walk MASK and if it's an array
add the chain obtained before that of ARRAY.
(gfc_conv_intrinsic_minmaxloc): Use the nested loop if there is one.
To evaluate MASK (respectively ARRAY in the second loop), inherit
the scalarizer chain if in a nested loop, otherwise keep using the
chain obtained by walking MASK (respectively ARRAY).  If there is a
nested loop, avoid advancing the parent scalarization chain a second
time in the second loop.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_21.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  94 ++---
 gcc/testsuite/gfortran.dg/minmaxloc_21.f90 | 572 +
 2 files changed, 623 insertions(+), 43 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a146d7263c88..4beead175b77 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5477,6 +5477,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_actual_arglist *back_arg;
   gfc_ss *arrayss = nullptr;
   gfc_ss *maskss = nullptr;
+  gfc_ss *orig_ss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
   gfc_se nested_se;
@@ -5711,6 +5712,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   if (nested_loop)
 {
   ploop = enter_nested_loop (&nested_se);
+  orig_ss = nested_se.ss;
   ploop->temp_dim = 1;
 }
   else
@@ -5785,9 +5787,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 }
   else
 {
-  gcc_assert (!nested_loop);
-  for (int i = 0; i < loop.dimen; i++)
-   gfc_add_modify (&loop.pre, pos[i], gfc_index_zero_node);
+  for (int i = 0; i < ploop->dimen; i++)
+   gfc_add_modify (&ploop->pre, pos[i], gfc_index_zero_node);
   lab1 = gfc_build_label_decl (NULL_TREE);
   TREE_USED (lab1) = 1;
   lab2 = gfc_build_label_decl (NULL_TREE);
@@ -5818,10 +5819,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   /* If we have a mask, only check this element if the mask is set.  */
   if (maskexpr && maskexpr->rank > 0)
 {
-  gcc_assert (!nested_loop);
-  gfc_init_se (&maskse, NULL);
-  gfc_copy_loopinfo_to_se (&maskse, &loop);
-  maskse.ss = maskss;
+  gfc_init_se (&maskse, base_se);
+  gfc_copy_loopinfo_to_se (&maskse, ploop);
+  if (!nested_loop)
+   maskse.ss = maskss;
   gfc_conv_expr_val (&maskse, maskexpr);
   gfc_add_block_to_block (&body, &maskse.pre);
 
@@ -5849,13 +5850,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   stmtblock_t ifblock2;
   tree ifbody2;
 
-  gcc_assert (!nested_loop);
-
   gfc_start_block (&ifblock2);
-  for (int i = 0; i < loop.dimen; i++)
+  for (int i = 0; i < ploop->dimen; i++)
{
  tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]),
-loop.loopvar[i], offset[i]);
+ploop->loopvar[i

[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:8674757e6e99a36dfc211aa66c19dbc0affbf8a4

commit 8674757e6e99a36dfc211aa66c19dbc0affbf8a4
Author: Mikael Morin 
Date:   Thu Nov 16 10:00:26 2023 +0100

fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]

Add the tests covering the cases for which the following patches will
implement inline expansion of MINLOC and MAXLOC.  Those are cases where the
DIM argument is a constant value, and the ARRAY argument has rank greater
than 1.

PR fortran/90608

gcc/testsuite/ChangeLog:

* gfortran.dg/ieee/maxloc_nan_2.f90: New test.
* gfortran.dg/ieee/minloc_nan_2.f90: New test.
* gfortran.dg/maxloc_with_dim_1.f90: New test.
* gfortran.dg/maxloc_with_dim_and_mask_1.f90: New test.
* gfortran.dg/minloc_with_dim_1.f90: New test.
* gfortran.dg/minloc_with_dim_and_mask_1.f90: New test.

Diff:
---
 gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90|  64 +++
 gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90|  64 +++
 gcc/testsuite/gfortran.dg/maxloc_with_dim_1.f90| 201 +
 .../gfortran.dg/maxloc_with_dim_and_mask_1.f90 | 452 +
 gcc/testsuite/gfortran.dg/minloc_with_dim_1.f90| 201 +
 .../gfortran.dg/minloc_with_dim_and_mask_1.f90 | 452 +
 6 files changed, 1434 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 
b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90
new file mode 100644
index ..788903506350
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90
@@ -0,0 +1,64 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline maxloc implementation,
+! when the dim argument is present.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,4,5), source = nan)
+r = maxloc(a, dim=1)
+if (any(shape(r) /= (/ 4, 5 /))) stop 21
+if (any(r /= 1)) stop 22
+r = maxloc(a, dim=2)
+if (any(shape(r) /= (/ 3, 5 /))) stop 23
+if (any(r /= 1)) stop 24
+r = maxloc(a, dim=3)
+if (any(shape(r) /= (/ 3, 4 /))) stop 25
+if (any(r /= 1)) stop 26
+  end subroutine
+  subroutine check_with_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+logical, allocatable :: m(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(2,3,4), source = nan)
+allocate(m(2,3,4))
+m(:,:,:) = reshape((/ .false., .false., .true. , .true. ,  &
+  .false., .true. , .false., .false.,  &
+  .false., .true. , .true. , .false.,  &
+  .true. , .true. , .true. , .false.,  &
+  .false., .true. , .true. , .false.,  &
+  .false., .true. , .false., .false.  /), shape(m))
+r = maxloc(a, dim = 1, mask = m)
+if (any(shape(r) /= (/ 3, 4 /))) stop 51
+if (any(r /= reshape((/ 0, 1, 2,  &
+0, 2, 1,  &
+1, 1, 2,  &
+1, 2, 0  /), (/ 3, 4 /)))) stop 52
+r = maxloc(a, dim = 2, mask = m)
+if (any(shape(r) /= (/ 2, 4 /))) stop 53
+if (any(r /= reshape((/ 2, 2,  &
+3, 2,  &
+1, 1,  &
+1, 2  /), (/ 2, 4 /)))) stop 54
+r = maxloc(a, dim = 3, mask = m)
+if (any(shape(r) /= (/ 2, 3 /))) stop 55
+if (any(r /= reshape((/ 3, 3,  &
+1, 1,  &
+2, 1  /), (/ 2, 3 /)))) stop 56
+  end subroutine
+end program p
diff --git a/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 
b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90
new file mode 100644
index ..37724d8202de
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90
@@ -0,0 +1,64 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline minloc implementation,
+! when the dim argument is present.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,4,5), source = nan)
+r = minloc(a, dim=1)
+if (any(shape(r) /= (/ 4, 5 /))) stop 21
+if (any(r /= 1)) stop 22
+r = minloc(a, dim=2)
+if (any(shape(r) /= (/ 3, 5 /))) stop 23
+   

[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Check MASK directly instead of its scalarization chain

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:fe0feb4163558087c9dd02f0d2c909e7512a9ab4

commit fe0feb4163558087c9dd02f0d2c909e7512a9ab4
Author: Mikael Morin 
Date:   Thu Sep 12 16:56:39 2024 +0200

fortran: Check MASK directly instead of its scalarization chain

Update the conditions used by the inline MINLOC/MAXLOC code generation
function to check directly the properties of MASK instead of the
variable holding its scalarization chain.

The inline implementation of MINLOC/MAXLOC in gfc_conv_intrinsic_minmaxloc
uses several conditions checking the presence of a scalarization chain for
MASK, which means that the argument is present and non-scalar.  The next
patch will allow inlining MINLOC/MAXLOC with DIM and MASK, and in that
case the scalarization chain for MASK is initialized elsewhere, so the
variable usually holding it in the function is not used, and the conditions
won't work in that case.

This change updates the conditions to check directly the properties of
MASK so that they work even if the scalarization chain variable is not used.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Use
conditionals based on the MASK expression rather than on its
scalarization chains.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index cd6aca51f218..a146d7263c88 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5746,7 +5746,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gcc_assert (reduction_dimensions == ploop->dimen);
 
-  if (nonempty == NULL && maskss == NULL)
+  if (nonempty == NULL && !(maskexpr && maskexpr->rank > 0))
 {
   nonempty = logical_true_node;
 
@@ -5816,7 +5816,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_start_scalarized_body (ploop, &body);
 
   /* If we have a mask, only check this element if the mask is set.  */
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
 {
   gcc_assert (!nested_loop);
   gfc_init_se (&maskse, NULL);
@@ -5921,7 +5921,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 }
   gfc_add_expr_to_block (&block, ifbody);
 
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
 {
   /* We enclose the above in if (mask) {...}.  If the mask is an
 optional argument, generate IF (.NOT. PRESENT(MASK)
@@ -5972,7 +5972,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_expr_to_block (outer_block, build1_v (LABEL_EXPR, lab1));
 
   /* If we have a mask, only check this element if the mask is set.  */
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
{
  gfc_init_se (&maskse, NULL);
  gfc_copy_loopinfo_to_se (&maskse, &loop);
@@ -6038,7 +6038,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gfc_add_expr_to_block (&block, tmp);
 
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
{
  /* We enclose the above in if (mask) {...}.  If the mask is
 an optional argument, generate IF (.NOT. PRESENT(MASK)
@@ -6063,7 +6063,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 gfc_add_expr_to_block (&loop.pre, build1_v (LABEL_EXPR, lab2));
 
   /* For a scalar mask, enclose the loop in an if statement.  */
-  if (maskexpr && maskss == NULL)
+  if (maskexpr && maskexpr->rank == 0)
 {
   tree ifmask;


[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:b696e5de16f5f0e1403a03e27e0a2e159a37cf83

commit b696e5de16f5f0e1403a03e27e0a2e159a37cf83
Author: Mikael Morin 
Date:   Thu Aug 8 13:44:16 2024 +0200

fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]

Enable the generation of inline code for MINLOC/MAXLOC when argument
ARRAY is of integral type, DIM is a constant, and MASK is scalar (only
absent MASK or rank 1 ARRAY were inlined before).

Scalar masks are implemented with a wrapping condition around the code
one would generate if MASK wasn't present, so they are easy to support
once inline code without MASK is working.

With this change, there are both expressions evaluated inside the nested
loop (ARRAY, and in the future MASK if non-scalar) and expressions evaluated
outside of it (MASK if scalar).  Both have to advance the scalarization
chain passed in argument SE to gfc_conv_intrinsic_minmaxloc as they are
evaluated, but expressions evaluated from within the nested loop
additionally have to advance the nested scalarization chain of the reduction
loop.  This is normally handled transparently through the inheritance that
is defined when initializing gfc_se structs, but there has to be some
variable to inherit from, and there is a single one, SE.  This variable is
kept as base for out of nested loop expressions (scalar MASK), and this
change introduces a new variable to hold the current advance of the nested
loop scalarization chain and serve as inheritance base to evaluate nested
loop expressions (just ARRAY for now, additionally non-scalar MASK later).

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE
if MASK is scalar.
(walk_inline_intrinsic_minmaxloc): Append to the scalarization chain
a scalar element for MASK if it's present.
(gfc_conv_intrinsic_minmaxloc): Use a local gfc_se struct to serve
as base for all the expressions evaluated in the nested loop.  To
evaluate MASK in a nested loop, enable usage of the scalarizer and
set the current scalarization chain element to use to that of the
original passed in SE argument.  And use the nested loop from the
scalarizer instead of the local loop in that case.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_8.f90: Accept the error message
generated by the scalarizer in case the MAXLOC intrinsic call is
implemented through inline code.
* gfortran.dg/minmaxloc_20.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc|  27 ++--
 gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90 |   4 +-
 gcc/testsuite/gfortran.dg/minmaxloc_20.f90| 182 ++
 3 files changed, 201 insertions(+), 12 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index dedb49b4a64e..cd6aca51f218 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5479,6 +5479,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_ss *maskss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
+  gfc_se nested_se;
   gfc_se *base_se;
   gfc_expr *arrayexpr;
   gfc_expr *maskexpr;
@@ -5616,7 +5617,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_block_to_block (&se->pre, &backse.post);
 
   if (nested_loop)
-base_se = se;
+{
+  gfc_init_se (&nested_se, se);
+  base_se = &nested_se;
+}
   else
 {
   /* Walk the arguments.  */
@@ -5706,7 +5710,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   if (nested_loop)
 {
-  ploop = enter_nested_loop (se);
+  ploop = enter_nested_loop (&nested_se);
   ploop->temp_dim = 1;
 }
   else
@@ -6063,21 +6067,19 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 {
   tree ifmask;
 
-  gcc_assert (!nested_loop);
-
-  gfc_init_se (&maskse, NULL);
+  gfc_init_se (&maskse, nested_loop ? se : nullptr);
   gfc_conv_expr_val (&maskse, maskexpr);
   gfc_add_block_to_block (&se->pre, &maskse.pre);
   gfc_init_block (&block);
-  gfc_add_block_to_block (&block, &loop.pre);
-  gfc_add_block_to_block (&block, &loop.post);
+  gfc_add_block_to_block (&block, &ploop->pre);
+  gfc_add_block_to_block (&block, &ploop->post);
   tmp = gfc_finish_block (&block);
 
   /* For the else part of the scalar mask, just initialize
 the pos variable the same way as above.  */
 
   gfc_init_block (&elseblock);
-  for (int i = 0; i < loop.dimen; i++)
+  for (int i = 0; i < ploop->dimen; i++)
gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node);
   elsetmp = gfc_finish_bl

[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:128c217eee0ccf363433684edb754db5d5aedc08

commit 128c217eee0ccf363433684edb754db5d5aedc08
Author: Mikael Morin 
Date:   Thu Oct 3 15:57:50 2024 +0200

fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]

Evaluate the BACK argument of MINLOC/MAXLOC once before the
scalarization loops in the case where the DIM argument is present.

This is a follow-up to r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3
which added knowledge of BACK to the scalarizer, to
r15-2701-ga10436a8404ad2f0cc5aa4d6a0cc850abe5ef49e which removed it to
handle it out of scalarization instead, and to more immediate previous
patches that added support for MINLOC/MAXLOC with DIM.  The recent
support for MINLOC/MAXLOC with DIM introduced nested loops, which made
the evaluation of BACK (removed from the scalarizer knowledge by previous
patches) wrapped in a loop, so possibly executed more than once.  This
change adds BACK to the scalarization chain if MINLOC/MAXLOC will use
nested loops, so that it is evaluated by the scalarizer only once before
the outermost loop in that case.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc
(walk_inline_intrinsic_minmaxloc): Add a scalar element for BACK as
first item of the list if BACK is present and there will be nested
loops.
(gfc_conv_intrinsic_minmaxloc): Evaluate BACK using an inherited
scalarization chain if there is a nested loop.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_8.f90: New test.
* gfortran.dg/minloc_9.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  20 +-
 gcc/testsuite/gfortran.dg/maxloc_8.f90 | 349 +
 gcc/testsuite/gfortran.dg/minloc_9.f90 | 349 +
 3 files changed, 716 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 90dcb759b378..5c25eedcc4f7 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5594,7 +5594,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 && maskexpr->symtree->n.sym->attr.optional;
   backexpr = back_arg->expr;
 
-  gfc_init_se (&backse, NULL);
+  gfc_init_se (&backse, nested_loop ? se : nullptr);
   if (backexpr == nullptr)
 back = logical_false_node;
   else if (maybe_absent_optional_variable (backexpr))
@@ -11885,10 +11885,13 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
   gfc_actual_arglist *array_arg = expr->value.function.actual;
   gfc_actual_arglist *dim_arg = array_arg->next;
   gfc_actual_arglist *mask_arg = dim_arg->next;
+  gfc_actual_arglist *kind_arg = mask_arg->next;
+  gfc_actual_arglist *back_arg = kind_arg->next;
 
   gfc_expr *array = array_arg->expr;
   gfc_expr *dim = dim_arg->expr;
   gfc_expr *mask = mask_arg->expr;
+  gfc_expr *back = back_arg->expr;
 
   if (dim == nullptr)
 return gfc_get_array_ss (ss, expr, 1, GFC_SS_INTRINSIC);
@@ -11914,7 +11917,20 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
  chain.  */
   int dim_val = mpz_get_si (dim->value.integer) - 1;
   gfc_ss *tail = nest_loop_dimension (tmp_ss, dim_val);
-  tail->next = ss;
+
+  if (back && array->rank > 1)
+{
+  /* If there are nested scalarization loops, include BACK in the
+scalarization chains to avoid evaluating it multiple times in a loop.
+Otherwise, prefer to handle it outside of scalarization.  */
+  gfc_ss *back_ss = gfc_get_scalar_ss (ss, back);
+  back_ss->info->type = GFC_SS_REFERENCE;
+  back_ss->info->can_be_null_ref = true;
+
+  tail->next = back_ss;
+}
+  else
+tail->next = ss;
 
   if (scalar_mask)
 tmp_ss = gfc_get_scalar_ss (tmp_ss, mask);
diff --git a/gcc/testsuite/gfortran.dg/maxloc_8.f90 
b/gcc/testsuite/gfortran.dg/maxloc_8.f90
new file mode 100644
index 000000000000..21bc4591235a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/maxloc_8.f90
@@ -0,0 +1,349 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check that the evaluation of MAXLOC's BACK argument is made only once
+! before the scalarization loops, when the DIM argument is present.
+
+program p
+  implicit none
+  integer, parameter :: data60(*) = (/ 7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+   8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+   0, 6, 4, 5, 5, 8, 2, 6, 7, 8,  &
+   7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+   8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+   0, 6, 4, 5, 5, 8, 2, 6, 7, 8  /)
+  logical, parameter :: mask60(*) = (/ .true. , .false., .false., .false., &
+   .true. , .false., .true. , .false.,

[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:de0b77fb688595f13a2f5740aa720718c70ac456

commit de0b77fb688595f13a2f5740aa720718c70ac456
Author: Mikael Morin 
Date:   Sat Nov 18 20:54:20 2023 +0100

fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only

In the function generating inline code to implement MINLOC and MAXLOC, only
get the size of ARRAY along DIM if DIM is present to check for emptiness.

The check for ARRAY emptiness had been checking the size of the full array,
which is correct for MINLOC and MAXLOC without DIM.  But if DIM is
present, the reduction is along DIM only so the check for emptiness
should consider that dimension only as well.

This sounds like a correctness issue, but fortunately the cases where it
makes a difference are cases where ARRAY is empty, so even if the MINLOC or
MAXLOC calculated value is wrong, it's wrapped in a zero iteration loop, and
the wrong values are not actually used.  In the end this just avoids
unnecessary calculations.

A previous version of this patch didn't support non-constant DIM with
rank 1 ARRAY.  The new testcase checks that that case is supported.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Only get the size
along DIM instead of the full size if DIM is present.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_22.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 19 ++-
 gcc/testsuite/gfortran.dg/minmaxloc_22.f90 | 24 
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 4beead175b77..90dcb759b378 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5641,7 +5641,24 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   if (!(maskexpr && maskexpr->rank > 0))
 {
   mpz_t asize;
-  if (gfc_array_size (arrayexpr, &asize))
+  bool reduction_size_known;
+
+  if (dim_present)
+   {
+ int reduction_dim;
+ if (dim_arg->expr->expr_type == EXPR_CONSTANT)
+   reduction_dim = mpz_get_si (dim_arg->expr->value.integer) - 1;
+ else if (arrayexpr->rank == 1)
+   reduction_dim = 0;
+ else
+   gcc_unreachable ();
+ reduction_size_known = gfc_array_dimen_size (arrayexpr, reduction_dim,
+  &asize);
+   }
+  else
+   reduction_size_known = gfc_array_size (arrayexpr, &asize);
+
+  if (reduction_size_known)
{
  nonempty = gfc_conv_mpz_to_tree (asize, gfc_index_integer_kind);
  mpz_clear (asize);
diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 
b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90
new file mode 100644
index ..4f323ec5daba
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90
@@ -0,0 +1,24 @@
+! { dg-do compile }
+!
+! Check that the inline code generated for MINLOC and MAXLOC supports
+! a non-constant DIM argument if ARRAY has rank 1.
+
+program p
+  implicit none
+  integer, parameter :: n = 5
+  integer :: a(n)
+  print *, f(a, 1)
+contains
+  function f(a, d)
+integer :: a(n)
+integer :: d
+integer :: f
+f = minloc(a, dim=d) 
+  end function
+  function g(a, d)
+integer :: a(n)
+integer :: d
+integer :: g
+g = maxloc(a, dim=d) 
+  end function
+end program p


[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:54e60dc0d4959bf51b24ac1dc9dfcf104876820b

commit 54e60dc0d4959bf51b24ac1dc9dfcf104876820b
Author: Mikael Morin 
Date:   Fri Nov 17 19:04:19 2023 +0100

fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]

Enable generation of inline code for the MINLOC and MAXLOC intrinsics,
if the ARRAY argument is of integral type and of any rank (only the rank 1
case was previously inlined), the DIM argument is a constant value and there
is no MASK argument.

The restriction to integral ARRAY and absent MASK limits the scope of
the change to the cases where we generate single loop inline code.

This change uses the existing scalarizer support for reductions, that is
arrays used in scalarization loops, where each element uses a nested
scalarization loop to calculate its value.  The nested loop (and
respectively the nested scalarization chain) is created while walking the
MINLOC/MAXLOC expression, it is set up automatically by the outer scalarizer,
MINLOC/MAXLOC expression, it's setup automatically by the outer scalarizer,
and gfc_conv_intrinsic_minmaxloc is changed to use it as a replacement for
the local loop variable (respectively ARRAY scalarization chain) used in the
non-reduction case (i.e. when DIM is absent).

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return true
if DIM is constant, ARRAY is integral and MASK is absent.
(walk_inline_intrinsic_minmaxloc): If DIM is present, walk ARRAY and
move the dimension corresponding to DIM to a nested chain, keeping
the rest of the dimensions as the returned scalarization chain.
(gfc_conv_intrinsic_minmaxloc): When inside the scalarization loops,
proceed with inline code generation if DIM is present.  If DIM is
present, skip result array creation and final initialization from
individual result local variables.  If DIM is present and ARRAY has
rank greater than 1, use the nested loop initialized by the
scalarizer instead of the local one, use 1 as scalarization
dimension, and evaluate ARRAY using the inherited scalarization
chain instead of creating a local one by walking the expression.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_1.f90: Also accept the error message
generated by the scalarizer in case the function call is implemented
through inline code.
* gfortran.dg/maxloc_bounds_2.f90: Likewise.
* gfortran.dg/maxloc_bounds_3.f90: Likewise.
* gfortran.dg/minmaxloc_19.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 227 ++
 gcc/testsuite/gfortran.dg/maxloc_bounds_1.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_2.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_3.f90 |   4 +-
 gcc/testsuite/gfortran.dg/minmaxloc_19.f90| 182 +
 5 files changed, 343 insertions(+), 78 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a282ae1c0903..dedb49b4a64e 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5472,12 +5472,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   tree lab1, lab2;
   tree b_if, b_else;
   tree back;
-  gfc_loopinfo loop;
-  gfc_actual_arglist *actual;
-  gfc_ss *arrayss;
-  gfc_ss *maskss;
+  gfc_loopinfo loop, *ploop;
+  gfc_actual_arglist *actual, *array_arg, *dim_arg, *mask_arg, *kind_arg;
+  gfc_actual_arglist *back_arg;
+  gfc_ss *arrayss = nullptr;
+  gfc_ss *maskss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
+  gfc_se *base_se;
   gfc_expr *arrayexpr;
   gfc_expr *maskexpr;
   gfc_expr *backexpr;
@@ -5489,6 +5491,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   bool optional_mask;
 
   actual = expr->value.function.actual;
+  array_arg = actual;
+  dim_arg = array_arg->next;
+  mask_arg = dim_arg->next;
+  kind_arg = mask_arg->next;
+  back_arg = kind_arg->next;
+
+  bool dim_present = dim_arg->expr != nullptr;
+  bool nested_loop = dim_present && expr->rank > 0;
 
   /* The last argument, BACK, is passed by value. Ensure that
  by setting its name to %VAL. */
@@ -5502,11 +5512,15 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 {
   if (se->ss->info->useflags)
{
- /* The inline implementation of MINLOC/MAXLOC has been generated
-before, out of the scalarization loop; now we can just use the
-result.  */
- gfc_conv_tmp_array_ref (se);
- return;
+ if (!dim_present || !gfc_inline_intrinsic_function_p (expr))
+   {
+ /* The code generating and initializing the result array has been
+generated already before the scalari

[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:e714901ed532aaae8248e7e2e98fddcb7c8bfe75

commit e714901ed532aaae8248e7e2e98fddcb7c8bfe75
Author: Mikael Morin 
Date:   Thu Nov 16 10:00:26 2023 +0100

fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]

Add the tests covering the cases for which the following patches will
implement inline expansion of MINLOC and MAXLOC.  Those are cases where the
DIM argument is a constant value, and the ARRAY argument has rank greater
than 1.

PR fortran/90608

gcc/testsuite/ChangeLog:

* gfortran.dg/ieee/maxloc_nan_2.f90: New test.
* gfortran.dg/ieee/minloc_nan_2.f90: New test.
* gfortran.dg/maxloc_with_dim_1.f90: New test.
* gfortran.dg/maxloc_with_dim_and_mask_1.f90: New test.
* gfortran.dg/minloc_with_dim_1.f90: New test.
* gfortran.dg/minloc_with_dim_and_mask_1.f90: New test.

Diff:
---
 gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90|  64 +++
 gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90|  64 +++
 gcc/testsuite/gfortran.dg/maxloc_with_dim_1.f90| 201 +
 .../gfortran.dg/maxloc_with_dim_and_mask_1.f90 | 452 +
 gcc/testsuite/gfortran.dg/minloc_with_dim_1.f90| 201 +
 .../gfortran.dg/minloc_with_dim_and_mask_1.f90 | 452 +
 6 files changed, 1434 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 
b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90
new file mode 100644
index 000000000000..4d73431f8c23
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90
@@ -0,0 +1,64 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline maxloc implementation,
+! when the dim argument is present.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,4,5), source = nan)
+r = maxloc(a, dim=1)
+if (any(shape(r) /= (/ 4, 5 /))) stop 21
+if (any(r /= 1)) stop 22
+r = maxloc(a, dim=2)
+if (any(shape(r) /= (/ 3, 5 /))) stop 23
+if (any(r /= 1)) stop 24
+r = maxloc(a, dim=3)
+if (any(shape(r) /= (/ 3, 4 /))) stop 25
+if (any(r /= 1)) stop 26
+  end subroutine
+  subroutine check_with_mask()
+real, allocatable :: a(:,:,:)
+logical, allocatable :: m(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(2,3,4), source = nan)
+allocate(m(2,3,4))
+m(:,:,:) = reshape((/ .false., .false., .true. , .true. ,  &
+  .false., .true. , .false., .false.,  &
+  .false., .true. , .true. , .false.,  &
+  .true. , .true. , .true. , .false.,  &
+  .false., .true. , .true. , .false.,  &
+  .false., .true. , .false., .false.  /), shape(m))
+r = maxloc(a, dim = 1, mask = m)
+if (any(shape(r) /= (/ 3, 4 /))) stop 51
+if (any(r /= reshape((/ 0, 1, 2,  &
+0, 2, 1,  &
+1, 1, 2,  &
+1, 2, 0  /), (/ 3, 4 /)))) stop 52
+r = maxloc(a, dim = 2, mask = m)
+if (any(shape(r) /= (/ 2, 4 /))) stop 53
+if (any(r /= reshape((/ 2, 2,  &
+3, 2,  &
+1, 1,  &
+1, 2  /), (/ 2, 4 /)))) stop 54
+r = maxloc(a, dim = 3, mask = m)
+if (any(shape(r) /= (/ 2, 3 /))) stop 55
+if (any(r /= reshape((/ 3, 3,  &
+1, 1,  &
+2, 1  /), (/ 2, 3 /)))) stop 56
+  end subroutine
+end program p
+
diff --git a/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 
b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90
new file mode 100644
index 000000000000..311526484fc8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90
@@ -0,0 +1,64 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline minloc implementation,
+! when the dim argument is present.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,4,5), source = nan)
+r = minloc(a, dim=1)
+if (any(shape(r) /= (/ 4, 5 /))) stop 21
+if (any(r /= 1)) stop 22
+r = minloc(a, dim=2)
+if (any(shape(r) /= (/ 3, 5 /))) stop 23
+if (any(r /= 1)) stop 24
+r = mi

[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:32e9e7020eb681eab7351bdd8db31b9068186755

commit 32e9e7020eb681eab7351bdd8db31b9068186755
Author: Mikael Morin 
Date:   Thu Aug 8 12:23:16 2024 +0200

fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]

Enable generation of inline MINLOC/MAXLOC code in the cases where DIM is a
constant, and either ARRAY is of floating point or MASK is an array.  Those
cases are the remaining bits to fully support inlining of non-CHARACTER
MINLOC/MAXLOC with DIM.  They are treated together because they generate
similar code, the NANs for REAL types being handled a bit like a second
level of masking.  These are the cases for which we generate two loops.

This change affects the code generating the second loop, which was
previously reachable only in cases where ARRAY had rank 1.

The main change in gfc_conv_intrinsic_minmaxloc is the replacement of the
locally initialized scalarization loop with the one provided and previously
initialized by the scalarizer.  The same goes for the locally initialized
MASK scalarization chain.

As this is enabling the code generating a second loop in a context of
reduction and nested loops, care is taken not to advance parent
scalarization chains twice.

The scalarization chain element(s) for an array MASK are inserted in the
chain at a different place from that of a scalar MASK.  This is done on
purpose to match the code consuming the chains which are in different places
for scalar and array MASK.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE
for MINLOC/MAXLOC with constant DIM and non-scalar MASK.
(walk_inline_intrinsic_minmaxloc): Walk MASK and if it's an array
add the chain obtained before that of ARRAY.
(gfc_conv_intrinsic_minmaxloc): Use the nested loop if there is one.
To evaluate MASK (respectively ARRAY in the second loop), inherit
the scalarizer chain if in a nested loop, otherwise keep using the
chain obtained by walking MASK (respectively ARRAY).  If there is a
nested loop, avoid advancing the parent scalarization chain a second
time in the second loop.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_21.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  94 ++---
 gcc/testsuite/gfortran.dg/minmaxloc_21.f90 | 572 +
 2 files changed, 623 insertions(+), 43 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a146d7263c88..4beead175b77 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5477,6 +5477,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_actual_arglist *back_arg;
   gfc_ss *arrayss = nullptr;
   gfc_ss *maskss = nullptr;
+  gfc_ss *orig_ss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
   gfc_se nested_se;
@@ -5711,6 +5712,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   if (nested_loop)
 {
   ploop = enter_nested_loop (&nested_se);
+  orig_ss = nested_se.ss;
   ploop->temp_dim = 1;
 }
   else
@@ -5785,9 +5787,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 }
   else
 {
-  gcc_assert (!nested_loop);
-  for (int i = 0; i < loop.dimen; i++)
-   gfc_add_modify (&loop.pre, pos[i], gfc_index_zero_node);
+  for (int i = 0; i < ploop->dimen; i++)
+   gfc_add_modify (&ploop->pre, pos[i], gfc_index_zero_node);
   lab1 = gfc_build_label_decl (NULL_TREE);
   TREE_USED (lab1) = 1;
   lab2 = gfc_build_label_decl (NULL_TREE);
@@ -5818,10 +5819,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   /* If we have a mask, only check this element if the mask is set.  */
   if (maskexpr && maskexpr->rank > 0)
 {
-  gcc_assert (!nested_loop);
-  gfc_init_se (&maskse, NULL);
-  gfc_copy_loopinfo_to_se (&maskse, &loop);
-  maskse.ss = maskss;
+  gfc_init_se (&maskse, base_se);
+  gfc_copy_loopinfo_to_se (&maskse, ploop);
+  if (!nested_loop)
+   maskse.ss = maskss;
   gfc_conv_expr_val (&maskse, maskexpr);
   gfc_add_block_to_block (&body, &maskse.pre);
 
@@ -5849,13 +5850,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   stmtblock_t ifblock2;
   tree ifbody2;
 
-  gcc_assert (!nested_loop);
-
   gfc_start_block (&ifblock2);
-  for (int i = 0; i < loop.dimen; i++)
+  for (int i = 0; i < ploop->dimen; i++)
{
  tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]),
-loop.loopvar[i], offset[i]);
+ploop->loopvar[i

[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:3c97c96384c6b21c89aa6811e6a2801879fc598e

commit 3c97c96384c6b21c89aa6811e6a2801879fc598e
Author: Mikael Morin 
Date:   Thu Oct 3 15:57:50 2024 +0200

fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]

Evaluate the BACK argument of MINLOC/MAXLOC once before the
scalarization loops in the case where the DIM argument is present.

This is a follow-up to r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3
which added knowledge of BACK to the scalarizer, to
r15-2701-ga10436a8404ad2f0cc5aa4d6a0cc850abe5ef49e which removed it to
handle it out of scalarization instead, and to more immediate previous
patches that added support for MINLOC/MAXLOC with DIM.  The recent
support for MINLOC/MAXLOC with DIM introduced nested loops, which made
the evaluation of BACK (removed from the scalarizer knowledge by previous
patches) wrapped in a loop, so possibly executed more than once.  This
change adds BACK to the scalarization chain if MINLOC/MAXLOC will use
nested loops, so that it is evaluated by the scalarizer only once before
the outermost loop in that case.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc
(walk_inline_intrinsic_minmaxloc): Add a scalar element for BACK as
first item of the list if BACK is present and there will be nested
loops.
(gfc_conv_intrinsic_minmaxloc): Evaluate BACK using an inherited
scalarization chain if there is a nested loop.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_8.f90: New test.
* gfortran.dg/minloc_9.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  20 +-
 gcc/testsuite/gfortran.dg/maxloc_8.f90 | 349 +
 gcc/testsuite/gfortran.dg/minloc_9.f90 | 349 +
 3 files changed, 716 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 90dcb759b378..5c25eedcc4f7 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5594,7 +5594,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 && maskexpr->symtree->n.sym->attr.optional;
   backexpr = back_arg->expr;
 
-  gfc_init_se (&backse, NULL);
+  gfc_init_se (&backse, nested_loop ? se : nullptr);
   if (backexpr == nullptr)
 back = logical_false_node;
   else if (maybe_absent_optional_variable (backexpr))
@@ -11885,10 +11885,13 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
   gfc_actual_arglist *array_arg = expr->value.function.actual;
   gfc_actual_arglist *dim_arg = array_arg->next;
   gfc_actual_arglist *mask_arg = dim_arg->next;
+  gfc_actual_arglist *kind_arg = mask_arg->next;
+  gfc_actual_arglist *back_arg = kind_arg->next;
 
   gfc_expr *array = array_arg->expr;
   gfc_expr *dim = dim_arg->expr;
   gfc_expr *mask = mask_arg->expr;
+  gfc_expr *back = back_arg->expr;
 
   if (dim == nullptr)
 return gfc_get_array_ss (ss, expr, 1, GFC_SS_INTRINSIC);
@@ -11914,7 +11917,20 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
  chain.  */
   int dim_val = mpz_get_si (dim->value.integer) - 1;
   gfc_ss *tail = nest_loop_dimension (tmp_ss, dim_val);
-  tail->next = ss;
+
+  if (back && array->rank > 1)
+{
+  /* If there are nested scalarization loops, include BACK in the
+scalarization chains to avoid evaluating it multiple times in a loop.
+Otherwise, prefer to handle it outside of scalarization.  */
+  gfc_ss *back_ss = gfc_get_scalar_ss (ss, back);
+  back_ss->info->type = GFC_SS_REFERENCE;
+  back_ss->info->can_be_null_ref = true;
+
+  tail->next = back_ss;
+}
+  else
+tail->next = ss;
 
   if (scalar_mask)
 tmp_ss = gfc_get_scalar_ss (tmp_ss, mask);
diff --git a/gcc/testsuite/gfortran.dg/maxloc_8.f90 
b/gcc/testsuite/gfortran.dg/maxloc_8.f90
new file mode 100644
index ..21bc4591235a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/maxloc_8.f90
@@ -0,0 +1,349 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check that the evaluation of MAXLOC's BACK argument is made only once
+! before the scalarization loops, when the DIM argument is present.
+
+program p
+  implicit none
+  integer, parameter :: data60(*) = (/ 7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+   8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+   0, 6, 4, 5, 5, 8, 2, 6, 7, 8,  &
+   7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+   8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+   0, 6, 4, 5, 5, 8, 2, 6, 7, 8  /)
+  logical, parameter :: mask60(*) = (/ .true. , .false., .false., .false., &
+   .true. , .false., .true. , .false.,

[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:d9f0956929c7d0a182cfb623256f9bb320feb9ea

commit d9f0956929c7d0a182cfb623256f9bb320feb9ea
Author: Mikael Morin 
Date:   Thu Aug 8 13:44:16 2024 +0200

fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]

Enable the generation of inline code for MINLOC/MAXLOC when argument
ARRAY is of integral type, DIM is a constant, and MASK is scalar (only
absent MASK or rank 1 ARRAY were inlined before).

Scalar masks are implemented with a wrapping condition around the code
one would generate if MASK wasn't present, so they are easy to support
once inline code without MASK is working.

With this change, there are both expressions evaluated inside the nested
loop (ARRAY, and in the future MASK if non-scalar) and expressions evaluated
outside of it (MASK if scalar).  Both have to advance the scalarization
chain passed in argument SE to gfc_conv_intrinsic_minmaxloc as they are
evaluated, but expressions evaluated from within the nested loop
additionally have to advance the nested scalarization chain of the reduction
loop.  This is normally handled transparently through the inheritance that
is defined when initializing gfc_se structs, but there has to be some
variable to inherit from, and there is a single one, SE.  This variable is
kept as base for out of nested loop expressions (scalar MASK), and this
change introduces a new variable to hold the current advance of the nested
loop scalarization chain and serve as inheritance base to evaluate nested
loop expressions (just ARRAY for now, additionally non-scalar MASK later).

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE
if MASK is scalar.
(walk_inline_intrinsic_minmaxloc): Append to the scalarization chain
a scalar element for MASK if it's present.
(gfc_conv_intrinsic_minmaxloc): Use a local gfc_se struct to serve
as base for all the expressions evaluated in the nested loop.  To
evaluate MASK in a nested loop, enable usage of the scalarizer and
set the current scalarization chain element to use to that of the
original passed in SE argument.  And use the nested loop from the
scalarizer instead of the local loop in that case.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_8.f90: Accept the error message
generated by the scalarizer in case the MAXLOC intrinsic call is
implemented through inline code.
* gfortran.dg/minmaxloc_20.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc|  27 ++--
 gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90 |   4 +-
 gcc/testsuite/gfortran.dg/minmaxloc_20.f90| 182 ++
 3 files changed, 201 insertions(+), 12 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index dedb49b4a64e..cd6aca51f218 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5479,6 +5479,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_ss *maskss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
+  gfc_se nested_se;
   gfc_se *base_se;
   gfc_expr *arrayexpr;
   gfc_expr *maskexpr;
@@ -5616,7 +5617,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_block_to_block (&se->pre, &backse.post);
 
   if (nested_loop)
-base_se = se;
+{
+  gfc_init_se (&nested_se, se);
+  base_se = &nested_se;
+}
   else
 {
   /* Walk the arguments.  */
@@ -5706,7 +5710,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   if (nested_loop)
 {
-  ploop = enter_nested_loop (se);
+  ploop = enter_nested_loop (&nested_se);
   ploop->temp_dim = 1;
 }
   else
@@ -6063,21 +6067,19 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 {
   tree ifmask;
 
-  gcc_assert (!nested_loop);
-
-  gfc_init_se (&maskse, NULL);
+  gfc_init_se (&maskse, nested_loop ? se : nullptr);
   gfc_conv_expr_val (&maskse, maskexpr);
   gfc_add_block_to_block (&se->pre, &maskse.pre);
   gfc_init_block (&block);
-  gfc_add_block_to_block (&block, &loop.pre);
-  gfc_add_block_to_block (&block, &loop.post);
+  gfc_add_block_to_block (&block, &ploop->pre);
+  gfc_add_block_to_block (&block, &ploop->post);
   tmp = gfc_finish_block (&block);
 
   /* For the else part of the scalar mask, just initialize
 the pos variable the same way as above.  */
 
   gfc_init_block (&elseblock);
-  for (int i = 0; i < loop.dimen; i++)
+  for (int i = 0; i < ploop->dimen; i++)
gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node);
   elsetmp = gfc_finish_bl

[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Check MASK directly instead of its scalarization chain

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:1810a146003e03c431ce4c700799dc93e2006743

commit 1810a146003e03c431ce4c700799dc93e2006743
Author: Mikael Morin 
Date:   Thu Sep 12 16:56:39 2024 +0200

fortran: Check MASK directly instead of its scalarization chain

Update the conditions used by the inline MINLOC/MAXLOC code generation
function to check directly the properties of MASK instead of the
variable holding its scalarization chain.

The inline implementation of MINLOC/MAXLOC in gfc_conv_intrinsic_minmaxloc
uses several conditions checking the presence of a scalarization chain for
MASK, which means that the argument is present and non-scalar.  The next
patch will allow inlining MINLOC/MAXLOC with DIM and MASK, and in that
case the scalarization chain for MASK is initialized elsewhere, so the
variable usually holding it in the function is not used, and the conditions
won't work in that case.

This change updates the conditions to check directly the properties of
MASK so that they work even if the scalarization chain variable is not used.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Use
conditionals based on the MASK expression rather than on its
scalarization chains.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index cd6aca51f218..a146d7263c88 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5746,7 +5746,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gcc_assert (reduction_dimensions == ploop->dimen);
 
-  if (nonempty == NULL && maskss == NULL)
+  if (nonempty == NULL && !(maskexpr && maskexpr->rank > 0))
 {
   nonempty = logical_true_node;
 
@@ -5816,7 +5816,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_start_scalarized_body (ploop, &body);
 
   /* If we have a mask, only check this element if the mask is set.  */
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
 {
   gcc_assert (!nested_loop);
   gfc_init_se (&maskse, NULL);
@@ -5921,7 +5921,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 }
   gfc_add_expr_to_block (&block, ifbody);
 
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
 {
   /* We enclose the above in if (mask) {...}.  If the mask is an
 optional argument, generate IF (.NOT. PRESENT(MASK)
@@ -5972,7 +5972,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_expr_to_block (outer_block, build1_v (LABEL_EXPR, lab1));
 
   /* If we have a mask, only check this element if the mask is set.  */
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
{
  gfc_init_se (&maskse, NULL);
  gfc_copy_loopinfo_to_se (&maskse, &loop);
@@ -6038,7 +6038,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gfc_add_expr_to_block (&block, tmp);
 
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
{
  /* We enclose the above in if (mask) {...}.  If the mask is
 an optional argument, generate IF (.NOT. PRESENT(MASK)
@@ -6063,7 +6063,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 gfc_add_expr_to_block (&loop.pre, build1_v (LABEL_EXPR, lab2));
 
   /* For a scalar mask, enclose the loop in an if statement.  */
-  if (maskexpr && maskss == NULL)
+  if (maskexpr && maskexpr->rank == 0)
 {
   tree ifmask;


[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only

2024-10-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:091b04da1657c9f71d62491fa30c0846f8cb5b43

commit 091b04da1657c9f71d62491fa30c0846f8cb5b43
Author: Mikael Morin 
Date:   Sat Nov 18 20:54:20 2023 +0100

fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only

In the function generating inline code to implement MINLOC and MAXLOC, only
get the size of ARRAY along DIM if DIM is present to check for emptiness.

The check for ARRAY emptiness had been checking the size of the full array,
which is correct for MINLOC and MAXLOC without DIM.  But if DIM is
present, the reduction is along DIM only so the check for emptiness
should consider that dimension only as well.

This sounds like a correctness issue, but fortunately the cases where it
makes a difference are cases where ARRAY is empty, so even if the MINLOC or
MAXLOC calculated value is wrong, it's wrapped in a zero iteration loop, and
the wrong values are not actually used.  In the end this just avoids
unnecessary calculations.

A previous version of this patch didn't support non-constant DIM with
rank 1 ARRAY.  The new testcase checks that that case is supported.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Only get the 
size
along DIM instead of the full size if DIM is present.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_22.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 19 ++-
 gcc/testsuite/gfortran.dg/minmaxloc_22.f90 | 24 
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 4beead175b77..90dcb759b378 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5641,7 +5641,24 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   if (!(maskexpr && maskexpr->rank > 0))
 {
   mpz_t asize;
-  if (gfc_array_size (arrayexpr, &asize))
+  bool reduction_size_known;
+
+  if (dim_present)
+   {
+ int reduction_dim;
+ if (dim_arg->expr->expr_type == EXPR_CONSTANT)
+   reduction_dim = mpz_get_si (dim_arg->expr->value.integer) - 1;
+ else if (arrayexpr->rank == 1)
+   reduction_dim = 0;
+ else
+   gcc_unreachable ();
+ reduction_size_known = gfc_array_dimen_size (arrayexpr, reduction_dim,
+  &asize);
+   }
+  else
+   reduction_size_known = gfc_array_size (arrayexpr, &asize);
+
+  if (reduction_size_known)
{
  nonempty = gfc_conv_mpz_to_tree (asize, gfc_index_integer_kind);
  mpz_clear (asize);
diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 
b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90
new file mode 100644
index ..4f323ec5daba
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90
@@ -0,0 +1,24 @@
+! { dg-do compile }
+!
+! Check that the inline code generated for MINLOC and MAXLOC supports
+! a non-constant DIM argument if ARRAY has rank 1.
+
+program p
+  implicit none
+  integer, parameter :: n = 5
+  integer :: a(n)
+  print *, f(a, 1)
+contains
+  function f(a, d)
+integer :: a(n)
+integer :: d
+integer :: f
+f = minloc(a, dim=d) 
+  end function
+  function g(a, d)
+integer :: a(n)
+integer :: d
+integer :: g
+g = maxloc(a, dim=d) 
+  end function
+end program p


[gcc] Created branch 'mikael/heads/inline_minmaxloc_v332' in namespace 'refs/users'

2024-10-10 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/inline_minmaxloc_v332' was created in namespace 
'refs/users' pointing to:

 3c97c96384c6... fortran: Evaluate once BACK argument of MINLOC/MAXLOC with 


[gcc r15-4240] RISC-V:Bugfix for C++ code compilation failure with rv32imafc_zve32f[pr116883]

2024-10-10 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:fd8e590ff11266598d8f9b3d03d72ba7a6100512

commit r15-4240-gfd8e590ff11266598d8f9b3d03d72ba7a6100512
Author: Li Xu 
Date:   Thu Oct 10 08:51:19 2024 -0600

RISC-V:Bugfix for C++ code compilation failure with 
rv32imafc_zve32f[pr116883]

From: xuli 

Example as follows:

int main()
{
  unsigned long arraya[128], arrayb[128], arrayc[128];
  for (int i = 0; i < 128; i++)
   {
  arraya[i] = arrayb[i] + arrayc[i];
   }
  return 0;
}

Compiled with -march=rv32imafc_zve32f -mabi=ilp32f, it will cause a 
compilation issue:

riscv_vector.h:40:25: error: ambiguating new declaration of 'vint64m4_t 
__riscv_vle64(vbool16_t, const long long int*, unsigned int)'
   40 | #pragma riscv intrinsic "vector"
  | ^~~~
riscv_vector.h:40:25: note: old declaration 'vint64m1_t 
__riscv_vle64(vbool64_t, const long long int*, unsigned int)'

With zvl=32b, vbool16_t is registered in init_builtins() with
type_common.precision=0x101 (nunits=2), mode_nunits[E_RVVMF16BI]=[2,2].

Normally, vbool64_t is only valid when TARGET_MIN_VLEN > 32, so vbool64_t
is not registered in init_builtins(), meaning vbool64_t=null.

In order to implement __attribute__((target("arch=+v"))), we must register
all vector types and all RVV intrinsics. Therefore, vbool64_t will be 
registered
by default with zvl=128b in reinit_builtins(), resulting in
type_common.precision=0x101 (nunits=2) and mode_nunits[E_RVVMF64BI]=[2,2].

We then get TYPE_VECTOR_SUBPARTS(vbool16_t) == 
TYPE_VECTOR_SUBPARTS(vbool64_t),
calculated using type_common.precision, resulting in 2. Since vbool16_t and
vbool64_t have the same element type (boolean_type), the compiler treats 
them
as the same type, leading to a re-declaration conflict.

After all types and intrinsics have been registered, processing
__attribute__((target("arch=+v"))) will update the parameters option and
init_adjust_machine_modes. Therefore, to avoid conflicts, we can choose
zvl=4096b for the null type reinit_builtins().

command option zvl=32b
  type nunits
  vbool64_t => null
  vbool32_t=> [1,1]
  vbool16_t=> [2,2]
  vbool8_t=>  [4,4]
  vbool4_t=>  [8,8]
  vbool2_t=>  [16,16]
  vbool1_t=>  [32,32]

reinit zvl=128b
  vbool64_t => [2,2] conflict with zvl32b vbool16_t=> [2,2]
reinit zvl=256b
  vbool64_t => [4,4] conflict with zvl32b vbool8_t=>  [4,4]
reinit zvl=512b
  vbool64_t => [8,8] conflict with zvl32b vbool4_t=>  [8,8]
reinit zvl=1024b
  vbool64_t => [16,16] conflict with zvl32b vbool2_t=>  [16,16]
reinit zvl=2048b
  vbool64_t => [32,32] conflict with zvl32b vbool1_t=>  [32,32]
reinit zvl=4096b
  vbool64_t => [64,64] zvl=4096b is ok

Signed-off-by: xuli 

PR target/116883

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_pragma_intrinsic_flags_pollute): 
Choose zvl4096b
to initialize null type.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr116883.C: New test.

Diff:
---
 gcc/config/riscv/riscv-c.cc|  7 ++-
 gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C | 15 +++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 71112d9c66d7..c59f408d3a8e 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -59,7 +59,12 @@ riscv_pragma_intrinsic_flags_pollute (struct 
pragma_intrinsic_flags *flags)
   riscv_zvl_flags = riscv_zvl_flags
 | MASK_ZVL32B
 | MASK_ZVL64B
-| MASK_ZVL128B;
+| MASK_ZVL128B
+| MASK_ZVL256B
+| MASK_ZVL512B
+| MASK_ZVL1024B
+| MASK_ZVL2048B
+| MASK_ZVL4096B;
 
   riscv_vector_elen_flags = riscv_vector_elen_flags
 | MASK_VECTOR_ELEN_32
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
new file mode 100644
index ..15bbec40bdde
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
@@ -0,0 +1,15 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafc_zve32f -mabi=ilp32f" } */
+
+#include 
+
+int main()
+{
+  unsigned long arraya[128], arrayb[128], arrayc[128];
+  for (int i; i < 128; i++)
+   {
+  arraya[i] = arrayb[i] + arrayc[i];
+   }
+  return 0;
+}


[gcc r15-4243] aarch64: Alter pr116258.c test to correct for big endian.

2024-10-10 Thread Richard Ball via Gcc-cvs
https://gcc.gnu.org/g:a17a9bdcb3f749b895abf1fbf4f62859df9e8184

commit r15-4243-ga17a9bdcb3f749b895abf1fbf4f62859df9e8184
Author: Richard Ball 
Date:   Thu Oct 10 19:16:39 2024 +0100

aarch64: Alter pr116258.c test to correct for big endian.

The test at pr116258.c fails on big endian targets.
This is because the test checks that the index of a floating
point multiply is 0, which is correct only for little endian.

gcc/testsuite/ChangeLog:

PR tree-optimization/116258
* gcc.target/aarch64/pr116258.c:
Alter test to add big-endian support.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/pr116258.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/pr116258.c 
b/gcc/testsuite/gcc.target/aarch64/pr116258.c
index e727ad4b72a5..5b63de25b7bf 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr116258.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr116258.c
@@ -12,6 +12,7 @@
   return (x + h(t));
 }
 
-/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 { target { 
aarch64_little_endian } } } } */
+/* { dg-final { scan-assembler-times "\\\[3\\\]" 1 { target { 
aarch64_big_endian } } } } */
 /* { dg-final { scan-assembler-not "dup\t" } } */
 /* { dg-final { scan-assembler-not "ins\t" } } */


[gcc r14-10763] aarch64: Alter pr116258.c test to correct for big endian.

2024-10-10 Thread Richard Ball via Gcc-cvs
https://gcc.gnu.org/g:44dc46415ce8fafc1f6a46bac123b430ae5aba4d

commit r14-10763-g44dc46415ce8fafc1f6a46bac123b430ae5aba4d
Author: Richard Ball 
Date:   Thu Oct 10 19:16:39 2024 +0100

aarch64: Alter pr116258.c test to correct for big endian.

The test at pr116258.c fails on big endian targets.
This is because the test checks that the index of a floating
point multiply is 0, which is correct only for little endian.

gcc/testsuite/ChangeLog:

PR tree-optimization/116258
* gcc.target/aarch64/pr116258.c:
Alter test to add big-endian support.

(cherry picked from commit a17a9bdcb3f749b895abf1fbf4f62859df9e8184)

Diff:
---
 gcc/testsuite/gcc.target/aarch64/pr116258.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/pr116258.c 
b/gcc/testsuite/gcc.target/aarch64/pr116258.c
index e727ad4b72a5..5b63de25b7bf 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr116258.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr116258.c
@@ -12,6 +12,7 @@
   return (x + h(t));
 }
 
-/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 { target { 
aarch64_little_endian } } } } */
+/* { dg-final { scan-assembler-times "\\\[3\\\]" 1 { target { 
aarch64_big_endian } } } } */
 /* { dg-final { scan-assembler-not "dup\t" } } */
 /* { dg-final { scan-assembler-not "ins\t" } } */


[gcc r15-4236] match.pd: Check trunc_mod vector optab before folding.

2024-10-10 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:a2e06b7f081a3d2e50e3afa8d3f1676a05099707

commit r15-4236-ga2e06b7f081a3d2e50e3afa8d3f1676a05099707
Author: Jennifer Schmitz 
Date:   Thu Oct 3 04:46:51 2024 -0700

match.pd: Check trunc_mod vector optab before folding.

This patch guards the simplification x / y * y == x -> x % y == 0 in
match.pd by a check for:
1) Non-vector mode of x OR
2) Lack of support for vector division OR
3) Support of vector modulo

The patch was bootstrapped and tested with no regression on
aarch64-linux-gnu and x86_64-linux-gnu.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
PR tree-optimization/116831
* match.pd: Guard simplification to trunc_mod with check for
mod optab support.

gcc/testsuite/
PR tree-optimization/116831
* gcc.dg/torture/pr116831.c: New test.

Diff:
---
 gcc/match.pd|  9 +++--
 gcc/testsuite/gcc.dg/torture/pr116831.c | 10 ++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 755ed13e77d1..8a7569ce3871 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5415,8 +5415,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* x / y * y == x -> x % y == 0.  */
 (simplify
   (eq:c (mult:c (trunc_div:s @0 @1) @1) @0)
-  (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE)
-(eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); })))
+  (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE
+   && (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0)))
+  || !target_supports_op_p (TREE_TYPE (@0), TRUNC_DIV_EXPR,
+optab_vector)
+  || target_supports_op_p (TREE_TYPE (@0), TRUNC_MOD_EXPR,
+   optab_vector)))
+   (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); })))
 
 /* ((X /[ex] A) +- B) * A  -->  X +- A * B.  */
 (for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/torture/pr116831.c 
b/gcc/testsuite/gcc.dg/torture/pr116831.c
new file mode 100644
index ..92b2a130e69f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116831.c
@@ -0,0 +1,10 @@
+/* { dg-additional-options "-mcpu=neoverse-v2" { target aarch64*-*-* } } */
+
+long a;
+int b, c;
+void d (int e[][5], short f[][5][5][5]) 
+{
+  for (short g; g; g += 4)
+a = c ?: e[6][0] % b ? 0 : f[0][0][0][g];
+}
+


[gcc r15-4238] libiberty: Restore build with CP_DEMANGLE_DEBUG defined

2024-10-10 Thread Simon Martin via Gcc-cvs
https://gcc.gnu.org/g:c1b2100e736c8ad80479fa6417db760695a00256

commit r15-4238-gc1b2100e736c8ad80479fa6417db760695a00256
Author: Simon Martin 
Date:   Thu Oct 10 15:29:32 2024 +0200

libiberty: Restore build with CP_DEMANGLE_DEBUG defined

cp-demangle.c does not build when CP_DEMANGLE_DEBUG is defined since
r13-2887-gb04208895fed34. This trivial patch fixes the issue.

libiberty/ChangeLog:

* cp-demangle.c (d_dump): Fix compilation when CP_DEMANGLE_DEBUG
is defined.

Diff:
---
 libiberty/cp-demangle.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
index fc2cf64e6e01..5b1bd5dff227 100644
--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -655,9 +655,9 @@ d_dump (struct demangle_component *dc, int indent)
   return;
 case DEMANGLE_COMPONENT_EXTENDED_BUILTIN_TYPE:
   {
-   char suffix[2] = { dc->u.s_extended_builtin.type->suffix, 0 };
+   char suffix[2] = { dc->u.s_extended_builtin.suffix, 0 };
printf ("builtin type %s%d%s\n", dc->u.s_extended_builtin.type->name,
-   dc->u.s_extended_builtin.type->arg, suffix);
+   dc->u.s_extended_builtin.arg, suffix);
   }
   return;
 case DEMANGLE_COMPONENT_OPERATOR:


[gcc r15-4241] phiopt: Remove candorest variable return instead

2024-10-10 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:dc3015ff0934a48176c43c0582d5a93029d298f9

commit r15-4241-gdc3015ff0934a48176c43c0582d5a93029d298f9
Author: Andrew Pinski 
Date:   Thu Oct 10 04:44:23 2024 +

phiopt: Remove candorest variable return instead

After r15-3560-gb081e6c860eb9688d24365d39, the setting of candorest
with the break can just change to a return since this is inside a lambda 
now.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (pass_phiopt::execute): Remove candorest
and return instead of setting candorest.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 43b65b362a39..f3ee3a80c0f8 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -4322,7 +4322,6 @@ pass_phiopt::execute (function *)
}
 
   gimple_stmt_iterator gsi;
-  bool candorest = true;
 
   /* Check that we're looking for nested phis.  */
   basic_block merge = diamond_p ? EDGE_SUCC (bb2, 0)->dest : bb2;
@@ -4338,15 +4337,11 @@ pass_phiopt::execute (function *)
tree arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
  {
-   candorest = false;
cfgchanged = true;
-   break;
+   return;
  }
  }
 
-  if (!candorest)
-   return;
-
   gphi *phi = single_non_singleton_phi_for_edges (phis, e1, e2);
   if (!phi)
return;


[gcc r15-4239] vect: Avoid divide by zero for permutes of extern VLA vectors

2024-10-10 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:9bd19ff515c95af71b29bc6e232785532afa6823

commit r15-4239-g9bd19ff515c95af71b29bc6e232785532afa6823
Author: Richard Sandiford 
Date:   Thu Oct 10 15:15:26 2024 +0100

vect: Avoid divide by zero for permutes of extern VLA vectors

My recent VLA SLP patches caused a regression with cross compilers
in gcc.dg/torture/neon-sve-bridge.c.  There we have a VEC_PERM_EXPR
created from two BIT_FIELD_REFs, with the child node being an
external VLA vector:

note:   node 0x3704a70 (max_nunits=1, refcnt=2) vector(2) long int
note:   op: VEC_PERM_EXPR
note:  stmt 0 val1Return_9 = BIT_FIELD_REF ;
note:  stmt 1 val2Return_10 = BIT_FIELD_REF ;
note:  lane permutation { 0[0] 0[1] }
note:  children 0x3704b08
note:   node (external) 0x3704b08 (max_nunits=1, refcnt=1) svint64_t
note:  { }

For this kind of external node, the SLP_TREE_LANES is normally
the total number of lanes in the vector, but it is zero if the
vector has variable length:

  auto nunits = TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (vnode));
  unsigned HOST_WIDE_INT const_nunits;
  if (nunits.is_constant (&const_nunits))
SLP_TREE_LANES (vnode) = const_nunits;

This led to division by zero in:

  /* Check whether the output has N times as many lanes per vector.  */
  else if (constant_multiple_p (SLP_TREE_LANES (node) * op_nunits,
SLP_TREE_LANES (child) * nunits,
&this_unpack_factor)
   && (i == 0 || unpack_factor == this_unpack_factor))
unpack_factor = this_unpack_factor;

No repetition takes place for this kind of external node, so this
patch goes with Richard's suggestion to check for external nodes
that have no scalar statements.

This didn't show up for my native testing since division by zero
doesn't trap on AArch64.

gcc/
* tree-vect-slp.cc (vectorizable_slp_permutation_1): Set repeating_p
to false if we have an external node for a pre-existing vector.

Diff:
---
 gcc/tree-vect-slp.cc | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9bf6ae4ec8e0..96f1992cfbff 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -10279,10 +10279,19 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, 
gimple_stmt_iterator *gsi,
}
   auto op_nunits = TYPE_VECTOR_SUBPARTS (op_vectype);
   unsigned int this_unpack_factor;
+  /* Detect permutations of external, pre-existing vectors.  The external
+node's SLP_TREE_LANES stores the total number of units in the vector,
+or zero if the vector has variable length.
+
+We are expected to keep the original VEC_PERM_EXPR for such cases.
+There is no repetition to model.  */
+  if (SLP_TREE_DEF_TYPE (child) == vect_external_def
+ && SLP_TREE_SCALAR_OPS (child).is_empty ())
+   repeating_p = false;
   /* Check whether the input has twice as many lanes per vector.  */
-  if (children.length () == 1
- && known_eq (SLP_TREE_LANES (child) * nunits,
-  SLP_TREE_LANES (node) * op_nunits * 2))
+  else if (children.length () == 1
+  && known_eq (SLP_TREE_LANES (child) * nunits,
+   SLP_TREE_LANES (node) * op_nunits * 2))
pack_p = true;
   /* Check whether the output has N times as many lanes per vector.  */
   else if (constant_multiple_p (SLP_TREE_LANES (node) * op_nunits,


[gcc r15-4242] Fix PR116650: check all regs in regrename targets

2024-10-10 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:85bee4f77b1b0ebe68b3efe0c356b7d5fb006c4d

commit r15-4242-g85bee4f77b1b0ebe68b3efe0c356b7d5fb006c4d
Author: Michael Matz 
Date:   Thu Oct 10 16:36:51 2024 +0200

Fix PR116650: check all regs in regrename targets

(this came up for m68k vs. LRA, but is a generic problem)

Regrename wants to use new registers for certain def-use chains.
For validity of replacements it needs to check that the selected
candidates are unused up to then.  That's done in check_new_reg_p.
But if it so happens that the new register needs more hardregs
than the old register (which happens if the target allows inter-bank
moves and the mode is something like a DFmode that needs to be placed
into a SImode reg-pair), then check_new_reg_p only checks the
first of those registers for free-ness.

This is caused by that function looking up the number of necessary
hardregs only in terms of the old hardreg number.  It of course needs
to do that in terms of the new candidate regnumber.  The symptom is that
regrename sometimes clobbers the higher numbered registers of such a
regrename target pair.  This patch fixes that problem.

(In the particular case of the bug report it was LRA that left behind an
inter-bank move instruction that triggers regrename, ultimately causing
the mis-compile.  Reload didn't do that, but in general we of course
can't rely on such moves not happening if the target allows them.)

This also shows a general confusion in that function and the target hook
interface here:

  for (i = nregs - 1; i >= 0; --i)
...
|| ! HARD_REGNO_RENAME_OK (reg + i, new_reg + i))

it uses nregs in a way that requires it to be the same between old and
new register.  The problem is that the target hook only gets register
numbers, when it instead should get a mode and register numbers and
would be called only for the first but not for subsequent registers.
I've looked at a number of definitions of that target hook and I think
that this is currently harmless in the sense that it would merely rule
out some potential reg-renames that would in fact be okay to do.  So I'm
not changing the target hook interface here and hence that problem
remains unfixed.

PR rtl-optimization/116650
* regrename.cc (check_new_reg_p): Calculate nregs in terms of
the new candidate register.

Diff:
---
 gcc/regrename.cc | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/regrename.cc b/gcc/regrename.cc
index 054e601740b1..22668d7bf57d 100644
--- a/gcc/regrename.cc
+++ b/gcc/regrename.cc
@@ -324,10 +324,27 @@ static bool
 check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg,
 class du_head *this_head, HARD_REG_SET this_unavailable)
 {
-  int nregs = this_head->nregs;
+  int nregs = 1;
   int i;
   struct du_chain *tmp;
 
+  /* See whether new_reg accepts all modes that occur in
+ definition and uses and record the number of regs it would take.  */
+  for (tmp = this_head->first; tmp; tmp = tmp->next_use)
+{
+  int n;
+  /* Completely ignore DEBUG_INSNs, otherwise we can get
+-fcompare-debug failures.  */
+  if (DEBUG_INSN_P (tmp->insn))
+   continue;
+
+  if (!targetm.hard_regno_mode_ok (new_reg, GET_MODE (*tmp->loc)))
+   return false;
+  n = hard_regno_nregs (new_reg, GET_MODE (*tmp->loc));
+  if (n > nregs)
+   nregs = n;
+}
+
   for (i = nregs - 1; i >= 0; --i)
 if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
|| fixed_regs[new_reg + i]
@@ -348,14 +365,10 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg,
  definition and uses.  */
   for (tmp = this_head->first; tmp; tmp = tmp->next_use)
 {
-  /* Completely ignore DEBUG_INSNs, otherwise we can get
--fcompare-debug failures.  */
   if (DEBUG_INSN_P (tmp->insn))
continue;
 
-  if (!targetm.hard_regno_mode_ok (new_reg, GET_MODE (*tmp->loc))
- || call_clobbered_in_chain_p (this_head, GET_MODE (*tmp->loc),
-   new_reg))
+  if (call_clobbered_in_chain_p (this_head, GET_MODE (*tmp->loc), new_reg))
return false;
 }