[gcc r15-4245] i386: Fix some patterns's mem attribute.
https://gcc.gnu.org/g:9f2f108a8a68c7b7b2de5350439a8ab8e17a54da commit r15-4245-g9f2f108a8a68c7b7b2de5350439a8ab8e17a54da Author: Hu, Lin1 Date: Wed Oct 9 10:20:05 2024 +0800 i386: Fix some patterns's mem attribute. Hi, all This is another patch to modify some patterns' type attr from ssemov to ssemov2. Some ssemov patterns' mem attr should be load when their operand 2 is a memory operand. Bootstrapped and regtested on x86-64-linux-pc, OK for trunk? BRs, Lin gcc/ChangeLog: * config/i386/sse.md (sse_movhlps): Change type attr from ssemov to ssemov2. (sse_loadhps): Ditto. (*vec_concat): Ditto. (vec_setv2df_0): Ditto. (sse_loadlps): Change attr from ssemov to ssemov2 except for 2, 3. (sse2_loadhps): Change attr from ssemov to ssemov2 except for 0, 1. (sse2_loadlpd): Change attr from ssemov to ssemov2 except for 0, 1, 2. (sse2_movsd_): Change attr from ssemov to ssemov2 except for 5. (vec_concatv2df): Change attr from ssemov to ssemov2 except for 0, 1, 2. (*vec_concat): Change attr from ssemov to ssemov2 for 3, 4. (vec_concatv2di): Change attr from ssemov to ssemov2 except for 0, 1, 2, 3, 4, 5. 
Diff: --- gcc/config/i386/sse.md | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ccef3e063eca..a45b50ad7324 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10995,7 +10995,7 @@ vmovlps\t{%H2, %1, %0|%0, %1, %H2} %vmovhps\t{%2, %0|%q0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) @@ -11557,7 +11557,7 @@ vmovlhps\t{%2, %1, %0|%0, %1, %2} %vmovlps\t{%2, %H0|%H0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")]) @@ -11610,7 +11610,7 @@ vmovlps\t{%2, %1, %0|%0, %1, %q2} %vmovlps\t{%2, %0|%q0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") - (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov") + (set_attr "type" "sseshuf,sseshuf,ssemov2,ssemov2,ssemov") (set (attr "length_immediate") (if_then_else (eq_attr "alternative" "0,1") (const_string "1") @@ -11766,7 +11766,7 @@ movhps\t{%2, %0|%0, %q2} vmovhps\t{%2, %1, %0|%0, %1, %q2}" [(set_attr "isa" "noavx,avx,noavx,avx") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) @@ -12214,7 +12214,7 @@ movlpd\t{%2, %0|%0, %2} vmovlpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx,noavx,avx") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "mode" "DF")]) (define_expand "vec_set" @@ -14665,7 +14665,7 @@ # #" [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") - (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") + (set_attr "type" "ssemov2,ssemov2,sselog,sselog,ssemov,fmov,imov") (set (attr "prefix_data16") (if_then_else (eq_attr "alternative" "0") (const_string "1") 
@@ -14735,6 +14735,8 @@ (const_string "fmov") (eq_attr "alternative" "10") (const_string "imov") + (eq_attr "alternative" "0,1,2") + (const_string "ssemov2") ] (const_string "ssemov"))) (set (attr "prefix_data16") @@ -14787,7 +14789,7 @@ (if_then_else (eq_attr "alternative" "5") (const_string "sselog") - (const_string "ssemov"))) + (const_string "ssemov2"))) (set (attr "prefix_data16") (if_then_else (and (eq_attr "alternative" "2,4") @@ -14859,7 +14861,7 @@ (if_then_else (eq_attr "alternative" "0,1,2") (const_string "sselog") - (const_string "ssemov"))) + (const_string "ssemov2"))) (set (attr "prefix_data16") (if_then_else (eq_attr "alternative" "3") (const_string "1") @@ -21545,7 +21547,7 @@ movhps\t{%2, %0|%0, %q2} vmovhps\t{%2, %1, %0|%0, %1, %q2}" [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx") - (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov") + (set_attr "type" "sselog,sselog,ssemov,ssemov2,ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex") (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) @@ -21653,7 +21655,7 @@ (if_then_else (eq_attr "alternative" "0,1,2,3,4,5") (const_string "sselog") - (const_string "ssemov")))
[gcc r15-4247] libstdc++: Fix some test failures with -fno-char8_t
https://gcc.gnu.org/g:cb0988a659cef6324887018b9066c5f81b558832 commit r15-4247-gcb0988a659cef6324887018b9066c5f81b558832 Author: Jonathan Wakely Date: Wed Oct 9 14:24:19 2024 +0100 libstdc++: Fix some test failures with -fno-char8_t libstdc++-v3/ChangeLog: * testsuite/20_util/duration/io.cc [!__cpp_lib_char8_t]: Define char8_t as a typedef for unsigned char. * testsuite/std/format/parse_ctx_neg.cc: Skip for -fno-char8_t. Diff: --- libstdc++-v3/testsuite/20_util/duration/io.cc | 10 -- libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/testsuite/20_util/duration/io.cc b/libstdc++-v3/testsuite/20_util/duration/io.cc index 383fb60afe2a..0117673dbdc0 100644 --- a/libstdc++-v3/testsuite/20_util/duration/io.cc +++ b/libstdc++-v3/testsuite/20_util/duration/io.cc @@ -5,6 +5,10 @@ #include #include +#ifndef __cpp_lib_char8_t +using char8_t = unsigned char; // Prevent errors if -fno-char8_t is used. +#endif + void test01() { @@ -173,12 +177,14 @@ test_format() #if __cplusplus > 202002L static_assert( ! std::formattable, char> ); - static_assert( ! std::formattable, char> ); static_assert( ! std::formattable, char> ); static_assert( ! std::formattable, char> ); - static_assert( ! std::formattable, wchar_t> ); static_assert( ! std::formattable, wchar_t> ); static_assert( ! std::formattable, wchar_t> ); +#ifdef __cpp_lib_char8_t + static_assert( ! std::formattable, char> ); + static_assert( ! std::formattable, wchar_t> ); +#endif #endif } diff --git a/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc b/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc index d6a4366d7d0b..f19107c886fc 100644 --- a/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc +++ b/libstdc++-v3/testsuite/std/format/parse_ctx_neg.cc @@ -1,4 +1,5 @@ // { dg-do compile { target c++26 } } +// { dg-skip-if "" { *-*-* } { "-fno-char8_t" } } #include
[gcc r15-4246] Fix possible wrong-code with masked store-lanes
https://gcc.gnu.org/g:36b9c5e6f3301d3d0165f578d020dcd350cd516d commit r15-4246-g36b9c5e6f3301d3d0165f578d020dcd350cd516d Author: Richard Biener Date: Thu Oct 10 14:00:11 2024 +0200 Fix possible wrong-code with masked store-lanes When we're doing masked store-lanes one mask element applies to all loads of one struct element. This requires uniform masks for all of the SLP lanes, something we already compute into STMT_VINFO_SLP_VECT_ONLY but fail to check when doing SLP store-lanes. The following corrects this. The following also adjusts the store-lane heuristic to properly check for masked or non-masked optab support. * tree-vect-slp.cc (vect_slp_prefer_store_lanes_p): Allow passing in of vectype, pass in whether the stores are masked and query the correct optab. (vect_build_slp_instance): Guard store-lanes query with ! STMT_VINFO_SLP_VECT_ONLY, guaranteeing a uniform mask. Diff: --- gcc/tree-vect-slp.cc | 30 -- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 96f1992cfbff..3024b87a1f83 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -3504,17 +3504,22 @@ vect_match_slp_patterns (slp_instance instance, vec_info *vinfo, } /* STMT_INFO is a store group of size GROUP_SIZE that we are considering - splitting into two, with the first split group having size NEW_GROUP_SIZE. + vectorizing with VECTYPE that might be NULL. MASKED_P indicates whether + the stores are masked. Return true if we could use IFN_STORE_LANES instead and if that appears to be the better approach. 
*/ static bool vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info, + tree vectype, bool masked_p, unsigned int group_size, unsigned int new_group_size) { - tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); - tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + if (!vectype) +{ + tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); + vectype = get_vectype_for_scalar_type (vinfo, scalar_type); +} if (!vectype) return false; /* Allow the split if one of the two new groups would operate on full @@ -3528,7 +3533,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info, if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype)) || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype))) return false; - return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST; + return vect_store_lanes_supported (vectype, group_size, masked_p) != IFN_LAST; } /* Analyze an SLP instance starting from a group of grouped stores. Call @@ -3973,6 +3978,10 @@ vect_build_slp_instance (vec_info *vinfo, else if (is_a (vinfo) && (group_size != 1 && i < group_size)) { + gcall *call = dyn_cast (stmt_info->stmt); + bool masked_p = call + && gimple_call_internal_p (call) + && internal_fn_mask_index (gimple_call_internal_fn (call)) != -1; /* There are targets that cannot do even/odd interleaving schemes so they absolutely need to use load/store-lanes. For now force single-lane SLP for them - they would be happy with @@ -3987,9 +3996,10 @@ vect_build_slp_instance (vec_info *vinfo, bool want_store_lanes = (! STMT_VINFO_GATHER_SCATTER_P (stmt_info) && ! STMT_VINFO_STRIDED_P (stmt_info) + && ! 
STMT_VINFO_SLP_VECT_ONLY (stmt_info) && compare_step_with_zero (vinfo, stmt_info) > 0 - && vect_slp_prefer_store_lanes_p (vinfo, stmt_info, -group_size, 1)); + && vect_slp_prefer_store_lanes_p (vinfo, stmt_info, NULL_TREE, +masked_p, group_size, 1)); if (want_store_lanes || force_single_lane) i = 1; @@ -4074,14 +4084,14 @@ vect_build_slp_instance (vec_info *vinfo, /* Now re-assess whether we want store lanes in case the discovery ended up producing all single-lane RHSs. */ - if (rhs_common_nlanes == 1 + if (! want_store_lanes + && rhs_common_nlanes == 1 && ! STMT_VINFO_GATHER_SCATTER_P (stmt_info) && ! STMT_VINFO_STRIDED_P (stmt_info) + && ! STMT_VINFO_SLP_VECT_ONLY (stmt_info) && compare_step_with_zero (vinfo, stmt_info) > 0 && (vect_store_lanes_supported (SLP_TREE_VECTYPE (rhs_nodes[0]), - group_size, - SLP_TREE_CHILDREN -
[gcc r15-4237] tree-optimization/117060 - fix oversight in vect_build_slp_tree_1
https://gcc.gnu.org/g:7ce2229d54d575d788b016f941aafd0464ea77f7 commit r15-4237-g7ce2229d54d575d788b016f941aafd0464ea77f7 Author: Richard Biener Date: Thu Oct 10 14:15:13 2024 +0200 tree-optimization/117060 - fix oversight in vect_build_slp_tree_1 We are failing to match call vs. non-call when dealing with matching loads or stores. PR tree-optimization/117060 * tree-vect-slp.cc (vect_build_slp_tree_1): When comparing calls also fail if the first isn't a call. * gfortran.dg/pr117060.f90: New testcase. Diff: --- gcc/testsuite/gfortran.dg/pr117060.f90 | 21 + gcc/tree-vect-slp.cc | 5 +++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/pr117060.f90 b/gcc/testsuite/gfortran.dg/pr117060.f90 new file mode 100644 index ..50004e1aaf3d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr117060.f90 @@ -0,0 +1,21 @@ +! { dg-do compile } +! { dg-options "-O2" } + +subroutine foo (out) + +implicit none + +real:: out(*) +integer :: i,k +real:: a(100) +real:: b(100) + +k = 0 +do i = 1, 10 + k = k + 1 + out(k) = a(i) + k = k + 1 + out(k) = sqrt((a(3*i)-b(4))**2 + (a(3*i+1)-b(4+1))**2) +end do + +end subroutine diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 8b53b0fdb16d..9bf6ae4ec8e0 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1367,8 +1367,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, && first_stmt_code != CFN_MASK_LOAD && first_stmt_code != CFN_MASK_STORE) { - if (!compatible_calls_p (as_a (stmts[0]->stmt), - call_stmt)) + if (!is_a (stmts[0]->stmt) + || !compatible_calls_p (as_a (stmts[0]->stmt), + call_stmt)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
[gcc r15-4235] Allow SLP store of mixed external and constant
https://gcc.gnu.org/g:b3221b838ee7ae7848e7194603acb18294b3 commit r15-4235-gb3221b838ee7ae7848e7194603acb18294b3 Author: Richard Biener Date: Wed Oct 9 15:31:59 2024 +0200 Allow SLP store of mixed external and constant vect_build_slp_tree_1 rejected this during SLP discovery because it ran into the rhs code comparison code for stores. The following skips that completely for loads and stores as those are handled later anyway. This needs a heuristic adjustment in vect_get_and_check_slp_defs to avoid fallout with regard to BB vectorization and splitting of a store group vs. demoting one operand to external. gcc.dg/Wstringop-overflow-47.c needs adjustment given we now have vast improvements for code generation. gcc.dg/strlenopt-32.c needs adjustment because the strlen pass doesn't handle _11 = {0, b_6(D)}; __builtin_memcpy (&a, "foo.bar", 8); MEM [(char *)&a + 3B] = _11; _9 = strlen (&a); I have opened PR117057 for this. * tree-vect-slp.cc (vect_build_slp_tree_1): Do not compare RHS codes for loads or stores. (vect_get_and_check_slp_defs): Only demote operand to external in case there is more than one operand. * gcc.dg/vect/slp-57.c: New testcase. * gcc.dg/Wstringop-overflow-47.c: Adjust. * gcc.dg/strlenopt-32.c: XFAIL parts. 
Diff: --- gcc/testsuite/gcc.dg/Wstringop-overflow-47.c | 6 +++--- gcc/testsuite/gcc.dg/strlenopt-32.c | 3 ++- gcc/testsuite/gcc.dg/vect/slp-57.c | 14 ++ gcc/tree-vect-slp.cc | 24 +++- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c index 9fb78e55046f..aa5402a060f3 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c @@ -31,15 +31,15 @@ void nowarn_c32 (char c) void warn_c32 (char c) { - extern char warn_a32[32]; // { dg-message "at offset (32|1) into destination object 'warn_a32' of size 32" "pr97027" } + extern char warn_a32[32]; // { dg-message "at offset (32|1|17) into destination object 'warn_a32' of size 32" "pr97027" } void *p = warn_a32 + 1; - *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|32 bytes) into a region of size (0|31)" "pr97027" } + *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|16 bytes|32 bytes) into a region of size (0|15|31)" "pr97027" } /* Verify a local variable too. 
*/ char a32[32]; p = a32 + 1; - *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|32 bytes) into a region of size (0|31)" "pr97027" } + *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|16 bytes|32 bytes) into a region of size (0|15|31)" "pr97027" } sink (p); } diff --git a/gcc/testsuite/gcc.dg/strlenopt-32.c b/gcc/testsuite/gcc.dg/strlenopt-32.c index 4220314fb3f0..c53168570fdb 100644 --- a/gcc/testsuite/gcc.dg/strlenopt-32.c +++ b/gcc/testsuite/gcc.dg/strlenopt-32.c @@ -190,4 +190,5 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" } } */ +/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" { xfail vect_slp_v2qi_store_unalign } } } */ +/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen1" { target vect_slp_v2qi_store_unalign } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-57.c b/gcc/testsuite/gcc.dg/vect/slp-57.c new file mode 100644 index ..a35c4ef62030 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-57.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +int a[1024]; +void foo (int x) +{ + for (int i = 0; i < 1024; i += 2) +{ + a[i] = x; + a[i+1] = 1; +} +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 9bb765e2cbac..8b53b0fdb16d 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -905,7 +905,8 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, } if (is_a (vinfo) - && !oprnd_info->any_pattern) + && !oprnd_info->any_pattern + && number_of_oprnds > 1) { /* Now for commutative ops we should see whether we can make the other operand matching. */ @@ -1305,10 +1306,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, /* Mismatch. 
*/ continue; } - if (first_stmt_code != rhs_code + if (!ldst_p + && first_stmt_code != rhs_code && alt_stmt_code == ERROR_MARK) alt_stmt_code = rhs_code; - if ((first_stmt_code != rhs_code + if ((!ldst_p + && first_stmt_code != rhs_code
[gcc] Created branch 'mikael/heads/inline_minmaxloc_v333' in namespace 'refs/users'
The branch 'mikael/heads/inline_minmaxloc_v333' was created in namespace 'refs/users' pointing to: 128c217eee0c... fortran: Evaluate once BACK argument of MINLOC/MAXLOC with
[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]
https://gcc.gnu.org/g:d1f2bdaa6449030e8577c3904967bc43eef6b773 commit d1f2bdaa6449030e8577c3904967bc43eef6b773 Author: Mikael Morin Date: Fri Nov 17 19:04:19 2023 +0100 fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608] Enable generation of inline code for the MINLOC and MAXLOC intrinsics, if the ARRAY argument is of integral type and of any rank (only the rank 1 case was previously inlined), the DIM argument is a constant value and there is no MASK argument. The restriction to integral ARRAY and absent MASK limits the scope of the change to the cases where we generate single loop inline code. This change uses the existing scalarizer support for reductions, that is arrays used in scalarization loops, where each element uses a nested scalarization loop to calculate its value. The nested loop (and respectively the nested scalarization chain) is created while walking the MINLOC/MAXLOC expression, it's set up automatically by the outer scalarizer, and gfc_conv_intrinsic_minmaxloc is changed to use it as a replacement for the local loop variable (respectively ARRAY scalarization chain) used in the non-reduction case (i.e. when DIM is absent). PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return true if DIM is constant, ARRAY is integral and MASK is absent. (walk_inline_intrinsic_minmaxloc): If DIM is present, walk ARRAY and move the dimension corresponding to DIM to a nested chain, keeping the rest of the dimensions as the returned scalarization chain. (gfc_conv_intrinsic_minmaxloc): When inside the scalarization loops, proceed with inline code generation if DIM is present. If DIM is present, skip result array creation and final initialization from individual result local variables. 
If DIM is present and ARRAY has rank greater than 1, use the nested loop initialized by the scalarizer instead of the local one, use 1 as scalarization dimension, and evaluate ARRAY using the inherited scalarization chain instead of creating a local one by walking the expression. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_1.f90: Also accept the error message generated by the scalarizer in case the function call is implemented through inline code. * gfortran.dg/maxloc_bounds_2.f90: Likewise. * gfortran.dg/maxloc_bounds_3.f90: Likewise. * gfortran.dg/minmaxloc_19.f90: New test. Diff: --- gcc/fortran/trans-intrinsic.cc| 227 ++ gcc/testsuite/gfortran.dg/maxloc_bounds_1.f90 | 4 +- gcc/testsuite/gfortran.dg/maxloc_bounds_2.f90 | 4 +- gcc/testsuite/gfortran.dg/maxloc_bounds_3.f90 | 4 +- gcc/testsuite/gfortran.dg/minmaxloc_19.f90| 182 + 5 files changed, 343 insertions(+), 78 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index a282ae1c0903..dedb49b4a64e 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5472,12 +5472,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) tree lab1, lab2; tree b_if, b_else; tree back; - gfc_loopinfo loop; - gfc_actual_arglist *actual; - gfc_ss *arrayss; - gfc_ss *maskss; + gfc_loopinfo loop, *ploop; + gfc_actual_arglist *actual, *array_arg, *dim_arg, *mask_arg, *kind_arg; + gfc_actual_arglist *back_arg; + gfc_ss *arrayss = nullptr; + gfc_ss *maskss = nullptr; gfc_se arrayse; gfc_se maskse; + gfc_se *base_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; gfc_expr *backexpr; @@ -5489,6 +5491,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) bool optional_mask; actual = expr->value.function.actual; + array_arg = actual; + dim_arg = array_arg->next; + mask_arg = dim_arg->next; + kind_arg = mask_arg->next; + back_arg = kind_arg->next; + + bool dim_present = dim_arg->expr != nullptr; + bool nested_loop 
= dim_present && expr->rank > 0; /* The last argument, BACK, is passed by value. Ensure that by setting its name to %VAL. */ @@ -5502,11 +5512,15 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) { if (se->ss->info->useflags) { - /* The inline implementation of MINLOC/MAXLOC has been generated -before, out of the scalarization loop; now we can just use the -result. */ - gfc_conv_tmp_array_ref (se); - return; + if (!dim_present || !gfc_inline_intrinsic_function_p (expr)) + { + /* The code generating and initializing the result array has been +generated already before the scalari
[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]
https://gcc.gnu.org/g:25e8f732e34d5a41df821c43f4372567fac7b787 commit 25e8f732e34d5a41df821c43f4372567fac7b787 Author: Mikael Morin Date: Thu Aug 8 12:23:16 2024 +0200 fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608] Enable generation of inline MINLOC/MAXLOC code in the cases where DIM is a constant, and either ARRAY is of floating point or MASK is an array. Those cases are the remaining bits to fully support inlining of non-CHARACTER MINLOC/MAXLOC with DIM. They are treated together because they generate similar code, the NANs for REAL types being handled a bit like a second level of masking. These are the cases for which we generate two loops. This change affects the code generating the second loop, that was previously accessible only in cases ARRAY had rank 1. The main changes are in gfc_conv_intrinsic_minmaxloc the replacement of the locally initialized scalarization loop with the one provided and previously initialized by the scalarizer. Same goes for the locally initialized MASK scalarizer chain. As this is enabling the code generating a second loop in a context of reduction and nested loops, care is taken not to advance parent scalarization chains twice. The scalarization chain element(s) for an array MASK are inserted in the chain at a different place from that of a scalar MASK. This is done on purpose to match the code consuming the chains which are in different places for scalar and array MASK. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE for MINLOC/MAXLOC with constant DIM and non-scalar MASK. (walk_inline_intrinsic_minmaxloc): Walk MASK and if it's an array add the chain obtained before that of ARRAY. (gfc_conv_intrinsic_minmaxloc): Use the nested loop if there is one. To evaluate MASK (respectively ARRAY in the second loop), inherit the scalarizer chain if in a nested loop, otherwise keep using the chain obtained by walking MASK (respectively ARRAY). 
If there is a nested loop, avoid advancing the parent scalarization chain a second time in the second loop. gcc/testsuite/ChangeLog: * gfortran.dg/minmaxloc_21.f90: New test. Diff: --- gcc/fortran/trans-intrinsic.cc | 94 ++--- gcc/testsuite/gfortran.dg/minmaxloc_21.f90 | 572 + 2 files changed, 623 insertions(+), 43 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index a146d7263c88..4beead175b77 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5477,6 +5477,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_actual_arglist *back_arg; gfc_ss *arrayss = nullptr; gfc_ss *maskss = nullptr; + gfc_ss *orig_ss = nullptr; gfc_se arrayse; gfc_se maskse; gfc_se nested_se; @@ -5711,6 +5712,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) if (nested_loop) { ploop = enter_nested_loop (&nested_se); + orig_ss = nested_se.ss; ploop->temp_dim = 1; } else @@ -5785,9 +5787,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) } else { - gcc_assert (!nested_loop); - for (int i = 0; i < loop.dimen; i++) - gfc_add_modify (&loop.pre, pos[i], gfc_index_zero_node); + for (int i = 0; i < ploop->dimen; i++) + gfc_add_modify (&ploop->pre, pos[i], gfc_index_zero_node); lab1 = gfc_build_label_decl (NULL_TREE); TREE_USED (lab1) = 1; lab2 = gfc_build_label_decl (NULL_TREE); @@ -5818,10 +5819,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) /* If we have a mask, only check this element if the mask is set. 
*/ if (maskexpr && maskexpr->rank > 0) { - gcc_assert (!nested_loop); - gfc_init_se (&maskse, NULL); - gfc_copy_loopinfo_to_se (&maskse, &loop); - maskse.ss = maskss; + gfc_init_se (&maskse, base_se); + gfc_copy_loopinfo_to_se (&maskse, ploop); + if (!nested_loop) + maskse.ss = maskss; gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&body, &maskse.pre); @@ -5849,13 +5850,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) stmtblock_t ifblock2; tree ifbody2; - gcc_assert (!nested_loop); - gfc_start_block (&ifblock2); - for (int i = 0; i < loop.dimen; i++) + for (int i = 0; i < ploop->dimen; i++) { tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]), -loop.loopvar[i], offset[i]); +ploop->loopvar[i
[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]
https://gcc.gnu.org/g:8674757e6e99a36dfc211aa66c19dbc0affbf8a4 commit 8674757e6e99a36dfc211aa66c19dbc0affbf8a4 Author: Mikael Morin Date: Thu Nov 16 10:00:26 2023 +0100 fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608] Add the tests covering the cases for which the following patches will implement inline expansion of MINLOC and MAXLOC. Those are cases where the DIM argument is a constant value, and the ARRAY argument has rank greater than 1. PR fortran/90608 gcc/testsuite/ChangeLog: * gfortran.dg/ieee/maxloc_nan_2.f90: New test. * gfortran.dg/ieee/minloc_nan_2.f90: New test. * gfortran.dg/maxloc_with_dim_1.f90: New test. * gfortran.dg/maxloc_with_dim_and_mask_1.f90: New test. * gfortran.dg/minloc_with_dim_1.f90: New test. * gfortran.dg/minloc_with_dim_and_mask_1.f90: New test. Diff: --- gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90| 64 +++ gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90| 64 +++ gcc/testsuite/gfortran.dg/maxloc_with_dim_1.f90| 201 + .../gfortran.dg/maxloc_with_dim_and_mask_1.f90 | 452 + gcc/testsuite/gfortran.dg/minloc_with_dim_1.f90| 201 + .../gfortran.dg/minloc_with_dim_and_mask_1.f90 | 452 + 6 files changed, 1434 insertions(+) diff --git a/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 new file mode 100644 index ..788903506350 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 @@ -0,0 +1,64 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check the correct behaviour of the inline maxloc implementation, +! when the dim argument is present. + +program p + implicit none + call check_without_mask + call check_with_mask +contains + subroutine check_without_mask() +use, intrinsic :: ieee_arithmetic +real, allocatable :: a(:,:,:) +real :: nan +integer, allocatable :: r(:,:) +if (.not. 
ieee_support_nan(nan)) return +nan = ieee_value(nan, ieee_quiet_nan) +allocate(a(3,4,5), source = nan) +r = maxloc(a, dim=1) +if (any(shape(r) /= (/ 4, 5 /))) stop 21 +if (any(r /= 1)) stop 22 +r = maxloc(a, dim=2) +if (any(shape(r) /= (/ 3, 5 /))) stop 23 +if (any(r /= 1)) stop 24 +r = maxloc(a, dim=3) +if (any(shape(r) /= (/ 3, 4 /))) stop 25 +if (any(r /= 1)) stop 26 + end subroutine + subroutine check_with_mask() +use, intrinsic :: ieee_arithmetic +real, allocatable :: a(:,:,:) +logical, allocatable :: m(:,:,:) +real :: nan +integer, allocatable :: r(:,:) +if (.not. ieee_support_nan(nan)) return +nan = ieee_value(nan, ieee_quiet_nan) +allocate(a(2,3,4), source = nan) +allocate(m(2,3,4)) +m(:,:,:) = reshape((/ .false., .false., .true. , .true. , & + .false., .true. , .false., .false., & + .false., .true. , .true. , .false., & + .true. , .true. , .true. , .false., & + .false., .true. , .true. , .false., & + .false., .true. , .false., .false. /), shape(m)) +r = maxloc(a, dim = 1, mask = m) +if (any(shape(r) /= (/ 3, 4 /))) stop 51 +if (any(r /= reshape((/ 0, 1, 2, & +0, 2, 1, & +1, 1, 2, & +1, 2, 0 /), (/ 3, 4 / stop 52 +r = maxloc(a, dim = 2, mask = m) +if (any(shape(r) /= (/ 2, 4 /))) stop 53 +if (any(r /= reshape((/ 2, 2, & +3, 2, & +1, 1, & +1, 2 /), (/ 2, 4 / stop 54 +r = maxloc(a, dim = 3, mask = m) +if (any(shape(r) /= (/ 2, 3 /))) stop 55 +if (any(r /= reshape((/ 3, 3, & +1, 1, & +2, 1 /), (/ 2, 3 / stop 56 + end subroutine +end program p diff --git a/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 new file mode 100644 index ..37724d8202de --- /dev/null +++ b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 @@ -0,0 +1,64 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check the correct behaviour of the inline minloc implementation, +! when the dim argument is present. 
+ +program p + implicit none + call check_without_mask + call check_with_mask +contains + subroutine check_without_mask() +use, intrinsic :: ieee_arithmetic +real, allocatable :: a(:,:,:) +real :: nan +integer, allocatable :: r(:,:) +if (.not. ieee_support_nan(nan)) return +nan = ieee_value(nan, ieee_quiet_nan) +allocate(a(3,4,5), source = nan) +r = minloc(a, dim=1) +if (any(shape(r) /= (/ 4, 5 /))) stop 21 +if (any(r /= 1)) stop 22 +r = minloc(a, dim=2) +if (any(shape(r) /= (/ 3, 5 /))) stop 23 +
[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Check MASK directly instead of its scalarization chain
https://gcc.gnu.org/g:fe0feb4163558087c9dd02f0d2c909e7512a9ab4 commit fe0feb4163558087c9dd02f0d2c909e7512a9ab4 Author: Mikael Morin Date: Thu Sep 12 16:56:39 2024 +0200 fortran: Check MASK directly instead of its scalarization chain Update the conditions used by the inline MINLOC/MAXLOC code generation function to check directly the properties of MASK instead of the variable holding its scalarization chain. The inline implementation of MINLOC/MAXLOC in gfc_conv_intrinsic_minmaxloc uses several conditions checking the presence of a scalarization chain for MASK, which means that the argument is present and non-scalar. The next patch will allow inlining MINLOC/MAXLOC with DIM and MASK, and in that case the scalarization chain for MASK is initialized elsewhere, so the variable usually holding it in the function is not used, and the conditions won't work in that case. This change updates the conditions to check directly the properties of MASK so that they work even if the scalarization chain variable is not used. gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Use conditionals based on the MASK expression rather than on its scalarization chains. Diff: --- gcc/fortran/trans-intrinsic.cc | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index cd6aca51f218..a146d7263c88 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5746,7 +5746,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gcc_assert (reduction_dimensions == ploop->dimen); - if (nonempty == NULL && maskss == NULL) + if (nonempty == NULL && !(maskexpr && maskexpr->rank > 0)) { nonempty = logical_true_node; @@ -5816,7 +5816,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_start_scalarized_body (ploop, &body); /* If we have a mask, only check this element if the mask is set. 
*/ - if (maskss) + if (maskexpr && maskexpr->rank > 0) { gcc_assert (!nested_loop); gfc_init_se (&maskse, NULL); @@ -5921,7 +5921,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) } gfc_add_expr_to_block (&block, ifbody); - if (maskss) + if (maskexpr && maskexpr->rank > 0) { /* We enclose the above in if (mask) {...}. If the mask is an optional argument, generate IF (.NOT. PRESENT(MASK) @@ -5972,7 +5972,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_expr_to_block (outer_block, build1_v (LABEL_EXPR, lab1)); /* If we have a mask, only check this element if the mask is set. */ - if (maskss) + if (maskexpr && maskexpr->rank > 0) { gfc_init_se (&maskse, NULL); gfc_copy_loopinfo_to_se (&maskse, &loop); @@ -6038,7 +6038,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_expr_to_block (&block, tmp); - if (maskss) + if (maskexpr && maskexpr->rank > 0) { /* We enclose the above in if (mask) {...}. If the mask is an optional argument, generate IF (.NOT. PRESENT(MASK) @@ -6063,7 +6063,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_expr_to_block (&loop.pre, build1_v (LABEL_EXPR, lab2)); /* For a scalar mask, enclose the loop in an if statement. */ - if (maskexpr && maskss == NULL) + if (maskexpr && maskexpr->rank == 0) { tree ifmask;
[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]
https://gcc.gnu.org/g:b696e5de16f5f0e1403a03e27e0a2e159a37cf83 commit b696e5de16f5f0e1403a03e27e0a2e159a37cf83 Author: Mikael Morin Date: Thu Aug 8 13:44:16 2024 +0200 fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608] Enable the generation of inline code for MINLOC/MAXLOC when argument ARRAY is of integral type, DIM is a constant, and MASK is scalar (only absent MASK or rank 1 ARRAY were inlined before). Scalar masks are implemented with a wrapping condition around the code one would generate if MASK wasn't present, so they are easy to support once inline code without MASK is working. With this change, there are both expressions evaluated inside the nested loop (ARRAY, and in the future MASK if non-scalar) and expressions evaluated outside of it (MASK if scalar). Both have to advance the scalarization chain passed in argument SE to gfc_conv_intrinsic_minmaxloc as they are evaluated, but expressions evaluated from within the nested loop additionally have to advance the nested scalarization chain of the reduction loop. This is normally handled transparently through the inheritance that is defined when initializing gfc_se structs, but there has to be some variable to inherit from, and there is a single one, SE. This variable is kept as base for out of nested loop expressions (scalar MASK), and this change introduces a new variable to hold the current advance of the nested loop scalarization chain and serve as inheritance base to evaluate nested loop expressions (just ARRAY for now, additionally non-scalar MASK later). PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE if MASK is scalar. (walk_inline_intrinsic_minmaxloc): Append to the scalarization chain a scalar element for MASK if it's present. (gfc_conv_intrinsic_minmaxloc): Use a local gfc_se struct to serve as base for all the expressions evaluated in the nested loop. 
To evaluate MASK in a nested loop, enable usage of the scalarizer and set the current scalarization chain element to use to that of the original passed in SE argument. And use the nested loop from the scalarizer instead of the local loop in that case. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_8.f90: Accept the error message generated by the scalarizer in case the MAXLOC intrinsic call is implemented through inline code. * gfortran.dg/minmaxloc_20.f90: New test. Diff: --- gcc/fortran/trans-intrinsic.cc| 27 ++-- gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90 | 4 +- gcc/testsuite/gfortran.dg/minmaxloc_20.f90| 182 ++ 3 files changed, 201 insertions(+), 12 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index dedb49b4a64e..cd6aca51f218 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5479,6 +5479,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_ss *maskss = nullptr; gfc_se arrayse; gfc_se maskse; + gfc_se nested_se; gfc_se *base_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; @@ -5616,7 +5617,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_block_to_block (&se->pre, &backse.post); if (nested_loop) -base_se = se; +{ + gfc_init_se (&nested_se, se); + base_se = &nested_se; +} else { /* Walk the arguments. */ @@ -5706,7 +5710,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) if (nested_loop) { - ploop = enter_nested_loop (se); + ploop = enter_nested_loop (&nested_se); ploop->temp_dim = 1; } else @@ -6063,21 +6067,19 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) { tree ifmask; - gcc_assert (!nested_loop); - - gfc_init_se (&maskse, NULL); + gfc_init_se (&maskse, nested_loop ? 
se : nullptr); gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&se->pre, &maskse.pre); gfc_init_block (&block); - gfc_add_block_to_block (&block, &loop.pre); - gfc_add_block_to_block (&block, &loop.post); + gfc_add_block_to_block (&block, &ploop->pre); + gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); /* For the else part of the scalar mask, just initialize the pos variable the same way as above. */ gfc_init_block (&elseblock); - for (int i = 0; i < loop.dimen; i++) + for (int i = 0; i < ploop->dimen; i++) gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node); elsetmp = gfc_finish_bl
[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]
https://gcc.gnu.org/g:128c217eee0ccf363433684edb754db5d5aedc08 commit 128c217eee0ccf363433684edb754db5d5aedc08 Author: Mikael Morin Date: Thu Oct 3 15:57:50 2024 +0200 fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608] Evaluate the BACK argument of MINLOC/MAXLOC once before the scalarization loops in the case where the DIM argument is present. This is a follow-up to r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3 which added knowledge of BACK to the scalarizer, to r15-2701-ga10436a8404ad2f0cc5aa4d6a0cc850abe5ef49e which removed it to handle it out of scalarization instead, and to more immediate previous patches that added support for MINLOC/MAXLOC with DIM. The recent support for MINLOC/MAXLOC with DIM introduced nested loops, which made the evaluation of BACK (removed from the scalarizer knowledge by previous patches) wrapped in a loop, so possibly executed more than once. This change adds BACK to the scalarization chain if MINLOC/MAXLOC will use nested loops, so that it is evaluated by the scalarizer only once before the outermost loop in that case. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (walk_inline_intrinsic_minmaxloc): Add a scalar element for BACK as first item of the list if BACK is present and there will be nested loops. (gfc_conv_intrinsic_minmaxloc): Evaluate BACK using an inherited scalarization chain if there is a nested loop. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_8.f90: New test. * gfortran.dg/minloc_9.f90: New test. 
Diff: --- gcc/fortran/trans-intrinsic.cc | 20 +- gcc/testsuite/gfortran.dg/maxloc_8.f90 | 349 + gcc/testsuite/gfortran.dg/minloc_9.f90 | 349 + 3 files changed, 716 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 90dcb759b378..5c25eedcc4f7 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5594,7 +5594,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) && maskexpr->symtree->n.sym->attr.optional; backexpr = back_arg->expr; - gfc_init_se (&backse, NULL); + gfc_init_se (&backse, nested_loop ? se : nullptr); if (backexpr == nullptr) back = logical_false_node; else if (maybe_absent_optional_variable (backexpr)) @@ -11885,10 +11885,13 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr *expr ATTRIBUTE_UNUSED) gfc_actual_arglist *array_arg = expr->value.function.actual; gfc_actual_arglist *dim_arg = array_arg->next; gfc_actual_arglist *mask_arg = dim_arg->next; + gfc_actual_arglist *kind_arg = mask_arg->next; + gfc_actual_arglist *back_arg = kind_arg->next; gfc_expr *array = array_arg->expr; gfc_expr *dim = dim_arg->expr; gfc_expr *mask = mask_arg->expr; + gfc_expr *back = back_arg->expr; if (dim == nullptr) return gfc_get_array_ss (ss, expr, 1, GFC_SS_INTRINSIC); @@ -11914,7 +11917,20 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr *expr ATTRIBUTE_UNUSED) chain. */ int dim_val = mpz_get_si (dim->value.integer) - 1; gfc_ss *tail = nest_loop_dimension (tmp_ss, dim_val); - tail->next = ss; + + if (back && array->rank > 1) +{ + /* If there are nested scalarization loops, include BACK in the +scalarization chains to avoid evaluating it multiple times in a loop. +Otherwise, prefer to handle it outside of scalarization. 
*/ + gfc_ss *back_ss = gfc_get_scalar_ss (ss, back); + back_ss->info->type = GFC_SS_REFERENCE; + back_ss->info->can_be_null_ref = true; + + tail->next = back_ss; +} + else +tail->next = ss; if (scalar_mask) tmp_ss = gfc_get_scalar_ss (tmp_ss, mask); diff --git a/gcc/testsuite/gfortran.dg/maxloc_8.f90 b/gcc/testsuite/gfortran.dg/maxloc_8.f90 new file mode 100644 index ..21bc4591235a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/maxloc_8.f90 @@ -0,0 +1,349 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check that the evaluation of MAXLOC's BACK argument is made only once +! before the scalarization loops, when the DIM argument is present. + +program p + implicit none + integer, parameter :: data60(*) = (/ 7, 4, 5, 3, 9, 0, 6, 4, 5, 5, & + 8, 2, 6, 7, 8, 7, 4, 5, 3, 9, & + 0, 6, 4, 5, 5, 8, 2, 6, 7, 8, & + 7, 4, 5, 3, 9, 0, 6, 4, 5, 5, & + 8, 2, 6, 7, 8, 7, 4, 5, 3, 9, & + 0, 6, 4, 5, 5, 8, 2, 6, 7, 8 /) + logical, parameter :: mask60(*) = (/ .true. , .false., .false., .false., & + .true. , .false., .true. , .false.,
[gcc(refs/users/mikael/heads/inline_minmaxloc_v333)] fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only
https://gcc.gnu.org/g:de0b77fb688595f13a2f5740aa720718c70ac456 commit de0b77fb688595f13a2f5740aa720718c70ac456 Author: Mikael Morin Date: Sat Nov 18 20:54:20 2023 +0100 fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only In the function generating inline code to implement MINLOC and MAXLOC, only get the size of ARRAY along DIM if DIM is present to check for emptiness. The check for ARRAY emptiness had been checking the size of the full array, which is correct for MINLOC and MAXLOC without DIM. But if DIM is present, the reduction is along DIM only so the check for emptiness should consider that dimension only as well. This sounds like a correctness issue, but fortunately the cases where it makes a difference are cases where ARRAY is empty, so even if the MINLOC or MAXLOC calculated value is wrong, it's wrapped in a zero iteration loop, and the wrong values are not actually used. In the end this just avoids unnecessary calculations. A previous version of this patch didn't support non-constant DIM with rank 1 ARRAY. The new testcase checks that that case is supported. gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Only get the size along DIM instead of the full size if DIM is present. gcc/testsuite/ChangeLog: * gfortran.dg/minmaxloc_22.f90: New test. 
Diff: --- gcc/fortran/trans-intrinsic.cc | 19 ++- gcc/testsuite/gfortran.dg/minmaxloc_22.f90 | 24 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 4beead175b77..90dcb759b378 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5641,7 +5641,24 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) if (!(maskexpr && maskexpr->rank > 0)) { mpz_t asize; - if (gfc_array_size (arrayexpr, &asize)) + bool reduction_size_known; + + if (dim_present) + { + int reduction_dim; + if (dim_arg->expr->expr_type == EXPR_CONSTANT) + reduction_dim = mpz_get_si (dim_arg->expr->value.integer) - 1; + else if (arrayexpr->rank == 1) + reduction_dim = 0; + else + gcc_unreachable (); + reduction_size_known = gfc_array_dimen_size (arrayexpr, reduction_dim, + &asize); + } + else + reduction_size_known = gfc_array_size (arrayexpr, &asize); + + if (reduction_size_known) { nonempty = gfc_conv_mpz_to_tree (asize, gfc_index_integer_kind); mpz_clear (asize); diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 new file mode 100644 index ..4f323ec5daba --- /dev/null +++ b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 @@ -0,0 +1,24 @@ +! { dg-do compile } +! +! Check that the inline code generated for MINLOC and MAXLOC supports +! a non-constant DIM argument if ARRAY has rank 1. + +program p + implicit none + integer, parameter :: n = 5 + integer :: a(n) + print *, f(a, 1) +contains + function f(a, d) +integer :: a(n) +integer :: d +integer :: f +f = minloc(a, dim=d) + end function + function g(a, d) +integer :: a(n) +integer :: d +integer :: g +g = maxloc(a, dim=d) + end function +end program p
[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]
https://gcc.gnu.org/g:54e60dc0d4959bf51b24ac1dc9dfcf104876820b commit 54e60dc0d4959bf51b24ac1dc9dfcf104876820b Author: Mikael Morin Date: Fri Nov 17 19:04:19 2023 +0100 fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608] Enable generation of inline code for the MINLOC and MAXLOC intrinsics, if the ARRAY argument is of integral type and of any rank (only the rank 1 case was previously inlined), the DIM argument is a constant value and there is no MASK argument. The restriction to integral ARRAY and absent MASK limits the scope of the change to the cases where we generate single loop inline code. This change uses the existing scalarizer support for reductions, that is arrays used in scalarization loops, where each element uses a nested scalarization loop to calculate its value. The nested loop (and respectively the nested scalarization chain) is created while walking the MINLOC/MAXLOC expression, it's set up automatically by the outer scalarizer, and gfc_conv_intrinsic_minmaxloc is changed to use it as a replacement for the local loop variable (respectively ARRAY scalarization chain) used in the non-reduction case (i.e. when DIM is absent). PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return true if DIM is constant, ARRAY is integral and MASK is absent. (walk_inline_intrinsic_minmaxloc): If DIM is present, walk ARRAY and move the dimension corresponding to DIM to a nested chain, keeping the rest of the dimensions as the returned scalarization chain. (gfc_conv_intrinsic_minmaxloc): When inside the scalarization loops, proceed with inline code generation if DIM is present. If DIM is present, skip result array creation and final initialization from individual result local variables. 
If DIM is present and ARRAY has rank greater than 1, use the nested loop initialized by the scalarizer instead of the local one, use 1 as scalarization dimension, and evaluate ARRAY using the inherited scalarization chain instead of creating a local one by walking the expression. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_1.f90: Also accept the error message generated by the scalarizer in case the function call is implemented through inline code. * gfortran.dg/maxloc_bounds_2.f90: Likewise. * gfortran.dg/maxloc_bounds_3.f90: Likewise. * gfortran.dg/minmaxloc_19.f90: New test. Diff: --- gcc/fortran/trans-intrinsic.cc| 227 ++ gcc/testsuite/gfortran.dg/maxloc_bounds_1.f90 | 4 +- gcc/testsuite/gfortran.dg/maxloc_bounds_2.f90 | 4 +- gcc/testsuite/gfortran.dg/maxloc_bounds_3.f90 | 4 +- gcc/testsuite/gfortran.dg/minmaxloc_19.f90| 182 + 5 files changed, 343 insertions(+), 78 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index a282ae1c0903..dedb49b4a64e 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5472,12 +5472,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) tree lab1, lab2; tree b_if, b_else; tree back; - gfc_loopinfo loop; - gfc_actual_arglist *actual; - gfc_ss *arrayss; - gfc_ss *maskss; + gfc_loopinfo loop, *ploop; + gfc_actual_arglist *actual, *array_arg, *dim_arg, *mask_arg, *kind_arg; + gfc_actual_arglist *back_arg; + gfc_ss *arrayss = nullptr; + gfc_ss *maskss = nullptr; gfc_se arrayse; gfc_se maskse; + gfc_se *base_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; gfc_expr *backexpr; @@ -5489,6 +5491,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) bool optional_mask; actual = expr->value.function.actual; + array_arg = actual; + dim_arg = array_arg->next; + mask_arg = dim_arg->next; + kind_arg = mask_arg->next; + back_arg = kind_arg->next; + + bool dim_present = dim_arg->expr != nullptr; + bool nested_loop 
= dim_present && expr->rank > 0; /* The last argument, BACK, is passed by value. Ensure that by setting its name to %VAL. */ @@ -5502,11 +5512,15 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) { if (se->ss->info->useflags) { - /* The inline implementation of MINLOC/MAXLOC has been generated -before, out of the scalarization loop; now we can just use the -result. */ - gfc_conv_tmp_array_ref (se); - return; + if (!dim_present || !gfc_inline_intrinsic_function_p (expr)) + { + /* The code generating and initializing the result array has been +generated already before the scalari
[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]
https://gcc.gnu.org/g:e714901ed532aaae8248e7e2e98fddcb7c8bfe75 commit e714901ed532aaae8248e7e2e98fddcb7c8bfe75 Author: Mikael Morin Date: Thu Nov 16 10:00:26 2023 +0100 fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608] Add the tests covering the cases for which the following patches will implement inline expansion of MINLOC and MAXLOC. Those are cases where the DIM argument is a constant value, and the ARRAY argument has rank greater than 1. PR fortran/90608 gcc/testsuite/ChangeLog: * gfortran.dg/ieee/maxloc_nan_2.f90: New test. * gfortran.dg/ieee/minloc_nan_2.f90: New test. * gfortran.dg/maxloc_with_dim_1.f90: New test. * gfortran.dg/maxloc_with_dim_and_mask_1.f90: New test. * gfortran.dg/minloc_with_dim_1.f90: New test. * gfortran.dg/minloc_with_dim_and_mask_1.f90: New test. Diff: --- gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90| 64 +++ gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90| 64 +++ gcc/testsuite/gfortran.dg/maxloc_with_dim_1.f90| 201 + .../gfortran.dg/maxloc_with_dim_and_mask_1.f90 | 452 + gcc/testsuite/gfortran.dg/minloc_with_dim_1.f90| 201 + .../gfortran.dg/minloc_with_dim_and_mask_1.f90 | 452 + 6 files changed, 1434 insertions(+) diff --git a/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 new file mode 100644 index ..4d73431f8c23 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 @@ -0,0 +1,64 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check the correct behaviour of the inline maxloc implementation, +! when the dim argument is present. + +program p + implicit none + call check_without_mask + call check_with_mask +contains + subroutine check_without_mask() +use, intrinsic :: ieee_arithmetic +real, allocatable :: a(:,:,:) +real :: nan +integer, allocatable :: r(:,:) +if (.not. 
ieee_support_nan(nan)) return +nan = ieee_value(nan, ieee_quiet_nan) +allocate(a(3,4,5), source = nan) +r = maxloc(a, dim=1) +if (any(shape(r) /= (/ 4, 5 /))) stop 21 +if (any(r /= 1)) stop 22 +r = maxloc(a, dim=2) +if (any(shape(r) /= (/ 3, 5 /))) stop 23 +if (any(r /= 1)) stop 24 +r = maxloc(a, dim=3) +if (any(shape(r) /= (/ 3, 4 /))) stop 25 +if (any(r /= 1)) stop 26 + end subroutine + subroutine check_with_mask() +real, allocatable :: a(:,:,:) +logical, allocatable :: m(:,:,:) +real :: nan +integer, allocatable :: r(:,:) +if (.not. ieee_support_nan(nan)) return +nan = ieee_value(nan, ieee_quiet_nan) +allocate(a(2,3,4), source = nan) +allocate(m(2,3,4)) +m(:,:,:) = reshape((/ .false., .false., .true. , .true. , & + .false., .true. , .false., .false., & + .false., .true. , .true. , .false., & + .true. , .true. , .true. , .false., & + .false., .true. , .true. , .false., & + .false., .true. , .false., .false. /), shape(m)) +r = maxloc(a, dim = 1, mask = m) +if (any(shape(r) /= (/ 3, 4 /))) stop 51 +if (any(r /= reshape((/ 0, 1, 2, & +0, 2, 1, & +1, 1, 2, & +1, 2, 0 /), (/ 3, 4 / stop 52 +r = maxloc(a, dim = 2, mask = m) +if (any(shape(r) /= (/ 2, 4 /))) stop 53 +if (any(r /= reshape((/ 2, 2, & +3, 2, & +1, 1, & +1, 2 /), (/ 2, 4 / stop 54 +r = maxloc(a, dim = 3, mask = m) +if (any(shape(r) /= (/ 2, 3 /))) stop 55 +if (any(r /= reshape((/ 3, 3, & +1, 1, & +2, 1 /), (/ 2, 3 / stop 56 + end subroutine +end program p + diff --git a/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 new file mode 100644 index ..311526484fc8 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 @@ -0,0 +1,64 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check the correct behaviour of the inline minloc implementation, +! when the dim argument is present. 
+ +program p + implicit none + call check_without_mask + call check_with_mask +contains + subroutine check_without_mask() +use, intrinsic :: ieee_arithmetic +real, allocatable :: a(:,:,:) +real :: nan +integer, allocatable :: r(:,:) +if (.not. ieee_support_nan(nan)) return +nan = ieee_value(nan, ieee_quiet_nan) +allocate(a(3,4,5), source = nan) +r = minloc(a, dim=1) +if (any(shape(r) /= (/ 4, 5 /))) stop 21 +if (any(r /= 1)) stop 22 +r = minloc(a, dim=2) +if (any(shape(r) /= (/ 3, 5 /))) stop 23 +if (any(r /= 1)) stop 24 +r = mi
[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]
https://gcc.gnu.org/g:32e9e7020eb681eab7351bdd8db31b9068186755 commit 32e9e7020eb681eab7351bdd8db31b9068186755 Author: Mikael Morin Date: Thu Aug 8 12:23:16 2024 +0200 fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608] Enable generation of inline MINLOC/MAXLOC code in the cases where DIM is a constant, and either ARRAY is of floating point or MASK is an array. Those cases are the remaining bits to fully support inlining of non-CHARACTER MINLOC/MAXLOC with DIM. They are treated together because they generate similar code, the NANs for REAL types being handled a bit like a second level of masking. These are the cases for which we generate two loops. This change affects the code generating the second loop, that was previously accessible only in cases ARRAY had rank 1. The main changes are in gfc_conv_intrinsic_minmaxloc the replacement of the locally initialized scalarization loop with the one provided and previously initialized by the scalarizer. Same goes for the locally initialized MASK scalarizer chain. As this is enabling the code generating a second loop in a context of reduction and nested loops, care is taken not to advance parent scalarization chains twice. The scalarization chain element(s) for an array MASK are inserted in the chain at a different place from that of a scalar MASK. This is done on purpose to match the code consuming the chains which are in different places for scalar and array MASK. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE for MINLOC/MAXLOC with constant DIM and non-scalar MASK. (walk_inline_intrinsic_minmaxloc): Walk MASK and if it's an array add the chain obtained before that of ARRAY. (gfc_conv_intrinsic_minmaxloc): Use the nested loop if there is one. To evaluate MASK (respectively ARRAY in the second loop), inherit the scalarizer chain if in a nested loop, otherwise keep using the chain obtained by walking MASK (respectively ARRAY). 
If there is a nested loop, avoid advancing the parent scalarization chain a second time in the second loop. gcc/testsuite/ChangeLog: * gfortran.dg/minmaxloc_21.f90: New test. Diff: --- gcc/fortran/trans-intrinsic.cc | 94 ++--- gcc/testsuite/gfortran.dg/minmaxloc_21.f90 | 572 + 2 files changed, 623 insertions(+), 43 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index a146d7263c88..4beead175b77 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5477,6 +5477,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_actual_arglist *back_arg; gfc_ss *arrayss = nullptr; gfc_ss *maskss = nullptr; + gfc_ss *orig_ss = nullptr; gfc_se arrayse; gfc_se maskse; gfc_se nested_se; @@ -5711,6 +5712,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) if (nested_loop) { ploop = enter_nested_loop (&nested_se); + orig_ss = nested_se.ss; ploop->temp_dim = 1; } else @@ -5785,9 +5787,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) } else { - gcc_assert (!nested_loop); - for (int i = 0; i < loop.dimen; i++) - gfc_add_modify (&loop.pre, pos[i], gfc_index_zero_node); + for (int i = 0; i < ploop->dimen; i++) + gfc_add_modify (&ploop->pre, pos[i], gfc_index_zero_node); lab1 = gfc_build_label_decl (NULL_TREE); TREE_USED (lab1) = 1; lab2 = gfc_build_label_decl (NULL_TREE); @@ -5818,10 +5819,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) /* If we have a mask, only check this element if the mask is set. 
*/ if (maskexpr && maskexpr->rank > 0) { - gcc_assert (!nested_loop); - gfc_init_se (&maskse, NULL); - gfc_copy_loopinfo_to_se (&maskse, &loop); - maskse.ss = maskss; + gfc_init_se (&maskse, base_se); + gfc_copy_loopinfo_to_se (&maskse, ploop); + if (!nested_loop) + maskse.ss = maskss; gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&body, &maskse.pre); @@ -5849,13 +5850,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) stmtblock_t ifblock2; tree ifbody2; - gcc_assert (!nested_loop); - gfc_start_block (&ifblock2); - for (int i = 0; i < loop.dimen; i++) + for (int i = 0; i < ploop->dimen; i++) { tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]), -loop.loopvar[i], offset[i]); +ploop->loopvar[i
[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]
https://gcc.gnu.org/g:3c97c96384c6b21c89aa6811e6a2801879fc598e commit 3c97c96384c6b21c89aa6811e6a2801879fc598e Author: Mikael Morin Date: Thu Oct 3 15:57:50 2024 +0200 fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608] Evaluate the BACK argument of MINLOC/MAXLOC once before the scalarization loops in the case where the DIM argument is present. This is a follow-up to r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3 which added knowledge of BACK to the scalarizer, to r15-2701-ga10436a8404ad2f0cc5aa4d6a0cc850abe5ef49e which removed it to handle it out of scalarization instead, and to more immediate previous patches that added support for MINLOC/MAXLOC with DIM. The recent support for MINLOC/MAXLOC with DIM introduced nested loops, which made the evaluation of BACK (removed from the scalarizer knowledge by previous patches) wrapped in a loop, so possibly executed more than once. This change adds BACK to the scalarization chain if MINLOC/MAXLOC will use nested loops, so that it is evaluated by the scalarizer only once before the outermost loop in that case. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (walk_inline_intrinsic_minmaxloc): Add a scalar element for BACK as first item of the list if BACK is present and there will be nested loops. (gfc_conv_intrinsic_minmaxloc): Evaluate BACK using an inherited scalarization chain if there is a nested loop. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_8.f90: New test. * gfortran.dg/minloc_9.f90: New test. 
Diff: --- gcc/fortran/trans-intrinsic.cc | 20 +- gcc/testsuite/gfortran.dg/maxloc_8.f90 | 349 + gcc/testsuite/gfortran.dg/minloc_9.f90 | 349 + 3 files changed, 716 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 90dcb759b378..5c25eedcc4f7 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5594,7 +5594,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) && maskexpr->symtree->n.sym->attr.optional; backexpr = back_arg->expr; - gfc_init_se (&backse, NULL); + gfc_init_se (&backse, nested_loop ? se : nullptr); if (backexpr == nullptr) back = logical_false_node; else if (maybe_absent_optional_variable (backexpr)) @@ -11885,10 +11885,13 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr *expr ATTRIBUTE_UNUSED) gfc_actual_arglist *array_arg = expr->value.function.actual; gfc_actual_arglist *dim_arg = array_arg->next; gfc_actual_arglist *mask_arg = dim_arg->next; + gfc_actual_arglist *kind_arg = mask_arg->next; + gfc_actual_arglist *back_arg = kind_arg->next; gfc_expr *array = array_arg->expr; gfc_expr *dim = dim_arg->expr; gfc_expr *mask = mask_arg->expr; + gfc_expr *back = back_arg->expr; if (dim == nullptr) return gfc_get_array_ss (ss, expr, 1, GFC_SS_INTRINSIC); @@ -11914,7 +11917,20 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr *expr ATTRIBUTE_UNUSED) chain. */ int dim_val = mpz_get_si (dim->value.integer) - 1; gfc_ss *tail = nest_loop_dimension (tmp_ss, dim_val); - tail->next = ss; + + if (back && array->rank > 1) +{ + /* If there are nested scalarization loops, include BACK in the +scalarization chains to avoid evaluating it multiple times in a loop. +Otherwise, prefer to handle it outside of scalarization. 
*/ + gfc_ss *back_ss = gfc_get_scalar_ss (ss, back); + back_ss->info->type = GFC_SS_REFERENCE; + back_ss->info->can_be_null_ref = true; + + tail->next = back_ss; +} + else +tail->next = ss; if (scalar_mask) tmp_ss = gfc_get_scalar_ss (tmp_ss, mask); diff --git a/gcc/testsuite/gfortran.dg/maxloc_8.f90 b/gcc/testsuite/gfortran.dg/maxloc_8.f90 new file mode 100644 index ..21bc4591235a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/maxloc_8.f90 @@ -0,0 +1,349 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check that the evaluation of MAXLOC's BACK argument is made only once +! before the scalarization loops, when the DIM argument is present. + +program p + implicit none + integer, parameter :: data60(*) = (/ 7, 4, 5, 3, 9, 0, 6, 4, 5, 5, & + 8, 2, 6, 7, 8, 7, 4, 5, 3, 9, & + 0, 6, 4, 5, 5, 8, 2, 6, 7, 8, & + 7, 4, 5, 3, 9, 0, 6, 4, 5, 5, & + 8, 2, 6, 7, 8, 7, 4, 5, 3, 9, & + 0, 6, 4, 5, 5, 8, 2, 6, 7, 8 /) + logical, parameter :: mask60(*) = (/ .true. , .false., .false., .false., & + .true. , .false., .true. , .false.,
[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]
https://gcc.gnu.org/g:d9f0956929c7d0a182cfb623256f9bb320feb9ea commit d9f0956929c7d0a182cfb623256f9bb320feb9ea Author: Mikael Morin Date: Thu Aug 8 13:44:16 2024 +0200 fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608] Enable the generation of inline code for MINLOC/MAXLOC when argument ARRAY is of integral type, DIM is a constant, and MASK is scalar (only absent MASK or rank 1 ARRAY were inlined before). Scalar masks are implemented with a wrapping condition around the code one would generate if MASK wasn't present, so they are easy to support once inline code without MASK is working. With this change, there are both expressions evaluated inside the nested loop (ARRAY, and in the future MASK if non-scalar) and expressions evaluated outside of it (MASK if scalar). Both have to advance the scalarization chain passed in argument SE to gfc_conv_intrinsic_minmaxloc as they are evaluated, but expressions evaluated from within the nested loop additionally have to advance the nested scalarization chain of the reduction loop. This is normally handled transparently through the inheritance that is defined when initializing gfc_se structs, but there has to be some variable to inherit from, and there is a single one, SE. This variable is kept as base for out of nested loop expressions (scalar MASK), and this change introduces a new variable to hold the current advance of the nested loop scalarization chain and serve as inheritance base to evaluate nested loop expressions (just ARRAY for now, additionally non-scalar MASK later). PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE if MASK is scalar. (walk_inline_intrinsic_minmaxloc): Append to the scalarization chain a scalar element for MASK if it's present. (gfc_conv_intrinsic_minmaxloc): Use a local gfc_se struct to serve as base for all the expressions evaluated in the nested loop. 
To evaluate MASK in a nested loop, enable usage of the scalarizer and set the current scalarization chain element to use to that of the original passed in SE argument. And use the nested loop from the scalarizer instead of the local loop in that case. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_8.f90: Accept the error message generated by the scalarizer in case the MAXLOC intrinsic call is implemented through inline code. * gfortran.dg/minmaxloc_20.f90: New test. Diff: --- gcc/fortran/trans-intrinsic.cc| 27 ++-- gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90 | 4 +- gcc/testsuite/gfortran.dg/minmaxloc_20.f90| 182 ++ 3 files changed, 201 insertions(+), 12 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index dedb49b4a64e..cd6aca51f218 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5479,6 +5479,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_ss *maskss = nullptr; gfc_se arrayse; gfc_se maskse; + gfc_se nested_se; gfc_se *base_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; @@ -5616,7 +5617,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_block_to_block (&se->pre, &backse.post); if (nested_loop) -base_se = se; +{ + gfc_init_se (&nested_se, se); + base_se = &nested_se; +} else { /* Walk the arguments. */ @@ -5706,7 +5710,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) if (nested_loop) { - ploop = enter_nested_loop (se); + ploop = enter_nested_loop (&nested_se); ploop->temp_dim = 1; } else @@ -6063,21 +6067,19 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) { tree ifmask; - gcc_assert (!nested_loop); - - gfc_init_se (&maskse, NULL); + gfc_init_se (&maskse, nested_loop ? 
se : nullptr); gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&se->pre, &maskse.pre); gfc_init_block (&block); - gfc_add_block_to_block (&block, &loop.pre); - gfc_add_block_to_block (&block, &loop.post); + gfc_add_block_to_block (&block, &ploop->pre); + gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); /* For the else part of the scalar mask, just initialize the pos variable the same way as above. */ gfc_init_block (&elseblock); - for (int i = 0; i < loop.dimen; i++) + for (int i = 0; i < ploop->dimen; i++) gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node); elsetmp = gfc_finish_bl
[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Check MASK directly instead of its scalarization chain
https://gcc.gnu.org/g:1810a146003e03c431ce4c700799dc93e2006743 commit 1810a146003e03c431ce4c700799dc93e2006743 Author: Mikael Morin Date: Thu Sep 12 16:56:39 2024 +0200 fortran: Check MASK directly instead of its scalarization chain Update the conditions used by the inline MINLOC/MAXLOC code generation function to check directly the properties of MASK instead of the variable holding its scalarization chain. The inline implementation of MINLOC/MAXLOC in gfc_conv_intrinsic_minmaxloc uses several conditions checking the presence of a scalarization chain for MASK, which means that the argument is present and non-scalar. The next patch will allow inlining MINLOC/MAXLOC with DIM and MASK, and in that case the scalarization chain for MASK is initialized elsewhere, so the variable usually holding it in the function is not used, and the conditions won't work in that case. This change updates the conditions to check directly the properties of MASK so that they work even if the scalarization chain variable is not used. gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Use conditionals based on the MASK expression rather than on its scalarization chains. Diff: --- gcc/fortran/trans-intrinsic.cc | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index cd6aca51f218..a146d7263c88 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5746,7 +5746,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gcc_assert (reduction_dimensions == ploop->dimen); - if (nonempty == NULL && maskss == NULL) + if (nonempty == NULL && !(maskexpr && maskexpr->rank > 0)) { nonempty = logical_true_node; @@ -5816,7 +5816,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_start_scalarized_body (ploop, &body); /* If we have a mask, only check this element if the mask is set. 
*/ - if (maskss) + if (maskexpr && maskexpr->rank > 0) { gcc_assert (!nested_loop); gfc_init_se (&maskse, NULL); @@ -5921,7 +5921,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) } gfc_add_expr_to_block (&block, ifbody); - if (maskss) + if (maskexpr && maskexpr->rank > 0) { /* We enclose the above in if (mask) {...}. If the mask is an optional argument, generate IF (.NOT. PRESENT(MASK) @@ -5972,7 +5972,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_expr_to_block (outer_block, build1_v (LABEL_EXPR, lab1)); /* If we have a mask, only check this element if the mask is set. */ - if (maskss) + if (maskexpr && maskexpr->rank > 0) { gfc_init_se (&maskse, NULL); gfc_copy_loopinfo_to_se (&maskse, &loop); @@ -6038,7 +6038,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_expr_to_block (&block, tmp); - if (maskss) + if (maskexpr && maskexpr->rank > 0) { /* We enclose the above in if (mask) {...}. If the mask is an optional argument, generate IF (.NOT. PRESENT(MASK) @@ -6063,7 +6063,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_add_expr_to_block (&loop.pre, build1_v (LABEL_EXPR, lab2)); /* For a scalar mask, enclose the loop in an if statement. */ - if (maskexpr && maskss == NULL) + if (maskexpr && maskexpr->rank == 0) { tree ifmask;
[gcc(refs/users/mikael/heads/inline_minmaxloc_v332)] fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only
https://gcc.gnu.org/g:091b04da1657c9f71d62491fa30c0846f8cb5b43 commit 091b04da1657c9f71d62491fa30c0846f8cb5b43 Author: Mikael Morin Date: Sat Nov 18 20:54:20 2023 +0100 fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only In the function generating inline code to implement MINLOC and MAXLOC, only get the size of ARRAY along DIM if DIM is present to check for emptiness. The check for ARRAY emptiness had been checking the size of the full array, which is correct for MINLOC and MAXLOC without DIM. But if DIM is present, the reduction is along DIM only so the check for emptiness should consider that dimension only as well. This sounds like a correctness issue, but fortunately the cases where it makes a difference are cases where ARRAY is empty, so even if the MINLOC or MAXLOC calculated value is wrong, it's wrapped in a zero iteration loop, and the wrong values are not actually used. In the end this just avoids unnecessary calculations. A previous version of this patch didn't support non-constant DIM with rank 1 ARRAY. The new testcase checks that that case is supported. gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Only get the size along DIM instead of the full size if DIM is present. gcc/testsuite/ChangeLog: * gfortran.dg/minmaxloc_22.f90: New test. 
Diff: --- gcc/fortran/trans-intrinsic.cc | 19 ++- gcc/testsuite/gfortran.dg/minmaxloc_22.f90 | 24 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 4beead175b77..90dcb759b378 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5641,7 +5641,24 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) if (!(maskexpr && maskexpr->rank > 0)) { mpz_t asize; - if (gfc_array_size (arrayexpr, &asize)) + bool reduction_size_known; + + if (dim_present) + { + int reduction_dim; + if (dim_arg->expr->expr_type == EXPR_CONSTANT) + reduction_dim = mpz_get_si (dim_arg->expr->value.integer) - 1; + else if (arrayexpr->rank == 1) + reduction_dim = 0; + else + gcc_unreachable (); + reduction_size_known = gfc_array_dimen_size (arrayexpr, reduction_dim, + &asize); + } + else + reduction_size_known = gfc_array_size (arrayexpr, &asize); + + if (reduction_size_known) { nonempty = gfc_conv_mpz_to_tree (asize, gfc_index_integer_kind); mpz_clear (asize); diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 new file mode 100644 index ..4f323ec5daba --- /dev/null +++ b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 @@ -0,0 +1,24 @@ +! { dg-do compile } +! +! Check that the inline code generated for MINLOC and MAXLOC supports +! a non-constant DIM argument if ARRAY has rank 1. + +program p + implicit none + integer, parameter :: n = 5 + integer :: a(n) + print *, f(a, 1) +contains + function f(a, d) +integer :: a(n) +integer :: d +integer :: f +f = minloc(a, dim=d) + end function + function g(a, d) +integer :: a(n) +integer :: d +integer :: g +g = maxloc(a, dim=d) + end function +end program p
[gcc] Created branch 'mikael/heads/inline_minmaxloc_v332' in namespace 'refs/users'
The branch 'mikael/heads/inline_minmaxloc_v332' was created in namespace 'refs/users' pointing to: 3c97c96384c6... fortran: Evaluate once BACK argument of MINLOC/MAXLOC with
[gcc r15-4240] RISC-V:Bugfix for C++ code compilation failure with rv32imafc_zve32f[pr116883]
https://gcc.gnu.org/g:fd8e590ff11266598d8f9b3d03d72ba7a6100512 commit r15-4240-gfd8e590ff11266598d8f9b3d03d72ba7a6100512 Author: Li Xu Date: Thu Oct 10 08:51:19 2024 -0600 RISC-V:Bugfix for C++ code compilation failure with rv32imafc_zve32f[pr116883] From: xuli Example as follows: int main() { unsigned long arraya[128], arrayb[128], arrayc[128]; for (int i = 0; i < 128; i++) { arraya[i] = arrayb[i] + arrayc[i]; } return 0; } Compiled with -march=rv32imafc_zve32f -mabi=ilp32f, it will cause a compilation issue: riscv_vector.h:40:25: error: ambiguating new declaration of 'vint64m4_t __riscv_vle64(vbool16_t, const long long int*, unsigned int)' 40 | #pragma riscv intrinsic "vector" | ^~~~ riscv_vector.h:40:25: note: old declaration 'vint64m1_t __riscv_vle64(vbool64_t, const long long int*, unsigned int)' With zvl=32b, vbool16_t is registered in init_builtins() with type_common.precision=0x101 (nunits=2), mode_nunits[E_RVVMF16BI]=[2,2]. Normally, vbool64_t is only valid when TARGET_MIN_VLEN > 32, so vbool64_t is not registered in init_builtins(), meaning vbool64_t=null. In order to implement __attribute__((target("arch=+v"))), we must register all vector types and all RVV intrinsics. Therefore, vbool64_t will be registered by default with zvl=128b in reinit_builtins(), resulting in type_common.precision=0x101 (nunits=2) and mode_nunits[E_RVVMF64BI]=[2,2]. We then get TYPE_VECTOR_SUBPARTS(vbool16_t) == TYPE_VECTOR_SUBPARTS(vbool64_t), calculated using type_common.precision, resulting in 2. Since vbool16_t and vbool64_t have the same element type (boolean_type), the compiler treats them as the same type, leading to a re-declaration conflict. After all types and intrinsics have been registered, processing __attribute__((target("arch=+v"))) will update the parameters option and init_adjust_machine_modes. Therefore, to avoid conflicts, we can choose zvl=4096b for the null type reinit_builtins(). 
command option zvl=32b type nunits vbool64_t => null vbool32_t=> [1,1] vbool16_t=> [2,2] vbool8_t=> [4,4] vbool4_t=> [8,8] vbool2_t=> [16,16] vbool1_t=> [32,32] reinit zvl=128b vbool64_t => [2,2] conflict with zvl32b vbool16_t=> [2,2] reinit zvl=256b vbool64_t => [4,4] conflict with zvl32b vbool8_t=> [4,4] reinit zvl=512b vbool64_t => [8,8] conflict with zvl32b vbool4_t=> [8,8] reinit zvl=1024b vbool64_t => [16,16] conflict with zvl32b vbool2_t=> [16,16] reinit zvl=2048b vbool64_t => [32,32] conflict with zvl32b vbool1_t=> [32,32] reinit zvl=4096b vbool64_t => [64,64] zvl=4096b is ok Signed-off-by: xuli PR target/116883 gcc/ChangeLog: * config/riscv/riscv-c.cc (riscv_pragma_intrinsic_flags_pollute): Choose zvl4096b to initialize null type. gcc/testsuite/ChangeLog: * g++.target/riscv/rvv/base/pr116883.C: New test. Diff: --- gcc/config/riscv/riscv-c.cc| 7 ++- gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C | 15 +++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index 71112d9c66d7..c59f408d3a8e 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -59,7 +59,12 @@ riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags) riscv_zvl_flags = riscv_zvl_flags | MASK_ZVL32B | MASK_ZVL64B -| MASK_ZVL128B; +| MASK_ZVL128B +| MASK_ZVL256B +| MASK_ZVL512B +| MASK_ZVL1024B +| MASK_ZVL2048B +| MASK_ZVL4096B; riscv_vector_elen_flags = riscv_vector_elen_flags | MASK_VECTOR_ELEN_32 diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C new file mode 100644 index 000000000000..15bbec40bdde --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C @@ -0,0 +1,15 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv32imafc_zve32f -mabi=ilp32f" } */ + +#include <riscv_vector.h> + +int main() +{ + unsigned long arraya[128], arrayb[128], arrayc[128]; + for (int i; i < 128; i++) + 
{ + arraya[i] = arrayb[i] + arrayc[i]; + } + return 0; +}
[gcc r15-4243] aarch64: Alter pr116258.c test to correct for big endian.
https://gcc.gnu.org/g:a17a9bdcb3f749b895abf1fbf4f62859df9e8184 commit r15-4243-ga17a9bdcb3f749b895abf1fbf4f62859df9e8184 Author: Richard Ball Date: Thu Oct 10 19:16:39 2024 +0100 aarch64: Alter pr116258.c test to correct for big endian. The test at pr116258.c fails on big endian targets, this is because the test checks that the index of a floating point multiply is 0, which is correct only for little endian. gcc/testsuite/ChangeLog: PR tree-optimization/116258 * gcc.target/aarch64/pr116258.c: Alter test to add big-endian support. Diff: --- gcc/testsuite/gcc.target/aarch64/pr116258.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/aarch64/pr116258.c b/gcc/testsuite/gcc.target/aarch64/pr116258.c index e727ad4b72a5..5b63de25b7bf 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr116258.c +++ b/gcc/testsuite/gcc.target/aarch64/pr116258.c @@ -12,6 +12,7 @@ return (x + h(t)); } -/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 { target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times "\\\[3\\\]" 1 { target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-not "dup\t" } } */ /* { dg-final { scan-assembler-not "ins\t" } } */
[gcc r14-10763] aarch64: Alter pr116258.c test to correct for big endian.
https://gcc.gnu.org/g:44dc46415ce8fafc1f6a46bac123b430ae5aba4d commit r14-10763-g44dc46415ce8fafc1f6a46bac123b430ae5aba4d Author: Richard Ball Date: Thu Oct 10 19:16:39 2024 +0100 aarch64: Alter pr116258.c test to correct for big endian. The test at pr116258.c fails on big endian targets, this is because the test checks that the index of a floating point multiply is 0, which is correct only for little endian. gcc/testsuite/ChangeLog: PR tree-optimization/116258 * gcc.target/aarch64/pr116258.c: Alter test to add big-endian support. (cherry picked from commit a17a9bdcb3f749b895abf1fbf4f62859df9e8184) Diff: --- gcc/testsuite/gcc.target/aarch64/pr116258.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/aarch64/pr116258.c b/gcc/testsuite/gcc.target/aarch64/pr116258.c index e727ad4b72a5..5b63de25b7bf 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr116258.c +++ b/gcc/testsuite/gcc.target/aarch64/pr116258.c @@ -12,6 +12,7 @@ return (x + h(t)); } -/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 { target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times "\\\[3\\\]" 1 { target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-not "dup\t" } } */ /* { dg-final { scan-assembler-not "ins\t" } } */
[gcc r15-4236] match.pd: Check trunc_mod vector optab before folding.
https://gcc.gnu.org/g:a2e06b7f081a3d2e50e3afa8d3f1676a05099707 commit r15-4236-ga2e06b7f081a3d2e50e3afa8d3f1676a05099707 Author: Jennifer Schmitz Date: Thu Oct 3 04:46:51 2024 -0700 match.pd: Check trunc_mod vector optab before folding. This patch guards the simplification x / y * y == x -> x % y == 0 in match.pd by a check for: 1) Non-vector mode of x OR 2) Lack of support for vector division OR 3) Support of vector modulo The patch was bootstrapped and tested with no regression on aarch64-linux-gnu and x86_64-linux-gnu. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ PR tree-optimization/116831 * match.pd: Guard simplification to trunc_mod with check for mod optab support. gcc/testsuite/ PR tree-optimization/116831 * gcc.dg/torture/pr116831.c: New test. Diff: --- gcc/match.pd| 9 +++-- gcc/testsuite/gcc.dg/torture/pr116831.c | 10 ++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 755ed13e77d1..8a7569ce3871 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5415,8 +5415,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* x / y * y == x -> x % y == 0. */ (simplify (eq:c (mult:c (trunc_div:s @0 @1) @1) @0) - (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE) -(eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); }))) + (if (TREE_CODE (TREE_TYPE (@0)) != COMPLEX_TYPE + && (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0))) + || !target_supports_op_p (TREE_TYPE (@0), TRUNC_DIV_EXPR, +optab_vector) + || target_supports_op_p (TREE_TYPE (@0), TRUNC_MOD_EXPR, + optab_vector))) + (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); }))) /* ((X /[ex] A) +- B) * A --> X +- A * B. 
*/ (for op (plus minus) diff --git a/gcc/testsuite/gcc.dg/torture/pr116831.c b/gcc/testsuite/gcc.dg/torture/pr116831.c new file mode 100644 index ..92b2a130e69f --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116831.c @@ -0,0 +1,10 @@ +/* { dg-additional-options "-mcpu=neoverse-v2" { target aarch64*-*-* } } */ + +long a; +int b, c; +void d (int e[][5], short f[][5][5][5]) +{ + for (short g; g; g += 4) +a = c ?: e[6][0] % b ? 0 : f[0][0][0][g]; +} +
[gcc r15-4238] libiberty: Restore build with CP_DEMANGLE_DEBUG defined
https://gcc.gnu.org/g:c1b2100e736c8ad80479fa6417db760695a00256 commit r15-4238-gc1b2100e736c8ad80479fa6417db760695a00256 Author: Simon Martin Date: Thu Oct 10 15:29:32 2024 +0200 libiberty: Restore build with CP_DEMANGLE_DEBUG defined cp-demangle.c does not build when CP_DEMANGLE_DEBUG is defined since r13-2887-gb04208895fed34. This trivial patch fixes the issue. libiberty/ChangeLog: * cp-demangle.c (d_dump): Fix compilation when CP_DEMANGLE_DEBUG is defined. Diff: --- libiberty/cp-demangle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c index fc2cf64e6e01..5b1bd5dff227 100644 --- a/libiberty/cp-demangle.c +++ b/libiberty/cp-demangle.c @@ -655,9 +655,9 @@ d_dump (struct demangle_component *dc, int indent) return; case DEMANGLE_COMPONENT_EXTENDED_BUILTIN_TYPE: { - char suffix[2] = { dc->u.s_extended_builtin.type->suffix, 0 }; + char suffix[2] = { dc->u.s_extended_builtin.suffix, 0 }; printf ("builtin type %s%d%s\n", dc->u.s_extended_builtin.type->name, - dc->u.s_extended_builtin.type->arg, suffix); + dc->u.s_extended_builtin.arg, suffix); } return; case DEMANGLE_COMPONENT_OPERATOR:
[gcc r15-4241] phiopt: Remove candorest variable, return instead
https://gcc.gnu.org/g:dc3015ff0934a48176c43c0582d5a93029d298f9 commit r15-4241-gdc3015ff0934a48176c43c0582d5a93029d298f9 Author: Andrew Pinski Date: Thu Oct 10 04:44:23 2024 +0000 phiopt: Remove candorest variable return instead After r15-3560-gb081e6c860eb9688d24365d39, the setting of candorest with the break can just change to a return since this is inside a lambda now. Bootstrapped and tested on x86_64-linux-gnu. gcc/ChangeLog: * tree-ssa-phiopt.cc (pass_phiopt::execute): Remove candorest and return instead of setting candorest. Signed-off-by: Andrew Pinski Diff: --- gcc/tree-ssa-phiopt.cc | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index 43b65b362a39..f3ee3a80c0f8 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -4322,7 +4322,6 @@ pass_phiopt::execute (function *) } gimple_stmt_iterator gsi; - bool candorest = true; /* Check that we're looking for nested phis. */ basic_block merge = diamond_p ? EDGE_SUCC (bb2, 0)->dest : bb2; @@ -4338,15 +4337,11 @@ pass_phiopt::execute (function *) tree arg1 = gimple_phi_arg_def (phi, e2->dest_idx); if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2) { - candorest = false; cfgchanged = true; - break; + return; } } - if (!candorest) - return; - gphi *phi = single_non_singleton_phi_for_edges (phis, e1, e2); if (!phi) return;
[gcc r15-4239] vect: Avoid divide by zero for permutes of extern VLA vectors
https://gcc.gnu.org/g:9bd19ff515c95af71b29bc6e232785532afa6823 commit r15-4239-g9bd19ff515c95af71b29bc6e232785532afa6823 Author: Richard Sandiford Date: Thu Oct 10 15:15:26 2024 +0100 vect: Avoid divide by zero for permutes of extern VLA vectors My recent VLA SLP patches caused a regression with cross compilers in gcc.dg/torture/neon-sve-bridge.c. There we have a VEC_PERM_EXPR created from two BIT_FIELD_REFs, with the child node being an external VLA vector: note: node 0x3704a70 (max_nunits=1, refcnt=2) vector(2) long int note: op: VEC_PERM_EXPR note: stmt 0 val1Return_9 = BIT_FIELD_REF ; note: stmt 1 val2Return_10 = BIT_FIELD_REF ; note: lane permutation { 0[0] 0[1] } note: children 0x3704b08 note: node (external) 0x3704b08 (max_nunits=1, refcnt=1) svint64_t note: { } For this kind of external node, the SLP_TREE_LANES is normally the total number of lanes in the vector, but it is zero if the vector has variable length: auto nunits = TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (vnode)); unsigned HOST_WIDE_INT const_nunits; if (nunits.is_constant (&const_nunits)) SLP_TREE_LANES (vnode) = const_nunits; This led to division by zero in: /* Check whether the output has N times as many lanes per vector. */ else if (constant_multiple_p (SLP_TREE_LANES (node) * op_nunits, SLP_TREE_LANES (child) * nunits, &this_unpack_factor) && (i == 0 || unpack_factor == this_unpack_factor)) unpack_factor = this_unpack_factor; No repetition takes place for this kind of external node, so this patch goes with Richard's suggestion to check for external nodes that have no scalar statements. This didn't show up for my native testing since division by zero doesn't trap on AArch64. gcc/ * tree-vect-slp.cc (vectorizable_slp_permutation_1): Set repeating_p to false if we have an external node for a pre-existing vector. 
Diff: --- gcc/tree-vect-slp.cc | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 9bf6ae4ec8e0..96f1992cfbff 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -10279,10 +10279,19 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, } auto op_nunits = TYPE_VECTOR_SUBPARTS (op_vectype); unsigned int this_unpack_factor; + /* Detect permutations of external, pre-existing vectors. The external +node's SLP_TREE_LANES stores the total number of units in the vector, +or zero if the vector has variable length. + +We are expected to keep the original VEC_PERM_EXPR for such cases. +There is no repetition to model. */ + if (SLP_TREE_DEF_TYPE (child) == vect_external_def + && SLP_TREE_SCALAR_OPS (child).is_empty ()) + repeating_p = false; /* Check whether the input has twice as many lanes per vector. */ - if (children.length () == 1 - && known_eq (SLP_TREE_LANES (child) * nunits, - SLP_TREE_LANES (node) * op_nunits * 2)) + else if (children.length () == 1 + && known_eq (SLP_TREE_LANES (child) * nunits, + SLP_TREE_LANES (node) * op_nunits * 2)) pack_p = true; /* Check whether the output has N times as many lanes per vector. */ else if (constant_multiple_p (SLP_TREE_LANES (node) * op_nunits,
[gcc r15-4242] Fix PR116650: check all regs in regrename targets
https://gcc.gnu.org/g:85bee4f77b1b0ebe68b3efe0c356b7d5fb006c4d commit r15-4242-g85bee4f77b1b0ebe68b3efe0c356b7d5fb006c4d Author: Michael Matz Date: Thu Oct 10 16:36:51 2024 +0200 Fix PR116650: check all regs in regrename targets (this came up for m68k vs. LRA, but is a generic problem) Regrename wants to use new registers for certain def-use chains. For validity of replacements it needs to check that the selected candidates are unused up to then. That's done in check_new_reg_p. But if it so happens that the new register needs more hardregs than the old register (which happens if the target allows inter-bank moves and the mode is something like a DFmode that needs to be placed into a SImode reg-pair), then check_new_reg_p only checks the first of those registers for free-ness. This is caused by that function looking up the number of necessary hardregs only in terms of the old hardreg number. It of course needs to do that in terms of the new candidate regnumber. The symptom is that regrename sometimes clobbers the higher numbered registers of such a regrename target pair. This patch fixes that problem. (In the particular case of the bug report it was LRA that left over an inter-bank move instruction that triggers regrename, ultimately causing the mis-compile. Reload didn't do that, but in general we of course can't rely on such moves not happening if the target allows them.) This also shows a general confusion in that function and the target hook interface here: for (i = nregs - 1; i >= 0; --i) ... || ! HARD_REGNO_RENAME_OK (reg + i, new_reg + i)) it uses nregs in a way that requires it to be the same between old and new register. The problem is that the target hook only gets register numbers, when it instead should get a mode and register numbers and would be called only for the first but not for subsequent registers. 
I've looked at a number of definitions of that target hook and I think that this is currently harmless in the sense that it would merely rule out some potential reg-renames that would in fact be okay to do. So I'm not changing the target hook interface here and hence that problem remains unfixed. PR rtl-optimization/116650 * regrename.cc (check_new_reg_p): Calculate nregs in terms of the new candidate register. Diff: --- gcc/regrename.cc | 25 +++-- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/gcc/regrename.cc b/gcc/regrename.cc index 054e601740b1..22668d7bf57d 100644 --- a/gcc/regrename.cc +++ b/gcc/regrename.cc @@ -324,10 +324,27 @@ static bool check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, class du_head *this_head, HARD_REG_SET this_unavailable) { - int nregs = this_head->nregs; + int nregs = 1; int i; struct du_chain *tmp; + /* See whether new_reg accepts all modes that occur in + definition and uses and record the number of regs it would take. */ + for (tmp = this_head->first; tmp; tmp = tmp->next_use) +{ + int n; + /* Completely ignore DEBUG_INSNs, otherwise we can get +-fcompare-debug failures. */ + if (DEBUG_INSN_P (tmp->insn)) + continue; + + if (!targetm.hard_regno_mode_ok (new_reg, GET_MODE (*tmp->loc))) + return false; + n = hard_regno_nregs (new_reg, GET_MODE (*tmp->loc)); + if (n > nregs) + nregs = n; +} + for (i = nregs - 1; i >= 0; --i) if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i) || fixed_regs[new_reg + i] @@ -348,14 +365,10 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, definition and uses. */ for (tmp = this_head->first; tmp; tmp = tmp->next_use) { - /* Completely ignore DEBUG_INSNs, otherwise we can get --fcompare-debug failures. 
*/ if (DEBUG_INSN_P (tmp->insn)) continue; - if (!targetm.hard_regno_mode_ok (new_reg, GET_MODE (*tmp->loc)) - || call_clobbered_in_chain_p (this_head, GET_MODE (*tmp->loc), - new_reg)) + if (call_clobbered_in_chain_p (this_head, GET_MODE (*tmp->loc), new_reg)) return false; }