[gcc r14-10662] libstdc++: std::string move assignment should not use POCCA trait [PR116641]
https://gcc.gnu.org/g:d5d6d3ff43c5166ead1787c4334553be26cc84da commit r14-10662-gd5d6d3ff43c5166ead1787c4334553be26cc84da Author: Jonathan Wakely Date: Tue Sep 10 14:25:41 2024 +0100 libstdc++: std::string move assignment should not use POCCA trait [PR116641] The changes to implement LWG 2579 (r10-327-gdb33efde17932f) made std::string::assign use the propagate_on_container_copy_assignment (POCCA) trait, for consistency with operator=(const basic_string&). However, this also unintentionally affected operator=(basic_string&&) which calls assign(str) to make a deep copy when performing a move is not possible. The fix is for the move assignment operator to call _M_assign(str) instead of assign(str), as this just does the deep copy and doesn't check the POCCA trait first. The bug only affects the unlikely/useless combination of POCCA==true and POCMA==false, but we should fix it for correctness anyway. It should also make move assignment slightly cheaper to compile and execute, because we skip the extra code in assign(const basic_string&). libstdc++-v3/ChangeLog: PR libstdc++/116641 * include/bits/basic_string.h (operator=(basic_string&&)): Call _M_assign instead of assign. * testsuite/21_strings/basic_string/allocator/116641.cc: New test. 
(cherry picked from commit c07cf418fdde0c192e370a8d76a991cc7215e9c4) Diff: --- libstdc++-v3/include/bits/basic_string.h | 2 +- .../21_strings/basic_string/allocator/116641.cc| 53 ++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h index 8a695a494efd..2794ec6419ac 100644 --- a/libstdc++-v3/include/bits/basic_string.h +++ b/libstdc++-v3/include/bits/basic_string.h @@ -912,7 +912,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 __str._M_data(__str._M_use_local_data()); } else // Need to do a deep copy - assign(__str); + _M_assign(__str); __str.clear(); return *this; } diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/allocator/116641.cc b/libstdc++-v3/testsuite/21_strings/basic_string/allocator/116641.cc new file mode 100644 index ..a1a411b87faa --- /dev/null +++ b/libstdc++-v3/testsuite/21_strings/basic_string/allocator/116641.cc @@ -0,0 +1,53 @@ +// { dg-do run { target c++11 } } +// { dg-require-effective-target cxx11_abi } + +// Bug 116641 - std::string move assignment incorrectly depends on POCCA + +#include +#include + +template +struct Alloc +{ + using value_type = T; + using propagate_on_container_swap = std::false_type; + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::false_type; + + Alloc(int id) : id(id) { } + + template +Alloc(const Alloc& a) : id(a.id) { } + + T* allocate(unsigned long n) + { return std::allocator().allocate(n); } + + void deallocate(T* p, unsigned long n) + { std::allocator().deallocate(p, n); } + + Alloc& operator=(const Alloc&) { throw; } + + bool operator==(const Alloc& a) const { return id == a.id; } + bool operator!=(const Alloc& a) const { return id != a.id; } + + int id; +}; + +void +test_pr116641() +{ + Alloc a1(1), a2(2); + std::basic_string, Alloc> s1(a1), s2(a2); + + s1 = "allocator should not propagate on move assignment"; + VERIFY( s1.get_allocator() == a1 ); + 
VERIFY( s2.get_allocator() == a2 ); + s2 = std::move(s1); + VERIFY( s1.get_allocator() == a1 ); + VERIFY( s2.get_allocator() == a2 ); +} + +int main() +{ + test_pr116641(); +}
[gcc r14-10663] libstdc++: Only use std::ios_base_library_init() for ELF [PR116159]
https://gcc.gnu.org/g:2003f890b13b8ec35b6112fc13c7e69e61cd9162 commit r14-10663-g2003f890b13b8ec35b6112fc13c7e69e61cd9162 Author: Jonathan Wakely Date: Tue Sep 10 14:36:26 2024 +0100 libstdc++: Only use std::ios_base_library_init() for ELF [PR116159] The undefined std::ios_base_library_init() symbol that is referenced by <iostream> is only supposed to be used for targets where symbol versioning is supported. The mingw-w64 target defaults to --enable-symvers=gnu due to using GNU ld but doesn't actually support symbol versioning. This means it tries to emit references to the std::ios_base_library_init() symbol, which isn't really defined in the library. This causes problems when using lld to link user binaries. Disable the undefined symbol reference for non-ELF targets. libstdc++-v3/ChangeLog: PR libstdc++/116159 * include/std/iostream (ios_base_library_init): Only define for ELF targets. * src/c++98/ios_init.cc (ios_base_library_init): Likewise. (cherry picked from commit fc7a1fb0238e379d466316aa219734ac61f4bc0e) Diff: --- libstdc++-v3/include/std/iostream | 2 +- libstdc++-v3/src/c++98/ios_init.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/std/iostream b/libstdc++-v3/include/std/iostream index 0c6a2d8a4b30..208fd1d51381 100644 --- a/libstdc++-v3/include/std/iostream +++ b/libstdc++-v3/include/std/iostream @@ -78,7 +78,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #if !(_GLIBCXX_USE_INIT_PRIORITY_ATTRIBUTE \ && __has_attribute(__init_priority__)) static ios_base::Init __ioinit; -#elif defined(_GLIBCXX_SYMVER_GNU) +#elif defined(_GLIBCXX_SYMVER_GNU) && defined(__ELF__) __extension__ __asm (".globl _ZSt21ios_base_library_initv"); #endif diff --git a/libstdc++-v3/src/c++98/ios_init.cc b/libstdc++-v3/src/c++98/ios_init.cc index 1422e20d9405..6e2e5014cf0f 100644 --- a/libstdc++-v3/src/c++98/ios_init.cc +++ b/libstdc++-v3/src/c++98/ios_init.cc @@ -199,7 +199,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __ret; } -#ifdef _GLIBCXX_SYMVER_GNU +#if 
defined(_GLIBCXX_SYMVER_GNU) && defined(__ELF__) #pragma GCC diagnostic ignored "-Wattribute-alias" void ios_base_library_init (void)
[gcc r15-3581] fortran/openmp.cc: Fix var init and locus use to avoid uninit values [PR fortran/116661]
https://gcc.gnu.org/g:4e9265a474def98cb6cdb59c15fbcb7630ba330e commit r15-3581-g4e9265a474def98cb6cdb59c15fbcb7630ba330e Author: Tobias Burnus Date: Wed Sep 11 09:25:47 2024 +0200 fortran/openmp.cc: Fix var init and locus use to avoid uninit values [PR fortran/116661] gcc/fortran/ChangeLog: PR fortran/116661 * openmp.cc (gfc_match_omp_prefer_type): NULL init a gfc_expr variable and use right locus in gfc_error. Diff: --- gcc/fortran/openmp.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index c04d8b0f5281..1145e2ff8900 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -1860,6 +1860,7 @@ gfc_match_omp_prefer_type (char **pref_str, int *pref_str_len, int **pref_int_ar } fr_found = true; gfc_symbol *sym = NULL; + e = NULL; locus loc = gfc_current_locus; if (gfc_match_symbol (&sym, 0) != MATCH_YES || gfc_match (" _") == MATCH_YES) @@ -1881,7 +1882,7 @@ gfc_match_omp_prefer_type (char **pref_str, int *pref_str_len, int **pref_int_ar { gfc_error ("Expected constant integer identifier or " "non-empty default-kind character literal at %L", - &e->where); + &loc); gfc_free_expr (e); return MATCH_ERROR; }
[gcc r15-3582] OpenMP: Add interop routines to omp_runtime_api_procname
https://gcc.gnu.org/g:6291f25631500c2d1c2328f919aa4405c3837f02 commit r15-3582-g6291f25631500c2d1c2328f919aa4405c3837f02 Author: Tobias Burnus Date: Wed Sep 11 12:02:24 2024 +0200 OpenMP: Add interop routines to omp_runtime_api_procname gcc/ * omp-general.cc (omp_runtime_api_procname): Add omp_get_interop_{int,name,ptr,rc_desc,str,type_desc} and omp_get_num_interop_properties. Diff: --- gcc/omp-general.cc | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index 0b61335dba40..aaa179afe13e 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -3260,7 +3260,10 @@ omp_runtime_api_procname (const char *name) "alloc", "calloc", "free", + "get_interop_int", + "get_interop_ptr", "get_mapped_ptr", + "get_num_interop_properties", "realloc", "target_alloc", "target_associate_ptr", @@ -3289,6 +3292,10 @@ omp_runtime_api_procname (const char *name) "get_device_num", "get_dynamic", "get_initial_device", + "get_interop_name", + "get_interop_rc_desc", + "get_interop_str", + "get_interop_type_desc", "get_level", "get_max_active_levels", "get_max_task_priority",
[gcc r15-3583] c++: Ensure ANNOTATE_EXPRs remain outermost expressions in conditions [PR116140]
https://gcc.gnu.org/g:f97d86242b86e4ad2bef3623c97e91481840a210 commit r15-3583-gf97d86242b86e4ad2bef3623c97e91481840a210 Author: Alex Coplan Date: Fri Aug 2 09:52:50 2024 +0100 c++: Ensure ANNOTATE_EXPRs remain outermost expressions in conditions [PR116140] For the testcase added with this patch, we would end up losing the: #pragma GCC unroll 4 and emitting "warning: ignoring loop annotation". That warning comes from tree-cfg.cc:replace_loop_annotate, and means that we failed to process the ANNOTATE_EXPR in tree-cfg.cc:replace_loop_annotate_in_block. That function walks backwards over the GIMPLE in an exiting BB for a loop, skipping over the final gcond, and looks for any ANNOTATE_EXPRS immediately preceding the gcond. The function documents the following pre-condition: /* [...] We assume that the annotations come immediately before the condition in BB, if any. */ now looking at the exiting BB of the loop, we have: : D.4524 = .ANNOTATE (iftmp.1, 1, 4); retval.0 = D.4524; if (retval.0 != 0) goto ; [INV] else goto ; [INV] and crucially there is an intervening assignment between the gcond and the preceding .ANNOTATE ifn call. To see where this comes from, we can look to the IR given by -fdump-tree-original: if (<::operator() (&pred, *first), unroll 4>>>) goto ; else goto ; here the problem is that we've wrapped a CLEANUP_POINT_EXPR around the ANNOTATE_EXPR, meaning the ANNOTATE_EXPR is no longer the outermost expression in the condition. The CLEANUP_POINT_EXPR gets added by the following call chain: finish_while_stmt_cond -> maybe_convert_cond -> condition_conversion -> fold_build_cleanup_point_expr this patch chooses to fix the issue by first introducing a new helper class (annotate_saver) to save and restore outer chains of ANNOTATE_EXPRs and then using it in maybe_convert_cond. With this patch, we don't get any such warning and the loop gets unrolled as expected at -O2. gcc/cp/ChangeLog: PR libstdc++/116140 * semantics.cc (annotate_saver): New. Use it ... 
(maybe_convert_cond): ... here, to ensure any ANNOTATE_EXPRs remain the outermost expression(s) of the condition. gcc/testsuite/ChangeLog: PR libstdc++/116140 * g++.dg/ext/pragma-unroll-lambda.C: New test. Diff: --- gcc/cp/semantics.cc | 88 - gcc/testsuite/g++.dg/ext/pragma-unroll-lambda.C | 17 + 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 3e117c216da5..63212afafb3b 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -951,6 +951,86 @@ maybe_warn_unparenthesized_assignment (tree t, bool nested_p, } } +/* Helper class for saving/restoring ANNOTATE_EXPRs. For a tree node t, users + can construct one of these like so: + + annotate_saver s (&t); + + and t will be updated to have any annotations removed. The user can then + transform t, and later restore the ANNOTATE_EXPRs with: + + t = s.restore (t). + + The intent is to ensure that any ANNOTATE_EXPRs remain the outermost + expressions following any operations on t. */ + +class annotate_saver { + /* The chain of saved annotations, if there were any. Otherwise null. */ + tree m_annotations; + + /* If M_ANNOTATIONS is non-null, then M_INNER points to TREE_OPERAND (A, 0) + for the innermost annotation A. */ + tree *m_inner; + +public: + annotate_saver (tree *); + tree restore (tree); +}; + +/* If *COND is an ANNOTATE_EXPR, walk through the chain of annotations, and set + *COND equal to the first non-ANNOTATE_EXPR (saving a pointer to the + original chain of annotations for later use in restore). */ + +annotate_saver::annotate_saver (tree *cond) : m_annotations (nullptr) +{ + tree *t = cond; + while (TREE_CODE (*t) == ANNOTATE_EXPR) +t = &TREE_OPERAND (*t, 0); + + if (t != cond) +{ + m_annotations = *cond; + *cond = *t; + m_inner = t; +} +} + +/* If we didn't strip any annotations on construction, return NEW_INNER + unmodified. 
Otherwise, wrap the saved annotations around NEW_INNER (updating + the types and flags of the annotations if needed) and return the resulting + expression. */ + +tree +annotate_saver::restore (tree new_inner) +{ + if (!m_annotations) +return new_inner; + + /* If the type of the inner expression changed, we need to update the types + of all the ANNOTATE_EXPRs. We may need to update the flags too, but we + assume they only change if the type of the inner expression changes. + The flag update logic assumes that the other operands to the + ANNOTATE_EXPRs are always INTEGER_CS
[gcc r15-3584] testsuite: Ensure ltrans dump files get cleaned up properly [PR116140]
https://gcc.gnu.org/g:31ff173c70847bba94613eac5b1ef2c0bec842e6 commit r15-3584-g31ff173c70847bba94613eac5b1ef2c0bec842e6 Author: Alex Coplan Date: Thu Aug 8 13:15:39 2024 + testsuite: Ensure ltrans dump files get cleaned up properly [PR116140] I noticed while working on a test that uses LTO and requests a dump file, that we are failing to cleanup ltrans dump files in the testsuite. E.g. the test I was working on compiles with -flto -fdump-rtl-loop2_unroll, and we end up with the following file: ./gcc/testsuite/g++/pr116140.ltrans0.ltrans.287r.loop2_unroll being left behind by the testsuite. This is problematic not just from a "missing cleanup" POV, but also because it can cause the test to pass spuriously when the test is re-run with an unpatched compiler (without the bug fix). In the broken case, loop2_unroll isn't run at all, so we end up scanning the old dumpfile (from the previous test run) and making the dumpfile scan pass. Running with `-v -v` in RUNTESTFLAGS we can see the following cleanup attempt is made: remove-build-file `pr116140.{C,exe}.{ltrans[0-9]*.,}[0-9][0-9][0-9]{l,i,r,t}.*' looking again at the ltrans dump file above we can see this will fail for two reasons: - The actual dump file has no {C,exe} extension between the basename and ltrans0. - The actual dump file has an additional `.ltrans` component after `.ltrans0`. This patch therefore relaxes the pattern constructed for cleaning up such dumpfiles to also match dumpfiles with the above form. Running the testsuite before/after this patch shows the number of files in gcc/testsuite (in the build dir) with "ltrans" in the name goes from 1416 to 62 on aarch64. gcc/testsuite/ChangeLog: PR libstdc++/116140 * lib/gcc-dg.exp (schedule-cleanups): Relax ltrans dumpfile cleanup pattern to handle missing cases. 
Diff: --- gcc/testsuite/lib/gcc-dg.exp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp index d9513e2859ce..cb401a704359 100644 --- a/gcc/testsuite/lib/gcc-dg.exp +++ b/gcc/testsuite/lib/gcc-dg.exp @@ -190,7 +190,7 @@ proc schedule-cleanups { opts } { # Handle ltrans files around -flto if [regexp -- {(^|\s+)-flto(\s+|$)} $opts] { verbose "Cleanup -flto seen" 4 - set ltrans "{ltrans\[0-9\]*.,}" + set ltrans "{ltrans\[0-9\]*{.ltrans,}.,}" } else { set ltrans "" } @@ -206,7 +206,7 @@ proc schedule-cleanups { opts } { if {$basename_ext != ""} { regsub -- {^.*\.} $basename_ext {} basename_ext } - lappend tfiles "$stem.{$basename_ext,exe}" + lappend tfiles "$stem{.$basename_ext,.exe,}" unset basename_ext } else { lappend tfiles $basename
[gcc r15-3585] lto: Stream has_unroll flag during LTO [PR116140]
https://gcc.gnu.org/g:9759f6299d9633cabac540e5c893341c708093ac commit r15-3585-g9759f6299d9633cabac540e5c893341c708093ac Author: Alex Coplan Date: Sat Aug 3 17:02:36 2024 + lto: Stream has_unroll flag during LTO [PR116140] When #pragma GCC unroll is processed in tree-cfg.cc:replace_loop_annotate_in_block, we set both the loop->unroll field (which is currently streamed out and back in during LTO) but also the cfun->has_unroll flag. cfun->has_unroll, however, is not currently streamed during LTO. This patch fixes that. Prior to this patch, loops marked with #pragma GCC unroll that would be unrolled by RTL loop2_unroll in a non-LTO compilation didn't get unrolled under LTO. gcc/ChangeLog: PR libstdc++/116140 * lto-streamer-in.cc (input_struct_function_base): Stream in fn->has_unroll. * lto-streamer-out.cc (output_struct_function_base): Stream out fn->has_unroll. gcc/testsuite/ChangeLog: PR libstdc++/116140 * g++.dg/ext/pragma-unroll-lambda-lto.C: New test. Diff: --- gcc/lto-streamer-in.cc | 1 + gcc/lto-streamer-out.cc| 1 + .../g++.dg/ext/pragma-unroll-lambda-lto.C | 32 ++ 3 files changed, 34 insertions(+) diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc index 64f758073280..9d0ec5d589c4 100644 --- a/gcc/lto-streamer-in.cc +++ b/gcc/lto-streamer-in.cc @@ -1326,6 +1326,7 @@ input_struct_function_base (struct function *fn, class data_in *data_in, fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1); fn->has_simduid_loops = bp_unpack_value (&bp, 1); fn->has_musttail = bp_unpack_value (&bp, 1); + fn->has_unroll = bp_unpack_value (&bp, 1); fn->assume_function = bp_unpack_value (&bp, 1); fn->va_list_fpr_size = bp_unpack_value (&bp, 8); fn->va_list_gpr_size = bp_unpack_value (&bp, 8); diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc index a4b171358d41..807b935537be 100644 --- a/gcc/lto-streamer-out.cc +++ b/gcc/lto-streamer-out.cc @@ -2283,6 +2283,7 @@ output_struct_function_base (struct output_block *ob, struct function *fn) bp_pack_value (&bp, 
fn->has_force_vectorize_loops, 1); bp_pack_value (&bp, fn->has_simduid_loops, 1); bp_pack_value (&bp, fn->has_musttail, 1); + bp_pack_value (&bp, fn->has_unroll, 1); bp_pack_value (&bp, fn->assume_function, 1); bp_pack_value (&bp, fn->va_list_fpr_size, 8); bp_pack_value (&bp, fn->va_list_gpr_size, 8); diff --git a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C new file mode 100644 index ..144c4c326924 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C @@ -0,0 +1,32 @@ +// { dg-do link { target c++11 } } +// { dg-options "-O2 -flto -fdump-rtl-loop2_unroll" } + +#include + +template +inline Iter +my_find(Iter first, Iter last, Pred pred) +{ +#pragma GCC unroll 4 +while (first != last && !pred(*first)) +++first; +return first; +} + +__attribute__((noipa)) +short *use_find(short *p) +{ +auto pred = [](short x) { return x == 42; }; +return my_find(p, p + 1024, pred); +} + +int main(void) +{ + short a[1024]; + for (int i = 0; i < 1024; i++) +a[i] = rand (); + + return use_find (a) - a; +} + +// { dg-final { scan-ltrans-rtl-dump-times "Unrolled loop 3 times" 1 "loop2_unroll" } }
[gcc r15-3586] libstdc++: Restore unrolling in std::find using pragma [PR116140]
https://gcc.gnu.org/g:3fd07d4f04f43816a038daf9b16c6d5bf2e96c9b commit r15-3586-g3fd07d4f04f43816a038daf9b16c6d5bf2e96c9b Author: Alex Coplan Date: Fri Aug 2 09:56:07 2024 +0100 libstdc++: Restore unrolling in std::find using pragma [PR116140] Together with the preparatory compiler patches, this patch restores unrolling in std::__find_if, but this time relying on the compiler to do it by using: #pragma GCC unroll 4 which should restore the majority of the regression relative to the hand-unrolled version while still being vectorizable with WIP alignment peeling enhancements. On Neoverse V1 with LTO, this reduces the regression in xalancbmk (from SPEC CPU 2017) from 5.8% to 1.7% (restoring ~71% of the lost performance). libstdc++-v3/ChangeLog: PR libstdc++/116140 * include/bits/stl_algobase.h (std::__find_if): Add #pragma to request GCC to unroll the loop. Diff: --- libstdc++-v3/include/bits/stl_algobase.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h index 27f6c377ad6f..f13662fc4482 100644 --- a/libstdc++-v3/include/bits/stl_algobase.h +++ b/libstdc++-v3/include/bits/stl_algobase.h @@ -2104,6 +2104,7 @@ _GLIBCXX_END_NAMESPACE_ALGO inline _Iterator __find_if(_Iterator __first, _Iterator __last, _Predicate __pred) { +#pragma GCC unroll 4 while (__first != __last && !__pred(__first)) ++__first; return __first;
[gcc r15-3587] tree-optimization/116674 - vectorizable_simd_clone_call and re-analysis
https://gcc.gnu.org/g:09a514fbb67caf7e33a6ceddf524ee21024c33c5 commit r15-3587-g09a514fbb67caf7e33a6ceddf524ee21024c33c5 Author: Richard Biener Date: Wed Sep 11 13:54:33 2024 +0200 tree-optimization/116674 - vectorizable_simd_clone_call and re-analysis When SLP analysis scraps an instance because it fails to analyze we can end up calling vectorizable_* in analysis mode on a node that was analyzed during the analysis of that instance again. vectorizable_simd_clone_call wasn't expecting that and instead guarded analysis/transform code on populated data structures. The following changes it so it survives re-analysis. PR tree-optimization/116674 * tree-vect-stmts.cc (vectorizable_simd_clone_call): Support re-analysis. * g++.dg/vect/pr116674.cc: New testcase. Diff: --- gcc/testsuite/g++.dg/vect/pr116674.cc | 85 +++ gcc/tree-vect-stmts.cc| 8 ++-- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/g++.dg/vect/pr116674.cc b/gcc/testsuite/g++.dg/vect/pr116674.cc new file mode 100644 index ..1c13f12290bc --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr116674.cc @@ -0,0 +1,85 @@ +// { dg-do compile } +// { dg-require-effective-target c++11 } +// { dg-additional-options "-Ofast" } +// { dg-additional-options "-march=x86-64-v3" { target { x86_64-*-* i?86-*-* } } } + +namespace std { +typedef int a; +template struct b; +template class aa {}; +template c d(c e, c) { return e; } +template struct b> { + using f = c; + using g = c *; + template using j = aa; +}; +} // namespace std +namespace l { +template struct m : std::b { + typedef std::b n; + typedef typename n::f &q; + template struct ac { typedef typename n::j ad; }; +}; +} // namespace l +namespace std { +template struct o { + typedef typename l::m::ac::ad ae; + typedef typename l::m::g g; + struct p { + g af; + }; + struct ag : p { + ag(ae) {} + }; + typedef ab u; + o(a, u e) : ah(e) {} + ag ah; +}; +template > class r : o { + typedef o s; + typedef typename s::ae ae; + typedef l::m w; + +public: + 
c f; + typedef typename w::q q; + typedef a t; + typedef ab u; + r(t x, u e = u()) : s(ai(x, e), e) {} + q operator[](t x) { return *(this->ah.af + x); } + t ai(t x, u) { return x; } +}; +extern "C" __attribute__((__simd__)) double exp(double); +} // namespace std +using namespace std; +int ak; +double v, y; +void am(double, int an, double, double, double, double, double, double, double, + double, double, double, int, double, double, double, double, + r ap, double, double, double, double, double, double, double, + double, r ar, r as, double, double, r at, + r au, r av, double, double) { +double ba; +for (int k;;) + for (int i; i < an; ++i) { + y = i; + v = d(y, 25.0); + ba = exp(v); + ar[i * (ak + 1)] = ba; + as[i * (ak + 1)] = ar[i * (ak + 1)]; + if (k && ap[k]) { + at[i * (ak + 1)] = av[i * (ak + 1)] = as[i * (ak + 1)]; + au[i * (ak + 1)] = ar[i * (ak + 1)]; + } else { + au[i * (ak + 1)] = ba; + at[i * (ak + 1)] = av[i * (ak + 1)] = k; + } + } +} +void b(int bc) { +double bd, be, bf, bg, bh, ao, ap, bn, bo, bp, bq, br, bs, bt, bu, bv, bw, bx, +by, aq, ar, as, bz, ca, at, au, av, cb, aw; +int bi; +am(bh, bc, bi, bi, bi, bi, bv, bw, bx, by, bu, bt, bi, ao, bn, bo, bp, ap, bq, + br, bs, bd, be, bf, bg, aq, ar, as, bz, ca, at, au, av, cb, aw); +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index f6c5b7a7e872..b1353c91fce1 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -3985,6 +3985,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, vec& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node) : STMT_VINFO_SIMD_CLONE_INFO (stmt_info)); + if (!vec_stmt) +simd_clone_info.truncate (0); arginfo.reserve (nargs, true); auto_vec slp_op; slp_op.safe_grow_cleared (nargs); @@ -4033,10 +4035,10 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, /* For linear arguments, the analyze phase should have saved the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. 
*/ - if (i * 3 + 4 <= simd_clone_info.length () + if (vec_stmt + && i * 3 + 4 <= simd_clone_info.length () && simd_clone_info[i * 3 + 2]) { - gcc_assert (vec_stmt); thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]); thisarginfo.op = simd_clone_info[i * 3 + 1]; thisarginfo.simd_la
[gcc r15-3588] arm: avoid indirect sibcalls when IP is live [PR116597]
https://gcc.gnu.org/g:670cfd5fe6433ee8f2e86eedb197d2523dbb033b commit r15-3588-g670cfd5fe6433ee8f2e86eedb197d2523dbb033b Author: Richard Earnshaw Date: Wed Aug 21 16:15:34 2024 +0100 arm: avoid indirect sibcalls when IP is live [PR116597] On Arm only r0-r3 (the argument registers) and IP are available for use as an address for an indirect sibcall. But if all the argument registers are used and IP is clobbered during the epilogue, or is used to pass closure information, then there is no spare register to hold the address and we must reject the sibcall. arm_function_ok_for_sibcall did try to handle this, but it did this by examining the function declaration. That doesn't work if the function has no prototype, or if the prototype has variadic arguments: we must, instead, look at the list of actuals for the call rather than the list of formals. The old code also worked by laying out all the arguments and then trying to add one more integer argument at the end of the list, but this missed a corner case where a hole had been left in the argument register list due to argument alignment. We fix all of this by now scanning the list of actual values to be passed and then checking if a core register has been assigned to that argument. If it has, then we record which registers were assigned. Once done we then look to see if all the argument registers have been assigned and only block the sibcall if that is the case. This permits us to sibcall: int (*d)(int, ...); int g(void); int i () { return d(g(), 2LL);} because r1 remains free (the 2LL argument is passed in {r2,r3}). gcc/ PR target/116597 * config/arm/arm.cc (arm_function_ok_for_sibcall): Use the list of actuals for the call, not the list of formals. gcc/testsuite/ PR target/116597 * gcc.target/arm/pac-sibcall-2.c: New test. * gcc.target/arm/pac-sibcall-3.c: New test. 
Diff: --- gcc/config/arm/arm.cc| 38 gcc/testsuite/gcc.target/arm/pac-sibcall-2.c | 14 ++ gcc/testsuite/gcc.target/arm/pac-sibcall-3.c | 14 ++ 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 17485447693b..de34e9867e67 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -8007,10 +8007,11 @@ arm_function_ok_for_sibcall (tree decl, tree exp) && DECL_WEAK (decl)) return false; - /* We cannot tailcall an indirect call by descriptor if all the call-clobbered - general registers are live (r0-r3 and ip). This can happen when: - - IP contains the static chain, or - - IP is needed for validating the PAC signature. */ + /* Indirect tailcalls need a call-clobbered register to hold the function + address. But we only have r0-r3 and ip in that class. If r0-r3 all hold + function arguments, then we can only use IP. But IP may be needed in the + epilogue (for PAC validation), or for passing the static chain. We have + to disable the tail call if nothing is available. */ if (!decl && ((CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines) || arm_current_function_pac_enabled_p())) @@ -8022,18 +8023,33 @@ arm_function_ok_for_sibcall (tree decl, tree exp) arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE); cum_v = pack_cumulative_args (&cum); - for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t)) + tree arg; + call_expr_arg_iterator iter; + unsigned used_regs = 0; + + /* Layout each actual argument in turn. If it is allocated to +core regs, note which regs have been allocated. 
*/ + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) { - tree type = TREE_VALUE (t); - if (!VOID_TYPE_P (type)) + tree type = TREE_TYPE (arg); + function_arg_info arg_info (type, /*named=*/true); + rtx reg = arm_function_arg (cum_v, arg_info); + if (reg && REG_P (reg) + && REGNO (reg) <= LAST_ARG_REGNUM) { - function_arg_info arg (type, /*named=*/true); - arm_function_arg_advance (cum_v, arg); + /* Avoid any chance of UB here. We don't care if TYPE +is very large since it will use up all the argument regs. */ + unsigned nregs = MIN (ARM_NUM_REGS2 (GET_MODE (reg), type), + LAST_ARG_REGNUM + 1); + used_regs |= ((1 << nregs) - 1) << REGNO (reg); } + arm_function_arg_advance (cum_v, arg_info); } - function_arg_info arg (integer_type_node, /*named=*/true); - if (!arm_function_arg (cum_v, arg)) + /* We've used all the argument regs, and we know IP is live during the +epilogue for some re
[gcc r15-3589] ipa: Rename ipa_supports_p to ipa_vr_supported_type_p
https://gcc.gnu.org/g:323291c29c77e3214f4850129bb8a3d0d8da6a45 commit r15-3589-g323291c29c77e3214f4850129bb8a3d0d8da6a45 Author: Martin Jambor Date: Wed Sep 11 23:53:21 2024 +0200 ipa: Rename ipa_supports_p to ipa_vr_supported_type_p ipa_supports_p is not a name that captures well what the predicate determines. Therefore, this patch renames it to ipa_vr_supported_type_p. gcc/ChangeLog: 2024-09-06 Martin Jambor * ipa-cp.h (ipa_supports_p): Rename to ipa_vr_supported_type_p. * ipa-cp.cc (ipa_vr_operation_and_type_effects): Adjust called function name. (propagate_vr_across_jump_function): Likewise. * ipa-prop.cc (ipa_compute_jump_functions_for_edge): Likewise. (ipcp_get_parm_bits): Likewise. Diff: --- gcc/ipa-cp.cc | 5 +++-- gcc/ipa-cp.h| 2 +- gcc/ipa-prop.cc | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index 56468dc40ee4..a1033b81aefc 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -1649,7 +1649,8 @@ ipa_vr_operation_and_type_effects (vrange &dst_vr, enum tree_code operation, tree dst_type, tree src_type) { - if (!ipa_supports_p (dst_type) || !ipa_supports_p (src_type)) + if (!ipa_vr_supported_type_p (dst_type) + || !ipa_vr_supported_type_p (src_type)) return false; range_op_handler handler (operation); @@ -2553,7 +2554,7 @@ propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, ipa_range_set_and_normalize (op_vr, op); if (!handler - || !ipa_supports_p (operand_type) + || !ipa_vr_supported_type_p (operand_type) /* Sometimes we try to fold comparison operators using a pointer type to hold the result instead of a boolean type. Avoid trapping in the sanity check in diff --git a/gcc/ipa-cp.h b/gcc/ipa-cp.h index 4616c61625ab..ba2ebfede63f 100644 --- a/gcc/ipa-cp.h +++ b/gcc/ipa-cp.h @@ -294,7 +294,7 @@ bool values_equal_for_ipcp_p (tree x, tree y); /* Return TRUE if IPA supports ranges of TYPE. 
*/ static inline bool -ipa_supports_p (tree type) +ipa_vr_supported_type_p (tree type) { return irange::supports_p (type) || prange::supports_p (type); } diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc index 99ebd6229ec4..78d1fb7086d5 100644 --- a/gcc/ipa-prop.cc +++ b/gcc/ipa-prop.cc @@ -2392,8 +2392,8 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, else { if (param_type - && ipa_supports_p (TREE_TYPE (arg)) - && ipa_supports_p (param_type) + && ipa_vr_supported_type_p (TREE_TYPE (arg)) + && ipa_vr_supported_type_p (param_type) && get_range_query (cfun)->range_of_expr (vr, arg, cs->call_stmt) && !vr.undefined_p ()) { @@ -5761,7 +5761,7 @@ ipcp_get_parm_bits (tree parm, tree *value, widest_int *mask) ipcp_transformation *ts = ipcp_get_transformation_summary (cnode); if (!ts || vec_safe_length (ts->m_vr) == 0 - || !ipa_supports_p (TREE_TYPE (parm))) + || !ipa_vr_supported_type_p (TREE_TYPE (parm))) return false; int i = ts->get_param_index (current_function_decl, parm);
[gcc r15-3590] ipa-cp: One more use of ipa_vr_supported_type_p
https://gcc.gnu.org/g:f910b02919036647a3f096265cda19358dded628 commit r15-3590-gf910b02919036647a3f096265cda19358dded628 Author: Martin Jambor Date: Wed Sep 11 23:53:21 2024 +0200 ipa-cp: One more use of ipa_vr_supported_type_p Since we have the predicate, this patch converts one more check for essentially the same thing into its use. 2024-09-11 Martin Jambor * ipa-cp.cc (propagate_vr_across_jump_function): Use ipa_vr_supported_type_p instead of explicit check for integral and pointer types. Diff: --- gcc/ipa-cp.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index a1033b81aefc..fa7bd6a15da7 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -2519,8 +2519,7 @@ propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, return false; if (!param_type - || (!INTEGRAL_TYPE_P (param_type) - && !POINTER_TYPE_P (param_type))) + || !ipa_vr_supported_type_p (param_type)) return dest_lat->set_to_bottom (); if (jfunc->type == IPA_JF_PASS_THROUGH)
[gcc r15-3592] c++/modules: Really always track partial specialisations [PR116496]
https://gcc.gnu.org/g:ba393bf8879e5cf1f917bd88246d6b80ac081052 commit r15-3592-gba393bf8879e5cf1f917bd88246d6b80ac081052 Author: Nathaniel Shead Date: Wed Sep 11 22:41:21 2024 +1000 c++/modules: Really always track partial specialisations [PR116496] My last fix for this issue (PR c++/114947, r15-810) didn't go far enough; I had assumed that the issue where we lost track of partial specialisations we would need to walk again later was limited to partitions (where we always re-walk all specialisations), but the linked PR is the same cause but for header units, and it is possible to construct test cases exposing the same bug just for normal modules. As such this patch just unconditionally ensures that whenever we modify DECL_TEMPLATE_SPECIALIZATIONS we also track any partial specialisations that might have been added. Also clean up a couple of comments and assertions to make expected state more obvious when processing these specs. PR c++/116496 gcc/cp/ChangeLog: * module.cc (trees_in::decl_value): Don't call set_defining_module_for_partial_spec here. (depset::hash::add_partial_entities): Clarify assertions. * pt.cc (add_mergeable_specialization): Always call set_defining_module_for_partial_spec when adding a partial spec. gcc/testsuite/ChangeLog: * g++.dg/modules/partial-5_a.C: New test. * g++.dg/modules/partial-5_b.C: New test. Signed-off-by: Nathaniel Shead Diff: --- gcc/cp/module.cc | 25 - gcc/cp/pt.cc | 1 + gcc/testsuite/g++.dg/modules/partial-5_a.C | 9 + gcc/testsuite/g++.dg/modules/partial-5_b.C | 9 + 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index dc0e9e5520f9..f5df9e875d3a 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -8434,11 +8434,6 @@ trees_in::decl_value () add_mergeable_specialization (!is_type, &spec, decl, spec_flags); } - /* When making a CMI from a partition we're going to need to walk partial -specializations again, so make sure they're tracked. 
*/ - if (state->is_partition () && (spec_flags & 2)) - set_defining_module_for_partial_spec (inner); - if (NAMESPACE_SCOPE_P (decl) && (mk == MK_named || mk == MK_unique || mk == MK_enum || mk == MK_friend_spec) @@ -13356,16 +13351,20 @@ depset::hash::add_partial_entities (vec *partial_classes) specialization. */ gcc_checking_assert (dep->get_entity_kind () == depset::EK_PARTIAL); + + /* Only emit GM entities if reached. */ + if (!DECL_LANG_SPECIFIC (inner) + || !DECL_MODULE_PURVIEW_P (inner)) + dep->set_flag_bit (); } else - /* It was an explicit specialization, not a partial one. */ - gcc_checking_assert (dep->get_entity_kind () -== depset::EK_SPECIALIZATION); - - /* Only emit GM entities if reached. */ - if (!DECL_LANG_SPECIFIC (inner) - || !DECL_MODULE_PURVIEW_P (inner)) - dep->set_flag_bit (); + { + /* It was an explicit specialization, not a partial one. +We should have already added this. */ + gcc_checking_assert (dep->get_entity_kind () + == depset::EK_SPECIALIZATION); + gcc_checking_assert (dep->is_special ()); + } } } diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 310e5dfff033..cb3164d49147 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -31684,6 +31684,7 @@ add_mergeable_specialization (bool decl_p, spec_entry *elt, tree decl, DECL_TEMPLATE_SPECIALIZATIONS (elt->tmpl)); TREE_TYPE (cons) = decl_p ? 
TREE_TYPE (elt->spec) : elt->spec; DECL_TEMPLATE_SPECIALIZATIONS (elt->tmpl) = cons; + set_defining_module_for_partial_spec (STRIP_TEMPLATE (decl)); } } diff --git a/gcc/testsuite/g++.dg/modules/partial-5_a.C b/gcc/testsuite/g++.dg/modules/partial-5_a.C new file mode 100644 index ..768e6995f0ff --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/partial-5_a.C @@ -0,0 +1,9 @@ +// PR c++/116496 +// { dg-additional-options "-fmodules-ts -std=c++20 -Wno-global-module" } +// { dg-module-cmi A } + +module; +template struct S {}; +export module A; +template struct S {}; +template requires false struct S {}; diff --git a/gcc/testsuite/g++.dg/modules/partial-5_b.C b/gcc/testsuite/g++.dg/modules/partial-5_b.C new file mode 100644 index ..95401fe8b562 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/partial-5_b.C @@ -0,0 +1,9 @@ +// PR c++/116496 +// { dg-additional-options "-fmodules-ts -std=c++20 -Wno-global-module" } +// { dg-module-cmi B } + +module; +template struct S {
[gcc r15-3593] RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass
https://gcc.gnu.org/g:c08e493ceee47bbeb466eeef100be7c1dd01a4e5 commit r15-3593-gc08e493ceee47bbeb466eeef100be7c1dd01a4e5 Author: garthlei Date: Wed Sep 11 17:09:37 2024 +0800 RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass This patch fixes a bug in the current vsetvl pass. The current pass uses `m_vl` to determine whether the dest operand has been used by non-RVV instructions. However, `m_vl` may have been modified as a result of an `update_avl` call, and thus would no longer be the dest operand of the original instruction. This can lead to incorrect vsetvl eliminations, as is shown in the testcase. In this patch, we create a `dest_vl` variable for this scenario. gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc: Use `dest_vl` for dest VL operand gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c: New test. Diff: --- gcc/config/riscv/riscv-vsetvl.cc| 16 +++- .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c | 17 + 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 017efa8bc17e..ce831685439a 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1002,6 +1002,9 @@ public: void parse_insn (insn_info *insn) { +/* The VL dest of the insn */ +rtx dest_vl = NULL_RTX; + m_insn = insn; m_bb = insn->bb (); /* Return if it is debug insn for the consistency with optimize == 0. */ @@ -1035,7 +1038,10 @@ public: if (m_avl) { if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ()) - m_vl = ::get_vl (insn->rtl ()); + { + m_vl = ::get_vl (insn->rtl ()); + dest_vl = m_vl; + } if (has_nonvlmax_reg_avl ()) m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def (); @@ -1132,22 +1138,22 @@ public: } /* Determine if dest operand(vl) has been used by non-RVV instructions. 
*/ -if (has_vl ()) +if (dest_vl) { const hash_set vl_uses - = get_all_real_uses (get_insn (), REGNO (get_vl ())); + = get_all_real_uses (get_insn (), REGNO (dest_vl)); for (use_info *use : vl_uses) { gcc_assert (use->insn ()->is_real ()); rtx_insn *rinsn = use->insn ()->rtl (); if (!has_vl_op (rinsn) - || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1) + || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1) { m_vl_used_by_non_rvv_insn = true; break; } rtx avl = ::get_avl (rinsn); - if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl)) + if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl)) { m_vl_used_by_non_rvv_insn = true; break; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c new file mode 100644 index ..c155f5613d27 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2 -fdump-rtl-vsetvl-details" } */ + +#include + +uint64_t a[2], b[2]; + +void +foo () +{ + size_t vl = __riscv_vsetvl_e64m1 (2); + vuint64m1_t vx = __riscv_vle64_v_u64m1 (a, vl); + vx = __riscv_vslide1down_vx_u64m1 (vx, 0xull, vl); + __riscv_vse64_v_u64m1 (b, vx, vl); +} + +/* { dg-final { scan-rtl-dump-not "Eliminate insn" "vsetvl" } } */
[gcc r15-3594] i386: Fix incorrect avx512f-mask-type.h include
https://gcc.gnu.org/g:5958279509c4601499ac22629512f1723e6744b4 commit r15-3594-g5958279509c4601499ac22629512f1723e6744b4 Author: Haochen Jiang Date: Tue Sep 3 13:38:36 2024 +0800 i386: Fix incorrect avx512f-mask-type.h include In avx512f-mask-type.h, SIZE must be defined to get MASK_TYPE defined correctly. Fix those testcases where SIZE is not defined before the include for avx512f-mask-type.h. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10-helper.h: Do not include avx512f-mask-type.h. * gcc.target/i386/avx10_2-512-vaddnepbf16-2.c: Define SIZE and include avx512f-mask-type.h. * gcc.target/i386/avx10_2-512-vcmppbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtnebf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttnebf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttnebf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vdivnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vdpphps-2.c: Ditto. * gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c: Ditto. 
* gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vfpclasspbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vgetexppbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vmaxpbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminmaxnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminmaxpd-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminmaxph-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminmaxps-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminpbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vmpsadbw-2.c: Ditto. * gcc.target/i386/avx10_2-512-vmulnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbssd-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbssds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbsud-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbsuds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbuud-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpbuuds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpwsud-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpwsuds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpwusd-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpwusds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpwuud-2.c: Ditto. * gcc.target/i386/avx10_2-512-vpdpwuuds-2.c: Ditto. * gcc.target/i386/avx10_2-512-vrcppbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vreducenepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vscalefpbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vsubnepbf16-2.c: Ditto. * gcc.target/i386/avx512fp16-vfpclassph-1b.c: Ditto. 
Diff: --- gcc/testsuite/gcc.target/i386/avx10-helper.h | 1 - .../gcc.target/i386/avx10_2-512-vaddnepbf16-2.c| 11 ++- .../gcc.target/i386/avx10_2-512-vcmppbf16-2.c | 5 +++-- .../gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c | 16 .../i386/avx10_2-512-vcvtnebf162iubs-2.c | 16 .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c | 16 .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c| 16 ++
[gcc r15-3595] RISC-V: Eliminate latter vsetvl when fused
https://gcc.gnu.org/g:3f212eabbba3edc1827d6da53cf6d5a64c6524f0 commit r15-3595-g3f212eabbba3edc1827d6da53cf6d5a64c6524f0 Author: Bohan Lei Date: Thu Sep 12 10:28:03 2024 +0800 RISC-V: Eliminate latter vsetvl when fused Hi all, A simple assembly check has been added in this version. Previous version: https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662783.html Thanks, Bohan -- The current vsetvl pass eliminates a vsetvl instruction when the previous info is "available," but does not when "compatible." This can lead to not only redundancy, but also incorrect behaviors when the previous info happens to be compatible with a later vector instruction, which ends up using the vsetvl info that should have been eliminated, as is shown in the testcase. This patch eliminates the vsetvl when the previous info is "compatible." gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::fuse_local_vsetvl_info): Delete vsetvl insn when `prev_info` is compatible gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c: New test. 
Diff: --- gcc/config/riscv/riscv-vsetvl.cc | 3 +++ .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c| 19 +++ 2 files changed, 22 insertions(+) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index ce831685439a..030ffbe2ebbc 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -2796,6 +2796,9 @@ pre_vsetvl::fuse_local_vsetvl_info () curr_info.dump (dump_file, ""); } m_dem.merge (prev_info, curr_info); + if (!curr_info.vl_used_by_non_rvv_insn_p () + && vsetvl_insn_p (curr_info.get_insn ()->rtl ())) + m_delete_list.safe_push (curr_info); if (curr_info.get_read_vl_insn ()) prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); if (dump_file && (dump_flags & TDF_DETAILS)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c new file mode 100644 index ..04a8ff2945a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2 -fno-schedule-insns -fdump-rtl-vsetvl-details" } */ + +#include + +vuint16m1_t +foo (vuint16m1_t a, vuint16m1_t b, size_t avl) +{ + size_t vl; + vuint16m1_t ret; + uint16_t c = __riscv_vmv_x_s_u16m1_u16(a); + vl = __riscv_vsetvl_e8mf2 (avl); + ret = __riscv_vadd_vx_u16m1 (a, c, avl); + ret = __riscv_vadd_vv_u16m1 (ret, a, vl); + return ret; +} + +/* { dg-final { scan-rtl-dump "Eliminate insn" "vsetvl" } } */ +/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
[gcc r15-3596] i386: Enable V2BF/V4BF vec_cmp with AVX10.2 vcmppbf16
https://gcc.gnu.org/g:89d50c45048e5d7230ddde9afc8fbc83143e34cb commit r15-3596-g89d50c45048e5d7230ddde9afc8fbc83143e34cb Author: Levy Hsu Date: Wed Sep 4 16:34:04 2024 +0930 i386: Enable V2BF/V4BF vec_cmp with AVX10.2 vcmppbf16 gcc/ChangeLog: * config/i386/i386.cc (ix86_get_mask_mode): Enable BFmode for targetm.vectorize.get_mask_mode with AVX10.2. * config/i386/mmx.md (vec_cmpqi): Implement vec_cmpv2bfqi and vec_cmpv4bfqi. gcc/testsuite/ChangeLog: * gcc.target/i386/part-vect-vec_cmpbf.c: New test. Diff: --- gcc/config/i386/i386.cc| 3 ++- gcc/config/i386/mmx.md | 17 ++ .../gcc.target/i386/part-vect-vec_cmpbf.c | 26 ++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 45320124b91c..7dbae1d72e35 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -24682,7 +24682,8 @@ ix86_get_mask_mode (machine_mode data_mode) /* AVX512FP16 only supports vector comparison to kmask for _Float16. */ || (TARGET_AVX512VL && TARGET_AVX512FP16 - && GET_MODE_INNER (data_mode) == E_HFmode)) + && GET_MODE_INNER (data_mode) == E_HFmode) + || (TARGET_AVX10_2_256 && GET_MODE_INNER (data_mode) == E_BFmode)) { if (elem_size == 4 || elem_size == 8 diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 4bc191b874b3..2f8d958dd5f0 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2290,6 +2290,23 @@ DONE; }) +;;This instruction does not generate floating point exceptions +(define_expand "vec_cmpqi" + [(set (match_operand:QI 0 "register_operand") + (match_operator:QI 1 "" + [(match_operand:VBF_32_64 2 "register_operand") + (match_operand:VBF_32_64 3 "nonimmediate_operand")]))] + "TARGET_AVX10_2_256" +{ + rtx op2 = lowpart_subreg (V8BFmode, +force_reg (mode, operands[2]), mode); + rtx op3 = lowpart_subreg (V8BFmode, +force_reg (mode, operands[3]), mode); + + emit_insn (gen_vec_cmpv8bfqi (operands[0], operands[1], op2, op3)); + DONE; +}) + ; ;; ;; Parallel half-precision floating point 
rounding operations. diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c new file mode 100644 index ..0bb720b64324 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */ + +typedef __bf16 __attribute__((__vector_size__ (4))) v2bf; +typedef __bf16 __attribute__((__vector_size__ (8))) v4bf; + + +#define VCMPMN(type, op, name) \ +type \ +__attribute__ ((noinline, noclone)) \ +vec_cmp_##type##type##name (type a, type b) \ +{ \ + return a op b; \ +} + +VCMPMN (v4bf, <, lt) +VCMPMN (v2bf, <, lt) +VCMPMN (v4bf, <=, le) +VCMPMN (v2bf, <=, le) +VCMPMN (v4bf, >, gt) +VCMPMN (v2bf, >, gt) +VCMPMN (v4bf, >=, ge) +VCMPMN (v2bf, >=, ge) +VCMPMN (v4bf, ==, eq) +VCMPMN (v2bf, ==, eq)