[gcc r14-10662] libstdc++: std::string move assignment should not use POCCA trait [PR116641]

2024-09-11 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:d5d6d3ff43c5166ead1787c4334553be26cc84da

commit r14-10662-gd5d6d3ff43c5166ead1787c4334553be26cc84da
Author: Jonathan Wakely 
Date:   Tue Sep 10 14:25:41 2024 +0100

libstdc++: std::string move assignment should not use POCCA trait [PR116641]

The changes to implement LWG 2579 (r10-327-gdb33efde17932f) made
std::string::assign use the propagate_on_container_copy_assignment
(POCCA) trait, for consistency with operator=(const basic_string&).
However, this also unintentionally affected operator=(basic_string&&)
which calls assign(str) to make a deep copy when performing a move is
not possible. The fix is for the move assignment operator to call
_M_assign(str) instead of assign(str), as this just does the deep copy
and doesn't check the POCCA trait first.

The bug only affects the unlikely/useless combination of POCCA==true and
POCMA==false, but we should fix it for correctness anyway. It should
also make move assignment slightly cheaper to compile and execute,
because we skip the extra code in assign(const basic_string&).

libstdc++-v3/ChangeLog:

PR libstdc++/116641
* include/bits/basic_string.h (operator=(basic_string&&)): Call
_M_assign instead of assign.
* testsuite/21_strings/basic_string/allocator/116641.cc: New
test.

(cherry picked from commit c07cf418fdde0c192e370a8d76a991cc7215e9c4)

Diff:
---
 libstdc++-v3/include/bits/basic_string.h   |  2 +-
 .../21_strings/basic_string/allocator/116641.cc| 53 ++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index 8a695a494efd..2794ec6419ac 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -912,7 +912,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
  __str._M_data(__str._M_use_local_data());
  }
else // Need to do a deep copy
- assign(__str);
+ _M_assign(__str);
__str.clear();
return *this;
   }
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/allocator/116641.cc 
b/libstdc++-v3/testsuite/21_strings/basic_string/allocator/116641.cc
new file mode 100644
index ..a1a411b87faa
--- /dev/null
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/allocator/116641.cc
@@ -0,0 +1,53 @@
+// { dg-do run { target c++11 } }
+// { dg-require-effective-target cxx11_abi }
+
+// Bug 116641 - std::string move assignment incorrectly depends on POCCA
+
+#include <string>
+#include <testsuite_hooks.h>
+
+template<typename T>
+struct Alloc
+{
+  using value_type = T;
+  using propagate_on_container_swap = std::false_type;
+  using propagate_on_container_copy_assignment = std::true_type;
+  using propagate_on_container_move_assignment = std::false_type;
+
+  Alloc(int id) : id(id) { }
+
+  template<typename U>
+Alloc(const Alloc<U>& a) : id(a.id) { }
+
+  T* allocate(unsigned long n)
+  { return std::allocator<T>().allocate(n); }
+
+  void deallocate(T* p, unsigned long n)
+  { std::allocator<T>().deallocate(p, n); }
+
+  Alloc& operator=(const Alloc&) { throw; }
+
+  bool operator==(const Alloc& a) const { return id == a.id; }
+  bool operator!=(const Alloc& a) const { return id != a.id; }
+
+  int id;
+};
+
+void
+test_pr116641()
+{
+  Alloc<char> a1(1), a2(2);
+  std::basic_string<char, std::char_traits<char>, Alloc<char>> s1(a1), s2(a2);
+
+  s1 = "allocator should not propagate on move assignment";
+  VERIFY( s1.get_allocator() == a1 );
+  VERIFY( s2.get_allocator() == a2 );
+  s2 = std::move(s1);
+  VERIFY( s1.get_allocator() == a1 );
+  VERIFY( s2.get_allocator() == a2 );
+}
+
+int main()
+{
+  test_pr116641();
+}


[gcc r14-10663] libstdc++: Only use std::ios_base_library_init() for ELF [PR116159]

2024-09-11 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:2003f890b13b8ec35b6112fc13c7e69e61cd9162

commit r14-10663-g2003f890b13b8ec35b6112fc13c7e69e61cd9162
Author: Jonathan Wakely 
Date:   Tue Sep 10 14:36:26 2024 +0100

libstdc++: Only use std::ios_base_library_init() for ELF [PR116159]

The undefined std::ios_base_library_init() symbol that is referenced by
<iostream> is only supposed to be used for targets where symbol
versioning is supported.

The mingw-w64 target defaults to --enable-symvers=gnu due to using GNU
ld but doesn't actually support symbol versioning. This means it tries
to emit references to the std::ios_base_library_init() symbol, which
isn't really defined in the library. This causes problems when using lld
to link user binaries.

Disable the undefined symbol reference for non-ELF targets.

libstdc++-v3/ChangeLog:

PR libstdc++/116159
* include/std/iostream (ios_base_library_init): Only define for
ELF targets.
* src/c++98/ios_init.cc (ios_base_library_init): Likewise.

(cherry picked from commit fc7a1fb0238e379d466316aa219734ac61f4bc0e)

Diff:
---
 libstdc++-v3/include/std/iostream  | 2 +-
 libstdc++-v3/src/c++98/ios_init.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/iostream 
b/libstdc++-v3/include/std/iostream
index 0c6a2d8a4b30..208fd1d51381 100644
--- a/libstdc++-v3/include/std/iostream
+++ b/libstdc++-v3/include/std/iostream
@@ -78,7 +78,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #if !(_GLIBCXX_USE_INIT_PRIORITY_ATTRIBUTE \
   && __has_attribute(__init_priority__))
   static ios_base::Init __ioinit;
-#elif defined(_GLIBCXX_SYMVER_GNU)
+#elif defined(_GLIBCXX_SYMVER_GNU) && defined(__ELF__)
   __extension__ __asm (".globl _ZSt21ios_base_library_initv");
 #endif
 
diff --git a/libstdc++-v3/src/c++98/ios_init.cc 
b/libstdc++-v3/src/c++98/ios_init.cc
index 1422e20d9405..6e2e5014cf0f 100644
--- a/libstdc++-v3/src/c++98/ios_init.cc
+++ b/libstdc++-v3/src/c++98/ios_init.cc
@@ -199,7 +199,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 return __ret;
   }
 
-#ifdef _GLIBCXX_SYMVER_GNU
+#if defined(_GLIBCXX_SYMVER_GNU) && defined(__ELF__)
 #pragma GCC diagnostic ignored "-Wattribute-alias"
 
   void ios_base_library_init (void)


[gcc r15-3581] fortran/openmp.cc: Fix var init and locus use to avoid uninit values [PR fortran/116661]

2024-09-11 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:4e9265a474def98cb6cdb59c15fbcb7630ba330e

commit r15-3581-g4e9265a474def98cb6cdb59c15fbcb7630ba330e
Author: Tobias Burnus 
Date:   Wed Sep 11 09:25:47 2024 +0200

fortran/openmp.cc: Fix var init and locus use to avoid uninit values [PR 
fortran/116661]

gcc/fortran/ChangeLog:

PR fortran/116661
* openmp.cc (gfc_match_omp_prefer_type): NULL init a gfc_expr
variable and use right locus in gfc_error.

Diff:
---
 gcc/fortran/openmp.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index c04d8b0f5281..1145e2ff8900 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -1860,6 +1860,7 @@ gfc_match_omp_prefer_type (char **pref_str, int 
*pref_str_len, int **pref_int_ar
  }
fr_found = true;
gfc_symbol *sym = NULL;
+   e = NULL;
locus loc = gfc_current_locus;
if (gfc_match_symbol (&sym, 0) != MATCH_YES
|| gfc_match (" _") == MATCH_YES)
@@ -1881,7 +1882,7 @@ gfc_match_omp_prefer_type (char **pref_str, int 
*pref_str_len, int **pref_int_ar
  {
gfc_error ("Expected constant integer identifier or "
   "non-empty default-kind character literal at %L",
-  &e->where);
+  &loc);
gfc_free_expr (e);
return MATCH_ERROR;
  }


[gcc r15-3582] OpenMP: Add interop routines to omp_runtime_api_procname

2024-09-11 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:6291f25631500c2d1c2328f919aa4405c3837f02

commit r15-3582-g6291f25631500c2d1c2328f919aa4405c3837f02
Author: Tobias Burnus 
Date:   Wed Sep 11 12:02:24 2024 +0200

OpenMP: Add interop routines to omp_runtime_api_procname

gcc/
* omp-general.cc (omp_runtime_api_procname): Add
omp_get_interop_{int,name,ptr,rc_desc,str,type_desc}
and omp_get_num_interop_properties.

Diff:
---
 gcc/omp-general.cc | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 0b61335dba40..aaa179afe13e 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -3260,7 +3260,10 @@ omp_runtime_api_procname (const char *name)
   "alloc",
   "calloc",
   "free",
+  "get_interop_int",
+  "get_interop_ptr",
   "get_mapped_ptr",
+  "get_num_interop_properties",
   "realloc",
   "target_alloc",
   "target_associate_ptr",
@@ -3289,6 +3292,10 @@ omp_runtime_api_procname (const char *name)
   "get_device_num",
   "get_dynamic",
   "get_initial_device",
+  "get_interop_name",
+  "get_interop_rc_desc",
+  "get_interop_str",
+  "get_interop_type_desc",
   "get_level",
   "get_max_active_levels",
   "get_max_task_priority",


[gcc r15-3583] c++: Ensure ANNOTATE_EXPRs remain outermost expressions in conditions [PR116140]

2024-09-11 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:f97d86242b86e4ad2bef3623c97e91481840a210

commit r15-3583-gf97d86242b86e4ad2bef3623c97e91481840a210
Author: Alex Coplan 
Date:   Fri Aug 2 09:52:50 2024 +0100

c++: Ensure ANNOTATE_EXPRs remain outermost expressions in conditions 
[PR116140]

For the testcase added with this patch, we would end up losing the:

  #pragma GCC unroll 4

and emitting "warning: ignoring loop annotation".  That warning comes
from tree-cfg.cc:replace_loop_annotate, and means that we failed to
process the ANNOTATE_EXPR in tree-cfg.cc:replace_loop_annotate_in_block.
That function walks backwards over the GIMPLE in an exiting BB for a
loop, skipping over the final gcond, and looks for any ANNOTATE_EXPRS
immediately preceding the gcond.

The function documents the following pre-condition:

   /* [...] We assume that the annotations come immediately before the
  condition in BB, if any.  */

now looking at the exiting BB of the loop, we have:

   :
  D.4524 = .ANNOTATE (iftmp.1, 1, 4);
  retval.0 = D.4524;
  if (retval.0 != 0)
goto ; [INV]
  else
goto ; [INV]

and crucially there is an intervening assignment between the gcond and
the preceding .ANNOTATE ifn call.  To see where this comes from, we can
look to the IR given by -fdump-tree-original:

  if (<::operator() (&pred, *first), unroll 4>>>)
goto ;
  else
goto ;

here the problem is that we've wrapped a CLEANUP_POINT_EXPR around the
ANNOTATE_EXPR, meaning the ANNOTATE_EXPR is no longer the outermost
expression in the condition.

The CLEANUP_POINT_EXPR gets added by the following call chain:

finish_while_stmt_cond
 -> maybe_convert_cond
 -> condition_conversion
 -> fold_build_cleanup_point_expr

this patch chooses to fix the issue by first introducing a new helper
class (annotate_saver) to save and restore outer chains of
ANNOTATE_EXPRs and then using it in maybe_convert_cond.

With this patch, we don't get any such warning and the loop gets unrolled as
expected at -O2.

gcc/cp/ChangeLog:

PR libstdc++/116140
* semantics.cc (annotate_saver): New. Use it ...
(maybe_convert_cond): ... here, to ensure any ANNOTATE_EXPRs
remain the outermost expression(s) of the condition.

gcc/testsuite/ChangeLog:

PR libstdc++/116140
* g++.dg/ext/pragma-unroll-lambda.C: New test.

Diff:
---
 gcc/cp/semantics.cc | 88 -
 gcc/testsuite/g++.dg/ext/pragma-unroll-lambda.C | 17 +
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 3e117c216da5..63212afafb3b 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -951,6 +951,86 @@ maybe_warn_unparenthesized_assignment (tree t, bool 
nested_p,
 }
 }
 
+/* Helper class for saving/restoring ANNOTATE_EXPRs.  For a tree node t, users
+   can construct one of these like so:
+
+ annotate_saver s (&t);
+
+   and t will be updated to have any annotations removed.  The user can then
+   transform t, and later restore the ANNOTATE_EXPRs with:
+
+ t = s.restore (t).
+
+   The intent is to ensure that any ANNOTATE_EXPRs remain the outermost
+   expressions following any operations on t.  */
+
+class annotate_saver {
+  /* The chain of saved annotations, if there were any.  Otherwise null.  */
+  tree m_annotations;
+
+  /* If M_ANNOTATIONS is non-null, then M_INNER points to TREE_OPERAND (A, 0)
+ for the innermost annotation A.  */
+  tree *m_inner;
+
+public:
+  annotate_saver (tree *);
+  tree restore (tree);
+};
+
+/* If *COND is an ANNOTATE_EXPR, walk through the chain of annotations, and set
+   *COND equal to the first non-ANNOTATE_EXPR (saving a pointer to the
+   original chain of annotations for later use in restore).  */
+
+annotate_saver::annotate_saver (tree *cond) : m_annotations (nullptr)
+{
+  tree *t = cond;
+  while (TREE_CODE (*t) == ANNOTATE_EXPR)
+t = &TREE_OPERAND (*t, 0);
+
+  if (t != cond)
+{
+  m_annotations = *cond;
+  *cond = *t;
+  m_inner = t;
+}
+}
+
+/* If we didn't strip any annotations on construction, return NEW_INNER
+   unmodified.  Otherwise, wrap the saved annotations around NEW_INNER 
(updating
+   the types and flags of the annotations if needed) and return the resulting
+   expression.  */
+
+tree
+annotate_saver::restore (tree new_inner)
+{
+  if (!m_annotations)
+return new_inner;
+
+  /* If the type of the inner expression changed, we need to update the types
+ of all the ANNOTATE_EXPRs.  We may need to update the flags too, but we
+ assume they only change if the type of the inner expression changes.
+ The flag update logic assumes that the other operands to the
+ ANNOTATE_EXPRs are always INTEGER_CS

[gcc r15-3584] testsuite: Ensure ltrans dump files get cleaned up properly [PR116140]

2024-09-11 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:31ff173c70847bba94613eac5b1ef2c0bec842e6

commit r15-3584-g31ff173c70847bba94613eac5b1ef2c0bec842e6
Author: Alex Coplan 
Date:   Thu Aug 8 13:15:39 2024 +

testsuite: Ensure ltrans dump files get cleaned up properly [PR116140]

I noticed while working on a test that uses LTO and requests a dump
file, that we are failing to cleanup ltrans dump files in the testsuite.

E.g. the test I was working on compiles with -flto
-fdump-rtl-loop2_unroll, and we end up with the following file:

./gcc/testsuite/g++/pr116140.ltrans0.ltrans.287r.loop2_unroll

being left behind by the testsuite.  This is problematic not just from a
"missing cleanup" POV, but also because it can cause the test to pass
spuriously when the test is re-run with an unpatched compiler (without
the bug fix).  In the broken case, loop2_unroll isn't run at all, so we
end up scanning the old dumpfile (from the previous test run) and making
the dumpfile scan pass.

Running with `-v -v` in RUNTESTFLAGS we can see the following cleanup
attempt is made:

remove-build-file 
`pr116140.{C,exe}.{ltrans[0-9]*.,}[0-9][0-9][0-9]{l,i,r,t}.*'

looking again at the ltrans dump file above we can see this will fail for two
reasons:

 - The actual dump file has no {C,exe} extension between the basename and
   ltrans0.
 - The actual dump file has an additional `.ltrans` component after
   `.ltrans0`.

This patch therefore relaxes the pattern constructed for cleaning up such
dumpfiles to also match dumpfiles with the above form.

Running the testsuite before/after this patch shows the number of files in
gcc/testsuite (in the build dir) with "ltrans" in the name goes from 1416 
to 62
on aarch64.

gcc/testsuite/ChangeLog:

PR libstdc++/116140
* lib/gcc-dg.exp (schedule-cleanups): Relax ltrans dumpfile
cleanup pattern to handle missing cases.

Diff:
---
 gcc/testsuite/lib/gcc-dg.exp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp
index d9513e2859ce..cb401a704359 100644
--- a/gcc/testsuite/lib/gcc-dg.exp
+++ b/gcc/testsuite/lib/gcc-dg.exp
@@ -190,7 +190,7 @@ proc schedule-cleanups { opts } {
 # Handle ltrans files around -flto
 if [regexp -- {(^|\s+)-flto(\s+|$)} $opts] {
verbose "Cleanup -flto seen" 4
-   set ltrans "{ltrans\[0-9\]*.,}"
+   set ltrans "{ltrans\[0-9\]*{.ltrans,}.,}"
 } else {
set ltrans ""
 }
@@ -206,7 +206,7 @@ proc schedule-cleanups { opts } {
if {$basename_ext != ""} {
regsub -- {^.*\.} $basename_ext {} basename_ext
}
-   lappend tfiles "$stem.{$basename_ext,exe}"
+   lappend tfiles "$stem{.$basename_ext,.exe,}"
unset basename_ext
} else {
lappend tfiles $basename


[gcc r15-3585] lto: Stream has_unroll flag during LTO [PR116140]

2024-09-11 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:9759f6299d9633cabac540e5c893341c708093ac

commit r15-3585-g9759f6299d9633cabac540e5c893341c708093ac
Author: Alex Coplan 
Date:   Sat Aug 3 17:02:36 2024 +

lto: Stream has_unroll flag during LTO [PR116140]

When #pragma GCC unroll is processed in
tree-cfg.cc:replace_loop_annotate_in_block, we set both the loop->unroll
field (which is currently streamed out and back in during LTO) but also
the cfun->has_unroll flag.

cfun->has_unroll, however, is not currently streamed during LTO.  This
patch fixes that.

Prior to this patch, loops marked with #pragma GCC unroll that would be
unrolled by RTL loop2_unroll in a non-LTO compilation didn't get
unrolled under LTO.

gcc/ChangeLog:

PR libstdc++/116140
* lto-streamer-in.cc (input_struct_function_base): Stream in
fn->has_unroll.
* lto-streamer-out.cc (output_struct_function_base): Stream out
fn->has_unroll.

gcc/testsuite/ChangeLog:

PR libstdc++/116140
* g++.dg/ext/pragma-unroll-lambda-lto.C: New test.

Diff:
---
 gcc/lto-streamer-in.cc |  1 +
 gcc/lto-streamer-out.cc|  1 +
 .../g++.dg/ext/pragma-unroll-lambda-lto.C  | 32 ++
 3 files changed, 34 insertions(+)

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 64f758073280..9d0ec5d589c4 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1326,6 +1326,7 @@ input_struct_function_base (struct function *fn, class 
data_in *data_in,
   fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1);
   fn->has_simduid_loops = bp_unpack_value (&bp, 1);
   fn->has_musttail = bp_unpack_value (&bp, 1);
+  fn->has_unroll = bp_unpack_value (&bp, 1);
   fn->assume_function = bp_unpack_value (&bp, 1);
   fn->va_list_fpr_size = bp_unpack_value (&bp, 8);
   fn->va_list_gpr_size = bp_unpack_value (&bp, 8);
diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
index a4b171358d41..807b935537be 100644
--- a/gcc/lto-streamer-out.cc
+++ b/gcc/lto-streamer-out.cc
@@ -2283,6 +2283,7 @@ output_struct_function_base (struct output_block *ob, 
struct function *fn)
   bp_pack_value (&bp, fn->has_force_vectorize_loops, 1);
   bp_pack_value (&bp, fn->has_simduid_loops, 1);
   bp_pack_value (&bp, fn->has_musttail, 1);
+  bp_pack_value (&bp, fn->has_unroll, 1);
   bp_pack_value (&bp, fn->assume_function, 1);
   bp_pack_value (&bp, fn->va_list_fpr_size, 8);
   bp_pack_value (&bp, fn->va_list_gpr_size, 8);
diff --git a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C 
b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
new file mode 100644
index ..144c4c326924
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
@@ -0,0 +1,32 @@
+// { dg-do link { target c++11 } }
+// { dg-options "-O2 -flto -fdump-rtl-loop2_unroll" }
+
+#include 
+
+template<typename Iter, typename Pred>
+inline Iter
+my_find(Iter first, Iter last, Pred pred)
+{
+#pragma GCC unroll 4
+while (first != last && !pred(*first))
+++first;
+return first;
+}
+
+__attribute__((noipa))
+short *use_find(short *p)
+{
+auto pred = [](short x) { return x == 42; };
+return my_find(p, p + 1024, pred);
+}
+
+int main(void)
+{
+  short a[1024];
+  for (int i = 0; i < 1024; i++)
+a[i] = rand ();
+
+  return use_find (a) - a;
+}
+
+// { dg-final { scan-ltrans-rtl-dump-times "Unrolled loop 3 times" 1 
"loop2_unroll" } }


[gcc r15-3586] libstdc++: Restore unrolling in std::find using pragma [PR116140]

2024-09-11 Thread Alex Coplan via Libstdc++-cvs
https://gcc.gnu.org/g:3fd07d4f04f43816a038daf9b16c6d5bf2e96c9b

commit r15-3586-g3fd07d4f04f43816a038daf9b16c6d5bf2e96c9b
Author: Alex Coplan 
Date:   Fri Aug 2 09:56:07 2024 +0100

libstdc++: Restore unrolling in std::find using pragma [PR116140]

Together with the preparatory compiler patches, this patch restores
unrolling in std::__find_if, but this time relying on the compiler to do
it by using:

  #pragma GCC unroll 4

which should recover the majority of the regression relative to the
hand-unrolled version while still being vectorizable with WIP alignment
peeling enhancements.

On Neoverse V1 with LTO, this reduces the regression in xalancbmk (from
SPEC CPU 2017) from 5.8% to 1.7% (restoring ~71% of the lost
performance).

libstdc++-v3/ChangeLog:

PR libstdc++/116140
* include/bits/stl_algobase.h (std::__find_if): Add #pragma to
request GCC to unroll the loop.

Diff:
---
 libstdc++-v3/include/bits/stl_algobase.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/include/bits/stl_algobase.h 
b/libstdc++-v3/include/bits/stl_algobase.h
index 27f6c377ad6f..f13662fc4482 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -2104,6 +2104,7 @@ _GLIBCXX_END_NAMESPACE_ALGO
 inline _Iterator
 __find_if(_Iterator __first, _Iterator __last, _Predicate __pred)
 {
+#pragma GCC unroll 4
   while (__first != __last && !__pred(__first))
++__first;
   return __first;


[gcc r15-3587] tree-optimization/116674 - vectorizable_simd_clone_call and re-analysis

2024-09-11 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:09a514fbb67caf7e33a6ceddf524ee21024c33c5

commit r15-3587-g09a514fbb67caf7e33a6ceddf524ee21024c33c5
Author: Richard Biener 
Date:   Wed Sep 11 13:54:33 2024 +0200

tree-optimization/116674 - vectorizable_simd_clone_call and re-analysis

When SLP analysis scraps an instance because it fails to analyze we
can end up calling vectorizable_* in analysis mode on a node that
was analyzed during the analysis of that instance again.
vectorizable_simd_clone_call wasn't expecting that and instead
guarded analysis/transform code on populated data structures.
The following changes it so it survives re-analysis.

PR tree-optimization/116674
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Support
re-analysis.

* g++.dg/vect/pr116674.cc: New testcase.

Diff:
---
 gcc/testsuite/g++.dg/vect/pr116674.cc | 85 +++
 gcc/tree-vect-stmts.cc|  8 ++--
 2 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr116674.cc 
b/gcc/testsuite/g++.dg/vect/pr116674.cc
new file mode 100644
index ..1c13f12290bc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr116674.cc
@@ -0,0 +1,85 @@
+// { dg-do compile }
+// { dg-require-effective-target c++11 }
+// { dg-additional-options "-Ofast" }
+// { dg-additional-options "-march=x86-64-v3" { target { x86_64-*-* i?86-*-* } 
} }
+
+namespace std {
+typedef int a;
+template  struct b;
+template  class aa {};
+template  c d(c e, c) { return e; }
+template  struct b> {
+   using f = c;
+   using g = c *;
+   template  using j = aa;
+};
+} // namespace std
+namespace l {
+template  struct m : std::b {
+   typedef std::b n;
+   typedef typename n::f &q;
+   template  struct ac { typedef typename n::j ad; };
+};
+} // namespace l
+namespace std {
+template  struct o {
+   typedef typename l::m::ac::ad ae;
+   typedef typename l::m::g g;
+   struct p {
+   g af;
+   };
+   struct ag : p {
+   ag(ae) {}
+   };
+   typedef ab u;
+   o(a, u e) : ah(e) {}
+   ag ah;
+};
+template > class r : o {
+   typedef o s;
+   typedef typename s::ae ae;
+   typedef l::m w;
+
+public:
+   c f;
+   typedef typename w::q q;
+   typedef a t;
+   typedef ab u;
+   r(t x, u e = u()) : s(ai(x, e), e) {}
+   q operator[](t x) { return *(this->ah.af + x); }
+   t ai(t x, u) { return x; }
+};
+extern "C" __attribute__((__simd__)) double exp(double);
+} // namespace std
+using namespace std;
+int ak;
+double v, y;
+void am(double, int an, double, double, double, double, double, double, double,
+   double, double, double, int, double, double, double, double,
+   r ap, double, double, double, double, double, double, double,
+   double, r ar, r as, double, double, r at,
+   r au, r av, double, double) {
+double ba;
+for (int k;;)
+  for (int i; i < an; ++i) {
+ y = i;
+ v = d(y, 25.0);
+ ba = exp(v);
+ ar[i * (ak + 1)] = ba;
+ as[i * (ak + 1)] = ar[i * (ak + 1)];
+ if (k && ap[k]) {
+ at[i * (ak + 1)] = av[i * (ak + 1)] = as[i * (ak + 1)];
+ au[i * (ak + 1)] = ar[i * (ak + 1)];
+ } else {
+ au[i * (ak + 1)] = ba;
+ at[i * (ak + 1)] = av[i * (ak + 1)] = k;
+ }
+  }
+}
+void b(int bc) {
+double bd, be, bf, bg, bh, ao, ap, bn, bo, bp, bq, br, bs, bt, bu, bv, bw, 
bx,
+by, aq, ar, as, bz, ca, at, au, av, cb, aw;
+int bi;
+am(bh, bc, bi, bi, bi, bi, bv, bw, bx, by, bu, bt, bi, ao, bn, bo, bp, ap, 
bq,
+   br, bs, bd, be, bf, bg, aq, ar, as, bz, ca, at, au, av, cb, aw);
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f6c5b7a7e872..b1353c91fce1 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3985,6 +3985,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, 
stmt_vec_info stmt_info,
 
   vec& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
: STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
+  if (!vec_stmt)
+simd_clone_info.truncate (0);
   arginfo.reserve (nargs, true);
   auto_vec slp_op;
   slp_op.safe_grow_cleared (nargs);
@@ -4033,10 +4035,10 @@ vectorizable_simd_clone_call (vec_info *vinfo, 
stmt_vec_info stmt_info,
 
   /* For linear arguments, the analyze phase should have saved
 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO.  */
-  if (i * 3 + 4 <= simd_clone_info.length ()
+  if (vec_stmt
+ && i * 3 + 4 <= simd_clone_info.length ()
  && simd_clone_info[i * 3 + 2])
{
- gcc_assert (vec_stmt);
  thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
  thisarginfo.op = simd_clone_info[i * 3 + 1];
  thisarginfo.simd_la

[gcc r15-3588] arm: avoid indirect sibcalls when IP is live [PR116597]

2024-09-11 Thread Richard Earnshaw via Gcc-cvs
https://gcc.gnu.org/g:670cfd5fe6433ee8f2e86eedb197d2523dbb033b

commit r15-3588-g670cfd5fe6433ee8f2e86eedb197d2523dbb033b
Author: Richard Earnshaw 
Date:   Wed Aug 21 16:15:34 2024 +0100

arm: avoid indirect sibcalls when IP is live [PR116597]

On Arm only r0-r3 (the argument registers) and IP are available for
use as an address for an indirect sibcall.  But if all the argument
registers are used and IP is clobbered during the epilogue, or is used
to pass closure information, then there is no spare register to hold
the address and we must reject the sibcall.

arm_function_ok_for_sibcall did try to handle this, but it did this by
examining the function declaration.  That doesn't work if the function
has no prototype, or if the prototype has variadic arguments: we must,
instead, look at the list of actuals for the call rather than the list
of formals.

The old code also worked by laying out all the arguments and then
trying to add one more integer argument at the end of the list, but
this missed a corner case where a hole had been left in the argument
register list due to argument alignment.

We fix all of this by now scanning the list of actual values to be
passed and then checking if a core register has been assigned to that
argument.  If it has, then we record which registers were assigned.
Once done we then look to see if all the argument registers have been
assigned and only block the sibcall if that is the case.  This permits
us to sibcall:

int (*d)(int, ...);
int g(void);
int i () { return d(g(), 2LL);}

because r1 remains free (the 2LL argument is passed in {r2,r3}).

gcc/
PR target/116597
* config/arm/arm.cc (arm_function_ok_for_sibcall): Use the list of
actuals for the call, not the list of formals.

gcc/testsuite/
PR target/116597
* gcc.target/arm/pac-sibcall-2.c: New test.
* gcc.target/arm/pac-sibcall-3.c: New test.

Diff:
---
 gcc/config/arm/arm.cc| 38 
 gcc/testsuite/gcc.target/arm/pac-sibcall-2.c | 14 ++
 gcc/testsuite/gcc.target/arm/pac-sibcall-3.c | 14 ++
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 17485447693b..de34e9867e67 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -8007,10 +8007,11 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
   && DECL_WEAK (decl))
 return false;
 
-  /* We cannot tailcall an indirect call by descriptor if all the 
call-clobbered
- general registers are live (r0-r3 and ip).  This can happen when:
-  - IP contains the static chain, or
-  - IP is needed for validating the PAC signature.  */
+  /* Indirect tailcalls need a call-clobbered register to hold the function
+ address.  But we only have r0-r3 and ip in that class.  If r0-r3 all hold
+ function arguments, then we can only use IP.  But IP may be needed in the
+ epilogue (for PAC validation), or for passing the static chain.  We have
+ to disable the tail call if nothing is available.  */
   if (!decl
   && ((CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
  || arm_current_function_pac_enabled_p()))
@@ -8022,18 +8023,33 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
   arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
   cum_v = pack_cumulative_args (&cum);
 
-  for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
+  tree arg;
+  call_expr_arg_iterator iter;
+  unsigned used_regs = 0;
+
+  /* Layout each actual argument in turn.  If it is allocated to
+core regs, note which regs have been allocated.  */
+  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
{
- tree type = TREE_VALUE (t);
- if (!VOID_TYPE_P (type))
+ tree type = TREE_TYPE (arg);
+ function_arg_info arg_info (type, /*named=*/true);
+ rtx reg = arm_function_arg (cum_v, arg_info);
+ if (reg && REG_P (reg)
+ && REGNO (reg) <= LAST_ARG_REGNUM)
{
- function_arg_info arg (type, /*named=*/true);
- arm_function_arg_advance (cum_v, arg);
+ /* Avoid any chance of UB here.  We don't care if TYPE
+is very large since it will use up all the argument regs.  */
+ unsigned nregs = MIN (ARM_NUM_REGS2 (GET_MODE (reg), type),
+   LAST_ARG_REGNUM + 1);
+ used_regs |= ((1 << nregs) - 1) << REGNO (reg);
}
+ arm_function_arg_advance (cum_v, arg_info);
}
 
-  function_arg_info arg (integer_type_node, /*named=*/true);
-  if (!arm_function_arg (cum_v, arg))
+  /* We've used all the argument regs, and we know IP is live during the
+epilogue for some re

[gcc r15-3589] ipa: Rename ipa_supports_p to ipa_vr_supported_type_p

2024-09-11 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:323291c29c77e3214f4850129bb8a3d0d8da6a45

commit r15-3589-g323291c29c77e3214f4850129bb8a3d0d8da6a45
Author: Martin Jambor 
Date:   Wed Sep 11 23:53:21 2024 +0200

ipa: Rename ipa_supports_p to ipa_vr_supported_type_p

ipa_supports_p is not a name that captures well what the predicate
determines.  Therefore, this patch renames it to ipa_vr_supported_type_p.

gcc/ChangeLog:

2024-09-06  Martin Jambor  

* ipa-cp.h (ipa_supports_p): Rename to ipa_vr_supported_type_p.
* ipa-cp.cc (ipa_vr_operation_and_type_effects): Adjust called
function name.
(propagate_vr_across_jump_function): Likewise.
* ipa-prop.cc (ipa_compute_jump_functions_for_edge): Likewise.
(ipcp_get_parm_bits): Likewise.

Diff:
---
 gcc/ipa-cp.cc   | 5 +++--
 gcc/ipa-cp.h| 2 +-
 gcc/ipa-prop.cc | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 56468dc40ee4..a1033b81aefc 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -1649,7 +1649,8 @@ ipa_vr_operation_and_type_effects (vrange &dst_vr,
   enum tree_code operation,
   tree dst_type, tree src_type)
 {
-  if (!ipa_supports_p (dst_type) || !ipa_supports_p (src_type))
+  if (!ipa_vr_supported_type_p (dst_type)
+  || !ipa_vr_supported_type_p (src_type))
 return false;
 
   range_op_handler handler (operation);
@@ -2553,7 +2554,7 @@ propagate_vr_across_jump_function (cgraph_edge *cs, 
ipa_jump_func *jfunc,
  ipa_range_set_and_normalize (op_vr, op);
 
  if (!handler
- || !ipa_supports_p (operand_type)
+ || !ipa_vr_supported_type_p (operand_type)
  /* Sometimes we try to fold comparison operators using a
 pointer type to hold the result instead of a boolean
 type.  Avoid trapping in the sanity check in
diff --git a/gcc/ipa-cp.h b/gcc/ipa-cp.h
index 4616c61625ab..ba2ebfede63f 100644
--- a/gcc/ipa-cp.h
+++ b/gcc/ipa-cp.h
@@ -294,7 +294,7 @@ bool values_equal_for_ipcp_p (tree x, tree y);
 /* Return TRUE if IPA supports ranges of TYPE.  */
 
 static inline bool
-ipa_supports_p (tree type)
+ipa_vr_supported_type_p (tree type)
 {
   return irange::supports_p (type) || prange::supports_p (type);
 }
diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 99ebd6229ec4..78d1fb7086d5 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -2392,8 +2392,8 @@ ipa_compute_jump_functions_for_edge (struct 
ipa_func_body_info *fbi,
   else
{
  if (param_type
- && ipa_supports_p (TREE_TYPE (arg))
- && ipa_supports_p (param_type)
+ && ipa_vr_supported_type_p (TREE_TYPE (arg))
+ && ipa_vr_supported_type_p (param_type)
  && get_range_query (cfun)->range_of_expr (vr, arg, cs->call_stmt)
  && !vr.undefined_p ())
{
@@ -5761,7 +5761,7 @@ ipcp_get_parm_bits (tree parm, tree *value, widest_int 
*mask)
   ipcp_transformation *ts = ipcp_get_transformation_summary (cnode);
   if (!ts
   || vec_safe_length (ts->m_vr) == 0
-  || !ipa_supports_p (TREE_TYPE (parm)))
+  || !ipa_vr_supported_type_p (TREE_TYPE (parm)))
 return false;
 
   int i = ts->get_param_index (current_function_decl, parm);


[gcc r15-3590] ipa-cp: One more use of ipa_vr_supported_type_p

2024-09-11 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:f910b02919036647a3f096265cda19358dded628

commit r15-3590-gf910b02919036647a3f096265cda19358dded628
Author: Martin Jambor 
Date:   Wed Sep 11 23:53:21 2024 +0200

ipa-cp: One more use of ipa_vr_supported_type_p

Since we have the predicate, this patch converts one more check for
essentially the same thing into its use.

2024-09-11  Martin Jambor  

* ipa-cp.cc (propagate_vr_across_jump_function): Use
ipa_vr_supported_type_p instead of explicit check for integral and
pointer types.

Diff:
---
 gcc/ipa-cp.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index a1033b81aefc..fa7bd6a15da7 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -2519,8 +2519,7 @@ propagate_vr_across_jump_function (cgraph_edge *cs, 
ipa_jump_func *jfunc,
 return false;
 
   if (!param_type
-  || (!INTEGRAL_TYPE_P (param_type)
- && !POINTER_TYPE_P (param_type)))
+  || !ipa_vr_supported_type_p (param_type))
 return dest_lat->set_to_bottom ();
 
   if (jfunc->type == IPA_JF_PASS_THROUGH)


[gcc r15-3592] c++/modules: Really always track partial specialisations [PR116496]

2024-09-11 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:ba393bf8879e5cf1f917bd88246d6b80ac081052

commit r15-3592-gba393bf8879e5cf1f917bd88246d6b80ac081052
Author: Nathaniel Shead 
Date:   Wed Sep 11 22:41:21 2024 +1000

c++/modules: Really always track partial specialisations [PR116496]

My last fix for this issue (PR c++/114947, r15-810) didn't go far
enough; I had assumed that the issue where we lost track of partial
specialisations we would need to walk again later was limited to
partitions (where we always re-walk all specialisations), but the linked
PR is the same cause but for header units, and it is possible to
construct test cases exposing the same bug just for normal modules.

As such this patch just unconditionally ensures that whenever we modify
DECL_TEMPLATE_SPECIALIZATIONS we also track any partial specialisations
that might have been added.

Also clean up a couple of comments and assertions to make expected state
more obvious when processing these specs.

PR c++/116496

gcc/cp/ChangeLog:

* module.cc (trees_in::decl_value): Don't call
set_defining_module_for_partial_spec here.
(depset::hash::add_partial_entities): Clarify assertions.
* pt.cc (add_mergeable_specialization): Always call
set_defining_module_for_partial_spec when adding a partial spec.

gcc/testsuite/ChangeLog:

* g++.dg/modules/partial-5_a.C: New test.
* g++.dg/modules/partial-5_b.C: New test.

Signed-off-by: Nathaniel Shead 

Diff:
---
 gcc/cp/module.cc   | 25 -
 gcc/cp/pt.cc   |  1 +
 gcc/testsuite/g++.dg/modules/partial-5_a.C |  9 +
 gcc/testsuite/g++.dg/modules/partial-5_b.C |  9 +
 4 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index dc0e9e5520f9..f5df9e875d3a 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -8434,11 +8434,6 @@ trees_in::decl_value ()
  add_mergeable_specialization (!is_type, &spec, decl, spec_flags);
}
 
-  /* When making a CMI from a partition we're going to need to walk partial
-specializations again, so make sure they're tracked.  */
-  if (state->is_partition () && (spec_flags & 2))
-   set_defining_module_for_partial_spec (inner);
-
   if (NAMESPACE_SCOPE_P (decl)
  && (mk == MK_named || mk == MK_unique
  || mk == MK_enum || mk == MK_friend_spec)
@@ -13356,16 +13351,20 @@ depset::hash::add_partial_entities (vec 
*partial_classes)
 specialization.  */
  gcc_checking_assert (dep->get_entity_kind ()
   == depset::EK_PARTIAL);
+
+ /* Only emit GM entities if reached.  */
+ if (!DECL_LANG_SPECIFIC (inner)
+ || !DECL_MODULE_PURVIEW_P (inner))
+   dep->set_flag_bit ();
}
   else
-   /* It was an explicit specialization, not a partial one.  */
-   gcc_checking_assert (dep->get_entity_kind ()
-== depset::EK_SPECIALIZATION);
-
-  /* Only emit GM entities if reached.  */
-  if (!DECL_LANG_SPECIFIC (inner)
- || !DECL_MODULE_PURVIEW_P (inner))
-   dep->set_flag_bit ();
+   {
+ /* It was an explicit specialization, not a partial one.
+We should have already added this.  */
+ gcc_checking_assert (dep->get_entity_kind ()
+  == depset::EK_SPECIALIZATION);
+ gcc_checking_assert (dep->is_special ());
+   }
 }
 }
 
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 310e5dfff033..cb3164d49147 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -31684,6 +31684,7 @@ add_mergeable_specialization (bool decl_p, spec_entry 
*elt, tree decl,
 DECL_TEMPLATE_SPECIALIZATIONS (elt->tmpl));
   TREE_TYPE (cons) = decl_p ? TREE_TYPE (elt->spec) : elt->spec;
   DECL_TEMPLATE_SPECIALIZATIONS (elt->tmpl) = cons;
+  set_defining_module_for_partial_spec (STRIP_TEMPLATE (decl));
 }
 }
 
diff --git a/gcc/testsuite/g++.dg/modules/partial-5_a.C 
b/gcc/testsuite/g++.dg/modules/partial-5_a.C
new file mode 100644
index ..768e6995f0ff
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/partial-5_a.C
@@ -0,0 +1,9 @@
+// PR c++/116496
+// { dg-additional-options "-fmodules-ts -std=c++20 -Wno-global-module" }
+// { dg-module-cmi A }
+
+module;
+template  struct S {};
+export module A;
+template  struct S {};
+template  requires false struct S {};
diff --git a/gcc/testsuite/g++.dg/modules/partial-5_b.C 
b/gcc/testsuite/g++.dg/modules/partial-5_b.C
new file mode 100644
index ..95401fe8b562
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/partial-5_b.C
@@ -0,0 +1,9 @@
+// PR c++/116496
+// { dg-additional-options "-fmodules-ts -std=c++20 -Wno-global-module" }
+// { dg-module-cmi B }
+
+module;
+template  struct S {

[gcc r15-3593] RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass

2024-09-11 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:c08e493ceee47bbeb466eeef100be7c1dd01a4e5

commit r15-3593-gc08e493ceee47bbeb466eeef100be7c1dd01a4e5
Author: garthlei 
Date:   Wed Sep 11 17:09:37 2024 +0800

RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass

This patch fixes a bug in the current vsetvl pass.  The current pass uses
`m_vl` to determine whether the dest operand has been used by non-RVV
instructions.  However, `m_vl` may have been modified as a result of an
`update_avl` call, and thus would be no longer the dest operand of the
original instruction.  This can lead to incorrect vsetvl eliminations, as is
shown in the testcase.  In this patch, we create a `dest_vl` variable for
this scenario.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc: Use `dest_vl` for dest VL operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc| 16 +++-
 .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c  | 17 +
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 017efa8bc17e..ce831685439a 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1002,6 +1002,9 @@ public:
 
   void parse_insn (insn_info *insn)
   {
+/* The VL dest of the insn */
+rtx dest_vl = NULL_RTX;
+
 m_insn = insn;
 m_bb = insn->bb ();
 /* Return if it is debug insn for the consistency with optimize == 0.  */
@@ -1035,7 +1038,10 @@ public:
 if (m_avl)
   {
if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
- m_vl = ::get_vl (insn->rtl ());
+ {
+   m_vl = ::get_vl (insn->rtl ());
+   dest_vl = m_vl;
+ }
 
if (has_nonvlmax_reg_avl ())
  m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
@@ -1132,22 +1138,22 @@ public:
   }
 
 /* Determine if dest operand(vl) has been used by non-RVV instructions.  */
-if (has_vl ())
+if (dest_vl)
   {
const hash_set vl_uses
- = get_all_real_uses (get_insn (), REGNO (get_vl ()));
+ = get_all_real_uses (get_insn (), REGNO (dest_vl));
for (use_info *use : vl_uses)
  {
gcc_assert (use->insn ()->is_real ());
rtx_insn *rinsn = use->insn ()->rtl ();
if (!has_vl_op (rinsn)
-   || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
+   || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1)
  {
m_vl_used_by_non_rvv_insn = true;
break;
  }
rtx avl = ::get_avl (rinsn);
-   if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
+   if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl))
  {
m_vl_used_by_non_rvv_insn = true;
break;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c
new file mode 100644
index ..c155f5613d27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2 -fdump-rtl-vsetvl-details" } 
*/
+
+#include 
+
+uint64_t a[2], b[2];
+
+void
+foo ()
+{
+  size_t vl = __riscv_vsetvl_e64m1 (2);
+  vuint64m1_t vx = __riscv_vle64_v_u64m1 (a, vl);
+  vx = __riscv_vslide1down_vx_u64m1 (vx, 0xull, vl);
+  __riscv_vse64_v_u64m1 (b, vx, vl);
+}
+
+/* { dg-final { scan-rtl-dump-not "Eliminate insn" "vsetvl" } }  */


[gcc r15-3594] i386: Fix incorrect avx512f-mask-type.h include

2024-09-11 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:5958279509c4601499ac22629512f1723e6744b4

commit r15-3594-g5958279509c4601499ac22629512f1723e6744b4
Author: Haochen Jiang 
Date:   Tue Sep 3 13:38:36 2024 +0800

i386: Fix incorrect avx512f-mask-type.h include

In avx512f-mask-type.h, SIZE must be defined for MASK_TYPE to be
defined correctly. Fix those testcases where SIZE is not defined
before the include of avx512f-mask-type.h.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10-helper.h: Do not include
avx512f-mask-type.h.
* gcc.target/i386/avx10_2-512-vaddnepbf16-2.c:
Define SIZE and include avx512f-mask-type.h.
* gcc.target/i386/avx10_2-512-vcmppbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtnebf162iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttnebf162ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttnebf162iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vdivnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vdpphps-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vfpclasspbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vgetexppbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vmaxpbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vminmaxnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vminmaxpd-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vminmaxph-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vminmaxps-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vminpbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vmpsadbw-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vmulnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpbssd-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpbssds-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpbsud-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpbsuds-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpbuud-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpbuuds-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpwsud-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpwsuds-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpwusd-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpwusds-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpwuud-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vpdpwuuds-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vrcppbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vreducenepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vscalefpbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vsubnepbf16-2.c: Ditto.
* gcc.target/i386/avx512fp16-vfpclassph-1b.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.target/i386/avx10-helper.h   |  1 -
 .../gcc.target/i386/avx10_2-512-vaddnepbf16-2.c| 11 ++-
 .../gcc.target/i386/avx10_2-512-vcmppbf16-2.c  |  5 +++--
 .../gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c | 16 
 .../i386/avx10_2-512-vcvtnebf162iubs-2.c   | 16 
 .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c | 16 
 .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c| 16 ++

[gcc r15-3595] RISC-V: Eliminate latter vsetvl when fused

2024-09-11 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:3f212eabbba3edc1827d6da53cf6d5a64c6524f0

commit r15-3595-g3f212eabbba3edc1827d6da53cf6d5a64c6524f0
Author: Bohan Lei 
Date:   Thu Sep 12 10:28:03 2024 +0800

RISC-V: Eliminate latter vsetvl when fused

Hi all,

A simple assembly check has been added in this version. Previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662783.html

Thanks,
Bohan

--

The current vsetvl pass eliminates a vsetvl instruction when the previous
info is "available," but does not when "compatible."  This can lead to not
only redundancy, but also incorrect behaviors when the previous info happens
to be compatible with a later vector instruction, which ends up using the
vsetvl info that should have been eliminated, as is shown in the testcase.
This patch eliminates the vsetvl when the previous info is "compatible."

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (pre_vsetvl::fuse_local_vsetvl_info):
Delete vsetvl insn when `prev_info` is compatible.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc  |  3 +++
 .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c| 19 +++
 2 files changed, 22 insertions(+)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index ce831685439a..030ffbe2ebbc 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2796,6 +2796,9 @@ pre_vsetvl::fuse_local_vsetvl_info ()
  curr_info.dump (dump_file, "");
}
  m_dem.merge (prev_info, curr_info);
+ if (!curr_info.vl_used_by_non_rvv_insn_p ()
+ && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
+   m_delete_list.safe_push (curr_info);
  if (curr_info.get_read_vl_insn ())
prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
  if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c
new file mode 100644
index ..04a8ff2945a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O2 -fno-schedule-insns 
-fdump-rtl-vsetvl-details" } */
+
+#include 
+
+vuint16m1_t
+foo (vuint16m1_t a, vuint16m1_t b, size_t avl)
+{
+  size_t vl;
+  vuint16m1_t ret;
+  uint16_t c = __riscv_vmv_x_s_u16m1_u16(a);
+  vl = __riscv_vsetvl_e8mf2 (avl);
+  ret = __riscv_vadd_vx_u16m1 (a, c, avl);
+  ret = __riscv_vadd_vv_u16m1 (ret, a, vl);
+  return ret;
+}
+
+/* { dg-final { scan-rtl-dump "Eliminate insn" "vsetvl" } }  */
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */


[gcc r15-3596] i386: Enable V2BF/V4BF vec_cmp with AVX10.2 vcmppbf16

2024-09-11 Thread Levy Hsu via Gcc-cvs
https://gcc.gnu.org/g:89d50c45048e5d7230ddde9afc8fbc83143e34cb

commit r15-3596-g89d50c45048e5d7230ddde9afc8fbc83143e34cb
Author: Levy Hsu 
Date:   Wed Sep 4 16:34:04 2024 +0930

i386: Enable V2BF/V4BF vec_cmp with AVX10.2 vcmppbf16

gcc/ChangeLog:

* config/i386/i386.cc (ix86_get_mask_mode):
Enable BFmode for targetm.vectorize.get_mask_mode with AVX10.2.
* config/i386/mmx.md (vec_cmp<mode>qi):
Implement vec_cmpv2bfqi and vec_cmpv4bfqi.

gcc/testsuite/ChangeLog:

* gcc.target/i386/part-vect-vec_cmpbf.c: New test.

Diff:
---
 gcc/config/i386/i386.cc|  3 ++-
 gcc/config/i386/mmx.md | 17 ++
 .../gcc.target/i386/part-vect-vec_cmpbf.c  | 26 ++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 45320124b91c..7dbae1d72e35 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24682,7 +24682,8 @@ ix86_get_mask_mode (machine_mode data_mode)
   /* AVX512FP16 only supports vector comparison
 to kmask for _Float16.  */
   || (TARGET_AVX512VL && TARGET_AVX512FP16
- && GET_MODE_INNER (data_mode) == E_HFmode))
+ && GET_MODE_INNER (data_mode) == E_HFmode)
+  || (TARGET_AVX10_2_256 && GET_MODE_INNER (data_mode) == E_BFmode))
 {
   if (elem_size == 4
  || elem_size == 8
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 4bc191b874b3..2f8d958dd5f0 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2290,6 +2290,23 @@
   DONE;
 })
 
+;;This instruction does not generate floating point exceptions
+(define_expand "vec_cmpqi"
+  [(set (match_operand:QI 0 "register_operand")
+   (match_operator:QI 1 ""
+ [(match_operand:VBF_32_64 2 "register_operand")
+  (match_operand:VBF_32_64 3 "nonimmediate_operand")]))]
+  "TARGET_AVX10_2_256"
+{
+  rtx op2 = lowpart_subreg (V8BFmode,
+force_reg (mode, operands[2]), mode);
+  rtx op3 = lowpart_subreg (V8BFmode,
+force_reg (mode, operands[3]), mode);
+
+  emit_insn (gen_vec_cmpv8bfqi (operands[0], operands[1], op2, op3));
+  DONE;
+})
+
 ;
 ;;
 ;; Parallel half-precision floating point rounding operations.
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c 
b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c
new file mode 100644
index ..0bb720b64324
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */
+
+typedef __bf16 __attribute__((__vector_size__ (4))) v2bf;
+typedef __bf16 __attribute__((__vector_size__ (8))) v4bf;
+
+
+#define VCMPMN(type, op, name) \
+type  \
+__attribute__ ((noinline, noclone)) \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+  return a op b;  \
+}
+
+VCMPMN (v4bf, <, lt)
+VCMPMN (v2bf, <, lt)
+VCMPMN (v4bf, <=, le)
+VCMPMN (v2bf, <=, le)
+VCMPMN (v4bf, >, gt)
+VCMPMN (v2bf, >, gt)
+VCMPMN (v4bf, >=, ge)
+VCMPMN (v2bf, >=, ge)
+VCMPMN (v4bf, ==, eq)
+VCMPMN (v2bf, ==, eq)