[gcc r15-1024] Clarify that 'gcc.dg/initpri3.c' is a LTO variant of 'gcc.dg/initpri1.c': 'gcc.dg/initpri1-lto.c' [PR46083]

2024-06-05 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:38dd7419324490b386bbac06ddc5fafbfe8629d3

commit r15-1024-g38dd7419324490b386bbac06ddc5fafbfe8629d3
Author: Thomas Schwinge 
Date:   Wed Apr 24 10:11:02 2024 +0200

Clarify that 'gcc.dg/initpri3.c' is a LTO variant of 'gcc.dg/initpri1.c': 
'gcc.dg/initpri1-lto.c' [PR46083]

Added in commit 06c9eb5136fe0e778cc3a643131eba2a3dfb77a8 (Subversion 
r168642)
"re PR lto/46083 (gcc.dg/initpri1.c FAILs with -flto/-fwhopr (attribute 
constructor/destructor doesn't work))".

PR lto/46083
gcc/testsuite/
* gcc.dg/initpri3.c: Remove.
* gcc.dg/initpri1-lto.c: New.

Diff:
---
 gcc/testsuite/gcc.dg/initpri1-lto.c |  5 +++
 gcc/testsuite/gcc.dg/initpri3.c | 64 -
 2 files changed, 5 insertions(+), 64 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/initpri1-lto.c 
b/gcc/testsuite/gcc.dg/initpri1-lto.c
new file mode 100644
index 000..98a43c3ff0d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/initpri1-lto.c
@@ -0,0 +1,5 @@
+/* { dg-do run { target init_priority } } */
+/* { dg-require-effective-target lto } */
+/* { dg-options "-flto -O3" } */
+
+#include "initpri1.c"
diff --git a/gcc/testsuite/gcc.dg/initpri3.c b/gcc/testsuite/gcc.dg/initpri3.c
deleted file mode 100644
index 1633da0141f..000
--- a/gcc/testsuite/gcc.dg/initpri3.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/* { dg-do run { target init_priority } } */
-/* { dg-require-effective-target lto } */
-/* { dg-options "-flto -O3" } */
-
-extern void abort ();
-
-int i;
-int j;
-
-void c1() __attribute__((constructor (500)));
-void c2() __attribute__((constructor (700)));
-void c3() __attribute__((constructor (600)));
-
-void c1() {
-  if (i++ != 0)
-abort ();
-}
-
-void c2() {
-  if (i++ != 2)
-abort ();
-}
-
-void c3() {
-  if (i++ != 1)
-abort ();
-}
-
-void d1() __attribute__((destructor (500)));
-void d2() __attribute__((destructor (700)));
-void d3() __attribute__((destructor (600)));
-
-void d1() {
-  if (--i != 0)
-abort ();
-}
-
-void d2() {
-  if (--i != 2)
-abort ();
-}
-
-void d3() {
-  if (j != 2)
-abort ();
-  if (--i != 1)
-abort ();
-}
-
-void cd4() __attribute__((constructor (800), destructor (800)));
-
-void cd4() {
-  if (i != 3)
-abort ();
-  ++j;
-}
-
-int main () {
-  if (i != 3)
-return 1;
-  if (j != 1)
-abort ();
-  return 0;
-}


[gcc r15-1025] Consolidate similar C/C++ test cases for 'constructor', 'destructor' function attributes with priority

2024-06-05 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:a7d75773adadfcd536a5ded48ba215f18e8c5b3d

commit r15-1025-ga7d75773adadfcd536a5ded48ba215f18e8c5b3d
Author: Thomas Schwinge 
Date:   Wed Apr 24 09:26:39 2024 +0200

Consolidate similar C/C++ test cases for 'constructor', 'destructor' 
function attributes with priority

gcc/testsuite/
* gcc.dg/initpri1.c: Integrate this...
* g++.dg/special/initpri1.C: ..., and this...
* c-c++-common/initpri1.c: ... here.
* gcc.dg/initpri1-lto.c: Adjust.
* gcc.dg/initpri2.c: Integrate this...
* g++.dg/special/initpri2.C: ..., and this...
* c-c++-common/initpri2.c: ... here.

Diff:
---
 gcc/testsuite/{gcc.dg => c-c++-common}/initpri1.c | 21 
 gcc/testsuite/{gcc.dg => c-c++-common}/initpri2.c |  1 +
 gcc/testsuite/g++.dg/special/initpri1.C   | 62 ---
 gcc/testsuite/g++.dg/special/initpri2.C   | 39 --
 gcc/testsuite/gcc.dg/initpri1-lto.c   |  2 +-
 5 files changed, 12 insertions(+), 113 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/initpri1.c 
b/gcc/testsuite/c-c++-common/initpri1.c
similarity index 68%
rename from gcc/testsuite/gcc.dg/initpri1.c
rename to gcc/testsuite/c-c++-common/initpri1.c
index b6afd7690de..387f2a39658 100644
--- a/gcc/testsuite/gcc.dg/initpri1.c
+++ b/gcc/testsuite/c-c++-common/initpri1.c
@@ -1,6 +1,5 @@
 /* { dg-do run { target init_priority } } */
-
-extern void abort (void);
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
 
 int i;
 int j;
@@ -11,17 +10,17 @@ void c3() __attribute__((constructor (600)));
 
 void c1() {
   if (i++ != 0)
-abort ();
+__builtin_abort ();
 }
 
 void c2() {
   if (i++ != 2)
-abort ();
+__builtin_abort ();
 }
 
 void c3() {
   if (i++ != 1)
-abort ();
+__builtin_abort ();
 }
 
 void d1() __attribute__((destructor (500)));
@@ -30,26 +29,26 @@ void d3() __attribute__((destructor (600)));
 
 void d1() {
   if (--i != 0)
-abort ();
+__builtin_abort ();
 }
 
 void d2() {
   if (--i != 2)
-abort ();
+__builtin_abort ();
 }
 
 void d3() {
   if (j != 2)
-abort ();
+__builtin_abort ();
   if (--i != 1)
-abort ();
+__builtin_abort ();
 }
 
 void cd4() __attribute__((constructor (800), destructor (800)));
 
 void cd4() {
   if (i != 3)
-abort ();
+__builtin_abort ();
   ++j;
 }
 
@@ -57,6 +56,6 @@ int main () {
   if (i != 3)
 return 1;
   if (j != 1)
-abort ();
+__builtin_abort ();
   return 0;
 }
diff --git a/gcc/testsuite/gcc.dg/initpri2.c 
b/gcc/testsuite/c-c++-common/initpri2.c
similarity index 92%
rename from gcc/testsuite/gcc.dg/initpri2.c
rename to gcc/testsuite/c-c++-common/initpri2.c
index fa9fda0d7f3..bda2a626c64 100644
--- a/gcc/testsuite/gcc.dg/initpri2.c
+++ b/gcc/testsuite/c-c++-common/initpri2.c
@@ -1,4 +1,5 @@
 /* { dg-do compile { target init_priority } } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
 
 /* Priorities must be in the range [0, 65535].  */
 void c1()
diff --git a/gcc/testsuite/g++.dg/special/initpri1.C 
b/gcc/testsuite/g++.dg/special/initpri1.C
deleted file mode 100644
index bd24961e46b..000
--- a/gcc/testsuite/g++.dg/special/initpri1.C
+++ /dev/null
@@ -1,62 +0,0 @@
-/* { dg-do run { target init_priority } } */
-
-extern "C" void abort ();
-
-int i;
-int j;
-
-void c1() __attribute__((constructor (500)));
-void c2() __attribute__((constructor (700)));
-void c3() __attribute__((constructor (600)));
-
-void c1() {
-  if (i++ != 0)
-abort ();
-}
-
-void c2() {
-  if (i++ != 2)
-abort ();
-}
-
-void c3() {
-  if (i++ != 1)
-abort ();
-}
-
-void d1() __attribute__((destructor (500)));
-void d2() __attribute__((destructor (700)));
-void d3() __attribute__((destructor (600)));
-
-void d1() {
-  if (--i != 0)
-abort ();
-}
-
-void d2() {
-  if (--i != 2)
-abort ();
-}
-
-void d3() {
-  if (j != 2)
-abort ();
-  if (--i != 1)
-abort ();
-}
-
-void cd4() __attribute__((constructor (800), destructor (800)));
-
-void cd4() {
-  if (i != 3)
-abort ();
-  ++j;
-}
-
-int main () {
-  if (i != 3)
-return 1;
-  if (j != 1)
-abort ();
-  return 0;
-}
diff --git a/gcc/testsuite/g++.dg/special/initpri2.C 
b/gcc/testsuite/g++.dg/special/initpri2.C
deleted file mode 100644
index fa9fda0d7f3..000
--- a/gcc/testsuite/g++.dg/special/initpri2.C
+++ /dev/null
@@ -1,39 +0,0 @@
-/* { dg-do compile { target init_priority } } */
-
-/* Priorities must be in the range [0, 65535].  */
-void c1()
- __attribute__((constructor (-1))); /* { dg-error "priorities" } */
-void c2() 
- __attribute__((constructor (65536))); /* { dg-error "priorities" } */
-void d1() 
- __attribute__((destructor (-1))); /* { dg-error "priorities" } */
-void d2() 
- __attribute__((destructor (65536))); /* { dg-error "priorities" } */
-
-/* Priorities 0-

[gcc r15-1026] Add C++ testing for 'gcc.dg/initpri1-lto.c': 'c-c++-common/initpri1-lto.c'

2024-06-05 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:08c7e87f0da24b701042110c3927ecdb3367c0b4

commit r15-1026-g08c7e87f0da24b701042110c3927ecdb3367c0b4
Author: Thomas Schwinge 
Date:   Wed Apr 24 09:26:39 2024 +0200

Add C++ testing for 'gcc.dg/initpri1-lto.c': 'c-c++-common/initpri1-lto.c'

Similar to commit a7d75773adadfcd536a5ded48ba215f18e8c5b3d
"Consolidate similar C/C++ test cases for 'constructor', 'destructor' 
function attributes with priority".

gcc/testsuite/
* gcc.dg/initpri1-lto.c: Integrate this...
* c-c++-common/initpri1-lto.c: ... here.

Diff:
---
 gcc/testsuite/c-c++-common/initpri1-lto.c | 6 ++
 gcc/testsuite/gcc.dg/initpri1-lto.c   | 5 -
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/initpri1-lto.c 
b/gcc/testsuite/c-c++-common/initpri1-lto.c
new file mode 100644
index 000..433ef356c7e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/initpri1-lto.c
@@ -0,0 +1,6 @@
+/* { dg-do run { target init_priority } } */
+/* { dg-require-effective-target lto } */
+/* { dg-options "-flto -O3" } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+
+#include "initpri1.c"
diff --git a/gcc/testsuite/gcc.dg/initpri1-lto.c 
b/gcc/testsuite/gcc.dg/initpri1-lto.c
deleted file mode 100644
index 0c97cf4b1c9..000
--- a/gcc/testsuite/gcc.dg/initpri1-lto.c
+++ /dev/null
@@ -1,5 +0,0 @@
-/* { dg-do run { target init_priority } } */
-/* { dg-require-effective-target lto } */
-/* { dg-options "-flto -O3" } */
-
-#include "../c-c++-common/initpri1.c"


[gcc r15-1028] Add 'c-c++-common/initpri1{, -lto, -split}-static.c' as internal linkage variants

2024-06-05 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:d5ccc21f17b5c7ff20cca81c96a11a68f61f287b

commit r15-1028-gd5ccc21f17b5c7ff20cca81c96a11a68f61f287b
Author: Thomas Schwinge 
Date:   Wed Apr 24 11:51:54 2024 +0200

Add 'c-c++-common/initpri1{,-lto,-split}-static.c' as internal linkage 
variants

gcc/testsuite/
* c-c++-common/initpri1_part_c1.c: Consider 'CDTOR_LINKAGE'.
* c-c++-common/initpri1_part_c2.c: Likewise.
* c-c++-common/initpri1_part_c3.c: Likewise.
* c-c++-common/initpri1_part_cd4.c: Likewise.
* c-c++-common/initpri1_part_d1.c: Likewise.
* c-c++-common/initpri1_part_d2.c: Likewise.
* c-c++-common/initpri1_part_d3.c: Likewise.
* c-c++-common/initpri1.c: Specify it.
* c-c++-common/initpri1-lto.c: Likewise.
* c-c++-common/initpri1-split.c: Likewise.
* c-c++-common/initpri1-static.c: New.
* c-c++-common/initpri1-lto-static.c: Likewise.
* c-c++-common/initpri1-split-static.c: Likewise.

Diff:
---
 gcc/testsuite/c-c++-common/initpri1-lto-static.c   | 7 +++
 gcc/testsuite/c-c++-common/initpri1-lto.c  | 1 +
 gcc/testsuite/c-c++-common/initpri1-split-static.c | 4 
 gcc/testsuite/c-c++-common/initpri1-split.c| 1 +
 gcc/testsuite/c-c++-common/initpri1-static.c   | 5 +
 gcc/testsuite/c-c++-common/initpri1.c  | 1 +
 gcc/testsuite/c-c++-common/initpri1_part_c1.c  | 2 ++
 gcc/testsuite/c-c++-common/initpri1_part_c2.c  | 2 ++
 gcc/testsuite/c-c++-common/initpri1_part_c3.c  | 2 ++
 gcc/testsuite/c-c++-common/initpri1_part_cd4.c | 2 ++
 gcc/testsuite/c-c++-common/initpri1_part_d1.c  | 2 ++
 gcc/testsuite/c-c++-common/initpri1_part_d2.c  | 2 ++
 gcc/testsuite/c-c++-common/initpri1_part_d3.c  | 2 ++
 13 files changed, 33 insertions(+)

diff --git a/gcc/testsuite/c-c++-common/initpri1-lto-static.c 
b/gcc/testsuite/c-c++-common/initpri1-lto-static.c
new file mode 100644
index 000..6393f7ec99b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/initpri1-lto-static.c
@@ -0,0 +1,7 @@
+/* { dg-do run { target init_priority } } */
+/* { dg-require-effective-target lto } */
+/* { dg-options "-flto -O3" } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+/* { dg-additional-options -DCDTOR_LINKAGE=static } */
+
+#include "initpri1.c"
diff --git a/gcc/testsuite/c-c++-common/initpri1-lto.c 
b/gcc/testsuite/c-c++-common/initpri1-lto.c
index 433ef356c7e..7fb4bf1aa82 100644
--- a/gcc/testsuite/c-c++-common/initpri1-lto.c
+++ b/gcc/testsuite/c-c++-common/initpri1-lto.c
@@ -2,5 +2,6 @@
 /* { dg-require-effective-target lto } */
 /* { dg-options "-flto -O3" } */
 /* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+/* { dg-additional-options -DCDTOR_LINKAGE= } */
 
 #include "initpri1.c"
diff --git a/gcc/testsuite/c-c++-common/initpri1-split-static.c 
b/gcc/testsuite/c-c++-common/initpri1-split-static.c
new file mode 100644
index 000..02d8b162e19
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/initpri1-split-static.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target init_priority } } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+/* { dg-additional-sources {initpri1_part_c1.c initpri1_part_c2.c 
initpri1_part_c3.c initpri1_part_d1.c initpri1_part_d2.c initpri1_part_d3.c 
initpri1_part_cd4.c initpri1_part_main.c} } */
+/* { dg-additional-options -DCDTOR_LINKAGE=static } */
diff --git a/gcc/testsuite/c-c++-common/initpri1-split.c 
b/gcc/testsuite/c-c++-common/initpri1-split.c
index 11755ee9f6a..f1482c7e0c1 100644
--- a/gcc/testsuite/c-c++-common/initpri1-split.c
+++ b/gcc/testsuite/c-c++-common/initpri1-split.c
@@ -1,3 +1,4 @@
 /* { dg-do run { target init_priority } } */
 /* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
 /* { dg-additional-sources {initpri1_part_c1.c initpri1_part_c2.c 
initpri1_part_c3.c initpri1_part_d1.c initpri1_part_d2.c initpri1_part_d3.c 
initpri1_part_cd4.c initpri1_part_main.c} } */
+/* { dg-additional-options -DCDTOR_LINKAGE= } */
diff --git a/gcc/testsuite/c-c++-common/initpri1-static.c 
b/gcc/testsuite/c-c++-common/initpri1-static.c
new file mode 100644
index 000..ac101ff63cb
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/initpri1-static.c
@@ -0,0 +1,5 @@
+/* { dg-do run { target init_priority } } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+/* { dg-additional-options -DCDTOR_LINKAGE=static } */
+
+#include "initpri1.c"
diff --git a/gcc/testsuite/c-c++-common/initpri1.c 
b/gcc/testsuite/c-c++-common/initpri1.c
index f50137a489b..73579cdd06b 100644
--- a/gcc/testsuite/c-c++-common/initpri1.c
+++ b/gcc/testsuite/c-c++-common/initpri1.c
@@ -1,5 +1,6 @@
 /* { dg-do run { target i

[gcc r15-1027] Add 'c-c++-common/initpri1-split.c': 'c-c++-common/initpri1.c' split into separate translation units

2024-06-05 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:02e43e5596ee6a4d03eecaa48197280c4eb6a78f

commit r15-1027-g02e43e5596ee6a4d03eecaa48197280c4eb6a78f
Author: Thomas Schwinge 
Date:   Wed Apr 24 11:24:39 2024 +0200

Add 'c-c++-common/initpri1-split.c': 'c-c++-common/initpri1.c' split into 
separate translation units

gcc/testsuite/
* c-c++-common/initpri1.c: Split into...
* c-c++-common/initpri1_part_c1.c: ... this, and...
* c-c++-common/initpri1_part_c2.c: ... this, and...
* c-c++-common/initpri1_part_c3.c: ... this, and...
* c-c++-common/initpri1_part_cd4.c: ... this, and...
* c-c++-common/initpri1_part_d1.c: ... this, and...
* c-c++-common/initpri1_part_d2.c: ... this, and...
* c-c++-common/initpri1_part_d3.c: ... this, and...
* c-c++-common/initpri1_part_main.c: ... this part.
* c-c++-common/initpri1-split.c: New.

Diff:
---
 gcc/testsuite/c-c++-common/initpri1-split.c |  3 +
 gcc/testsuite/c-c++-common/initpri1.c   | 73 +
 gcc/testsuite/c-c++-common/initpri1_part_c1.c   | 11 
 gcc/testsuite/c-c++-common/initpri1_part_c2.c   | 11 
 gcc/testsuite/c-c++-common/initpri1_part_c3.c   | 11 
 gcc/testsuite/c-c++-common/initpri1_part_cd4.c  | 13 +
 gcc/testsuite/c-c++-common/initpri1_part_d1.c   | 11 
 gcc/testsuite/c-c++-common/initpri1_part_d2.c   | 11 
 gcc/testsuite/c-c++-common/initpri1_part_d3.c   | 14 +
 gcc/testsuite/c-c++-common/initpri1_part_main.c | 13 +
 10 files changed, 113 insertions(+), 58 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/initpri1-split.c 
b/gcc/testsuite/c-c++-common/initpri1-split.c
new file mode 100644
index 000..11755ee9f6a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/initpri1-split.c
@@ -0,0 +1,3 @@
+/* { dg-do run { target init_priority } } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+/* { dg-additional-sources {initpri1_part_c1.c initpri1_part_c2.c 
initpri1_part_c3.c initpri1_part_d1.c initpri1_part_d2.c initpri1_part_d3.c 
initpri1_part_cd4.c initpri1_part_main.c} } */
diff --git a/gcc/testsuite/c-c++-common/initpri1.c 
b/gcc/testsuite/c-c++-common/initpri1.c
index 387f2a39658..f50137a489b 100644
--- a/gcc/testsuite/c-c++-common/initpri1.c
+++ b/gcc/testsuite/c-c++-common/initpri1.c
@@ -1,61 +1,18 @@
 /* { dg-do run { target init_priority } } */
 /* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
 
-int i;
-int j;
-
-void c1() __attribute__((constructor (500)));
-void c2() __attribute__((constructor (700)));
-void c3() __attribute__((constructor (600)));
-
-void c1() {
-  if (i++ != 0)
-__builtin_abort ();
-}
-
-void c2() {
-  if (i++ != 2)
-__builtin_abort ();
-}
-
-void c3() {
-  if (i++ != 1)
-__builtin_abort ();
-}
-
-void d1() __attribute__((destructor (500)));
-void d2() __attribute__((destructor (700)));
-void d3() __attribute__((destructor (600)));
-
-void d1() {
-  if (--i != 0)
-__builtin_abort ();
-}
-
-void d2() {
-  if (--i != 2)
-__builtin_abort ();
-}
-
-void d3() {
-  if (j != 2)
-__builtin_abort ();
-  if (--i != 1)
-__builtin_abort ();
-}
-
-void cd4() __attribute__((constructor (800), destructor (800)));
-
-void cd4() {
-  if (i != 3)
-__builtin_abort ();
-  ++j;
-}
-
-int main () {
-  if (i != 3)
-return 1;
-  if (j != 1)
-__builtin_abort ();
-  return 0;
-}
+#include "initpri1_part_c1.c"
+
+#include "initpri1_part_c2.c"
+
+#include "initpri1_part_c3.c"
+
+#include "initpri1_part_d1.c"
+
+#include "initpri1_part_d2.c"
+
+#include "initpri1_part_d3.c"
+
+#include "initpri1_part_cd4.c"
+
+#include "initpri1_part_main.c"
diff --git a/gcc/testsuite/c-c++-common/initpri1_part_c1.c 
b/gcc/testsuite/c-c++-common/initpri1_part_c1.c
new file mode 100644
index 000..0f85a2e6cb1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/initpri1_part_c1.c
@@ -0,0 +1,11 @@
+/* { dg-skip-if part { *-*-* } } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+
+extern int i;
+
+void c1() __attribute__((constructor (500)));
+
+void c1() {
+  if (i++ != 0)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/c-c++-common/initpri1_part_c2.c 
b/gcc/testsuite/c-c++-common/initpri1_part_c2.c
new file mode 100644
index 000..e8c556aab17
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/initpri1_part_c2.c
@@ -0,0 +1,11 @@
+/* { dg-skip-if part { *-*-* } } */
+/* Via the magic string "-std=*++" indicate that testing one (the default) C++ 
standard is sufficient.  */
+
+extern int i;
+
+void c2() __attribute__((constructor (700)));
+
+void c2() {
+  if (i++ != 2)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/c-c++-common/initpri1_part_c3.c 
b/gcc/testsuite/c-c++-common/initpri1_part_c3.c
new file mode 100644
index 000..70880882ce3
--- /dev/null

[gcc r15-1029] doc: Streamline recommendation of GNU awk

2024-06-05 Thread Gerald Pfeifer via Gcc-cvs
https://gcc.gnu.org/g:993142677e2cf780ef578e1d46309f0042743dd5

commit r15-1029-g993142677e2cf780ef578e1d46309f0042743dd5
Author: Gerald Pfeifer 
Date:   Wed Jun 5 09:26:58 2024 +0200

doc: Streamline recommendation of GNU awk

GNU awk 3.1.5 was released in August 2005; no need to specify this in
the context of "recent version".

gcc:
PR other/69374
* doc/install.texi (Prerequisites): Drop reference to GNU awk
version 3.1.5. Remove fluff.

Diff:
---
 gcc/doc/install.texi | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index c781646ac1f..906c78aaca5 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -339,8 +339,7 @@ work when configuring GCC@.
 @item A POSIX or SVR4 awk
 
 Necessary for creating some of the generated source files for GCC@.
-If in doubt, use a recent GNU awk version, as some of the older ones
-are broken.  GNU awk version 3.1.5 is known to work.
+If in doubt, use a recent GNU awk version.
 
 @item GNU binutils


[gcc r15-1030] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

2024-06-05 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:abe6d39365476e6be724815d09d072e305018755

commit r15-1030-gabe6d39365476e6be724815d09d072e305018755
Author: Pan Li 
Date:   Tue May 28 15:37:44 2024 +0800

Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

This patch adds the middle-end representation for saturating
subtraction, i.e. the result of the sub is set to the minimum value
(zero for unsigned types) on underflow.  It matches patterns similar
to the one below.

SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));

For example for uint8_t, we have

* SAT_SUB (255, 0)   => 255
* SAT_SUB (1, 2) => 0
* SAT_SUB (254, 255) => 0
* SAT_SUB (0, 255)   => 0

Given below SAT_SUB for uint64

uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
{
  return (x - y) & (-(TYPE)(x >= y));
}

Before this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  _Bool _1;
  long unsigned int _3;
  uint64_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _1 = x_4(D) >= y_5(D);
  _3 = x_4(D) - y_5(D);
  _6 = _1 ? _3 : 0;
  return _6;
;;succ:   EXIT
}

After this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
  return _6;
;;succ:   EXIT
}

The following tests were run for this patch:
*. The riscv full regression tests.
*. The x86 bootstrap tests.
*. The x86 full regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
* match.pd: Add new match for SAT_SUB.
* optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
new decl for generated in match.pd.
(build_saturation_binary_arith_call): Add new helper function
to build the gimple call to binary SAT alu.
(match_saturation_arith): Rename from.
(match_unsigned_saturation_add): Rename to.
(match_unsigned_saturation_sub): Add new func to match the
unsigned sat sub.
(math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
try when COND_EXPR.

Signed-off-by: Pan Li 

Diff:
---
 gcc/internal-fn.def   |  1 +
 gcc/match.pd  | 14 ++
 gcc/optabs.def|  4 +--
 gcc/tree-ssa-math-opts.cc | 67 +--
 4 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 8de1fa882e9..a8c83437ada 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | 
ECF_NOTHROW, first,
  smulhrs, umulhrs, binary)
 
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
 
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index f19ef702747..7c1ad428a3c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3096,6 +3096,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+/* Unsigned saturation sub, case 1 (branch with gt):
+   SAT_U_SUB = X > Y ? X - Y : 0  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (gt @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
+/* Unsigned saturation sub, case 2 (branch with ge):
+   SAT_U_SUB = X >= Y ? X - Y : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (ge @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 3f2cb46aff8..bc2611abdc2 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
 OPTAB_NX(sub_optab, "sub$Q$a3")
 OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
 OPTAB_VX(subv_optab, "sub$F$a3")
-OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', 
gen_signed_fixed_libfunc)
-OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', 
gen_unsigned_fixed_libfunc)
+OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', 
gen_signed_fixed_libfunc)
+OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', 
gen_unsigned_fixed_libfunc)
 OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_in

[gcc r15-1031] ada: Replace use of LONG_DOUBLE_TYPE_SIZE

2024-06-05 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:6fa25aa970cb82ee7fd6884d75bb14673b14dbbe

commit r15-1031-g6fa25aa970cb82ee7fd6884d75bb14673b14dbbe
Author: Kewen Lin 
Date:   Wed Jun 5 04:22:25 2024 -0500

ada: Replace use of LONG_DOUBLE_TYPE_SIZE

Joseph pointed out "floating types should have their mode,
not a poorly defined precision value" in the discussion[1],
as he and Richi suggested, the existing macros
{FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE will be replaced with a
hook mode_for_floating_type.  To be prepared for that, this
patch is to replace use of LONG_DOUBLE_TYPE_SIZE in ada
with TYPE_PRECISION of long_double_type_node.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651209.html

gcc/ada/ChangeLog:

* gcc-interface/decl.cc (gnat_to_gnu_entity): Use TYPE_PRECISION of
long_double_type_node to replace LONG_DOUBLE_TYPE_SIZE.

Diff:
---
 gcc/ada/gcc-interface/decl.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index f6a4c0631b6..8b72c96c439 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -520,7 +520,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
  esize = UI_To_Int (Esize (gnat_entity));
 
  if (IN (kind, Float_Kind))
-   max_esize = fp_prec_to_size (LONG_DOUBLE_TYPE_SIZE);
+   max_esize
+ = fp_prec_to_size (TYPE_PRECISION (long_double_type_node));
  else if (IN (kind, Access_Kind))
max_esize = POINTER_SIZE * 2;
  else


[gcc r15-1032] d: Replace use of LONG_DOUBLE_TYPE_SIZE

2024-06-05 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:b36461f126148b027e7541aaf356d5322a0fbc08

commit r15-1032-gb36461f126148b027e7541aaf356d5322a0fbc08
Author: Kewen Lin 
Date:   Wed Jun 5 04:22:25 2024 -0500

d: Replace use of LONG_DOUBLE_TYPE_SIZE

Joseph pointed out "floating types should have their mode,
not a poorly defined precision value" in the discussion[1],
as he and Richi suggested, the existing macros
{FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE will be replaced with a
hook mode_for_floating_type.  To be prepared for that, this
patch is to remove the only use of LONG_DOUBLE_TYPE_SIZE
in d.  Iain found that LONG_DOUBLE_TYPE_SIZE is poorly named
and was used incorrectly before, so this patch follows his advice
and uses int_size_in_bytes instead.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651209.html

Co-authored-by: Iain Buclaw 

gcc/d/ChangeLog:

* d-target.cc (Target::_init): Use int_size_in_bytes of
long_double_type_node to replace the expression with
LONG_DOUBLE_TYPE_SIZE for c.long_doublesize assignment.

Diff:
---
 gcc/d/d-target.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/d/d-target.cc b/gcc/d/d-target.cc
index 127b9d7ce7c..dd46e535891 100644
--- a/gcc/d/d-target.cc
+++ b/gcc/d/d-target.cc
@@ -163,7 +163,7 @@ Target::_init (const Param &)
   this->c.intsize = (INT_TYPE_SIZE / BITS_PER_UNIT);
   this->c.longsize = (LONG_TYPE_SIZE / BITS_PER_UNIT);
   this->c.long_longsize = (LONG_LONG_TYPE_SIZE / BITS_PER_UNIT);
-  this->c.long_doublesize = (LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT);
+  this->c.long_doublesize = int_size_in_bytes (long_double_type_node);
   this->c.wchar_tsize = (WCHAR_TYPE_SIZE / BITS_PER_UNIT);
 
   this->c.bitFieldStyle = targetm.ms_bitfield_layout_p (unknown_type_node)


[gcc r15-1033] fortran: Replace uses of {FLOAT, {, LONG_}DOUBLE}_TYPE_SIZE

2024-06-05 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:37a4800939bd90400e03a8fa561d2a0df394bced

commit r15-1033-g37a4800939bd90400e03a8fa561d2a0df394bced
Author: Kewen Lin 
Date:   Wed Jun 5 04:22:25 2024 -0500

fortran: Replace uses of {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE

Joseph pointed out "floating types should have their mode,
not a poorly defined precision value" in the discussion[1],
as he and Richi suggested, the existing macros
{FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE will be replaced with a
hook mode_for_floating_type.  To be prepared for that, this
patch is to replace use of {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE
in fortran with TYPE_PRECISION of
{float,{,long_}double}_type_node.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651209.html

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (build_round_expr): Use TYPE_PRECISION of
long_double_type_node to replace LONG_DOUBLE_TYPE_SIZE.
* trans-types.cc (gfc_build_real_type): Use TYPE_PRECISION of
{float,double,long_double}_type_node to replace
{FLOAT,DOUBLE,LONG_DOUBLE}_TYPE_SIZE.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  3 ++-
 gcc/fortran/trans-types.cc | 10 ++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 912c1000e18..96839705112 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -395,7 +395,8 @@ build_round_expr (tree arg, tree restype)
  don't have an appropriate function that converts directly to the integer
  type (such as kind == 16), just use ROUND, and then convert the result to
  an integer.  We might also need to convert the result afterwards.  */
-  if (resprec <= INT_TYPE_SIZE && argprec <= LONG_DOUBLE_TYPE_SIZE)
+  if (resprec <= INT_TYPE_SIZE
+  && argprec <= TYPE_PRECISION (long_double_type_node))
 fn = builtin_decl_for_precision (BUILT_IN_IROUND, argprec);
   else if (resprec <= LONG_TYPE_SIZE)
 fn = builtin_decl_for_precision (BUILT_IN_LROUND, argprec);
diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc
index 8466c595e06..0ef67723fcd 100644
--- a/gcc/fortran/trans-types.cc
+++ b/gcc/fortran/trans-types.cc
@@ -873,13 +873,15 @@ gfc_build_real_type (gfc_real_info *info)
   int mode_precision = info->mode_precision;
   tree new_type;
 
-  if (mode_precision == FLOAT_TYPE_SIZE)
+  if (mode_precision == TYPE_PRECISION (float_type_node))
 info->c_float = 1;
-  if (mode_precision == DOUBLE_TYPE_SIZE)
+  if (mode_precision == TYPE_PRECISION (double_type_node))
 info->c_double = 1;
-  if (mode_precision == LONG_DOUBLE_TYPE_SIZE && !info->c_float128)
+  if (mode_precision == TYPE_PRECISION (long_double_type_node)
+  && !info->c_float128)
 info->c_long_double = 1;
-  if (mode_precision != LONG_DOUBLE_TYPE_SIZE && mode_precision == 128)
+  if (mode_precision != TYPE_PRECISION (long_double_type_node)
+  && mode_precision == 128)
 {
   /* TODO: see PR101835.  */
   info->c_float128 = 1;


[gcc r15-1034] darwin: Replace use of LONG_DOUBLE_TYPE_SIZE

2024-06-05 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:58ecd2eb507ab216861408cf10ec05efc4e8344e

commit r15-1034-g58ecd2eb507ab216861408cf10ec05efc4e8344e
Author: Kewen Lin 
Date:   Wed Jun 5 04:23:04 2024 -0500

darwin: Replace use of LONG_DOUBLE_TYPE_SIZE

Joseph pointed out "floating types should have their mode,
not a poorly defined precision value" in the discussion[1],
as he and Richi suggested, the existing macros
{FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE will be replaced with a
hook mode_for_floating_type.  To be prepared for that, this
patch is to replace use of LONG_DOUBLE_TYPE_SIZE in darwin
with TYPE_PRECISION of long_double_type_node.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651209.html

gcc/ChangeLog:

* config/darwin.cc (darwin_patch_builtins): Use TYPE_PRECISION of
long_double_type_node to replace LONG_DOUBLE_TYPE_SIZE.

Diff:
---
 gcc/config/darwin.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index 63b8c509405..9129378be37 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -3620,7 +3620,7 @@ darwin_patch_builtin (enum built_in_function fncode)
 void
 darwin_patch_builtins (void)
 {
-  if (LONG_DOUBLE_TYPE_SIZE != 128)
+  if (TYPE_PRECISION (long_double_type_node) != 128)
 return;
 
 #define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode);


[gcc r15-1035] testsuite: Improve check-function-bodies

2024-06-05 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:acdc9df371fbe99e814a3f35a439531e08af79e7

commit r15-1035-gacdc9df371fbe99e814a3f35a439531e08af79e7
Author: Wilco Dijkstra 
Date:   Wed Jun 5 14:05:59 2024 +0100

testsuite: Improve check-function-bodies

Improve check-function-bodies by allowing single-character function names.

gcc/testsuite:
* lib/scanasm.exp (configure_check-function-bodies): Allow
single-char function names.

Diff:
---
 gcc/testsuite/lib/scanasm.exp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
index 6cf9997240d..42c719c512c 100644
--- a/gcc/testsuite/lib/scanasm.exp
+++ b/gcc/testsuite/lib/scanasm.exp
@@ -869,15 +869,15 @@ proc configure_check-function-bodies { config } {
 # Regexp for the start of a function definition (name in \1).
 if { [istarget nvptx*-*-*] } {
set up_config(start) {
-   {^// BEGIN(?: GLOBAL|) FUNCTION DEF: ([a-zA-Z_]\S+)$}
+   {^// BEGIN(?: GLOBAL|) FUNCTION DEF: ([a-zA-Z_]\S*)$}
}
 } elseif { [istarget *-*-darwin*] } {
set up_config(start) {
-   {^_([a-zA-Z_]\S+):$}
+   {^_([a-zA-Z_]\S*):$}
{^LFB[0-9]+:}
}
 } else {
-   set up_config(start) {{^([a-zA-Z_]\S+):$}}
+   set up_config(start) {{^([a-zA-Z_]\S*):$}}
 }
 
 # Regexp for the end of a function definition.


[gcc r15-1036] AArch64: Fix cpu features initialization [PR115342]

2024-06-05 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:d7cbcfe7c33645eaf95f175f19884d443817857b

commit r15-1036-gd7cbcfe7c33645eaf95f175f19884d443817857b
Author: Wilco Dijkstra 
Date:   Wed Jun 5 14:04:33 2024 +0100

AArch64: Fix cpu features initialization [PR115342]

The CPU features initialization code uses CPUID registers (rather than
HWCAP).  The equality comparisons it uses are incorrect: for example,
FEAT_SVE is not set if SVE2 is available.  Using HWCAPs for these is both
simpler and correct.  The initialization must also be done atomically to
avoid multiple threads causing corruption due to non-atomic RMW accesses
to the global.

libgcc:
PR target/115342
* config/aarch64/cpuinfo.c (__init_cpu_features_constructor):
Use HWCAP where possible.  Use atomic write for initialization.
Fix FEAT_PREDRES comparison.
(__init_cpu_features_resolver): Use atomic load for correct
initialization.
(__init_cpu_features): Likewise.

Diff:
---
 libgcc/config/aarch64/cpuinfo.c | 181 +---
 1 file changed, 75 insertions(+), 106 deletions(-)

diff --git a/libgcc/config/aarch64/cpuinfo.c b/libgcc/config/aarch64/cpuinfo.c
index 4b94fca8695..544c5516133 100644
--- a/libgcc/config/aarch64/cpuinfo.c
+++ b/libgcc/config/aarch64/cpuinfo.c
@@ -227,14 +227,22 @@ struct {
 #ifndef HWCAP2_SVE_EBF16
 #define HWCAP2_SVE_EBF16 (1UL << 33)
 #endif
+#ifndef HWCAP2_SME2
+#define HWCAP2_SME2 (1UL << 37)
+#endif
+#ifndef HWCAP2_LRCPC3
+#define HWCAP2_LRCPC3  (1UL << 46)
+#endif
 
 static void
-__init_cpu_features_constructor(unsigned long hwcap,
-   const __ifunc_arg_t *arg) {
-#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
+__init_cpu_features_constructor (unsigned long hwcap,
+const __ifunc_arg_t *arg)
+{
+  unsigned long feat = 0;
+#define setCPUFeature(F) feat |= 1UL << F
 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
 #define extractBits(val, start, number) \
-  (val & ((1ULL << number) - 1ULL) << start) >> start
+  (val & ((1UL << number) - 1UL) << start) >> start
   unsigned long hwcap2 = 0;
   if (hwcap & _IFUNC_ARG_HWCAP)
 hwcap2 = arg->_hwcap2;
@@ -244,26 +252,20 @@ __init_cpu_features_constructor(unsigned long hwcap,
 setCPUFeature(FEAT_PMULL);
   if (hwcap & HWCAP_FLAGM)
 setCPUFeature(FEAT_FLAGM);
-  if (hwcap2 & HWCAP2_FLAGM2) {
-setCPUFeature(FEAT_FLAGM);
+  if (hwcap2 & HWCAP2_FLAGM2)
 setCPUFeature(FEAT_FLAGM2);
-  }
-  if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+  if (hwcap & HWCAP_SM4)
 setCPUFeature(FEAT_SM4);
   if (hwcap & HWCAP_ASIMDDP)
 setCPUFeature(FEAT_DOTPROD);
   if (hwcap & HWCAP_ASIMDFHM)
 setCPUFeature(FEAT_FP16FML);
-  if (hwcap & HWCAP_FPHP) {
+  if (hwcap & HWCAP_FPHP)
 setCPUFeature(FEAT_FP16);
-setCPUFeature(FEAT_FP);
-  }
   if (hwcap & HWCAP_DIT)
 setCPUFeature(FEAT_DIT);
   if (hwcap & HWCAP_ASIMDRDM)
 setCPUFeature(FEAT_RDM);
-  if (hwcap & HWCAP_ILRCPC)
-setCPUFeature(FEAT_RCPC2);
   if (hwcap & HWCAP_AES)
 setCPUFeature(FEAT_AES);
   if (hwcap & HWCAP_SHA1)
@@ -277,22 +279,21 @@ __init_cpu_features_constructor(unsigned long hwcap,
   if (hwcap & HWCAP_SB)
 setCPUFeature(FEAT_SB);
   if (hwcap & HWCAP_SSBS)
-setCPUFeature(FEAT_SSBS2);
-  if (hwcap2 & HWCAP2_MTE) {
-setCPUFeature(FEAT_MEMTAG);
-setCPUFeature(FEAT_MEMTAG2);
-  }
-  if (hwcap2 & HWCAP2_MTE3) {
-setCPUFeature(FEAT_MEMTAG);
-setCPUFeature(FEAT_MEMTAG2);
+{
+  setCPUFeature(FEAT_SSBS);
+  setCPUFeature(FEAT_SSBS2);
+}
+  if (hwcap2 & HWCAP2_MTE)
+{
+  setCPUFeature(FEAT_MEMTAG);
+  setCPUFeature(FEAT_MEMTAG2);
+}
+  if (hwcap2 & HWCAP2_MTE3)
 setCPUFeature(FEAT_MEMTAG3);
-  }
   if (hwcap2 & HWCAP2_SVEAES)
 setCPUFeature(FEAT_SVE_AES);
-  if (hwcap2 & HWCAP2_SVEPMULL) {
-setCPUFeature(FEAT_SVE_AES);
+  if (hwcap2 & HWCAP2_SVEPMULL)
 setCPUFeature(FEAT_SVE_PMULL128);
-  }
   if (hwcap2 & HWCAP2_SVEBITPERM)
 setCPUFeature(FEAT_SVE_BITPERM);
   if (hwcap2 & HWCAP2_SVESHA3)
@@ -329,108 +330,76 @@ __init_cpu_features_constructor(unsigned long hwcap,
 setCPUFeature(FEAT_WFXT);
   if (hwcap2 & HWCAP2_SME)
 setCPUFeature(FEAT_SME);
+  if (hwcap2 & HWCAP2_SME2)
+setCPUFeature(FEAT_SME2);
   if (hwcap2 & HWCAP2_SME_I16I64)
 setCPUFeature(FEAT_SME_I64);
   if (hwcap2 & HWCAP2_SME_F64F64)
 setCPUFeature(FEAT_SME_F64);
-  if (hwcap & HWCAP_CPUID) {
-unsigned long ftr;
-getCPUFeature(ID_AA64PFR1_EL1, ftr);
-/* ID_AA64PFR1_EL1.MTE >= 0b0001  */
-if (extractBits(ftr, 8, 4) >= 0x1)
-  setCPUFeature(FEAT_MEMTAG);
-/* ID_AA64PFR1_EL1.SSBS == 0b0001  */
-if (extractBits(ftr, 4, 4) == 0x1)
-  setCPUFeature(FEAT_SSBS);
-/* ID_AA64PFR1_EL1.SME == 0b0010  */
-if (extractBits(ftr, 24, 4) == 0x2)
-  setCPUFeature(FEAT_SME2);
-getCPUFeature(ID

[gcc r15-1038] AArch64: convert several predicate patterns to new compact syntax

2024-06-05 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:fd4898891ae0c73d6b7aa433cd1ef4539aaa2457

commit r15-1038-gfd4898891ae0c73d6b7aa433cd1ef4539aaa2457
Author: Tamar Christina 
Date:   Wed Jun 5 19:30:39 2024 +0100

AArch64: convert several predicate patterns to new compact syntax

This converts the single alternative patterns to the new compact syntax such
that when I add the new alternatives it's clearer what's being changed.

Note that this will spew out a bunch of warnings from geninsn, as it'll
warn that @ is useless for a single alternative pattern.  These are not
fatal, so they won't break the build, and they are only temporary.

No change in functionality is expected with this patch.

gcc/ChangeLog:

* config/aarch64/aarch64-sve.md (and3,
@aarch64_pred__z, *3_cc,
*3_ptest, aarch64_pred__z,
*3_cc, *3_ptest,
aarch64_pred__z, *3_cc,
*3_ptest, *cmp_ptest,
@aarch64_pred_cmp_wide,
*aarch64_pred_cmp_wide_cc,
*aarch64_pred_cmp_wide_ptest, *aarch64_brk_cc,
*aarch64_brk_ptest, @aarch64_brk,
*aarch64_brk_cc, *aarch64_brk_ptest, 
aarch64_rdffr_z,
*aarch64_rdffr_z_ptest, *aarch64_rdffr_ptest, *aarch64_rdffr_z_cc,
*aarch64_rdffr_cc): Convert to compact syntax.
* config/aarch64/aarch64-sve2.md
(@aarch64_pred_): Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md  | 262 ++---
 gcc/config/aarch64/aarch64-sve2.md |  12 +-
 2 files changed, 161 insertions(+), 113 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 0434358122d..ca4d435e705 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1156,76 +1156,86 @@
 
 ;; Likewise with zero predication.
 (define_insn "aarch64_rdffr_z"
-  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+  [(set (match_operand:VNx16BI 0 "register_operand")
(and:VNx16BI
  (reg:VNx16BI FFRT_REGNUM)
- (match_operand:VNx16BI 1 "register_operand" "Upa")))]
+ (match_operand:VNx16BI 1 "register_operand")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  "rdffr\t%0.b, %1/z"
+  {@ [ cons: =0, 1   ]
+ [ Upa , Upa ] rdffr\t%0.b, %1/z
+  }
 )
 
 ;; Read the FFR to test for a fault, without using the predicate result.
 (define_insn "*aarch64_rdffr_z_ptest"
   [(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
- [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ [(match_operand:VNx16BI 1 "register_operand")
   (match_dup 1)
   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
   (and:VNx16BI
 (reg:VNx16BI FFRT_REGNUM)
 (match_dup 1))]
  UNSPEC_PTEST))
-   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
+   (clobber (match_scratch:VNx16BI 0))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  "rdffrs\t%0.b, %1/z"
+  {@ [ cons: =0, 1   ]
+ [ Upa , Upa ] rdffrs\t%0.b, %1/z
+  }
 )
 
 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
 (define_insn "*aarch64_rdffr_ptest"
   [(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
- [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ [(match_operand:VNx16BI 1 "register_operand")
   (match_dup 1)
   (const_int SVE_KNOWN_PTRUE)
   (reg:VNx16BI FFRT_REGNUM)]
  UNSPEC_PTEST))
-   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
+   (clobber (match_scratch:VNx16BI 0))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  "rdffrs\t%0.b, %1/z"
+  {@ [ cons: =0, 1   ]
+ [ Upa , Upa ] rdffrs\t%0.b, %1/z
+  }
 )
 
 ;; Read the FFR with zero predication and test the result.
 (define_insn "*aarch64_rdffr_z_cc"
   [(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
- [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ [(match_operand:VNx16BI 1 "register_operand")
   (match_dup 1)
   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
   (and:VNx16BI
 (reg:VNx16BI FFRT_REGNUM)
 (match_dup 1))]
  UNSPEC_PTEST))
-   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+   (set (match_operand:VNx16BI 0 "register_operand")
(and:VNx16BI
  (reg:VNx16BI FFRT_REGNUM)
  (match_dup 1)))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  "rdffrs\t%0.b, %1/z"
+  {@ [ cons: =0, 1   ]
+ [ Upa , Upa ] rdffrs\t%0.b, %1/z
+  }
 )
 
 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
 (define_insn "*aarch64_rdffr_cc"
   [(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
- [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ [(match_operand:VNx16BI 1 "register_operand")
   (match_dup 1)
   (const_int SVE_KNOWN_PTRUE)
   (reg:VNx16BI FFRT_REGNUM)]
  UNSPEC_PTEST))
-   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+   (set (match_operand:VNx16BI 0 "registe

[gcc r15-1039] AArch64: add new tuning param and attribute for enabling conditional early clobber

2024-06-05 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:35f17c680ca650f8658994f857358e5a529c0b93

commit r15-1039-g35f17c680ca650f8658994f857358e5a529c0b93
Author: Tamar Christina 
Date:   Wed Jun 5 19:31:11 2024 +0100

AArch64: add new tuning param and attribute for enabling conditional early clobber

This adds a new tuning parameter AARCH64_EXTRA_TUNE_AVOID_PRED_RMW for
AArch64 to allow us to conditionally enable the early clobber alternatives
based on the tuning models.

gcc/ChangeLog:

* config/aarch64/aarch64-tuning-flags.def
(AVOID_PRED_RMW): New.
* config/aarch64/aarch64.h (TARGET_SVE_PRED_CLOBBER): New.
* config/aarch64/aarch64.md (pred_clobber): New.
(arch_enabled): Use it.

Diff:
---
 gcc/config/aarch64/aarch64-tuning-flags.def |  4 
 gcc/config/aarch64/aarch64.h|  5 +
 gcc/config/aarch64/aarch64.md   | 18 --
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def 
b/gcc/config/aarch64/aarch64-tuning-flags.def
index d5bcaebce77..a9f48f5d3d4 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -48,4 +48,8 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", 
AVOID_CROSS_LOOP_FMA)
 
 AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
 
+/* Enable is the target prefers to use a fresh register for predicate outputs
+   rather than re-use an input predicate register.  */
+AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", AVOID_PRED_RMW)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index bbf11faaf4b..0997b82dbc0 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -495,6 +495,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = 
AARCH64_FL_SM_OFF;
 enabled through +gcs.  */
 #define TARGET_GCS (AARCH64_ISA_GCS)
 
+/* Prefer different predicate registers for the output of a predicated
+   operation over re-using an existing input predicate.  */
+#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \
+&& (aarch64_tune_params.extra_tuning_flags \
+& AARCH64_EXTRA_TUNE_AVOID_PRED_RMW))
 
 /* Standard register usage.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 9dff2d7a2b0..389a1906e23 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -445,6 +445,10 @@
 ;; target-independent code.
 (define_attr "is_call" "no,yes" (const_string "no"))
 
+;; Indicates whether we want to enable the pattern with an optional early
+;; clobber for SVE predicates.
+(define_attr "pred_clobber" "any,no,yes" (const_string "any"))
+
 ;; [For compatibility with Arm in pipeline models]
 ;; Attribute that specifies whether or not the instruction touches fp
 ;; registers.
@@ -460,7 +464,17 @@
 
 (define_attr "arch_enabled" "no,yes"
   (if_then_else
-(ior
+(and
+  (ior
+   (and
+ (eq_attr "pred_clobber" "no")
+ (match_test "!TARGET_SVE_PRED_CLOBBER"))
+   (and
+ (eq_attr "pred_clobber" "yes")
+ (match_test "TARGET_SVE_PRED_CLOBBER"))
+   (eq_attr "pred_clobber" "any"))
+
+  (ior
(eq_attr "arch" "any")
 
(and (eq_attr "arch" "rcpc8_4")
@@ -488,7 +502,7 @@
 (match_test "TARGET_SVE"))
 
(and (eq_attr "arch" "sme")
-(match_test "TARGET_SME")))
+(match_test "TARGET_SME"
 (const_string "yes")
 (const_string "no")))


[gcc r15-1040] AArch64: add new alternative with early clobber to patterns

2024-06-05 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:2de3bbde1ebea8689f3596967769f66bf903458e

commit r15-1040-g2de3bbde1ebea8689f3596967769f66bf903458e
Author: Tamar Christina 
Date:   Wed Jun 5 19:31:39 2024 +0100

AArch64: add new alternative with early clobber to patterns

This patch adds new alternatives to the patterns which are affected.  The
new alternatives with the conditional early clobbers are added before the
normal ones in order for LRA to prefer them in the event that we have
enough free registers to accommodate them.

In case register pressure is too high, the normal alternatives will be
preferred before a reload is considered, as we would rather have the tie
than a spill.

Tests are in the next patch.

gcc/ChangeLog:

* config/aarch64/aarch64-sve.md (and3,
@aarch64_pred__z, *3_cc,
*3_ptest, aarch64_pred__z,
*3_cc, *3_ptest,
aarch64_pred__z, *3_cc,
*3_ptest, @aarch64_pred_cmp,
*cmp_cc, *cmp_ptest,
@aarch64_pred_cmp_wide,
*aarch64_pred_cmp_wide_cc,
*aarch64_pred_cmp_wide_ptest, @aarch64_brk,
*aarch64_brk_cc, *aarch64_brk_ptest,
@aarch64_brk, *aarch64_brk_cc,
*aarch64_brk_ptest, aarch64_rdffr_z, *aarch64_rdffr_z_ptest,
*aarch64_rdffr_ptest, *aarch64_rdffr_z_cc, *aarch64_rdffr_cc): Add
new early clobber
alternative.
* config/aarch64/aarch64-sve2.md
(@aarch64_pred_): Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md  | 178 +
 gcc/config/aarch64/aarch64-sve2.md |   6 +-
 2 files changed, 124 insertions(+), 60 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index ca4d435e705..d902bce62fd 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1161,8 +1161,10 @@
  (reg:VNx16BI FFRT_REGNUM)
  (match_operand:VNx16BI 1 "register_operand")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {@ [ cons: =0, 1   ]
- [ Upa , Upa ] rdffr\t%0.b, %1/z
+  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
+ [ &Upa, Upa ; yes ] rdffr\t%0.b, %1/z
+ [ ?Upa, 0Upa; yes ] ^
+ [ Upa , Upa ; no  ] ^
   }
 )
 
@@ -1179,8 +1181,10 @@
  UNSPEC_PTEST))
(clobber (match_scratch:VNx16BI 0))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {@ [ cons: =0, 1   ]
- [ Upa , Upa ] rdffrs\t%0.b, %1/z
+  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
+ [ &Upa, Upa ; yes ] rdffrs\t%0.b, %1/z
+ [ ?Upa, 0Upa; yes ] ^
+ [ Upa , Upa ; no  ] ^
   }
 )
 
@@ -1195,8 +1199,10 @@
  UNSPEC_PTEST))
(clobber (match_scratch:VNx16BI 0))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {@ [ cons: =0, 1   ]
- [ Upa , Upa ] rdffrs\t%0.b, %1/z
+  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
+ [ &Upa, Upa ; yes ] rdffrs\t%0.b, %1/z
+ [ ?Upa, 0Upa; yes ] ^
+ [ Upa , Upa ; no  ] ^
   }
 )
 
@@ -1216,8 +1222,10 @@
  (reg:VNx16BI FFRT_REGNUM)
  (match_dup 1)))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {@ [ cons: =0, 1   ]
- [ Upa , Upa ] rdffrs\t%0.b, %1/z
+  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
+ [ &Upa, Upa ; yes ] rdffrs\t%0.b, %1/z
+ [ ?Upa, 0Upa; yes ] ^
+ [ Upa , Upa ; no  ] ^
   }
 )
 
@@ -1233,8 +1241,10 @@
(set (match_operand:VNx16BI 0 "register_operand")
(reg:VNx16BI FFRT_REGNUM))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {@ [ cons: =0, 1   ]
- [ Upa , Upa ] rdffrs\t%0.b, %1/z
+  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
+ [ &Upa, Upa ; yes ] rdffrs\t%0.b, %1/z
+ [ ?Upa, 0Upa; yes ] ^
+ [ Upa , Upa ; no  ] ^
   }
 )
 
@@ -6651,8 +6661,10 @@
(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
  (match_operand:PRED_ALL 2 "register_operand")))]
   "TARGET_SVE"
-  {@ [ cons: =0, 1  , 2   ]
- [ Upa , Upa, Upa ] and\t%0.b, %1/z, %2.b, %2.b
+  {@ [ cons: =0, 1   , 2   ; attrs: pred_clobber ]
+ [ &Upa, Upa , Upa ; yes ] and\t%0.b, %1/z, %2.b, %2.b
+ [ ?Upa, 0Upa, 0Upa; yes ] ^
+ [ Upa , Upa , Upa ; no  ] ^
   }
 )
 
@@ -6679,8 +6691,10 @@
(match_operand:PRED_ALL 3 "register_operand"))
  (match_operand:PRED_ALL 1 "register_operand")))]
   "TARGET_SVE"
-  {@ [ cons: =0, 1  , 2  , 3   ]
- [ Upa , Upa, Upa, Upa ] \t%0.b, %1/z, %2.b, %3.b
+  {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
+ [ &Upa, Upa , Upa , Upa ; yes ] \t%0.b, 
%1/z, %2.b, %3.b
+ [ ?Upa, 0Upa, 0Upa, 0Upa; yes 

[gcc r15-1041] AArch64: enable new predicate tuning for Neoverse cores.

2024-06-05 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:3eb9f6eab9802d5ae65ead6b1f2ae6fe0833e06e

commit r15-1041-g3eb9f6eab9802d5ae65ead6b1f2ae6fe0833e06e
Author: Tamar Christina 
Date:   Wed Jun 5 19:32:16 2024 +0100

AArch64: enable new predicate tuning for Neoverse cores.

This enables the new tuning flag for Neoverse V1, Neoverse V2 and
Neoverse N2.  It is kept off for generic codegen.

Note that the reason for the +sve, even though the tests are in
aarch64-sve.exp, is that if the testsuite is run with a forced SVE-off
option, e.g. -march=armv8-a+nosve, then the intrinsics end up being
disabled because the -march is preferred over the -mcpu even though the
-mcpu comes later.

This prevents the tests from failing in such runs.

gcc/ChangeLog:

* config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings):
Add AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings):
Add AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings):
Add AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/pred_clobber_1.c: New test.
* gcc.target/aarch64/sve/pred_clobber_2.c: New test.
* gcc.target/aarch64/sve/pred_clobber_3.c: New test.
* gcc.target/aarch64/sve/pred_clobber_4.c: New test.

Diff:
---
 gcc/config/aarch64/tuning_models/neoversen2.h  |  3 ++-
 gcc/config/aarch64/tuning_models/neoversev1.h  |  3 ++-
 gcc/config/aarch64/tuning_models/neoversev2.h  |  3 ++-
 .../gcc.target/aarch64/sve/pred_clobber_1.c| 22 +
 .../gcc.target/aarch64/sve/pred_clobber_2.c| 22 +
 .../gcc.target/aarch64/sve/pred_clobber_3.c| 23 ++
 .../gcc.target/aarch64/sve/pred_clobber_4.c| 22 +
 7 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h 
b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762..be9a48ac3ad 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
   (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
-   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),/* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),   /* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS   /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h 
b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a..0fc41ce6a41 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
   (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
-   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),   /* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),   /* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS/* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h 
b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9..f76e4ef358f 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
   (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
-   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),/* tune_flags.  */
+   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),   /* tune_flags.  */
   &generic_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALWAYS   /* stp_policy_model.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 000..25129e8d6f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include 
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue   p([1-3]).b, all
+** cmplo   p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...

[gcc r15-1042] RISC-V: Introduce -mvector-strict-align.

2024-06-05 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:68b0742a49de7122d5023f0bf46460ff2fb3e3dd

commit r15-1042-g68b0742a49de7122d5023f0bf46460ff2fb3e3dd
Author: Robin Dapp 
Date:   Tue May 28 21:19:26 2024 +0200

RISC-V: Introduce -mvector-strict-align.

This patch disables movmisalign by default and introduces
the -mno-vector-strict-align option to override it and re-enable
movmisalign.  For now, generic-ooo is the only uarch that supports
misaligned vector access.

The patch also adds a check_effective_target_riscv_v_misalign_ok to
the testsuite which enables or disables the vector misalignment tests
depending on whether the target under test can execute a misaligned
vle32.

Changes from v3:
 - Addressed Kito's comments.
 - Made -mscalar-strict-align a real alias.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
Move from here...
* config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
...to here and map to riscv_vector_unaligned_access_p.
* config/riscv/riscv.opt: Add -mvector-strict-align.
* config/riscv/riscv.cc (struct riscv_tune_param): Add
vector_unaligned_access.
(riscv_override_options_internal): Set
riscv_vector_unaligned_access_p.
* doc/invoke.texi: Document -mvector-strict-align.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add
check_effective_target_riscv_v_misalign_ok.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add
-mno-vector-strict-align.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto.

Diff:
---
 gcc/config/riscv/riscv-opts.h  |  3 --
 gcc/config/riscv/riscv.cc  | 19 
 gcc/config/riscv/riscv.h   |  5 
 gcc/config/riscv/riscv.opt |  8 +
 gcc/doc/invoke.texi| 17 +++
 .../vect/costmodel/riscv/rvv/dynamic-lmul2-7.c |  2 +-
 .../vect/costmodel/riscv/rvv/vla_vs_vls-10.c   |  2 +-
 .../vect/costmodel/riscv/rvv/vla_vs_vls-11.c   |  2 +-
 .../vect/costmodel/riscv/rvv/vla_vs_vls-12.c   |  2 +-
 .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c |  2 +-
 .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c |  2 +-
 .../gcc.target/riscv/rvv/autovec/vls/misalign-1.c  |  2 +-
 gcc/testsuite/lib/target-supports.exp  | 35 --
 13 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 1b2dd5757a8..f58a07abffc 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -147,9 +147,6 @@ enum rvv_vector_bits_enum {
  ? 0   
\
  : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
 
-/* TODO: Enable RVV movmisalign by default for now.  */
-#define TARGET_VECTOR_MISALIGN_SUPPORTED 1
-
 /* The maximmum LMUL according to user configuration.  */
 #define TARGET_MAX_LMUL
\
   (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index c5c4c777349..9704ff9c6a0 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -288,6 +288,7 @@ struct riscv_tune_param
   unsigned short memory_cost;
   unsigned short fmv_cost;
   bool slow_unaligned_access;
+  bool vector_unaligned_access;
   bool use_divmod_expansion;
   bool overlap_op_by_pieces;
   unsigned int fusible_ops;
@@ -300,6 +301,10 @@ struct riscv_tune_param
 /* Whether unaligned accesses execute very slowly.  */
 bool riscv_slow_unaligned_access_p;
 
+/* Whether misaligned vector accesses are supported (i.e. do not
+   throw an exception).  */
+bool riscv_vector_unaligned_access_p;
+
 /* Whether user explicitly passed -mstrict-align.  */
 bool riscv_user_wants_strict_align;
 
@@ -442,6 +447,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   5,   /* memory_cost */
   8,   /* fmv_cost */
   true,/* 
slow_unaligned_access */
+  false,   /* vector_unaligned_access */
   false,   /* use_divmod_expansion */
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,  

[gcc r15-1043] check_GNU_style: Use raw strings.

2024-06-05 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:03e1a7270314800eb33632f778401570e65345bd

commit r15-1043-g03e1a7270314800eb33632f778401570e65345bd
Author: Robin Dapp 
Date:   Mon May 13 22:05:57 2024 +0200

check_GNU_style: Use raw strings.

This silences some warnings when using check_GNU_style.

contrib/ChangeLog:

* check_GNU_style_lib.py: Use raw strings for regexps.

Diff:
---
 contrib/check_GNU_style_lib.py | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/contrib/check_GNU_style_lib.py b/contrib/check_GNU_style_lib.py
index f1a120fa8d3..6dbe4b53559 100755
--- a/contrib/check_GNU_style_lib.py
+++ b/contrib/check_GNU_style_lib.py
@@ -103,7 +103,7 @@ class SpacesCheck:
 
 class SpacesAndTabsMixedCheck:
 def __init__(self):
-self.re = re.compile('\ \t')
+self.re = re.compile(r'\ \t')
 
 def check(self, filename, lineno, line):
 stripped = line.lstrip()
@@ -115,7 +115,7 @@ class SpacesAndTabsMixedCheck:
 
 class TrailingWhitespaceCheck:
 def __init__(self):
-self.re = re.compile('(\s+)$')
+self.re = re.compile(r'(\s+)$')
 
 def check(self, filename, lineno, line):
 assert(len(line) == 0 or line[-1] != '\n')
@@ -128,7 +128,7 @@ class TrailingWhitespaceCheck:
 
 class SentenceSeparatorCheck:
 def __init__(self):
-self.re = re.compile('\w\.(\s|\s{3,})\w')
+self.re = re.compile(r'\w\.(\s|\s{3,})\w')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -140,7 +140,7 @@ class SentenceSeparatorCheck:
 
 class SentenceEndOfCommentCheck:
 def __init__(self):
-self.re = re.compile('\w\.(\s{0,1}|\s{3,})\*/')
+self.re = re.compile(r'\w\.(\s{0,1}|\s{3,})\*/')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -152,7 +152,7 @@ class SentenceEndOfCommentCheck:
 
 class SentenceDotEndCheck:
 def __init__(self):
-self.re = re.compile('\w(\s*\*/)')
+self.re = re.compile(r'\w(\s*\*/)')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -164,7 +164,7 @@ class SentenceDotEndCheck:
 class FunctionParenthesisCheck:
 # TODO: filter out GTY stuff
 def __init__(self):
-self.re = re.compile('\w(\s{2,})?(\()')
+self.re = re.compile(r'\w(\s{2,})?(\()')
 
 def check(self, filename, lineno, line):
 if '#define' in line:
@@ -179,7 +179,7 @@ class FunctionParenthesisCheck:
 
 class SquareBracketCheck:
 def __init__(self):
-self.re = re.compile('\w\s+(\[)')
+self.re = re.compile(r'\w\s+(\[)')
 
 def check(self, filename, lineno, line):
 if filename.endswith('.md'):
@@ -194,7 +194,7 @@ class SquareBracketCheck:
 
 class ClosingParenthesisCheck:
 def __init__(self):
-self.re = re.compile('\S\s+(\))')
+self.re = re.compile(r'\S\s+(\))')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -208,7 +208,7 @@ class BracesOnSeparateLineCheck:
 # This will give false positives for C99 compound literals.
 
 def __init__(self):
-self.re = re.compile('(\)|else)\s*({)')
+self.re = re.compile(r'(\)|else)\s*({)')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -219,7 +219,7 @@ class BracesOnSeparateLineCheck:
 
 class TrailinigOperatorCheck:
 def __init__(self):
-regex = '^\s.*(([^a-zA-Z_]\*)|([-%<=&|^?])|([^*]/)|([^:][+]))$'
+regex = r'^\s.*(([^a-zA-Z_]\*)|([-%<=&|^?])|([^*]/)|([^:][+]))$'
 self.re = re.compile(regex)
 
 def check(self, filename, lineno, line):


[gcc r15-1044] contrib: header-tools scripts updated to python3

2024-06-05 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:ac6fb0ff7021f1e263034d3468615fd33fe16c96

commit r15-1044-gac6fb0ff7021f1e263034d3468615fd33fe16c96
Author: Sundeep KOKKONDA 
Date:   Fri Mar 29 03:22:11 2024 -0700

contrib: header-tools scripts updated to python3

The scripts in contrib/header-tools/ are incompatible with python3.
This updates them to use python3.

contrib/header-tools/ChangeLog:

* count-headers: Adapt to Python 3.
* gcc-order-headers: Likewise.
* graph-header-logs: Likewise.
* graph-include-web: Likewise.
* headerutils.py: Likewise.
* included-by: Likewise.
* reduce-headers: Likewise.
* replace-header: Likewise.
* show-headers: Likewise.

Signed-off-by: Sundeep KOKKONDA 

Diff:
---
 contrib/header-tools/count-headers |   8 +--
 contrib/header-tools/gcc-order-headers |  64 ++---
 contrib/header-tools/graph-header-logs |  42 +++---
 contrib/header-tools/graph-include-web |  26 -
 contrib/header-tools/headerutils.py|  36 ++--
 contrib/header-tools/included-by   |  32 +--
 contrib/header-tools/reduce-headers| 102 -
 contrib/header-tools/replace-header|  12 ++--
 contrib/header-tools/show-headers  |  32 +--
 9 files changed, 177 insertions(+), 177 deletions(-)

diff --git a/contrib/header-tools/count-headers 
b/contrib/header-tools/count-headers
index 7a92596a602..f1eb08da317 100755
--- a/contrib/header-tools/count-headers
+++ b/contrib/header-tools/count-headers
@@ -1,4 +1,4 @@
-#! /usr/bin/python2
+#! /usr/bin/python3
 import os.path
 import sys
 import shlex
@@ -45,11 +45,11 @@ if not usage and len (src) > 0:
   l.sort (key=lambda tup:tup[0], reverse=True)
 
   for f in l:
-print str (f[0]) + " : " + f[1]
+print (str (f[0]) + " : " + f[1])
 
 else:
-  print "count-headers file1 [filen]"
-  print "Count the number of occurrences of all includes across all listed 
files"
+  print ("count-headers file1 [filen]")
+  print ("Count the number of occurrences of all includes across all listed 
files")
 
  
 
diff --git a/contrib/header-tools/gcc-order-headers 
b/contrib/header-tools/gcc-order-headers
index ee76cba4b18..68bcc93f609 100755
--- a/contrib/header-tools/gcc-order-headers
+++ b/contrib/header-tools/gcc-order-headers
@@ -1,11 +1,11 @@
-#! /usr/bin/python2
+#! /usr/bin/python3
 import os
 import sys
 import shlex
 import re
 
 from headerutils import *
-import Queue
+import queue
 
 file_list = list ()
 usage = False
@@ -77,19 +77,19 @@ def create_master_list (fn, verbose):
 if fn != "diagnostic.h" and fn != "diagnostic-core.h":
   master_list.append (fn)
   if (verbose):
-print fn + "  included by: " + includes[fn][0]
+print (fn + "  included by: " + includes[fn][0])
 
 
 
 def print_dups ():
   if dups:
-print "\nduplicated includes"
+print ("\nduplicated includes")
   for i in dups:
 string =  "dup : " + i + " : "
 string += includes[i][0] 
 for i2 in dups[i]:
   string += ", "+i2
-print string
+print (string)
 
 
 def process_known_dups ():
@@ -230,11 +230,11 @@ for arg in sys.argv[1:]:
 elif arg[0:2] == "-v":
   show_master = True
 else:
-  print "Error: unrecognized option " + arg
+  print ("Error: unrecognized option " + arg)
   elif os.path.exists(arg):
 file_list.append (arg)
   else:
-print "Error: file " + arg + " Does not exist."
+print ("Error: file " + arg + " Does not exist.")
 usage = True
 
 if not file_list and not show_master:
@@ -242,7 +242,7 @@ if not file_list and not show_master:
 
 if not usage and not os.path.exists ("coretypes.h"):
   usage = True
-  print "Error: Must run command in main gcc source directory containing 
coretypes.h\n"
+  print ("Error: Must run command in main gcc source directory containing 
coretypes.h\n")
 
 # process diagnostic.h first.. it's special since GCC_DIAG_STYLE can be
 # overridden by languages, but must be done so by a file included BEFORE it.
@@ -268,20 +268,20 @@ process_known_dups ()
 desired_order = master_list
 
 if show_master:
-  print " Canonical order of gcc include files: "
+  print (" Canonical order of gcc include files: ")
   for x in master_list:
-print x
-  print " "
+print (x)
+  print (" ")
 
 if usage:
-  print "gcc-order-headers [-i] [-v] file1 [filen]"
-  print "Ensures gcc's headers files are included in a normalized form 
with"
-  print "redundant headers removed.  The original files are saved in 
filename.bak"
-  print "Outputs a list of files which changed."
-  print " -i ignore conditional compilation."
-  print "Use after examining the file to be sure includes within #ifs are 
safe"
-  print "Any headers within conditional sections will be ignored."
-  print " -v Show the canonical order of known headers"
+  print ("gcc-order-headers [-i] [-v] 

[gcc r15-1045] contrib: Fix spelling and capitalization in header-tools

2024-06-05 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:66fa2f1037207f7565cfa824a2894cd702067d8c

commit r15-1045-g66fa2f1037207f7565cfa824a2894cd702067d8c
Author: Jonathan Wakely 
Date:   Wed Jun 5 20:46:19 2024 +0100

contrib: Fix spelling and capitalization in header-tools

contrib/header-tools/ChangeLog:

* README: Fix spelling and capitalization typos.
* gcc-order-headers: Fix spelling typo.

Diff:
---
 contrib/header-tools/README| 24 
 contrib/header-tools/gcc-order-headers |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/contrib/header-tools/README b/contrib/header-tools/README
index 48381fe0ba4..906572e5059 100644
--- a/contrib/header-tools/README
+++ b/contrib/header-tools/README
@@ -1,8 +1,8 @@
 Quick start documentation for the header file utilities.  
 
-This isn't a full breakdown of the tools, just they typical use scenarios.
+This isn't a full breakdown of the tools, just the typical use scenarios.
 
-- Each tool accepts -h to show it's usage.  Usually no parameters will also
+- Each tool accepts -h to show its usage.  Usually no parameters will also
 trigger the help message.  Help may specify additional functionality to what is
 listed here.
 
@@ -37,7 +37,7 @@ gcc-order-headers
   command line.
 
   Any files which are changed are output, and the original is saved with a
-  .bak extention.
+  .bak extension.
 
   ex.: gcc-order-headers tree-ssa.cc c/c-decl.cc
 
@@ -45,7 +45,7 @@ gcc-order-headers
   show which of those headers include other headers, just the final canonical
   ordering.
 
-  if any header files are included within a conditional code block, the tool
+  If any header files are included within a conditional code block, the tool
   will issue a message and not change the file.  When this happens, you can
   manually inspect the file to determine if reordering it is actually OK.  Then
   rerun the command with the -i option.  This will ignore the conditional error
@@ -162,11 +162,11 @@ reduce-headers
   a native build and sometimes target builds, depending on what you are trying
   to reduce.
 
-  it is good practice to run 'gcc-order-headers' on a source file before trying
+  It is good practice to run 'gcc-order-headers' on a source file before trying
   to reduce it.  This removes duplicates and performs some simplifications 
   which reduce the chances of the reduction tool missing things.
   
-  start with a completely bootstrapped native compiler.
+  Start with a completely bootstrapped native compiler.
 
   Any desired target builds should be built in one directory using a modified
   config-list.mk file which does not delete the build directory when it is 
done.
@@ -198,7 +198,7 @@ reduce-headers
 
   A small subset of targets has been determined to provide excellent coverage,
   at least as of Aug 31/15 .  They were found by reducing all the files
-  contained in libbackend.a oer a full set of targets(207).  All conditions
+  contained in libbackend.a over a full set of targets(207).  All conditions
   which disallowed removal of a header file were triggered by one or more of
   these targets.  They are also known to the tool.  When building targets it
   will check those targets before the rest.  
@@ -223,7 +223,7 @@ reduce-headers
#  This will attempt to remove all header files from tree-ssa-live.cc
   
 
-  the tool will generate a number of log files:
+  The tool will generate a number of log files:
 
 reduce-headers.log : All compilation failures from attempted reductions.
 reduce-headers.sum : One line summary of what happened to each source file.
@@ -234,13 +234,13 @@ reduce-headers
 
 reduce-headers-kept.log: List of all the successful compiles that were
  ignored because of conditional macro dependencies
-and why it thinks that is the case
-$src.c.log  : for each failed header removal, the compilation
+and why it thinks that is the case.
+$src.c.log  : For each failed header removal, the compilation
  messages as to why it failed.
 $header.h.log: The same log is put into the relevant header log as well.
 
 
-a sample output from ira.cc.log:
+A sample output from ira.cc.log:
 
 Compilation failed:
  for shrink-wrap.h:
@@ -253,7 +253,7 @@ Compilation failed:
make: *** [ira.o] Error 1
 
 
-the same message would be put into shrink-wrap.h.log.
+The same message would be put into shrink-wrap.h.log.
 
 
 
diff --git a/contrib/header-tools/gcc-order-headers 
b/contrib/header-tools/gcc-order-headers
index 68bcc93f609..87fdddf60ee 100755
--- a/contrib/header-tools/gcc-order-headers
+++ b/contrib/header-tools/gcc-order-headers
@@ -389,7 +389,7 @@ if didnt_do:
   print ("Safeness is determined by checking whether any of the reordered 
headers are")
   print ("within a conditional and could be hauled out of the conditional, 
thus changing")
   print ("what the

[gcc r15-1047] Simplify (AND (ASHIFTRT A imm) mask) to (LSHIFTRT A imm) for vector mode.

2024-06-05 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:7876cde25cbd2f026a0ae488e5263e72f8e9bfa0

commit r15-1047-g7876cde25cbd2f026a0ae488e5263e72f8e9bfa0
Author: liuhongt 
Date:   Fri Apr 19 10:29:34 2024 +0800

Simplify (AND (ASHIFTRT A imm) mask) to (LSHIFTRT A imm) for vector mode.

When mask is ((1 << (prec - imm)) - 1), which is used to clear the upper
bits of A, then the expression can be simplified to LSHIFTRT.

i.e. simplify
(and:v8hi
  (ashiftrt:v8hi A 8)
  (const_vector 0xff x8))
to
(lshiftrt:v8hi A 8)

gcc/ChangeLog:

PR target/114428
* simplify-rtx.cc
(simplify_context::simplify_binary_operation_1):
Simplify (AND (ASHIFTRT A imm) mask) to (LSHIFTRT A imm) for
specific mask.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr114428-1.c: New test.

Diff:
---
 gcc/simplify-rtx.cc| 25 +++
 gcc/testsuite/gcc.target/i386/pr114428-1.c | 39 ++
 2 files changed, 64 insertions(+)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index f6b4d73b593..9bc3ef9ad9f 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -4065,6 +4065,31 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
return tem;
}
 
+  /* (and:v4si
+  (ashiftrt:v4si A 16)
+  (const_vector: 0xffff x4))
+is just (lshiftrt:v4si A 16).  */
+  if (VECTOR_MODE_P (mode) && GET_CODE (op0) == ASHIFTRT
+ && (CONST_INT_P (XEXP (op0, 1))
+ || (GET_CODE (XEXP (op0, 1)) == CONST_VECTOR
+ && CONST_VECTOR_DUPLICATE_P (XEXP (op0, 1
+ && GET_CODE (op1) == CONST_VECTOR
+ && CONST_VECTOR_DUPLICATE_P (op1))
+   {
+ unsigned HOST_WIDE_INT shift_count
+   = (CONST_INT_P (XEXP (op0, 1))
+  ? UINTVAL (XEXP (op0, 1))
+  : UINTVAL (XVECEXP (XEXP (op0, 1), 0, 0)));
+ unsigned HOST_WIDE_INT inner_prec
+   = GET_MODE_PRECISION (GET_MODE_INNER (mode));
+
+ /* Avoid UD shift count.  */
+ if (shift_count < inner_prec
+ && (UINTVAL (XVECEXP (op1, 0, 0))
+ == (HOST_WIDE_INT_1U << (inner_prec - shift_count)) - 1))
+   return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP 
(op0, 1));
+   }
+
   tem = simplify_byte_swapping_operation (code, mode, op0, op1);
   if (tem)
return tem;
diff --git a/gcc/testsuite/gcc.target/i386/pr114428-1.c 
b/gcc/testsuite/gcc.target/i386/pr114428-1.c
new file mode 100644
index 000..927476f2269
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114428-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-final { scan-assembler-times "psrlw" 1 } } */
+/* { dg-final { scan-assembler-times "psrld" 1 } } */
+/* { dg-final { scan-assembler-times "psrlq" 1 { target { ! ia32 } } } } */
+
+
+#define SHIFTC 12
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+
+v8hi
+foo1 (v8hi a)
+{
+  return
+(a >> (16 - SHIFTC)) & (__extension__(v8hi){(1<> (32 - SHIFTC)) & (__extension__(v4si){(1<> (long long)(64 - SHIFTC)) & (__extension__(v2di){(1ULL<

[gcc r15-1048] Adjust rtx_cost for MEM to enable more simplification

2024-06-05 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:961dd0d635217c703a38c48903981e0d60962546

commit r15-1048-g961dd0d635217c703a38c48903981e0d60962546
Author: liuhongt 
Date:   Fri Apr 19 10:39:53 2024 +0800

Adjust rtx_cost for MEM to enable more simplification

For a CONST_VECTOR_DUPLICATE_P in the constant pool, it is just a broadcast
or one of the variants in ix86_vector_duplicate_simode_const.
Adjust the cost to COSTS_N_INSNS (2) + speed, which should be a little
bit larger than that of a broadcast.

gcc/ChangeLog:
PR target/114428
* config/i386/i386.cc (ix86_rtx_costs): Adjust cost for
CONST_VECTOR_DUPLICATE_P in constant_pool.
* config/i386/i386-expand.cc (ix86_broadcast_from_constant):
Remove static.
* config/i386/i386-protos.h (ix86_broadcast_from_constant):
Declare.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr114428.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc   |  2 +-
 gcc/config/i386/i386-protos.h|  1 +
 gcc/config/i386/i386.cc  | 13 +
 gcc/testsuite/gcc.target/i386/pr114428.c | 18 ++
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 215a998fc26..56d29c15f9a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -588,7 +588,7 @@ ix86_expand_move (machine_mode mode, rtx operands[])
 
 /* OP is a memref of CONST_VECTOR, return scalar constant mem
if CONST_VECTOR is a vec_duplicate, else return NULL.  */
-static rtx
+rtx
 ix86_broadcast_from_constant (machine_mode mode, rtx op)
 {
   int nunits = GET_MODE_NUNITS (mode);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index dbc861fb1ea..90712769200 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -107,6 +107,7 @@ extern void ix86_expand_clear (rtx);
 extern void ix86_expand_move (machine_mode, rtx[]);
 extern void ix86_expand_vector_move (machine_mode, rtx[]);
 extern void ix86_expand_vector_move_misalign (machine_mode, rtx[]);
+extern rtx ix86_broadcast_from_constant (machine_mode, rtx);
 extern rtx ix86_fixup_binary_operands (enum rtx_code, machine_mode,
   rtx[], bool = false);
 extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, machine_mode,
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 271da127a89..a9d62c84c52 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22191,6 +22191,19 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
   return true;
 
 case MEM:
+  /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
+or variants in ix86_vector_duplicate_simode_const.  */
+
+  if (GET_MODE_SIZE (mode) >= 16
+ && VECTOR_MODE_P (mode)
+ && SYMBOL_REF_P (XEXP (x, 0))
+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
+ && ix86_broadcast_from_constant (mode, x))
+   {
+ *total = COSTS_N_INSNS (2) + speed;
+ return true;
+   }
+
   /* An insn that accesses memory is slightly more expensive
  than one that does not.  */
   if (speed)
diff --git a/gcc/testsuite/gcc.target/i386/pr114428.c 
b/gcc/testsuite/gcc.target/i386/pr114428.c
new file mode 100644
index 000..bbbc5a080f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114428.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v3 -mno-avx512f -O2" } */
+/* { dg-final { scan-assembler-not "vpsra[dw]" } } */
+
+void
+foo2 (char* __restrict a, short* b)
+{
+  for (int i = 0; i != 32; i++)
+a[i] = b[i] >> (short)8;
+}
+
+void
+foo3 (char* __restrict a, short* b)
+{
+  for (int i = 0; i != 16; i++)
+a[i] = b[i] >> (short)8;
+}
+


[gcc r15-1049] [libstdc++] add _GLIBCXX_CLANG to workaround predefined __clang__

2024-06-05 Thread Alexandre Oliva via Libstdc++-cvs
https://gcc.gnu.org/g:67be156f952241ad99af1eedc4da91bc2e201cd3

commit r15-1049-g67be156f952241ad99af1eedc4da91bc2e201cd3
Author: Alexandre Oliva 
Date:   Wed Jun 5 22:43:54 2024 -0300

[libstdc++] add _GLIBCXX_CLANG to workaround predefined __clang__

A proprietary embedded operating system that uses clang as its primary
compiler ships headers that require __clang__ to be defined.  Defining
that macro causes libstdc++ to adopt workarounds that work for clang
but that break for GCC.

So, introduce a _GLIBCXX_CLANG macro, and a convention to test for it
rather than for __clang__, so that a GCC variant that adds -D__clang__
to satisfy system headers can also -D_GLIBCXX_CLANG=0 to avoid
workarounds that are not meant for GCC.

I've left fast_float and ryu files alone, their tests for __clang__
don't seem to be harmful for GCC, they don't include bits/c++config,
and patching such third-party files would just make trouble for
updating them without visible benefit.  pstl_config.h, though also
imported, required adjustment.


for  libstdc++-v3/ChangeLog

* include/bits/c++config (_GLIBCXX_CLANG): Define or undefine.
* include/bits/locale_facets_nonio.tcc: Test for it.
* include/bits/stl_bvector.h: Likewise.
* include/c_compatibility/stdatomic.h: Likewise.
* include/experimental/bits/simd.h: Likewise.
* include/experimental/bits/simd_builtin.h: Likewise.
* include/experimental/bits/simd_detail.h: Likewise.
* include/experimental/bits/simd_x86.h: Likewise.
* include/experimental/simd: Likewise.
* include/std/complex: Likewise.
* include/std/ranges: Likewise.
* include/std/variant: Likewise.
* include/pstl/pstl_config.h: Likewise.

Diff:
---
 libstdc++-v3/include/bits/c++config   | 13 -
 libstdc++-v3/include/bits/locale_facets_nonio.tcc |  2 +-
 libstdc++-v3/include/bits/stl_bvector.h   |  2 +-
 libstdc++-v3/include/c_compatibility/stdatomic.h  |  2 +-
 libstdc++-v3/include/experimental/bits/simd.h | 10 +-
 libstdc++-v3/include/experimental/bits/simd_builtin.h |  4 ++--
 libstdc++-v3/include/experimental/bits/simd_detail.h  |  8 
 libstdc++-v3/include/experimental/bits/simd_x86.h | 12 ++--
 libstdc++-v3/include/experimental/simd|  2 +-
 libstdc++-v3/include/pstl/pstl_config.h   |  4 ++--
 libstdc++-v3/include/std/complex  |  4 ++--
 libstdc++-v3/include/std/ranges   |  8 
 libstdc++-v3/include/std/variant  |  2 +-
 13 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/libstdc++-v3/include/bits/c++config 
b/libstdc++-v3/include/bits/c++config
index b57e3f338e9..6dca2d9467a 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -481,9 +481,20 @@ _GLIBCXX_END_NAMESPACE_VERSION
 // Define if compatibility should be provided for -mlong-double-64.
 #undef _GLIBCXX_LONG_DOUBLE_COMPAT
 
+// Use an alternate macro to test for clang, so as to provide an easy
+// workaround for systems (such as vxworks) whose headers require
+// __clang__ to be defined, even when compiling with GCC.
+#if !defined _GLIBCXX_CLANG && defined __clang__
+# define _GLIBCXX_CLANG __clang__
+// Turn -D_GLIBCXX_CLANG=0 into -U_GLIBCXX_CLANG, so that
+// _GLIBCXX_CLANG can be tested as defined, just like __clang__.
+#elif !_GLIBCXX_CLANG
+# undef _GLIBCXX_CLANG
+#endif
+
 // Define if compatibility should be provided for alternative 128-bit long
 // double formats. Not possible for Clang until __ibm128 is supported.
-#ifndef __clang__
+#ifndef _GLIBCXX_CLANG
 #undef _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT
 #endif
 
diff --git a/libstdc++-v3/include/bits/locale_facets_nonio.tcc 
b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
index 8f67be5a614..72136f42f08 100644
--- a/libstdc++-v3/include/bits/locale_facets_nonio.tcc
+++ b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
@@ -1465,7 +1465,7 @@ _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11
   ctype<_CharT> const& __ctype = use_facet >(__loc);
   __err = ios_base::goodbit;
   bool __use_state = false;
-#if __GNUC__ >= 5 && !defined(__clang__)
+#if __GNUC__ >= 5 && !defined(_GLIBCXX_CLANG)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpmf-conversions"
   // Nasty hack.  The C++ standard mandates that get invokes the do_get
diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index d567e26f4e4..52153cadf8f 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -185,7 +185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 void
 _M_assume_normalized() const
 {
-#if __has_attribute(__assume__) && !defined(__clang__)
+#if __has_attrib

[gcc r15-1050] Refine testcase for power10.

2024-06-05 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:fcfce55c85f842ed843cbc4aabe744c6a004dead

commit r15-1050-gfcfce55c85f842ed843cbc4aabe744c6a004dead
Author: liuhongt 
Date:   Thu Jun 6 11:27:53 2024 +0800

Refine testcase for power10.

For power10, there are 3 extra REG_EQUIV notes containing (fix:SI.  To avoid
the failure, check that (fix:SI comes from the pattern, not from a note.

gcc/testsuite/ChangeLog:

PR target/115365
* gcc.dg/pr100927.c: Don't scan fix:SI from the note.

Diff:
---
 gcc/testsuite/gcc.dg/pr100927.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr100927.c b/gcc/testsuite/gcc.dg/pr100927.c
index ea0e627befa..8a7d69c3831 100644
--- a/gcc/testsuite/gcc.dg/pr100927.c
+++ b/gcc/testsuite/gcc.dg/pr100927.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftrapping-math -fdump-tree-optimized -fdump-rtl-final" } 
*/
 /* { dg-final { scan-tree-dump-times {(?n)= \(int\)} 3 "optimized" } }  */
-/* { dg-final { scan-rtl-dump-times {(?n)\(fix:SI} 3 "final" } }  */
+/* { dg-final { scan-rtl-dump-times {(?n)^[ \t]*\(fix:SI} 3 "final" } }  */
 
 int
 foo_ofr ()


[gcc r15-1051] MIPS: Need COSTS_N_INSNS in mips_insn_cost

2024-06-05 Thread YunQiang Su via Gcc-cvs
https://gcc.gnu.org/g:edd90d6d298f006e2c2e6c710ab97cd5ad733cb5

commit r15-1051-gedd90d6d298f006e2c2e6c710ab97cd5ad733cb5
Author: YunQiang Su 
Date:   Thu Jun 6 12:28:31 2024 +0800

MIPS: Need COSTS_N_INSNS in mips_insn_cost

In mips_insn_cost, COSTS_N_INSNS is missing when we return the cost
if count * ratio > 0.

gcc
* config/mips/mips.cc (mips_insn_cost): Add missing COSTS_N_INSNS
to count.

Diff:
---
 gcc/config/mips/mips.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index b478cddc8ad..278d9446482 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -4199,7 +4199,7 @@ mips_insn_cost (rtx_insn *x, bool speed)
 
   count = get_attr_insn_count (x);
   ratio = get_attr_perf_ratio (x);
-  cost = count * ratio;
+  cost = COSTS_N_INSNS (count) * ratio;
   if (cost > 0)
 return cost;


[gcc r15-1052] libgomp: Mark Loop transformation constructs as implemented in the implementation status

2024-06-05 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:6a6bab4ba36c5d190b3151055e683e7067be92c1

commit r15-1052-g6a6bab4ba36c5d190b3151055e683e7067be92c1
Author: Jakub Jelinek 
Date:   Thu Jun 6 08:30:42 2024 +0200

libgomp: Mark Loop transformation constructs as implemented in the 
implementation status

The implementation has been committed in r15-1037.

2024-06-06  Jakub Jelinek  

* libgomp.texi (OpenMP 5.1 status): Mark Loop transformation 
constructs
as implemented.

Diff:
---
 libgomp/libgomp.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index d612488ad10..c52bb2672c6 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -302,7 +302,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{error} directive @tab Y @tab
 @item @code{masked} construct @tab Y @tab
 @item @code{scope} directive @tab Y @tab
-@item Loop transformation constructs @tab N @tab
+@item Loop transformation constructs @tab Y @tab
 @item @code{strict} modifier in the @code{grainsize} and @code{num_tasks}
   clauses of the @code{taskloop} construct @tab Y @tab
 @item @code{align} clause in @code{allocate} directive @tab P


[gcc r15-1053] Relax COND_EXPR reduction vectorization SLP restriction

2024-06-05 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:28edeb1409a7b839407ec06031899b933390bff3

commit r15-1053-g28edeb1409a7b839407ec06031899b933390bff3
Author: Richard Biener 
Date:   Fri Feb 23 16:16:38 2024 +0100

Relax COND_EXPR reduction vectorization SLP restriction

Allow one-lane SLP except for the case where we need to swap the arms.

* tree-vect-stmts.cc (vectorizable_condition): Allow
single-lane SLP, but not when we need to swap then and
else clause.

Diff:
---
 gcc/tree-vect-stmts.cc | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b26cc74f417..c82381e799e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12116,7 +12116,7 @@ vectorizable_condition (vec_info *vinfo,
 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
   if (for_reduction)
 {
-  if (slp_node)
+  if (slp_node && SLP_TREE_LANES (slp_node) > 1)
return false;
   reduc_info = info_for_reduction (vinfo, stmt_info);
   reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
@@ -12205,6 +12205,10 @@ vectorizable_condition (vec_info *vinfo,
  cond_expr = NULL_TREE;
}
}
+  /* ???  The vectorized operand query below doesn't allow swapping
+this way for SLP.  */
+  if (slp_node)
+   return false;
   std::swap (then_clause, else_clause);
 }


[gcc r15-1055] Add double reduction support for SLP vectorization

2024-06-05 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:2ee41ef76a99ef5a8b62b351e2c01dad93f51b18

commit r15-1055-g2ee41ef76a99ef5a8b62b351e2c01dad93f51b18
Author: Richard Biener 
Date:   Tue Mar 5 15:28:58 2024 +0100

Add double reduction support for SLP vectorization

The following makes double reduction vectorization work when
using (single-lane) SLP vectorization.

* tree-vect-loop.cc (vect_analyze_scalar_cycles_1): Queue
double reductions in LOOP_VINFO_REDUCTIONS.
(vect_create_epilog_for_reduction): Remove asserts disabling
SLP for double reductions.
(vectorizable_reduction): Analyze SLP double reductions
only once and start off at the correct places.
* tree-vect-slp.cc (vect_get_and_check_slp_defs): Allow
vect_double_reduction_def.
(vect_build_slp_tree_2): Fix condition for the ignored
reduction initial values.
* tree-vect-stmts.cc (vect_analyze_stmt): Allow
vect_double_reduction_def.

Diff:
---
 gcc/tree-vect-loop.cc  | 35 +--
 gcc/tree-vect-slp.cc   |  3 ++-
 gcc/tree-vect-stmts.cc |  4 
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ccd6acef5c5..b9e8e9b5559 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -685,6 +685,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, 
class loop *loop,
 
   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
  STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def;
+ /* Make it accessible for SLP vectorization.  */
+ LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt_info);
 }
   else
 {
@@ -5975,7 +5977,6 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
   stmt_vec_info rdef_info = stmt_info;
   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
 {
-  gcc_assert (!slp_node);
   double_reduc = true;
   stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
(stmt_info->stmt, 0));
@@ -6020,7 +6021,7 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
 {
   outer_loop = loop;
   loop = loop->inner;
-  gcc_assert (!slp_node && double_reduc);
+  gcc_assert (double_reduc);
 }
 
   vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
@@ -6035,7 +6036,7 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
 for induc_val, use initial_def.  */
   if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
-  /* ???  Coverage for double_reduc and 'else' isn't clear.  */
+  /* ???  Coverage for 'else' isn't clear.  */
 }
   else
 {
@@ -7605,15 +7606,16 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
   return true;
 }
-  if (slp_node)
-{
-  slp_node_instance->reduc_phis = slp_node;
-  /* ???  We're leaving slp_node to point to the PHIs, we only
-need it to get at the number of vector stmts which wasn't
-yet initialized for the instance root.  */
-}
   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
 {
+  if (gimple_bb (stmt_info->stmt) != loop->header)
+   {
+ /* For SLP we arrive here for both the inner loop LC PHI and
+the outer loop PHI.  The latter is what we want to analyze
+the reduction with.  */
+ gcc_assert (slp_node);
+ return true;
+   }
   use_operand_p use_p;
   gimple *use_stmt;
   bool res = single_imm_use (gimple_phi_result (stmt_info->stmt),
@@ -7622,6 +7624,14 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   phi_info = loop_vinfo->lookup_stmt (use_stmt);
 }
 
+  if (slp_node)
+{
+  slp_node_instance->reduc_phis = slp_node;
+  /* ???  We're leaving slp_node to point to the PHIs, we only
+need it to get at the number of vector stmts which wasn't
+yet initialized for the instance root.  */
+}
+
   /* PHIs should not participate in patterns.  */
   gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
   gphi *reduc_def_phi = as_a  (phi_info->stmt);
@@ -7637,6 +7647,11 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   bool only_slp_reduc_chain = true;
   stmt_info = NULL;
   slp_tree slp_for_stmt_info = slp_node ? slp_node_instance->root : NULL;
+  /* For double-reductions we start SLP analysis at the inner loop LC PHI
+ which is the def of the outer loop live stmt.  */
+  if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def
+  && slp_node)
+slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
   while (reduc_def != PHI_RESULT (reduc_def_phi))
 {
   stmt_vec_info def = loop_vinf

[gcc r15-1054] Allow single-lane COND_REDUCTION vectorization

2024-06-05 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:202a9c8fe7db9dd94e5a77f42e54ef3d966f88e8

commit r15-1054-g202a9c8fe7db9dd94e5a77f42e54ef3d966f88e8
Author: Richard Biener 
Date:   Fri Mar 1 14:39:08 2024 +0100

Allow single-lane COND_REDUCTION vectorization

The following enables single-lane COND_REDUCTION vectorization.

* tree-vect-loop.cc (vect_create_epilog_for_reduction):
Adjust for single-lane COND_REDUCTION SLP vectorization.
(vectorizable_reduction): Likewise.
(vect_transform_cycle_phi): Likewise.

Diff:
---
 gcc/tree-vect-loop.cc | 97 ++-
 1 file changed, 81 insertions(+), 16 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 06292ed8bbe..ccd6acef5c5 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6030,7 +6030,13 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
   tree induc_val = NULL_TREE;
   tree adjustment_def = NULL;
   if (slp_node)
-;
+{
+  /* Optimize: for induction condition reduction, if we can't use zero
+for induc_val, use initial_def.  */
+  if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
+   induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
+  /* ???  Coverage for double_reduc and 'else' isn't clear.  */
+}
   else
 {
   /* Optimize: for induction condition reduction, if we can't use zero
@@ -6075,23 +6081,46 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
   if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
 {
   auto_vec, 2> ccompares;
-  stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
-  cond_info = vect_stmt_to_vectorize (cond_info);
-  while (cond_info != reduc_info)
+  if (slp_node)
{
- if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
+ slp_tree cond_node = slp_node_instance->root;
+ while (cond_node != slp_node_instance->reduc_phis)
{
- gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0];
- gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
- ccompares.safe_push
-   (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
-STMT_VINFO_REDUC_IDX (cond_info) == 2));
+ stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node);
+ if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
+   {
+ gimple *vec_stmt
+   = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (cond_node)[0]);
+ gcc_assert (gimple_assign_rhs_code (vec_stmt) == 
VEC_COND_EXPR);
+ ccompares.safe_push
+   (std::make_pair (gimple_assign_rhs1 (vec_stmt),
+STMT_VINFO_REDUC_IDX (cond_info) == 2));
+   }
+ /* ???  We probably want to have REDUC_IDX on the SLP node?  */
+ cond_node = SLP_TREE_CHILDREN
+   (cond_node)[STMT_VINFO_REDUC_IDX (cond_info)];
}
- cond_info
-   = loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
-1 + STMT_VINFO_REDUC_IDX
-   (cond_info)));
+   }
+  else
+   {
+ stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
  cond_info = vect_stmt_to_vectorize (cond_info);
+ while (cond_info != reduc_info)
+   {
+ if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
+   {
+ gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0];
+ gcc_assert (gimple_assign_rhs_code (vec_stmt) == 
VEC_COND_EXPR);
+ ccompares.safe_push
+   (std::make_pair (gimple_assign_rhs1 (vec_stmt),
+STMT_VINFO_REDUC_IDX (cond_info) == 2));
+   }
+ cond_info
+   = loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
+1 + STMT_VINFO_REDUC_IDX
+(cond_info)));
+ cond_info = vect_stmt_to_vectorize (cond_info);
+   }
}
   gcc_assert (ccompares.length () != 0);
 
@@ -7844,7 +7873,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   /* If we have a condition reduction, see if we can simplify it further.  */
   if (v_reduc_type == COND_REDUCTION)
 {
-  if (slp_node)
+  if (slp_node && SLP_TREE_LANES (slp_node) != 1)
return false;
 
   /* When the condition uses the reduction value in the condition, fail.  */
@@ -8050,6 +8079,18 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
}
 }
 
+  if ((reduction_type == COND_REDUCTION
+   || reduction_type == INTEGER_INDUC_COND_REDUCTION
+

[gcc r15-1056] Allow single-lane SLP in-order reductions

2024-06-05 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:4653b682ef161c3c2fc7bf8462b8f9206a1349e6

commit r15-1056-g4653b682ef161c3c2fc7bf8462b8f9206a1349e6
Author: Richard Biener 
Date:   Tue Mar 5 15:46:24 2024 +0100

Allow single-lane SLP in-order reductions

The single-lane case isn't different from non-SLP, no re-association
implied.  But the transform stage cannot handle a conditional reduction
op which isn't checked during analysis - this makes it work, exercised
with a single-lane non-reduction-chain by gcc.target/i386/pr112464.c

* tree-vect-loop.cc (vectorizable_reduction): Allow
single-lane SLP in-order reductions.
(vectorize_fold_left_reduction): Handle SLP reduction with
conditional reduction op.

Diff:
---
 gcc/tree-vect-loop.cc | 48 +++++++++++++++++++-----------------------------
 1 file changed, 19 insertions(+), 29 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index b9e8e9b5559..ceb92156b58 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7139,56 +7139,46 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
   gcc_assert (TREE_CODE_LENGTH (tree_code (code)) == binary_op);
 
   if (slp_node)
-{
-  if (is_cond_op)
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"fold-left reduction on SLP not supported.\n");
- return false;
-   }
-
-  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
-   TYPE_VECTOR_SUBPARTS (vectype_in)));
-}
+gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
+ TYPE_VECTOR_SUBPARTS (vectype_in)));
 
   /* The operands either come from a binary operation or an IFN_COND operation.
  The former is a gimple assign with binary rhs and the latter is a
  gimple call with four arguments.  */
   gcc_assert (num_ops == 2 || num_ops == 4);
-  tree op0, opmask;
-  if (!is_cond_op)
-op0 = ops[1 - reduc_index];
-  else
-{
-  op0 = ops[2 + (1 - reduc_index)];
-  opmask = ops[0];
-  gcc_assert (!slp_node);
-}
 
   int group_size = 1;
   stmt_vec_info scalar_dest_def_info;
   auto_vec<tree> vec_oprnds0, vec_opmask;
   if (slp_node)
 {
-  auto_vec<vec<tree> > vec_defs (2);
-  vect_get_slp_defs (loop_vinfo, slp_node, &vec_defs);
-  vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
-  vec_defs[0].release ();
-  vec_defs[1].release ();
+  vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0)
+ + (1 - reduc_index)],
+ &vec_oprnds0);
   group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
   scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+  /* For an IFN_COND_OP we also need the vector mask operand.  */
+  if (is_cond_op)
+   vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask);
 }
   else
 {
+  tree op0, opmask;
+  if (!is_cond_op)
+   op0 = ops[1 - reduc_index];
+  else
+   {
+ op0 = ops[2 + (1 - reduc_index)];
+ opmask = ops[0];
+   }
   vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
 op0, &vec_oprnds0);
   scalar_dest_def_info = stmt_info;
 
   /* For an IFN_COND_OP we also need the vector mask operand.  */
   if (is_cond_op)
- vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
-opmask, &vec_opmask);
+   vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
+  opmask, &vec_opmask);
 }
 
   gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
@@ -8210,7 +8200,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 }
 
   if (reduction_type == FOLD_LEFT_REDUCTION
-  && slp_node
+  && (slp_node && SLP_TREE_LANES (slp_node) > 1)
   && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
 {
   /* We cannot use in-order reductions in this case because there is