[gcc r15-4220] Remove support for HP-UX 10

2024-10-09 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:820cd5266e714750888dd2cdf4793cde8741c1db

commit r15-4220-g820cd5266e714750888dd2cdf4793cde8741c1db
Author: Eric Botcazou 
Date:   Wed Oct 9 21:21:36 2024 +0200

Remove support for HP-UX 10

gcc/ada
* Makefile.rtl: Remove HP-UX 10 section.
* libgnarl/s-osinte__hpux-dce.ads: Delete.
* libgnarl/s-osinte__hpux-dce.adb: Likewise.
* libgnarl/s-taprop__hpux-dce.adb: Likewise.
* libgnarl/s-taspri__hpux-dce.ads: Likewise.
* libgnat/s-oslock__hpux-dce.ads: Likewise.

Diff:
---
 gcc/ada/Makefile.rtl|   26 -
 gcc/ada/libgnarl/s-osinte__hpux-dce.adb |  494 -
 gcc/ada/libgnarl/s-osinte__hpux-dce.ads |  487 -
 gcc/ada/libgnarl/s-taprop__hpux-dce.adb | 1210 ---
 gcc/ada/libgnarl/s-taspri__hpux-dce.ads |  106 ---
 gcc/ada/libgnat/s-oslock__hpux-dce.ads  |   61 --
 6 files changed, 2384 deletions(-)

diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl
index 246c0059fb7f..a36f60170b5e 100644
--- a/gcc/ada/Makefile.rtl
+++ b/gcc/ada/Makefile.rtl
@@ -1972,32 +1972,6 @@ ifeq ($(strip $(filter-out s390% linux%,$(target_cpu) 
$(target_os))),)
 endif
 endif
 
-# HP/PA HP-UX 10
-ifeq ($(SELECTED_PAIRS),PAIRS_NONE)
-ifeq ($(strip $(filter-out hppa% hp hpux10%,$(target_cpu) $(target_vendor) 
$(target_os))),)
-
-  SELECTED_PAIRS=hppa-hpux10
-
-  LIBGNAT_TARGET_PAIRS = \
-  a-intnam.adshttp://www.gnu.org/licenses/>.  --
---  --
--- GNARL was developed by the GNARL team at Florida State University.   --
--- Extensive contributions were provided by Ada Core Technologies, Inc. --
---  --
---
-
---  This is a DCE version of this package.
---  Currently HP-UX and SNI use this file
-
---  This package encapsulates all direct interfaces to OS services
---  that are needed by children of System.
-
-with Interfaces.C; use Interfaces.C;
-
-package body System.OS_Interface is
-
-   -
-   -- To_Duration --
-   -
-
-   function To_Duration (TS : timespec) return Duration is
-   begin
-  return Duration (TS.tv_sec) + Duration (TS.tv_nsec) / 10#1#E9;
-   end To_Duration;
-
-   -
-   -- To_Timespec --
-   -
-
-   function To_Timespec (D : Duration) return timespec is
-  S : time_t;
-  F : Duration;
-
-   begin
-  S := time_t (Long_Long_Integer (D));
-  F := D - Duration (S);
-
-  --  If F has negative value due to a round-up, adjust for positive F
-  --  value.
-  if F < 0.0 then
- S := S - 1;
- F := F + 1.0;
-  end if;
-
-  return timespec'(tv_sec => S,
-   tv_nsec => long (Long_Long_Integer (F * 10#1#E9)));
-   end To_Timespec;
-
-   -
-   -- POSIX.1c  Section 3 --
-   -
-
-   function sigwait
- (set : access sigset_t;
-  sig : access Signal) return int
-   is
-  Result : int;
-
-   begin
-  Result := sigwait (set);
-
-  if Result = -1 then
- sig.all := 0;
- return errno;
-  end if;
-
-  sig.all := Signal (Result);
-  return 0;
-   end sigwait;
-
-   --  DCE_THREADS does not have pthread_kill. Instead, we just ignore it
-
-   function pthread_kill (thread : pthread_t; sig : Signal) return int is
-  pragma Unreferenced (thread, sig);
-   begin
-  return 0;
-   end pthread_kill;
-
-   --
-   -- POSIX.1c  Section 11 --
-   --
-
-   --  For all following functions, DCE Threads has a non standard behavior.
-   --  It sets errno but the standard Posix requires it to be returned.
-
-   function pthread_mutexattr_init
- (attr : access pthread_mutexattr_t) return int
-   is
-  function pthread_mutexattr_create
-(attr : access pthread_mutexattr_t) return int;
-  pragma Import (C, pthread_mutexattr_create, "pthread_mutexattr_create");
-
-   begin
-  if pthread_mutexattr_create (attr) /= 0 then
- return errno;
-  else
- return 0;
-  end if;
-   end pthread_mutexattr_init;
-
-   function pthread_mutexattr_destroy
- (attr : access pthread_mutexattr_t) return int
-   is
-  function pthread_mutexattr_delete
-(attr : access pthread_mutexattr_t) return int;
-  pragma Import (C, pthread_mutexattr_delete, "pthread_mutexattr_delete");
-
-   begin
-  if pthread_mutexattr_delete (attr) /= 0 then
- return errno;
-  else
- return 0;
-  end if;
-   end pthread_mutexattr_destroy;
-
-   function pthread_mutex_init
- (mutex : access pthread_mutex_t;
-  attr  : access pthread_mutexattr_t) return int
-   is
-  function pthread_mutex_init_base
-

[gcc r15-4221] Fix LTO bootstrap failure with -Werror=lto-type-mismatch

2024-10-09 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:7ac96b05cfa7478706dce175e7c7b09cbf559451

commit r15-4221-g7ac96b05cfa7478706dce175e7c7b09cbf559451
Author: Eric Botcazou 
Date:   Wed Oct 9 21:31:13 2024 +0200

Fix LTO bootstrap failure with -Werror=lto-type-mismatch

In GNAT's implementation model, using convention C (or C_Pass_By_Copy) has
no effect on the internal representation of types since the representation
is identical to that of C by default.  It's even counter-productive given
the implementation advice listed in B.3(63-71) so the interface between the
front-end and gigi does not use it and instead uses structurally identical
types on both sides.

gcc/ada
PR ada/117038
* fe.h (struct c_array): Add 'const' to declaration of pointer.
(C_Source_Buffer): Use consistent formatting.
* par-ch3.adb (P_Component_Items): Properly set Aliased_Present on
access definition.
* sinput.ads: Remove clause for Interfaces.C.
(C_Array): Change type of Length to Integer and make both components
aliased.  Remove Convention aspect.
(C_Source_Buffer): Remove all aspects.
* sinput.adb (C_Source_Buffer): Adjust to above change.

Diff:
---
 gcc/ada/fe.h|  6 +++---
 gcc/ada/par-ch3.adb |  2 +-
 gcc/ada/sinput.adb  |  6 ++
 gcc/ada/sinput.ads  | 12 ++--
 4 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/gcc/ada/fe.h b/gcc/ada/fe.h
index 36f5e9bfe9ed..e3e65fe18bd1 100644
--- a/gcc/ada/fe.h
+++ b/gcc/ada/fe.h
@@ -348,17 +348,17 @@ extern void Set_Present_Expr  (Node_Id, Uint);
 /* sinput: */
 
 struct c_array {
-  char *pointer;
+  const char *pointer;
   int length;
 };
 
-#define C_Source_Buffer sinput__c_source_buffer
+#define C_Source_Buffer    sinput__c_source_buffer
 #define Debug_Source_Name  sinput__debug_source_name
 #define Get_Column_Number  sinput__get_column_number
 #define Get_Logical_Line_Number  sinput__get_logical_line_number
 #define Get_Source_File_Index  sinput__get_source_file_index
 
-extern struct c_array C_Source_Buffer (Source_File_Index);
+extern struct c_array C_Source_Buffer  (Source_File_Index);
 extern File_Name_Type Debug_Source_Name(Source_File_Index);
 extern Column_Number_Type Get_Column_Number(Source_Ptr);
 extern Line_Number_Type Get_Logical_Line_Number(Source_Ptr);
diff --git a/gcc/ada/par-ch3.adb b/gcc/ada/par-ch3.adb
index a5f4319debf6..04246dc04ebe 100644
--- a/gcc/ada/par-ch3.adb
+++ b/gcc/ada/par-ch3.adb
@@ -3841,7 +3841,7 @@ package body Ch3 is
--  end if;
 
Set_Subtype_Indication (CompDef_Node, Empty);
-   Set_Aliased_Present(CompDef_Node, False);
+   Set_Aliased_Present(CompDef_Node, Aliased_Present);
Set_Access_Definition  (CompDef_Node,
  P_Access_Definition (Not_Null_Present));
 else
diff --git a/gcc/ada/sinput.adb b/gcc/ada/sinput.adb
index f2e6dda1c991..2b7439f1036a 100644
--- a/gcc/ada/sinput.adb
+++ b/gcc/ada/sinput.adb
@@ -281,10 +281,8 @@ package body Sinput is
-
 
function C_Source_Buffer (S : SFI) return C_Array is
-  use type Interfaces.C.int;
-
-  Length : constant Interfaces.C.int :=
-Interfaces.C.int (Source_Last (S) - Source_First (S));
+  Length : constant Integer :=
+Integer (Source_Last (S) - Source_First (S));
 
   Text : constant Source_Buffer_Ptr := Source_Text (S);
 
diff --git a/gcc/ada/sinput.ads b/gcc/ada/sinput.ads
index ce47fef76db3..d33c47083526 100644
--- a/gcc/ada/sinput.ads
+++ b/gcc/ada/sinput.ads
@@ -56,7 +56,6 @@
 
 with Alloc;
 with Casing; use Casing;
-with Interfaces.C;
 with Namet;  use Namet;
 with System;
 with Table;
@@ -708,12 +707,13 @@ package Sinput is
--  to avoid memory leaks.
 
type C_Array is record
-  Pointer : access constant Character;
-  Length  : Interfaces.C.int range 0 .. Interfaces.C.int'Last;
-   end record with Convention => C_Pass_By_Copy;
+  Pointer : aliased access constant Character;
+  Length  : aliased Integer;
+   end record;
+   --  WARNING: There is a matching C declaration of this type in fe.h
 
-   function C_Source_Buffer (S : SFI) return C_Array with
- Export, Convention => C, External_Name => "sinput__c_source_buffer";
+   function C_Source_Buffer (S : SFI) return C_Array;
+   --  WARNING: There is a matching C declaration of this subprogram in fe.h
 
 private
pragma Inline (File_Name);


[gcc r15-4222] Revert "RISC-V: Enable builtin __riscv_mul with Zmmul extension."

2024-10-09 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e889235cb004b62f3004408283ce91eb20eb521a

commit r15-4222-ge889235cb004b62f3004408283ce91eb20eb521a
Author: Jeff Law 
Date:   Wed Oct 9 16:21:56 2024 -0600

Revert "RISC-V: Enable builtin __riscv_mul with Zmmul extension."

This reverts commit 2990f5802a727cbd717587c3a345fa940193049f.

Diff:
---
 gcc/config/riscv/riscv-c.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 7e9c478e97bb..71112d9c66d7 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -123,7 +123,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   if (TARGET_ATOMIC)
 builtin_define ("__riscv_atomic");
 
-  if (TARGET_ZMMUL)
+  if (TARGET_MUL)
 builtin_define ("__riscv_mul");
   if (TARGET_DIV)
 builtin_define ("__riscv_div");


[gcc r15-4223] Revert "RISC-V: Add implication for M extension."

2024-10-09 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:df3bda457be49b29c09944a0d639ce9ec0b7d282

commit r15-4223-gdf3bda457be49b29c09944a0d639ce9ec0b7d282
Author: Jeff Law 
Date:   Wed Oct 9 16:22:06 2024 -0600

Revert "RISC-V: Add implication for M extension."

This reverts commit 0a193466f2e87acef9b86e0d086bc6f6017518b0.

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 60595a3e3561..2adebe0b6f29 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -75,8 +75,6 @@ struct riscv_implied_info_t
 /* Implied ISA info, must end with NULL sentinel.  */
 static const riscv_implied_info_t riscv_implied_info[] =
 {
-  {"m", "zmmul"},
-
   {"d", "f"},
   {"f", "zicsr"},
   {"d", "zicsr"},


[gcc r15-4216] libstdc++: Test 17_intro/names.cc with -D_FORTIFY_SOURCE=2 [PR116210]

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:4f97411c0d45dc3d04b5d16384fee111889a7c41

commit r15-4216-g4f97411c0d45dc3d04b5d16384fee111889a7c41
Author: Jonathan Wakely 
Date:   Fri Oct 4 12:40:47 2024 +0100

libstdc++: Test 17_intro/names.cc with -D_FORTIFY_SOURCE=2 [PR116210]

Add a new testcase that repeats 17_intro/names.cc but with
_FORTIFY_SOURCE defined, to find problems in Glibc fortify wrappers like
https://sourceware.org/bugzilla/show_bug.cgi?id=32052 (which is fixed
now).

libstdc++-v3/ChangeLog:

PR libstdc++/116210
* testsuite/17_intro/names.cc (sz): Undef for versions of Glibc
that use it in the fortify wrappers.
* testsuite/17_intro/names_fortify.cc: New test.

Diff:
---
 libstdc++-v3/testsuite/17_intro/names.cc | 7 +++
 libstdc++-v3/testsuite/17_intro/names_fortify.cc | 6 ++
 2 files changed, 13 insertions(+)

diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index bea2d19ecba0..5deb310dc313 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -383,4 +383,11 @@
 #undef y
 #endif
 
+#if defined __GLIBC_PREREQ && defined _FORTIFY_SOURCE
+# if ! __GLIBC_PREREQ(2,41)
+// https://sourceware.org/bugzilla/show_bug.cgi?id=32052
+#  undef sz
+# endif
+#endif
+
 #include <bits/stdc++.h>
diff --git a/libstdc++-v3/testsuite/17_intro/names_fortify.cc 
b/libstdc++-v3/testsuite/17_intro/names_fortify.cc
new file mode 100644
index 000000000000..c975412074be
--- /dev/null
+++ b/libstdc++-v3/testsuite/17_intro/names_fortify.cc
@@ -0,0 +1,6 @@
+// { dg-do compile { target *-*-linux* } }
+// { dg-add-options no_pch }
+
+#define _FORTIFY_SOURCE 2
+// Now we can define the macros to poison uses of non-reserved names:
+#include "names.cc"


[gcc r13-9099] Fix build failure caused by previous change

2024-10-09 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:8f22fea3171a65a1c6db874a23b8ed9988ded9ba

commit r13-9099-g8f22fea3171a65a1c6db874a23b8ed9988ded9ba
Author: Eric Botcazou 
Date:   Wed Oct 9 21:14:52 2024 +0200

Fix build failure caused by previous change

gcc/ada/
PR ada/115535
* exp_put_image.adb: Remove clauses for Sem_Aux.

Diff:
---
 gcc/ada/exp_put_image.adb | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/ada/exp_put_image.adb b/gcc/ada/exp_put_image.adb
index 578637f3a3e2..4d7f8c228421 100644
--- a/gcc/ada/exp_put_image.adb
+++ b/gcc/ada/exp_put_image.adb
@@ -37,7 +37,6 @@ with Nlists; use Nlists;
 with Nmake;  use Nmake;
 with Opt;use Opt;
 with Rtsfind;use Rtsfind;
-with Sem_Aux;use Sem_Aux;
 with Sem_Util;   use Sem_Util;
 with Sinfo;  use Sinfo;
 with Sinfo.Nodes;use Sinfo.Nodes;


[gcc r15-4217] testsuite: arm: use effective-target for mod* tests

2024-10-09 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:08e91d71e5cc155f1fe7b9ee1c44829aa24ff921

commit r15-4217-g08e91d71e5cc155f1fe7b9ee1c44829aa24ff921
Author: Torbjörn SVENSSON 
Date:   Wed Oct 9 22:02:58 2024 +0200

testsuite: arm: use effective-target for mod* tests

This fixes a typo introduced in r15-4200-gcf08dd297ca that was reported
at https://linaro.atlassian.net/browse/GNU-1369.

gcc/testsuite/ChangeLog

* gcc.target/arm/mod_2.c: Corrected effective-target to
arm_cpu_cortex_a57_ok.
* gcc.target/arm/mod_256.c: Likewise.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/mod_2.c   | 2 +-
 gcc/testsuite/gcc.target/arm/mod_256.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c 
b/gcc/testsuite/gcc.target/arm/mod_2.c
index 3a203b67d73e..5b8dec44ed54 100644
--- a/gcc/testsuite/gcc.target/arm/mod_2.c
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm32 } */
-/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-require-effective-target arm_cpu_cortex_a57_ok } */
 /* { dg-options "-O2 -save-temps" } */
 /* { dg-add-options arm_cpu_cortex_a57 } */
 
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c 
b/gcc/testsuite/gcc.target/arm/mod_256.c
index 3521d7a05f3e..8589b948f41d 100644
--- a/gcc/testsuite/gcc.target/arm/mod_256.c
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm32 } */
-/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-require-effective-target arm_cpu_cortex_a57_ok } */
 /* { dg-options "-O2 -save-temps" } */
 /* { dg-add-options arm_cpu_cortex_a57 } */


[gcc r15-4219] c++: more modules and -M

2024-10-09 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:dcee0b6547211a428b75adb03a461285fed0f20d

commit r15-4219-gdcee0b6547211a428b75adb03a461285fed0f20d
Author: Jason Merrill 
Date:   Wed Oct 9 12:28:46 2024 -0400

c++: more modules and -M

In r15-4119-gc877a27f04f648 I told preprocess_file to use the
directives-only scan with modules, but it seems that I also need to set the
cpp_option so that communication between _cpp_handle_directive and
scan_translation_unit_directives_only works properly in
c-c++-common/cpp/embed-6.c.

gcc/c-family/ChangeLog:

* c-ppoutput.cc (preprocess_file): Set directives_only flag.

Diff:
---
 gcc/c-family/c-ppoutput.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/c-family/c-ppoutput.cc b/gcc/c-family/c-ppoutput.cc
index 374252bb4f37..e2c38cbd9ebb 100644
--- a/gcc/c-family/c-ppoutput.cc
+++ b/gcc/c-family/c-ppoutput.cc
@@ -93,8 +93,11 @@ preprocess_file (cpp_reader *pfile)
   if (flag_no_output && pfile->buffer)
 {
   if (flag_modules)
-   /* For macros from imported headers we need directives_only_cb.  */
-   scan_translation_unit_directives_only (pfile);
+   {
+ /* For macros from imported headers we need directives_only_cb.  */
+ cpp_get_options (pfile)->directives_only = true;
+ scan_translation_unit_directives_only (pfile);
+   }
   else
{
  /* Scan -included buffers, then the main file.  */


[gcc r15-4218] libcpp: fix typo

2024-10-09 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:d264b75eb29cfc1916e3c1ccc7a3251a40458392

commit r15-4218-gd264b75eb29cfc1916e3c1ccc7a3251a40458392
Author: Jason Merrill 
Date:   Wed Oct 9 12:31:57 2024 -0400

libcpp: fix typo

libcpp/ChangeLog:

* macro.cc (_cpp_pop_context): Fix typo.

Diff:
---
 libcpp/macro.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcpp/macro.cc b/libcpp/macro.cc
index 056b38e60931..2fb38618246a 100644
--- a/libcpp/macro.cc
+++ b/libcpp/macro.cc
@@ -2905,7 +2905,7 @@ _cpp_pop_context (cpp_reader *pfile)
 }
 
   pfile->context = context->prev;
-  /* decrease peak memory consumption by feeing the context.  */
+  /* Decrease peak memory consumption by freeing the context.  */
   pfile->context->next = NULL;
   free (context);
 }


[gcc r15-4215] libstdc++: Drop format attribute from snprintf wrapper [PR116969]

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:5247ee086f7fff5c6e7cd837478974dbfc2782db

commit r15-4215-g5247ee086f7fff5c6e7cd837478974dbfc2782db
Author: Jonathan Wakely 
Date:   Fri Oct 4 12:08:12 2024 +0100

libstdc++: Drop format attribute from snprintf wrapper [PR116969]

When __LONG_DOUBLE_IEEE128__ is defined we need to declare a wrapper for
Glibc's 'snprintf' symbol, so we can call the original definition that
works with the IBM128 format of long double. Because we were declaring
the wrapper using __typeof__(__builtin_snprintf) it inherited the
__attribute__((format(printf, 3, 4))) decoration, and then we got a
warning for calling that wrapper with an __ibm128 argument for a %Lf
conversion specifier. The warning is bogus, because the function we're
calling really does want __ibm128 for %Lf, but there's no "printf but
with a different long double format" archetype for the attribute.

In r15-4039-g28911f626864e7 I added a diagnostic pragma to suppress the
warning, but it would be better to just declare the wrapper without the
attribute, and not have to suppress a warning for code that we know is
actually correct.

libstdc++-v3/ChangeLog:

PR libstdc++/116969
* include/bits/locale_facets_nonio.tcc (money_put::__do_put):
Remove diagnostic pragmas.
(__glibcxx_snprintfibm128): Declare type manually, instead of
using __typeof__(__builtin_snprintf).

Diff:
---
 libstdc++-v3/include/bits/locale_facets_nonio.tcc | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/include/bits/locale_facets_nonio.tcc 
b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
index 53553d113b23..863350a85f12 100644
--- a/libstdc++-v3/include/bits/locale_facets_nonio.tcc
+++ b/libstdc++-v3/include/bits/locale_facets_nonio.tcc
@@ -637,10 +637,14 @@ _GLIBCXX_BEGIN_NAMESPACE_LDBL_OR_CXX11
 
 #if defined _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT \
   && defined __LONG_DOUBLE_IEEE128__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wformat" // '%Lf' expects 'long double'
-extern "C"
-__typeof__(__builtin_snprintf) __glibcxx_snprintfibm128 __asm__("snprintf");
+// The snprintf symbol in glibc that works with __ibm128 format is not visible
+// when compiling with -mabi=ieeelongdouble so we use this name for it instead.
+// N.B. we don't use __typeof__(__builtin_snprintf) for the type because that
+// would inherit __attribute__((format(printf, 3, 4))) and give a warning for
+// passing __ibm128 to %Lf instead of long double. The warning would be wrong
+// because long double in this TU is __ieee128 and snprintf expects __ibm128.
+extern "C" int
+__glibcxx_snprintfibm128(char*, size_t, const char*, ...) __asm__("snprintf");
 
   template<typename _CharT, typename _OutIter>
 _OutIter
@@ -673,7 +677,6 @@ __typeof__(__builtin_snprintf) __glibcxx_snprintfibm128 
__asm__("snprintf");
   return __intl ? _M_insert<true>(__s, __io, __fill, __digits)
                 : _M_insert<false>(__s, __io, __fill, __digits);
 }
-#pragma GCC diagnostic pop
 #endif
 
 _GLIBCXX_END_NAMESPACE_LDBL_OR_CXX11


[gcc r15-4224] RISC-V: Optimize branches with shifted immediate operands

2024-10-09 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:c8957c8779954c3b0bade1dde0a8987b4db157b4

commit r15-4224-gc8957c8779954c3b0bade1dde0a8987b4db157b4
Author: Jovan Vukic 
Date:   Wed Oct 9 16:53:38 2024 -0600

RISC-V: Optimize branches with shifted immediate operands

After the valuable feedback I received, it’s clear to me that the
oversight was in the tests showing the benefits of the patch. In the
test file, I added functions f5 and f6, which now generate more
efficient code with fewer instructions.

Before the patch:

f5:
        li      a4,2097152
        addi    a4,a4,-2048
        li      a5,1167360
        and     a0,a0,a4
        addi    a5,a5,-2048
        beq     a0,a5,.L4

f6:
        li      a5,3407872
        addi    a5,a5,-2048
        and     a0,a0,a5
        li      a5,1114112
        beq     a0,a5,.L7

After the patch:

f5:
        srli    a5,a0,11
        andi    a5,a5,1023
        li      a4,569
        beq     a5,a4,.L5

f6:
        srli    a5,a0,11
        andi    a5,a5,1663
        li      a4,544
        beq     a5,a4,.L9

PR target/115921

gcc/ChangeLog:

* config/riscv/iterators.md (any_eq): New code iterator.
* config/riscv/riscv.h (COMMON_TRAILING_ZEROS): New macro.
(SMALL_AFTER_COMMON_TRAILING_SHIFT): Ditto.
* config/riscv/riscv.md
(*branch<ANYI:mode>_shiftedarith_<optab>_shifted):
New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/branch-1.c: Additional tests.

Diff:
---
 gcc/config/riscv/iterators.md |  4 
 gcc/config/riscv/riscv.h  | 12 
 gcc/config/riscv/riscv.md | 32 +++
 gcc/testsuite/gcc.target/riscv/branch-1.c | 18 ++---
 4 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 872c542e9065..081659499a99 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -233,6 +233,8 @@
 (define_code_iterator any_ge [ge geu])
 (define_code_iterator any_lt [lt ltu])
 (define_code_iterator any_le [le leu])
+(define_code_iterator any_eq [eq ne])
+
 ;; Iterators for conditions we can emit a sCC against 0 or a reg directly
 (define_code_iterator scc_0  [eq ne gt gtu])
 
@@ -285,6 +287,8 @@
 (le "le")
 (gt "gt")
 (lt "lt")
+(eq "eq")
+(ne "ne")
 (ior "ior")
 (xor "xor")
 (and "and")
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 53b7b2a40ed9..ca1b8329cdc9 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -667,6 +667,18 @@ enum reg_class
 /* True if bit BIT is set in VALUE.  */
 #define BITSET_P(VALUE, BIT) (((VALUE) & (1ULL << (BIT))) != 0)
 
+/* Returns the smaller (common) number of trailing zeros for VAL1 and VAL2.  */
+#define COMMON_TRAILING_ZEROS(VAL1, VAL2)  \
+  (ctz_hwi (VAL1) < ctz_hwi (VAL2) \
+   ? ctz_hwi (VAL1)\
+   : ctz_hwi (VAL2))
+
+/* Returns true if both VAL1 and VAL2 are SMALL_OPERANDs after shifting by
+   the common number of trailing zeros.  */
+#define SMALL_AFTER_COMMON_TRAILING_SHIFT(VAL1, VAL2)  \
+  (SMALL_OPERAND ((VAL1) >> COMMON_TRAILING_ZEROS (VAL1, VAL2))
\
+   && SMALL_OPERAND ((VAL2) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)))
+
 /* Stack layout; function entry, exit and calling.  */
 
 #define STACK_GROWS_DOWNWARD 1
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 688c07df46c4..78112afbb261 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3129,6 +3129,38 @@
 }
 [(set_attr "type" "branch")])
 
+(define_insn_and_split "*branch_shiftedarith__shifted"
+  [(set (pc)
+   (if_then_else (any_eq
+   (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+ (match_operand 2 "shifted_const_arith_operand" "i"))
+   (match_operand 3 "shifted_const_arith_operand" "i"))
+(label_ref (match_operand 0 "" ""))
+(pc)))
+   (clobber (match_scratch:X 4 "=&r"))
+   (clobber (match_scratch:X 5 "=&r"))]
+  "!SMALL_OPERAND (INTVAL (operands[2]))
+&& !SMALL_OPERAND (INTVAL (operands[3]))
+&& SMALL_AFTER_COMMON_TRAILING_SHIFT (INTVAL (operands[2]),
+INTVAL (operands[3]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4) (lshiftrt:X (match_dup 1) (match_dup 7)))
+   (set (match_dup 4) (and:X (match_dup 4) (match_dup 8)))
+   (set (match_dup 5) (match_dup 9))
+   (set (pc) (if_then_else (any_eq (match_dup 4) (

[gcc r15-4194] tree-optimization/117000 - elide .REDUC_IOR with compare against zero

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:5977b746db3925aaba37722f5312419d5f2968a5

commit r15-4194-g5977b746db3925aaba37722f5312419d5f2968a5
Author: Richard Biener 
Date:   Tue Oct 8 09:01:01 2024 +0200

tree-optimization/117000 - elide .REDUC_IOR with compare against zero

The following adds a pattern to elide a .REDUC_IOR operation when
the result is compared against zero with a cbranch.  I've resorted
to using can_compare_p since that's what RTL expansion eventually
checks - while GIMPLE allowed whole vector equality compares for long
I'll notice vector lowering won't lower unsupported ones and RTL
expansion doesn't seem to try using [u]cmp optabs
(and neither x86 nor aarch64 implements those).  There's cstore
but no target implements that for vector modes either.

PR tree-optimization/117000
* match.pd (.REDUC_IOR !=/== 0): New pattern.
* gimple-match-head.cc: Include memmodel.h and optabs.h.
* generic-match-head.cc: Likewise.

* gcc.target/i386/pr117000.c: New testcase.

Diff:
---
 gcc/generic-match-head.cc|  2 ++
 gcc/gimple-match-head.cc |  2 ++
 gcc/match.pd |  9 +
 gcc/testsuite/gcc.target/i386/pr117000.c | 13 +
 4 files changed, 26 insertions(+)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index 42dee6266132..7d7e2a9f792d 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -35,6 +35,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "optabs-tree.h"
 #include "dbgcnt.h"
 #include "tm.h"
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 4147a0eb38a7..b9d5f751b7cb 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "memmodel.h"
+#include "optabs.h"
 #include "optabs-tree.h"
 #include "tree-eh.h"
 #include "dbgcnt.h"
diff --git a/gcc/match.pd b/gcc/match.pd
index e73bb7e2109b..755ed13e77d1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10474,6 +10474,15 @@ and,
   (simplify (reduc (op @0 VECTOR_CST@1))
 (op (reduc:type @0) (reduc:type @1))))
 
+/* Simplify .REDUC_IOR (@0) ==/!= 0 to @0 ==/!= 0.  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (IFN_REDUC_IOR @0) integer_zerop)
+  (if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (@0)))
+   && can_compare_p (cmp == EQ_EXPR ? EQ : NE, TYPE_MODE (TREE_TYPE (@0)),
+ccp_jump))
+   (cmp @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+
 /* Simplify vector floating point operations of alternating sub/add pairs
into using an fneg of a wider element type followed by a normal add.
under IEEE 754 the fneg of the wider type will negate every even entry
diff --git a/gcc/testsuite/gcc.target/i386/pr117000.c 
b/gcc/testsuite/gcc.target/i386/pr117000.c
new file mode 100644
index 000000000000..04f94344eb17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117000.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" { target sse4 } } */
+
+int eq(unsigned long *x, unsigned long *y)
+{
+unsigned long folded = 0;
+for (int i = 0; i < 4; ++i)
+  folded |= x[i] ^ y[i];
+return folded == 0;
+}
+
+/* We want to elide the .REDUC_IOR with the compare against zero.  */
+/* { dg-final { scan-assembler "ptest" } } */


[gcc r15-4193] Fix memory leak in vect_cse_slp_nodes

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:fd883919b2644c16a6bb00ba39bcba98cc26979d

commit r15-4193-gfd883919b2644c16a6bb00ba39bcba98cc26979d
Author: Richard Biener 
Date:   Tue Oct 8 14:24:27 2024 +0200

Fix memory leak in vect_cse_slp_nodes

The following avoids copying scalar stmts again for the re-lookup
of the slot to replace the NULL guard with node.

* tree-vect-slp.cc (vect_cse_slp_nodes): Fix memory leak.

Diff:
---
 gcc/tree-vect-slp.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 849863c15057..44ce9dbbab2d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7024,7 +7024,7 @@ vect_cse_slp_nodes (scalar_stmts_to_slp_tree_map_t 
*bst_map, slp_tree& node)
 
   /* Now record the node for CSE in other siblings.  */
   if (put_p)
-bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
+*bst_map->get (SLP_TREE_SCALAR_STMTS (node)) = node;
 }
 
 /* Optimize the SLP graph of VINFO.  */


[gcc r15-4213] aarch64: Fix SVE ACLE gimple folds for C++ LTO [PR116629]

2024-10-09 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:fee3adbac055c3ff2649fed866c66d44ebfcbe90

commit r15-4213-gfee3adbac055c3ff2649fed866c66d44ebfcbe90
Author: Richard Sandiford 
Date:   Wed Oct 9 13:57:36 2024 +0100

aarch64: Fix SVE ACLE gimple folds for C++ LTO [PR116629]

The SVE ACLE code has two ways of handling overloaded functions.
One, used by C, is to define a single dummy function for each unique
overloaded name, with resolve_overloaded_builtin then resolving calls
to real non-overloaded functions.  The other, used by C++, is to
define a separate function for each individual overload.

The builtins harness assigns integer function codes programmatically.
However, LTO requires it to use the same assignment for every
translation unit, regardless of language.  This means that C++ TUs
need to create (unused) slots for the C overloads and that C TUs
need to create (unused) slots for the C++ overloads.

In many ways, it doesn't matter whether the LTO frontend itself
uses the C approach or the C++ approach to defining overloaded
functions, since the LTO frontend never has to resolve source-level
overloading.  However, the C++ approach of defining a separate
function for each overload means that C++ calls never need to
be redirected to a different function.  Calls to an overload
can appear in the LTO dump and survive until expand.  In contrast,
calls to C's dummy overload functions are resolved by the front
end and never survive to LTO (or expand).

Some optimisations work by moving between sibling functions, such as _m
to _x.  If the source function is an overload, the expected destination
function is too.  The LTO frontend needs to define C++ overloads if it
wants to do this optimisation properly for C++.

The PR is about a tree checking failure caused by trying to use a
stubbed-out C++ overload in LTO.  Dealing with that by detecting the
stub (rather than changing which overloads are defined) would have
turned this from an ice-on-valid to a missed optimisation.

In future, it would probably make sense to redirect overloads to
non-overloaded functions during gimple folding, in case that exposes
more CSE opportunities.  But it'd probably be of limited benefit, since
it should be rare for code to mix overloaded and non-overloaded uses of
the same operation.  It also wouldn't be suitable for backports.

gcc/
PR target/116629
* config/aarch64/aarch64-sve-builtins.cc
(function_builder::function_builder): Use direct overloads for LTO.

gcc/testsuite/
PR target/116629
* gcc.target/aarch64/sve/acle/general/pr106326_2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins.cc |   2 +-
 .../aarch64/sve/acle/general/pr106326_2.c  | 381 +
 2 files changed, 382 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 5ff46212d18d..e7c703c987e8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1283,7 +1283,7 @@ function_builder::function_builder (handle_pragma_index 
pragma_index,
bool function_nulls)
 {
   m_overload_type = build_function_type (void_type_node, void_list_node);
-  m_direct_overloads = lang_GNU_CXX ();
+  m_direct_overloads = lang_GNU_CXX () || in_lto_p;
 
   if (initial_indexes[pragma_index] == 0)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr106326_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr106326_2.c
new file mode 100644
index 000000000000..deb936cac5c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr106326_2.c
@@ -0,0 +1,381 @@
+/* { dg-do link } */
+/* { dg-options "-O2 -flto -shared -fPIC --save-temps" } */
+/* { dg-require-effective-target shared } */
+/* { dg-require-effective-target fpic } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** add1:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add1 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b8 (), x, y);
+}
+
+/*
+** add2:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add2 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b16 (), x, y);
+}
+
+/*
+** add3:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add3 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b32 (), x, y);
+}
+
+/*
+** add4:
+** ...
+** movprfx [^\n]+
+** ...
+** ret
+*/
+svint32_t
+add4 (svint32_t x, svint32_t y)
+{
+  return svadd_z (svptrue_b64 (), x, y);
+}
+
+/*
+** add5:
+** add z0\.s, (z1\.s, z0\.s|z0\.s, z1\.s)
+** ret
+*/
+svint32_t
+add5 (svint32_t x, svint32_t y)
+{

[gcc r15-4212] testsuite: Make check-function-bodies work with LTO

2024-10-09 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:b94331d9a3f7efb451bfad9db0fda162d3c46748

commit r15-4212-gb94331d9a3f7efb451bfad9db0fda162d3c46748
Author: Richard Sandiford 
Date:   Wed Oct 9 13:57:36 2024 +0100

testsuite: Make check-function-bodies work with LTO

This patch tries to make check-function-bodies automatically
choose between reading the regular assembly file and reading the
LTO assembly file.  There should only ever be one right answer,
since check-function-bodies doesn't make sense on slim LTO output.

Maybe this will turn out to be impossible to get right, but I'd like
to try at least.

gcc/testsuite/
* lib/scanasm.exp (check-function-bodies): Look in ltrans0.ltrans.s
if the test appears to be using LTO.

Diff:
---
 gcc/testsuite/lib/scanasm.exp | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
index 737eefc655e9..26504deb0e62 100644
--- a/gcc/testsuite/lib/scanasm.exp
+++ b/gcc/testsuite/lib/scanasm.exp
@@ -997,16 +997,17 @@ proc check-function-bodies { args } {
error "too many arguments to check-function-bodies"
 }
 
+upvar 2 dg-extra-tool-flags extra_tool_flags
+set flags $extra_tool_flags
+
+global torture_current_flags
+if { [info exists torture_current_flags] } {
+   append flags " " $torture_current_flags
+}
+
 if { [llength $args] >= 3 } {
set required_flags [lindex $args 2]
 
-   upvar 2 dg-extra-tool-flags extra_tool_flags
-   set flags $extra_tool_flags
-
-   global torture_current_flags
-   if { [info exists torture_current_flags] } {
-   append flags " " $torture_current_flags
-   }
foreach required_flag $required_flags {
switch -- $required_flag {
target -
@@ -1043,7 +1044,14 @@ proc check-function-bodies { args } {
 
 global srcdir
 set input_filename "$srcdir/$filename"
-set output_filename "[file rootname [file tail $filename]].s"
+set output_filename "[file rootname [file tail $filename]]"
+if { [string match "* -flto *" " ${flags} "]
+&& ![string match "* -fno-use-linker-plugin *" " ${flags} "]
+&& ![string match "* -ffat-lto-objects *" " ${flags} "] } {
+   append output_filename ".ltrans0.ltrans.s"
+} else {
+   append output_filename ".s"
+}
 
 set prefix [lindex $args 0]
 set prefix_len [string length $prefix]


[gcc r15-4192] gcc/doc: adjust __builtin_choose_expr() description

2024-10-09 Thread Jan Beulich via Gcc-cvs
https://gcc.gnu.org/g:4b152f62e4acff41c6d0f1423f7f50e7a0528b5b

commit r15-4192-g4b152f62e4acff41c6d0f1423f7f50e7a0528b5b
Author: Jan Beulich 
Date:   Wed Oct 9 09:36:42 2024 +0200

gcc/doc: adjust __builtin_choose_expr() description

Present wording has misled people to believe the ?: operator would be
evaluating all three of the involved expressions.

gcc/

* doc/extend.texi: Clarify __builtin_choose_expr()
(dis)similarity to the ?: operator.

Diff:
---
 gcc/doc/extend.texi | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index f46c3df33030..302c3299ede8 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15153,11 +15153,12 @@ evaluate code depending on the value of a constant 
expression.  This
 built-in function returns @var{exp1} if @var{const_exp}, which is an
 integer constant expression, is nonzero.  Otherwise it returns @var{exp2}.
 
-This built-in function is analogous to the @samp{? :} operator in C,
-except that the expression returned has its type unaltered by promotion
-rules.  Also, the built-in function does not evaluate the expression
-that is not chosen.  For example, if @var{const_exp} evaluates to @code{true},
-@var{exp2} is not evaluated even if it has side effects.
+Like the @samp{? :} operator, this built-in function does not evaluate the
+expression that is not chosen.  For example, if @var{const_exp} evaluates to
+@code{true}, @var{exp2} is not evaluated even if it has side effects.  On the
+other hand, @code{__builtin_choose_expr} differs from @samp{? :} in that the
+first operand must be a compile-time constant, and the other operands are not
+subject to the @samp{? :} type constraints and promotions.
 
 This built-in function can return an lvalue if the chosen argument is an
 lvalue.


[gcc r15-4214] libstdc++: Workaround glibc headers on ia64-linux

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:c0bc9a153ae6ab649e2fcc3af53ebcd37df0a871

commit r15-4214-gc0bc9a153ae6ab649e2fcc3af53ebcd37df0a871
Author: Frank Scheiner 
Date:   Tue Oct 8 19:48:09 2024 +0100

libstdc++: Workaround glibc headers on ia64-linux

We see:

```
FAIL: 17_intro/names.cc  -std=gnu++17 (test for excess errors)
FAIL: 17_intro/names_pstl.cc  -std=gnu++17 (test for excess errors)
FAIL: experimental/names.cc  -std=gnu++17 (test for excess errors)
```

...on ia64-linux.

This is due to:

* /usr/include/bits/sigcontext.h:32-38:
```
32 struct __ia64_fpreg
33   {
34 union
35   {
36 unsigned long bits[2];
37   } u;
38   } __attribute__ ((__aligned__ (16)));
```

* /usr/include/sys/ucontext.h:39-45:
```
  39 struct __ia64_fpreg_mcontext
  40   {
  41 union
  42   {
  43 unsigned long __ctx(bits)[2];
  44   } __ctx(u);
  45   } __attribute__ ((__aligned__ (16)));
```

...from glibc 2.39 (w/ia64 support re-added). See the discussion
starting on [1].

[1]: https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654487.html

Signed-off-by: Frank Scheiner 

libstdc++-v3/ChangeLog:

* testsuite/17_intro/names.cc [__linux__ && __ia64__]: Undefine
'u' as used in glibc headers.

Diff:
---
 libstdc++-v3/testsuite/17_intro/names.cc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index 6b9a3639aad4..bea2d19ecba0 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -282,6 +282,12 @@
 #undef y
 #endif
 
+#if defined (__linux__) && defined (__ia64__)
+// <bits/sigcontext.h> defines __ia64_fpreg::u
+// <sys/ucontext.h> defines __ia64_fpreg_mcontext::u
+#undef u
+#endif
+
 #if defined (__linux__) || defined (__gnu_hurd__)
 #if __has_include()
 #include 


[gcc r15-4195] tree-optimization/116575 - handle SLP of permuted masked loads

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:dc90578f0b3b766303eef6f1acce45d603dee2c6

commit r15-4195-gdc90578f0b3b766303eef6f1acce45d603dee2c6
Author: Richard Biener 
Date:   Tue Oct 8 14:28:16 2024 +0200

tree-optimization/116575 - handle SLP of permuted masked loads

The following handles SLP discovery of permuted masked loads which
was prohibited (because wrongly handled) for PR114375.  In particular
with single-lane SLP at the moment all masked group loads appear
permuted and we fail to use masked load lanes as well.  The following
addresses parts of the issues, starting with doing correct basic
discovery - namely discover an unpermuted mask load followed by
a permute node.  In particular groups with gaps do not support masking
yet (and didn't before w/o SLP IIRC).  There are still issues with
how we represent masked load/store-lanes I think, but I first have to
get my hands on a good testcase.

PR tree-optimization/116575
PR tree-optimization/114375
* tree-vect-slp.cc (vect_build_slp_tree_2): Do not reject
permuted mask loads without gaps but instead discover a
node for the full unpermuted load and permute that with
a VEC_PERM node.

* gcc.dg/vect/vect-pr114375.c: Expect vectorization now with avx2.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-pr114375.c |  2 ++
 gcc/tree-vect-slp.cc  | 58 ---
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c 
b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
index 1e1cb0123d07..61e9bf152d73 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
@@ -30,6 +30,7 @@ int main()
 {
   check_vect ();
 
+#pragma GCC novector
   for (int i = 0; i < 512; ++i)
 a[i] = (i >> 1) & 1;
 
@@ -42,3 +43,4 @@ int main()
   return 0;
 }
 
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target avx2 } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 44ce9dbbab2d..9bb765e2cbac 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2029,16 +2029,66 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
  || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
  || gimple_call_internal_p (stmt,
 IFN_MASK_LEN_GATHER_LOAD));
- load_permutation.release ();
- /* We cannot handle permuted masked loads, see PR114375.  */
+ bool has_gaps = false;
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+   for (stmt_vec_info si = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
+si; si = DR_GROUP_NEXT_ELEMENT (si))
+ if (DR_GROUP_GAP (si) != 1)
+   has_gaps = true;
+ /* We cannot handle permuted masked loads directly, see
+PR114375.  We cannot handle strided masked loads or masked
+loads with gaps.  */
+ if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
+  && (DR_GROUP_GAP (first_stmt_info) != 0 || has_gaps))
+ || STMT_VINFO_STRIDED_P (stmt_info))
+   {
+ load_permutation.release ();
+ matches[0] = false;
+ return NULL;
+   }
+
+ /* For permuted masked loads do an unpermuted masked load of
+the whole group followed by a SLP permute node.  */
  if (any_permute
  || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && DR_GROUP_SIZE (first_stmt_info) != group_size)
- || STMT_VINFO_STRIDED_P (stmt_info))
+ && DR_GROUP_SIZE (first_stmt_info) != group_size))
{
+ /* Discover the whole unpermuted load.  */
+ vec<stmt_vec_info> stmts2;
+ stmts2.create (DR_GROUP_SIZE (first_stmt_info));
+ stmts2.quick_grow_cleared (DR_GROUP_SIZE (first_stmt_info));
+ unsigned i = 0;
+ for (stmt_vec_info si = first_stmt_info;
+  si; si = DR_GROUP_NEXT_ELEMENT (si))
+   stmts2[i++] = si;
+ bool *matches2
+   = XALLOCAVEC (bool, DR_GROUP_SIZE (first_stmt_info));
+ slp_tree unperm_load
+   = vect_build_slp_tree (vinfo, stmts2,
+  DR_GROUP_SIZE (first_stmt_info),
+  &this_max_nunits, matches2, limit,
+  &this_tree_size, bst_map);
+ /* When we are able to do the full masked load emit that
+followed by 'node' being the desired final permutation.  */
+ if (unperm_load)
+   {
+ lane_permutatio

[gcc r15-4200] testsuite: arm: use effective-target for vsel*, mod* and pr65647.c tests

2024-10-09 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:cf08dd297ca9e13b46ba4ff203dbcdce49dbc067

commit r15-4200-gcf08dd297ca9e13b46ba4ff203dbcdce49dbc067
Author: Torbjörn SVENSSON 
Date:   Mon Oct 7 09:06:37 2024 +0200

testsuite: arm: use effective-target for vsel*, mod* and pr65647.c tests

Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog

* gcc.target/arm/pr65647.c: Use effective-target arm_arch_v6m.
Removed unneeded dg-skip-if.
* gcc.target/arm/mod_2.c: Use effective-target arm_cpu_cortex_a57.
* gcc.target/arm/mod_256.c: Likewise.
* gcc.target/arm/vseleqdf.c: Likewise.
* gcc.target/arm/vseleqsf.c: Likewise.
* gcc.target/arm/vselgedf.c: Likewise.
* gcc.target/arm/vselgesf.c: Likewise.
* gcc.target/arm/vselgtdf.c: Likewise.
* gcc.target/arm/vselgtsf.c: Likewise.
* gcc.target/arm/vselledf.c: Likewise.
* gcc.target/arm/vsellesf.c: Likewise.
* gcc.target/arm/vselltdf.c: Likewise.
* gcc.target/arm/vselltsf.c: Likewise.
* gcc.target/arm/vselnedf.c: Likewise.
* gcc.target/arm/vselnesf.c: Likewise.
* gcc.target/arm/vselvcdf.c: Likewise.
* gcc.target/arm/vselvcsf.c: Likewise.
* gcc.target/arm/vselvsdf.c: Likewise.
* gcc.target/arm/vselvssf.c: Likewise.
* lib/target-supports.exp: Define effective-target 
arm_cpu_cortex_a57.
Update effective-target arm_v8_1_lob_ok to use -mcpu=unset.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/mod_2.c| 4 +++-
 gcc/testsuite/gcc.target/arm/mod_256.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pr65647.c  | 4 ++--
 gcc/testsuite/gcc.target/arm/vseleqdf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vseleqsf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselgedf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselgesf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselgtdf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselgtsf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselledf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vsellesf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselltdf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselltsf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselnedf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselnesf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselvcdf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselvcsf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselvsdf.c | 5 +++--
 gcc/testsuite/gcc.target/arm/vselvssf.c | 5 +++--
 gcc/testsuite/lib/target-supports.exp   | 3 ++-
 20 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c 
b/gcc/testsuite/gcc.target/arm/mod_2.c
index 1143725d59a6..3a203b67d73e 100644
--- a/gcc/testsuite/gcc.target/arm/mod_2.c
+++ b/gcc/testsuite/gcc.target/arm/mod_2.c
@@ -1,7 +1,9 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm32 } */
-/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_cpu_cortex_a57 } */
 
 #include "../aarch64/mod_2.x"
 
diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c 
b/gcc/testsuite/gcc.target/arm/mod_256.c
index d8dca0fe7d56..3521d7a05f3e 100644
--- a/gcc/testsuite/gcc.target/arm/mod_256.c
+++ b/gcc/testsuite/gcc.target/arm/mod_256.c
@@ -1,7 +1,9 @@
 /* { dg-do compile } */
 /* { dg-skip-if "-mpure-code supports M-profile only" { *-*-* } { 
"-mpure-code" } } */
 /* { dg-require-effective-target arm32 } */
-/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
+/* { dg-require-effective-target arm_cpu_cortex_a57 } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_cpu_cortex_a57 } */
 
 #include "../aarch64/mod_256.x"
 
diff --git a/gcc/testsuite/gcc.target/arm/pr65647.c 
b/gcc/testsuite/gcc.target/arm/pr65647.c
index 3cbf6b804ec0..e0c534bc813a 100644
--- a/gcc/testsuite/gcc.target/arm/pr65647.c
+++ b/gcc/testsuite/gcc.target/arm/pr65647.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_arch_v6m_ok } */
-/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } 
{"-mfloat-abi=soft" } } */
-/* { dg-options "-march=armv6-m -mthumb -O3 -w -mfloat-abi=soft -fpermissive" 
} */
+/* { dg-options "-O3 -w -fpermissive" } */
+/* { dg-add-options arm_arch_v6m } */
 
 a, b, c, e, g = &e, h, i = 7, l = 1, m, n, o, q = &m, r, s = &r, u, w = 9, x,
   y = 6, z, t6 = 7, t8, t9 = 1, t11 = 5, t12 = &t8, t13 = 3, t15,
diff --git a/gcc/testsuite/gcc.target/arm/vseleqdf.c 
b/gcc/testsuite/gcc.target/arm/vseleqdf.c
index 8a433356492d..5be3ed2b1f9b 100644
--- a/gcc/testsuite/gcc.target/arm/vseleqdf.c
+++ b/gcc/testsuite/gcc.target/arm/vseleqdf.c
@@ -1,7 +1,8 @@
 

[gcc] Created branch 'mikael/heads/inline_minmaxloc_v331' in namespace 'refs/users'

2024-10-09 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/inline_minmaxloc_v331' was created in namespace 
'refs/users' pointing to:

 6a3854898348... fortran: Evaluate once BACK argument of MINLOC/MAXLOC with 


[gcc(refs/users/mikael/heads/inline_minmaxloc_v331)] fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]

2024-10-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:04dfc912b3888fd5303243d06910d5d3dfee61bc

commit 04dfc912b3888fd5303243d06910d5d3dfee61bc
Author: Mikael Morin 
Date:   Fri Nov 17 19:04:19 2023 +0100

fortran: Inline unmasked integral MINLOC/MAXLOC with DIM [PR90608]

Enable generation of inline code for the MINLOC and MAXLOC intrinsics,
if the ARRAY argument is of integral type and of any rank (only the rank 1
case was previously inlined), the DIM argument is a constant value and there
is no MASK argument.

The restriction to integral ARRAY and absent MASK limits the scope of
the change to the cases where we generate single loop inline code.

This change uses the existing scalarizer support for reductions, that is
arrays used in scalarization loops, where each element uses a nested
scalarization loop to calculate its value.  The nested loop (and
respectively the nested scalarization chain) is created while walking the
MINLOC/MAXLOC expression, it's setup automatically by the outer scalarizer,
and gfc_conv_intrinsic_minmaxloc is changed to use it as a replacement for
the local loop variable (respectively ARRAY scalarization chain) used in the
non-reduction case (i.e. when DIM is absent).

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return true
if DIM is constant, ARRAY is integral and MASK is absent.
(walk_inline_intrinsic_minmaxloc): If DIM is present, walk ARRAY and
move the dimension corresponding to DIM to a nested chain, keeping
the rest of the dimensions as the returned scalarization chain.
(gfc_conv_intrinsic_minmaxloc): When inside the scalarization loops,
proceed with inline code generation if DIM is present.  If DIM is
present, skip result array creation and final initialization from
individual result local variables.  If DIM is present and ARRAY has
rank greater than 1, use the nested loop initialized by the
scalarizer instead of the local one, use 1 as scalarization
dimension, and evaluate ARRAY using the inherited scalarization
chain instead of creating a local one by walking the expression.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_1.f90: Also accept the error message
generated by the scalarizer in case the function call is implemented
through inline code.
* gfortran.dg/maxloc_bounds_2.f90: Likewise.
* gfortran.dg/maxloc_bounds_3.f90: Likewise.
* gfortran.dg/minmaxloc_19.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 227 ++
 gcc/testsuite/gfortran.dg/maxloc_bounds_1.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_2.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_3.f90 |   4 +-
 gcc/testsuite/gfortran.dg/minmaxloc_19.f90| 182 +
 5 files changed, 343 insertions(+), 78 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a282ae1c0903..dedb49b4a64e 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5472,12 +5472,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   tree lab1, lab2;
   tree b_if, b_else;
   tree back;
-  gfc_loopinfo loop;
-  gfc_actual_arglist *actual;
-  gfc_ss *arrayss;
-  gfc_ss *maskss;
+  gfc_loopinfo loop, *ploop;
+  gfc_actual_arglist *actual, *array_arg, *dim_arg, *mask_arg, *kind_arg;
+  gfc_actual_arglist *back_arg;
+  gfc_ss *arrayss = nullptr;
+  gfc_ss *maskss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
+  gfc_se *base_se;
   gfc_expr *arrayexpr;
   gfc_expr *maskexpr;
   gfc_expr *backexpr;
@@ -5489,6 +5491,14 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   bool optional_mask;
 
   actual = expr->value.function.actual;
+  array_arg = actual;
+  dim_arg = array_arg->next;
+  mask_arg = dim_arg->next;
+  kind_arg = mask_arg->next;
+  back_arg = kind_arg->next;
+
+  bool dim_present = dim_arg->expr != nullptr;
+  bool nested_loop = dim_present && expr->rank > 0;
 
   /* The last argument, BACK, is passed by value. Ensure that
  by setting its name to %VAL. */
@@ -5502,11 +5512,15 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 {
   if (se->ss->info->useflags)
{
- /* The inline implementation of MINLOC/MAXLOC has been generated
-before, out of the scalarization loop; now we can just use the
-result.  */
- gfc_conv_tmp_array_ref (se);
- return;
+ if (!dim_present || !gfc_inline_intrinsic_function_p (expr))
+   {
+ /* The code generating and initializing the result array has been
+generated already before the scalari

[gcc(refs/users/mikael/heads/inline_minmaxloc_v331)] fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]

2024-10-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:42fe6a85935302178ae2529bafaa2b21750adefa

commit 42fe6a85935302178ae2529bafaa2b21750adefa
Author: Mikael Morin 
Date:   Thu Nov 16 10:00:26 2023 +0100

fortran: Add tests covering inline MINLOC/MAXLOC with DIM [PR90608]

Add the tests covering the cases for which the following patches will
implement inline expansion of MINLOC and MAXLOC.  Those are cases where the
DIM argument is a constant value, and the ARRAY argument has rank greater
than 1.

PR fortran/90608

gcc/testsuite/ChangeLog:

* gfortran.dg/ieee/maxloc_nan_2.f90: New test.
* gfortran.dg/ieee/minloc_nan_2.f90: New test.
* gfortran.dg/maxloc_with_dim_1.f90: New test.
* gfortran.dg/maxloc_with_dim_and_mask_1.f90: New test.
* gfortran.dg/minloc_with_dim_1.f90: New test.
* gfortran.dg/minloc_with_dim_and_mask_1.f90: New test.

Diff:
---
 gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90|  64 +++
 gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90|  64 +++
 gcc/testsuite/gfortran.dg/maxloc_with_dim_1.f90| 201 +
 .../gfortran.dg/maxloc_with_dim_and_mask_1.f90 | 452 +
 gcc/testsuite/gfortran.dg/minloc_with_dim_1.f90| 201 +
 .../gfortran.dg/minloc_with_dim_and_mask_1.f90 | 452 +
 6 files changed, 1434 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90 
b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90
new file mode 100644
index 000000000000..4d73431f8c23
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/maxloc_nan_2.f90
@@ -0,0 +1,64 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline maxloc implementation,
+! when the dim argument is present.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,4,5), source = nan)
+r = maxloc(a, dim=1)
+if (any(shape(r) /= (/ 4, 5 /))) stop 21
+if (any(r /= 1)) stop 22
+r = maxloc(a, dim=2)
+if (any(shape(r) /= (/ 3, 5 /))) stop 23
+if (any(r /= 1)) stop 24
+r = maxloc(a, dim=3)
+if (any(shape(r) /= (/ 3, 4 /))) stop 25
+if (any(r /= 1)) stop 26
+  end subroutine
+  subroutine check_with_mask()
+real, allocatable :: a(:,:,:)
+logical, allocatable :: m(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(2,3,4), source = nan)
+allocate(m(2,3,4))
+m(:,:,:) = reshape((/ .false., .false., .true. , .true. ,  &
+  .false., .true. , .false., .false.,  &
+  .false., .true. , .true. , .false.,  &
+  .true. , .true. , .true. , .false.,  &
+  .false., .true. , .true. , .false.,  &
+  .false., .true. , .false., .false.  /), shape(m))
+r = maxloc(a, dim = 1, mask = m)
+if (any(shape(r) /= (/ 3, 4 /))) stop 51
+if (any(r /= reshape((/ 0, 1, 2,  &
+0, 2, 1,  &
+1, 1, 2,  &
+1, 2, 0  /), (/ 3, 4 /)))) stop 52
+r = maxloc(a, dim = 2, mask = m)
+if (any(shape(r) /= (/ 2, 4 /))) stop 53
+if (any(r /= reshape((/ 2, 2,  &
+3, 2,  &
+1, 1,  &
+1, 2  /), (/ 2, 4 /)))) stop 54
+r = maxloc(a, dim = 3, mask = m)
+if (any(shape(r) /= (/ 2, 3 /))) stop 55
+if (any(r /= reshape((/ 3, 3,  &
+1, 1,  &
+2, 1  /), (/ 2, 3 /)))) stop 56
+  end subroutine
+end program p
+
diff --git a/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90 
b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90
new file mode 100644
index 000000000000..311526484fc8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ieee/minloc_nan_2.f90
@@ -0,0 +1,64 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline minloc implementation,
+! when the dim argument is present.
+
+program p
+  implicit none
+  call check_without_mask
+  call check_with_mask
+contains
+  subroutine check_without_mask()
+use, intrinsic :: ieee_arithmetic
+real, allocatable :: a(:,:,:)
+real :: nan
+integer, allocatable :: r(:,:)
+if (.not. ieee_support_nan(nan)) return
+nan = ieee_value(nan, ieee_quiet_nan)
+allocate(a(3,4,5), source = nan)
+r = minloc(a, dim=1)
+if (any(shape(r) /= (/ 4, 5 /))) stop 21
+if (any(r /= 1)) stop 22
+r = minloc(a, dim=2)
+if (any(shape(r) /= (/ 3, 5 /))) stop 23
+if (any(r /= 1)) stop 24
+r = mi

[gcc(refs/users/mikael/heads/inline_minmaxloc_v331)] fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]

2024-10-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:e9f47c253cdfceca7eb0e4acb96ca83bd4c5b873

commit e9f47c253cdfceca7eb0e4acb96ca83bd4c5b873
Author: Mikael Morin 
Date:   Thu Aug 8 12:23:16 2024 +0200

fortran: Inline non-character MINLOC/MAXLOC with DIM [PR90608]

Enable generation of inline MINLOC/MAXLOC code in the cases where DIM is a
constant, and either ARRAY is of floating point or MASK is an array.  Those
cases are the remaining bits to fully support inlining of non-CHARACTER
MINLOC/MAXLOC with DIM.  They are treated together because they generate
similar code, the NANs for REAL types being handled a bit like a second
level of masking.  These are the cases for which we generate two loops.

This change affects the code generating the second loop, that was
previously accessible only in cases where ARRAY had rank 1.

The main change in gfc_conv_intrinsic_minmaxloc is the replacement of the
locally initialized scalarization loop with the one provided and previously
initialized by the scalarizer.  The same goes for the locally initialized MASK
scalarizer chain.

As this is enabling the code generating a second loop in a context of
reduction and nested loops, care is taken not to advance parent
scalarization chains twice.

The scalarization chain element(s) for an array MASK are inserted in the
chain at a different place from that of a scalar MASK.  This is done on
purpose to match the code consuming the chains which are in different places
for scalar and array MASK.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE
for MINLOC/MAXLOC with constant DIM and non-scalar MASK.
(walk_inline_intrinsic_minmaxloc): Walk MASK and if it's an array
add the chain obtained before that of ARRAY.
(gfc_conv_intrinsic_minmaxloc): Use the nested loop if there is one.
To evaluate MASK (respectively ARRAY in the second loop), inherit
the scalarizer chain if in a nested loop, otherwise keep using the
chain obtained by walking MASK (respectively ARRAY).  If there is a
nested loop, avoid advancing the parent scalarization chain a second
time in the second loop.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_21.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  94 ++---
 gcc/testsuite/gfortran.dg/minmaxloc_21.f90 | 572 +
 2 files changed, 623 insertions(+), 43 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a146d7263c88..4beead175b77 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5477,6 +5477,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_actual_arglist *back_arg;
   gfc_ss *arrayss = nullptr;
   gfc_ss *maskss = nullptr;
+  gfc_ss *orig_ss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
   gfc_se nested_se;
@@ -5711,6 +5712,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   if (nested_loop)
 {
   ploop = enter_nested_loop (&nested_se);
+  orig_ss = nested_se.ss;
   ploop->temp_dim = 1;
 }
   else
@@ -5785,9 +5787,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 }
   else
 {
-  gcc_assert (!nested_loop);
-  for (int i = 0; i < loop.dimen; i++)
-   gfc_add_modify (&loop.pre, pos[i], gfc_index_zero_node);
+  for (int i = 0; i < ploop->dimen; i++)
+   gfc_add_modify (&ploop->pre, pos[i], gfc_index_zero_node);
   lab1 = gfc_build_label_decl (NULL_TREE);
   TREE_USED (lab1) = 1;
   lab2 = gfc_build_label_decl (NULL_TREE);
@@ -5818,10 +5819,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   /* If we have a mask, only check this element if the mask is set.  */
   if (maskexpr && maskexpr->rank > 0)
 {
-  gcc_assert (!nested_loop);
-  gfc_init_se (&maskse, NULL);
-  gfc_copy_loopinfo_to_se (&maskse, &loop);
-  maskse.ss = maskss;
+  gfc_init_se (&maskse, base_se);
+  gfc_copy_loopinfo_to_se (&maskse, ploop);
+  if (!nested_loop)
+   maskse.ss = maskss;
   gfc_conv_expr_val (&maskse, maskexpr);
   gfc_add_block_to_block (&body, &maskse.pre);
 
@@ -5849,13 +5850,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   stmtblock_t ifblock2;
   tree ifbody2;
 
-  gcc_assert (!nested_loop);
-
   gfc_start_block (&ifblock2);
-  for (int i = 0; i < loop.dimen; i++)
+  for (int i = 0; i < ploop->dimen; i++)
{
  tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]),
-loop.loopvar[i], offset[i]);
+ploop->loopvar[i

[gcc(refs/users/mikael/heads/inline_minmaxloc_v331)] fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]

2024-10-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:b6b64ce71087364ff60fd46525979d1d489465bf

commit b6b64ce71087364ff60fd46525979d1d489465bf
Author: Mikael Morin 
Date:   Thu Aug 8 13:44:16 2024 +0200

fortran: Inline MINLOC/MAXLOC with DIM and scalar MASK [PR90608]

Enable the generation of inline code for MINLOC/MAXLOC when argument
ARRAY is of integral type, DIM is a constant, and MASK is scalar (only
absent MASK or rank 1 ARRAY were inlined before).

Scalar masks are implemented with a wrapping condition around the code
one would generate if MASK wasn't present, so they are easy to support
once inline code without MASK is working.

With this change, there are both expressions evaluated inside the nested
loop (ARRAY, and in the future MASK if non-scalar) and expressions evaluated
outside of it (MASK if scalar).  Both have to advance the scalarization
chain passed in argument SE to gfc_conv_intrinsic_minmaxloc as they are
evaluated, but expressions evaluated from within the nested loop
additionally have to advance the nested scalarization chain of the reduction
loop.  This is normally handled transparently through the inheritance that
is defined when initializing gfc_se structs, but there has to be some
variable to inherit from, and there is a single one, SE.  This variable is
kept as base for out of nested loop expressions (scalar MASK), and this
change introduces a new variable to hold the current advance of the nested
loop scalarization chain and serve as inheritance base to evaluate nested
loop expressions (just ARRAY for now, additionally non-scalar MASK later).

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Return TRUE
if MASK is scalar.
(walk_inline_intrinsic_minmaxloc): Append to the scalarization chain
a scalar element for MASK if it's present.
(gfc_conv_intrinsic_minmaxloc): Use a local gfc_se struct to serve
as base for all the expressions evaluated in the nested loop.  To
evaluate MASK in a nested loop, enable usage of the scalarizer and
set the current scalarization chain element to use to that of the
original passed in SE argument.  And use the nested loop from the
scalarizer instead of the local loop in that case.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_8.f90: Accept the error message
generated by the scalarizer in case the MAXLOC intrinsic call is
implemented through inline code.
* gfortran.dg/minmaxloc_20.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc|  27 ++--
 gcc/testsuite/gfortran.dg/maxloc_bounds_8.f90 |   4 +-
 gcc/testsuite/gfortran.dg/minmaxloc_20.f90| 182 ++
 3 files changed, 201 insertions(+), 12 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index dedb49b4a64e..cd6aca51f218 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5479,6 +5479,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_ss *maskss = nullptr;
   gfc_se arrayse;
   gfc_se maskse;
+  gfc_se nested_se;
   gfc_se *base_se;
   gfc_expr *arrayexpr;
   gfc_expr *maskexpr;
@@ -5616,7 +5617,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_block_to_block (&se->pre, &backse.post);
 
   if (nested_loop)
-base_se = se;
+{
+  gfc_init_se (&nested_se, se);
+  base_se = &nested_se;
+}
   else
 {
   /* Walk the arguments.  */
@@ -5706,7 +5710,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   if (nested_loop)
 {
-  ploop = enter_nested_loop (se);
+  ploop = enter_nested_loop (&nested_se);
   ploop->temp_dim = 1;
 }
   else
@@ -6063,21 +6067,19 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 {
   tree ifmask;
 
-  gcc_assert (!nested_loop);
-
-  gfc_init_se (&maskse, NULL);
+  gfc_init_se (&maskse, nested_loop ? se : nullptr);
   gfc_conv_expr_val (&maskse, maskexpr);
   gfc_add_block_to_block (&se->pre, &maskse.pre);
   gfc_init_block (&block);
-  gfc_add_block_to_block (&block, &loop.pre);
-  gfc_add_block_to_block (&block, &loop.post);
+  gfc_add_block_to_block (&block, &ploop->pre);
+  gfc_add_block_to_block (&block, &ploop->post);
   tmp = gfc_finish_block (&block);
 
   /* For the else part of the scalar mask, just initialize
 the pos variable the same way as above.  */
 
   gfc_init_block (&elseblock);
-  for (int i = 0; i < loop.dimen; i++)
+  for (int i = 0; i < ploop->dimen; i++)
gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node);
   elsetmp = gfc_finish_bl

[gcc(refs/users/mikael/heads/inline_minmaxloc_v331)] fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]

2024-10-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:6a385489834865043ec9a485b1a7a2f787dd4aeb

commit 6a385489834865043ec9a485b1a7a2f787dd4aeb
Author: Mikael Morin 
Date:   Thu Oct 3 15:57:50 2024 +0200

fortran: Evaluate once BACK argument of MINLOC/MAXLOC with DIM [pr90608]

Evaluate the BACK argument of MINLOC/MAXLOC once before the
scalarization loops in the case where the DIM argument is present.

This is a follow-up to r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3
which added knowledge of BACK to the scalarizer, to
r15-2701-ga10436a8404ad2f0cc5aa4d6a0cc850abe5ef49e which removed it to
handle it out of scalarization instead, and to more immediate previous
patches that added support for MINLOC/MAXLOC with DIM.  The recent
support for MINLOC/MAXLOC with DIM introduced nested loops, which made
the evaluation of BACK (removed from the scalarizer knowledge by previous
patches) wrapped in a loop, so possibly executed more than once.  This
change adds BACK to the scalarization chain if MINLOC/MAXLOC will use
nested loops, so that it is evaluated by the scalarizer only once before
the outermost loop in that case.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc
(walk_inline_intrinsic_minmaxloc): Add a scalar element for BACK as
first item of the list if BACK is present and there will be nested
loops.
(gfc_conv_intrinsic_minmaxloc): Evaluate BACK using an inherited
scalarization chain if there is a nested loop.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_8.f90: New test.
* gfortran.dg/minloc_9.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  20 +-
 gcc/testsuite/gfortran.dg/maxloc_8.f90 | 349 +
 gcc/testsuite/gfortran.dg/minloc_9.f90 | 349 +
 3 files changed, 716 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 90dcb759b378..5c25eedcc4f7 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5594,7 +5594,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 && maskexpr->symtree->n.sym->attr.optional;
   backexpr = back_arg->expr;
 
-  gfc_init_se (&backse, NULL);
+  gfc_init_se (&backse, nested_loop ? se : nullptr);
   if (backexpr == nullptr)
 back = logical_false_node;
   else if (maybe_absent_optional_variable (backexpr))
@@ -11885,10 +11885,13 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
   gfc_actual_arglist *array_arg = expr->value.function.actual;
   gfc_actual_arglist *dim_arg = array_arg->next;
   gfc_actual_arglist *mask_arg = dim_arg->next;
+  gfc_actual_arglist *kind_arg = mask_arg->next;
+  gfc_actual_arglist *back_arg = kind_arg->next;
 
   gfc_expr *array = array_arg->expr;
   gfc_expr *dim = dim_arg->expr;
   gfc_expr *mask = mask_arg->expr;
+  gfc_expr *back = back_arg->expr;
 
   if (dim == nullptr)
 return gfc_get_array_ss (ss, expr, 1, GFC_SS_INTRINSIC);
@@ -11914,7 +11917,20 @@ walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr 
*expr ATTRIBUTE_UNUSED)
  chain.  */
   int dim_val = mpz_get_si (dim->value.integer) - 1;
   gfc_ss *tail = nest_loop_dimension (tmp_ss, dim_val);
-  tail->next = ss;
+
+  if (back && array->rank > 1)
+{
+  /* If there are nested scalarization loops, include BACK in the
+scalarization chains to avoid evaluating it multiple times in a loop.
+Otherwise, prefer to handle it outside of scalarization.  */
+  gfc_ss *back_ss = gfc_get_scalar_ss (ss, back);
+  back_ss->info->type = GFC_SS_REFERENCE;
+  back_ss->info->can_be_null_ref = true;
+
+  tail->next = back_ss;
+}
+  else
+tail->next = ss;
 
   if (scalar_mask)
 tmp_ss = gfc_get_scalar_ss (tmp_ss, mask);
diff --git a/gcc/testsuite/gfortran.dg/maxloc_8.f90 
b/gcc/testsuite/gfortran.dg/maxloc_8.f90
new file mode 100644
index ..21bc4591235a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/maxloc_8.f90
@@ -0,0 +1,349 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check that the evaluation of MAXLOC's BACK argument is made only once
+! before the scalarization loops, when the DIM argument is present.
+
+program p
+  implicit none
+  integer, parameter :: data60(*) = (/ 7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+   8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+   0, 6, 4, 5, 5, 8, 2, 6, 7, 8,  &
+   7, 4, 5, 3, 9, 0, 6, 4, 5, 5,  &
+   8, 2, 6, 7, 8, 7, 4, 5, 3, 9,  &
+   0, 6, 4, 5, 5, 8, 2, 6, 7, 8  /)
+  logical, parameter :: mask60(*) = (/ .true. , .false., .false., .false., &
+   .true. , .false., .true. , .false.,

[gcc(refs/users/mikael/heads/inline_minmaxloc_v331)] fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only

2024-10-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:05f9170b29946b7fec47a1b957cf99be443cea8b

commit 05f9170b29946b7fec47a1b957cf99be443cea8b
Author: Mikael Morin 
Date:   Sat Nov 18 20:54:20 2023 +0100

fortran: Check for empty MINLOC/MAXLOC ARRAY along DIM only

In the function generating inline code to implement MINLOC and MAXLOC, when
DIM is present, check for emptiness using only the size of ARRAY along DIM.

The check for ARRAY emptiness had been checking the size of the full array,
which is correct for MINLOC and MAXLOC without DIM.  But if DIM is
present, the reduction is along DIM only so the check for emptiness
should consider that dimension only as well.

This sounds like a correctness issue, but fortunately the cases where it
makes a difference are cases where ARRAY is empty, so even if the MINLOC or
MAXLOC calculated value is wrong, it's wrapped in a zero iteration loop, and
the wrong values are not actually used.  In the end this just avoids
unnecessary calculations.

A previous version of this patch didn't support non-constant DIM with
rank 1 ARRAY.  The new testcase checks that that case is supported.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Only get the size
along DIM instead of the full size if DIM is present.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_22.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 19 ++-
 gcc/testsuite/gfortran.dg/minmaxloc_22.f90 | 24 
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 4beead175b77..90dcb759b378 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5641,7 +5641,24 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   if (!(maskexpr && maskexpr->rank > 0))
 {
   mpz_t asize;
-  if (gfc_array_size (arrayexpr, &asize))
+  bool reduction_size_known;
+
+  if (dim_present)
+   {
+ int reduction_dim;
+ if (dim_arg->expr->expr_type == EXPR_CONSTANT)
+   reduction_dim = mpz_get_si (dim_arg->expr->value.integer) - 1;
+ else if (arrayexpr->rank == 1)
+   reduction_dim = 0;
+ else
+   gcc_unreachable ();
+ reduction_size_known = gfc_array_dimen_size (arrayexpr, reduction_dim,
+  &asize);
+   }
+  else
+   reduction_size_known = gfc_array_size (arrayexpr, &asize);
+
+  if (reduction_size_known)
{
  nonempty = gfc_conv_mpz_to_tree (asize, gfc_index_integer_kind);
  mpz_clear (asize);
diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_22.f90 
b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90
new file mode 100644
index ..4f323ec5daba
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmaxloc_22.f90
@@ -0,0 +1,24 @@
+! { dg-do compile }
+!
+! Check that the inline code generated for MINLOC and MAXLOC supports
+! a non-constant DIM argument if ARRAY has rank 1.
+
+program p
+  implicit none
+  integer, parameter :: n = 5
+  integer :: a(n)
+  print *, f(a, 1)
+contains
+  function f(a, d)
+integer :: a(n)
+integer :: d
+integer :: f
+f = minloc(a, dim=d) 
+  end function
+  function g(a, d)
+integer :: a(n)
+integer :: d
+integer :: g
+g = maxloc(a, dim=d) 
+  end function
+end program p


[gcc(refs/users/mikael/heads/inline_minmaxloc_v331)] fortran: Check MASK directly instead of its scalarization chain

2024-10-09 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:05d4c7416e97ba45003e0dc9e52eaf5756c1eb75

commit 05d4c7416e97ba45003e0dc9e52eaf5756c1eb75
Author: Mikael Morin 
Date:   Thu Sep 12 16:56:39 2024 +0200

fortran: Check MASK directly instead of its scalarization chain

Update the conditions used by the inline MINLOC/MAXLOC code generation
function to check directly the properties of MASK instead of the
variable holding its scalarization chain.

The inline implementation of MINLOC/MAXLOC in gfc_conv_intrinsic_minmaxloc
uses several conditions checking the presence of a scalarization chain for
MASK, which means that the argument is present and non-scalar.  The next
patch will allow inlining MINLOC/MAXLOC with DIM and MASK, and in that
case the scalarization chain for MASK is initialized elsewhere, so the
variable usually holding it in the function is not used, and the conditions
won't work in that case.

This change updates the conditions to check directly the properties of
MASK so that they work even if the scalarization chain variable is not used.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Use
conditionals based on the MASK expression rather than on its
scalarization chains.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index cd6aca51f218..a146d7263c88 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5746,7 +5746,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gcc_assert (reduction_dimensions == ploop->dimen);
 
-  if (nonempty == NULL && maskss == NULL)
+  if (nonempty == NULL && !(maskexpr && maskexpr->rank > 0))
 {
   nonempty = logical_true_node;
 
@@ -5816,7 +5816,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_start_scalarized_body (ploop, &body);
 
   /* If we have a mask, only check this element if the mask is set.  */
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
 {
   gcc_assert (!nested_loop);
   gfc_init_se (&maskse, NULL);
@@ -5921,7 +5921,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 }
   gfc_add_expr_to_block (&block, ifbody);
 
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
 {
   /* We enclose the above in if (mask) {...}.  If the mask is an
 optional argument, generate IF (.NOT. PRESENT(MASK)
@@ -5972,7 +5972,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   gfc_add_expr_to_block (outer_block, build1_v (LABEL_EXPR, lab1));
 
   /* If we have a mask, only check this element if the mask is set.  */
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
{
  gfc_init_se (&maskse, NULL);
  gfc_copy_loopinfo_to_se (&maskse, &loop);
@@ -6038,7 +6038,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gfc_add_expr_to_block (&block, tmp);
 
-  if (maskss)
+  if (maskexpr && maskexpr->rank > 0)
{
  /* We enclose the above in if (mask) {...}.  If the mask is
 an optional argument, generate IF (.NOT. PRESENT(MASK)
@@ -6063,7 +6063,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 gfc_add_expr_to_block (&loop.pre, build1_v (LABEL_EXPR, lab2));
 
   /* For a scalar mask, enclose the loop in an if statement.  */
-  if (maskexpr && maskss == NULL)
+  if (maskexpr && maskexpr->rank == 0)
 {
   tree ifmask;


[gcc r15-4201] tree-optimization/117041 - fix load classification of former grouped load

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:72c83f644dea755b4eba427aabde45f5d3694d9b

commit r15-4201-g72c83f644dea755b4eba427aabde45f5d3694d9b
Author: Richard Biener 
Date:   Wed Oct 9 11:42:59 2024 +0200

tree-optimization/117041 - fix load classification of former grouped load

When we first detect a grouped load but later dis-associate it we
only set DR_GROUP_FIRST_ELEMENT to NULL, indicating it is not a
STMT_VINFO_GROUPED_ACCESS but leave DR_GROUP_NEXT_ELEMENT set.  This
causes a stray DR_GROUP_NEXT_ELEMENT access in get_group_load_store_type
to go wrong, indicating a load isn't single_element_p when it actually
is, leading to wrong classification and an ICE.

PR tree-optimization/117041
* tree-vect-stmts.cc (get_group_load_store_type): Only
check DR_GROUP_NEXT_ELEMENT for STMT_VINFO_GROUPED_ACCESS.

* gcc.dg/torture/pr117041.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr117041.c | 10 ++
 gcc/tree-vect-stmts.cc  |  6 --
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr117041.c 
b/gcc/testsuite/gcc.dg/torture/pr117041.c
new file mode 100644
index ..09dbbf4c00ff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr117041.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+
+unsigned short a;
+int b, c[7][6];
+int main() {
+  for (a = 0; a < 6; a++)
+for (b = 5; b; b--)
+  c[a][b] = c[a+1][b];
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 433587679346..ad4a3141ab89 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1991,21 +1991,23 @@ get_group_load_store_type (vec_info *vinfo, 
stmt_vec_info stmt_info,
   stmt_vec_info first_stmt_info;
   unsigned int group_size;
   unsigned HOST_WIDE_INT gap;
+  bool single_element_p;
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 {
   first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   group_size = DR_GROUP_SIZE (first_stmt_info);
   gap = DR_GROUP_GAP (first_stmt_info);
+  single_element_p = (stmt_info == first_stmt_info
+ && !DR_GROUP_NEXT_ELEMENT (stmt_info));
 }
   else
 {
   first_stmt_info = stmt_info;
   group_size = 1;
   gap = 0;
+  single_element_p = true;
 }
   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
-  bool single_element_p = (stmt_info == first_stmt_info
-  && !DR_GROUP_NEXT_ELEMENT (stmt_info));
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   /* True if the vectorized statements would access beyond the last


[gcc r15-4202] Clear DR_GROUP_NEXT_ELEMENT upon group dissolving

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:55dbb4b5261d3b0d9fb970ffd3c371bd691badea

commit r15-4202-g55dbb4b5261d3b0d9fb970ffd3c371bd691badea
Author: Richard Biener 
Date:   Wed Oct 9 11:47:08 2024 +0200

Clear DR_GROUP_NEXT_ELEMENT upon group dissolving

I've tried to sanitize DR_GROUP_NEXT_ELEMENT accesses but there are too
many so the following instead makes sure DR_GROUP_NEXT_ELEMENT is never
non-NULL for !STMT_VINFO_GROUPED_ACCESS.

* tree-vect-data-refs.cc (vect_analyze_data_ref_access): When
cancelling a DR group also clear DR_GROUP_NEXT_ELEMENT.

Diff:
---
 gcc/tree-vect-data-refs.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 920e3c120a66..202af7a89523 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3206,6 +3206,7 @@ vect_analyze_data_ref_access (vec_info *vinfo, 
dr_vec_info *dr_info)
   if (loop_vinfo && integer_zerop (step))
 {
   DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
+  DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
   if (!nested_in_vect_loop_p (loop, stmt_info))
return DR_IS_READ (dr);
   /* Allow references with zero step for outer loops marked
@@ -3225,6 +3226,7 @@ vect_analyze_data_ref_access (vec_info *vinfo, 
dr_vec_info *dr_info)
   /* Interleaved accesses are not yet supported within outer-loop
 vectorization for references in the inner-loop.  */
   DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
+  DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
 
   /* For the rest of the analysis we use the outer-loop step.  */
   step = STMT_VINFO_DR_STEP (stmt_info);
@@ -3247,6 +3249,7 @@ vect_analyze_data_ref_access (vec_info *vinfo, 
dr_vec_info *dr_info)
{
  /* Mark that it is not interleaving.  */
  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
+ DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
  return true;
}
 }


[gcc r15-4207] libstdc++: Simplify std::aligned_storage and fix for versioned namespace [PR61458]

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:6ce1df379a64439ea429b6c5834e9f853d520112

commit r15-4207-g6ce1df379a64439ea429b6c5834e9f853d520112
Author: Jonathan Wakely 
Date:   Thu Sep 26 16:42:27 2024 +0100

libstdc++: Simplify std::aligned_storage and fix for versioned namespace [PR61458]

This simplifies the implementation of std::aligned_storage. For the
unstable ABI it also fixes the bug where its size is too large when the
default alignment is used. We can't fix that for the stable ABI though,
so just add a comment about the bug.

libstdc++-v3/ChangeLog:

PR libstdc++/61458
* doc/doxygen/user.cfg.in (GENERATE_BUGLIST): Set to NO.
* include/std/type_traits (__aligned_storage_msa): Remove.
(__aligned_storage_max_align_t): New struct.
(__aligned_storage_default_alignment): New function.
(aligned_storage): Use __aligned_storage_default_alignment for
default alignment. Replace union with a struct containing an
aligned buffer. Improve Doxygen comment.
(aligned_storage_t): Use __aligned_storage_default_alignment for
default alignment.

Diff:
---
 libstdc++-v3/doc/doxygen/user.cfg.in |  2 +-
 libstdc++-v3/include/std/type_traits | 83 +---
 2 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/libstdc++-v3/doc/doxygen/user.cfg.in 
b/libstdc++-v3/doc/doxygen/user.cfg.in
index 8fe337adf751..ae50f6dd0c74 100644
--- a/libstdc++-v3/doc/doxygen/user.cfg.in
+++ b/libstdc++-v3/doc/doxygen/user.cfg.in
@@ -681,7 +681,7 @@ GENERATE_TESTLIST  = NO
 # list. This list is created by putting \bug commands in the documentation.
 # The default value is: YES.
 
-GENERATE_BUGLIST   = YES
+GENERATE_BUGLIST   = NO
 
 # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
 # the deprecated list. This list is created by putting \deprecated commands in
diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 17ae2c435b38..c650094f8c52 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -2244,39 +2244,74 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 using add_pointer_t = typename add_pointer<_Tp>::type;
 #endif
 
-  template
-struct __aligned_storage_msa
-{
-  union __type
-  {
-   unsigned char __data[_Len];
-   struct __attribute__((__aligned__)) { } __align;
-  };
-};
+  /// @cond undocumented
+
+  // Aligned to maximum fundamental alignment
+  struct __attribute__((__aligned__)) __aligned_storage_max_align_t
+  { };
+
+  constexpr size_t
+  __aligned_storage_default_alignment([[__maybe_unused__]] size_t __len)
+  {
+#if _GLIBCXX_INLINE_VERSION
+using _Max_align
+  = integral_constant<size_t, alignof(__aligned_storage_max_align_t)>;
+
+return __len > (_Max_align::value / 2)
+? _Max_align::value
+# if _GLIBCXX_USE_BUILTIN_TRAIT(__builtin_clzg)
+: 1 << (__SIZE_WIDTH__ - __builtin_clzg(__len - 1u));
+# else
+: 1 << (__LLONG_WIDTH__ - __builtin_clzll(__len - 1ull));
+# endif
+#else
+// Returning a fixed value is incorrect, but kept for ABI compatibility.
+// XXX GLIBCXX_ABI Deprecated
+return alignof(__aligned_storage_max_align_t);
+#endif
+  }
+  /// @endcond
 
   /**
-   *  @brief Alignment type.
+   *  @brief Aligned storage
+   *
+   *  The member typedef `type` is a POD type suitable for use as
+   *  uninitialized storage for any object whose size is at most `_Len`
+   *  and whose alignment is a divisor of `_Align`.
+   *
+   *  It is important to use the nested `type` as uninitialized storage,
+   *  not the `std::aligned_storage` type itself which is an empty class
+   *  with 1-byte alignment. So this is correct:
+   *
+   *  `typename std::aligned_storage<sizeof(X), alignof(X)>::type m_xobj;`
+   *
+   *  This is wrong:
+   *
+   *  `std::aligned_storage<sizeof(X), alignof(X)> m_xobj;`
+   *
+   *  In C++14 and later `std::aligned_storage_t<sizeof(X), alignof(X)>`
+   *  can be used to refer to the `type` member typedef.
+   *
+   *  The default value of _Align is supposed to be the most stringent
+   *  fundamental alignment requirement for any C++ object type whose size
+   *  is no greater than `_Len` (see [basic.align] in the C++ standard).
*
-   *  The value of _Align is a default-alignment which shall be the
-   *  most stringent alignment requirement for any C++ object type
-   *  whose size is no greater than _Len (3.9). The member typedef
-   *  type shall be a POD type suitable for use as uninitialized
-   *  storage for any object whose size is at most _Len and whose
-   *  alignment is a divisor of _Align.
+   *  @bug In this implementation the default value for _Align is always the
+   *  maximum fundamental alignment, i.e. `alignof(max_align_t)`, which is
+   *  incorrect. It should be an alignment value no greater than `_Len`.
*
*  @deprecated Deprecated in C++23. Uses can be replaced by an
-   *  array std::byte[_Len] declared with a

[gcc r15-4208] libstdc++: Enable _GLIBCXX_ASSERTIONS by default for -O0 [PR112808]

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:361d230fd7800a7e749aba8ed020f54f5c26d504

commit r15-4208-g361d230fd7800a7e749aba8ed020f54f5c26d504
Author: Jonathan Wakely 
Date:   Thu Sep 26 16:55:07 2024 +0100

libstdc++: Enable _GLIBCXX_ASSERTIONS by default for -O0 [PR112808]

Too many users don't know about -D_GLIBCXX_ASSERTIONS and so are missing
valuable checks for C++ standard library preconditions. This change
enables libstdc++ assertions by default when compiling with -O0 so that
we diagnose more bugs by default.

When users enable optimization we don't add the assertions by default
(because they have non-zero overhead) so they still need to enable them
manually.

For users who really don't want the assertions even in unoptimized
builds, defining _GLIBCXX_NO_ASSERTIONS will prevent them from being
enabled automatically.

libstdc++-v3/ChangeLog:

PR libstdc++/112808
* doc/xml/manual/using.xml (_GLIBCXX_ASSERTIONS): Document
implicit definition for -O0 compilation.
(_GLIBCXX_NO_ASSERTIONS): Document.
* doc/html/manual/using_macros.html: Regenerate.
* include/bits/c++config [!__OPTIMIZE__] (_GLIBCXX_ASSERTIONS):
Define for unoptimized builds.

Diff:
---
 libstdc++-v3/doc/html/manual/using_macros.html | 12 +---
 libstdc++-v3/doc/xml/manual/using.xml  | 16 +---
 libstdc++-v3/include/bits/c++config|  9 +++--
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/libstdc++-v3/doc/html/manual/using_macros.html 
b/libstdc++-v3/doc/html/manual/using_macros.html
index 67623b5e2aff..c1406ec76f77 100644
--- a/libstdc++-v3/doc/html/manual/using_macros.html
+++ b/libstdc++-v3/doc/html/manual/using_macros.html
@@ -82,9 +82,15 @@
This is described in more detail in
Compile Time Checks.
   _GLIBCXX_ASSERTIONS
-   Undefined by default. When defined, enables extra error checking in
-the form of precondition assertions, such as bounds checking in
-strings and null pointer checks when dereferencing smart pointers.
+   Defined by default when compiling with no optimization, undefined
+   by default when compiling with optimization.
+   When defined, enables extra error checking in the form of
+   precondition assertions, such as bounds checking in strings
+   and null pointer checks when dereferencing smart pointers.
+  _GLIBCXX_NO_ASSERTIONS
+   Undefined by default.  When defined, prevents the implicit
+   definition of _GLIBCXX_ASSERTIONS when 
compiling
+   with no optimization.
   _GLIBCXX_DEBUG
Undefined by default. When defined, compiles user code using
the debug mode.
diff --git a/libstdc++-v3/doc/xml/manual/using.xml 
b/libstdc++-v3/doc/xml/manual/using.xml
index 89119f6fb2db..7ca3a3f4b4c0 100644
--- a/libstdc++-v3/doc/xml/manual/using.xml
+++ b/libstdc++-v3/doc/xml/manual/using.xml
@@ -1247,9 +1247,19 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 
hello.cc -o test.exe
 _GLIBCXX_ASSERTIONS
 
   
-   Undefined by default. When defined, enables extra error checking in
-the form of precondition assertions, such as bounds checking in
-strings and null pointer checks when dereferencing smart pointers.
+   Defined by default when compiling with no optimization, undefined
+   by default when compiling with optimization.
+   When defined, enables extra error checking in the form of
+   precondition assertions, such as bounds checking in strings
+   and null pointer checks when dereferencing smart pointers.
+  
+
+_GLIBCXX_NO_ASSERTIONS
+
+  
+   Undefined by default.  When defined, prevents the implicit
+   definition of _GLIBCXX_ASSERTIONS when compiling
+   with no optimization.
   
 
 _GLIBCXX_DEBUG
diff --git a/libstdc++-v3/include/bits/c++config 
b/libstdc++-v3/include/bits/c++config
index 29d795f687c6..b87a3527f24b 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -586,9 +586,14 @@ namespace std
 #pragma GCC visibility pop
 }
 
+#ifndef _GLIBCXX_ASSERTIONS
+# if defined(_GLIBCXX_DEBUG)
 // Debug Mode implies checking assertions.
-#if defined(_GLIBCXX_DEBUG) && !defined(_GLIBCXX_ASSERTIONS)
-# define _GLIBCXX_ASSERTIONS 1
+#  define _GLIBCXX_ASSERTIONS 1
+# elif ! defined(__OPTIMIZE__) && ! defined(_GLIBCXX_NO_ASSERTIONS)
+// Enable assertions for unoptimized builds.
+#  define _GLIBCXX_ASSERTIONS 1
+# endif
 #endif
 
 // Disable std::string explicit instantiation declarations in order to assert.


[gcc r15-4203] libstdc++: Fix formatting of chrono::duration with character rep [PR116755]

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:b349c651ff16240b8cc4225db76479154c941c28

commit r15-4203-gb349c651ff16240b8cc4225db76479154c941c28
Author: Jonathan Wakely 
Date:   Wed Sep 18 17:20:29 2024 +0100

libstdc++: Fix formatting of chrono::duration with character rep [PR116755]

Implement Peter Dimov's suggestion for resolving LWG 4118, which is to
use +d.count() so that character types are promoted to an integer type
before formatting them. This didn't have unanimous consensus in the
committee as Howard Hinnant proposed that we should format the rep
consistently with std::format("{}", d.count()) instead. That ends up
being more complicated, because it makes std::formattable a precondition
of operator<< which was not previously the case, and it means that
ios_base::fmtflags from the stream would be ignored because std::format
doesn't use them.

libstdc++-v3/ChangeLog:

PR libstdc++/116755
* include/bits/chrono_io.h (operator<<): Use +d.count() for
duration inserter.
(__formatter_chrono::_M_format): Likewise for %Q format.
* testsuite/20_util/duration/io.cc: Test durations with
character types as reps.

Diff:
---
 libstdc++-v3/include/bits/chrono_io.h |  9 +++-
 libstdc++-v3/testsuite/20_util/duration/io.cc | 66 +++
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index 362bb5aa9e98..a337007266e9 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -150,7 +150,9 @@ namespace __detail
   __s.flags(__os.flags());
   __s.imbue(__os.getloc());
   __s.precision(__os.precision());
-  __s << __d.count();
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 4118. How should duration formatters format custom rep types?
+  __s << +__d.count();
   __detail::__fmt_units_suffix(_Out(__s));
   __os << std::move(__s).str();
   return __os;
@@ -635,8 +637,10 @@ namespace __format
case 'Q':
  // %Q The duration's numeric value.
  if constexpr (chrono::__is_duration_v<_Tp>)
+   // _GLIBCXX_RESOLVE_LIB_DEFECTS
+   // 4118. How should duration formatters format custom rep?
__out = std::format_to(__print_sign(), _S_empty_spec,
-  __t.count());
+  +__t.count());
  else
__throw_format_error("chrono format error: argument is "
 "not a duration");
@@ -1703,6 +1707,7 @@ namespace __format
 /// @endcond
 
   template<typename _Rep, typename _Period, typename _CharT>
+requires __format::__formattable_impl<_Rep, _CharT>
 struct formatter<chrono::duration<_Rep, _Period>, _CharT>
 {
   constexpr typename basic_format_parse_context<_CharT>::iterator
diff --git a/libstdc++-v3/testsuite/20_util/duration/io.cc 
b/libstdc++-v3/testsuite/20_util/duration/io.cc
index 57020f4f9537..383fb60afe2a 100644
--- a/libstdc++-v3/testsuite/20_util/duration/io.cc
+++ b/libstdc++-v3/testsuite/20_util/duration/io.cc
@@ -23,6 +23,24 @@ test01()
   VERIFY( s == "3[2]s" );
   std::getline(ss, s);
   VERIFY( s == "9[2/3]s" );
+
+  // LWG 4118. How should duration formatters format custom rep types?
+  ss.str("");
+  ss << duration(121) << ' ';
+  ss << duration(122) << ' ';
+  ss << duration(123) << ' ';
+  ss << duration(124) << ' ';
+  ss << duration(125) << ' ';
+  ss << duration(126) << ' ';
+  ss << duration(127) << ' ';
+  VERIFY( ss.str() == "121s 122s 123s 124s 125s 126s 127s " );
+
+  ss.str("");
+  ss << std::hex << std::uppercase << duration(0x1A) << ' ';
+  ss << std::hex << std::uppercase << duration(0x2A) << ' ';
+  ss << std::hex << std::uppercase << duration(0x3A) << ' ';
+  ss << std::scientific << duration(4.5) << ' ';
+  VERIFY( ss.str() == "1As 2As 3As 4.50E+00s " );
 }
 
 void
@@ -44,6 +62,24 @@ test02()
   VERIFY( s == L"3[2]s" );
   std::getline(ss, s);
   VERIFY( s == L"9[2/3]s" );
+
+  // LWG 4118. How should duration formatters format custom rep types?
+  ss.str(L"");
+  ss << duration(121) << ' ';
+  ss << duration(122) << ' ';
+  ss << duration(123) << ' ';
+  ss << duration(124) << ' ';
+  ss << duration(125) << ' ';
+  ss << duration(126) << ' ';
+  ss << duration(127) << ' ';
+  VERIFY( ss.str() == L"121s 122s 123s 124s 125s 126s 127s " );
+
+  ss.str(L"");
+  ss << std::hex << std::uppercase << duration(0x1A) << ' ';
+  ss << std::hex << std::uppercase << duration(0x2A) << ' ';
+  ss << std::hex << std::uppercase << duration(0x3A) << ' ';
+  ss << std::scientific << duration(4.5) << ' ';
+  VERIFY( ss.str() == L"1As 2As 3As 4.50E+00s " );
 #endif
 }
 
@@ -114,6 +150,36 @@ test_format()
   VERIFY( s == expected );
   s = std::format("{:%Q%q}", minsec);
   VERIFY( s == expected );
+
+  // LWG 4118. How should duration formatters for

[gcc r15-4205] libstdc++: Make std::construct_at support arrays (LWG 3436)

2024-10-09 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:993deb3a9a4eb78b05587f9b2f9d83a4ccc60c74

commit r15-4205-g993deb3a9a4eb78b05587f9b2f9d83a4ccc60c74
Author: Jonathan Wakely 
Date:   Mon Mar 18 16:59:50 2024 +

libstdc++: Make std::construct_at support arrays (LWG 3436)

The issue was approved at the recent St. Louis meeting, requiring
support for bounded arrays, but only without arguments to initialize the
array elements.

libstdc++-v3/ChangeLog:

* include/bits/stl_construct.h (construct_at): Support array
types (LWG 3436).
* testsuite/20_util/specialized_algorithms/construct_at/array.cc:
New test.
* 
testsuite/20_util/specialized_algorithms/construct_at/array_neg.cc:
New test.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/initlist-opt1.C: Adjust for different diagnostics
from std::construct_at by adding -fconcepts-diagnostics-depth=2.

Diff:
---
 gcc/testsuite/g++.dg/cpp0x/initlist-opt1.C |  1 +
 libstdc++-v3/include/bits/stl_construct.h  | 20 +--
 .../specialized_algorithms/construct_at/array.cc   | 41 ++
 .../construct_at/array_neg.cc  | 19 ++
 4 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-opt1.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-opt1.C
index 391b7c47d503..38c4f00cec0d 100644
--- a/gcc/testsuite/g++.dg/cpp0x/initlist-opt1.C
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-opt1.C
@@ -1,5 +1,6 @@
 // PR c++/110102
 // { dg-do compile { target c++11 } }
+// { dg-additional-options "-fconcepts-diagnostics-depth=2" { target c++20 } }
 // { dg-skip-if "requires hosted libstdc++ for list" { ! hostedlib } }
 
 // { dg-error "deleted|construct_at" "" { target *-*-* } 0 }
diff --git a/libstdc++-v3/include/bits/stl_construct.h 
b/libstdc++-v3/include/bits/stl_construct.h
index dc08fb7ea33d..146ea14e99ad 100644
--- a/libstdc++-v3/include/bits/stl_construct.h
+++ b/libstdc++-v3/include/bits/stl_construct.h
@@ -90,11 +90,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #if __cpp_constexpr_dynamic_alloc // >= C++20
   template
-constexpr auto
+requires (!is_unbounded_array_v<_Tp>)
+  && requires { ::new((void*)0) _Tp(std::declval<_Args>()...); }
+constexpr _Tp*
 construct_at(_Tp* __location, _Args&&... __args)
 noexcept(noexcept(::new((void*)0) _Tp(std::declval<_Args>()...)))
--> decltype(::new((void*)0) _Tp(std::declval<_Args>()...))
-{ return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); }
+{
+  void* __loc = const_cast*>(__location);
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 3436. std::construct_at should support arrays
+  if constexpr (is_array_v<_Tp>)
+   {
+ static_assert(sizeof...(_Args) == 0, "std::construct_at for array "
+  "types must not use any arguments to initialize the "
+  "array");
+ return ::new(__loc) _Tp[1]();
+   }
+  else
+   return ::new(__loc) _Tp(std::forward<_Args>(__args)...);
+}
 #endif // C++20
 #endif// C++17
 
diff --git 
a/libstdc++-v3/testsuite/20_util/specialized_algorithms/construct_at/array.cc 
b/libstdc++-v3/testsuite/20_util/specialized_algorithms/construct_at/array.cc
new file mode 100644
index ..c36834628355
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/20_util/specialized_algorithms/construct_at/array.cc
@@ -0,0 +1,41 @@
+// { dg-do compile { target c++20 } }
+
+// LWG 3436. std::construct_at should support arrays
+
+#include 
+#include 
+
+constexpr void
+test_array()
+{
+  int arr[1] { 99 };
+  std::construct_at(&arr);
+  VERIFY( arr[0] == 0 );
+
+  union U {
+long long x;
+int arr[4];
+  } u;
+  u.x = -1;
+
+  auto p = std::construct_at(&u.arr);
+  VERIFY( (*p)[0] == 0 );
+  VERIFY( (*p)[1] == 0 );
+  VERIFY( (*p)[2] == 0 );
+  VERIFY( (*p)[3] == 0 );
+
+  struct NonTrivial {
+constexpr NonTrivial() : i(99) { }
+int i;
+  };
+
+  union U2 {
+char c = 'a';
+NonTrivial arr[2];
+  } u2;
+
+  auto p2 = std::construct_at(&u2.arr);
+  VERIFY( (*p2)[0].i == 99 );
+}
+
+static_assert( [] { test_array(); return true; }() );
diff --git 
a/libstdc++-v3/testsuite/20_util/specialized_algorithms/construct_at/array_neg.cc
 
b/libstdc++-v3/testsuite/20_util/specialized_algorithms/construct_at/array_neg.cc
new file mode 100644
index ..deb86930d1a3
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/20_util/specialized_algorithms/construct_at/array_neg.cc
@@ -0,0 +1,19 @@
+// { dg-do compile { target c++20 } }
+
+// LWG 3436. std::construct_at should support arrays
+
+#include 
+
+void
+test_array_args()
+{
+  int arr[2];
+  std::construct_at(&arr, 1, 2); // { dg-error "here" }
+  // { dg-error "must not use any arguments" "" { target *-*-* } 0 }
+}
+
+void
+test_unbounded_array(int (*p)[])
+{
+  std::construct_at(p); // { dg-error "no matching function" }
+}


[gcc r15-4204] libstdc++: Tweak %c formatting for chrono types

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:ce89d2f3170e0d6474cee2c5cb9d478426a5b2f6

commit r15-4204-gce89d2f3170e0d6474cee2c5cb9d478426a5b2f6
Author: Jonathan Wakely 
Date:   Fri Sep 27 16:54:31 2024 +0100

libstdc++: Tweak %c formatting for chrono types

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (__formatter_chrono::_M_c): Add
[[unlikely]] attribute to condition for missing %c format in
locale. Use %T instead of %H:%M:%S in fallback.

Diff:
---
 libstdc++-v3/include/bits/chrono_io.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index a337007266e9..652e88ffe3ab 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -899,8 +899,8 @@ namespace __format
  const _CharT* __formats[2];
  __tp._M_date_time_formats(__formats);
  const _CharT* __rep = __formats[__mod];
- if (!*__rep)
-   __rep = _GLIBCXX_WIDEN("%a %b %e %H:%M:%S %Y");
+ if (!*__rep) [[unlikely]]
+   __rep = _GLIBCXX_WIDEN("%a %b %e %T %Y");
  basic_string<_CharT> __fmt(_S_empty_spec);
  __fmt.insert(1u, 1u, _S_colon);
  __fmt.insert(2u, __rep);


[gcc r15-4206] libstdc++: Do not cast away const-ness in std::construct_at (LWG 3870)

2024-10-09 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:2eaae1bd69302efe6d73d8d63739b081299f8641

commit r15-4206-g2eaae1bd69302efe6d73d8d63739b081299f8641
Author: Jonathan Wakely 
Date:   Thu Jul 11 20:38:05 2024 +0100

libstdc++: Do not cast away const-ness in std::construct_at (LWG 3870)

This change also requires implementing the proposed resolution of LWG
3216 so that std::make_shared and std::allocate_shared still work, and
the proposed resolution of LWG 3891 so that std::expected still works.

libstdc++-v3/ChangeLog:

* include/bits/shared_ptr_base.h: Remove cv-qualifiers from
type managed by _Sp_counted_ptr_inplace, as per LWG 3210.
* include/bits/stl_construct.h: Do not cast away cv-qualifiers
when passing pointer to placement new.
* include/std/expected: Use remove_cv_t for union member, as per
LWG 3891.
* testsuite/20_util/allocator/void.cc: Do not test construction
via const pointer.

Diff:
---
 libstdc++-v3/include/bits/shared_ptr_base.h  | 15 ---
 libstdc++-v3/include/bits/stl_construct.h|  6 +++---
 libstdc++-v3/include/std/expected|  2 +-
 libstdc++-v3/testsuite/20_util/allocator/void.cc | 15 ---
 4 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h 
b/libstdc++-v3/include/bits/shared_ptr_base.h
index 3d0b74ba1c6e..ef0658f61828 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -591,7 +591,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
_Alloc& _M_alloc() noexcept { return _A_base::_S_get(*this); }
 
-   __gnu_cxx::__aligned_buffer<_Tp> _M_storage;
+   __gnu_cxx::__aligned_buffer<__remove_cv_t<_Tp>> _M_storage;
   };
 
 public:
@@ -633,7 +633,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   virtual void*
   _M_get_deleter(const std::type_info& __ti) noexcept override
   {
-   auto __ptr = const_cast::type*>(_M_ptr());
// Check for the fake type_info first, so we don't try to access it
// as a real type_info object. Otherwise, check if it's the real
// type_info for this class. With RTTI enabled we can check directly,
@@ -646,11 +645,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Sp_make_shared_tag::_S_eq(__ti)
 #endif
   )
- return __ptr;
+ return _M_ptr();
return nullptr;
   }
 
-  _Tp* _M_ptr() noexcept { return _M_impl._M_storage._M_ptr(); }
+  __remove_cv_t<_Tp>*
+  _M_ptr() noexcept { return _M_impl._M_storage._M_ptr(); }
 
   _Impl _M_impl;
 };
@@ -674,13 +674,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   [[no_unique_address]] _Alloc _M_alloc;
 
   union {
-   _Tp _M_obj;
+   remove_cv_t<_Tp> _M_obj;
char _M_unused;
   };
 
   friend class __shared_count<_Lp>; // To be able to call _M_ptr().
 
-  _Tp* _M_ptr() noexcept { return std::__addressof(_M_obj); }
+  auto _M_ptr() noexcept { return std::__addressof(_M_obj); }
 
 public:
   using __allocator_type = __alloc_rebind<_Alloc, _Sp_counted_ptr_inplace>;
@@ -962,7 +962,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__shared_count(_Tp*& __p, _Sp_alloc_shared_tag<_Alloc> __a,
   _Args&&... __args)
{
- typedef _Sp_counted_ptr_inplace<_Tp, _Alloc, _Lp> _Sp_cp_type;
+ using _Tp2 = __remove_cv_t<_Tp>;
+ using _Sp_cp_type = _Sp_counted_ptr_inplace<_Tp2, _Alloc, _Lp>;
  typename _Sp_cp_type::__allocator_type __a2(__a._M_a);
  auto __guard = std::__allocate_guarded(__a2);
  _Sp_cp_type* __mem = __guard.get();
diff --git a/libstdc++-v3/include/bits/stl_construct.h 
b/libstdc++-v3/include/bits/stl_construct.h
index 146ea14e99ad..9d6111396e1c 100644
--- a/libstdc++-v3/include/bits/stl_construct.h
+++ b/libstdc++-v3/include/bits/stl_construct.h
@@ -96,7 +96,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 construct_at(_Tp* __location, _Args&&... __args)
 noexcept(noexcept(::new((void*)0) _Tp(std::declval<_Args>()...)))
 {
-  void* __loc = const_cast*>(__location);
+  void* __loc = __location;
   // _GLIBCXX_RESOLVE_LIB_DEFECTS
   // 3436. std::construct_at should support arrays
   if constexpr (is_array_v<_Tp>)
@@ -130,7 +130,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  return;
}
 #endif
-  ::new((void*)__p) _Tp(std::forward<_Args>(__args)...);
+  ::new(static_cast(__p)) _Tp(std::forward<_Args>(__args)...);
 }
 #else
   template
@@ -146,7 +146,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 inline void
 _Construct_novalue(_T1* __p)
-{ ::new((void*)__p) _T1; }
+{ ::new(static_cast(__p)) _T1; }
 
   template
 _GLIBCXX20_CONSTEXPR void
diff --git a/libstdc++-v3/include/std/expected 
b/libstdc++-v3/include/std/expected
index 9e92339e4066..d4a4bc175415 100644
--- a/libstdc++-v3/include/std/expected
+++ b/li

[gcc r15-4209] libstdc++: Fix -Wsign-compare in std::latch::count_down

2024-10-09 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:f5021ce9aa6be524beca99e0bbd0180b4e53029b

commit r15-4209-gf5021ce9aa6be524beca99e0bbd0180b4e53029b
Author: Jonathan Wakely 
Date:   Fri Oct 4 18:11:06 2024 +0100

libstdc++: Fix -Wsign-compare in std::latch::count_down

Also add assertions for the precondition on the parameter's value.

libstdc++-v3/ChangeLog:

* include/std/latch (latch::count_down): Add assertions for
preconditions. Cast parameter to avoid -Wsign-compare on some
targets.

Diff:
---
 libstdc++-v3/include/std/latch | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/latch b/libstdc++-v3/include/std/latch
index 146e1860979a..1d254aa2581a 100644
--- a/libstdc++-v3/include/std/latch
+++ b/libstdc++-v3/include/std/latch
@@ -63,9 +63,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _GLIBCXX_ALWAYS_INLINE void
 count_down(ptrdiff_t __update = 1)
 {
+  __glibcxx_assert(__update >= 0);
   auto const __old = __atomic_impl::fetch_sub(&_M_a,
__update, memory_order::release);
-  if (__old == __update)
+  __glibcxx_assert(__update >= 0);
+  if (__old == static_cast<__detail::__platform_wait_t>(__update))
__atomic_impl::notify_all(&_M_a);
 }
 
@@ -88,6 +90,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 }
 
   private:
+// This alignas is not redundant, it increases the alignment for
+// long long on x86.
 alignas(__alignof__(__detail::__platform_wait_t)) 
__detail::__platform_wait_t _M_a;
   };
 _GLIBCXX_END_NAMESPACE_VERSION


[gcc r15-4211] libstdc++: Ignore _GLIBCXX_USE_POSIX_SEMAPHORE if not supported [PR116992]

2024-10-09 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:9a5ac633f0f49c819f2745584475051c9eb8f6e0

commit r15-4211-g9a5ac633f0f49c819f2745584475051c9eb8f6e0
Author: Jonathan Wakely 
Date:   Mon Oct 7 10:22:24 2024 +0100

libstdc++: Ignore _GLIBCXX_USE_POSIX_SEMAPHORE if not supported [PR116992]

If _GLIBCXX_HAVE_POSIX_SEMAPHORE is undefined then users get an error
when defining _GLIBCXX_USE_POSIX_SEMAPHORE. We can just ignore it
instead (and warn them it's being ignored).

This fixes a testsuite failure on hppa64-hp-hpux11.11 (and probably some
other targets):

FAIL: 30_threads/semaphore/platform_try_acquire_for.cc  -std=gnu++20 (test 
for excess errors)
Excess errors:
semaphore:49: error: '__semaphore_impl' has not been declared

libstdc++-v3/ChangeLog:

PR libstdc++/116992
* include/bits/semaphore_base.h (_GLIBCXX_USE_POSIX_SEMAPHORE):
Undefine and issue a warning if POSIX sem_t is not supported.
* testsuite/30_threads/semaphore/platform_try_acquire_for.cc:
Prune new warning.

Diff:
---
 libstdc++-v3/include/bits/semaphore_base.h | 3 +++
 .../testsuite/30_threads/semaphore/platform_try_acquire_for.cc | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/libstdc++-v3/include/bits/semaphore_base.h 
b/libstdc++-v3/include/bits/semaphore_base.h
index 9d73b37e60a1..dd16d2c92498 100644
--- a/libstdc++-v3/include/bits/semaphore_base.h
+++ b/libstdc++-v3/include/bits/semaphore_base.h
@@ -45,6 +45,9 @@
 # include  // errno, EINTR, EAGAIN etc.
 # include// SEM_VALUE_MAX
 # include // sem_t, sem_init, sem_wait, sem_post etc.
+#elif defined(_GLIBCXX_USE_POSIX_SEMAPHORE)
+# warning "POSIX semaphore not available, ignoring 
_GLIBCXX_USE_POSIX_SEMAPHORE"
+# undef _GLIBCXX_USE_POSIX_SEMAPHORE
 #endif
 
 namespace std _GLIBCXX_VISIBILITY(default)
diff --git 
a/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc 
b/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc
index bf6cd142bf01..6d90564ea8ac 100644
--- a/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc
+++ b/libstdc++-v3/testsuite/30_threads/semaphore/platform_try_acquire_for.cc
@@ -5,3 +5,5 @@
 // { dg-add-options libatomic }
 
 #include "try_acquire_for.cc"
+
+// { dg-prune-output "ignoring _GLIBCXX_USE_POSIX_SEMAPHORE" }


[gcc r15-4210] libstdc++: Fix -Wnarrowing in <complex> [PR116991]

2024-10-09 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:e998014d1b14592c43b0f655793011c6395ff02a

commit r15-4210-ge998014d1b14592c43b0f655793011c6395ff02a
Author: Jonathan Wakely 
Date:   Mon Oct 7 10:19:29 2024 +0100

libstdc++: Fix -Wnarrowing in <complex> [PR116991]

When _GLIBCXX_USE_C99_COMPLEX_ARC is undefined we use the generic
__complex_acos function template for _Float32 etc. and that gives a
-Wnarrowing warning:

complex:2043: warning: ISO C++ does not allow converting to '_Float32' from 
'long double' with greater conversion rank [-Wnarrowing]

Use a cast to do the conversion so that it doesn't warn.

libstdc++-v3/ChangeLog:

PR libstdc++/116991
* include/std/complex (__complex_acos): Cast literal to
destination type.

Diff:
---
 libstdc++-v3/include/std/complex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/complex b/libstdc++-v3/include/std/complex
index 5bc6618f7de0..eb89e3a8bcfb 100644
--- a/libstdc++-v3/include/std/complex
+++ b/libstdc++-v3/include/std/complex
@@ -2040,7 +2040,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 __complex_acos(const std::complex<_Tp>& __z)
 {
   const std::complex<_Tp> __t = std::asin(__z);
-  const _Tp __pi_2 = 1.5707963267948966192313216916397514L;
+  const _Tp __pi_2 = (_Tp) 1.5707963267948966192313216916397514L;
   return std::complex<_Tp>(__pi_2 - __t.real(), -__t.imag());
 }


[gcc r15-4199] libcpp: Use ' instead of %< and %> [PR117039]

2024-10-09 Thread Ken Matsui via Gcc-cvs
https://gcc.gnu.org/g:f709990333597b30dff54876bfdaada14e9cde30

commit r15-4199-gf709990333597b30dff54876bfdaada14e9cde30
Author: Ken Matsui 
Date:   Wed Oct 9 07:32:20 2024 -0400

libcpp: Use ' instead of %< and %> [PR117039]

PR bootstrap/117039

libcpp/ChangeLog:

* directives.cc (do_pragma_once): Use ' instead of %< and %>.

Signed-off-by: Ken Matsui 

Diff:
---
 libcpp/directives.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcpp/directives.cc b/libcpp/directives.cc
index 95095b08eafe..9c906b390660 100644
--- a/libcpp/directives.cc
+++ b/libcpp/directives.cc
@@ -2075,7 +2075,7 @@ do_pragma_once (cpp_reader *pfile)
 {
   if (_cpp_in_main_source_file (pfile))
 cpp_warning (pfile, CPP_W_PRAGMA_ONCE_OUTSIDE_HEADER,
-"%<#pragma once%> in main file");
+"'#pragma once' in main file");
 
   check_eol (pfile, false);
   _cpp_mark_file_once_only (pfile, pfile->buffer->file);


[gcc r14-10761] c: fix crash when checking for compatibility of structures [PR116726]

2024-10-09 Thread Martin Uecker via Gcc-cvs
https://gcc.gnu.org/g:4123f546d77c42287d2fcae6fb04df7b87193454

commit r14-10761-g4123f546d77c42287d2fcae6fb04df7b87193454
Author: Martin Uecker 
Date:   Tue Sep 17 11:37:29 2024 +0200

c: fix crash when checking for compatibility of structures [PR116726]

When checking for compatibility of structure or union types in
tagged_types_tu_compatible_p, restore the old value of the pointer to
the top of the temporary cache after recursively calling comptypes_internal
when looping over the members of a structure or union.  While the next
iteration of the loop overwrites the pointer, I missed the fact that it can
be accessed again when types of function arguments are compared as part
of recursive type checking and the function is entered again.

PR c/116726

gcc/c/ChangeLog:

* c-typeck.cc (tagged_types_tu_compatible_p): Restore value
of the cache after recursing into comptypes_internal.

gcc/testsuite/ChangeLog:

* gcc.dg/pr116726.c: New test.

(cherry picked from commit 9227a64495d5594613604573b72422e8e3722fc5)

Diff:
---
 gcc/c/c-typeck.cc   |  5 -
 gcc/testsuite/gcc.dg/pr116726.c | 18 ++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 4567b114734b..ebc2c288f981 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -1611,8 +1611,11 @@ tagged_types_tu_compatible_p (const_tree t1, const_tree 
t2,
 
data->anon_field = !DECL_NAME (s1);
 
+   const struct tagged_tu_seen_cache *cache = data->cache;
data->cache = &entry;
-   if (!comptypes_internal (TREE_TYPE (s1), TREE_TYPE (s2), data))
+   bool ret = comptypes_internal (TREE_TYPE (s1), TREE_TYPE (s2), 
data);
+   data->cache = cache;
+   if (!ret)
  return false;
 
tree st1 = TYPE_SIZE (TREE_TYPE (s1));
diff --git a/gcc/testsuite/gcc.dg/pr116726.c b/gcc/testsuite/gcc.dg/pr116726.c
new file mode 100644
index ..bb25efca5864
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116726.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c23" } */
+
+struct s1 {
+  int f1;
+};
+struct s2 {
+  int f2;
+};
+struct s1 f(struct s2 *);
+
+struct s1 {
+  int f1;
+};
+struct s2 {
+  int f2;
+};
+struct s1 f(struct s2 *);


[gcc r15-4196] tree-optimization/116974 - Handle single-lane SLP for OMP scan store

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9df0772d50d8f8a75389d319949632e5d111cc6c

commit r15-4196-g9df0772d50d8f8a75389d319949632e5d111cc6c
Author: Richard Biener 
Date:   Wed Mar 13 14:59:27 2024 +0100

tree-optimization/116974 - Handle single-lane SLP for OMP scan store

The following massages the GIMPLE matching way of handling scan
stores to work with single-lane SLP.  I do not fully understand all
the cases that can happen and the stmt matching at vectorizable_store
time is less than ideal - but the following gets me all the testcases
to pass with and without forced SLP.

Long term we want to perform the matching at SLP discovery time,
properly chaining the various SLP instances the current state ends
up with.

PR tree-optimization/116974
* tree-vect-stmts.cc (check_scan_store): Pass in the SLP node
instead of just a flag.  Allow single-lane scan stores.
(vectorizable_store): Adjust.
* tree-vect-loop.cc (vect_analyze_loop_2): Empty scan_map
before re-trying.

Diff:
---
 gcc/tree-vect-loop.cc  |  2 ++
 gcc/tree-vect-stmts.cc | 84 ++
 2 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 6933f597b4df..9be50aaa621c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3369,6 +3369,8 @@ again:
   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
 = saved_can_use_partial_vectors_p;
   LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
+  if (loop_vinfo->scan_map)
+loop_vinfo->scan_map->empty ();
 
   goto start_over;
 }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 0b0cf8f114e3..433587679346 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -7445,7 +7445,7 @@ scan_store_can_perm_p (tree vectype, tree init,
 
 static bool
 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
- enum vect_def_type rhs_dt, bool slp, tree mask,
+ enum vect_def_type rhs_dt, slp_tree slp_node, tree mask,
  vect_memory_access_type memory_access_type)
 {
   loop_vec_info loop_vinfo = dyn_cast  (vinfo);
@@ -7453,7 +7453,7 @@ check_scan_store (vec_info *vinfo, stmt_vec_info 
stmt_info, tree vectype,
   tree ref_type;
 
   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
-  if (slp
+  if ((slp_node && SLP_TREE_LANES (slp_node) > 1)
   || mask
   || memory_access_type != VMAT_CONTIGUOUS
   || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
@@ -7848,8 +7848,8 @@ check_scan_store (vec_info *vinfo, stmt_vec_info 
stmt_info, tree vectype,
Handle only the transformation, checking is done in check_scan_store.  */
 
 static bool
-vectorizable_scan_store (vec_info *vinfo,
-stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+vectorizable_scan_store (vec_info *vinfo, stmt_vec_info stmt_info,
+slp_tree slp_node, gimple_stmt_iterator *gsi,
 gimple **vec_stmt, int ncopies)
 {
   loop_vec_info loop_vinfo = dyn_cast  (vinfo);
@@ -7961,16 +7961,34 @@ vectorizable_scan_store (vec_info *vinfo,
   tree orig = NULL_TREE;
   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
-  auto_vec vec_oprnds1;
+  /* The initialization is invariant.  */
+  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, *init, vectype, NULL);
   auto_vec vec_oprnds2;
   auto_vec vec_oprnds3;
-  vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
-*init, &vec_oprnds1,
-ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
-rhs2, &vec_oprnds3);
-  for (int j = 0; j < ncopies; j++)
+  if (ldataref_ptr == NULL)
+{
+  /* We want to lookup the vector operands of the reduction, not those
+of the store - for SLP we have to use the proper SLP node for the
+lookup, which should be the single child of the scan store.  */
+  vect_get_vec_defs (vinfo, stmt_info, SLP_TREE_CHILDREN (slp_node)[0],
+ncopies, rhs1, &vec_oprnds2, rhs2, &vec_oprnds3);
+  /* ???  For SLP we do not key the def on 'rhs1' or 'rhs2' but get
+them in SLP child order.  So we have to swap here with logic
+similar to above.  */
+  stmt_vec_info load
+   = SLP_TREE_SCALAR_STMTS (SLP_TREE_CHILDREN
+  (SLP_TREE_CHILDREN (slp_node)[0])[0])[0];
+  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (load);
+  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)))
+   for (unsigned i = 0; i < vec_oprnds2.length (); ++i)
+ std::swap (vec_oprnds2[i], vec_oprnds3[i]);;
+}
+  else
+vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+   

[gcc r15-4197] Remove ia64*-*-linux from the list of obsolete targets

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:452b12cea8869f120e7c703577c4270476fe6b76

commit r15-4197-g452b12cea8869f120e7c703577c4270476fe6b76
Author: René Rebe 
Date:   Wed Jun 12 12:42:00 2024 +0200

Remove ia64*-*-linux from the list of obsolete targets

The following un-deprecates ia64*-*-linux for GCC 15, since we plan to
support this for some years to come.

gcc/
* config.gcc: Only list ia64*-*-(hpux|vms|elf) in the list of
obsoleted targets.

contrib/
* config-list.mk (LIST): no --enable-obsolete for ia64-linux.

Signed-off-by: René Rebe 

Diff:
---
 contrib/config-list.mk | 2 +-
 gcc/config.gcc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/contrib/config-list.mk b/contrib/config-list.mk
index f282cd95c8d0..4ddb3eeab124 100644
--- a/contrib/config-list.mk
+++ b/contrib/config-list.mk
@@ -61,7 +61,7 @@ LIST = \
   i686-rtems i686-solaris2.11 i686-wrs-vxworks \
   i686-wrs-vxworksae \
   i686-cygwinOPT-enable-threads=yes i686-mingw32crt 
ia64-elfOPT-enable-obsolete \
-  ia64-linuxOPT-enable-obsolete ia64-hpuxOPT-enable-obsolete \
+  ia64-linux ia64-hpuxOPT-enable-obsolete \
   ia64-hp-vmsOPT-enable-obsolete iq2000-elf lm32-elf \
   lm32-rtems lm32-uclinux \
   loongarch64-linux-gnuf64 loongarch64-linux-gnuf32 loongarch64-linux-gnusf \
diff --git a/gcc/config.gcc b/gcc/config.gcc
index f09ce9f63a01..71ac3badafd9 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -272,7 +272,7 @@ esac
 
 # Obsolete configurations.
 case ${target} in
- ia64*-*-* \
+ ia64*-*-hpux* | ia64*-*-*vms* | ia64*-*-elf*  \
| nios2*-*-*\
  )
 if test "x$enable_obsolete" != xyes; then


[gcc r15-4198] Enable LRA for ia64

2024-10-09 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:68afc7acf609be2b19ec05c8393c2ffc7f4adb4a

commit r15-4198-g68afc7acf609be2b19ec05c8393c2ffc7f4adb4a
Author: René Rebe 
Date:   Wed Jun 12 12:42:00 2024 +0200

Enable LRA for ia64

This was tested by bootstrapping GCC natively on ia64-t2-linux-gnu and
running the testsuite (based on
236116068151bbc72aaaf53d0f223fe06f7e3bac):

https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817268.html

For comparison, the same with just
236116068151bbc72aaaf53d0f223fe06f7e3bac:

https://gcc.gnu.org/pipermail/gcc-testresults/2024-June/817267.html

gcc/
* config/ia64/ia64.cc: Enable LRA for ia64.
* config/ia64/ia64.md: Likewise.
* config/ia64/predicates.md: Likewise.

Signed-off-by: René Rebe 

Diff:
---
 gcc/config/ia64/ia64.cc   | 7 ++-
 gcc/config/ia64/ia64.md   | 4 ++--
 gcc/config/ia64/predicates.md | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
index cd6ed8952114..54706da33364 100644
--- a/gcc/config/ia64/ia64.cc
+++ b/gcc/config/ia64/ia64.cc
@@ -619,9 +619,6 @@ static const scoped_attribute_specs *const 
ia64_attribute_table[] =
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
-
 #undef TARGET_CANNOT_FORCE_CONST_MEM
 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
 
@@ -1333,7 +1330,7 @@ ia64_expand_move (rtx op0, rtx op1)
 {
   machine_mode mode = GET_MODE (op0);
 
-  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
+  if (!lra_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
 op1 = force_reg (mode, op1);
 
   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
@@ -1780,7 +1777,7 @@ ia64_expand_movxf_movrf (machine_mode mode, rtx 
operands[])
}
 }
 
-  if (!reload_in_progress && !reload_completed)
+  if (!lra_in_progress && !reload_completed)
 {
   operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
 
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index 698e302081e7..d485acc0ea86 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -2318,7 +2318,7 @@
  (match_operand:DI 3 "register_operand" "f"))
 (match_operand:DI 4 "nonmemory_operand" "rI")))
(clobber (match_scratch:DI 5 "=f"))]
-  "reload_in_progress"
+  "lra_in_progress"
   "#"
   [(set_attr "itanium_class" "unknown")])
 
@@ -3407,7 +3407,7 @@
   (match_operand:DI 2 "shladd_operand" "n"))
  (match_operand:DI 3 "nonmemory_operand" "r"))
 (match_operand:DI 4 "nonmemory_operand" "rI")))]
-  "reload_in_progress"
+  "lra_in_progress"
   "* gcc_unreachable ();"
   "reload_completed"
   [(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md
index 01a4effd3396..85f5380e7344 100644
--- a/gcc/config/ia64/predicates.md
+++ b/gcc/config/ia64/predicates.md
@@ -347,7 +347,7 @@
   allows reload the opportunity to avoid spilling addresses to
   the stack, and instead simply substitute in the value from a
   REG_EQUIV.  We'll split this up again when splitting the insn.  */
-   if (reload_in_progress || reload_completed)
+   if (lra_in_progress || reload_completed)
  return true;
 
/* Some symbol types we allow to use with any offset.  */


[gcc r15-4230] diagnostics: mark the JSON output format as deprecated

2024-10-09 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:a4e4f2d22589a8565bebf906930bf6a536a81167

commit r15-4230-ga4e4f2d22589a8565bebf906930bf6a536a81167
Author: David Malcolm 
Date:   Wed Oct 9 21:26:09 2024 -0400

diagnostics: mark the JSON output format as deprecated

The bulk of the documentation for -fdiagnostics-format= is taken up
by a description of the "json" format added in r9-4156-g478dd60ddcf177.

I don't plan to add any extra features to the "json" format; all my
future work on machine-readable GCC diagnostics is likely to be on the
SARIF output format (https://gcc.gnu.org/wiki/SARIF).

Hence users seeking machine-readable output from GCC should use SARIF.

This patch removes the long documentation of the format and describes it
as deprecated.

gcc/ChangeLog:
* doc/invoke.texi (fdiagnostics-format): Describe "json" et al as
deprecated, and remove the long description of the output format.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/doc/invoke.texi | 266 +---
 1 file changed, 2 insertions(+), 264 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 12477e6f9df3..575dffd2a2f5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -5907,276 +5907,14 @@ The @samp{sarif-stderr} and @samp{sarif-file} formats 
both emit
 diagnostics in SARIF Version 2.1.0 format, either to stderr, or to a file
 named @file{@var{source}.sarif}, respectively.
 
+The various @samp{json}, @samp{json-stderr}, and @samp{json-file} values
+are deprecated and refer to a legacy JSON-based output format.
 The @samp{json} format is a synonym for @samp{json-stderr}.
 The @samp{json-stderr} and @samp{json-file} formats are identical, apart from
 where the JSON is emitted to.  With @samp{json-stderr}, the JSON is emitted
 to stderr, whereas with @samp{json-file} it is written to
 @file{@var{source}.gcc.json}.
 
-The emitted JSON consists of a top-level JSON array containing JSON objects
-representing the diagnostics.
-
-Diagnostics can have child diagnostics.  For example, this error and note:
-
-@smallexample
-misleading-indentation.c:15:3: warning: this 'if' clause does not
-  guard... [-Wmisleading-indentation]
-   15 |   if (flag)
-  |   ^~
-misleading-indentation.c:17:5: note: ...this statement, but the latter
-  is misleadingly indented as if it were guarded by the 'if'
-   17 | y = 2;
-  | ^
-@end smallexample
-
-@noindent
-might be printed in JSON form (after formatting) like this:
-
-@smallexample
-[
-@{
-"kind": "warning",
-"locations": [
-@{
-"caret": @{
-   "display-column": 3,
-   "byte-column": 3,
-"column": 3,
-"file": "misleading-indentation.c",
-"line": 15
-@},
-"finish": @{
-   "display-column": 4,
-   "byte-column": 4,
-"column": 4,
-"file": "misleading-indentation.c",
-"line": 15
-@}
-@}
-],
-"message": "this \u2018if\u2019 clause does not guard...",
-"option": "-Wmisleading-indentation",
-"option_url": 
"https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wmisleading-indentation",
-"children": [
-@{
-"kind": "note",
-"locations": [
-@{
-"caret": @{
-   "display-column": 5,
-   "byte-column": 5,
-"column": 5,
-"file": "misleading-indentation.c",
-"line": 17
-@}
-@}
-],
-"escape-source": false,
-"message": "...this statement, but the latter is @dots{}"
-@}
-]
-   "escape-source": false,
-   "column-origin": 1,
-@}
-]
-@end smallexample
-
-@noindent
-where the @code{note} is a child of the @code{warning}.
-
-A diagnostic has a @code{kind}.  If this is @code{warning}, then there is
-an @code{option} key describing the command-line option controlling the
-warning.
-
-A diagnostic can contain zero or more locations.  Each location has an
-optional @code{label} string and up to three positions within it: a
-@code{caret} position and optional @code{start} and @code{finish} positions.
-A position is described by a @code{file} name, a @code{line} number, and
-three numbers indicating a column position:
-@itemize @bullet
-
-@item
-@code{display-column} counts display columns, accounting for tabs and
-multibyte characters.
-
-@item
-@code{byte-column} counts raw bytes.
-
-@item
-@code{column} is equal to one of
-the previous two, as dictated by the @option{-fdiagnostics-column-unit}
-option.
-
-

[gcc r15-4231] diagnostics: move text output member functions to correct file

2024-10-09 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:00ede02bc8bb73da8f4bf1d7542142cd923b0c54

commit r15-4231-g00ede02bc8bb73da8f4bf1d7542142cd923b0c54
Author: David Malcolm 
Date:   Wed Oct 9 21:26:09 2024 -0400

diagnostics: move text output member functions to correct file

No functional change intended.

gcc/ChangeLog:
* diagnostic-format-text.cc
(diagnostic_text_output_format::after_diagnostic): Replace call to
show_any_path with body, taken from diagnostic.cc.
(diagnostic_text_output_format::build_prefix): Move here from
diagnostic.cc, updating to use get_diagnostic_kind_text and
diagnostic_get_color_for_kind.
(diagnostic_text_output_format::file_name_as_prefix): Move here
from diagnostic.cc
(diagnostic_text_output_format::append_note): Likewise.
* diagnostic-format-text.h
(diagnostic_text_output_format::show_any_path): Drop decl.
* diagnostic.cc
(diagnostic_text_output_format::file_name_as_prefix): Move to
diagnostic-format-text.cc.
(diagnostic_text_output_format::build_prefix): Likewise.
(diagnostic_text_output_format::show_any_path): Move to body of
diagnostic_text_output_format::after_diagnostic.
(diagnostic_text_output_format::append_note): Move to
diagnostic-format-text.cc.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-format-text.cc | 72 -
 gcc/diagnostic-format-text.h  |  2 --
 gcc/diagnostic.cc | 84 ---
 3 files changed, 71 insertions(+), 87 deletions(-)

diff --git a/gcc/diagnostic-format-text.cc b/gcc/diagnostic-format-text.cc
index a6592fe93e6c..0d58d5fb082d 100644
--- a/gcc/diagnostic-format-text.cc
+++ b/gcc/diagnostic-format-text.cc
@@ -117,7 +117,77 @@ void
 diagnostic_text_output_format::
 after_diagnostic (const diagnostic_info &diagnostic)
 {
-  show_any_path (diagnostic);
+  if (const diagnostic_path *path = diagnostic.richloc->get_path ())
+print_path (*path);
+}
+
+/* Return a malloc'd string describing a location and the severity of the
+   diagnostic, e.g. "foo.c:42:10: error: ".  The caller is responsible for
+   freeing the memory.  */
+char *
+diagnostic_text_output_format::
+build_prefix (const diagnostic_info &diagnostic) const
+{
+  gcc_assert (diagnostic.kind < DK_LAST_DIAGNOSTIC_KIND);
+
+  const char *text = _(get_diagnostic_kind_text (diagnostic.kind));
+  const char *text_cs = "", *text_ce = "";
+  pretty_printer *pp = get_printer ();
+
+  if (const char *color_name = diagnostic_get_color_for_kind (diagnostic.kind))
+{
+  text_cs = colorize_start (pp_show_color (pp), color_name);
+  text_ce = colorize_stop (pp_show_color (pp));
+}
+
+  const expanded_location s = diagnostic_expand_location (&diagnostic);
+  label_text location_text = get_location_text (s);
+
+  char *result = build_message_string ("%s %s%s%s", location_text.get (),
+  text_cs, text, text_ce);
+  return result;
+}
+
+/* Same as build_prefix, but only the source FILE is given.  */
+char *
+diagnostic_text_output_format::file_name_as_prefix (const char *f) const
+{
+  pretty_printer *const pp = get_printer ();
+  const char *locus_cs
+= colorize_start (pp_show_color (pp), "locus");
+  const char *locus_ce = colorize_stop (pp_show_color (pp));
+  return build_message_string ("%s%s:%s ", locus_cs, f, locus_ce);
+}
+
+/* Add a purely textual note with text GMSGID and with LOCATION.  */
+
+void
+diagnostic_text_output_format::append_note (location_t location,
+   const char * gmsgid, ...)
+{
+  diagnostic_context *context = &get_context ();
+
+  diagnostic_info diagnostic;
+  va_list ap;
+  rich_location richloc (line_table, location);
+
+  va_start (ap, gmsgid);
+  diagnostic_set_info (&diagnostic, gmsgid, &ap, &richloc, DK_NOTE);
+  if (context->m_inhibit_notes_p)
+{
+  va_end (ap);
+  return;
+}
+  pretty_printer *pp = get_printer ();
+  char *saved_prefix = pp_take_prefix (pp);
+  pp_set_prefix (pp, build_prefix (diagnostic));
+  pp_format (pp, &diagnostic.message);
+  pp_output_formatted_text (pp);
+  pp_destroy_prefix (pp);
+  pp_set_prefix (pp, saved_prefix);
+  pp_newline (pp);
+  diagnostic_show_locus (context, &richloc, DK_NOTE, pp);
+  va_end (ap);
 }
 
 /* If DIAGNOSTIC has a CWE identifier, print it.
diff --git a/gcc/diagnostic-format-text.h b/gcc/diagnostic-format-text.h
index aacd699cd90a..2e57e27c739d 100644
--- a/gcc/diagnostic-format-text.h
+++ b/gcc/diagnostic-format-text.h
@@ -78,8 +78,6 @@ private:
   label_text get_location_text (const expanded_location &s) const;
   bool includes_seen_p (const line_map_ordinary *map);
 
-  void show_any_path (const diagnostic_info &diagnostic);
-
   diagnostic_column_policy m_column_policy;
 
   /* Used to detect whe

[gcc r15-4229] lto: reimplement print_lto_docs_link [PR116613]

2024-10-09 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:8d0de31c931ddacc03e8bd1ce6d89f517c62c7b3

commit r15-4229-g8d0de31c931ddacc03e8bd1ce6d89f517c62c7b3
Author: David Malcolm 
Date:   Wed Oct 9 21:26:08 2024 -0400

lto: reimplement print_lto_docs_link [PR116613]

gcc/ChangeLog:
PR other/116613
* lto-wrapper.cc (print_lto_docs_link): Use a format string rather
than building the string manually.  Fix memory leak of "url" by
using label_text.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/lto-wrapper.cc | 17 -
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
index 141856c9b67b..9d7fe18b2296 100644
--- a/gcc/lto-wrapper.cc
+++ b/gcc/lto-wrapper.cc
@@ -1366,19 +1366,10 @@ init_num_threads (void)
 void
 print_lto_docs_link ()
 {
-  bool print_url = global_dc->m_printer->supports_urls_p ();
-  const char *url = global_dc->make_option_url (OPT_flto);
-
-  pretty_printer pp;
-  pp.set_url_format (URL_FORMAT_DEFAULT);
-  pp_string (&pp, "see the ");
-  if (print_url)
-pp_begin_url (&pp, url);
-  pp_string (&pp, "%<-flto%> option documentation");
-  if (print_url)
-pp_end_url (&pp);
-  pp_string (&pp, " for more information");
-  inform (UNKNOWN_LOCATION, pp_formatted_text (&pp));
+  label_text url = label_text::take (global_dc->make_option_url (OPT_flto));
+  inform (UNKNOWN_LOCATION,
+ "see the %{%<-flto%> option documentation%} for more information",
+ url.get ());
 }
 
 /* Test that a make command is present and working, return true if so.  */


[gcc r15-4225] Enable vectorization for unknown tripcount in very cheap cost model but disable epilog vectorization

2024-10-09 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:70c3db511ba14ff5fa68cb41d0714a9fb957ea5d

commit r15-4225-g70c3db511ba14ff5fa68cb41d0714a9fb957ea5d
Author: liuhongt 
Date:   Mon Mar 25 21:28:14 2024 -0700

Enable vectorization for unknown tripcount in very cheap cost model but 
disable epilog vectorization.

gcc/ChangeLog:

* tree-vect-loop.cc (vect_analyze_loop_costing): Enable
vectorization for LOOP_VINFO_PEELING_FOR_NITER in very cheap
cost model.
(vect_analyze_loop): Disable epilogue vectorization in very
cheap cost model.
* doc/invoke.texi: Adjust documents for very-cheap cost model.

Diff:
---
 gcc/doc/invoke.texi   | 11 ---
 gcc/tree-vect-loop.cc |  6 +++---
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c0c8bf1c29a9..12477e6f9df3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14315,13 +14315,10 @@ counts that will likely execute faster than when 
executing the original
 scalar loop.  The @samp{cheap} model disables vectorization of
 loops where doing so would be cost prohibitive for example due to
 required runtime checks for data dependence or alignment but otherwise
-is equal to the @samp{dynamic} model.  The @samp{very-cheap} model only
-allows vectorization if the vector code would entirely replace the
-scalar code that is being vectorized.  For example, if each iteration
-of a vectorized loop would only be able to handle exactly four iterations
-of the scalar loop, the @samp{very-cheap} model would only allow
-vectorization if the scalar iteration count is known to be a multiple
-of four.
+is equal to the @samp{dynamic} model.  The @samp{very-cheap} model disables
+vectorization of loops when any runtime check for data dependence or alignment
+is required, it also disables vectorization of epilogue loops but otherwise is
+equal to the @samp{cheap} model.
 
 The default cost model depends on other optimization flags and is
 either @samp{dynamic} or @samp{cheap}.
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 9be50aaa621c..ade72a5124f7 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2375,8 +2375,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo,
  a copy of the scalar code (even if we might be able to vectorize it).  */
   if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
   && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
- || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)))
 {
   if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3683,7 +3682,8 @@ vect_analyze_loop (class loop *loop, gimple 
*loop_vectorized_call,
   /* No code motion support for multiple epilogues so 
for now
  not supported when multiple exits.  */
 && !LOOP_VINFO_EARLY_BREAKS (first_loop_vinfo)
-&& !loop->simduid);
+&& !loop->simduid
+&& loop_cost_model (loop) > 
VECT_COST_MODEL_VERY_CHEAP);
   if (!vect_epilogues)
 return first_loop_vinfo;


[gcc r15-4226] Adjust testcase after relax O2 vectorization.

2024-10-09 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:d5d1189c12199db79f6feb5cfcc7e6475c3a4d91

commit r15-4226-gd5d1189c12199db79f6feb5cfcc7e6475c3a4d91
Author: liuhongt 
Date:   Thu Sep 19 13:38:34 2024 +0800

Adjust testcase after relax O2 vectorization.

gcc/testsuite/ChangeLog:

* gcc.dg/fstack-protector-strong.c: Adjust
scan-assembler-times.
* gcc.dg/graphite/scop-6.c: Refine the testcase to avoid array
out of bounds.
* gcc.dg/graphite/scop-9.c: Ditto.
* gcc.dg/tree-ssa/ivopts-lt-2.c: Add -fno-tree-vectorize.
* gcc.dg/tree-ssa/ivopts-lt.c: Ditto.
* gcc.dg/tree-ssa/loop-16.c: Ditto.
* gcc.dg/tree-ssa/loop-28.c: Ditto.
* gcc.dg/tree-ssa/loop-bound-2.c: Ditto.
* gcc.dg/tree-ssa/loop-bound-4.c: Ditto.
* gcc.dg/tree-ssa/loop-bound-6.c: Ditto.
* gcc.dg/tree-ssa/predcom-4.c: Ditto.
* gcc.dg/tree-ssa/predcom-5.c: Ditto.
* gcc.dg/tree-ssa/scev-11.c: Ditto.
* gcc.dg/tree-ssa/scev-9.c: Ditto.
* gcc.dg/tree-ssa/split-path-11.c: Ditto.
* gcc.dg/unroll-8.c: Ditto.
* gcc.dg/var-expand1.c: Ditto.
* gcc.dg/vect/vect-cost-model-6.c: Removed.
* gcc.target/i386/pr86270.c: Ditto.
* gcc.target/i386/pr86722.c: Ditto.
* gcc.target/x86_64/abi/callabi/leaf-2.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.dg/fstack-protector-strong.c   |  2 +-
 gcc/testsuite/gcc.dg/graphite/scop-6.c   |  7 +++
 gcc/testsuite/gcc.dg/graphite/scop-9.c   |  4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c  |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c|  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/loop-16.c  |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/loop-28.c  |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/loop-bound-2.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/loop-bound-4.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/loop-bound-6.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c|  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c|  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/scev-11.c  |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/scev-9.c   |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/split-path-11.c|  2 +-
 gcc/testsuite/gcc.dg/unroll-8.c  |  3 +--
 gcc/testsuite/gcc.dg/var-expand1.c   |  2 +-
 gcc/testsuite/gcc.dg/vect/vect-cost-model-6.c| 12 
 gcc/testsuite/gcc.target/i386/pr86270.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr86722.c  |  2 +-
 gcc/testsuite/gcc.target/x86_64/abi/callabi/leaf-2.c |  2 +-
 21 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/fstack-protector-strong.c 
b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
index 94dc3508f1ad..b9f63966b7cc 100644
--- a/gcc/testsuite/gcc.dg/fstack-protector-strong.c
+++ b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
@@ -154,4 +154,4 @@ void foo12 ()
   global3 ();
 }
 
-/* { dg-final { scan-assembler-times "stack_chk_fail" 12 } } */
+/* { dg-final { scan-assembler-times "stack_chk_fail" 11 } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/scop-6.c 
b/gcc/testsuite/gcc.dg/graphite/scop-6.c
index 9bc1d9f4ccd0..e7e0a080c5fd 100644
--- a/gcc/testsuite/gcc.dg/graphite/scop-6.c
+++ b/gcc/testsuite/gcc.dg/graphite/scop-6.c
@@ -4,7 +4,7 @@ int toto()
 {
   int i, j, k;
   int a[100][100];
-  int b[100];
+  int b[200];
 
   for (i = 1; i < 100; i++)
 {
@@ -18,9 +18,8 @@ int toto()
 for (k = 1; k < 100; k++)
   b[i+k] = b[i+k-1] + 2;
 }
-  
-  for (k = 1; k < 100; k++)
-b[i+k] = b[i+k-5] + 2;
+  for (k = 4; k < 100; k++)
+   b[i+k] = b[i+k-5] + 2;
 }
 
   return a[3][5] + b[2];
diff --git a/gcc/testsuite/gcc.dg/graphite/scop-9.c 
b/gcc/testsuite/gcc.dg/graphite/scop-9.c
index b19291be2f81..2676452b1e60 100644
--- a/gcc/testsuite/gcc.dg/graphite/scop-9.c
+++ b/gcc/testsuite/gcc.dg/graphite/scop-9.c
@@ -4,7 +4,7 @@ int toto()
 {
   int i, j, k;
   int a[100][100];
-  int b[100];
+  int b[200];
 
   for (i = 1; i < 100; i++)
 {
@@ -14,7 +14,7 @@ int toto()
   if (i * 2 == i + 8)
a[i][i] = 2;
 
-  for (k = 1; k < 100; k++)
+  for (k = 4; k < 100; k++)
 b[i+k] = b[i+k-5] + 2;
 }
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c
index bdbdbff19ffb..be325775fbb7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fdump-tree-ivopts" } 
*/
+/* { dg-options "-O2 -fno-tree-vectorize -fno-tree-loop-distribute-patterns 
-fdump-tree-ivopts" } */
 /* { dg-skip-if "PR68644" { hppa*-*-* powerpc*-*-* } } */
 
 void
diff --git a/gcc/testsuite

[gcc r15-4228] SH: Use softfp for sh-elf

2024-10-09 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:e95512e2d5a317e8c043f232158df4b38186e51c

commit r15-4228-ge95512e2d5a317e8c043f232158df4b38186e51c
Author: Sébastien Michelland 
Date:   Thu Oct 10 09:24:39 2024 +0900

SH: Use softfp for sh-elf

libgcc/ChangeLog:

PR target/29845
* config.host (sh-*-elf*): Replace fdpbit with softfp.
* config/sh/sfp-machine.h: New file.

Signed-off-by: Sébastien Michelland 


Diff:
---
 libgcc/config.host |  2 +-
 libgcc/config/sh/sfp-machine.h | 83 ++
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index fa001c5e900b..06fae1545b18 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1414,7 +1414,7 @@ s390x-ibm-tpf*)
md_unwind_header=s390/tpf-unwind.h
;;
 sh-*-elf* | sh[12346l]*-*-elf*)
-   tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit"
+   tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-softfp-sfdf t-softfp"
extra_parts="$extra_parts crt1.o crti.o crtn.o crtbeginS.o crtendS.o \
libic_invalidate_array_4-100.a \
libic_invalidate_array_4-200.a \
diff --git a/libgcc/config/sh/sfp-machine.h b/libgcc/config/sh/sfp-machine.h
new file mode 100644
index ..26f65166976b
--- /dev/null
+++ b/libgcc/config/sh/sfp-machine.h
@@ -0,0 +1,83 @@
+/* Software floating-point machine description for SuperH.
+
+Copyright (C) 2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)  _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)  _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)  _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_B  _FP_QNANBIT_B
+#define _FP_NANFRAC_H  _FP_QNANBIT_H
+#define _FP_NANFRAC_S  _FP_QNANBIT_S
+#define _FP_NANFRAC_D  _FP_QNANBIT_D, 0
+#define _FP_NANFRAC_Q  _FP_QNANBIT_Q, 0, 0, 0
+
+/* The type of the result of a floating point comparison.  This must
+   match __libgcc_cmp_return__ in GCC for the target.  */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_NANSIGN_B  0
+#define _FP_NANSIGN_H  0
+#define _FP_NANSIGN_S  0
+#define _FP_NANSIGN_D  0
+#define _FP_NANSIGN_Q  0
+
+#define _FP_KEEPNANFRACP 0
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)  \
+  do {  \
+R##_s = _FP_NANSIGN_##fs;   \
+_FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs);  \
+R##_c = FP_CLS_NAN; \
+  } while (0)
+
+#define _FP_TININESS_AFTER_ROUNDING 1
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+/* Define ALIASNAME as a strong alias for NAME.  */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((alias (#name)));


[gcc r15-4232] x86: Implement Fast-Math Float Truncation to BF16 via PSRLD Instruction

2024-10-09 Thread Levy Hsu via Gcc-cvs
https://gcc.gnu.org/g:8718727509b2d038d00afa3bd5ef8e0df216a287

commit r15-4232-g8718727509b2d038d00afa3bd5ef8e0df216a287
Author: Levy Hsu 
Date:   Wed Sep 25 14:32:35 2024 +1100

x86: Implement Fast-Math Float Truncation to BF16 via PSRLD Instruction

gcc/ChangeLog:

* config/i386/i386.md: Rewrite insn truncsfbf2.

gcc/testsuite/ChangeLog:

* gcc.target/i386/truncsfbf-1.c: New test.
* gcc.target/i386/truncsfbf-2.c: New test.

Diff:
---
 gcc/config/i386/i386.md | 16 +++
 gcc/testsuite/gcc.target/i386/truncsfbf-1.c |  9 
 gcc/testsuite/gcc.target/i386/truncsfbf-2.c | 65 +
 3 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fb9befcf65b3..e4d1c56ea542 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5673,16 +5673,18 @@
(set_attr "mode" "HF")])
 
 (define_insn "truncsfbf2"
-  [(set (match_operand:BF 0 "register_operand" "=x, v")
+  [(set (match_operand:BF 0 "register_operand" "=x,x,v,Yv")
(float_truncate:BF
- (match_operand:SF 1 "register_operand" "x,v")))]
-  "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
-   && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
+ (match_operand:SF 1 "register_operand" "0,x,v,Yv")))]
+  "TARGET_SSE2 && flag_unsafe_math_optimizations && !HONOR_NANS (BFmode)"
   "@
+  psrld\t{$16, %0|%0, 16}
   %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
-  vcvtneps2bf16\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "avxneconvert,avx512bf16vl")
-   (set_attr "prefix" "vex,evex")])
+  vcvtneps2bf16\t{%1, %0|%0, %1}
+  vpsrld\t{$16, %1, %0|%0, %1, 16}"
+  [(set_attr "isa" "noavx,avxneconvert,avx512bf16vl,avx")
+   (set_attr "prefix" "orig,vex,evex,vex")
+   (set_attr "type" "sseishft1,ssecvt,ssecvt,sseishft1")])
 
 ;; Signed conversion to DImode.
 
diff --git a/gcc/testsuite/gcc.target/i386/truncsfbf-1.c 
b/gcc/testsuite/gcc.target/i386/truncsfbf-1.c
new file mode 100644
index ..dd3ff8a50b46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/truncsfbf-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2 -ffast-math" } */
+/* { dg-final { scan-assembler-times "psrld" 1 } } */
+
+__bf16
+foo (float a)
+{
+  return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/truncsfbf-2.c 
b/gcc/testsuite/gcc.target/i386/truncsfbf-2.c
new file mode 100644
index ..f4952f88fc9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/truncsfbf-2.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-msse2 -O2 -ffast-math" } */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+
+__bf16
+foo (float a)
+{
+  return a;
+}
+
+static __bf16
+CALC (float *a)
+{
+  uint32_t bits;
+  memcpy (&bits, a, sizeof (bits));
+  bits >>= 16;
+  uint16_t bfloat16_bits = (uint16_t) bits;
+  __bf16 bf16;
+  memcpy (&bf16, &bfloat16_bits, sizeof (bf16));
+  return bf16;
+}
+
+int
+main (void)
+{
+  float test_values[] = { 0.0f, -0.0f, 1.0f, -1.0f, 0.5f, -0.5f, 1000.0f, 
-1000.0f,
+  3.1415926f, -3.1415926f, 1e-8f, -1e-8f,
+  1.0e+38f, -1.0e+38f, 1.0e-38f, -1.0e-38f };
+  size_t num_values = sizeof (test_values) / sizeof (test_values[0]);
+
+  for (size_t i = 0; i < num_values; ++i)
+{
+  float original = test_values[i];
+  __bf16 hw_bf16 = foo (original);
+  __bf16 sw_bf16 = CALC (&original);
+
+  /* Verify psrld $16, %0 == %0 >> 16 */
+  if (memcmp (&hw_bf16, &sw_bf16, sizeof (__bf16)) != 0)
+abort ();
+
+  /* Reconstruct the float value from the __bf16 bits */
+  uint16_t bf16_bits;
+  memcpy (&bf16_bits, &hw_bf16, sizeof (bf16_bits));
+  uint32_t reconstructed_bits = ((uint32_t) bf16_bits) << 16;
+  float converted;
+  memcpy (&converted, &reconstructed_bits, sizeof (converted));
+
+  float diff = fabsf (original - converted);
+
+  /* Expected Maximum Precision Loss */
+  uint32_t orig_bits;
+  memcpy (&orig_bits, &original, sizeof (orig_bits));
+  int exponent = ((orig_bits >> 23) & 0xFF) - 127;
+  float expected_loss = (exponent == -127)
+? ldexpf (1.0f, -126 - 7)
+: ldexpf (1.0f, exponent - 7);
+  if (diff > expected_loss)
+abort ();
+}
+  return 0;
+}


[gcc r15-4234] Add a new tune avx256_avoid_vec_perm for SRF.

2024-10-09 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:9eaecce3d8c1d9349adbf8c2cdaf8d87672ed29c

commit r15-4234-g9eaecce3d8c1d9349adbf8c2cdaf8d87672ed29c
Author: liuhongt 
Date:   Wed Sep 25 13:11:11 2024 +0800

Add a new tune avx256_avoid_vec_perm for SRF.

According to Intel SOM [1], on Crestmont most 256-bit Intel AVX2
instructions can be decomposed into two independent 128-bit
micro-operations.  The exception is a subset of Intel AVX2 instructions,
known as cross-lane operations, which can only compute the result for an
element by utilizing one or more sources belonging to other elements.

The 256-bit instructions listed below use more operand sources than
can be natively supported by a single reservation station within these
microarchitectures. They are decomposed into two μops, where the first
μop resolves a subset of operand dependencies across two cycles. The
dependent second μop executes the 256-bit operation by using a single
128-bit execution port for two consecutive cycles with a five-cycle
latency for a total latency of seven cycles.

VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
VPERMPD ymm1, ymm2/m256, imm8
VPERMPS ymm1, ymm2, ymm3/m256
VPERMD ymm1, ymm2, ymm3/m256
VPERMQ ymm1, ymm2/m256, imm8

Instead of setting tune avx128_optimal for SRF, the patch adds a new
tune avx256_avoid_vec_perm for it.  So by default, the vectorizer still
uses a 256-bit VF if the cost is profitable, but lowers to 128-bit
whenever a 256-bit vec_perm is needed for auto-vectorization.  Without
vec_perm, the performance of 256-bit vectorization should be similar to
that of 128-bit vectorization (some benchmark results show it is even
better, since it enables more parallelism for convert cases).

[1] 
https://www.intel.com/content/www/us/en/content-details/814198/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html

gcc/ChangeLog:

* config/i386/i386.cc (ix86_vector_costs::ix86_vector_costs):
Add new member m_num_avx256_vec_perm.
(ix86_vector_costs::add_stmt_cost): Record 256-bit vec_perm.
(ix86_vector_costs::finish_cost): Prevent vectorization for
TARGET_AVX256_AVOID_VEC_PERM when there's 256-bit vec_perm
instruction.
* config/i386/i386.h (TARGET_AVX256_AVOID_VEC_PERM): New
Macro.
* config/i386/x86-tune.def (X86_TUNE_AVX256_SPLIT_REGS): Add
m_CORE_ATOM.
(X86_TUNE_AVX256_AVOID_VEC_PERM): New tune.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx256_avoid_vec_perm.c: New test.

Diff:
---
 gcc/config/i386/i386.cc| 14 +-
 gcc/config/i386/i386.h |  2 ++
 gcc/config/i386/x86-tune.def   |  7 ++-
 .../gcc.target/i386/avx256_avoid_vec_perm.c| 22 ++
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 90a564b2ffaa..ab0ade3790f2 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25025,12 +25025,15 @@ private:
  where we know it's not loaded from memory.  */
   unsigned m_num_gpr_needed[3];
   unsigned m_num_sse_needed[3];
+  /* Number of 256-bit vector permutation.  */
+  unsigned m_num_avx256_vec_perm[3];
 };
 
 ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
   : vector_costs (vinfo, costing_for_scalar),
 m_num_gpr_needed (),
-m_num_sse_needed ()
+m_num_sse_needed (),
+m_num_avx256_vec_perm ()
 {
 }
 
@@ -25264,6 +25267,10 @@ ix86_vector_costs::add_stmt_cost (int count, 
vect_cost_for_stmt kind,
   if (stmt_cost == -1)
 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
 
+  if (kind == vec_perm && vectype
+  && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
+m_num_avx256_vec_perm[where]++;
+
   /* Penalize DFmode vector operations for Bonnell.  */
   if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
   && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
@@ -25333,6 +25340,11 @@ ix86_vector_costs::finish_cost (const vector_costs 
*scalar_costs)
 
   ix86_vect_estimate_reg_pressure ();
 
+  for (int i = 0; i != 3; i++)
+if (m_num_avx256_vec_perm[i]
+   && TARGET_AVX256_AVOID_VEC_PERM)
+  m_costs[i] = INT_MAX;
+
   vector_costs::finish_cost (scalar_costs);
 }
 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index d5d54ee66040..f5204aa1ed23 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -439,6 +439,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
 #define TARGET_AVX256_SPLIT_REGS \
ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
+#define TARGET_AVX256_AVOID_VEC_PERM \
+   ix86_tune_features[X86_TUNE_AVX256_AVO

[gcc r15-4233] Add new microarchitecture tune for SRF/GRR/CWF.

2024-10-09 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:9c8cea8feb6cd54ef73113a0b74f1df7b60d09dc

commit r15-4233-g9c8cea8feb6cd54ef73113a0b74f1df7b60d09dc
Author: liuhongt 
Date:   Tue Sep 24 15:53:14 2024 +0800

Add new microarchitecture tune for SRF/GRR/CWF.

For Crestmont, 4-operand VEX blendv instructions come from MSROM and
are slower than the 3-instruction sequence (op1 & mask) | (op2 & ~mask).
The legacy blendv instruction can still be handled by the decoder.

The patch adds a new tune which is enabled for all processors except
for SRF/CWF.  It will use vpand + vpandn + vpor instead of
vpblendvb (similarly for vblendvps/vblendvpd) for SRF/CWF.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_sse_movcc): Guard
instruction blendv generation under new tune.
* config/i386/i386.h (TARGET_SSE_MOVCC_USE_BLENDV): New Macro.
* config/i386/x86-tune.def (X86_TUNE_SSE_MOVCC_USE_BLENDV):
New tune.

Diff:
---
 gcc/config/i386/i386-expand.cc | 24 +++---
 gcc/config/i386/i386.h |  2 ++
 gcc/config/i386/x86-tune.def   |  8 
 .../gcc.target/i386/sse_movcc_use_blendv.c | 12 +++
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 32840113cf60..0734399e4955 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4344,23 +4344,23 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
   switch (mode)
 {
 case E_V2SFmode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_mmx_blendvps;
   break;
 case E_V4SFmode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvps;
   break;
 case E_V2DFmode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvpd;
   break;
 case E_SFmode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvss;
   break;
 case E_DFmode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvsd;
   break;
 case E_V8QImode:
@@ -4368,7 +4368,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
 case E_V4HFmode:
 case E_V4BFmode:
 case E_V2SImode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
  gen = gen_mmx_pblendvb_v8qi;
  blend_mode = V8QImode;
@@ -4378,14 +4378,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
 case E_V2HImode:
 case E_V2HFmode:
 case E_V2BFmode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
  gen = gen_mmx_pblendvb_v4qi;
  blend_mode = V4QImode;
}
   break;
 case E_V2QImode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_mmx_pblendvb_v2qi;
   break;
 case E_V16QImode:
@@ -4395,18 +4395,18 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
 case E_V4SImode:
 case E_V2DImode:
 case E_V1TImode:
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
  gen = gen_sse4_1_pblendvb;
  blend_mode = V16QImode;
}
   break;
 case E_V8SFmode:
-  if (TARGET_AVX)
+  if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
gen = gen_avx_blendvps256;
   break;
 case E_V4DFmode:
-  if (TARGET_AVX)
+  if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
gen = gen_avx_blendvpd256;
   break;
 case E_V32QImode:
@@ -4415,7 +4415,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
rtx op_false)
 case E_V16BFmode:
 case E_V8SImode:
 case E_V4DImode:
-  if (TARGET_AVX2)
+  if (TARGET_AVX2 && TARGET_SSE_MOVCC_USE_BLENDV)
{
  gen = gen_avx2_pblendvb;
  blend_mode = V32QImode;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 82177b9d3839..d5d54ee66040 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -462,6 +462,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC]
 #define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC]
 #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
+#define TARGET_SSE_MOVCC_USE_BLENDV \
+   ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 3d123da95f0c..b815b6dc255b 100644
--- a/gcc/config/i386/x86-tune