[gcc r15-2400] AVR: Propose to use attribute signal(n) via AVR-LibC's ISR_N.

2024-07-30 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:922083693136be9516b9f916fd5139f419f8

commit r15-2400-g922083693136be9516b9f916fd5139f419f8
Author: Georg-Johann Lay 
Date:   Tue Jul 30 09:16:02 2024 +0200

AVR: Propose to use attribute signal(n) via AVR-LibC's ISR_N.

gcc/
* doc/extend.texi (AVR Function Attributes): Propose to use
attribute signal(n) via AVR-LibC's ISR_N from avr/interrupt.h

Diff:
---
 gcc/doc/extend.texi | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 927aa24ab635..48b27ff9f390 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5147,22 +5147,38 @@ the attribute, rather than providing the ISR name 
itself as the function name:
 
 @example
 __attribute__((signal(1)))
-void my_handler (void)
+static void my_handler (void)
 @{
// Code for __vector_1
 @}
+@end example
 
-#include 
+Notice that the handler function needs not to be externally visible.
+The recommended way to use these attributes is by means of the
+@code{ISR_N} macro provided by @code{avr/interrupt.h} from
+@w{@uref{https://www.nongnu.org/avr-libc/user-manual/group__avr__interrupts.html,,AVR-LibC}}:
+
+@example
+#include 
 
-__attribute__((__signal__(PCINT0_vect_num, PCINT1_vect_num)))
-static void my_pcint0_1_handler (void)
+ISR_N (PCINT0_vect_num)
+static void my_pcint0_handler (void)
 @{
-   // Code for PCINT0 and PCINT1 (__vector_3 and __vector_4
-   // on ATmega328).
+   // Code
+@}
+
+ISR_N (ADC_vect_num, ISR_NOBLOCK)
+static void my_adc_handler (void)
+@{
+// Code
 @}
 @end example
 
-Notice that the handler function needs not to be externally visible.
+@code{ISR_N} can be specified more than once, in which case several
+interrupt vectors are pointing to the same handler function.  This
+is similar to the @code{ISR_ALIASOF} macro provided by AVR-LibC, but
+without the overhead introduced by @code{ISR_ALIASOF}.
+
 
 @cindex @code{noblock} function attribute, AVR
 @item noblock


[gcc r14-10523] x86: Don't enable APX_F in 32-bit mode

2024-07-30 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:ee6c5afec36aee14d2244a37a833ef7c2d16ab88

commit r14-10523-gee6c5afec36aee14d2244a37a833ef7c2d16ab88
Author: Lingling Kong 
Date:   Wed Jul 24 14:52:47 2024 +0800

x86: Don't enable APX_F in 32-bit mode

gcc/ChangeLog:

PR target/115978
* config/i386/driver-i386.cc (host_detect_local_cpu):  Enable
APX_F only for 64-bit codegen.
* config/i386/i386-options.cc (DEF_PTA):  Skip PTA_APX_F if
not in 64-bit mode.

gcc/testsuite/ChangeLog:

PR target/115978
* gcc.target/i386/pr115978-1.c: New test.
* gcc.target/i386/pr115978-2.c: Ditto.

Diff:
---
 gcc/config/i386/driver-i386.cc |  3 ++-
 gcc/config/i386/i386-options.cc|  3 ++-
 gcc/testsuite/gcc.target/i386/pr115978-1.c | 22 ++
 gcc/testsuite/gcc.target/i386/pr115978-2.c |  6 ++
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index bb53af4b2039..695d8e6cdf14 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -893,7 +893,8 @@ const char *host_detect_local_cpu (int argc, const char 
**argv)
if (has_feature (isa_names_table[i].feature))
  {
if (codegen_x86_64
-   || isa_names_table[i].feature != FEATURE_UINTR)
+   || (isa_names_table[i].feature != FEATURE_UINTR
+   && isa_names_table[i].feature != FEATURE_APX_F))
  options = concat (options, " ",
isa_names_table[i].option, NULL);
  }
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index af450dba73dd..6c212a8edeb9 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2385,7 +2385,8 @@ ix86_option_override_internal (bool main_args_p,
 #define DEF_PTA(NAME) \
if (((processor_alias_table[i].flags & PTA_ ## NAME) != 0) \
&& PTA_ ## NAME != PTA_64BIT \
-   && (TARGET_64BIT || PTA_ ## NAME != PTA_UINTR) \
+   && (TARGET_64BIT || (PTA_ ## NAME != PTA_UINTR \
+&& PTA_ ## NAME != PTA_APX_F))\
&& !TARGET_EXPLICIT_ ## NAME ## _P (opts)) \
  SET_TARGET_ ## NAME (opts);
 #include "i386-isa.def"
diff --git a/gcc/testsuite/gcc.target/i386/pr115978-1.c 
b/gcc/testsuite/gcc.target/i386/pr115978-1.c
new file mode 100644
index ..18a1c5f153a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115978-1.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -march=native" } */
+
+int
+main ()
+{
+  if (__builtin_cpu_supports ("apxf"))
+{
+#ifdef __x86_64__
+# ifndef __APX_F__
+  __builtin_abort ();
+# endif
+#else
+# ifdef __APX_F__
+  __builtin_abort ();
+# endif
+#endif
+  return 0;
+}
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr115978-2.c 
b/gcc/testsuite/gcc.target/i386/pr115978-2.c
new file mode 100644
index ..900d6eb096ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115978-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=native -mno-apxf" } */
+
+#ifdef __APX_F__
+# error APX_F should be disabled
+#endif


[gcc r15-2401] gfortran.dg/compiler-directive_2.f: Update dg-error

2024-07-30 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:15158a8853a69f27a3c14644f35a93029dea6a84

commit r15-2401-g15158a8853a69f27a3c14644f35a93029dea6a84
Author: Tobias Burnus 
Date:   Tue Jul 30 10:18:06 2024 +0200

gfortran.dg/compiler-directive_2.f: Update dg-error

This is a fallout of commit r15-2378-g29b1587e7d3466
  OpenMP/Fortran: Fix handling of 'declare target' with 'link' clause 
[PR115559]
where the '!GCC$' attributes were added in reverse order.
Result: The error diagnostic for the stdcall/fastcall was reversed.
Solution: Swap the order in dg-error.

gcc/testsuite/ChangeLog:

* gfortran.dg/compiler-directive_2.f: Update dg-error.

Diff:
---
 gcc/testsuite/gfortran.dg/compiler-directive_2.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gfortran.dg/compiler-directive_2.f 
b/gcc/testsuite/gfortran.dg/compiler-directive_2.f
index 79169a896ae6..c31349f3348c 100644
--- a/gcc/testsuite/gfortran.dg/compiler-directive_2.f
+++ b/gcc/testsuite/gfortran.dg/compiler-directive_2.f
@@ -5,6 +5,6 @@
 ! Check for calling convention consitency
 ! in procedure-pointer assignments.
 !
-  subroutine test() ! { dg-error "fastcall and stdcall attributes are not 
compatible" }
+  subroutine test() ! { dg-error "stdcall and fastcall attributes are not 
compatible" }
 cGCC$ attributes stdcall, fastcall::test
   end subroutine test


[gcc/devel/omp/gcc-14] gfortran.dg/compiler-directive_2.f: Update dg-error

2024-07-30 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:4d76fe46875b14296ffc675b9580cb3c543d2310

commit 4d76fe46875b14296ffc675b9580cb3c543d2310
Author: Tobias Burnus 
Date:   Tue Jul 30 10:35:11 2024 +0200

gfortran.dg/compiler-directive_2.f: Update dg-error

This is a fallout of commit r15-2378-g29b1587e7d3466
  OpenMP/Fortran: Fix handling of 'declare target' with 'link' clause 
[PR115559]
where the '!GCC$' attributes were added in reverse order.
Result: The error diagnostic for the stdcall/fastcall was reversed.
Solution: Swap the order in dg-error.

gcc/testsuite/ChangeLog:

* gfortran.dg/compiler-directive_2.f: Update dg-error.

(cherry picked from commit 15158a8853a69f27a3c14644f35a93029dea6a84)

Diff:
---
 gcc/testsuite/ChangeLog.omp  | 7 +++
 gcc/testsuite/gfortran.dg/compiler-directive_2.f | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 055730cbdebb..3df8cf8b3b87 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2024-07-30  Tobias Burnus  
+
+   Backported from master:
+   2024-07-30  Tobias Burnus  
+
+   * gfortran.dg/compiler-directive_2.f: Update dg-error.
+
 2024-05-28  Tobias Burnus  
 
* c-c++-common/gomp/lastprivate-conditional-1.c: Remove
diff --git a/gcc/testsuite/gfortran.dg/compiler-directive_2.f 
b/gcc/testsuite/gfortran.dg/compiler-directive_2.f
index 79169a896ae6..c31349f3348c 100644
--- a/gcc/testsuite/gfortran.dg/compiler-directive_2.f
+++ b/gcc/testsuite/gfortran.dg/compiler-directive_2.f
@@ -5,6 +5,6 @@
 ! Check for calling convention consitency
 ! in procedure-pointer assignments.
 !
-  subroutine test() ! { dg-error "fastcall and stdcall attributes are not 
compatible" }
+  subroutine test() ! { dg-error "stdcall and fastcall attributes are not 
compatible" }
 cGCC$ attributes stdcall, fastcall::test
   end subroutine test


[gcc r15-2402] Fix warnings for tree formats in gfc_error

2024-07-30 Thread Paul-Antoine Arras via Gcc-cvs
https://gcc.gnu.org/g:0450a143d2d132a8b3e6cff896f69e191c3316e2

commit r15-2402-g0450a143d2d132a8b3e6cff896f69e191c3316e2
Author: Paul-Antoine Arras 
Date:   Fri Jun 28 14:14:38 2024 +0200

Fix warnings for tree formats in gfc_error

This enables proper warnings for formats like %qD.

gcc/c-family/ChangeLog:

* c-format.cc (gcc_gfc_char_table): Add formats for tree objects.

Diff:
---
 gcc/c-family/c-format.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc
index 07b91a1c7a1d..7614f1e97ead 100644
--- a/gcc/c-family/c-format.cc
+++ b/gcc/c-family/c-format.cc
@@ -850,6 +850,10 @@ static const format_char_info gcc_gfc_char_table[] =
   /* This will require a "locus" at runtime.  */
   { "L",   0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN  }, "", "R", NULL },
 
+  /* These will require a "tree" at runtime.  */
+  { "DFTV", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN, 
 BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "'",   NULL },
+  { "E",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
+
   /* These will require nothing.  */
   { "<>",0, STD_C89, NOARGUMENTS, "",  "",   NULL },
   { NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }


[gcc] Created branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 'refs/users'

2024-07-30 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/inline_minmaxloc_without_dim_v06' was created in 
namespace 'refs/users' pointing to:

 b2a5e99f9fb7... fortran: Continue MINLOC/MAXLOC second loop where the first


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:c1eb87cb9470518cf499765fd72c8810f943c239

commit c1eb87cb9470518cf499765fd72c8810f943c239
Author: Mikael Morin 
Date:   Thu Jul 25 18:04:13 2024 +0200

fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]

Add the tests covering the various cases for which we are about to implement
inline expansion of MINLOC and MAXLOC.  Those are cases where the DIM
argument is not present.

PR fortran/90608

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_7.f90: New test.
* gfortran.dg/maxloc_with_mask_1.f90: New test.
* gfortran.dg/minloc_8.f90: New test.
* gfortran.dg/minloc_with_mask_1.f90: New test.

Diff:
---
 gcc/testsuite/gfortran.dg/maxloc_7.f90   | 220 +
 gcc/testsuite/gfortran.dg/maxloc_with_mask_1.f90 | 393 +++
 gcc/testsuite/gfortran.dg/minloc_8.f90   | 220 +
 gcc/testsuite/gfortran.dg/minloc_with_mask_1.f90 | 392 ++
 4 files changed, 1225 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/maxloc_7.f90 
b/gcc/testsuite/gfortran.dg/maxloc_7.f90
new file mode 100644
index ..a875083052a9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/maxloc_7.f90
@@ -0,0 +1,220 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline maxloc implementation,
+! when there is no optional argument.
+
+program p
+  implicit none
+  integer, parameter :: data5(*) = (/ 1, 7, 2, 7, 0 /)
+  integer, parameter :: data64(*) = (/ 2, 5, 4, 6, 0, 9, 3, 5,  &
+   4, 4, 1, 7, 3, 2, 1, 2,  &
+   5, 4, 6, 0, 9, 3, 5, 4,  &
+   4, 1, 7, 3, 2, 1, 2, 5,  &
+   4, 6, 0, 9, 3, 5, 4, 4,  &
+   1, 7, 3, 2, 1, 2, 5, 4,  &
+   6, 0, 9, 3, 5, 4, 4, 1,  &
+   7, 3, 2, 1, 2, 5, 4, 6  /)
+  call check_int_const_shape_rank_1
+  call check_int_const_shape_rank_3
+  call check_int_const_shape_empty_4
+  call check_int_alloc_rank_1
+  call check_int_alloc_rank_3
+  call check_int_alloc_empty_4
+  call check_real_const_shape_rank_1
+  call check_real_const_shape_rank_3
+  call check_real_const_shape_empty_4
+  call check_real_alloc_rank_1
+  call check_real_alloc_rank_3
+  call check_real_alloc_empty_4
+  call check_int_lower_bounds
+  call check_real_lower_bounds
+  call check_all_nans
+  call check_dependencies
+contains
+  subroutine check_int_const_shape_rank_1()
+integer :: a(5)
+integer, allocatable :: m(:)
+a = data5
+m = maxloc(a)
+if (size(m, dim=1) /= 1) stop 11
+if (any(m /= (/ 2 /))) stop 12
+  end subroutine
+  subroutine check_int_const_shape_rank_3()
+integer :: a(4,4,4)
+integer, allocatable :: m(:)
+a = reshape(data64, shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 3) stop 21
+if (any(m /= (/ 2, 2, 1 /))) stop 22
+  end subroutine
+  subroutine check_int_const_shape_empty_4()
+integer :: a(9,3,0,7)
+integer, allocatable :: m(:)
+a = reshape((/ integer:: /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 4) stop 31
+if (any(m /= (/ 0, 0, 0, 0 /))) stop 32
+  end subroutine
+  subroutine check_int_alloc_rank_1()
+integer, allocatable :: a(:)
+integer, allocatable :: m(:)
+allocate(a(5))
+a(:) = data5
+m = maxloc(a)
+if (size(m, dim=1) /= 1) stop 41
+if (any(m /= (/ 2 /))) stop 42
+  end subroutine
+  subroutine check_int_alloc_rank_3()
+integer, allocatable :: a(:,:,:)
+integer, allocatable :: m(:)
+allocate(a(4,4,4))
+a(:,:,:) = reshape(data64, shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 3) stop 51
+if (any(m /= (/ 2, 2, 1 /))) stop 52
+  end subroutine
+  subroutine check_int_alloc_empty_4()
+integer, allocatable :: a(:,:,:,:)
+integer, allocatable :: m(:)
+allocate(a(9,3,0,7))
+a(:,:,:,:) = reshape((/ integer:: /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 4) stop 61
+if (any(m /= (/ 0, 0, 0, 0 /))) stop 62
+  end subroutine
+  subroutine check_real_const_shape_rank_1()
+real :: a(5)
+integer, allocatable :: m(:)
+a = (/ real:: data5 /)
+m = maxloc(a)
+if (size(m, dim=1) /= 1) stop 71
+if (any(m /= (/ 2 /))) stop 72
+  end subroutine
+  subroutine check_real_const_shape_rank_3()
+real :: a(4,4,4)
+integer, allocatable :: m(:)
+a = reshape((/ real:: data64 /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 3) stop 81
+if (any(m /= (/ 2, 2, 1 /))) stop 82
+  end subroutine
+  subroutine check_real_const_shape_empty_4()
+real :: a(9,3,0,7)
+integer, allocatable :: m(:)
+a = reshape((/ real:: /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 4) stop 91
+if (any(m /= (/ 0, 0, 0, 0 /))) stop 92
+  end subroutine

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:0c6f911ae6433ab0c3cc668083bd45aee52723bf

commit 0c6f911ae6433ab0c3cc668083bd45aee52723bf
Author: Mikael Morin 
Date:   Wed Nov 15 10:23:32 2023 +0100

fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined

Disable rewriting of MINLOC/MAXLOC expressions for which inline code
generation is supported.  Update the gfc_inline_intrinsic_function_p
predicate (already existing) for that, with the current state of
MINLOC/MAXLOC inlining support, that is only the cases of a scalar
result and non-CHARACTER argument for now.

This change has no effect currently, as the MINLOC/MAXLOC front-end passes
only change expressions of rank 1, but the inlining control predicate
gfc_inline_intrinsic_function_p returns false for those.  However, later
changes will extend MINLOC/MAXLOC inline expansion support to array
expressions and update the inlining control predicate, and this will become
effective.

gcc/fortran/ChangeLog:

* frontend-passes.cc (optimize_minmaxloc): Skip if we can generate
inline code for the unmodified expression.
* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Add
MINLOC and MAXLOC cases.

Diff:
---
 gcc/fortran/frontend-passes.cc |  3 ++-
 gcc/fortran/trans-intrinsic.cc | 23 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc
index 3c06018fdbbf..8e4c6310ba8d 100644
--- a/gcc/fortran/frontend-passes.cc
+++ b/gcc/fortran/frontend-passes.cc
@@ -2277,7 +2277,8 @@ optimize_minmaxloc (gfc_expr **e)
   || fn->value.function.actual == NULL
   || fn->value.function.actual->expr == NULL
   || fn->value.function.actual->expr->ts.type == BT_CHARACTER
-  || fn->value.function.actual->expr->rank != 1)
+  || fn->value.function.actual->expr->rank != 1
+  || gfc_inline_intrinsic_function_p (fn))
 return;
 
   *e = gfc_get_array_expr (fn->ts.type, fn->ts.kind, &fn->where);
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 9f3c3ce47bc5..cc0d00f4e399 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -11650,6 +11650,29 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
 case GFC_ISYM_TRANSPOSE:
   return true;
 
+case GFC_ISYM_MINLOC:
+case GFC_ISYM_MAXLOC:
+  {
+   /* Disable inline expansion if code size matters.  */
+   if (optimize_size)
+ return false;
+
+   gfc_actual_arglist *array_arg = expr->value.function.actual;
+   gfc_actual_arglist *dim_arg = array_arg->next;
+
+   gfc_expr *array = array_arg->expr;
+   gfc_expr *dim = dim_arg->expr;
+
+   if (!(array->ts.type == BT_INTEGER
+ || array->ts.type == BT_REAL))
+ return false;
+
+   if (array->rank == 1 && dim != nullptr)
+ return true;
+
+   return false;
+  }
+
 default:
   return false;
 }


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC.

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:826cbd802b72c2a5d450493f86e7ee0106432282

commit 826cbd802b72c2a5d450493f86e7ee0106432282
Author: Mikael Morin 
Date:   Mon Jul 22 13:27:24 2024 +0200

fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC.

Hello,

this fixes a null pointer dereference with absent optional dummy passed
as BACK argument of MINLOC/MAXLOC.

Tested for regression on x86_64-linux.
OK for master?

-- >8 --

Protect the evaluation of BACK with a check that the reference is non-null
in case the expression is an optional dummy, in the inline code generated
for MINLOC and MAXLOC.

This change contains a revert of the non-testsuite part of commit
r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3, which factored the
evaluation of BACK out of the loop using the scalarizer.  It was a bad idea,
because delegating the argument evaluation to the scalarizer makes it
cumbersome to add a null pointer check next to the evaluation.

Instead, evaluate BACK at the beginning, before scalarization, add a check
that the argument is present if necessary, and evaluate the resulting
expression to a variable, before using the variable in the inline code.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (maybe_absent_optional_variable): New function.
(gfc_conv_intrinsic_minmaxloc): Remove BACK from scalarization and
evaluate it before.  Add a check that BACK is not null if the
expression is an optional dummy.  Save the resulting expression to a
variable.  Use the variable in the generated inline code.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_6.f90: New test.
* gfortran.dg/minloc_7.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  81 ++--
 gcc/testsuite/gfortran.dg/maxloc_6.f90 | 366 +
 gcc/testsuite/gfortran.dg/minloc_7.f90 | 366 +
 3 files changed, 799 insertions(+), 14 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 180d0d7a88c6..9f3c3ce47bc5 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5209,6 +5209,50 @@ gfc_conv_intrinsic_dot_product (gfc_se * se, gfc_expr * 
expr)
 }
 
 
+/* Tells whether the expression E is a reference to an optional variable whose
+   presence is not known at compile time.  Those are variable references 
without
+   subreference; if there is a subreference, we can assume the variable is
+   present.  We have to special case full arrays, which we represent with a 
fake
+   "full" reference, and class descriptors for which a reference to data is not
+   really a subreference.  */
+
+bool
+maybe_absent_optional_variable (gfc_expr *e)
+{
+  if (!(e && e->expr_type == EXPR_VARIABLE))
+return false;
+
+  gfc_symbol *sym = e->symtree->n.sym;
+  if (!sym->attr.optional)
+return false;
+
+  gfc_ref *ref = e->ref;
+  if (ref == nullptr)
+return true;
+
+  if (ref->type == REF_ARRAY
+  && ref->u.ar.type == AR_FULL
+  && ref->next == nullptr)
+return true;
+
+  if (!(sym->ts.type == BT_CLASS
+   && ref->type == REF_COMPONENT
+   && ref->u.c.component == CLASS_DATA (sym)))
+return false;
+
+  gfc_ref *next_ref = ref->next;
+  if (next_ref == nullptr)
+return true;
+
+  if (next_ref->type == REF_ARRAY
+  && next_ref->u.ar.type == AR_FULL
+  && next_ref->next == nullptr)
+return true;
+
+  return false;
+}
+
+
 /* Remove unneeded kind= argument from actual argument list when the
result conversion is dealt with in a different place.  */
 
@@ -5321,11 +5365,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   tree nonempty;
   tree lab1, lab2;
   tree b_if, b_else;
+  tree back;
   gfc_loopinfo loop;
   gfc_actual_arglist *actual;
   gfc_ss *arrayss;
   gfc_ss *maskss;
-  gfc_ss *backss;
   gfc_se arrayse;
   gfc_se maskse;
   gfc_expr *arrayexpr;
@@ -5391,10 +5435,27 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 && maskexpr->symtree->n.sym->attr.dummy
 && maskexpr->symtree->n.sym->attr.optional;
   backexpr = actual->next->next->expr;
-  if (backexpr)
-backss = gfc_get_scalar_ss (gfc_ss_terminator, backexpr);
+
+  gfc_init_se (&backse, NULL);
+  if (backexpr == nullptr)
+back = logical_false_node;
+  else if (maybe_absent_optional_variable (backexpr))
+{
+  gcc_assert (backexpr->expr_type == EXPR_VARIABLE);
+
+  gfc_conv_expr (&backse, backexpr);
+  tree present = gfc_conv_expr_present (backexpr->symtree->n.sym, false);
+  back = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+ logical_type_node, present, backse.expr);
+}
   else
-backss = nullptr;
+{
+  gfc_conv_expr (&backse, backexpr);
+  back = backse.expr;
+

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:243591a6c8b99b1a337fd79643a27a9b287a2fed

commit 243591a6c8b99b1a337fd79643a27a9b287a2fed
Author: Mikael Morin 
Date:   Tue Jul 9 21:05:40 2024 +0200

fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]

Enable inline code generation for the MINLOC and MAXLOC intrinsics, if the
DIM argument is not present and ARRAY has rank 1.  This case is similar to
the case where the result is scalar (DIM present and rank 1 ARRAY), which
already supports inline expansion of the intrinsic.  Both cases return
the same value, with the difference that the result is an array of size 1 if
DIM is absent, whereas it's a scalar if DIM is present.  So all there is
to do for this case to work is hook the inline expansion with the
scalarizer.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Set the scalarization
rank based on the MINLOC/MAXLOC rank if needed.  Call the inline
code generation and setup the scalarizer array descriptor info
in the MINLOC and MAXLOC cases.
* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Return the
result array element if the scalarizer is setup and we are inside
the loop.  Restrict library function call dispatch to the case
where inline expansion is not supported.  Declare an array result
if the expression isn't scalar.  Initialize the array result single
element and return the result variable if the expression isn't
scalar.
(walk_inline_intrinsic_minmaxloc): New function.
(walk_inline_intrinsic_function): Add MINLOC and MAXLOC cases,
dispatching to walk_inline_intrinsic_minmaxloc.
(gfc_add_intrinsic_ss_code): Add MINLOC and MAXLOC cases.
(gfc_inline_intrinsic_function_p): Return true if ARRAY has rank 1,
regardless of DIM.

Diff:
---
 gcc/fortran/trans-array.cc |  25 ++
 gcc/fortran/trans-intrinsic.cc | 196 +++--
 2 files changed, 155 insertions(+), 66 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index c93a5f1e7543..0c78e1fecd8f 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4771,6 +4771,8 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop)
case GFC_ISYM_UBOUND:
case GFC_ISYM_LCOBOUND:
case GFC_ISYM_UCOBOUND:
+   case GFC_ISYM_MAXLOC:
+   case GFC_ISYM_MINLOC:
case GFC_ISYM_SHAPE:
case GFC_ISYM_THIS_IMAGE:
  loop->dimen = ss->dimen;
@@ -4820,6 +4822,29 @@ done:
case GFC_SS_INTRINSIC:
  switch (expr->value.function.isym->id)
{
+   case GFC_ISYM_MINLOC:
+   case GFC_ISYM_MAXLOC:
+ {
+   gfc_se se;
+   gfc_init_se (&se, nullptr);
+   se.loop = loop;
+   se.ss = ss;
+   gfc_conv_intrinsic_function (&se, expr);
+   gfc_add_block_to_block (&outer_loop->pre, &se.pre);
+   gfc_add_block_to_block (&outer_loop->post, &se.post);
+
+   info->descriptor = se.expr;
+
+   info->data = gfc_conv_array_data (info->descriptor);
+   info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
+
+   info->offset = gfc_index_zero_node;
+   info->start[0] = gfc_index_zero_node;
+   info->end[0] = gfc_index_zero_node;
+   info->stride[0] = gfc_index_one_node;
+   continue;
+ }
+
/* Fall through to supply start and stride.  */
case GFC_ISYM_LBOUND:
case GFC_ISYM_UBOUND:
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index cc0d00f4e399..7b7d0102b86a 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5273,66 +5273,69 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
we need to handle.  For performance reasons we sometimes create two
loops instead of one, where the second one is much simpler.
Examples for minloc intrinsic:
-   1) Result is an array, a call is generated
-   2) Array mask is used and NaNs need to be supported:
-  limit = Infinity;
-  pos = 0;
-  S = from;
-  while (S <= to) {
-   if (mask[S]) {
- if (pos == 0) pos = S + (1 - from);
- if (a[S] <= limit) { limit = a[S]; pos = S + (1 - from); goto lab1; }
-   }
-   S++;
-  }
-  goto lab2;
-  lab1:;
-  while (S <= to) {
-   if (mask[S]) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
-   S++;
-  }
-  lab2:;
-   3) NaNs need to be supported, but it is known at compile time or cheaply
-  at runtime whether array is nonempty or not:
-  limit = Infinity;
-  pos = 0;
- 

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Outline array bound check generation code

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:0090c54560c24fead1245245626fe7afe6339373

commit 0090c54560c24fead1245245626fe7afe6339373
Author: Mikael Morin 
Date:   Wed Apr 10 21:18:03 2024 +0200

fortran: Outline array bound check generation code

The next patch will need reindenting of the array bound check generation
code.  This outlines it to its own function beforehand, reducing the churn
in the next patch.

-- >8 --

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Move array bound check
generation code...
(add_check_section_in_array_bounds): ... here as a new function.

Diff:
---
 gcc/fortran/trans-array.cc | 297 ++---
 1 file changed, 143 insertions(+), 154 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 0c78e1fecd8f..99a603a3afb2 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4736,6 +4736,146 @@ gfc_conv_section_startstride (stmtblock_t * block, 
gfc_ss * ss, int dim)
 }
 
 
+/* Generate in INNER the bounds checking code along the dimension DIM for
+   the array associated with SS_INFO.  */
+
+static void
+add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info,
+  int dim)
+{
+  gfc_expr *expr = ss_info->expr;
+  locus *expr_loc = &expr->where;
+  const char *expr_name = expr->symtree->name;
+
+  gfc_array_info *info = &ss_info->data.array;
+
+  bool check_upper;
+  if (dim == info->ref->u.ar.dimen - 1
+  && info->ref->u.ar.as->type == AS_ASSUMED_SIZE)
+check_upper = false;
+  else
+check_upper = true;
+
+  /* Zero stride is not allowed.  */
+  tree tmp = fold_build2_loc (input_location, EQ_EXPR, logical_type_node,
+ info->stride[dim], gfc_index_zero_node);
+  char * msg = xasprintf ("Zero stride is not allowed, for dimension %d "
+ "of array '%s'", dim + 1, expr_name);
+  gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg);
+  free (msg);
+
+  tree desc = info->descriptor;
+
+  /* This is the run-time equivalent of resolve.cc's
+ check_dimension.  The logical is more readable there
+ than it is here, with all the trees.  */
+  tree lbound = gfc_conv_array_lbound (desc, dim);
+  tree end = info->end[dim];
+  tree ubound = check_upper ? gfc_conv_array_ubound (desc, dim) : NULL_TREE;
+
+  /* non_zerosized is true when the selected range is not
+ empty.  */
+  tree stride_pos = fold_build2_loc (input_location, GT_EXPR, 
logical_type_node,
+info->stride[dim], gfc_index_zero_node);
+  tmp = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
+info->start[dim], end);
+  stride_pos = fold_build2_loc (input_location, TRUTH_AND_EXPR,
+   logical_type_node, stride_pos, tmp);
+
+  tree stride_neg = fold_build2_loc (input_location, LT_EXPR, 
logical_type_node,
+info->stride[dim], gfc_index_zero_node);
+  tmp = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+info->start[dim], end);
+  stride_neg = fold_build2_loc (input_location, TRUTH_AND_EXPR,
+   logical_type_node, stride_neg, tmp);
+  tree non_zerosized = fold_build2_loc (input_location, TRUTH_OR_EXPR,
+   logical_type_node, stride_pos,
+   stride_neg);
+
+  /* Check the start of the range against the lower and upper
+ bounds of the array, if the range is not empty.
+ If upper bound is present, include both bounds in the
+ error message.  */
+  if (check_upper)
+{
+  tmp = fold_build2_loc (input_location, LT_EXPR, logical_type_node,
+info->start[dim], lbound);
+  tmp = fold_build2_loc (input_location, TRUTH_AND_EXPR, logical_type_node,
+non_zerosized, tmp);
+  tree tmp2 = fold_build2_loc (input_location, GT_EXPR, logical_type_node,
+  info->start[dim], ubound);
+  tmp2 = fold_build2_loc (input_location, TRUTH_AND_EXPR, 
logical_type_node,
+ non_zerosized, tmp2);
+  msg = xasprintf ("Index '%%ld' of dimension %d of array '%s' outside of "
+  "expected range (%%ld:%%ld)", dim + 1, expr_name);
+  gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg,
+ fold_convert (long_integer_type_node, info->start[dim]),
+ fold_convert (long_integer_type_node, lbound),
+ fold_convert (long_integer_type_node, ubound));
+  gfc_trans_runtime_check (true, false, tmp2, inner, expr_loc, msg,
+ fold_convert (long_integer_type_node, info->start[dim]),
+ fold_convert (long_integer_type_node, lbound),
+ fold_convert (long_integer_type_node, ubound));
+  free (

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:31aa4cd8489361a61cbf1f92327934bcc860a2f2

commit 31aa4cd8489361a61cbf1f92327934bcc860a2f2
Author: Mikael Morin 
Date:   Thu Nov 16 22:14:41 2023 +0100

fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]

Enable generation of inline code for the MINLOC and MAXLOC intrinsics,
if the ARRAY argument is of integral type and of any rank (only the rank 1
case was previously inlined), and neither DIM nor MASK arguments are
present.

This needs a few adjustments in gfc_conv_intrinsic_minmaxloc,
mainly to replace the single variables POS and OFFSET, with collections
of variables, one variable per dimension each.

The restriction to integral ARRAY and absent MASK limits the scope of
the change to the cases where we generate single loop inline code.  The
code generation for the second loop is only accessible with ARRAY of rank
1, so it can continue using a single variable.  A later change will extend
inlining to the double loop cases.

There is some bounds checking code that was previously handled by the
library, and that needed some changes in the scalarizer to avoid regressing.
The bounds check code generation was already done by the scalarizer, but it was
only applying to array reference sections, checking both individual array
bounds and shape conformability between all the arrays involved.  For MINLOC
or MAXLOC, enable the conformability check between all the scalarized
arrays, and disable the check that the array reference is within its bounds.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Set the MINLOC/MAXLOC
result upper bound using the ARRAY argument rank.  Adjust the error
message for intrinsic result arrays.  Only check array bounds for
array references.  Move bound check decision code...
(bounds_check_needed): ... here as a new predicate.  Allow bound
check for MINLOC/MAXLOC intrinsic results.
* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Change the
result array upper bound to the rank of ARRAY.  Update the NONEMPTY
variable to depend on the non-empty extent of every dimension.  Use
one variable per dimension instead of a single variable for the
position and the offset.  Update their declaration, initialization,
and update to affect the variable of each dimension.  Use the first
variable only in areas only accessed with rank 1 ARRAY argument.
Set every element of the result using its corresponding variable.
(gfc_inline_intrinsic_function_p): Return true for integral ARRAY
and absent DIM and MASK.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_4.f90: Additionally accept the error
message emitted by the scalarizer.

Diff:
---
 gcc/fortran/trans-array.cc|  70 +---
 gcc/fortran/trans-intrinsic.cc| 148 +++---
 gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 |   4 +-
 3 files changed, 165 insertions(+), 57 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 99a603a3afb2..c9d63d13509d 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4876,6 +4876,35 @@ add_check_section_in_array_bounds (stmtblock_t *inner, 
gfc_ss_info *ss_info,
 }
 
 
+/* Tells whether we need to generate bounds checking code for the array
+   associated with SS.  */
+
+bool
+bounds_check_needed (gfc_ss *ss)
+{
+  /* Catch allocatable lhs in f2003.  */
+  if (flag_realloc_lhs && ss->no_bounds_check)
+return false;
+
+  gfc_ss_info *ss_info = ss->info;
+  if (ss_info->type == GFC_SS_SECTION)
+return true;
+
+  if (!(ss_info->type == GFC_SS_INTRINSIC
+   && ss_info->expr
+   && ss_info->expr->expr_type == EXPR_FUNCTION))
+return false;
+
+  gfc_intrinsic_sym *isym = ss_info->expr->value.function.isym;
+  if (!(isym
+   && (isym->id == GFC_ISYM_MAXLOC
+   || isym->id == GFC_ISYM_MINLOC)))
+return false;
+
+  return gfc_inline_intrinsic_function_p (ss_info->expr);
+}
+
+
 /* Calculates the range start and stride for a SS chain.  Also gets the
descriptor and data pointer.  The range of vector subscripts is the size
of the vector.  Array bounds are also checked.  */
@@ -4977,10 +5006,19 @@ done:
info->data = gfc_conv_array_data (info->descriptor);
info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
 
-   info->offset = gfc_index_zero_node;
+   gfc_expr *array = expr->value.function.actual->expr;
+   tree rank = build_int_cst (gfc_array_index_type, array->rank);
+
+   tree tmp = fold_build2_loc (input_location, MINUS_EXPR,
+

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:26fc4fb4228dc6584ee9153498cc85a16a5ec822

commit 26fc4fb4228dc6584ee9153498cc85a16a5ec822
Author: Mikael Morin 
Date:   Fri Nov 17 15:40:55 2023 +0100

fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]

Enable the generation of inline code for MINLOC/MAXLOC when argument ARRAY
is of integral type, DIM is not present, and MASK is present and is scalar
(only absent MASK or rank 1 ARRAY were inlined before).

Scalar masks are implemented with a wrapping condition around the code one
would generate if MASK wasn't present, so they are easy to support once
inline code without MASK is working.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate
variable initialization for each dimension in the else branch of
the toplevel condition.
(gfc_inline_intrinsic_function_p): Return TRUE for scalar MASK.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_7.f90: Additionally accept the error 
message
reported by the scalarizer.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 13 -
 gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 |  4 ++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index ac8bd2d48123..855208717973 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5886,7 +5886,6 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   /* For a scalar mask, enclose the loop in an if statement.  */
   if (maskexpr && maskss == NULL)
 {
-  gcc_assert (loop.dimen == 1);
   tree ifmask;
 
   gfc_init_se (&maskse, NULL);
@@ -5901,7 +5900,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 the pos variable the same way as above.  */
 
   gfc_init_block (&elseblock);
-  gfc_add_modify (&elseblock, pos[0], gfc_index_zero_node);
+  for (int i = 0; i < loop.dimen; i++)
+   gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node);
   elsetmp = gfc_finish_block (&elseblock);
   ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask);
   tmp = build3_v (COND_EXPR, ifmask, tmp, elsetmp);
@@ -11795,9 +11795,12 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
if (array->rank == 1)
  return true;
 
-   if (array->ts.type == BT_INTEGER
-   && dim == nullptr
-   && mask == nullptr)
+   if (array->ts.type != BT_INTEGER
+   || dim != nullptr)
+ return false;
+
+   if (mask == nullptr
+   || mask->rank == 0)
  return true;
 
return false;
diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 
b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
index 206a29b149da..3aa9d3dcebee 100644
--- a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
+++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
@@ -1,6 +1,6 @@
 ! { dg-do run }
 ! { dg-options "-fbounds-check" }
-! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2" }
+! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }
 module tst
 contains
   subroutine foo(res)
@@ -18,4 +18,4 @@ program main
   integer :: res(3)
   call foo(res)
 end program main
-! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2" }
+! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of 
array 'res' .3/2." }


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:c8f1b21681c6d3e4f313044545d5426f50cb

commit c8f1b21681c6d3e4f313044545d5426f50cb
Author: Mikael Morin 
Date:   Fri Nov 17 16:47:26 2023 +0100

fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608]

Enable generation of inline MINLOC/MAXLOC code in the case where DIM
is not present, and either ARRAY is of floating point type or MASK is an
array.  Those cases are the remaining bits to fully support inlining of
non-CHARACTER MINLOC/MAXLOC without DIM.  They are treated together because
they generate similar code, the NANs for REAL types being handled a bit like
a second level of masking.  These are the cases for which we generate two
sets of loops.

This change affects the code generating the second loop, that was previously
accessible only in the cases where ARRAY has rank 1.  The single variable
initialization and update are changed to apply to multiple variables, one
per dimension.

This change generates slightly worse code if ARRAY has rank 1.  Indeed
the code we used to generate was:

for (idx1 in lower..upper)
  {
...
if (...)
  {
...
break;
  }
  }
for (idx2 in idx1..upper)
  {
...
  }

which avoided starting the second loop from lower, skipping in the second
loop the elements already processed in the first one.  Unfortunately,
extending that code the obvious way to apply to rank > 1 leads to wrong
code:

for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in index11..upper1)
  {
for (idx22 in index12..upper2)
  {
...
  }
  }

That code is incorrect, as the loop over idx22, being nested, may be run
more than once, and the second run should restart from lower2, not index12.
So with this change, we generate instead as second set of loops:

...
second_loop:
for (idx21 in lower1..upper1)
  {
for (idx22 in lower2..upper2)
  {
...
  }
  }

which means the second set of loops processes again elements already
processed by the first one, and the rank 1 case becomes:

for (idx1 in lower..upper)
  {
...
if (...)
  {
...
break;
  }
  }
for (idx2 in lower..upper)
  {
...
  }

processing the first elements twice as well, which was not the case
before.  A later change will avoid the duplicate processing and restore
the generated code in the rank 1 case.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Initialize
and update all the variables.  Put the label and goto in the
outermost scalarizer loop.  Don't start the second loop where the
first stopped.
(gfc_inline_intrinsic_function_p): Also return TRUE for array MASK
or for any REAL type.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_5.f90: Additionally accept error
messages reported by the scalarizer.
* gfortran.dg/maxloc_bounds_6.f90: Ditto.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 127 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_5.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_6.f90 |   4 +-
 3 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 855208717973..bae3b49a9498 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5332,12 +5332,55 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
   if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
   S++;
 }
-   B: ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
-  case and wrap the result in an array.
-   C: ARRAY has rank > 1, NANs are not supported, and DIM and MASK are absent.
-  Generate code similar to the single loop scalar case, but using one
-  variable per dimension, for example if ARRAY has rank 2:
-  4) NAN's aren't supported, no MASK:
+   B: Array result, non-CHARACTER type, DIM

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:b2a5e99f9fb724b9838533c1eed5f4fc024be633

commit b2a5e99f9fb724b9838533c1eed5f4fc024be633
Author: Mikael Morin 
Date:   Thu Jul 25 12:27:09 2024 +0200

fortran: Continue MINLOC/MAXLOC second loop where the first stopped 
[PR90608]

Continue the second set of loops where the first one stopped in the
generated inline MINLOC/MAXLOC code in the cases where the generated code
contains two sets of loops.  This fixes a regression that was introduced
when enabling the generation of inline MINLOC/MAXLOC code with ARRAY of rank
greater than 1, non-scalar MASK and no DIM arguments.

In the cases where two sets of loops are generated as inline MINLOC/MAXLOC
code, we previously generated code such as (for rank 2 ARRAY, so with two
levels of nesting):

for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in lower1..upper1)
  {
for (idx22 in lower2..upper2)
  {
...
  }
  }

which means we process the first elements twice, once in the first set
of loops and once in the second one.  This change avoids this duplicate
processing by using a conditional as lower bound for the second set of
loops, generating code like:

second_loop_entry = false;
for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
second_loop_entry = true;
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in (second_loop_entry ? idx11 : lower1)..upper1)
  {
for (idx22 in (second_loop_entry ? idx12 : lower2)..upper2)
  {
...
second_loop_entry = false;
  }
  }

It was expected that the compiler optimizations would be able to remove the
state variable second_loop_entry.  This is the case if ARRAY has rank 1 (so
without loop nesting): the variable is removed and the loop bounds become
unconditional, which restores the previously generated code, fully fixing the
regression.  For larger rank, unfortunately, the state variable and
conditional loop bounds remain, but those cases were previously using
library calls, so it's not a regression.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate a set
of index variables.  Set them using the loop indexes before leaving
the first set of loops.  Generate a new loop entry predicate.
Set it before leaving the first set of loops.  Clear it in the body
of the second set of loops.  For the second set of loops, update
each loop variable to use the corresponding index variable if the
predicate variable is set.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 33 +++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index bae3b49a9498..29367c69d16b 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5342,6 +5342,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 pos0 = 0;
 pos1 = 1
 S1 = from1;
+second_loop_entry = false;
 while (S1 <= to1) {
   S0 = from0;
   while (s0 <= to0 {
@@ -5354,6 +5355,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 limit = a[S1][S0];
 pos0 = S0 + (1 - from0);
 pos1 = S1 + (1 - from1);
+second_loop_entry = true;
 goto lab1;
   }
 }
@@ -5363,9 +5365,9 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 }
 goto lab2;
 lab1:;
-S1 = from1;
+S1 = second_loop_entry ? S1 : from1;
 while (S1 <= to1) {
-  S0 = from0;
+  S0 = second_loop_entry ? S0 : from0;
   while (S0 <= to0) {
 if (mask[S1][S0])
   if (a[S1][S0] < limit) {
@@ -5373,6 +5375,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 pos0 = S + (1 - from0);
 pos1 = S + (1 - from1);
   }
+second_loop_entry = false;
 S0++;
   }
   S1++;
@@ -5444,6 +5447,7 

[gcc r15-2403] testsuite: fix dg-do run whitespace

2024-07-30 Thread Sam James via Gcc-cvs
https://gcc.gnu.org/g:136f364e26d9ad4f05e0005e480813cdc8f56c96

commit r15-2403-g136f364e26d9ad4f05e0005e480813cdc8f56c96
Author: Sam James 
Date:   Tue Jul 30 11:08:31 2024 +0100

testsuite: fix dg-do run whitespace

This caused the tests to not be run. I may do further passes for non-run
next.

Tested on x86_64-pc-linux-gnu and checked test logs before/after.

PR c/53548
PR target/101529
PR tree-optimization/102359
* c-c++-common/fam-in-union-alone-in-struct-1.c: Fix whitespace in 
dg directive.
* c-c++-common/fam-in-union-alone-in-struct-2.c: Likewise.
* c-c++-common/torture/builtin-shufflevector-2.c: Likewise.
* g++.dg/pr102359_2.C: Likewise.
* g++.target/i386/mvc1.C: Likewise.

Diff:
---
 gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c  | 2 +-
 gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c  | 2 +-
 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c | 2 +-
 gcc/testsuite/g++.dg/pr102359_2.C| 2 +-
 gcc/testsuite/g++.target/i386/mvc1.C | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c 
b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c
index 7d4721aa95ac..39ebf17850bf 100644
--- a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c
+++ b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c
@@ -1,6 +1,6 @@
 /* testing the correct usage of flexible array members in unions 
and alone in structures.  */
-/* { dg-do run} */
+/* { dg-do run } */
 /* { dg-options "-Wpedantic" } */
 
 union with_fam_1 {
diff --git a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c 
b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c
index 3743f9e7dac5..93f9d5128f6e 100644
--- a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c
+++ b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c
@@ -1,6 +1,6 @@
 /* testing the correct usage of flexible array members in unions 
and alone in structures: initialization  */
-/* { dg-do run} */
+/* { dg-do run } */
 /* { dg-options "-O2" } */
 
 union with_fam_1 {
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c 
b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
index b1ffc95e39ae..a84e0a626211 100644
--- a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
+++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
@@ -1,4 +1,4 @@
-/* { dg-do run}  */
+/* { dg-do run }  */
 /* PR target/101529 */
 typedef unsigned char C;
 typedef unsigned char __attribute__((__vector_size__ (8))) V;
diff --git a/gcc/testsuite/g++.dg/pr102359_2.C 
b/gcc/testsuite/g++.dg/pr102359_2.C
index d026d727dd5c..1b3f6147dec1 100644
--- a/gcc/testsuite/g++.dg/pr102359_2.C
+++ b/gcc/testsuite/g++.dg/pr102359_2.C
@@ -1,6 +1,6 @@
 /* PR middle-end/102359 ICE gimplification failed since
r12-3433-ga25e0b5e6ac8a77a.  */
-/* { dg-do run} */
+/* { dg-do run } */
 /* { dg-options "-ftrivial-auto-var-init=zero" } */
 /* { dg-require-effective-target c++17 } */
 
diff --git a/gcc/testsuite/g++.target/i386/mvc1.C 
b/gcc/testsuite/g++.target/i386/mvc1.C
index b307d01ace63..348bd0ec7202 100644
--- a/gcc/testsuite/g++.target/i386/mvc1.C
+++ b/gcc/testsuite/g++.target/i386/mvc1.C
@@ -1,4 +1,4 @@
-/* { dg-do run} */
+/* { dg-do run } */
 /* { dg-require-ifunc "" } */
 
 __attribute__((target_clones("avx","arch=slm","arch=core-avx2","default")))


[gcc r15-2404] c++: make source_location follow DECL_RAMP_FN

2024-07-30 Thread Arsen Arsenović via Gcc-cvs
https://gcc.gnu.org/g:265aa32062167a5b299c2ffb616edce5997b64bf

commit r15-2404-g265aa32062167a5b299c2ffb616edce5997b64bf
Author: Arsen Arsenović 
Date:   Thu Jul 25 16:13:24 2024 +0200

c++: make source_location follow DECL_RAMP_FN

This fixes the value of current_function in compiler generated coroutine
code.

PR c++/110855 - std::source_location doesn't work with C++20 coroutine

gcc/cp/ChangeLog:

PR c++/110855
* cp-gimplify.cc (fold_builtin_source_location): Use the name of
the DECL_RAMP_FN of the current function if present.

gcc/testsuite/ChangeLog:

PR c++/110855
* g++.dg/coroutines/pr110855.C: New test.

Diff:
---
 gcc/cp/cp-gimplify.cc  |  9 -
 gcc/testsuite/g++.dg/coroutines/pr110855.C | 61 ++
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index 6a5e4cf62ca1..b88c3b7f370b 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -3933,7 +3933,14 @@ fold_builtin_source_location (const_tree t)
  const char *name = "";
 
  if (current_function_decl)
-   name = cxx_printable_name (current_function_decl, 2);
+   {
+ /* If this is a coroutine, we should get the name of the user
+function rather than the actor we generate.  */
+ if (tree ramp = DECL_RAMP_FN (current_function_decl))
+   name = cxx_printable_name (ramp, 2);
+ else
+   name = cxx_printable_name (current_function_decl, 2);
+   }
 
  val = build_string_literal (name);
}
diff --git a/gcc/testsuite/g++.dg/coroutines/pr110855.C 
b/gcc/testsuite/g++.dg/coroutines/pr110855.C
new file mode 100644
index ..6b5c0147ec83
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr110855.C
@@ -0,0 +1,61 @@
+// { dg-do run }
+// { dg-output {^} }
+// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} }
+// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} }
+// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} }
+// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} }
+// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} }
+// { dg-output {$} }
+// https://gcc.gnu.org/PR110855
+#include <coroutine>
+#include <source_location>
+
+struct ReturnObject {
+  struct promise_type {
+auto
+initial_suspend(const std::source_location location =
+std::source_location::current()) {
+  __builtin_puts (location.function_name ());
+  return std::suspend_never{};
+}
+auto
+final_suspend(const std::source_location location =
+  std::source_location::current()) noexcept {
+  __builtin_puts (location.function_name ());
+  return std::suspend_never{};
+}
+auto
+get_return_object(const std::source_location location =
+  std::source_location::current()) {
+  __builtin_puts (location.function_name ());
+  return 
ReturnObject{std::coroutine_handle<promise_type>::from_promise(*this)};
+}
+auto
+unhandled_exception() { }
+auto return_void(const std::source_location location =
+ std::source_location::current()) {
+  __builtin_puts (location.function_name ());
+}
+  };
+  std::coroutine_handle<> handle;
+};
+
+struct awaitable : std::suspend_never
+{
+  void await_resume(const std::source_location location =
+ std::source_location::current())
+  {
+  __builtin_puts (location.function_name ());
+  }
+};
+
+ReturnObject
+bar(int, char, bool) {
+  co_await awaitable{};
+  co_return;
+}
+
+int
+main() {
+  bar(1, 'a', false);
+}


[gcc r15-2405] SVE intrinsics: Add strength reduction for division by constant.

2024-07-30 Thread Kyrylo Tkachov via Gcc-cvs
https://gcc.gnu.org/g:7cde140863edea536c676096cbc3d84a6d1424e4

commit r15-2405-g7cde140863edea536c676096cbc3d84a6d1424e4
Author: Jennifer Schmitz 
Date:   Tue Jul 16 01:59:50 2024 -0700

SVE intrinsics: Add strength reduction for division by constant.

This patch folds SVE division where all divisor elements are the same
power of 2 to svasrd (signed) or svlsr (unsigned).
Tests were added to check
1) whether the transform is applied (existing test harness was amended), and
2) correctness using runtime tests for all input types of svdiv; for signed
and unsigned integers, several corner cases were covered.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no 
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/

* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
Implement strength reduction.

gcc/testsuite/

* gcc.target/aarch64/sve/div_const_run.c: New test.
* gcc.target/aarch64/sve/acle/asm/div_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/div_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/div_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/div_u64.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc|  49 +++-
 .../gcc.target/aarch64/sve/acle/asm/div_s32.c  | 273 +++--
 .../gcc.target/aarch64/sve/acle/asm/div_s64.c  | 273 +++--
 .../gcc.target/aarch64/sve/acle/asm/div_u32.c  | 201 +--
 .../gcc.target/aarch64/sve/acle/asm/div_u64.c  | 201 +--
 .../gcc.target/aarch64/sve/div_const_run.c |  91 +++
 6 files changed, 1031 insertions(+), 57 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index a2268353ae31..d55bee0b72fa 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -746,6 +746,53 @@ public:
   }
 };
 
+class svdiv_impl : public rtx_code_function
+{
+public:
+  CONSTEXPR svdiv_impl ()
+: rtx_code_function (DIV, UDIV, UNSPEC_COND_FDIV) {}
+
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+tree divisor = gimple_call_arg (f.call, 2);
+tree divisor_cst = uniform_integer_cst_p (divisor);
+
+if (!divisor_cst || !integer_pow2p (divisor_cst))
+  return NULL;
+
+tree new_divisor;
+gcall *call;
+
+if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1)
+  {
+   function_instance instance ("svlsr", functions::svlsr,
+   shapes::binary_uint_opt_n, MODE_n,
+   f.type_suffix_ids, GROUP_none, f.pred);
+   call = f.redirect_call (instance);
+   tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+   new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
+  }
+else
+  {
+   if (tree_int_cst_sign_bit (divisor_cst)
+   || tree_to_shwi (divisor_cst) == 1)
+ return NULL;
+
+   function_instance instance ("svasrd", functions::svasrd,
+   shapes::shift_right_imm, MODE_n,
+   f.type_suffix_ids, GROUP_none, f.pred);
+   call = f.redirect_call (instance);
+   new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t],
+   tree_log2 (divisor_cst));
+  }
+
+gimple_call_set_arg (call, 2, new_divisor);
+return call;
+  }
+};
+
+
 class svdot_impl : public function_base
 {
 public:
@@ -3043,7 +3090,7 @@ FUNCTION (svcreate3, svcreate_impl, (3))
 FUNCTION (svcreate4, svcreate_impl, (4))
 FUNCTION (svcvt, svcvt_impl,)
 FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
-FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
+FUNCTION (svdiv, svdiv_impl,)
 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
 FUNCTION (svdot, svdot_impl,)
 FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
index c49ca1aa5243..d5a23bf07262 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
@@ -2,6 +2,8 @@
 
 #include "test_sve_acle.h"
 
+#define MAXPOW 1<<30
+
 /*
 ** div_s32_m_tied1:
 ** sdivz0\.s, p0/m, z0\.s, z1\.s
@@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t,
 z0 = svdiv_n_s32_m (p0, z1, x0),
 z0 = svdiv_m (p0, z1, x0))
 
+/*
+** div_1_s32_m_tied1:
+** sel z0\.s, p0, z0\.s, z0\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t,
+   z0 = svdiv_n_s32_m (p0, z0, 1),
+   z0 = svdiv_m (p0, z0, 1

[gcc r15-2406] c++: fix ICE on FUNCTION_DECLs inside coroutines [PR115906]

2024-07-30 Thread Arsen Arsenović via Gcc-cvs
https://gcc.gnu.org/g:a362c9ca4ef6585e678f899705043a9aa10dd670

commit r15-2406-ga362c9ca4ef6585e678f899705043a9aa10dd670
Author: Arsen Arsenovic 
Date:   Tue Jul 30 13:42:56 2024 +0200

c++: fix ICE on FUNCTION_DECLs inside coroutines [PR115906]

When register_local_var_uses iterates a BIND_EXPR's BIND_EXPR_VARS, it
fails to account for the fact that FUNCTION_DECLs might be present, and
later passes it to DECL_HAS_VALUE_EXPR_P.  This leads to a tree check
failure in DECL_HAS_VALUE_EXPR_P:

  tree check: expected var_decl or parm_decl or result_decl, have
  function_decl in register_local_var_uses

We only care about PARM_DECL and VAR_DECL, so select only those.

PR c++/115906 - [coroutines] missing diagnostic and ICE when co_await used 
as default argument in function declaration

gcc/cp/ChangeLog:

PR c++/115906
* coroutines.cc (register_local_var_uses): Only process
PARM_DECL and VAR_DECLs.

gcc/testsuite/ChangeLog:

PR c++/115906
* g++.dg/coroutines/coro-function-decl.C: New test.

Diff:
---
 gcc/cp/coroutines.cc |  4 ++--
 gcc/testsuite/g++.dg/coroutines/coro-function-decl.C | 19 +++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 2b16b4814d10..127a1c06b56e 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -3927,8 +3927,8 @@ register_local_var_uses (tree *stmt, int *do_subtree, 
void *d)
  local_var.field_idx = local_var.field_id = NULL_TREE;
 
  /* Make sure that we only present vars to the tests below.  */
- if (TREE_CODE (lvar) == TYPE_DECL
- || TREE_CODE (lvar) == NAMESPACE_DECL)
+ if (TREE_CODE (lvar) != PARM_DECL
+ && TREE_CODE (lvar) != VAR_DECL)
continue;
 
  /* We don't move static vars into the frame. */
diff --git a/gcc/testsuite/g++.dg/coroutines/coro-function-decl.C 
b/gcc/testsuite/g++.dg/coroutines/coro-function-decl.C
new file mode 100644
index ..86140569a76e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/coro-function-decl.C
@@ -0,0 +1,19 @@
+#include <coroutine>
+
+struct task
+{
+  struct promise_type
+  {
+std::suspend_always initial_suspend () { return {}; }
+std::suspend_always final_suspend () noexcept { return {}; }
+void unhandled_exception () {}
+task get_return_object () noexcept { return {}; }
+void return_void () {}
+  };
+};
+
+task foo ()
+{
+  void bar ();
+  co_return;
+}


[gcc r15-2407] c++: diagnose usage of co_await and co_yield in default args [PR115906]

2024-07-30 Thread Arsen Arsenović via Gcc-cvs
https://gcc.gnu.org/g:0c382da0943dc7d14455ba2ada2f620a25bd1366

commit r15-2407-g0c382da0943dc7d14455ba2ada2f620a25bd1366
Author: Arsen Arsenović 
Date:   Thu Jul 25 01:00:02 2024 +0200

c++: diagnose usage of co_await and co_yield in default args [PR115906]

This is a partial fix for PR115906.  Per [expr.await] 2s3, "An
await-expression shall not appear in a default argument
([dcl.fct.default])".  This patch introduces the diagnostic in that
case, and in the case of a co_yield (as co_yield is defined in terms of
co_await, so prerequisites of co_await hold).

PR c++/115906 - [coroutines] missing diagnostic and ICE when co_await used 
as default argument in function declaration

gcc/cp/ChangeLog:

PR c++/115906
* parser.cc (cp_parser_unary_expression): Reject await
expressions if use of local variables is currently forbidden.
(cp_parser_yield_expression): Reject yield expressions if use of
local variables is currently forbidden.

gcc/testsuite/ChangeLog:

PR c++/115906
* g++.dg/coroutines/pr115906-yield.C: New test.
* g++.dg/coroutines/pr115906.C: New test.
* g++.dg/coroutines/co-await-syntax-02-outside-fn.C: Don't rely
on default arguments.
* g++.dg/coroutines/co-yield-syntax-01-outside-fn.C: Ditto.

Diff:
---
 gcc/cp/parser.cc   | 17 
 .../coroutines/co-await-syntax-02-outside-fn.C |  2 +-
 .../coroutines/co-yield-syntax-01-outside-fn.C |  3 +-
 gcc/testsuite/g++.dg/coroutines/pr115906-yield.C   | 29 
 gcc/testsuite/g++.dg/coroutines/pr115906.C | 32 ++
 5 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index e46cdfd20e19..eb102dea8299 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -9242,6 +9242,14 @@ cp_parser_unary_expression (cp_parser *parser, 
cp_id_kind * pidk,
if (expr == error_mark_node)
  return error_mark_node;
 
+   /* ... but, we cannot use co_await in default arguments.  */
+   if (parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN)
+ {
+   error_at (kw_loc,
+ "%<co_await%> cannot be used in default arguments");
+   return error_mark_node;
+ }
+
/* Handle [expr.await].  */
return cp_expr (finish_co_await_expr (kw_loc, expr));
  }
@@ -29651,6 +29659,15 @@ cp_parser_yield_expression (cp_parser* parser)
   else
 expr = cp_parser_assignment_expression (parser);
 
+  /* Similar to co_await, we cannot use co_yield in default arguments (as
+ co_awaits underlie co_yield).  */
+  if (parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN)
+{
+  error_at (kw_loc,
+   "%<co_yield%> cannot be used in default arguments");
+  return error_mark_node;
+}
+
   if (expr == error_mark_node)
 return expr;
 
diff --git a/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C 
b/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C
index 4ce5c2e04a0a..132128f27192 100644
--- a/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C
+++ b/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C
@@ -2,4 +2,4 @@
 
 #include "coro.h"
 
-auto f (int x = co_await coro::suspend_always{}); // { dg-error {'co_await' 
cannot be used outside a function} }
+auto x = co_await coro::suspend_always{}; // { dg-error {'co_await' cannot be 
used outside a function} }
diff --git a/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C 
b/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C
index 30db0e963b09..51c304625278 100644
--- a/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C
+++ b/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C
@@ -2,5 +2,4 @@
 
 #include "coro.h"
 
-auto f (int x = co_yield 5); // { dg-error {'co_yield' cannot be used outside 
a function} }
-
+auto x = co_yield 5; // { dg-error {'co_yield' cannot be used outside a 
function} }
diff --git a/gcc/testsuite/g++.dg/coroutines/pr115906-yield.C 
b/gcc/testsuite/g++.dg/coroutines/pr115906-yield.C
new file mode 100644
index ..f8b6ded5001c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr115906-yield.C
@@ -0,0 +1,29 @@
+#include <coroutine>
+
+struct Promise;
+
+struct Handle : std::coroutine_handle<Promise> {
+using promise_type = Promise;
+};
+
+struct Promise {
+Handle get_return_object() noexcept {
+return {Handle::from_promise(*this)};
+}
+std::suspend_never initial_suspend() const noexcept { return {}; }
+std::suspend_never final_suspend() const noexcept { return {}; }
+void return_void() const noexcept {}
+void unhandled_exception() const noexcept {}
+std::suspend_never yield_value(int) { return {}; }
+};
+
+Handle Coro() {
+ 

[gcc] Deleted branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 'refs/users'

2024-07-30 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 
'refs/users' was deleted.
It previously pointed to:

 b2a5e99f9fb7... fortran: Continue MINLOC/MAXLOC second loop where the first

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  b2a5e99... fortran: Continue MINLOC/MAXLOC second loop where the first
  c8f... fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR9060
  26fc4fb... fortran: Inline integral MINLOC/MAXLOC with no DIM and scal
  31aa4cd... fortran: Inline integral MINLOC/MAXLOC with no DIM and no M
  0090c54... fortran: Outline array bound check generation code
  243591a... fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank
  0c6f911... fortran: Disable frontend passes for MINLOC/MAXLOC if it's 
  c1eb87c... fortran: Add tests covering inline MINLOC/MAXLOC without DI
  826cbd8... fortran: Support optional dummy as BACK argument of MINLOC/


[gcc] Created branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 'refs/users'

2024-07-30 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/inline_minmaxloc_without_dim_v06' was created in 
namespace 'refs/users' pointing to:

 215d87c9e87f... fortran: Continue MINLOC/MAXLOC second loop where the first


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC.

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:826cbd802b72c2a5d450493f86e7ee0106432282

commit 826cbd802b72c2a5d450493f86e7ee0106432282
Author: Mikael Morin 
Date:   Mon Jul 22 13:27:24 2024 +0200

fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC.

Hello,

this fixes a null pointer dereference with absent optional dummy passed
as BACK argument of MINLOC/MAXLOC.

Tested for regression on x86_64-linux.
OK for master?

-- >8 --

Protect the evaluation of BACK with a check that the reference is non-null
in case the expression is an optional dummy, in the inline code generated
for MINLOC and MAXLOC.

This change contains a revert of the non-testsuite part of commit
r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3, which factored the
evaluation of BACK out of the loop using the scalarizer.  It was a bad idea,
because delegating the argument evaluation to the scalarizer makes it
cumbersome to add a null pointer check next to the evaluation.

Instead, evaluate BACK at the beginning, before scalarization, add a check
that the argument is present if necessary, and evaluate the resulting
expression to a variable, before using the variable in the inline code.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (maybe_absent_optional_variable): New function.
(gfc_conv_intrinsic_minmaxloc): Remove BACK from scalarization and
evaluate it before.  Add a check that BACK is not null if the
expression is an optional dummy.  Save the resulting expression to a
variable.  Use the variable in the generated inline code.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_6.f90: New test.
* gfortran.dg/minloc_7.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  81 ++--
 gcc/testsuite/gfortran.dg/maxloc_6.f90 | 366 +
 gcc/testsuite/gfortran.dg/minloc_7.f90 | 366 +
 3 files changed, 799 insertions(+), 14 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 180d0d7a88c6..9f3c3ce47bc5 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5209,6 +5209,50 @@ gfc_conv_intrinsic_dot_product (gfc_se * se, gfc_expr * 
expr)
 }
 
 
+/* Tells whether the expression E is a reference to an optional variable whose
+   presence is not known at compile time.  Those are variable references 
without
+   subreference; if there is a subreference, we can assume the variable is
+   present.  We have to special case full arrays, which we represent with a 
fake
+   "full" reference, and class descriptors for which a reference to data is not
+   really a subreference.  */
+
+bool
+maybe_absent_optional_variable (gfc_expr *e)
+{
+  if (!(e && e->expr_type == EXPR_VARIABLE))
+return false;
+
+  gfc_symbol *sym = e->symtree->n.sym;
+  if (!sym->attr.optional)
+return false;
+
+  gfc_ref *ref = e->ref;
+  if (ref == nullptr)
+return true;
+
+  if (ref->type == REF_ARRAY
+  && ref->u.ar.type == AR_FULL
+  && ref->next == nullptr)
+return true;
+
+  if (!(sym->ts.type == BT_CLASS
+   && ref->type == REF_COMPONENT
+   && ref->u.c.component == CLASS_DATA (sym)))
+return false;
+
+  gfc_ref *next_ref = ref->next;
+  if (next_ref == nullptr)
+return true;
+
+  if (next_ref->type == REF_ARRAY
+  && next_ref->u.ar.type == AR_FULL
+  && next_ref->next == nullptr)
+return true;
+
+  return false;
+}
+
+
 /* Remove unneeded kind= argument from actual argument list when the
result conversion is dealt with in a different place.  */
 
@@ -5321,11 +5365,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   tree nonempty;
   tree lab1, lab2;
   tree b_if, b_else;
+  tree back;
   gfc_loopinfo loop;
   gfc_actual_arglist *actual;
   gfc_ss *arrayss;
   gfc_ss *maskss;
-  gfc_ss *backss;
   gfc_se arrayse;
   gfc_se maskse;
   gfc_expr *arrayexpr;
@@ -5391,10 +5435,27 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 && maskexpr->symtree->n.sym->attr.dummy
 && maskexpr->symtree->n.sym->attr.optional;
   backexpr = actual->next->next->expr;
-  if (backexpr)
-backss = gfc_get_scalar_ss (gfc_ss_terminator, backexpr);
+
+  gfc_init_se (&backse, NULL);
+  if (backexpr == nullptr)
+back = logical_false_node;
+  else if (maybe_absent_optional_variable (backexpr))
+{
+  gcc_assert (backexpr->expr_type == EXPR_VARIABLE);
+
+  gfc_conv_expr (&backse, backexpr);
+  tree present = gfc_conv_expr_present (backexpr->symtree->n.sym, false);
+  back = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+ logical_type_node, present, backse.expr);
+}
   else
-backss = nullptr;
+{
+  gfc_conv_expr (&backse, backexpr);
+  back = backse.expr;
+

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:c1eb87cb9470518cf499765fd72c8810f943c239

commit c1eb87cb9470518cf499765fd72c8810f943c239
Author: Mikael Morin 
Date:   Thu Jul 25 18:04:13 2024 +0200

fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]

Add the tests covering the various cases for which we are about to implement
inline expansion of MINLOC and MAXLOC.  Those are cases where the DIM
argument is not present.

PR fortran/90608

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_7.f90: New test.
* gfortran.dg/maxloc_with_mask_1.f90: New test.
* gfortran.dg/minloc_8.f90: New test.
* gfortran.dg/minloc_with_mask_1.f90: New test.

Diff:
---
 gcc/testsuite/gfortran.dg/maxloc_7.f90   | 220 +
 gcc/testsuite/gfortran.dg/maxloc_with_mask_1.f90 | 393 +++
 gcc/testsuite/gfortran.dg/minloc_8.f90   | 220 +
 gcc/testsuite/gfortran.dg/minloc_with_mask_1.f90 | 392 ++
 4 files changed, 1225 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/maxloc_7.f90 
b/gcc/testsuite/gfortran.dg/maxloc_7.f90
new file mode 100644
index ..a875083052a9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/maxloc_7.f90
@@ -0,0 +1,220 @@
+! { dg-do run }
+!
+! PR fortran/90608
+! Check the correct behaviour of the inline maxloc implementation,
+! when there is no optional argument.
+
+program p
+  implicit none
+  integer, parameter :: data5(*) = (/ 1, 7, 2, 7, 0 /)
+  integer, parameter :: data64(*) = (/ 2, 5, 4, 6, 0, 9, 3, 5,  &
+   4, 4, 1, 7, 3, 2, 1, 2,  &
+   5, 4, 6, 0, 9, 3, 5, 4,  &
+   4, 1, 7, 3, 2, 1, 2, 5,  &
+   4, 6, 0, 9, 3, 5, 4, 4,  &
+   1, 7, 3, 2, 1, 2, 5, 4,  &
+   6, 0, 9, 3, 5, 4, 4, 1,  &
+   7, 3, 2, 1, 2, 5, 4, 6  /)
+  call check_int_const_shape_rank_1
+  call check_int_const_shape_rank_3
+  call check_int_const_shape_empty_4
+  call check_int_alloc_rank_1
+  call check_int_alloc_rank_3
+  call check_int_alloc_empty_4
+  call check_real_const_shape_rank_1
+  call check_real_const_shape_rank_3
+  call check_real_const_shape_empty_4
+  call check_real_alloc_rank_1
+  call check_real_alloc_rank_3
+  call check_real_alloc_empty_4
+  call check_int_lower_bounds
+  call check_real_lower_bounds
+  call check_all_nans
+  call check_dependencies
+contains
+  subroutine check_int_const_shape_rank_1()
+integer :: a(5)
+integer, allocatable :: m(:)
+a = data5
+m = maxloc(a)
+if (size(m, dim=1) /= 1) stop 11
+if (any(m /= (/ 2 /))) stop 12
+  end subroutine
+  subroutine check_int_const_shape_rank_3()
+integer :: a(4,4,4)
+integer, allocatable :: m(:)
+a = reshape(data64, shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 3) stop 21
+if (any(m /= (/ 2, 2, 1 /))) stop 22
+  end subroutine
+  subroutine check_int_const_shape_empty_4()
+integer :: a(9,3,0,7)
+integer, allocatable :: m(:)
+a = reshape((/ integer:: /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 4) stop 31
+if (any(m /= (/ 0, 0, 0, 0 /))) stop 32
+  end subroutine
+  subroutine check_int_alloc_rank_1()
+integer, allocatable :: a(:)
+integer, allocatable :: m(:)
+allocate(a(5))
+a(:) = data5
+m = maxloc(a)
+if (size(m, dim=1) /= 1) stop 41
+if (any(m /= (/ 2 /))) stop 42
+  end subroutine
+  subroutine check_int_alloc_rank_3()
+integer, allocatable :: a(:,:,:)
+integer, allocatable :: m(:)
+allocate(a(4,4,4))
+a(:,:,:) = reshape(data64, shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 3) stop 51
+if (any(m /= (/ 2, 2, 1 /))) stop 52
+  end subroutine
+  subroutine check_int_alloc_empty_4()
+integer, allocatable :: a(:,:,:,:)
+integer, allocatable :: m(:)
+allocate(a(9,3,0,7))
+a(:,:,:,:) = reshape((/ integer:: /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 4) stop 61
+if (any(m /= (/ 0, 0, 0, 0 /))) stop 62
+  end subroutine
+  subroutine check_real_const_shape_rank_1()
+real :: a(5)
+integer, allocatable :: m(:)
+a = (/ real:: data5 /)
+m = maxloc(a)
+if (size(m, dim=1) /= 1) stop 71
+if (any(m /= (/ 2 /))) stop 72
+  end subroutine
+  subroutine check_real_const_shape_rank_3()
+real :: a(4,4,4)
+integer, allocatable :: m(:)
+a = reshape((/ real:: data64 /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 3) stop 81
+if (any(m /= (/ 2, 2, 1 /))) stop 82
+  end subroutine
+  subroutine check_real_const_shape_empty_4()
+real :: a(9,3,0,7)
+integer, allocatable :: m(:)
+a = reshape((/ real:: /), shape(a))
+m = maxloc(a)
+if (size(m, dim=1) /= 4) stop 91
+if (any(m /= (/ 0, 0, 0, 0 /))) stop 92
+  end subroutine

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:0c6f911ae6433ab0c3cc668083bd45aee52723bf

commit 0c6f911ae6433ab0c3cc668083bd45aee52723bf
Author: Mikael Morin 
Date:   Wed Nov 15 10:23:32 2023 +0100

fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined

Disable rewriting of MINLOC/MAXLOC expressions for which inline code
generation is supported.  Update the gfc_inline_intrinsic_function_p
predicate (already existing) for that, with the current state of
MINLOC/MAXLOC inlining support, that is only the cases of a scalar
result and non-CHARACTER argument for now.

This change has no effect currently, as the MINLOC/MAXLOC front-end passes
only change expressions of rank 1, but the inlining control predicate
gfc_inline_intrinsic_function_p returns false for those.  However, later
changes will extend MINLOC/MAXLOC inline expansion support to array
expressions and update the inlining control predicate, and this will become
effective.

gcc/fortran/ChangeLog:

* frontend-passes.cc (optimize_minmaxloc): Skip if we can generate
inline code for the unmodified expression.
* trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Add
MINLOC and MAXLOC cases.

Diff:
---
 gcc/fortran/frontend-passes.cc |  3 ++-
 gcc/fortran/trans-intrinsic.cc | 23 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc
index 3c06018fdbbf..8e4c6310ba8d 100644
--- a/gcc/fortran/frontend-passes.cc
+++ b/gcc/fortran/frontend-passes.cc
@@ -2277,7 +2277,8 @@ optimize_minmaxloc (gfc_expr **e)
   || fn->value.function.actual == NULL
   || fn->value.function.actual->expr == NULL
   || fn->value.function.actual->expr->ts.type == BT_CHARACTER
-  || fn->value.function.actual->expr->rank != 1)
+  || fn->value.function.actual->expr->rank != 1
+  || gfc_inline_intrinsic_function_p (fn))
 return;
 
   *e = gfc_get_array_expr (fn->ts.type, fn->ts.kind, &fn->where);
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 9f3c3ce47bc5..cc0d00f4e399 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -11650,6 +11650,29 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
 case GFC_ISYM_TRANSPOSE:
   return true;
 
+case GFC_ISYM_MINLOC:
+case GFC_ISYM_MAXLOC:
+  {
+   /* Disable inline expansion if code size matters.  */
+   if (optimize_size)
+ return false;
+
+   gfc_actual_arglist *array_arg = expr->value.function.actual;
+   gfc_actual_arglist *dim_arg = array_arg->next;
+
+   gfc_expr *array = array_arg->expr;
+   gfc_expr *dim = dim_arg->expr;
+
+   if (!(array->ts.type == BT_INTEGER
+ || array->ts.type == BT_REAL))
+ return false;
+
+   if (array->rank == 1 && dim != nullptr)
+ return true;
+
+   return false;
+  }
+
 default:
   return false;
 }


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:7c8dc5220170816e9c44fb1e42e3feb80831c740

commit 7c8dc5220170816e9c44fb1e42e3feb80831c740
Author: Mikael Morin 
Date:   Tue Jul 9 21:05:40 2024 +0200

fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]

Enable inline code generation for the MINLOC and MAXLOC intrinsic, if the
DIM argument is not present and ARRAY has rank 1.  This case is similar to
the case where the result is scalar (DIM present and rank 1 ARRAY), which
already supports inline expansion of the intrinsic.  Both cases return
the same value, with the difference that the result is an array of size 1 if
DIM is absent, whereas it's a scalar if DIM is present.  So all there is
to do for this case to work is hook the inline expansion with the
scalarizer.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Set the scalarization
rank based on the MINLOC/MAXLOC rank if needed.  Call the inline
code generation and set up the scalarizer array descriptor info
in the MINLOC and MAXLOC cases.
* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Return the
result array element if the scalarizer is set up and we are inside
the loop.  Restrict library function call dispatch to the case
where inline expansion is not supported.  Declare an array result
if the expression isn't scalar.  Initialize the array result single
element and return the result variable if the expression isn't
scalar.
(walk_inline_intrinsic_minmaxloc): New function.
(walk_inline_intrinsic_function): Add MINLOC and MAXLOC cases,
dispatching to walk_inline_intrinsic_minmaxloc.
(gfc_add_intrinsic_ss_code): Add MINLOC and MAXLOC cases.
(gfc_inline_intrinsic_function_p): Return true if ARRAY has rank 1,
regardless of DIM.

Diff:
---
 gcc/fortran/trans-array.cc |  25 ++
 gcc/fortran/trans-intrinsic.cc | 198 +++--
 2 files changed, 155 insertions(+), 68 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index c93a5f1e7543..0c78e1fecd8f 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4771,6 +4771,8 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop)
case GFC_ISYM_UBOUND:
case GFC_ISYM_LCOBOUND:
case GFC_ISYM_UCOBOUND:
+   case GFC_ISYM_MAXLOC:
+   case GFC_ISYM_MINLOC:
case GFC_ISYM_SHAPE:
case GFC_ISYM_THIS_IMAGE:
  loop->dimen = ss->dimen;
@@ -4820,6 +4822,29 @@ done:
case GFC_SS_INTRINSIC:
  switch (expr->value.function.isym->id)
{
+   case GFC_ISYM_MINLOC:
+   case GFC_ISYM_MAXLOC:
+ {
+   gfc_se se;
+   gfc_init_se (&se, nullptr);
+   se.loop = loop;
+   se.ss = ss;
+   gfc_conv_intrinsic_function (&se, expr);
+   gfc_add_block_to_block (&outer_loop->pre, &se.pre);
+   gfc_add_block_to_block (&outer_loop->post, &se.post);
+
+   info->descriptor = se.expr;
+
+   info->data = gfc_conv_array_data (info->descriptor);
+   info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
+
+   info->offset = gfc_index_zero_node;
+   info->start[0] = gfc_index_zero_node;
+   info->end[0] = gfc_index_zero_node;
+   info->stride[0] = gfc_index_one_node;
+   continue;
+ }
+
/* Fall through to supply start and stride.  */
case GFC_ISYM_LBOUND:
case GFC_ISYM_UBOUND:
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index cc0d00f4e399..a947dd1ba0b2 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5273,66 +5273,69 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
we need to handle.  For performance reasons we sometimes create two
loops instead of one, where the second one is much simpler.
Examples for minloc intrinsic:
-   1) Result is an array, a call is generated
-   2) Array mask is used and NaNs need to be supported:
-  limit = Infinity;
-  pos = 0;
-  S = from;
-  while (S <= to) {
-   if (mask[S]) {
- if (pos == 0) pos = S + (1 - from);
- if (a[S] <= limit) { limit = a[S]; pos = S + (1 - from); goto lab1; }
-   }
-   S++;
-  }
-  goto lab2;
-  lab1:;
-  while (S <= to) {
-   if (mask[S]) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
-   S++;
-  }
-  lab2:;
-   3) NaNs need to be supported, but it is known at compile time or cheaply
-  at runtime whether array is nonempty or not:
-  limit = Infinity;
-  pos = 0;
- 

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Outline array bound check generation code

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:b06cfb12ade15dd221f4a3ffbe707da5597e172e

commit b06cfb12ade15dd221f4a3ffbe707da5597e172e
Author: Mikael Morin 
Date:   Wed Apr 10 21:18:03 2024 +0200

fortran: Outline array bound check generation code

The next patch will need reindenting of the array bound check generation
code.  This outlines it to its own function beforehand, reducing the churn
in the next patch.

-- >8 --

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Move array bound check
generation code...
(add_check_section_in_array_bounds): ... here as a new function.

Diff:
---
 gcc/fortran/trans-array.cc | 297 ++---
 1 file changed, 143 insertions(+), 154 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 0c78e1fecd8f..99a603a3afb2 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4736,6 +4736,146 @@ gfc_conv_section_startstride (stmtblock_t * block, 
gfc_ss * ss, int dim)
 }
 
 
+/* Generate in INNER the bounds checking code along the dimension DIM for
+   the array associated with SS_INFO.  */
+
+static void
+add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info,
+  int dim)
+{
+  gfc_expr *expr = ss_info->expr;
+  locus *expr_loc = &expr->where;
+  const char *expr_name = expr->symtree->name;
+
+  gfc_array_info *info = &ss_info->data.array;
+
+  bool check_upper;
+  if (dim == info->ref->u.ar.dimen - 1
+  && info->ref->u.ar.as->type == AS_ASSUMED_SIZE)
+check_upper = false;
+  else
+check_upper = true;
+
+  /* Zero stride is not allowed.  */
+  tree tmp = fold_build2_loc (input_location, EQ_EXPR, logical_type_node,
+ info->stride[dim], gfc_index_zero_node);
+  char * msg = xasprintf ("Zero stride is not allowed, for dimension %d "
+ "of array '%s'", dim + 1, expr_name);
+  gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg);
+  free (msg);
+
+  tree desc = info->descriptor;
+
+  /* This is the run-time equivalent of resolve.cc's
+ check_dimension.  The logical is more readable there
+ than it is here, with all the trees.  */
+  tree lbound = gfc_conv_array_lbound (desc, dim);
+  tree end = info->end[dim];
+  tree ubound = check_upper ? gfc_conv_array_ubound (desc, dim) : NULL_TREE;
+
+  /* non_zerosized is true when the selected range is not
+ empty.  */
+  tree stride_pos = fold_build2_loc (input_location, GT_EXPR, 
logical_type_node,
+info->stride[dim], gfc_index_zero_node);
+  tmp = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
+info->start[dim], end);
+  stride_pos = fold_build2_loc (input_location, TRUTH_AND_EXPR,
+   logical_type_node, stride_pos, tmp);
+
+  tree stride_neg = fold_build2_loc (input_location, LT_EXPR, 
logical_type_node,
+info->stride[dim], gfc_index_zero_node);
+  tmp = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+info->start[dim], end);
+  stride_neg = fold_build2_loc (input_location, TRUTH_AND_EXPR,
+   logical_type_node, stride_neg, tmp);
+  tree non_zerosized = fold_build2_loc (input_location, TRUTH_OR_EXPR,
+   logical_type_node, stride_pos,
+   stride_neg);
+
+  /* Check the start of the range against the lower and upper
+ bounds of the array, if the range is not empty.
+ If upper bound is present, include both bounds in the
+ error message.  */
+  if (check_upper)
+{
+  tmp = fold_build2_loc (input_location, LT_EXPR, logical_type_node,
+info->start[dim], lbound);
+  tmp = fold_build2_loc (input_location, TRUTH_AND_EXPR, logical_type_node,
+non_zerosized, tmp);
+  tree tmp2 = fold_build2_loc (input_location, GT_EXPR, logical_type_node,
+  info->start[dim], ubound);
+  tmp2 = fold_build2_loc (input_location, TRUTH_AND_EXPR, 
logical_type_node,
+ non_zerosized, tmp2);
+  msg = xasprintf ("Index '%%ld' of dimension %d of array '%s' outside of "
+  "expected range (%%ld:%%ld)", dim + 1, expr_name);
+  gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg,
+ fold_convert (long_integer_type_node, info->start[dim]),
+ fold_convert (long_integer_type_node, lbound),
+ fold_convert (long_integer_type_node, ubound));
+  gfc_trans_runtime_check (true, false, tmp2, inner, expr_loc, msg,
+ fold_convert (long_integer_type_node, info->start[dim]),
+ fold_convert (long_integer_type_node, lbound),
+ fold_convert (long_integer_type_node, ubound));
+  free (

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:081f12a1d9d1bd793e3571daf5ab25db594ff57a

commit 081f12a1d9d1bd793e3571daf5ab25db594ff57a
Author: Mikael Morin 
Date:   Thu Nov 16 22:14:41 2023 +0100

fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]

Enable generation of inline code for the MINLOC and MAXLOC intrinsic,
if the ARRAY argument is of integral type and of any rank (only the rank 1
case was previously inlined), and neither DIM nor MASK arguments are
present.

This needs a few adjustments in gfc_conv_intrinsic_minmaxloc,
mainly to replace the single variables POS and OFFSET, with collections
of variables, one variable per dimension each.

The restriction to integral ARRAY and absent MASK limits the scope of
the change to the cases where we generate single loop inline code.  The
code generation for the second loop is only accessible with ARRAY of rank
1, so it can continue using a single variable.  A later change will extend
inlining to the double loop cases.

There is some bounds checking code that was previously handled by the
library, and that needed some changes in the scalarizer to avoid regressing.
The bounds check code generation was already done by the scalarizer, but it
was only applied to array reference sections, checking both individual array
bounds and shape conformability between all the arrays involved.  For MINLOC
or MAXLOC, enable the conformability check between all the scalarized
arrays, and disable the check that the array reference is within its bounds.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-array.cc (gfc_conv_ss_startstride): Set the MINLOC/MAXLOC
result upper bound using the ARRAY argument rank.  Adjust the error
message for intrinsic result arrays.  Only check array bounds for
array references.  Move bound check decision code...
(bounds_check_needed): ... here as a new predicate.  Allow bound
check for MINLOC/MAXLOC intrinsic results.
* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Change the
result array upper bound to the rank of ARRAY.  Update the NONEMPTY
variable to depend on the non-empty extent of every dimension.  Use
one variable per dimension instead of a single variable for the
position and the offset.  Update their declaration, initialization,
and update to affect the variable of each dimension.  Use the first
variable only in areas only accessed with rank 1 ARRAY argument.
Set every element of the result using its corresponding variable.
(gfc_inline_intrinsic_function_p): Return true for integral ARRAY
and absent DIM and MASK.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_4.f90: Additionally accept the error
message emitted by the scalarizer.

Diff:
---
 gcc/fortran/trans-array.cc|  70 +---
 gcc/fortran/trans-intrinsic.cc| 150 +++---
 gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 |   4 +-
 3 files changed, 167 insertions(+), 57 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 99a603a3afb2..c9d63d13509d 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4876,6 +4876,35 @@ add_check_section_in_array_bounds (stmtblock_t *inner, 
gfc_ss_info *ss_info,
 }
 
 
+/* Tells whether we need to generate bounds checking code for the array
+   associated with SS.  */
+
+bool
+bounds_check_needed (gfc_ss *ss)
+{
+  /* Catch allocatable lhs in f2003.  */
+  if (flag_realloc_lhs && ss->no_bounds_check)
+return false;
+
+  gfc_ss_info *ss_info = ss->info;
+  if (ss_info->type == GFC_SS_SECTION)
+return true;
+
+  if (!(ss_info->type == GFC_SS_INTRINSIC
+   && ss_info->expr
+   && ss_info->expr->expr_type == EXPR_FUNCTION))
+return false;
+
+  gfc_intrinsic_sym *isym = ss_info->expr->value.function.isym;
+  if (!(isym
+   && (isym->id == GFC_ISYM_MAXLOC
+   || isym->id == GFC_ISYM_MINLOC)))
+return false;
+
+  return gfc_inline_intrinsic_function_p (ss_info->expr);
+}
+
+
 /* Calculates the range start and stride for a SS chain.  Also gets the
descriptor and data pointer.  The range of vector subscripts is the size
of the vector.  Array bounds are also checked.  */
@@ -4977,10 +5006,19 @@ done:
info->data = gfc_conv_array_data (info->descriptor);
info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
 
-   info->offset = gfc_index_zero_node;
+   gfc_expr *array = expr->value.function.actual->expr;
+   tree rank = build_int_cst (gfc_array_index_type, array->rank);
+
+   tree tmp = fold_build2_loc (input_location, MINUS_EXPR,
+

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:64fa63b902146982db4fd81d8240aef16d24311d

commit 64fa63b902146982db4fd81d8240aef16d24311d
Author: Mikael Morin 
Date:   Fri Nov 17 15:40:55 2023 +0100

fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]

Enable the generation of inline code for MINLOC/MAXLOC when argument ARRAY
is of integral type, DIM is not present, and MASK is present and is scalar
(only absent MASK or rank 1 ARRAY were inlined before).

Scalar masks are implemented with a wrapping condition around the code one
would generate if MASK wasn't present, so they are easy to support once
inline code without MASK is working.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate
variable initialization for each dimension in the else branch of
the toplevel condition.
(gfc_inline_intrinsic_function_p): Return TRUE for scalar MASK.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_7.f90: Additionally accept the error 
message
reported by the scalarizer.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 13 -
 gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 |  4 ++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index ac8bd2d48123..855208717973 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5886,7 +5886,6 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   /* For a scalar mask, enclose the loop in an if statement.  */
   if (maskexpr && maskss == NULL)
 {
-  gcc_assert (loop.dimen == 1);
   tree ifmask;
 
   gfc_init_se (&maskse, NULL);
@@ -5901,7 +5900,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 the pos variable the same way as above.  */
 
   gfc_init_block (&elseblock);
-  gfc_add_modify (&elseblock, pos[0], gfc_index_zero_node);
+  for (int i = 0; i < loop.dimen; i++)
+   gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node);
   elsetmp = gfc_finish_block (&elseblock);
   ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask);
   tmp = build3_v (COND_EXPR, ifmask, tmp, elsetmp);
@@ -11795,9 +11795,12 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
if (array->rank == 1)
  return true;
 
-   if (array->ts.type == BT_INTEGER
-   && dim == nullptr
-   && mask == nullptr)
+   if (array->ts.type != BT_INTEGER
+   || dim != nullptr)
+ return false;
+
+   if (mask == nullptr
+   || mask->rank == 0)
  return true;
 
return false;
diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 
b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
index 206a29b149da..3aa9d3dcebee 100644
--- a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
+++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90
@@ -1,6 +1,6 @@
 ! { dg-do run }
 ! { dg-options "-fbounds-check" }
-! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2" }
+! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, 
should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }
 module tst
 contains
   subroutine foo(res)
@@ -18,4 +18,4 @@ program main
   integer :: res(3)
   call foo(res)
 end program main
-! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2" }
+! { dg-output "Fortran runtime error: Incorrect extent in return value of 
MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of 
array 'res' .3/2." }


[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:ebde7ff486ec700d59eb2cc530c3ece3f9a07f67

commit ebde7ff486ec700d59eb2cc530c3ece3f9a07f67
Author: Mikael Morin 
Date:   Fri Nov 17 16:47:26 2023 +0100

fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608]

Enable generation of inline MINLOC/MAXLOC code in the case where DIM
is not present, and either ARRAY is of floating point type or MASK is an
array.  Those cases are the remaining bits to fully support inlining of
non-CHARACTER MINLOC/MAXLOC without DIM.  They are treated together because
they generate similar code, the NANs for REAL types being handled a bit like
a second level of masking.  These are the cases for which we generate two
sets of loops.

This change affects the code generating the second loop, that was previously
accessible only in the cases where ARRAY has rank 1.  The single variable
initialization and update are changed to apply to multiple variables, one
per dimension.

This change generates slightly worse code if ARRAY has rank 1.  Indeed
the code we used to generate was:

for (idx1 in lower..upper)
  {
...
if (...)
  {
...
break;
  }
  }
for (idx2 in idx1..upper)
  {
...
  }

which avoided starting the second loop from lower, skipping in the second
loop the elements already processed in the first one.  Unfortunately,
extending that code the obvious way to apply to rank > 1 leads to wrong
code:

for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in index11..upper1)
  {
for (idx22 in index12..upper2)
  {
...
  }
  }

That code is incorrect, as the loop over idx22, being nested, may be run
more than once, and the second run should restart from lower2, not index12.
So with this change, we generate instead as second set of loops:

...
second_loop:
for (idx21 in lower1..upper1)
  {
for (idx22 in lower2..upper2)
  {
...
  }
  }

which means the second set of loops processes again elements already
processed by the first one, and the rank 1 case becomes:

for (idx1 in lower..upper)
  {
...
if (...)
  {
...
break;
  }
  }
for (idx2 in lower..upper)
  {
...
  }

processing the first elements twice as well, which was not the case
before.  A later change will avoid the duplicate processing and restore
the generated code in the rank 1 case.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Initialize
and update all the variables.  Put the label and goto in the
outermost scalarizer loop.  Don't start the second loop where the
first stopped.
(gfc_inline_intrinsic_function_p): Also return TRUE for array MASK
or for any REAL type.

gcc/testsuite/ChangeLog:

* gfortran.dg/maxloc_bounds_5.f90: Additionally accept error
messages reported by the scalarizer.
* gfortran.dg/maxloc_bounds_6.f90: Ditto.

Diff:
---
 gcc/fortran/trans-intrinsic.cc| 127 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_5.f90 |   4 +-
 gcc/testsuite/gfortran.dg/maxloc_bounds_6.f90 |   4 +-
 3 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 855208717973..bae3b49a9498 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5332,12 +5332,55 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
   if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
   S++;
 }
-   B: ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
-  case and wrap the result in an array.
-   C: ARRAY has rank > 1, NANs are not supported, and DIM and MASK are absent.
-  Generate code similar to the single loop scalar case, but using one
-  variable per dimension, for example if ARRAY has rank 2:
-  4) NAN's aren't supported, no MASK:
+   B: Array result, non-CHARACTER type, DIM

[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608]

2024-07-30 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:215d87c9e87f09f7b49dd679fdccb6fa22c02f74

commit 215d87c9e87f09f7b49dd679fdccb6fa22c02f74
Author: Mikael Morin 
Date:   Thu Jul 25 12:27:09 2024 +0200

fortran: Continue MINLOC/MAXLOC second loop where the first stopped 
[PR90608]

Continue the second set of loops where the first one stopped in the
generated inline MINLOC/MAXLOC code in the cases where the generated code
contains two sets of loops.  This fixes a regression that was introduced
when enabling the generation of inline MINLOC/MAXLOC code with ARRAY of rank
greater than 1, non-scalar MASK and no DIM arguments.

In the cases where two sets of loops are generated as inline MINLOC/MAXLOC
code, we previously generated code such as (for rank 2 ARRAY, so with two
levels of nesting):

for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in lower1..upper1)
  {
for (idx22 in lower2..upper2)
  {
...
  }
  }

which means we process the first elements twice, once in the first set
of loops and once in the second one.  This change avoids this duplicate
processing by using a conditional as lower bound for the second set of
loops, generating code like:

second_loop_entry = false;
for (idx11 in lower1..upper1)
  {
for (idx12 in lower2..upper2)
  {
...
if (...)
  {
...
second_loop_entry = true;
goto second_loop;
  }
  }
  }
second_loop:
for (idx21 in (second_loop_entry ? idx11 : lower1)..upper1)
  {
for (idx22 in (second_loop_entry ? idx12 : lower2)..upper2)
  {
...
second_loop_entry = false;
  }
  }

It was expected that the compiler optimizations would be able to remove the
state variable second_loop_entry.  This is the case if ARRAY has rank 1 (so
without loop nesting): the variable is removed and the loop bounds become
unconditional, which restores the previously generated code, fully fixing the
regression.  For larger rank, unfortunately, the state variable and
conditional loop bounds remain, but those cases were previously using
library calls, so it's not a regression.

PR fortran/90608

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate a set
of index variables.  Set them using the loop indexes before leaving
the first set of loops.  Generate a new loop entry predicate.
Set it before leaving the first set of loops.  Clear it in the body
of the second set of loops.  For the second set of loops, update
each loop variable to use the corresponding index variable if the
predicate variable is set.

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 33 +++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index bae3b49a9498..29367c69d16b 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5342,6 +5342,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 pos0 = 0;
 pos1 = 1
 S1 = from1;
+second_loop_entry = false;
 while (S1 <= to1) {
   S0 = from0;
   while (s0 <= to0 {
@@ -5354,6 +5355,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 limit = a[S1][S0];
 pos0 = S0 + (1 - from0);
 pos1 = S1 + (1 - from1);
+second_loop_entry = true;
 goto lab1;
   }
 }
@@ -5363,9 +5365,9 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 }
 goto lab2;
 lab1:;
-S1 = from1;
+S1 = second_loop_entry ? S1 : from1;
 while (S1 <= to1) {
-  S0 = from0;
+  S0 = second_loop_entry ? S0 : from0;
   while (S0 <= to0) {
 if (mask[S1][S0])
   if (a[S1][S0] < limit) {
@@ -5373,6 +5375,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
 pos0 = S + (1 - from0);
 pos1 = S + (1 - from1);
   }
+second_loop_entry = false;
 S0++;
   }
   S1++;
@@ -5444,6 +5447,7 

[gcc r15-2408] libstdc++: Fix fs::hard_link_count behaviour on MinGW [PR113663]

2024-07-30 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:658193658f05e9a8ebf0bce8bab1f43bfee1

commit r15-2408-g658193658f05e9a8ebf0bce8bab1f43bfee1
Author: Lennox Shou Hao Ho 
Date:   Mon Jul 29 21:09:27 2024 +0100

libstdc++: Fix fs::hard_link_count behaviour on MinGW [PR113663]

std::filesystem::hard_link_count() always returns 1 on
mingw-w64ucrt-11.0.1-r3 on Windows 10 19045

hard_link_count() queries _wstat64() on MinGW-w64.
The MSFT documentation claims _wstat64() will always return 1 on *non*-NTFS
volumes:

https://learn.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2013/14h5k7ff(v=vs.120)

My tests suggest that is not always true -
hard_link_count()/_wstat64() still returns 1 on NTFS.
GetFileInformationByHandle does return the correct result of 2.
Please see the PR for a minimal repro.

This patch changes the Windows implementation to always call
GetFileInformationByHandle.

PR libstdc++/113663

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc (fs::equivalent): Moved helper class
auto_handle to anonymous namespace as auto_win_file_handle.
(fs::hard_link_count): Changed Windows implementation to use
information provided by GetFileInformationByHandle which is more
reliable.
* testsuite/27_io/filesystem/operations/hard_link_count.cc: New
test.

Signed-off-by: "Lennox" Shou Hao Ho 
Reviewed-by: Jonathan Wakely 

Diff:
---
 libstdc++-v3/src/c++17/fs_ops.cc   | 59 ++
 .../27_io/filesystem/operations/hard_link_count.cc | 37 ++
 2 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index 07bc2a0fa88d..81227c49dfde 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -822,6 +822,34 @@ fs::equivalent(const path& p1, const path& p2)
   return result;
 }
 
+#if _GLIBCXX_FILESYSTEM_IS_WINDOWS
+namespace
+{
+  // An RAII type that opens a handle for an existing file.
+  struct auto_win_file_handle
+  {
+explicit
+auto_win_file_handle(const fs::path& p_)
+: handle(CreateFileW(p_.c_str(), 0,
+FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0))
+{ }
+
+~auto_win_file_handle()
+{ if (*this) CloseHandle(handle); }
+
+explicit operator bool() const
+{ return handle != INVALID_HANDLE_VALUE; }
+
+bool get_info()
+{ return GetFileInformationByHandle(handle, &info); }
+
+HANDLE handle;
+BY_HANDLE_FILE_INFORMATION info;
+  };
+}
+#endif
+
 bool
 fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept
 {
@@ -858,27 +886,8 @@ fs::equivalent(const path& p1, const path& p2, error_code& 
ec) noexcept
   if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev)
return false;
 
-  struct auto_handle {
-   explicit auto_handle(const path& p_)
-   : handle(CreateFileW(p_.c_str(), 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0))
-   { }
-
-   ~auto_handle()
-   { if (*this) CloseHandle(handle); }
-
-   explicit operator bool() const
-   { return handle != INVALID_HANDLE_VALUE; }
-
-   bool get_info()
-   { return GetFileInformationByHandle(handle, &info); }
-
-   HANDLE handle;
-   BY_HANDLE_FILE_INFORMATION info;
-  };
-  auto_handle h1(p1);
-  auto_handle h2(p2);
+  auto_win_file_handle h1(p1);
+  auto_win_file_handle h2(p2);
   if (!h1 || !h2)
{
  if (!h1 && !h2)
@@ -982,7 +991,13 @@ fs::hard_link_count(const path& p)
 std::uintmax_t
 fs::hard_link_count(const path& p, error_code& ec) noexcept
 {
-#ifdef _GLIBCXX_HAVE_SYS_STAT_H
+#if _GLIBCXX_FILESYSTEM_IS_WINDOWS
+  auto_win_file_handle h(p);
+  if (h && h.get_info())
+return static_cast<uintmax_t>(h.info.nNumberOfLinks);
+  ec = __last_system_error();
+  return static_cast<uintmax_t>(-1);
+#elif defined _GLIBCXX_HAVE_SYS_STAT_H
   return do_stat(p, ec, std::mem_fn(&stat_type::st_nlink),
 static_cast<uintmax_t>(-1));
 #else
diff --git 
a/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc
new file mode 100644
index ..8b2fb4f190e2
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc
@@ -0,0 +1,37 @@
+// { dg-do run { target c++17 } }
+// { dg-require-filesystem-ts "" }
+
+#include 
+#include 
+#include 
+
+namespace fs = std::filesystem;
+
+void test01()
+{
+  // PR libstdc++/113663
+
+  fs::path p1 = __gnu_test::nonexistent_path();
+  VERIFY( !fs::exists(p1) );
+
+  __gnu_test::scoped_file f1(p1);
+  VERIFY( fs::exists(p1) );
+
+  VERIFY( fs::hard_link_count(p1) == 1 );
+
+ 

[gcc r15-2409] libstdc++: Fix overwriting files with fs::copy_file on Windows

2024-07-30 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:017e3f89b081e4828a588a3bd27b5feacea042b7

commit r15-2409-g017e3f89b081e4828a588a3bd27b5feacea042b7
Author: Jonathan Wakely 
Date:   Tue Jul 30 10:55:55 2024 +0100

libstdc++: Fix overwriting files with fs::copy_file on Windows

There are no inode numbers on Windows filesystems, so stat_type::st_ino
is always zero and the check for equivalent files in do_copy_file was
incorrectly identifying distinct files as equivalent. This caused
copy_file to incorrectly report errors when trying to overwrite existing
files.

The fs::equivalent function already does the right thing on Windows, so
factor that logic out into a new function that can be reused by
fs::copy_file.

The tests for fs::copy_file were quite inadequate, so this also adds
checks for that function's error conditions.

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc (auto_win_file_handle): Change constructor
parameter from const path& to const wchar_t*.
(fs::equiv_files): New function.
(fs::equivalent): Use equiv_files.
* src/filesystem/ops-common.h (fs::equiv_files): Declare.
(do_copy_file): Use equiv_files.
* src/filesystem/ops.cc (fs::equiv_files): Define.
(fs::copy, fs::equivalent): Use equiv_files.
* testsuite/27_io/filesystem/operations/copy.cc: Test
overwriting directory contents recursively.
* testsuite/27_io/filesystem/operations/copy_file.cc: Test
overwriting existing files.

Diff:
---
 libstdc++-v3/src/c++17/fs_ops.cc   |  71 +++-
 libstdc++-v3/src/filesystem/ops-common.h   |  12 +-
 libstdc++-v3/src/filesystem/ops.cc |  18 ++-
 .../testsuite/27_io/filesystem/operations/copy.cc  |   9 ++
 .../27_io/filesystem/operations/copy_file.cc   | 122 +
 5 files changed, 199 insertions(+), 33 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index 81227c49dfde..7ffdce67782a 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -350,7 +350,7 @@ fs::copy(const path& from, const path& to, copy_options 
options,
   f = make_file_status(from_st);
 
   if (exists(t) && !is_other(t) && !is_other(f)
-  && to_st.st_dev == from_st.st_dev && to_st.st_ino == from_st.st_ino)
+  && fs::equiv_files(from.c_str(), from_st, to.c_str(), to_st, ec))
 {
   ec = std::make_error_code(std::errc::file_exists);
   return;
@@ -829,8 +829,8 @@ namespace
   struct auto_win_file_handle
   {
 explicit
-auto_win_file_handle(const fs::path& p_)
-: handle(CreateFileW(p_.c_str(), 0,
+auto_win_file_handle(const wchar_t* p)
+: handle(CreateFileW(p, 0,
 FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
 0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0))
 { }
@@ -850,6 +850,44 @@ namespace
 }
 #endif
 
+#ifdef _GLIBCXX_HAVE_SYS_STAT_H
+#ifdef NEED_DO_COPY_FILE // Only define this once, not in cow-ops.o too
+bool
+fs::equiv_files([[maybe_unused]] const char_type* p1, const stat_type& st1,
+   [[maybe_unused]] const char_type* p2, const stat_type& st2,
+   [[maybe_unused]] error_code& ec)
+{
+#if ! _GLIBCXX_FILESYSTEM_IS_WINDOWS
+  // For POSIX the device ID and inode number uniquely identify a file.
+  return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
+#else
+  // For Windows st_ino is not set, so can't be used to distinguish files.
+  // We can compare modes and device IDs as a cheap initial check:
+  if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev)
+return false;
+
+  // Need to use GetFileInformationByHandle to get more info about the files.
+  auto_win_file_handle h1(p1);
+  auto_win_file_handle h2(p2);
+  if (!h1 || !h2)
+{
+  if (!h1 && !h2)
+   ec = __last_system_error();
+  return false;
+}
+  if (!h1.get_info() || !h2.get_info())
+{
+  ec = __last_system_error();
+  return false;
+}
+  return h1.info.dwVolumeSerialNumber == h2.info.dwVolumeSerialNumber
+  && h1.info.nFileIndexHigh == h2.info.nFileIndexHigh
+  && h1.info.nFileIndexLow == h2.info.nFileIndexLow;
+#endif // _GLIBCXX_FILESYSTEM_IS_WINDOWS
+}
+#endif // NEED_DO_COPY_FILE
+#endif // _GLIBCXX_HAVE_SYS_STAT_H
+
 bool
 fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept
 {
@@ -881,30 +919,7 @@ fs::equivalent(const path& p1, const path& p2, error_code& 
ec) noexcept
   ec.clear();
   if (is_other(s1) || is_other(s2))
return false;
-#if _GLIBCXX_FILESYSTEM_IS_WINDOWS
-  // st_ino is not set, so can't be used to distinguish files
-  if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev)
-   return false;
-
-  auto_win_file_handle h1(p1);
-  auto_win_file_handle h2(p2);
-  if (!h1 || !h2)
-   

[gcc r15-2410] RISC-V: Remove configure check for zabha

2024-07-30 Thread Patrick O'Neill via Gcc-cvs
https://gcc.gnu.org/g:c0af64af636a801850fc8fabee12635ec73daa22

commit r15-2410-gc0af64af636a801850fc8fabee12635ec73daa22
Author: Patrick O'Neill 
Date:   Mon Jul 29 19:52:02 2024 -0700

RISC-V: Remove configure check for zabha

This patch removes the zabha configure check, since it's not a breaking
change, and updates the existing zaamo/zalrsc comment.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_subset_list::to_string): Remove zabha configure check
handling and clarify zaamo/zalrsc comment.
* config.in: Regenerate.
* configure: Regenerate.
* configure.ac: Remove zabha configure check.

Signed-off-by: Patrick O'Neill 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 12 +++-
 gcc/config.in   |  6 --
 gcc/configure   | 31 ---
 gcc/configure.ac|  5 -
 4 files changed, 3 insertions(+), 51 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 682826c0e344..d2912877784d 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -855,7 +855,6 @@ riscv_subset_list::to_string (bool version_p) const
 
   bool skip_zifencei = false;
   bool skip_zaamo_zalrsc = false;
-  bool skip_zabha = false;
   bool skip_zicsr = false;
   bool i2p0 = false;
 
@@ -884,13 +883,11 @@ riscv_subset_list::to_string (bool version_p) const
   skip_zifencei = true;
 #endif
 #ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
-  /* Skip since binutils 2.42 and earlier don't recognize zaamo/zalrsc.  */
+  /* Skip since binutils 2.42 and earlier don't recognize zaamo/zalrsc.
+ Expanding 'a' to zaamo/zalrsc would otherwise break compilations
+ for users with an older version of binutils.  */
   skip_zaamo_zalrsc = true;
 #endif
-#ifndef HAVE_AS_MARCH_ZABHA
-  /* Skip since binutils 2.42 and earlier don't recognize zabha.  */
-  skip_zabha = true;
-#endif
 
   for (subset = m_head; subset != NULL; subset = subset->next)
 {
@@ -908,9 +905,6 @@ riscv_subset_list::to_string (bool version_p) const
   if (skip_zaamo_zalrsc && subset->name == "zalrsc")
continue;
 
-  if (skip_zabha && subset->name == "zabha")
-   continue;
-
   /* For !version_p, we only separate extension with underline for
 multi-letter extension.  */
   if (!first &&
diff --git a/gcc/config.in b/gcc/config.in
index bc819005bd62..3af153eaec5c 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -635,12 +635,6 @@
 #endif
 
 
-/* Define if the assembler understands -march=rv*_zabha. */
-#ifndef USED_FOR_TARGET
-#undef HAVE_AS_MARCH_ZABHA
-#endif
-
-
 /* Define if the assembler understands -march=rv*_zifencei. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_MARCH_ZIFENCEI
diff --git a/gcc/configure b/gcc/configure
index 01acca7fb5cc..7541bdeb7248 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -30882,37 +30882,6 @@ if test $gcc_cv_as_riscv_march_zaamo_zalrsc = yes; then
 
 $as_echo "#define HAVE_AS_MARCH_ZAAMO_ZALRSC 1" >>confdefs.h
 
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
-march=rv32i_zabha support" >&5
-$as_echo_n "checking assembler for -march=rv32i_zabha support... " >&6; }
-if ${gcc_cv_as_riscv_march_zabha+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  gcc_cv_as_riscv_march_zabha=no
-  if test x$gcc_cv_as != x; then
-$as_echo '' > conftest.s
-if { ac_try='$gcc_cv_as $gcc_cv_as_flags -march=rv32i_zabha -o conftest.o 
conftest.s >&5'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }
-then
-   gcc_cv_as_riscv_march_zabha=yes
-else
-  echo "configure: failed program was" >&5
-  cat conftest.s >&5
-fi
-rm -f conftest.o conftest.s
-  fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_riscv_march_zabha" 
>&5
-$as_echo "$gcc_cv_as_riscv_march_zabha" >&6; }
-if test $gcc_cv_as_riscv_march_zabha = yes; then
-
-$as_echo "#define HAVE_AS_MARCH_ZABHA 1" >>confdefs.h
-
 fi
 
 ;;
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 3f20c107b6aa..52c1780379d5 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5461,11 +5461,6 @@ configured with --enable-newlib-nano-formatted-io.])
   [-march=rv32i_zaamo_zalrsc],,,
   [AC_DEFINE(HAVE_AS_MARCH_ZAAMO_ZALRSC, 1,
 [Define if the assembler understands 
-march=rv*_zaamo_zalrsc.])])
-gcc_GAS_CHECK_FEATURE([-march=rv32i_zabha support],
-  gcc_cv_as_riscv_march_zabha,
-  [-march=rv32i_zabha],,,
-  [AC_DEFINE(HAVE_AS_MARCH_ZABHA, 1,
-[Define if the assembler understands -march=rv*_zabha.])])
 ;;
 loongarch*-*-*)
 gcc_GAS_CHECK_FEATURE([.dtprelword support],


[gcc r15-2411] RISC-V: Add basic support for the Zacas extension

2024-07-30 Thread Patrick O'Neill via Gcc-cvs
https://gcc.gnu.org/g:11c2453a16b725b7fb67778e1ab4636a51a1217d

commit r15-2411-g11c2453a16b725b7fb67778e1ab4636a51a1217d
Author: Gianluca Guida 
Date:   Mon Jul 29 15:13:46 2024 -0700

RISC-V: Add basic support for the Zacas extension

This patch adds support for amocas.{b|h|w|d}. Support for amocas.q
(64/128 bit cas for rv32/64) will be added in a future patch.

Extension: https://github.com/riscv/riscv-zacas
Ratification: https://jira.riscv.org/browse/RVS-680

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: Add zacas extension.
* config/riscv/arch-canonicalize: Make zacas imply zaamo.
* config/riscv/riscv.opt: Add zacas.
* config/riscv/sync.md (zacas_atomic_cas_value): New pattern.
(atomic_compare_and_swap): Use new pattern for 
compare-and-swap ops.
(zalrsc_atomic_cas_value_strong): Rename 
atomic_cas_value_strong.
* doc/sourcebuild.texi: Add Zacas documentation.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add zacas testsuite infra support.
* 
gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-acquire-release.c:
Remove zacas to continue to test the lr/sc pairs.
* gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-acquire.c: 
Ditto.
* gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-consume.c: 
Ditto.
* gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-relaxed.c: 
Ditto.
* gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-release.c: 
Ditto.
* 
gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-seq-cst-relaxed.c: Ditto.
* gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-seq-cst.c: 
Ditto.
* 
gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-acquire-release.c: Ditto.
* gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-acquire.c: 
Ditto.
* gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-consume.c: 
Ditto.
* gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-relaxed.c: 
Ditto.
* gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-release.c: 
Ditto.
* 
gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-seq-cst-relaxed.c: Ditto.
* gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-seq-cst.c: 
Ditto.
* gcc.target/riscv/amo/zabha-zacas-preferred-over-zalrsc.c: New 
test.
* gcc.target/riscv/amo/zacas-char-requires-zabha.c: New test.
* gcc.target/riscv/amo/zacas-char-requires-zacas.c: New test.
* gcc.target/riscv/amo/zacas-preferred-over-zalrsc.c: New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-acq-rel.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-acquire.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-relaxed.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-release.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-seq-cst.c: 
New test.
* 
gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-compatability-mapping-no-fence.c:
New test.
* 
gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-compatability-mapping.cc: New 
test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-acq-rel.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-acquire.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-relaxed.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-release.c: 
New test.
* gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-seq-cst.c: 
New test.
* 
gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-acq-rel.c: New test.
* 
gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-acquire.c: New test.
* 
gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-relaxed.c: New test.
* 
gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-release.c: New test.
* 
gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-seq-cst.c: New test.
* gcc.target/riscv/amo/zacas-ztso-compare-exchange-char-seq-cst.c: 
New test.
* gcc.target/riscv/amo/zacas-ztso-compare-exchange-char.c: New test.
* 
gcc.target/riscv/amo/zacas-ztso-compare-exchange-compatability-mapping-no-fence.c:
New test.
* 
gcc.target/riscv/amo/zacas-ztso-compare-exchange-compatability-mapping.cc: New 
test.
* gcc.target/riscv/amo/zacas-ztso-compare-exchange-int-seq-cst.c: 
New test.
* gcc.target/riscv/amo/zacas-ztso-compare-exchange-int.c: New test.
* gcc.target/riscv/amo/zacas-ztso-compare-exchange-short-seq-cst.c: 
New test.
* gc

[gcc r15-2412] testsuite: fix whitespace in dg-do compile directives

2024-07-30 Thread Sam James via Gcc-cvs
https://gcc.gnu.org/g:2e662dedf84aa23fdff7bceca040432bf9f1ab72

commit r15-2412-g2e662dedf84aa23fdff7bceca040432bf9f1ab72
Author: Sam James 
Date:   Tue Jul 30 12:20:47 2024 +0100

testsuite: fix whitespace in dg-do compile directives

In practice nothing seems to change here, at least on x86_64-pc-linux-gnu,
but it is important to fix nonetheless in case people copy it.

PR rtl-optimization/48633
PR tree-optimization/83072
PR tree-optimization/83073
PR tree-optimization/96542
PR tree-optimization/96707
PR tree-optimization/97567
PR target/69225
PR target/89929
PR target/96562
* g++.dg/pr48633.C: Fix whitespace in dg directive.
* g++.dg/pr96707.C: Likewise.
* g++.target/i386/mv28.C: Likewise.
* gcc.dg/Warray-bounds-flex-arrays-1.c: Likewise.
* gcc.dg/pr83072-2.c: Likewise.
* gcc.dg/pr83073.c: Likewise.
* gcc.dg/pr96542.c: Likewise.
* gcc.dg/pr97567-2.c: Likewise.
* gcc.target/i386/avx512fp16-11a.c: Likewise.
* gcc.target/i386/avx512fp16-13.c: Likewise.
* gcc.target/i386/avx512fp16-14.c: Likewise.
* gcc.target/i386/avx512fp16-conjugation-1.c: Likewise.
* gcc.target/i386/avx512fp16-neg-1a.c: Likewise.
* gcc.target/i386/avx512fp16-set1-pch-1a.c: Likewise.
* gcc.target/i386/avx512fp16vl-conjugation-1.c: Likewise.
* gcc.target/i386/avx512fp16vl-neg-1a.c: Likewise.
* gcc.target/i386/avx512fp16vl-set1-pch-1a.c: Likewise.
* gcc.target/i386/avx512vlfp16-11a.c: Likewise.
* gcc.target/i386/pr69225-1.c: Likewise.
* gcc.target/i386/pr69225-2.c: Likewise.
* gcc.target/i386/pr69225-3.c: Likewise.
* gcc.target/i386/pr69225-4.c: Likewise.
* gcc.target/i386/pr69225-5.c: Likewise.
* gcc.target/i386/pr69225-6.c: Likewise.
* gcc.target/i386/pr69225-7.c: Likewise.
* gcc.target/i386/pr96562-1.c: Likewise.
* gcc.target/riscv/rv32e_stack.c: Likewise.
* gfortran.dg/c-interop/removed-restrictions-3.f90: Likewise.
* gnat.dg/renaming1.adb: Likewise.

Diff:
---
 gcc/testsuite/g++.dg/pr48633.C | 2 +-
 gcc/testsuite/g++.dg/pr96707.C | 2 +-
 gcc/testsuite/g++.target/i386/mv28.C   | 2 +-
 gcc/testsuite/gcc.dg/Warray-bounds-flex-arrays-1.c | 2 +-
 gcc/testsuite/gcc.dg/pr83072-2.c   | 2 +-
 gcc/testsuite/gcc.dg/pr83073.c | 2 +-
 gcc/testsuite/gcc.dg/pr96542.c | 2 +-
 gcc/testsuite/gcc.dg/pr97567-2.c   | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16-11a.c | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16-13.c  | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16-14.c  | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16-conjugation-1.c   | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16-neg-1a.c  | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16vl-conjugation-1.c | 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1a.c| 2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c   | 2 +-
 gcc/testsuite/gcc.target/i386/avx512vlfp16-11a.c   | 2 +-
 gcc/testsuite/gcc.target/i386/pr69225-1.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr69225-2.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr69225-3.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr69225-4.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr69225-5.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr69225-6.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr69225-7.c  | 2 +-
 gcc/testsuite/gcc.target/i386/pr96562-1.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/rv32e_stack.c   | 2 +-
 gcc/testsuite/gfortran.dg/c-interop/removed-restrictions-3.f90 | 2 +-
 gcc/testsuite/gnat.dg/renaming1.adb| 2 +-
 29 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/gcc/testsuite/g++.dg/pr48633.C b/gcc/testsuite/g++.dg/pr48633.C
index 90f053a74c88..efcdab02acbd 100644
--- a/gcc/testsuite/g++.dg/pr48633.C
+++ b/gcc/testsuite/g++.dg/pr48633.C
@@ -1,4 +1,4 @@
-/* { dg-do compile} */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fira-region=all -fnon-call-exceptions" } */
 extern long double getme (void);
 extern void useme (long double);
diff --git a/gcc/testsuite/g++.dg/pr96707.C b/gcc/testsuite/g++.dg/pr96707.C
index 2653fe3d0431..868ee416e269 100644
--- a/gcc/testsuite/g++.dg/pr96707.C
+++ b/gcc

[gcc r15-2413] testsuite: fix whitespace in dg-do preprocess directive

2024-07-30 Thread Sam James via Gcc-cvs
https://gcc.gnu.org/g:7f1aa73bde0babde0ed3ff58d7226b86d25d075d

commit r15-2413-g7f1aa73bde0babde0ed3ff58d7226b86d25d075d
Author: Sam James 
Date:   Tue Jul 30 12:21:42 2024 +0100

testsuite: fix whitespace in dg-do preprocess directive

PR preprocessor/90581
* c-c++-common/cpp/fmax-include-depth.c: Fix whitespace in dg 
directive.

Diff:
---
 gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c 
b/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c
index bd8cc3adcdd7..134c29805c89 100644
--- a/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c
+++ b/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c
@@ -1,4 +1,4 @@
-/* { dg-do preprocess} */
+/* { dg-do preprocess } */
 /* { dg-options "-fmax-include-depth=1" } */
 
 #include "fmax-include-depth-1b.h" /* { dg-error ".include nested depth 1 
exceeds maximum of 1 .use -fmax-include-depth=DEPTH to increase the maximum." } 
*/


[gcc r15-2414] testsuite: fix whitespace in dg-do assemble directive

2024-07-30 Thread Sam James via Gcc-cvs
https://gcc.gnu.org/g:2d105efd6f60dce4d57380cf9820a4dd52cc8abb

commit r15-2414-g2d105efd6f60dce4d57380cf9820a4dd52cc8abb
Author: Sam James 
Date:   Tue Jul 30 17:10:01 2024 +0100

testsuite: fix whitespace in dg-do assemble directive

* gcc.target/aarch64/simd/vmmla.c: Fix whitespace in dg directive.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/simd/vmmla.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c 
b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
index 5eec2b5cfb96..777decc56a20 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c
@@ -1,4 +1,4 @@
-/* { dg-do assemble} */
+/* { dg-do assemble } */
 /* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
 /* { dg-additional-options "-march=armv8.2-a+i8mm" } */


[gcc r15-2415] libbacktrace: fix syntax of Windows registration functions

2024-07-30 Thread Ian Lance Taylor via Gcc-cvs
https://gcc.gnu.org/g:37aa98f79a7fbad620c0318a48552b5442a49456

commit r15-2415-g37aa98f79a7fbad620c0318a48552b5442a49456
Author: Ian Lance Taylor 
Date:   Tue Jul 30 09:25:03 2024 -0700

libbacktrace: fix syntax of Windows registration functions

Adjust the syntax to keep MSVC happy.

Fixes https://github.com/ianlancetaylor/libbacktrace/issues/131

* pecoff.c (LDR_DLL_NOTIFICATION): Put function modifier
inside parentheses.
(LDR_REGISTER_FUNCTION): Likewise.

Diff:
---
 libbacktrace/pecoff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libbacktrace/pecoff.c b/libbacktrace/pecoff.c
index 636e1b11296b..ccd5ccbce2ce 100644
--- a/libbacktrace/pecoff.c
+++ b/libbacktrace/pecoff.c
@@ -83,10 +83,10 @@ struct dll_notification_data
 #define LDR_DLL_NOTIFICATION_REASON_LOADED 1
 
 typedef LONG NTSTATUS;
-typedef VOID CALLBACK (*LDR_DLL_NOTIFICATION)(ULONG,
+typedef VOID (CALLBACK *LDR_DLL_NOTIFICATION)(ULONG,
  struct dll_notification_data*,
  PVOID);
-typedef NTSTATUS NTAPI (*LDR_REGISTER_FUNCTION)(ULONG,
+typedef NTSTATUS (NTAPI *LDR_REGISTER_FUNCTION)(ULONG,
LDR_DLL_NOTIFICATION, PVOID,
PVOID*);
 #endif


[gcc r15-2416] gimple ssa: Teach switch conversion to optimize powers of 2 switches

2024-07-30 Thread Filip Kastl via Gcc-cvs
https://gcc.gnu.org/g:2b3533cd871f62923e7a4f06a826f37bf0f35c5c

commit r15-2416-g2b3533cd871f62923e7a4f06a826f37bf0f35c5c
Author: Filip Kastl 
Date:   Tue Jul 30 18:40:29 2024 +0200

gimple ssa: Teach switch conversion to optimize powers of 2 switches

Sometimes a switch has case numbers that are powers of 2.  Switch
conversion usually isn't able to optimize these switches.  This patch
adds "exponential index transformation" to switch conversion.  After
switch conversion applies this transformation on the switch the index
variable of the switch becomes the exponent instead of the whole value.
For example:

switch (i)
  {
case (1 << 0): return 0;
case (1 << 1): return 1;
case (1 << 2): return 2;
...
case (1 << 30): return 30;
default: return 31;
  }

gets transformed roughly into

switch (log2(i))
  {
case 0: return 0;
case 1: return 1;
case 2: return 2;
...
case 30: return 30;
default: return 31;
  }

This enables switch conversion to further optimize the switch.

This patch only enables this transformation if there are optabs for FFS
so that the base 2 logarithm can be computed efficiently at runtime.

gcc/ChangeLog:

* tree-switch-conversion.cc (can_log2): New static function to
check if gen_log2 can be used on current target.
(gen_log2): New static function to generate efficient GIMPLE
code for taking an exact base 2 log.
(gen_pow2p): New static function to generate efficient GIMPLE
code for checking if a value is a power of 2.
(switch_conversion::switch_conversion): Track if the
transformation happened.
(switch_conversion::is_exp_index_transform_viable): New function
to decide whether the transformation should be applied.
(switch_conversion::exp_index_transform): New function to
execute the transformation.
(switch_conversion::gen_inbound_check): Don't remove the default
BB if the transformation happened.
(switch_conversion::expand): Execute the transform if it is
viable.  Skip the "sufficiently small case range" test if the
transformation is going to be executed.
* tree-switch-conversion.h: Add is_exp_index_transform_viable
and exp_index_transform.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/switch-3.c: Disable switch conversion.
* gcc.target/i386/switch-exp-transform-1.c: New test.
* gcc.target/i386/switch-exp-transform-2.c: New test.
* gcc.target/i386/switch-exp-transform-3.c: New test.

Signed-off-by: Filip Kastl 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/switch-3.c   |   2 +-
 .../gcc.target/i386/switch-exp-transform-1.c   |  32 ++
 .../gcc.target/i386/switch-exp-transform-2.c   |  35 +++
 .../gcc.target/i386/switch-exp-transform-3.c   | 148 ++
 gcc/tree-switch-conversion.cc  | 326 -
 gcc/tree-switch-conversion.h   |  18 ++
 6 files changed, 555 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c
index 44981e1d1861..83aae3843e91 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fdump-tree-switchlower1" } */
+/* { dg-options "-O2 -fdump-tree-switchlower1 -fdisable-tree-switchconv" } */
 
 int cipher_to_alg(int cipher)
 {
diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c 
b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
new file mode 100644
index 000000000000..53d31460ba37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-switchconv -mpopcnt -mbmi" } */
+
+/* Checks that exponential index transform enables switch conversion to convert
+   this switch into an array lookup.  Also checks that the "index variable is a
+   power of two" check has been generated.  */
+
+int foo(unsigned bar)
+{
+switch (bar)
+{
+case (1 << 0):
+return 1;
+case (1 << 1):
+return 2;
+case (1 << 2):
+return 3;
+case (1 << 3):
+return 4;
+case (1 << 4):
+return 8;
+case (1 << 5):
+return 13;
+case (1 << 6):
+return 21;
+default:
+return 0;
+}
+}
+
+/* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */
+/* { dg-final { scan-tree-dump "POPCOUNT" "switchconv" } } */
diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-2.c 
b/gcc/testsuite/

[gcc r15-2417] testsuite: fix whitespace in dg-require-effective-target directives

2024-07-30 Thread Sam James via Gcc-cvs
https://gcc.gnu.org/g:ee12a13d25778a1ad8a9b5dc63aadf9f4320088b

commit r15-2417-gee12a13d25778a1ad8a9b5dc63aadf9f4320088b
Author: Sam James 
Date:   Tue Jul 30 17:23:08 2024 +0100

testsuite: fix whitespace in dg-require-effective-target directives

PR middle-end/54400
PR target/98161
* gcc.dg/vect/bb-slp-layout-18.c: Fix whitespace in dg directive.
* gcc.dg/vect/bb-slp-pr54400.c: Likewise.
* gcc.target/i386/pr98161.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c | 2 +-
 gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c   | 2 +-
 gcc/testsuite/gcc.target/i386/pr98161.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c
index ff4627225074..ebbf9d2da7ca 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target vect_float} */
+/* { dg-require-effective-target vect_float } */
 /* { dg-additional-options "-w -Wno-psabi -ffast-math" } */
 
 typedef float v4sf __attribute__((vector_size(sizeof(float)*4)));
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c
index 6ecd51103ed8..745e3ced70ea 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c
@@ -1,4 +1,4 @@
-/* { dg-require-effective-target vect_float} */
+/* { dg-require-effective-target vect_float } */
 /* { dg-additional-options "-w -Wno-psabi -ffast-math" } */
 
 #include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.target/i386/pr98161.c 
b/gcc/testsuite/gcc.target/i386/pr98161.c
index 5825b9bd1dbb..8ea93325214f 100644
--- a/gcc/testsuite/gcc.target/i386/pr98161.c
+++ b/gcc/testsuite/gcc.target/i386/pr98161.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -msse4" } */
-/* { dg-require-effective-target sse4} */
+/* { dg-require-effective-target sse4 } */
 
 typedef unsigned short u16;
 typedef unsigned int   u32;


[gcc r15-2418] RISC-V: Add configure check for B extension support

2024-07-30 Thread Edwin Lu via Gcc-cvs
https://gcc.gnu.org/g:7ef8a9d4b1cea3fea3791859074df79b71abd549

commit r15-2418-g7ef8a9d4b1cea3fea3791859074df79b71abd549
Author: Edwin Lu 
Date:   Wed Jul 24 16:37:18 2024 -0700

RISC-V: Add configure check for B extension support

Binutils 2.42 and earlier don't recognize the b extension in -march
strings even though they support zba_zbb_zbs. Add a configure check so
that b is dropped from the march string when the assembler does not
accept it.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc 
(riscv_subset_list::to_string):
Skip b in march string
* config.in: Regenerate.
* configure: Regenerate.
* configure.ac: Add B assembler check

Signed-off-by: Edwin Lu 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc |  8 
 gcc/config.in   |  6 ++
 gcc/configure   | 31 +++
 gcc/configure.ac|  5 +
 4 files changed, 50 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 0c12e12cde51..1944c7785c48 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -858,6 +858,7 @@ riscv_subset_list::to_string (bool version_p) const
   bool skip_zifencei = false;
   bool skip_zaamo_zalrsc = false;
   bool skip_zicsr = false;
+  bool skip_b = false;
   bool i2p0 = false;
 
   /* For RISC-V ISA version 2.2 or earlier version, zicsr and zifencei is
@@ -890,6 +891,10 @@ riscv_subset_list::to_string (bool version_p) const
  for users with an older version of binutils.  */
   skip_zaamo_zalrsc = true;
 #endif
+#ifndef HAVE_AS_MARCH_B
+  /* Skip since binutils 2.42 and earlier don't recognize b.  */
+  skip_b = true;
+#endif
 
   for (subset = m_head; subset != NULL; subset = subset->next)
 {
@@ -907,6 +912,9 @@ riscv_subset_list::to_string (bool version_p) const
   if (skip_zaamo_zalrsc && subset->name == "zalrsc")
continue;
 
+  if (skip_b && subset->name == "b")
+   continue;
+
   /* For !version_p, we only separate extension with underline for
 multi-letter extension.  */
   if (!first &&
diff --git a/gcc/config.in b/gcc/config.in
index 3af153eaec5c..7fcabbe5061d 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -629,6 +629,12 @@
 #endif
 
 
+/* Define if the assembler understands -march=rv*_b. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_MARCH_B
+#endif
+
+
 /* Define if the assembler understands -march=rv*_zaamo_zalrsc. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_MARCH_ZAAMO_ZALRSC
diff --git a/gcc/configure b/gcc/configure
index 7541bdeb7248..557ea5fa3ac9 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -30882,6 +30882,37 @@ if test $gcc_cv_as_riscv_march_zaamo_zalrsc = yes; then
 
 $as_echo "#define HAVE_AS_MARCH_ZAAMO_ZALRSC 1" >>confdefs.h
 
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
-march=rv32i_b support" >&5
+$as_echo_n "checking assembler for -march=rv32i_b support... " >&6; }
+if ${gcc_cv_as_riscv_march_b+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_riscv_march_b=no
+  if test x$gcc_cv_as != x; then
+$as_echo '' > conftest.s
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags -march=rv32i_b -o conftest.o 
conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+   gcc_cv_as_riscv_march_b=yes
+else
+  echo "configure: failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_riscv_march_b" >&5
+$as_echo "$gcc_cv_as_riscv_march_b" >&6; }
+if test $gcc_cv_as_riscv_march_b = yes; then
+
+$as_echo "#define HAVE_AS_MARCH_B 1" >>confdefs.h
+
 fi
 
 ;;
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 52c1780379d5..eaa01d0d7e56 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5461,6 +5461,11 @@ configured with --enable-newlib-nano-formatted-io.])
   [-march=rv32i_zaamo_zalrsc],,,
   [AC_DEFINE(HAVE_AS_MARCH_ZAAMO_ZALRSC, 1,
 [Define if the assembler understands 
-march=rv*_zaamo_zalrsc.])])
+gcc_GAS_CHECK_FEATURE([-march=rv32i_b support],
+  gcc_cv_as_riscv_march_b,
+  [-march=rv32i_b],,,
+  [AC_DEFINE(HAVE_AS_MARCH_B, 1,
+[Define if the assembler understands -march=rv*_b.])])
 ;;
 loongarch*-*-*)
 gcc_GAS_CHECK_FEATURE([.dtprelword support],


[gcc r15-2419] i386/testsuite: Add testcase for fixed PR [PR51492]

2024-07-30 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:8b737ec289da83e9e2a9672be0336980616e8932

commit r15-2419-g8b737ec289da83e9e2a9672be0336980616e8932
Author: Uros Bizjak 
Date:   Tue Jul 30 20:02:36 2024 +0200

i386/testsuite: Add testcase for fixed PR [PR51492]

PR target/51492

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr51492.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr51492.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr51492.c 
b/gcc/testsuite/gcc.target/i386/pr51492.c
new file mode 100644
index 000000000000..0892e0c79a7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr51492.c
@@ -0,0 +1,19 @@
+/* PR target/51492 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define SIZE 65536
+#define WSIZE 64
+unsigned short head[SIZE] __attribute__((aligned(64)));
+
+void
+f(void)
+{
+  for (unsigned n = 0; n < SIZE; ++n) {
+unsigned short m = head[n];
+head[n] = (unsigned short)(m >= WSIZE ? m-WSIZE : 0);
+  }
+}
+
+/* { dg-final { scan-assembler "psubusw" } } */
+/* { dg-final { scan-assembler-not "paddw" } } */


[gcc r15-2420] libstdc++: Fix name of source file in comment

2024-07-30 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:df67f383d8f6a0fc6820510b500ea74ddde5001d

commit r15-2420-gdf67f383d8f6a0fc6820510b500ea74ddde5001d
Author: Jonathan Wakely 
Date:   Tue Jul 30 15:42:04 2024 +0100

libstdc++: Fix name of source file in comment

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc: Fix file name in comment.

Diff:
---
 libstdc++-v3/src/c++17/fs_ops.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index 7ffdce67782a..9606afa9f1f7 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -851,7 +851,7 @@ namespace
 #endif
 
 #ifdef _GLIBCXX_HAVE_SYS_STAT_H
-#ifdef NEED_DO_COPY_FILE // Only define this once, not in cow-ops.o too
+#ifdef NEED_DO_COPY_FILE // Only define this once, not in cow-fs_ops.o too
 bool
 fs::equiv_files([[maybe_unused]] const char_type* p1, const stat_type& st1,
[[maybe_unused]] const char_type* p2, const stat_type& st2,


[gcc r15-2421] testsuite: fix 'dg-compile' typos

2024-07-30 Thread Sam James via Gcc-cvs
https://gcc.gnu.org/g:acc70606c59e3f14072cc8a164362e728d8df5d6

commit r15-2421-gacc70606c59e3f14072cc8a164362e728d8df5d6
Author: Sam James 
Date:   Tue Jul 30 20:04:40 2024 +0100

testsuite: fix 'dg-compile' typos

'dg-compile' is not a thing, replace it with 'dg-do compile'.

PR target/68015
PR c++/83979
* c-c++-common/goacc/loop-shape.c: Fix 'dg-compile' typo.
* g++.dg/pr83979.C: Likewise.
* g++.target/aarch64/sve/acle/general-c++/attributes_2.C: Likewise.
* gcc.dg/tree-ssa/builtin-sprintf-7.c: Likewise.
* gcc.dg/tree-ssa/builtin-sprintf-8.c: Likewise.
* gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-char.c: Likewise.
* gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-short.c: Likewise.
* gcc.target/s390/20181024-1.c: Likewise.
* gcc.target/s390/addr-constraints-1.c: Likewise.
* gcc.target/s390/arch12/aghsghmgh-1.c: Likewise.
* gcc.target/s390/arch12/mul-1.c: Likewise.
* gcc.target/s390/arch13/bitops-1.c: Likewise.
* gcc.target/s390/arch13/bitops-2.c: Likewise.
* gcc.target/s390/arch13/fp-signedint-convert-1.c: Likewise.
* gcc.target/s390/arch13/fp-unsignedint-convert-1.c: Likewise.
* gcc.target/s390/arch13/popcount-1.c: Likewise.
* gcc.target/s390/pr68015.c: Likewise.
* gcc.target/s390/vector/fp-signedint-convert-1.c: Likewise.
* gcc.target/s390/vector/fp-unsignedint-convert-1.c: Likewise.
* gcc.target/s390/vector/reverse-elements-1.c: Likewise.
* gcc.target/s390/vector/reverse-elements-2.c: Likewise.
* gcc.target/s390/vector/reverse-elements-3.c: Likewise.
* gcc.target/s390/vector/reverse-elements-4.c: Likewise.
* gcc.target/s390/vector/reverse-elements-5.c: Likewise.
* gcc.target/s390/vector/reverse-elements-6.c: Likewise.
* gcc.target/s390/vector/reverse-elements-7.c: Likewise.
* gnat.dg/alignment15.adb: Likewise.
* gnat.dg/debug4.adb: Likewise.
* gnat.dg/inline21.adb: Likewise.
* gnat.dg/inline22.adb: Likewise.
* gnat.dg/opt37.adb: Likewise.
* gnat.dg/warn13.adb: Likewise.

Diff:
---
 gcc/testsuite/c-c++-common/goacc/loop-shape.c| 2 +-
 gcc/testsuite/g++.dg/pr83979.C   | 2 +-
 gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/attributes_2.C | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-7.c| 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-8.c| 2 +-
 gcc/testsuite/gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-char.c| 2 +-
 gcc/testsuite/gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-short.c   | 2 +-
 gcc/testsuite/gcc.target/s390/20181024-1.c   | 2 +-
 gcc/testsuite/gcc.target/s390/addr-constraints-1.c   | 2 +-
 gcc/testsuite/gcc.target/s390/arch12/aghsghmgh-1.c   | 2 +-
 gcc/testsuite/gcc.target/s390/arch12/mul-1.c | 2 +-
 gcc/testsuite/gcc.target/s390/arch13/bitops-1.c  | 2 +-
 gcc/testsuite/gcc.target/s390/arch13/bitops-2.c  | 2 +-
 gcc/testsuite/gcc.target/s390/arch13/fp-signedint-convert-1.c| 2 +-
 gcc/testsuite/gcc.target/s390/arch13/fp-unsignedint-convert-1.c  | 2 +-
 gcc/testsuite/gcc.target/s390/arch13/popcount-1.c| 2 +-
 gcc/testsuite/gcc.target/s390/pr68015.c  | 2 +-
 gcc/testsuite/gcc.target/s390/vector/fp-signedint-convert-1.c| 2 +-
 gcc/testsuite/gcc.target/s390/vector/fp-unsignedint-convert-1.c  | 2 +-
 gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c| 2 +-
 gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c| 2 +-
 gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c| 2 +-
 gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c| 2 +-
 gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c| 2 +-
 gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c| 2 +-
 gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c| 2 +-
 gcc/testsuite/gnat.dg/alignment15.adb| 2 +-
 gcc/testsuite/gnat.dg/debug4.adb | 2 +-
 gcc/testsuite/gnat.dg/inline21.adb   | 2 +-
 gcc/testsuite/gnat.dg/inline22.adb   | 2 +-
 gcc/testsuite/gnat.dg/opt37.adb  | 2 +-
 gcc/testsuite/gnat.dg/warn13.adb | 2 +-
 32 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/loop-shape.c 
b/gcc/testsuite/c-c++-common/goacc/loop-shape.c
index 9708f7bf5eb3..b3199b4044

[gcc(refs/users/meissner/heads/work174)] Add more processors to arch flags.

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:48f7791835b495c254452a20d4d06bc9bedd7cae

commit 48f7791835b495c254452a20d4d06bc9bedd7cae
Author: Michael Meissner 
Date:   Tue Jul 30 16:45:47 2024 -0400

Add more processors to arch flags.

2024-07-30  Michael Meissner  

* config/rs6000/rs6000-arch.def: Add support for 476, a2, and cell
processors.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch
flags for power4, power5, and power6.
* config/rs6000/rs6000.cc (get_arch_flags): Add 476, a2, and cell
processors.

Diff:
---
 gcc/config/rs6000/rs6000-arch.def | 3 +++
 gcc/config/rs6000/rs6000-c.cc | 6 +++---
 gcc/config/rs6000/rs6000.cc   | 4 
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
index 6725736076da..96f176775b59 100644
--- a/gcc/config/rs6000/rs6000-arch.def
+++ b/gcc/config/rs6000/rs6000-arch.def
@@ -37,6 +37,9 @@
the appropriate architecture flags based on the actual processor
enumeration.  */
 
+ARCH_EXPAND(PPC476,  "476")
+ARCH_EXPAND(PPCA2,   "a2")
+ARCH_EXPAND(CELL,"cell")
 ARCH_EXPAND(POWER4,  "power4")
 ARCH_EXPAND(POWER5,  "power5")
 ARCH_EXPAND(POWER6,  "power6")
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index a8a6a956874f..27f18f48e837 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -420,13 +420,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR");
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64");
-  if ((flags & OPTION_MASK_MFCRF) != 0)
+  if ((arch_flags & ARCH_MASK_POWER4) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4");
-  if ((flags & OPTION_MASK_POPCNTB) != 0)
+  if ((arch_flags & ARCH_MASK_POWER5) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5");
   if ((flags & OPTION_MASK_FPRND) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X");
-  if ((flags & OPTION_MASK_CMPB) != 0)
+  if ((arch_flags & ARCH_MASK_POWER6) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6");
   if ((arch_flags & ARCH_MASK_POWER7) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 19adc66cc801..f9ccaa67e619 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1851,14 +1851,18 @@ get_arch_flags (int cpu_index)
/* fall through.  */
 
   case PROCESSOR_POWER6:
+  case PROCESSOR_PPCA2:
ret |= ARCH_MASK_POWER6;
/* fall through.  */
 
+  case PROCESSOR_PPC476:
   case PROCESSOR_POWER5:
ret |= ARCH_MASK_POWER5;
/* fall through.  */
 
+  case PROCESSOR_CELL:
   case PROCESSOR_POWER4:
+  case PROCESSOR_PPCE6500:
ret |= ARCH_MASK_POWER4;
break;


[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a10a34fe1cb8d4c96f1888942e52f1a4216cb7f3

commit a10a34fe1cb8d4c96f1888942e52f1a4216cb7f3
Author: Michael Meissner 
Date:   Tue Jul 30 16:46:54 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 13 +
 1 file changed, 13 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 58ffd411e22d..16ad0870ec3e 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,3 +1,16 @@
+ Branch work174, patch #7 
+
+Add more processors to arch flags.
+
+2024-07-30  Michael Meissner  
+
+   * config/rs6000/rs6000-arch.def: Add support for 476, a2, and cell
+   processors.
+   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch
+   flags for power4, power5, and power6.
+   * config/rs6000/rs6000.cc (get_arch_flags): Add 476, a2, and cell
+   processors.
+
  Branch work174, patch #6 
 
 Update tests to work with architecture flags changes.


[gcc(refs/users/meissner/heads/work174)] Revert changes

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b0847289e8894dd1069958bd902bcd66df5c0c71

commit b0847289e8894dd1069958bd902bcd66df5c0c71
Author: Michael Meissner 
Date:   Tue Jul 30 16:51:34 2024 -0400

Revert changes

Diff:
---
 gcc/ChangeLog.meissner| 13 +
 gcc/config/rs6000/rs6000-arch.def |  3 ---
 gcc/config/rs6000/rs6000-c.cc |  6 +++---
 gcc/config/rs6000/rs6000.cc   |  4 
 4 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 16ad0870ec3e..0f6cd95f9269 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,15 +1,4 @@
- Branch work174, patch #7 
-
-Add more processors to arch flags.
-
-2024-07-30  Michael Meissner  
-
-   * config/rs6000/rs6000-arch.def: Add support for 476, a2, and cell
-   processors.
-   * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch
-   flags for power4, power5, and power6.
-   * config/rs6000/rs6000.cc (get_arch_flags): Add 476, a2, and cell
-   processors.
+ Branch work174, patch #7 was reverted 
 
  Branch work174, patch #6 
 
diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
index 96f176775b59..6725736076da 100644
--- a/gcc/config/rs6000/rs6000-arch.def
+++ b/gcc/config/rs6000/rs6000-arch.def
@@ -37,9 +37,6 @@
the appropriate architecture flags based on the actual processor
enumeration.  */
 
-ARCH_EXPAND(PPC476,  "476")
-ARCH_EXPAND(PPCA2,   "a2")
-ARCH_EXPAND(CELL,"cell")
 ARCH_EXPAND(POWER4,  "power4")
 ARCH_EXPAND(POWER5,  "power5")
 ARCH_EXPAND(POWER6,  "power6")
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 27f18f48e837..a8a6a956874f 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -420,13 +420,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR");
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64");
-  if ((arch_flags & ARCH_MASK_POWER4) != 0)
+  if ((flags & OPTION_MASK_MFCRF) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4");
-  if ((arch_flags & ARCH_MASK_POWER5) != 0)
+  if ((flags & OPTION_MASK_POPCNTB) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5");
   if ((flags & OPTION_MASK_FPRND) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X");
-  if ((arch_flags & ARCH_MASK_POWER6) != 0)
+  if ((flags & OPTION_MASK_CMPB) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6");
   if ((arch_flags & ARCH_MASK_POWER7) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f9ccaa67e619..19adc66cc801 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1851,18 +1851,14 @@ get_arch_flags (int cpu_index)
/* fall through.  */
 
   case PROCESSOR_POWER6:
-  case PROCESSOR_PPCA2:
ret |= ARCH_MASK_POWER6;
/* fall through.  */
 
-  case PROCESSOR_PPC476:
   case PROCESSOR_POWER5:
ret |= ARCH_MASK_POWER5;
/* fall through.  */
 
-  case PROCESSOR_CELL:
   case PROCESSOR_POWER4:
-  case PROCESSOR_PPCE6500:
ret |= ARCH_MASK_POWER4;
break;


[gcc r15-2422] libstdc++: Implement LWG 3886 for std::optional and std::expected

2024-07-30 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:a9e472c6b748abde55b5ecde2e2d98dcb2f96ded

commit r15-2422-ga9e472c6b748abde55b5ecde2e2d98dcb2f96ded
Author: Jonathan Wakely 
Date:   Thu Jul 25 23:08:14 2024 +0100

libstdc++: Implement LWG 3886 for std::optional and std::expected

This uses remove_cv_t for the default template argument used for
deducing a type for a braced-init-list used with std::optional and
std::expected.

libstdc++-v3/ChangeLog:

* include/std/expected (expected(U&&), operator=(U&&))
(value_or): Use remove_cv_t on default template argument, as per
LWG 3886.
* include/std/optional (optional(U&&), operator=(U&&))
(value_or): Likewise.
* testsuite/20_util/expected/lwg3886.cc: New test.
* testsuite/20_util/optional/cons/lwg3886.cc: New test.

Diff:
---
 libstdc++-v3/include/std/expected  |  8 +--
 libstdc++-v3/include/std/optional  | 12 ++---
 libstdc++-v3/testsuite/20_util/expected/lwg3886.cc | 58 ++
 .../testsuite/20_util/optional/cons/lwg3886.cc | 58 ++
 4 files changed, 126 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/std/expected 
b/libstdc++-v3/include/std/expected
index 515a1e6ab8f5..b8217e577fa3 100644
--- a/libstdc++-v3/include/std/expected
+++ b/libstdc++-v3/include/std/expected
@@ -468,7 +468,7 @@ namespace __expected
  std::move(__x)._M_unex);
}
 
-  template<typename _Up = _Tp>
+  template<typename _Up = remove_cv_t<_Tp>>
 requires (!is_same_v<remove_cvref_t<_Up>, expected>)
   && (!is_same_v<remove_cvref_t<_Up>, in_place_t>)
   && is_constructible_v<_Tp, _Up>
@@ -582,7 +582,7 @@ namespace __expected
return *this;
   }
 
-  template
+  template>
requires (!is_same_v>)
  && (!__expected::__is_unexpected>)
  && is_constructible_v<_Tp, _Up> && is_assignable_v<_Tp&, _Up>
@@ -818,7 +818,7 @@ namespace __expected
return std::move(_M_unex);
   }
 
-  template
+  template>
constexpr _Tp
value_or(_Up&& __v) const &
noexcept(__and_v,
@@ -832,7 +832,7 @@ namespace __expected
  return static_cast<_Tp>(std::forward<_Up>(__v));
}
 
-  template
+  template>
constexpr _Tp
value_or(_Up&& __v) &&
noexcept(__and_v,
diff --git a/libstdc++-v3/include/std/optional 
b/libstdc++-v3/include/std/optional
index 4694d594f98a..2c4cc260f90e 100644
--- a/libstdc++-v3/include/std/optional
+++ b/libstdc++-v3/include/std/optional
@@ -868,7 +868,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Converting constructors for engaged optionals.
 #ifdef _GLIBCXX_USE_CONSTRAINTS_FOR_OPTIONAL
-  template
+  template>
requires (!is_same_v>)
  && (!is_same_v>)
  && is_constructible_v<_Tp, _Up>
@@ -919,7 +919,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
: _Base(std::in_place, __il, std::forward<_Args>(__args)...)
{ }
 #else
-  template,
   _Requires<__not_self<_Up>, __not_tag<_Up>,
 is_constructible<_Tp, _Up>,
 is_convertible<_Up, _Tp>,
@@ -929,7 +929,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
noexcept(is_nothrow_constructible_v<_Tp, _Up>)
: _Base(std::in_place, std::forward<_Up>(__t)) { }
 
-  template,
   _Requires<__not_self<_Up>, __not_tag<_Up>,
 is_constructible<_Tp, _Up>,
 __not_>,
@@ -1017,7 +1017,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return *this;
   }
 
-  template
+  template>
 #ifdef _GLIBCXX_USE_CONSTRAINTS_FOR_OPTIONAL
requires (!is_same_v>)
  && (!(is_scalar_v<_Tp> && is_same_v<_Tp, decay_t<_Up>>))
@@ -1242,7 +1242,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_bad_optional_access();
   }
 
-  template
+  template>
constexpr _Tp
value_or(_Up&& __u) const&
{
@@ -1255,7 +1255,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return static_cast<_Tp>(std::forward<_Up>(__u));
}
 
-  template
+  template>
constexpr _Tp
value_or(_Up&& __u) &&
{
diff --git a/libstdc++-v3/testsuite/20_util/expected/lwg3886.cc 
b/libstdc++-v3/testsuite/20_util/expected/lwg3886.cc
new file mode 100644
index 000000000000..cf1a2ce4421f
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/expected/lwg3886.cc
@@ -0,0 +1,58 @@
+// { dg-do compile { target c++23 } }
+
+// LWG 3886. Monad mo' problems
+
+#include <expected>
+
+void
+test_constructor()
+{
+  struct MoveOnly {
+MoveOnly(int, int) { }
+MoveOnly(MoveOnly&&) { }
+  };
+
+  // The {0,0} should be deduced as MoveOnly not const MoveOnly
+  [[maybe_unused]] std::expected<const MoveOnly, int> e({0,0});
+}
+
+struct Tracker {
+  bool moved = false;
+  constexpr Tracker(int, int) { }
+  constexpr Tracker(const Tracker&) { }
+  constexpr Tracker(Tracker&&) : moved(true) { }
+
+  // The follow means that is_assignable is tr

[gcc r15-2423] libstdc++: Fix std::format output for std::chrono::zoned_time

2024-07-30 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:8f05ada7dfb9a40d4333a2aa9ccb5ddcdf8e2b06

commit r15-2423-g8f05ada7dfb9a40d4333a2aa9ccb5ddcdf8e2b06
Author: Jonathan Wakely 
Date:   Fri Jul 26 18:11:26 2024 +0100

libstdc++: Fix std::format output for std::chrono::zoned_time

When formatting a chrono::zoned_time with an empty chrono-specs, we were
only formatting its _M_time member, but the ostream insertion operator
uses the format "{:L%F %T %Z}" which includes the time zone
abbreviation. The %Z should also be used when formatting with an empty
chrono-specs.

This commit makes _M_format_to_ostream handle __local_time_fmt
specializations directly, rather than calling itself recursively to
format the _M_time member. We need to be able to customize the output of
_M_format_to_ostream for __local_time_fmt, because we use that type for
gps_time and tai_time as well as for zoned_time and __local_time_fmt.
When formatting gps_time and tai_time we don't want to include the time
zone abbreviation in the "{}" output, but for zoned_time we do want to.
We can reuse the __is_neg flag passed to _M_format_to_ostream (via
_M_format) to say that we want the time zone abbreviation.  Currently
the __is_neg flag is only used for duration specializations, so it's
available for __local_time_fmt to use.

In addition to fixing the zoned_time output to use %Z, this commit also
changes the __local_time_fmt output to use %Z. Previously it didn't use
it, just like zoned_time.  The standard doesn't actually say how to
format local-time-format-t for an empty chrono-specs, but this behaviour
seems sensible and is what I'm proposing as part of LWG 4124.

While testing this I noticed that some chrono types were not being
tested with empty chrono-specs, so this adds more tests. I also noticed
that std/time/clock/local/io.cc was testing tai_time instead of
local_time, which was completely wrong. That's fixed now too.

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (__local_fmt_t): Remove unused
declaration.
(__formatter_chrono::_M_format_to_ostream): Add explicit
handling for specializations of __local_time_fmt, including the
time zone abbreviation in the output if __is_neg is true.
(formatter<tai_time<D>>::format): Add comment.
(formatter<gps_time<D>>::format): Likewise.
(formatter
 struct __local_time_fmt
 {
@@ -163,8 +164,6 @@ namespace __detail
   const string* _M_abbrev;
   const seconds* _M_offset_sec;
 };
-
-  struct __local_fmt_t;
 }
 /// @endcond
 
@@ -695,13 +694,34 @@ namespace __format
  using ::std::chrono::__detail::__utc_leap_second;
  using ::std::chrono::__detail::__local_time_fmt;
 
+ basic_ostringstream<_CharT> __os;
+ __os.imbue(_M_locale(__fc));
+
  if constexpr (__is_specialization_of<_Tp, __local_time_fmt>)
-   return _M_format_to_ostream(__t._M_time, __fc, false);
- else
{
- basic_ostringstream<_CharT> __os;
- __os.imbue(_M_locale(__fc));
+ // Format as "{:L%F %T}"
+ auto __days = chrono::floor<chrono::days>(__t._M_time);
+ __os << chrono::year_month_day(__days) << ' '
+  << chrono::hh_mm_ss(__t._M_time - __days);
 
+ // For __local_time_fmt the __is_neg flags says whether to
+ // append " %Z" to the result.
+ if (__is_neg)
+   {
+ if (!__t._M_abbrev) [[unlikely]]
+   __format::__no_timezone_available();
+ else if constexpr (is_same_v<_CharT, char>)
+   __os << ' ' << *__t._M_abbrev;
+ else
+   {
+ __os << L' ';
+ for (char __c : *__t._M_abbrev)
+   __os << __c;
+   }
+   }
+   }
+ else
+   {
  if constexpr (__is_specialization_of<_Tp, __utc_leap_second>)
__os << __t._M_date << ' ' << __t._M_time;
  else if constexpr (chrono::__is_time_point_v<_Tp>)
@@ -727,11 +747,11 @@ namespace __format
  __os << _S_plus_minus[1];
  __os << __t;
}
-
- auto __str = std::move(__os).str();
- return __format::__write_padded_as_spec(__str, __str.size(),
- __fc, _M_spec);
}
+
+ auto __str = std::move(__os).str();
+ return __format::__write_padded_as_spec(__str, __str.size(),
+ __fc, _M_spec);
}
 
   static constexpr const _CharT* _S_chars
@@ -2008,6 +2028,8 @@ namespace __format
   _FormatContext& __fc) const
{
  // Convert to __local_time_fmt with abbrev "TAI" and offset 0s.
+

[gcc r15-2424] libstdc++: Fix formatter for low-resolution chrono::zoned_time (LWG 4124)

2024-07-30 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:4883c9571f5fb8fc7e873bb8a31aa164c5cfd0e0

commit r15-2424-g4883c9571f5fb8fc7e873bb8a31aa164c5cfd0e0
Author: Jonathan Wakely 
Date:   Mon Jul 29 12:52:40 2024 +0100

libstdc++: Fix formatter for low-resolution chrono::zoned_time (LWG 4124)

This implements the proposed resolution of LWG 4124, so that
low-resolution chrono::zoned_time objects can be formatted. The
formatter for zoned_time needs to account for get_local_time
returning local_time<common_type_t<Duration, seconds>> not local_time<Duration>.

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (__local_time_fmt_for): New alias
template.
(formatter<zoned_time<Duration, TimeZonePtr>>): Use __local_time_fmt_for.
* testsuite/std/time/zoned_time/io.cc: Check zoned_time<minutes>
can be formatted.

Diff:
---
 libstdc++-v3/include/bits/chrono_io.h| 12 +---
 libstdc++-v3/testsuite/std/time/zoned_time/io.cc |  4 
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index e7e7deb2cde3..d8a4a121113c 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -164,6 +164,12 @@ namespace __detail
   const string* _M_abbrev;
   const seconds* _M_offset_sec;
 };
+
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 4124. Cannot format zoned_time with resolution coarser than seconds
+  template<typename _Duration>
+using __local_time_fmt_for
+  = __local_time_fmt<common_type_t<_Duration, seconds>>;
 }
 /// @endcond
 
@@ -2137,15 +2143,15 @@ namespace __format
 #if _GLIBCXX_USE_CXX11_ABI || ! _GLIBCXX_USE_DUAL_ABI
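   template<typename _Duration, typename _TimeZonePtr, typename _CharT>
 struct formatter<chrono::zoned_time<_Duration, _TimeZonePtr>, _CharT>
-: formatter<chrono::__detail::__local_time_fmt<_Duration>, _CharT>
+: formatter<chrono::__detail::__local_time_fmt_for<_Duration>, _CharT>
 {
   template<typename _FormatContext>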
typename _FormatContext::iterator
format(const chrono::zoned_time<_Duration, _TimeZonePtr>& __tp,
   _FormatContext& __ctx) const
{
- using chrono::__detail::__local_time_fmt;
- using _Base = formatter<__local_time_fmt<_Duration>, _CharT>;
+ using _Ltf = chrono::__detail::__local_time_fmt_for<_Duration>;
+ using _Base = formatter<_Ltf, _CharT>;
  const chrono::sys_info __info = __tp.get_info();
  const auto __lf = chrono::local_time_format(__tp.get_local_time(),
  &__info.abbrev,
diff --git a/libstdc++-v3/testsuite/std/time/zoned_time/io.cc 
b/libstdc++-v3/testsuite/std/time/zoned_time/io.cc
index ee3b9edba810..c113eea6d3fe 100644
--- a/libstdc++-v3/testsuite/std/time/zoned_time/io.cc
+++ b/libstdc++-v3/testsuite/std/time/zoned_time/io.cc
@@ -66,6 +66,10 @@ test_format()
   ws = std::format(L"{:+^34}", zoned_time(zone, t));
   VERIFY( ws == L"++2022-12-19 12:26:25.708000 EST++" );
 #endif
+
+  // LWG 4124. Cannot format zoned_time with resolution coarser than seconds
+  s = std::format("{}", zoned_time(zone, 
time_point_cast(t)));
+  VERIFY( s == "2022-12-19 12:26:00 EST" );
 }
 
 int main()


[gcc(refs/users/meissner/heads/work174)] Remove arch flags power4 and power5. Use const HOST_WIDE_INT for arch masks.

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:816307fad902b82fd4ed6079b9113057b7496252

commit 816307fad902b82fd4ed6079b9113057b7496252
Author: Michael Meissner 
Date:   Tue Jul 30 18:05:54 2024 -0400

Remove arch flags power4 and power5.  Use const HOST_WIDE_INT for arch 
masks.

2024-07-30  Michael Meissner  

* config/rs6000/rs6000-arch.def: Remove power4 and power5 
architecture
masks.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch
flags for power6.
* config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as
const HOST_WIDE_INT and not as an enumeration.
* config/rs6000/rs6000.cc (get_arch_flags): Drop power4 and power5 
arch
flags support.
(rs6000_machine_from_flags): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-arch.def |  2 --
 gcc/config/rs6000/rs6000-c.cc |  2 +-
 gcc/config/rs6000/rs6000-opts.h   |  9 -
 gcc/config/rs6000/rs6000.cc   | 12 ++--
 4 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
index 6725736076da..d317200016a2 100644
--- a/gcc/config/rs6000/rs6000-arch.def
+++ b/gcc/config/rs6000/rs6000-arch.def
@@ -37,8 +37,6 @@
the appropriate architecture flags based on the actual processor
enumeration.  */
 
-ARCH_EXPAND(POWER4,  "power4")
-ARCH_EXPAND(POWER5,  "power5")
 ARCH_EXPAND(POWER6,  "power6")
 ARCH_EXPAND(POWER7,  "power7")
 ARCH_EXPAND(POWER8,  "power8")
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index a8a6a956874f..7d0b24b7c09e 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -426,7 +426,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5");
   if ((flags & OPTION_MASK_FPRND) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X");
-  if ((flags & OPTION_MASK_CMPB) != 0)
+  if ((arch_flags & ARCH_MASK_POWER6) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6");
   if ((arch_flags & ARCH_MASK_POWER7) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h
index 9a52a1d4b147..c7764e66cd03 100644
--- a/gcc/config/rs6000/rs6000-opts.h
+++ b/gcc/config/rs6000/rs6000-opts.h
@@ -80,15 +80,14 @@ enum {
   ARCH_ENUM_LAST
 };
 
-/* Create an architecture mask for the newer architectures (power7 and
+/* Create an architecture mask for the newer architectures (power6 and
up)..  */
 #undef  ARCH_EXPAND
-#define ARCH_EXPAND(PROC, NAME)ARCH_MASK_ ## PROC = 1 << ARCH_ENUM_ ## 
PROC,
+#define ARCH_EXPAND(PROC, NAME)
\
+  static const HOST_WIDE_INT ARCH_MASK_ ## PROC
\
+= HOST_WIDE_INT_1 << ARCH_ENUM_ ## PROC;
 
-enum {
 #include "rs6000-arch.def"
-  ARCH_MASK_ZERO   = 0
-};
 
 #undef ARCH_EXPAND
 
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 19adc66cc801..bccf30a44b20 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1852,14 +1852,6 @@ get_arch_flags (int cpu_index)
 
   case PROCESSOR_POWER6:
ret |= ARCH_MASK_POWER6;
-   /* fall through.  */
-
-  case PROCESSOR_POWER5:
-   ret |= ARCH_MASK_POWER5;
-   /* fall through.  */
-
-  case PROCESSOR_POWER4:
-   ret |= ARCH_MASK_POWER4;
break;
 
   default:
@@ -5937,9 +5929,9 @@ rs6000_machine_from_flags (void)
 return "power7";
   if ((arch_flags & ARCH_MASK_POWER6) != 0)
 return "power6";
-  if ((arch_flags & ARCH_MASK_POWER5) != 0)
+  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
 return "power5";
-  if ((arch_flags & ARCH_MASK_POWER4) != 0)
+  if ((flags & ISA_2_1_MASKS) != 0)
 return "power4";
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 return "ppc64";


[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:e2a3cfdcb3909f7893afeb36c366c4dba2306206

commit e2a3cfdcb3909f7893afeb36c366c4dba2306206
Author: Michael Meissner 
Date:   Tue Jul 30 18:07:02 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 16 
 1 file changed, 16 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 0f6cd95f9269..eb0b90a45dbe 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,3 +1,19 @@
+ Branch work174, patch #8 
+
+Remove arch flags power4 and power5.  Use const HOST_WIDE_INT for arch masks.
+
+2024-07-30  Michael Meissner  
+
+   * config/rs6000/rs6000-arch.def: Remove power4 and power5 architecture
+   masks.
+   * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Use arch flags
+   for power6.
+   * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const
+   HOST_WIDE_INT and not as an enumeratio.
+   * config/rs6000/rs6000.cc (get_arch_flags): Drop power4 and power5 arch
+   flags support.
+   (rs6000_machine_from_flags): Likewise.
+
  Branch work174, patch #7 was reverted 
 
  Branch work174, patch #6 


[gcc(refs/users/meissner/heads/work174)] Add a2 processor to arch flags.

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7d2ba2da50c107056bc9a5f589012d42ff1439de

commit 7d2ba2da50c107056bc9a5f589012d42ff1439de
Author: Michael Meissner 
Date:   Tue Jul 30 18:13:07 2024 -0400

Add a2 processor to arch flags.

2024-07-30  Michael Meissner  

* config/rs6000/rs6000-arch.def: Add a2 processor.

Diff:
---
 gcc/config/rs6000/rs6000-arch.def | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
index d317200016a2..89b1b87c89ac 100644
--- a/gcc/config/rs6000/rs6000-arch.def
+++ b/gcc/config/rs6000/rs6000-arch.def
@@ -37,6 +37,7 @@
the appropriate architecture flags based on the actual processor
enumeration.  */
 
+ARCH_EXPAND(PPCA2,   "a2")
 ARCH_EXPAND(POWER6,  "power6")
 ARCH_EXPAND(POWER7,  "power7")
 ARCH_EXPAND(POWER8,  "power8")


[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:51e8229f65e2d7e5fd846ae016cd2634e5828e65

commit 51e8229f65e2d7e5fd846ae016cd2634e5828e65
Author: Michael Meissner 
Date:   Tue Jul 30 18:13:59 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index eb0b90a45dbe..86cc52c8636d 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,3 +1,11 @@
+ Branch work174, patch #9 
+
+Add a2 processor to arch flags.
+
+2024-07-30  Michael Meissner  
+
+   * config/rs6000/rs6000-arch.def: Add a2 processor.
+
  Branch work174, patch #8 
 
 Remove arch flags power4 and power5.  Use const HOST_WIDE_INT for arch masks.


[gcc(refs/users/meissner/heads/work174)] Revert changes

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4d9ccfc12fa1c534d01eb7c3aac57fe0268fc913

commit 4d9ccfc12fa1c534d01eb7c3aac57fe0268fc913
Author: Michael Meissner 
Date:   Tue Jul 30 18:19:52 2024 -0400

Revert changes

Diff:
---
 gcc/ChangeLog.meissner| 26 ++
 gcc/config/rs6000/rs6000-arch.def |  3 ++-
 gcc/config/rs6000/rs6000-c.cc |  2 +-
 gcc/config/rs6000/rs6000-opts.h   |  9 +
 gcc/config/rs6000/rs6000.cc   | 12 ++--
 5 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 86cc52c8636d..75d085b17f0c 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,27 +1,5 @@
- Branch work174, patch #9 
-
-Add a2 processor to arch flags.
-
-2024-07-30  Michael Meissner  
-
-   * config/rs6000/rs6000-arch.def: Add a2 processor.
-
- Branch work174, patch #8 
-
-Remove arch flags power4 and power5.  Use const HOST_WIDE_INT for arch masks.
-
-2024-07-30  Michael Meissner  
-
-   * config/rs6000/rs6000-arch.def: Remove power4 and power5 architecture
-   masks.
-   * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Use arch flags
-   for power6.
-   * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const
-   HOST_WIDE_INT and not as an enumeratio.
-   * config/rs6000/rs6000.cc (get_arch_flags): Drop power4 and power5 arch
-   flags support.
-   (rs6000_machine_from_flags): Likewise.
-
+ Branch work174, patch #9 was reverted 
+ Branch work174, patch #8 was reverted 
  Branch work174, patch #7 was reverted 
 
  Branch work174, patch #6 
diff --git a/gcc/config/rs6000/rs6000-arch.def 
b/gcc/config/rs6000/rs6000-arch.def
index 89b1b87c89ac..6725736076da 100644
--- a/gcc/config/rs6000/rs6000-arch.def
+++ b/gcc/config/rs6000/rs6000-arch.def
@@ -37,7 +37,8 @@
the appropriate architecture flags based on the actual processor
enumeration.  */
 
-ARCH_EXPAND(PPCA2,   "a2")
+ARCH_EXPAND(POWER4,  "power4")
+ARCH_EXPAND(POWER5,  "power5")
 ARCH_EXPAND(POWER6,  "power6")
 ARCH_EXPAND(POWER7,  "power7")
 ARCH_EXPAND(POWER8,  "power8")
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 7d0b24b7c09e..a8a6a956874f 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -426,7 +426,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5");
   if ((flags & OPTION_MASK_FPRND) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X");
-  if ((arch_flags & ARCH_MASK_POWER6) != 0)
+  if ((flags & OPTION_MASK_CMPB) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6");
   if ((arch_flags & ARCH_MASK_POWER7) != 0)
 rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h
index c7764e66cd03..9a52a1d4b147 100644
--- a/gcc/config/rs6000/rs6000-opts.h
+++ b/gcc/config/rs6000/rs6000-opts.h
@@ -80,14 +80,15 @@ enum {
   ARCH_ENUM_LAST
 };
 
-/* Create an architecture mask for the newer architectures (power6 and
+/* Create an architecture mask for the newer architectures (power7 and
up)..  */
 #undef  ARCH_EXPAND
-#define ARCH_EXPAND(PROC, NAME)
\
-  static const HOST_WIDE_INT ARCH_MASK_ ## PROC
\
-= HOST_WIDE_INT_1 << ARCH_ENUM_ ## PROC;
+#define ARCH_EXPAND(PROC, NAME)ARCH_MASK_ ## PROC = 1 << ARCH_ENUM_ ## 
PROC,
 
+enum {
 #include "rs6000-arch.def"
+  ARCH_MASK_ZERO   = 0
+};
 
 #undef ARCH_EXPAND
 
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index bccf30a44b20..19adc66cc801 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1852,6 +1852,14 @@ get_arch_flags (int cpu_index)
 
   case PROCESSOR_POWER6:
ret |= ARCH_MASK_POWER6;
+   /* fall through.  */
+
+  case PROCESSOR_POWER5:
+   ret |= ARCH_MASK_POWER5;
+   /* fall through.  */
+
+  case PROCESSOR_POWER4:
+   ret |= ARCH_MASK_POWER4;
break;
 
   default:
@@ -5929,9 +5937,9 @@ rs6000_machine_from_flags (void)
 return "power7";
   if ((arch_flags & ARCH_MASK_POWER6) != 0)
 return "power6";
-  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
+  if ((arch_flags & ARCH_MASK_POWER5) != 0)
 return "power5";
-  if ((flags & ISA_2_1_MASKS) != 0)
+  if ((arch_flags & ARCH_MASK_POWER4) != 0)
 return "power4";
   if ((flags & OPTION_MASK_POWERPC64) != 0)
 return "ppc64";


[gcc(refs/users/meissner/heads/work174)] Use const HOST_WIDE_INT for arch masks.

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a66ceb34e3df733362f3ee4b56bb504ff5e2a81c

commit a66ceb34e3df733362f3ee4b56bb504ff5e2a81c
Author: Michael Meissner 
Date:   Tue Jul 30 18:23:14 2024 -0400

Use const HOST_WIDE_INT for arch masks.

2024-07-30  Michael Meissner  

* config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as
const HOST_WIDE_INT and not as an enumeration.

Diff:
---
 gcc/config/rs6000/rs6000-opts.h | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h
index 9a52a1d4b147..c7764e66cd03 100644
--- a/gcc/config/rs6000/rs6000-opts.h
+++ b/gcc/config/rs6000/rs6000-opts.h
@@ -80,15 +80,14 @@ enum {
   ARCH_ENUM_LAST
 };
 
-/* Create an architecture mask for the newer architectures (power7 and
+/* Create an architecture mask for the newer architectures (power6 and
up)..  */
 #undef  ARCH_EXPAND
-#define ARCH_EXPAND(PROC, NAME)ARCH_MASK_ ## PROC = 1 << ARCH_ENUM_ ## 
PROC,
+#define ARCH_EXPAND(PROC, NAME)
\
+  static const HOST_WIDE_INT ARCH_MASK_ ## PROC
\
+= HOST_WIDE_INT_1 << ARCH_ENUM_ ## PROC;
 
-enum {
 #include "rs6000-arch.def"
-  ARCH_MASK_ZERO   = 0
-};
 
 #undef ARCH_EXPAND


[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*

2024-07-30 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1b8c6a9878846cc8925df7189f8aee0c1013036c

commit 1b8c6a9878846cc8925df7189f8aee0c1013036c
Author: Michael Meissner 
Date:   Tue Jul 30 18:24:50 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.meissner | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
index 75d085b17f0c..ce7ed892daf3 100644
--- a/gcc/ChangeLog.meissner
+++ b/gcc/ChangeLog.meissner
@@ -1,3 +1,12 @@
+ Branch work174, patch #10 
+
+Use const HOST_WIDE_INT for arch masks.
+
+2024-07-30  Michael Meissner  
+
+   * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const
+   HOST_WIDE_INT and not as an enumeratio.
+
  Branch work174, patch #9 was reverted 
  Branch work174, patch #8 was reverted 
  Branch work174, patch #7 was reverted 


[gcc r15-2426] LoongArch: Expand some SImode operations through "si3_extend" instructions if TARGET_64BIT

2024-07-30 Thread Xi Ruoyao via Gcc-cvs
https://gcc.gnu.org/g:b929083dd83ab50f26e10bbaa5097d5f6fb3c908

commit r15-2426-gb929083dd83ab50f26e10bbaa5097d5f6fb3c908
Author: Xi Ruoyao 
Date:   Sat Jul 20 20:38:13 2024 +0800

LoongArch: Expand some SImode operations through "si3_extend" instructions 
if TARGET_64BIT

We already had "si3_extend" insns and we hoped the fwprop or combine
passes can use them to remove unnecessary sign extensions.  But this
does not always work: for cases like x << 1 | y, the compiler
tends to do

(sign_extend:DI
  (ior:SI (ashift:SI (reg:SI $r4)
 (const_int 1))
  (reg:SI $r5)))

instead of

(ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1)))
(sign_extend:DI (reg:SI $r5)))

So we cannot match the ashlsi3_extend instruction here and we get:

slli.w $r4,$r4,1
or $r4,$r5,$r4
slli.w $r4,$r4,0# <= redundant
jr $r1

To eliminate this redundant extension we need to turn SImode shift etc.
to DImode "si3_extend" operations earlier, when we expand the SImode
operation.  We are already doing this for addition, now do it for
shifts, rotates, subtraction, multiplication, division, and modulo as
well.

The bytepick.w definition for TARGET_64BIT needs to be adjusted so it
won't be undone by the shift expanding.

gcc/ChangeLog:

* config/loongarch/loongarch.md (optab): Add (rotatert "rotr").
(3, 3,
sub3, rotr3, mul3): Add a "*" to the insn name
so we can redefine the names with define_expand.
(*si3_extend): Remove "*" so we can use them
in expanders.
(*subsi3_extended, *mulsi3_extended): Likewise, also remove the
trailing "ed" for consistency.
(*si3_extended): Add mode for sign_extend to
prevent an ICE using it in expanders.
(shift_w, arith_w): New define_code_iterator.
(3): New define_expand.  Expand with
si3_extend for SImode if TARGET_64BIT.
(3): Likewise.
(mul3): Expand to mulsi3_extended for SImode if
TARGET_64BIT and ISA_HAS_DIV32.
(3): Expand to si3_extended
for SImode if TARGET_64BIT.
(rotl3): Expand to rotrsi3_extend for SImode if
TARGET_64BIT.
(bytepick_w_): Add mode for lshiftrt and ashift.
(bitsize, bytepick_imm, bytepick_w_ashift_amount): New
define_mode_attr.
(bytepick_w__extend): Adjust for the RTL change
caused by 32-bit shift expanding.  Now bytepick_imm only covers
2 and 3, separate one remaining case to ...
(bytepick_w_1_extend): ... here, new define_insn.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/bitwise_extend.c: New test.

Diff:
---
 gcc/config/loongarch/loongarch.md  | 131 +
 .../gcc.target/loongarch/bitwise_extend.c  |  45 +++
 2 files changed, 154 insertions(+), 22 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 459ad30b9bb9..9bad79bbf45e 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -546,6 +546,7 @@
 (define_code_attr optab [(ashift "ashl")
 (ashiftrt "ashr")
 (lshiftrt "lshr")
+(rotatert "rotr")
 (ior "ior")
 (xor "xor")
 (and "and")
@@ -624,6 +625,49 @@
 (48 "6")
 (56 "7")])
 
+;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT
+;; so the redundant sign extension can be removed if the output is used as
+;; an input of a bitwise operation.  Note plus, rotl, and div are handled
+;; separately.
+(define_code_iterator shift_w [any_shift rotatert])
+(define_code_iterator arith_w [minus mult])
+
+(define_expand "3"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (shift_w:GPR (match_operand:GPR 1 "register_operand" "r")
+(match_operand:SI 2 "arith_operand" "rI")))]
+  ""
+{
+  if (TARGET_64BIT && mode == SImode)
+{
+  rtx t = gen_reg_rtx (DImode);
+  emit_insn (gen_si3_extend (t, operands[1], operands[2]));
+  t = gen_lowpart (SImode, t);
+  SUBREG_PROMOTED_VAR_P (t) = 1;
+  SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+  emit_move_insn (operands[0], t);
+  DONE;
+}
+})
+
+(define_expand "3"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (arith_w:GPR (match_operand:GPR 1 "register_operand" "r")
+(match_operand:GPR 2 "register_operand" "r")))]
+  ""
+{
+  if (TARGET_64BIT && mode == SImode)
+{
+  rtx t = gen_reg_rtx (DImode);
+  emit_insn (gen_si3_extend 

[gcc r15-2427] rs6000: Use standard name uabd for absdu insns

2024-07-30 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:169341f0893a009736f9715db969909880d0e876

commit r15-2427-g169341f0893a009736f9715db969909880d0e876
Author: Kewen Lin 
Date:   Tue Jul 30 21:20:51 2024 -0500

rs6000: Use standard name uabd for absdu insns

r14-1832 added a recognition pattern, ifn and optab for ABD
(ABsolute Difference).  We have had some vector absolute
difference unsigned instructions since ISA 3.0 but, as the
associated test cases show, they are not exploited well
because we don't define them with a standard name.  So this
patch renames them with the standard name first.  It also
merges the define_expand and define_insn, as a separate
define_expand isn't needed.  Besides, it adjusts the RTL
pattern to use generic umax and umin rather than
UNSPEC_VADU, which is more meaningful and can catch umin/umax
opportunities.

gcc/ChangeLog:

* config/rs6000/altivec.md (p9_vadu<mode>3): Rename to ...
(uabd<mode>3): ... this.  Update RTL pattern with umin and umax
rather than UNSPEC_VADU.
(vadu<mode>3): Remove.
(UNSPEC_VADU): Remove.
(usadv16qi): Replace gen_p9_vaduv16qi3 with gen_uabdv16qi3.
(usadv8hi): Replace gen_p9_vaduv8hi3 with gen_uabdv8hi3.
* config/rs6000/rs6000-builtins.def (__builtin_altivec_vadub): 
Replace
expander with uabdv16qi3.
(__builtin_altivec_vaduh): Adjust expander with uabdv8hi3.
(__builtin_altivec_vaduw): Adjust expander with uabdv4si3.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/abd-vectorize-1.c: New test.
* gcc.target/powerpc/abd-vectorize-2.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 25 ++-
 gcc/config/rs6000/rs6000-builtins.def  |  6 ++--
 gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c | 27 
 gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c | 37 ++
 4 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 5af9bf920a2e..aa9d8fffc901 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -119,7 +119,6 @@
UNSPEC_STVLXL
UNSPEC_STVRX
UNSPEC_STVRXL
-   UNSPEC_VADU
UNSPEC_VSLV
UNSPEC_VSRV
UNSPEC_VMULWHUB
@@ -4323,19 +4322,15 @@
   [(set_attr "type" "vecsimple")])
 
 ;; Vector absolute difference unsigned
-(define_expand "vadu3"
-  [(set (match_operand:VI 0 "register_operand")
-(unspec:VI [(match_operand:VI 1 "register_operand")
-   (match_operand:VI 2 "register_operand")]
- UNSPEC_VADU))]
-  "TARGET_P9_VECTOR")
-
-;; Vector absolute difference unsigned
-(define_insn "p9_vadu3"
+(define_insn "uabd3"
   [(set (match_operand:VI 0 "register_operand" "=v")
-(unspec:VI [(match_operand:VI 1 "register_operand" "v")
-   (match_operand:VI 2 "register_operand" "v")]
- UNSPEC_VADU))]
+   (minus:VI
+ (umax:VI
+   (match_operand:VI 1 "register_operand" "v")
+   (match_operand:VI 2 "register_operand" "v"))
+ (umin:VI
+   (match_dup 1)
+   (match_dup 2]
   "TARGET_P9_VECTOR"
   "vabsdu %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -4500,7 +4495,7 @@
   rtx zero = gen_reg_rtx (V4SImode);
   rtx psum = gen_reg_rtx (V4SImode);
 
-  emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2]));
+  emit_insn (gen_uabdv16qi3 (absd, operands[1], operands[2]));
   emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
   emit_insn (gen_altivec_vsum4ubs (psum, absd, zero));
   emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
@@ -4521,7 +4516,7 @@
   rtx zero = gen_reg_rtx (V4SImode);
   rtx psum = gen_reg_rtx (V4SImode);
 
-  emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2]));
+  emit_insn (gen_uabdv8hi3 (absd, operands[1], operands[2]));
   emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
   emit_insn (gen_altivec_vsum4shs (psum, absd, zero));
   emit_insn (gen_addv4si3 (operands[0], psum, operands[3]));
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 12d131d016d6..0c3c884c1104 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2345,13 +2345,13 @@
 VFIRSTMISMATCHOREOSINDEX_V4SI first_mismatch_or_eos_index_v4si {}
 
   const vsc __builtin_altivec_vadub (vsc, vsc);
-VADUB vaduv16qi3 {}
+VADUB uabdv16qi3 {}
 
   const vss __builtin_altivec_vaduh (vss, vss);
-VADUH vaduv8hi3 {}
+VADUH uabdv8hi3 {}
 
   const vsi __builtin_altivec_vaduw (vsi, vsi);
-VADUW vaduv4si3 {}
+VADUW uabdv4si3 {}
 
   const vsll __builtin_altivec_vbpermd (vsll, vsc);
 VBPERMD altivec_vbpermd {}
diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c
new file mode 100644
index ..d63b887b4b8f
--- /d

[gcc r15-2428] rs6000: Relax some FLOAT128 expander condition for FLOAT128_IEEE_P [PR105359]

2024-07-30 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:993a3c0894c487dce5efc6cfb5b31a8358905e8f

commit r15-2428-g993a3c0894c487dce5efc6cfb5b31a8358905e8f
Author: Kewen Lin 
Date:   Tue Jul 30 21:21:15 2024 -0500

rs6000: Relax some FLOAT128 expander condition for FLOAT128_IEEE_P 
[PR105359]

As PR105359 shows, we disable some FLOAT128 expanders for
64-bit long double, but in fact IEEE float128 types like
__ieee128 are only guarded with TARGET_FLOAT128_TYPE and
TARGET_LONG_DOUBLE_128 is only checked when determining if
we can reuse long_double_type_node.  So this patch is to
relax all affected FLOAT128 expander conditions for
FLOAT128_IEEE_P.  By the way, currently IBM double double
type __ibm128 is guarded by TARGET_LONG_DOUBLE_128, so we
have to use TARGET_LONG_DOUBLE_128 for it.  IMHO, it's not
necessary and can be enhanced later.

Btw, for all test cases mentioned in PR105359, I removed
the xfails and tested them with explicit -mlong-double-64,
both pr79004.c and float128-hw.c are tested well and
float128-hw4.c isn't tested (unsupported due to 64 bit
long double conflicts with -mabi=ieeelongdouble).

PR target/105359

gcc/ChangeLog:

* config/rs6000/rs6000.md (@extenddf<mode>2): Don't check
TARGET_LONG_DOUBLE_128 for FLOAT128_IEEE_P modes.
(extendsf<mode>2): Likewise.
(trunc<mode>df2): Likewise.
(trunc<mode>sf2): Likewise.
(floatsi<mode>2): Likewise.
(fix_trunc<mode>si2): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr79004.c: Remove xfails.

Diff:
---
 gcc/config/rs6000/rs6000.md| 18 --
 gcc/testsuite/gcc.target/powerpc/pr79004.c | 14 ++
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cfb22a3cb7da..d352a1431add 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8845,7 +8845,8 @@
 (define_expand "@extenddf2"
   [(set (match_operand:FLOAT128 0 "gpc_reg_operand")
(float_extend:FLOAT128 (match_operand:DF 1 "gpc_reg_operand")))]
-  "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
+  "TARGET_HARD_FLOAT
+   && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))"
 {
   if (FLOAT128_IEEE_P (mode))
 rs6000_expand_float128_convert (operands[0], operands[1], false);
@@ -8903,7 +8904,8 @@
 (define_expand "extendsf2"
   [(set (match_operand:FLOAT128 0 "gpc_reg_operand")
(float_extend:FLOAT128 (match_operand:SF 1 "gpc_reg_operand")))]
-  "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
+  "TARGET_HARD_FLOAT
+   && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))"
 {
   if (FLOAT128_IEEE_P (mode))
 rs6000_expand_float128_convert (operands[0], operands[1], false);
@@ -8919,7 +8921,8 @@
 (define_expand "truncdf2"
   [(set (match_operand:DF 0 "gpc_reg_operand")
(float_truncate:DF (match_operand:FLOAT128 1 "gpc_reg_operand")))]
-  "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
+  "TARGET_HARD_FLOAT
+   && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))"
 {
   if (FLOAT128_IEEE_P (mode))
 {
@@ -8956,7 +8959,8 @@
 (define_expand "truncsf2"
   [(set (match_operand:SF 0 "gpc_reg_operand")
(float_truncate:SF (match_operand:FLOAT128 1 "gpc_reg_operand")))]
-  "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
+  "TARGET_HARD_FLOAT
+   && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))"
 {
   if (FLOAT128_IEEE_P (mode))
 rs6000_expand_float128_convert (operands[0], operands[1], false);
@@ -8973,7 +8977,8 @@
   [(parallel [(set (match_operand:FLOAT128 0 "gpc_reg_operand")
   (float:FLOAT128 (match_operand:SI 1 "gpc_reg_operand")))
  (clobber (match_scratch:DI 2))])]
-  "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
+  "TARGET_HARD_FLOAT
+   && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))"
 {
   rtx op0 = operands[0];
   rtx op1 = operands[1];
@@ -9009,7 +9014,8 @@
 (define_expand "fix_truncsi2"
   [(set (match_operand:SI 0 "gpc_reg_operand")
(fix:SI (match_operand:FLOAT128 1 "gpc_reg_operand")))]
-  "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
+  "TARGET_HARD_FLOAT
+   && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))"
 {
   rtx op0 = operands[0];
   rtx op1 = operands[1];
diff --git a/gcc/testsuite/gcc.target/powerpc/pr79004.c 
b/gcc/testsuite/gcc.target/powerpc/pr79004.c
index 60c576cd36b6..ac89a4c9f327 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr79004.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr79004.c
@@ -100,12 +100,10 @@ void to_uns_short_store_n (TYPE a, unsigned short *p, 
long n) { p[n] = (unsigned
 void to_uns_int_store_n (TYPE a, unsigned int *p, long n) { p[n] = (unsigned 
int)a; }
 void to_uns_long_store_n (TYPE a, unsigned long *p, long n) { p[n] = (unsigned 
long)a; }
 
-/* On targets with 64-bit long double, some opcodes to deal with __float128 are
-   disabled, see PR target/105359.  */
-/* { dg-final { scan-assembler-not