[gcc r13-9305] c++: ICE with variable template and [[deprecated]] [PR110031]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:605803cf4089955c39dcbed97b343550581b8eed

commit r13-9305-g605803cf4089955c39dcbed97b343550581b8eed
Author: Marek Polacek 
Date:   Fri Jan 10 17:29:36 2025 -0500

c++: ICE with variable template and [[deprecated]] [PR110031]

lookup_and_finish_template_variable already has and uses the complain
parameter but it is not passing it down to mark_used so we got the
default tf_warning_or_error, which causes various problems when
lookup_and_finish_template_variable gets called with complain=tf_none.

PR c++/110031

gcc/cp/ChangeLog:

* pt.cc (lookup_and_finish_template_variable): Pass complain to
mark_used.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/inline-var11.C: New test.

Diff:
---
 gcc/cp/pt.cc  |  2 +-
 gcc/testsuite/g++.dg/cpp1z/inline-var11.C | 32 +++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index ddfa3c25d10e..bb53d9881405 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -10427,7 +10427,7 @@ lookup_and_finish_template_variable (tree templ, tree 
targs,
  deduction to work.  */
   complain &= ~tf_partial;
   var = finish_template_variable (var, complain);
-  mark_used (var);
+  mark_used (var, complain);
   return convert_from_reference (var);
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp1z/inline-var11.C 
b/gcc/testsuite/g++.dg/cpp1z/inline-var11.C
new file mode 100644
index ..d92911ed3a93
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/inline-var11.C
@@ -0,0 +1,32 @@
+// PR c++/110031
+// { dg-do compile { target c++17 } }
+
+template 
+[[deprecated]]
+inline constexpr bool t = true ;
+
+template 
+struct enableif;
+
+template<>
+struct enableif
+{
+using y = int;
+};
+template 
+using enableif_t = typename enableif::y;
+
+template > = 0>   // { dg-warning "deprecated" }
+struct A {  A(T &&)  {  }};
+
+template 
+struct A {
+  A(T &&) = delete;
+  A() = delete;
+};
+
+int main(void)
+{
+  A a(5.3); // { dg-error "use of deleted function" }
+  return 0;
+}


[gcc r15-6817] LoongArch: Generate the final immediate for lu12i.w, lu32i.d and lu52i.d

2025-01-10 Thread LuluCheng via Gcc-cvs
https://gcc.gnu.org/g:f30423ea8c2152dcee91056e75a4f3736cce6a6e

commit r15-6817-gf30423ea8c2152dcee91056e75a4f3736cce6a6e
Author: mengqinggang 
Date:   Fri Jan 10 10:27:09 2025 +0800

LoongArch: Generate the final immediate for lu12i.w, lu32i.d and lu52i.d

Generate 0x1010 instead of 0x1010000>>12 for lu12i.w.  lu32i.d and lu52i.d use
the same processing.

gcc/ChangeLog:

* config/loongarch/lasx.md: Use new loongarch_output_move.
* config/loongarch/loongarch-protos.h (loongarch_output_move):
Change parameters from (rtx, rtx) to (rtx *).
* config/loongarch/loongarch.cc (loongarch_output_move):
Generate final immediate for lu12i.w and lu52i.d.
* config/loongarch/loongarch.md:
Generate final immediate for lu32i.d and lu52i.d.
* config/loongarch/lsx.md: Use new loongarch_output_move.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/imm-load.c: Not generate ">>".

Diff:
---
 gcc/config/loongarch/lasx.md  |  2 +-
 gcc/config/loongarch/loongarch-protos.h   |  2 +-
 gcc/config/loongarch/loongarch.cc | 14 ---
 gcc/config/loongarch/loongarch.md | 34 +--
 gcc/config/loongarch/lsx.md   |  2 +-
 gcc/testsuite/gcc.target/loongarch/imm-load.c |  1 +
 6 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index edaf64eeb959..a37c85a25a4b 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -723,7 +723,7 @@
   [(set (match_operand:LASX 0 "nonimmediate_operand" "=f,f,R,*r,*f")
(match_operand:LASX 1 "move_operand" "fYGYI,R,f,*f,*r"))]
   "ISA_HAS_LASX"
-  { return loongarch_output_move (operands[0], operands[1]); }
+  { return loongarch_output_move (operands); }
   [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert")
(set_attr "mode" "")
(set_attr "length" "8,4,4,4,4")])
diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index fb544ad75ca1..6601f767dab4 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -86,7 +86,7 @@ extern void loongarch_split_move (rtx, rtx);
 extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
 extern void loongarch_split_plus_constant (rtx *, machine_mode);
 extern void loongarch_split_vector_move (rtx, rtx);
-extern const char *loongarch_output_move (rtx, rtx);
+extern const char *loongarch_output_move (rtx *);
 #ifdef RTX_CODE
 extern void loongarch_expand_scc (rtx *);
 extern void loongarch_expand_vec_cmp (rtx *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 24c19031026a..9d97f0216f0d 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4721,8 +4721,10 @@ loongarch_split_vector_move (rtx dest, rtx src)
that SRC is operand 1 and DEST is operand 0.  */
 
 const char *
-loongarch_output_move (rtx dest, rtx src)
+loongarch_output_move (rtx *operands)
 {
+  rtx src = operands[1];
+  rtx dest = operands[0];
   enum rtx_code dest_code = GET_CODE (dest);
   enum rtx_code src_code = GET_CODE (src);
   machine_mode mode = GET_MODE (dest);
@@ -4877,13 +4879,19 @@ loongarch_output_move (rtx dest, rtx src)
   if (src_code == CONST_INT)
{
  if (LU12I_INT (src))
-   return "lu12i.w\t%0,%1>>12\t\t\t# %X1";
+   {
+ operands[1] = GEN_INT (INTVAL (operands[1]) >> 12);
+ return "lu12i.w\t%0,%1\t\t\t# %X1";
+   }
  else if (IMM12_INT (src))
return "addi.w\t%0,$r0,%1\t\t\t# %X1";
  else if (IMM12_INT_UNSIGNED (src))
return "ori\t%0,$r0,%1\t\t\t# %X1";
  else if (LU52I_INT (src))
-   return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1";
+   {
+ operands[1] = GEN_INT (INTVAL (operands[1]) >> 52);
+ return "lu52i.d\t%0,$r0,%X1\t\t\t# %1";
+   }
  else
gcc_unreachable ();
}
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 3eff4077160e..59f457703110 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2209,7 +2209,7 @@
   "!TARGET_64BIT
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
-  { return loongarch_output_move (operands[0], operands[1]); }
+  { return loongarch_output_move (operands); }
   "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
   (operands[0]))"
   [(const_int 0)]
@@ -2228,7 +2228,9 @@
   "TARGET_64BIT
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
-  { return loongarch_output_move (operands[0], operands[1]); }
+  {
+return loongarch_output_move (operands);

[gcc r14-11201] Fortran: Cray pointer comparison wrongly optimized away [PR106692]

2025-01-10 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:5ae344e3acabf11cde001419f9bec64a2cf89f5a

commit r14-11201-g5ae344e3acabf11cde001419f9bec64a2cf89f5a
Author: Harald Anlauf 
Date:   Thu Jan 2 20:22:23 2025 +0100

Fortran: Cray pointer comparison wrongly optimized away [PR106692]

PR fortran/106692

gcc/fortran/ChangeLog:

* trans-expr.cc (gfc_conv_expr_op): Inhibit excessive optimization
of Cray pointers by treating them as volatile in comparisons.

gcc/testsuite/ChangeLog:

* gfortran.dg/cray_pointers_13.f90: New test.

(cherry picked from commit c7754a2fb2e60987524947fe189f3ffac035ea1d)

Diff:
---
 gcc/fortran/trans-expr.cc  | 13 +++
 gcc/testsuite/gfortran.dg/cray_pointers_13.f90 | 51 ++
 2 files changed, 64 insertions(+)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 10eade22f2a2..8e74fbfb257d 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -4025,6 +4025,19 @@ gfc_conv_expr_op (gfc_se * se, gfc_expr * expr)
 
   if (lop)
 {
+  // Inhibit overeager optimization of Cray pointer comparisons (PR106692).
+  if (expr->value.op.op1->expr_type == EXPR_VARIABLE
+ && expr->value.op.op1->ts.type == BT_INTEGER
+ && expr->value.op.op1->symtree
+ && expr->value.op.op1->symtree->n.sym->attr.cray_pointer)
+   TREE_THIS_VOLATILE (lse.expr) = 1;
+
+  if (expr->value.op.op2->expr_type == EXPR_VARIABLE
+ && expr->value.op.op2->ts.type == BT_INTEGER
+ && expr->value.op.op2->symtree
+ && expr->value.op.op2->symtree->n.sym->attr.cray_pointer)
+   TREE_THIS_VOLATILE (rse.expr) = 1;
+
   /* The result of logical ops is always logical_type_node.  */
   tmp = fold_build2_loc (input_location, code, logical_type_node,
 lse.expr, rse.expr);
diff --git a/gcc/testsuite/gfortran.dg/cray_pointers_13.f90 
b/gcc/testsuite/gfortran.dg/cray_pointers_13.f90
new file mode 100644
index ..766d24546ab2
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cray_pointers_13.f90
@@ -0,0 +1,51 @@
+! { dg-do run }
+! { dg-additional-options "-fcray-pointer" }
+!
+! PR fortran/106692 - Cray pointer comparison wrongly optimized away
+!
+! Contributed by Marek Polacek
+
+program test
+  call test_cray()
+  call test_cray2()
+end
+
+subroutine test_cray()
+  pointer(ptrzz1 , zz1)
+  ptrzz1=0
+  if (ptrzz1 .ne. 0) then
+print *, "test_cray: ptrzz1=", ptrzz1
+stop 1
+  else
+call shape_cray(zz1)
+  end if
+end
+
+subroutine shape_cray(zz1)
+  pointer(ptrzz , zz)
+  ptrzz=loc(zz1)
+  if (ptrzz .ne. 0) then
+print *, "shape_cray: ptrzz=", ptrzz
+stop 3
+  end if
+end
+
+subroutine test_cray2()
+  pointer(ptrzz1 , zz1)
+  ptrzz1=0
+  if (0 == ptrzz1) then
+call shape_cray2(zz1)
+  else
+print *, "test_cray2: ptrzz1=", ptrzz1
+stop 2
+  end if
+end
+
+subroutine shape_cray2(zz1)
+  pointer(ptrzz , zz)
+  ptrzz=loc(zz1)
+  if (.not. (0 == ptrzz)) then
+print *, "shape_cray2: ptrzz=", ptrzz
+stop 4
+  end if
+end


[gcc r14-11200] libstdc++: backport inline keyword on std::find

2025-01-10 Thread Tamar Christina via Libstdc++-cvs
https://gcc.gnu.org/g:e4a9fb7448a687f4fd7e621942006c2820b803d6

commit r14-11200-ge4a9fb7448a687f4fd7e621942006c2820b803d6
Author: Tamar Christina 
Date:   Fri Jan 10 21:37:40 2025 +0000

libstdc++: backport inline keyword on std::find

This is a backport version of the same patch as
g:18aff7644ad1e44dc146d36a2b7e397977aa47ac

In GCC 12 there was a ~40% regression in the performance of hashmap->find.

This regression came about accidentally:

Before GCC 12 the find function was small enough that IPA would inline it 
even
though it wasn't marked inline.  In GCC-12 an optimization was added to 
perform
a linear search when the entries in the hashmap are small.

This increased the size of the function enough that IPA would no longer 
inline.
Inlining had two benefits:

1.  The return value is a reference, so it has to be returned and dereferenced
even though the search loop may have already dereferenced it.
2.  The pattern is a hard pattern to track for branch predictors.  This 
causes
a large number of branch misses if the value is immediately checked and
branched on. i.e. if (a != m.end()) which is a common pattern.

The patch fixes both these issues by adding the inline keyword to _M_locate
to allow the inliner to consider inlining again.

This and the other patches have been run through several benchmarks where
the size, number of elements searched for and type (reference vs value) etc
were tested.

The change shows no statistical regression, but an average find improvement 
of
~27% and a range between ~10-60% improvements.

Thanks,
Tamar

libstdc++-v3/ChangeLog:

* include/bits/hashtable.h (find): Add inline keyword.

Diff:
---
 libstdc++-v3/include/bits/hashtable.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 834288c747c2..f5f421d2fd32 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -1723,7 +1723,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typename _ExtractKey, typename _Equal,
   typename _Hash, typename _RangeHash, typename _Unused,
   typename _RehashPolicy, typename _Traits>
-auto
+auto inline
 _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
 find(const key_type& __k)
@@ -1746,7 +1746,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typename _ExtractKey, typename _Equal,
   typename _Hash, typename _RangeHash, typename _Unused,
   typename _RehashPolicy, typename _Traits>
-auto
+auto inline
 _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
 find(const key_type& __k) const


[gcc r13-9303] libstdc++: backport inline keyword on std::find

2025-01-10 Thread Tamar Christina via Libstdc++-cvs
https://gcc.gnu.org/g:f00e19a0491223d2782f9f863a4f3a31d509f76b

commit r13-9303-gf00e19a0491223d2782f9f863a4f3a31d509f76b
Author: Tamar Christina 
Date:   Fri Jan 10 21:37:40 2025 +0000

libstdc++: backport inline keyword on std::find

This is a backport version of the same patch as
g:18aff7644ad1e44dc146d36a2b7e397977aa47ac

In GCC 12 there was a ~40% regression in the performance of hashmap->find.

This regression came about accidentally:

Before GCC 12 the find function was small enough that IPA would inline it 
even
though it wasn't marked inline.  In GCC-12 an optimization was added to 
perform
a linear search when the entries in the hashmap are small.

This increased the size of the function enough that IPA would no longer 
inline.
Inlining had two benefits:

1.  The return value is a reference, so it has to be returned and dereferenced
even though the search loop may have already dereferenced it.
2.  The pattern is a hard pattern to track for branch predictors.  This 
causes
a large number of branch misses if the value is immediately checked and
branched on. i.e. if (a != m.end()) which is a common pattern.

The patch fixes both these issues by adding the inline keyword to _M_locate
to allow the inliner to consider inlining again.

This and the other patches have been run through several benchmarks where
the size, number of elements searched for and type (reference vs value) etc
were tested.

The change shows no statistical regression, but an average find improvement 
of
~27% and a range between ~10-60% improvements.

Thanks,
Tamar

libstdc++-v3/ChangeLog:

* include/bits/hashtable.h (find): Add inline keyword.

(cherry picked from commit e4a9fb7448a687f4fd7e621942006c2820b803d6)

Diff:
---
 libstdc++-v3/include/bits/hashtable.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 1b5d0a7f42f4..c9ae0ed2c013 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -1660,7 +1660,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typename _ExtractKey, typename _Equal,
   typename _Hash, typename _RangeHash, typename _Unused,
   typename _RehashPolicy, typename _Traits>
-auto
+auto inline
 _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
 find(const key_type& __k)
@@ -1683,7 +1683,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   typename _ExtractKey, typename _Equal,
   typename _Hash, typename _RangeHash, typename _Unused,
   typename _RehashPolicy, typename _Traits>
-auto
+auto inline
 _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
 find(const key_type& __k) const


[gcc r15-6766] testsuite: arm: Use -std=c17 and effective-target arm_arch_v5te_thumb

2025-01-10 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:f447c3c0dff4c24acc4c3130925b95ff401cb1ec

commit r15-6766-gf447c3c0dff4c24acc4c3130925b95ff401cb1ec
Author: Torbjörn SVENSSON 
Date:   Fri Dec 27 09:18:36 2024 +0100

testsuite: arm: Use -std=c17 and effective-target arm_arch_v5te_thumb

With -std=c23, the following errors are now emitted as the function
prototype and implementation do not match:

.../pr59858.c: In function 're_search_internal':
.../pr59858.c:95:17: error: too many arguments to function 'check_matching'
.../pr59858.c:75:12: note: declared here
.../pr59858.c: At top level:
.../pr59858.c:100:1: error: conflicting types for 'check_matching'; have 
'int(re_match_context_t *, int *)'
.../pr59858.c:75:12: note: previous declaration of 'check_matching' with 
type 'int(void)'
.../pr59858.c: In function 'check_matching':
.../pr59858.c:106:14: error: too many arguments to function 'transit_state'
.../pr59858.c:77:23: note: declared here
.../pr59858.c: At top level:
.../pr59858.c:111:1: error: conflicting types for 'transit_state'; have 
're_dfastate_t *(re_match_context_t *, re_dfastate_t *)'
.../pr59858.c:77:23: note: previous declaration of 'transit_state' with 
type 're_dfastate_t *(void)'
.../pr59858.c: In function 'transit_state':
.../pr59858.c:116:7: error: too many arguments to function 'build_trtable'
.../pr59858.c:79:12: note: declared here
.../pr59858.c: At top level:
.../pr59858.c:121:1: error: conflicting types for 'build_trtable'; have 
'int(const re_dfa_t *, re_dfastate_t *)'
.../pr59858.c:79:12: note: previous declaration of 'build_trtable' with 
type 'int(void)'

Adding -std=c17 removes these errors.

Also, updated test case to use -mcpu=unset/-march=unset feature
introduced in r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr59858.c: Use -std=c17 and effective-target
arm_arch_v5te_thumb.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/pr59858.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr59858.c 
b/gcc/testsuite/gcc.target/arm/pr59858.c
index 9336edfce277..8fc63b57af4c 100644
--- a/gcc/testsuite/gcc.target/arm/pr59858.c
+++ b/gcc/testsuite/gcc.target/arm/pr59858.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
-/* { dg-options "-march=armv5te -fno-builtin -mfloat-abi=soft -mthumb 
-fno-stack-protector -Os -fno-tree-loop-optimize -fno-tree-dominator-opts -fPIC 
-w -fpermissive" } */
+/* { dg-options "-std=c17 -fno-builtin -fno-stack-protector -Os 
-fno-tree-loop-optimize -fno-tree-dominator-opts -fPIC -w -fpermissive" } */
 /* { dg-require-effective-target fpic } */
-/* { dg-skip-if "Incompatible command line options: -mfloat-abi=soft 
-mfloat-abi=hard" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
 /* { dg-require-effective-target arm_arch_v5te_thumb_ok } */
+/* { dg-add-options arm_arch_v5te_thumb } */
 
 typedef enum {
  REG_ENOSYS = -1,


[gcc r15-6767] nvptx: Add '__builtin_stack_address()' test case

2025-01-10 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:91dec10f8b7502bdd333d75ab7a9e23a58c3f32d

commit r15-6767-g91dec10f8b7502bdd333d75ab7a9e23a58c3f32d
Author: Thomas Schwinge 
Date:   Fri Dec 13 11:40:01 2024 +0100

nvptx: Add '__builtin_stack_address()' test case

Documenting the status quo.

gcc/testsuite/
* gcc.target/nvptx/__builtin_stack_address-1.c: New.

Diff:
---
 .../gcc.target/nvptx/__builtin_stack_address-1.c   | 36 ++
 1 file changed, 36 insertions(+)

diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_stack_address-1.c 
b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_address-1.c
new file mode 100644
index ..5e976dc384bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_address-1.c
@@ -0,0 +1,36 @@
+/* Document what we do for '__builtin_stack_address()'.  */
+
+/* { dg-do compile }
+   TODO We can't 'assemble' this -- it's invalid PTX code.  */
+/* { dg-options -O3 } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+void f(void)
+{
+  void *p;
+  p = __builtin_stack_address();
+  sink(p);
+}
+/*
+** f:
+** \.visible \.func f
+** {
+** {
+** \.param\.u64 %out_arg1;
+** st\.param\.u64 \[%out_arg1\], %stack;
+** call sink, \(%out_arg1\);
+** }
+** ret;
+*/
+
+/* The concept of a '%stack' pointer doesn't apply like this for
+   '-mno-soft-stack': PTX "native" stacks (TODO), and for '-msoft-stack' in
+   this form also constitutes invalid PTX code (TODO).
+
+   { dg-final { scan-assembler-not {%stack} { xfail *-*-* } } } */
+
+/* As this is an internal-use built-in function, we don't bother with
+   emitting proper error diagnostics.  */


[gcc r15-6759] c++: Fix up modules handling of namespace scope structured bindings

2025-01-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:933f0c20d4ce1dba85e85d9d117cfd9f5376a945

commit r15-6759-g933f0c20d4ce1dba85e85d9d117cfd9f5376a945
Author: Jakub Jelinek 
Date:   Fri Jan 10 10:31:12 2025 +0100

c++: Fix up modules handling of namespace scope structured bindings

With the following patch I actually get a simple namespace scope structured
binding working with modules.

The core_vals change ensures we actually save/restore DECL_VALUE_EXPR even
for namespace scope vars, the get_merge_kind is based on the assumption
that structured bindings are always unique, one can't redeclare them and
without it we really ICE because their base vars have no name.

2025-01-10  Jakub Jelinek  

* module.cc (trees_out::core_vals): Note DECL_VALUE_EXPR even for
vars outside of functions.
(trees_in::core_vals): Read in DECL_VALUE_EXPR even for vars outside
of functions.
(trees_out::get_merge_kind): Make DECL_DECOMPOSITION_P MK_unique.

* g++.dg/modules/decomp-2_b.C: New test.
* g++.dg/modules/decomp-2_a.H: New file.

Diff:
---
 gcc/cp/module.cc  | 21 +++--
 gcc/testsuite/g++.dg/modules/decomp-2_a.H | 11 +++
 gcc/testsuite/g++.dg/modules/decomp-2_b.C | 11 +++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index fec820603521..7288c46a7baa 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -6318,7 +6318,11 @@ trees_out::core_vals (tree t)
 case VAR_DECL:
   if (DECL_CONTEXT (t)
  && TREE_CODE (DECL_CONTEXT (t)) != FUNCTION_DECL)
-   break;
+   {
+ if (DECL_HAS_VALUE_EXPR_P (t))
+   WT (DECL_VALUE_EXPR (t));
+ break;
+   }
   /* FALLTHROUGH  */
 
 case RESULT_DECL:
@@ -6848,7 +6852,14 @@ trees_in::core_vals (tree t)
 case VAR_DECL:
   if (DECL_CONTEXT (t)
  && TREE_CODE (DECL_CONTEXT (t)) != FUNCTION_DECL)
-   break;
+   {
+ if (DECL_HAS_VALUE_EXPR_P (t))
+   {
+ tree val = tree_node ();
+ SET_DECL_VALUE_EXPR (t, val);
+   }
+ break;
+   }
   /* FALLTHROUGH  */
 
 case RESULT_DECL:
@@ -10990,6 +11001,12 @@ trees_out::get_merge_kind (tree decl, depset *dep)
break;
  }
 
+   if (DECL_DECOMPOSITION_P (decl))
+ {
+   mk = MK_unique;
+   break;
+ }
+
if (IDENTIFIER_ANON_P (DECL_NAME (decl)))
  {
if (RECORD_OR_UNION_TYPE_P (ctx))
diff --git a/gcc/testsuite/g++.dg/modules/decomp-2_a.H 
b/gcc/testsuite/g++.dg/modules/decomp-2_a.H
new file mode 100644
index ..df2d82abcbc1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/decomp-2_a.H
@@ -0,0 +1,11 @@
+// { dg-additional-options -fmodule-header }
+// { dg-module-cmi {} }
+
+struct A {
+  int a, b, c;
+};
+
+namespace {
+A d = { 1, 2, 3 };
+auto [a, b, c] = d;
+}
diff --git a/gcc/testsuite/g++.dg/modules/decomp-2_b.C 
b/gcc/testsuite/g++.dg/modules/decomp-2_b.C
new file mode 100644
index ..0353c8e87c93
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/decomp-2_b.C
@@ -0,0 +1,11 @@
+// { dg-do run }
+// { dg-additional-options "-fmodules-ts" }
+
+import "decomp-2_a.H";
+
+int
+main ()
+{
+  if (a != 1 || b != 2 || c != 3)
+__builtin_abort ();
+}


[gcc r15-6758] fortran: use_iso_fortran_env_module tweaks [PR118337]

2025-01-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:f5e488c0ee663c2355e6d712ffc15da215d9cd96

commit r15-6758-gf5e488c0ee663c2355e6d712ffc15da215d9cd96
Author: Jakub Jelinek 
Date:   Fri Jan 10 10:28:18 2025 +0100

fortran: use_iso_fortran_env_module tweaks [PR118337]

This patch adds a comment to explain why we initialize the non-constant
elts of the symbol array separately, and a checking assert to verify that the
separate initialization bumps the iterator for each macro.

2025-01-10  Jakub Jelinek  

PR fortran/118337
* module.cc (use_iso_fortran_env_module): Add a comment explaining
the optimization performed.  Add gcc_checking_assert that i was
incremented for all the elements.  Formatting fix.

Diff:
---
 gcc/fortran/module.cc | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/gcc/fortran/module.cc b/gcc/fortran/module.cc
index 63d0cdbee9f7..490eaa97a49d 100644
--- a/gcc/fortran/module.cc
+++ b/gcc/fortran/module.cc
@@ -7122,6 +7122,13 @@ use_iso_fortran_env_module (void)
 #include "iso-fortran-env.def"
 { ISOFORTRANENV_INVALID, NULL, -1234, 0 } };
 
+  /* We could have used c in the NAMED_{,U}INTCST macros
+ instead of 0, but then current g++ expands the initialization
+ as clearing the whole object followed by explicit stores of
+ all the non-zero elements (over 150), while by using 0s for
+ the non-constant initializers and initializing them afterwards
+ g++ will often copy everything from .rodata and then only override
+ over 30 non-constant ones.  */
   i = 0;
 #define NAMED_INTCST(a,b,c,d) symbol[i++].value = c;
 #define NAMED_UINTCST(a,b,c,d) symbol[i++].value = c;
@@ -7130,6 +7137,7 @@ use_iso_fortran_env_module (void)
 #define NAMED_FUNCTION(a,b,c,d) i++;
 #define NAMED_SUBROUTINE(a,b,c,d) i++;
 #include "iso-fortran-env.def"
+  gcc_checking_assert (i == (int) ARRAY_SIZE (symbol) - 1);
 
   /* Generate the symbol for the module itself.  */
   mod_symtree = gfc_find_symtree (gfc_current_ns->sym_root, mod);
@@ -7288,12 +7296,11 @@ use_iso_fortran_env_module (void)
break;
 
 #define NAMED_FUNCTION(a,b,c,d) \
-   case a:
+ case a:
 #include "iso-fortran-env.def"
- create_intrinsic_function (symbol[i].name, symbol[i].id, mod,
-INTMOD_ISO_FORTRAN_ENV, false,
-NULL);
- break;
+   create_intrinsic_function (symbol[i].name, symbol[i].id, mod,
+  INTMOD_ISO_FORTRAN_ENV, false, NULL);
+   break;
 
  default:
gcc_unreachable ();


[gcc r15-6760] c++: Fix up ICEs on constexpr inline asm strings in templates [PR118277]

2025-01-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:38a13ea4117b96e467f78b3f86d737ecbe326935

commit r15-6760-g38a13ea4117b96e467f78b3f86d737ecbe326935
Author: Jakub Jelinek 
Date:   Fri Jan 10 10:32:36 2025 +0100

c++: Fix up ICEs on constexpr inline asm strings in templates [PR118277]

The following patch fixes ICEs when the new inline asm syntax
to use C++26 static_assert-like constant expressions in place
of string literals is used in templates.
As finish_asm_stmt doesn't do any checking for
processing_template_decl, this patch also just defers handling
those strings in templates rather than say trying fold_non_dependent_expr
and if the result is non-dependent and usable, try to extract.

The patch also reverts changes to cp_parser_asm_specification_opt
which allowed something like
void foo () asm ((std::string_view ("bar")));
but it would be really hard to support
template 
void baz () asm ((std::string_view ("qux")));
(especially with dependent constant expression).

And the patch adds extensive test coverage for the various errors.

2025-01-10  Jakub Jelinek  

PR c++/118277
* cp-tree.h (finish_asm_string_expression): Declare.
* semantics.cc (finish_asm_string_expression): New function.
(finish_asm_stmt): Use it.
* parser.cc (cp_parser_asm_string_expression): Likewise.
Wrap string into PAREN_EXPR in the ("") case.
(cp_parser_asm_definition): Don't ICE if finish_asm_stmt
returns error_mark_node.
(cp_parser_asm_specification_opt): Revert 2024-06-24 changes.
* pt.cc (tsubst_stmt): Don't ICE if finish_asm_stmt returns
error_mark_node.

* g++.dg/cpp1z/constexpr-asm-4.C: New test.
* g++.dg/cpp1z/constexpr-asm-5.C: New test.

Diff:
---
 gcc/cp/cp-tree.h |   1 +
 gcc/cp/parser.cc |  21 +-
 gcc/cp/pt.cc |   9 +-
 gcc/cp/semantics.cc  |  43 
 gcc/testsuite/g++.dg/cpp1z/constexpr-asm-4.C |  83 ++
 gcc/testsuite/g++.dg/cpp1z/constexpr-asm-5.C | 367 +++
 6 files changed, 509 insertions(+), 15 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c08494705e9f..b65a2677b4ec 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7947,6 +7947,7 @@ enum {
 extern tree begin_compound_stmt(unsigned int);
 
 extern void finish_compound_stmt   (tree);
+extern tree finish_asm_string_expression   (location_t, tree);
 extern tree finish_asm_stmt(location_t, int, tree, tree,
 tree, tree, tree, bool, bool);
 extern tree finish_label_stmt  (tree);
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index f548dc31c2b8..80bc2d8e9e1e 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -23107,15 +23107,11 @@ cp_parser_asm_string_expression (cp_parser *parser)
   matching_parens parens;
   parens.consume_open (parser);
   tree string = cp_parser_constant_expression (parser);
-  if (string != error_mark_node)
-   string = cxx_constant_value (string, tf_error);
-  cexpr_str cstr (string);
-  if (!cstr.type_check (tok->location))
-   return error_mark_node;
-  if (!cstr.extract (tok->location, string))
-   string = error_mark_node;
   parens.require_close (parser);
-  return string;
+  if (TREE_CODE (string) == STRING_CST)
+   string = build1_loc (tok->location, PAREN_EXPR, TREE_TYPE (string),
+string);
+  return finish_asm_string_expression (tok->location, string);
 }
   else if (!cp_parser_is_string_literal (tok))
 {
@@ -23396,7 +23392,7 @@ cp_parser_asm_definition (cp_parser* parser)
  inputs, clobbers, labels, inline_p,
  false);
  /* If the extended syntax was not used, mark the ASM_EXPR.  */
- if (!extended_p)
+ if (!extended_p && asm_stmt != error_mark_node)
{
  tree temp = asm_stmt;
  if (TREE_CODE (temp) == CLEANUP_POINT_EXPR)
@@ -30044,7 +30040,7 @@ cp_parser_yield_expression (cp_parser* parser)
 /* Parse an (optional) asm-specification.
 
asm-specification:
- asm ( asm-string-expr )
+ asm ( string-literal )
 
If the asm-specification is present, returns a STRING_CST
corresponding to the string-literal.  Otherwise, returns
@@ -30067,8 +30063,9 @@ cp_parser_asm_specification_opt (cp_parser* parser)
   parens.require_open (parser);
 
   /* Look for the string-literal.  */
-  tree asm_specification = cp_parser_asm_string_expression (parser);
-
+  tree asm_specification = cp_parser_string_literal (parser,
+/*translate=*/false,
+

[gcc r15-6765] ada: Incorrect accessibilty level for library level subprograms

2025-01-10 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:3ff216b7121f832c87eaa03ece327c1e113e155a

commit r15-6765-g3ff216b7121f832c87eaa03ece327c1e113e155a
Author: squirek 
Date:   Fri Nov 1 16:33:02 2024 +

ada: Incorrect accessibilty level for library level subprograms

The patch fixes an issue in the compiler whereby accessibility level
calculations for objects declared within library-level subprograms
were done incorrectly - potentially allowing runtime accessibility
checks to spuriously pass.

gcc/ada/ChangeLog:

* accessibility.adb:
(Innermost_master_Scope_Depth): Add special case for expressions
within library level subprograms.

Diff:
---
 gcc/ada/accessibility.adb | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/ada/accessibility.adb b/gcc/ada/accessibility.adb
index b808e88b128a..8c85173aa34c 100644
--- a/gcc/ada/accessibility.adb
+++ b/gcc/ada/accessibility.adb
@@ -187,6 +187,15 @@ package body Accessibility is
  or else (Nkind (Node_Par) = N_Object_Renaming_Declaration
and then Comes_From_Iterator (Node_Par))
then
+  --  Handle the case of expressions within library level
+  --  subprograms here by adding one to the level modifier.
+
+  if Encl_Scop = Standard_Standard
+and then Nkind (Node_Par) = N_Subprogram_Body
+  then
+ Master_Lvl_Modifier := Master_Lvl_Modifier + 1;
+  end if;
+
   --  Note that in some rare cases the scope depth may not be
   --  set, for example, when we are in the middle of analyzing
   --  a type and the enclosing scope is said type. In that case


[gcc r15-6761] ada: Reorder syntactic node fields to match the Ada RM grammar

2025-01-10 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:2b27522090c4f98081db0dbfa035ae6501bbf498

commit r15-6761-g2b27522090c4f98081db0dbfa035ae6501bbf498
Author: Piotr Trojanek 
Date:   Fri Dec 20 13:00:37 2024 +0100

ada: Reorder syntactic node fields to match the Ada RM grammar

Several AST nodes had their syntactic fields in a different order than
specified by the Ada RM grammar. With the variable-size nodes this no longer
had an impact on the AST memory layout and was making the automatically
generated Nmake routines a bit unintuitive to use.

gcc/ada/ChangeLog:

* exp_ch3.adb (Predef_Spec_Or_Body): Add explicit parameter
associations, because now the Empty_List actual parameter would be
confused as being for the Aspect_Specifications formal parameter.
* gen_il-gen-gen_nodes.adb (Gen_Nodes): Reorder syntactic fields.
* sem_util.adb (Declare_Indirect_Temp): Add explicit parameter
association, because now the parameter will be interpreted as a
subpool handle name.

Diff:
---
 gcc/ada/exp_ch3.adb  |  5 ++-
 gcc/ada/gen_il-gen-gen_nodes.adb | 78 
 gcc/ada/sem_util.adb |  9 ++---
 3 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index 6c69e63b2ddb..d95b91780306 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -12399,7 +12399,10 @@ package body Exp_Ch3 is
   --  on the body to add the appropriate stuff.
 
   elsif For_Body then
- return Make_Subprogram_Body (Loc, Spec, Empty_List, Empty);
+ return Make_Subprogram_Body (Loc,
+  Specification  => Spec,
+  Declarations   => Empty_List,
+  Handled_Statement_Sequence => Empty);
 
   --  For the case of an Input attribute predefined for an abstract type,
   --  generate an abstract specification. This will never be called, but we
diff --git a/gcc/ada/gen_il-gen-gen_nodes.adb b/gcc/ada/gen_il-gen-gen_nodes.adb
index c512d85dbb26..ca46bcebdd98 100644
--- a/gcc/ada/gen_il-gen-gen_nodes.adb
+++ b/gcc/ada/gen_il-gen-gen_nodes.adb
@@ -377,10 +377,10 @@ begin -- Gen_IL.Gen.Gen_Nodes
 Sm (Is_Qualified_Universal_Literal, Flag)));
 
Cc (N_Quantified_Expression, N_Subexpr,
-   (Sy (Iterator_Specification, Node_Id, Default_Empty),
+   (Sy (All_Present, Flag),
+Sy (Iterator_Specification, Node_Id, Default_Empty),
 Sy (Loop_Parameter_Specification, Node_Id, Default_Empty),
-Sy (Condition, Node_Id, Default_Empty),
-Sy (All_Present, Flag)));
+Sy (Condition, Node_Id, Default_Empty)));
 
Cc (N_Aggregate, N_Subexpr,
(Sy (Expressions, List_Id, Default_No_List),
@@ -395,9 +395,9 @@ begin -- Gen_IL.Gen.Gen_Nodes
 Sm (Has_Self_Reference, Flag)));
 
Cc (N_Allocator, N_Subexpr,
-   (Sy (Expression, Node_Id, Default_Empty),
-Sy (Subpool_Handle_Name, Node_Id, Default_Empty),
+   (Sy (Subpool_Handle_Name, Node_Id, Default_Empty),
 Sy (Null_Exclusion_Present, Flag, Default_False),
+Sy (Expression, Node_Id, Default_Empty),
 Sm (For_Special_Return_Object, Flag),
 Sm (Do_Storage_Check, Flag),
 Sm (Is_Dynamic_Coextension, Flag),
@@ -494,11 +494,11 @@ begin -- Gen_IL.Gen.Gen_Nodes
 Sm (Prev_Ids, Flag)));
 
Cc (N_Entry_Declaration, N_Declaration,
-   (Sy (Defining_Identifier, Node_Id),
+   (Sy (Must_Override, Flag),
+Sy (Must_Not_Override, Flag),
+Sy (Defining_Identifier, Node_Id),
 Sy (Discrete_Subtype_Definition, Node_Id, Default_Empty),
 Sy (Parameter_Specifications, List_Id, Default_No_List),
-Sy (Must_Override, Flag),
-Sy (Must_Not_Override, Flag),
 Sy (Aspect_Specifications, List_Id, Default_No_List),
 Sm (Corresponding_Body, Node_Id)));
 
@@ -513,8 +513,8 @@ begin -- Gen_IL.Gen.Gen_Nodes
 Sy (In_Present, Flag),
 Sy (Out_Present, Flag),
 Sy (Null_Exclusion_Present, Flag, Default_False),
-Sy (Subtype_Mark, Node_Id, Default_Empty),
 Sy (Access_Definition, Node_Id, Default_Empty),
+Sy (Subtype_Mark, Node_Id, Default_Empty),
 Sy (Default_Expression, Node_Id, Default_Empty),
 Sy (Aspect_Specifications, List_Id, Default_No_List),
 Sm (More_Ids, Flag),
@@ -545,17 +545,17 @@ begin -- Gen_IL.Gen.Gen_Nodes
 
Cc (N_Iterator_Specification, N_Declaration,
(Sy (Defining_Identifier, Node_Id),
-Sy (Name, Node_Id, Default_Empty),
-Sy (Reverse_Present, Flag),
+Sy (Subtype_Indication, Node_Id, Default_Empty),
 Sy (Of_Present, Flag),
-Sy (Iterator_Filter, Node_Id, Default_Empty),
-Sy (Subtype_Indication, Node_Id, Default_Empty)));
+Sy (Reverse_Present, Flag),
+Sy (Name, Node_Id, Default_Empty),
+Sy (Iterator_Filter, No

[gcc r15-6763] ada: Set syntactic node properties immediately when creating the nodes

2025-01-10 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:c43a53307d2e26663e9a2aca39672aea39c25e7b

commit r15-6763-gc43a53307d2e26663e9a2aca39672aea39c25e7b
Author: Piotr Trojanek 
Date:   Mon Dec 23 10:05:47 2024 +0100

ada: Set syntactic node properties immediately when creating the nodes

When creating a node, we can directly set its syntactic properties.
Code cleanup; semantics is unaffected.

gcc/ada/ChangeLog:

* contracts.adb (Build_Call_Helper_Decl): Tune whitespace.
* exp_attr.adb (Analyze_Attribute): Set Of_Present while
creating the node; reorder setting Subtype_Indication to match the
syntax order.
* exp_ch3.adb (Build_Equivalent_Aggregate): Likewise for Box_Present
and Expression properties.
* sem_ch12.adb (Analyze_Formal_Derived_Type): Set type properties
when creating the nodes.
* sem_ch3.adb (Check_Anonymous_Access_Component): Likewise.

Diff:
---
 gcc/ada/contracts.adb |  4 ++--
 gcc/ada/exp_attr.adb  |  8 
 gcc/ada/exp_ch3.adb   |  5 ++---
 gcc/ada/sem_ch12.adb  | 15 +--
 gcc/ada/sem_ch3.adb   | 13 ++---
 5 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb
index 1c9161b8a37e..8b94a67639f2 100644
--- a/gcc/ada/contracts.adb
+++ b/gcc/ada/contracts.adb
@@ -4066,8 +4066,8 @@ package body Contracts is
 
  begin
 Spec := Build_Call_Helper_Spec (Helper_Id);
-Set_Must_Override  (Spec, False);
-Set_Must_Not_Override  (Spec, False);
+Set_Must_Override (Spec, False);
+Set_Must_Not_Override (Spec, False);
 Set_Is_Inlined (Helper_Id);
 Set_Is_Public  (Helper_Id);
 
diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index cc42d6470601..b896228a70e3 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -6422,10 +6422,10 @@ package body Exp_Attr is
begin
   Iter :=
 Make_Iterator_Specification (Loc,
-Defining_Identifier => Elem,
-Name => Relocate_Node (Prefix (N)),
-Subtype_Indication => Empty);
-  Set_Of_Present (Iter);
+  Defining_Identifier => Elem,
+  Subtype_Indication  => Empty,
+  Of_Present  => True,
+  Name=> Relocate_Node (Prefix (N)));
 
   New_Loop := Make_Loop_Statement (Loc,
 Iteration_Scheme =>
diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index d95b91780306..0dfd8102df18 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -1349,9 +1349,8 @@ package body Exp_Ch3 is
 
  Append_To (Component_Associations (Aggr),
Make_Component_Association (Loc,
- Choices=> New_List (Make_Others_Choice (Loc)),
- Expression => Empty));
- Set_Box_Present (Last (Component_Associations (Aggr)));
+ Choices => New_List (Make_Others_Choice (Loc)),
+ Box_Present => True));
 
  if Typ /= Full_Typ then
 Analyze_And_Resolve (Aggr, Full_View (Base_Type (Full_Typ)));
diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
index 088a9ccfb589..dad8c73729e9 100644
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -3097,13 +3097,11 @@ package body Sem_Ch12 is
  Defining_Identifier   => T,
  Discriminant_Specifications   => Discriminant_Specifications (N),
  Unknown_Discriminants_Present => Unk_Disc,
+ Abstract_Present  => Abstract_Present (Def),
+ Limited_Present   => Limited_Present (Def),
  Subtype_Indication=> Subtype_Mark (Def),
+ Synchronized_Present  => Synchronized_Present (Def),
  Interface_List=> Interface_List (Def));
-
- Set_Abstract_Present (New_N, Abstract_Present (Def));
- Set_Limited_Present  (New_N, Limited_Present  (Def));
- Set_Synchronized_Present (New_N, Synchronized_Present (Def));
-
   else
  New_N :=
Make_Full_Type_Declaration (Loc,
@@ -3112,12 +3110,9 @@ package body Sem_Ch12 is
Discriminant_Specifications (Parent (T)),
  Type_Definition =>
Make_Derived_Type_Definition (Loc,
+ Abstract_Present   => Abstract_Present (Def),
+ Limited_Present=> Limited_Present (Def),
  Subtype_Indication => Subtype_Mark (Def)));
-
- Set_Abstract_Present
-   (Type_Definition (New_N), Abstract_Present (Def));
- Set_Limited_Present
-   (Type_Definition (New_N), Limited_Present  (Def));
   end if;
 
   Rewrite (N, New_N);
diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem

[gcc r15-6764] ada: Remove empty line.

2025-01-10 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:c92f9f0d949c5c36fbd257a80644ae7231e396fd

commit r15-6764-gc92f9f0d949c5c36fbd257a80644ae7231e396fd
Author: Marc Poulhiès 
Date:   Thu Jan 2 16:03:32 2025 +0100

ada: Remove empty line.

gcc/ada/ChangeLog:

* env.h: Remove last empty line.

Diff:
---
 gcc/ada/env.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/ada/env.h b/gcc/ada/env.h
index b80b7e9a0fca..58a92b9d7f23 100644
--- a/gcc/ada/env.h
+++ b/gcc/ada/env.h
@@ -33,5 +33,4 @@ extern void __gnat_getenv (char *name, int *len, char 
**value);
 extern void __gnat_setenv (char *name, char *value);
 extern char **__gnat_environ (void);
 extern void __gnat_unsetenv (char *name);
-extern void __gnat_clearenv (void);
-
+extern void __gnat_clearenv (void);


[gcc r15-6762] ada: Turn Is_Effective_Use_Clause from syntactic to semantic flag

2025-01-10 Thread Marc Poulhiès via Gcc-cvs
https://gcc.gnu.org/g:8c850ddf0f2648e9be6067bb76cc2322c1a8b6a5

commit r15-6762-g8c850ddf0f2648e9be6067bb76cc2322c1a8b6a5
Author: Piotr Trojanek 
Date:   Fri Dec 20 13:09:22 2024 +0100

ada: Turn Is_Effective_Use_Clause from syntactic to semantic flag

For a USE clause, being effective is a semantic property, not a syntactic one.
AST cleanup; behavior is unaffected.

gcc/ada/ChangeLog:

* gen_il-gen-gen_nodes.adb (Gen_Nodes): Change Is_Effective_Use_Clause
from syntactic to semantic property.

Diff:
---
 gcc/ada/gen_il-gen-gen_nodes.adb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/gen_il-gen-gen_nodes.adb b/gcc/ada/gen_il-gen-gen_nodes.adb
index ca46bcebdd98..1f5dc6d3803e 100644
--- a/gcc/ada/gen_il-gen-gen_nodes.adb
+++ b/gcc/ada/gen_il-gen-gen_nodes.adb
@@ -782,7 +782,7 @@ begin -- Gen_IL.Gen.Gen_Nodes
 
Cc (N_Use_Package_Clause, N_Later_Decl_Item,
(Sy (Name, Node_Id, Default_Empty),
-Sy (Is_Effective_Use_Clause, Flag),
+Sm (Is_Effective_Use_Clause, Flag),
 Sm (Entity_Or_Associated_Node, Node_Id), -- just Associated_Node
 Sm (Hidden_By_Use_Clause, Elist_Id),
 Sm (More_Ids, Flag),
@@ -1497,8 +1497,8 @@ begin -- Gen_IL.Gen.Gen_Nodes
 
Cc (N_Use_Type_Clause, Node_Kind,
(Sy (Subtype_Mark, Node_Id, Default_Empty),
-Sy (Is_Effective_Use_Clause, Flag),
 Sy (All_Present, Flag),
+Sm (Is_Effective_Use_Clause, Flag),
 Sm (Hidden_By_Use_Clause, Elist_Id),
 Sm (More_Ids, Flag),
 Sm (Next_Use_Clause, Node_Id),


[gcc r15-6769] ipa-cp: Fold-convert values when necessary (PR 118138)

2025-01-10 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:d019ab4f115caab48316c185c007765719e93052

commit r15-6769-gd019ab4f115caab48316c185c007765719e93052
Author: Martin Jambor 
Date:   Sat Jan 4 20:40:07 2025 +0100

ipa-cp: Fold-convert values when necessary (PR 118138)

PR 118138 and quite a few duplicates that it has acquired in a short
time show that even though we are careful to make sure we do not lose
any bits when newly allowing type conversions in jump-functions, we
still need to perform the fold conversions during IPA constant
propagation and not just at the end in order to properly perform
sign-extensions or zero-extensions as appropriate.

This patch does just that, changing a safety predicate we already use
at the appropriate places to return the necessary type.

gcc/ChangeLog:

2025-01-03  Martin Jambor  

PR ipa/118138
* ipa-cp.cc (ipacp_value_safe_for_type): Return the appropriate
type instead of a bool, accept NULL_TREE VALUEs.
(propagate_vals_across_arith_jfunc): Use the new returned value of
ipacp_value_safe_for_type.
(propagate_vals_across_ancestor): Likewise.
(propagate_scalar_across_jump_function): Likewise.

gcc/testsuite/ChangeLog:

2025-01-03  Martin Jambor  

PR ipa/118138
* gcc.dg/ipa/pr118138.c: New test.

Diff:
---
 gcc/ipa-cp.cc   | 33 +++--
 gcc/testsuite/gcc.dg/ipa/pr118138.c | 30 ++
 2 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 294389fba4c7..d89324a00775 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -1448,19 +1448,23 @@ initialize_node_lattices (struct cgraph_node *node)
   }
 }
 
-/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
-   PARAM_TYPE.  */
+/* Return VALUE if it is NULL_TREE or if it can be directly safely IPA-CP
+   propagated to a parameter of type PARAM_TYPE, or return a fold-converted
+   VALUE to PARAM_TYPE if that is possible.  Return NULL_TREE otherwise.  */
 
-static bool
+static tree
 ipacp_value_safe_for_type (tree param_type, tree value)
 {
+  if (!value)
+return NULL_TREE;
   tree val_type = TREE_TYPE (value);
   if (param_type == val_type
-  || useless_type_conversion_p (param_type, val_type)
-  || fold_convertible_p (param_type, value))
-return true;
+  || useless_type_conversion_p (param_type, val_type))
+return value;
+  if (fold_convertible_p (param_type, value))
+return fold_convert (param_type, value);
   else
-return false;
+return NULL_TREE;
 }
 
 /* Return the result of a (possibly arithmetic) operation on the constant
@@ -2210,8 +2214,8 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs,
{
  tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
 src_val, res_type);
- if (!cstval
- || !ipacp_value_safe_for_type (res_type, cstval))
+ cstval = ipacp_value_safe_for_type (res_type, cstval);
+ if (!cstval)
break;
 
  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
@@ -2235,8 +2239,8 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs,
 
tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
   src_val, res_type);
-   if (cstval
-   && ipacp_value_safe_for_type (res_type, cstval))
+   cstval = ipacp_value_safe_for_type (res_type, cstval);
+   if (cstval)
  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
  src_offset);
else
@@ -2284,8 +2288,8 @@ propagate_vals_across_ancestor (struct cgraph_edge *cs,
   for (src_val = src_lat->values; src_val; src_val = src_val->next)
 {
   tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
-
-  if (t && ipacp_value_safe_for_type (param_type, t))
+  t = ipacp_value_safe_for_type (param_type, t);
+  if (t)
ret |= dest_lat->add_value (t, cs, src_val, src_idx);
   else
ret |= dest_lat->set_contains_variable ();
@@ -2310,7 +2314,8 @@ propagate_scalar_across_jump_function (struct cgraph_edge 
*cs,
   if (jfunc->type == IPA_JF_CONST)
 {
   tree val = ipa_get_jf_constant (jfunc);
-  if (ipacp_value_safe_for_type (param_type, val))
+  val = ipacp_value_safe_for_type (param_type, val);
+  if (val)
return dest_lat->add_value (val, cs, NULL, 0);
   else
return dest_lat->set_contains_variable ();
diff --git a/gcc/testsuite/gcc.dg/ipa/pr118138.c 
b/gcc/testsuite/gcc.dg/ipa/pr118138.c
new file mode 100644
index 000000000000..5c94253f58b2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr118138.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inli

[gcc r14-11199] AArch64: correct Cortex-X4 MIDR

2025-01-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:26f78a4249b051c7755a44ba1ab1743f4133b0c2

commit r14-11199-g26f78a4249b051c7755a44ba1ab1743f4133b0c2
Author: Tamar Christina 
Date:   Fri Jan 10 21:33:57 2025 +0000

AArch64: correct Cortex-X4 MIDR

The Parts Num field for the MIDR for Cortex-X4 is wrong.  It's currently the
parts number for a Cortex-A720 (which does have the right number).

The correct number can be found in the Cortex-X4 Technical Reference Manual [1]
on page 382 in Issue Number 5.

[1] https://developer.arm.com/documentation/102484/latest/

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Fix cortex-x4 parts
num.

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index a919ab7d8a5a..b1eaf5512b57 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -185,7 +185,7 @@ AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  
(SVE2_BITPERM, MEMTAG, I8M
 
 AARCH64_CORE("cortex-x3",  cortexx3, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversen2, 0x41, 0xd4e, -1)
 
-AARCH64_CORE("cortex-x4",  cortexx4, cortexa57, V9_2A,  (SVE2_BITPERM, MEMTAG, 
PROFILE), neoversen2, 0x41, 0xd81, -1)
+AARCH64_CORE("cortex-x4",  cortexx4, cortexa57, V9_2A,  (SVE2_BITPERM, MEMTAG, 
PROFILE), neoversen2, 0x41, 0xd82, -1)
 
 AARCH64_CORE("cortex-x925", cortexx925, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG, PROFILE), neoversen2, 0x41, 0xd85, -1)


[gcc r15-6815] Use relations when simplifying MIN and MAX.

2025-01-10 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:b0eeb540497c7b9dee01f8724f9a4978b53a12ae

commit r15-6815-gb0eeb540497c7b9dee01f8724f9a4978b53a12ae
Author: Andrew MacLeod 
Date:   Fri Jan 10 13:33:01 2025 -0500

Use relations when simplifying MIN and MAX.

Query for known relations between the operands, and pass that to
fold_range to help simplify MIN and MAX relations.
Make it type agnostic as well.

Adapt testcases from DOM to EVRP (e suffix) and test floats (f suffix).

PR tree-optimization/88575
gcc/
* vr-values.cc (simplify_using_ranges::fold_cond_with_ops): Query
relation between op0 and op1 and utilize it.
(simplify_using_ranges::simplify): Do not eliminate float checks.

gcc/testsuite/
* gcc.dg/tree-ssa/minmax-27.c: Disable VRP.
* gcc.dg/tree-ssa/minmax-27e.c: New.
* gcc.dg/tree-ssa/minmax-27f.c: New.
* gcc.dg/tree-ssa/minmax-28.c: Disable VRP.
* gcc.dg/tree-ssa/minmax-28e.c: New.
* gcc.dg/tree-ssa/minmax-28f.c: New.

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c  |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c | 118 +
 gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c | 118 +
 gcc/testsuite/gcc.dg/tree-ssa/minmax-28.c  |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/minmax-28e.c | 117 
 gcc/testsuite/gcc.dg/tree-ssa/minmax-28f.c | 117 
 gcc/vr-values.cc   |  13 +++-
 7 files changed, 481 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c 
b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c
index 4b94203b0d05..a99af6eb521e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dom2" } */
+/* { dg-options "-O2 -fdump-tree-dom2 -fno-tree-vrp" } */
 
 
 int min1(int a, int b)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c 
b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c
new file mode 100644
index 000000000000..8498ffd20173
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c
@@ -0,0 +1,118 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+
+int min1(int a, int b)
+{
+if (a <= b)
+return a < b ? a : b;
+return 0;
+}
+
+int min2(int a, int b)
+{
+if (a <= b)
+return a > b ? b : a;
+return 0;
+}
+
+int min3(int a, int b)
+{
+if (a < b)
+return a < b ? a : b;
+return 0;
+}
+
+int min4(int a, int b)
+{
+if (a < b)
+return a > b ? b : a;
+return 0;
+}
+
+int min5(int a, int b)
+{
+if (a <= b)
+return a <= b ? a : b;
+return 0;
+}
+
+int min6(int a, int b)
+{
+if (a <= b)
+return a >= b ? b : a;
+return 0;
+}
+
+int min7(int a, int b)
+{
+if (a < b)
+return a <= b ? a : b;
+return 0;
+}
+
+int min8(int a, int b)
+{
+if (a < b)
+return a >= b ? b : a;
+return 0;
+}
+
+int min9(int a, int b)
+{
+if (b >= a)
+return a < b ? a : b;
+return 0;
+}
+
+int min10(int a, int b)
+{
+if (b >= a)
+return a > b ? b : a;
+return 0;
+}
+
+int min11(int a, int b)
+{
+if (b > a)
+return a < b ? a : b;
+return 0;
+}
+
+int min12(int a, int b)
+{
+if (b > a)
+return a > b ? b : a;
+return 0;
+}
+
+int min13(int a, int b)
+{
+if (b >= a)
+return a <= b ? a : b;
+return 0;
+}
+
+int min14(int a, int b)
+{
+if (b >= a)
+return a >= b ? b : a;
+return 0;
+}
+
+int min15(int a, int b)
+{
+if (b > a)
+return a <= b ? a : b;
+return 0;
+}
+
+int min16(int a, int b)
+{
+if (b > a)
+return a >= b ? b : a;
+return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "evrp" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c 
b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c
new file mode 100644
index 000000000000..63398d4495f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c
@@ -0,0 +1,118 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-evrp" } */
+
+
+float min1(float a, float b)
+{
+if (a <= b)
+return a < b ? a : b;
+return 0.0;
+}
+
+float min2(float a, float b)
+{
+if (a <= b)
+return a > b ? b : a;
+return 0.0;
+}
+
+float min3(float a, float b)
+{
+if (a < b)
+return a < b ? a : b;
+return 0.0;
+}
+
+float min4(float a, float b)
+{
+if (a < b)
+return a > b ? b : a;
+return 0.0;
+}
+
+float min5(float a, float b)
+{
+if (a <= b)
+return a <= b ? a : b;
+return 0.0;
+}
+
+float min6(float a, float b)
+{
+if (a <= b)
+return a >= b ? b : a;
+return 0.0;
+}
+
+float min7(float a, float b)
+{
+if (a < b)
+return a <= b ? a : b;
+return 0.0;
+}
+
+float mi

[gcc r13-9304] c++: ICE with noexcept and local specialization, again [PR114349]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:0430ec8881d657ddedff6c9d9fa4ea5db125f462

commit r13-9304-g0430ec8881d657ddedff6c9d9fa4ea5db125f462
Author: Marek Polacek 
Date:   Fri Jan 10 17:26:18 2025 -0500

c++: ICE with noexcept and local specialization, again [PR114349]

Patrick noticed that my r14-9339-gdc6c3bfb59baab patch is wrong;
we're dealing with a noexcept-spec there, not a noexcept-expr, so
setting cp_noexcept_operand et al is incorrect.  Back to the drawing
board then.

To fix noexcept84.C, we should probably avoid doing push_to_top_level
in certain cases.  maybe_push_to_top_level didn't work here as-is, so
I changed it to not push to top level if decl_function_context is
non-null, when we are not dealing with a lambda.

This also fixes c++/114349, introduced by r14-9339.

This GCC 13 backport squashes r14-9659 and r14-9339.

PR c++/114349

gcc/cp/ChangeLog:

* name-lookup.cc (maybe_push_to_top_level): For a non-lambda,
don't push to top level if decl_function_context is non-null.
* pt.cc (maybe_instantiate_noexcept): Use maybe_push_to_top_level.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept84.C: New test.
* g++.dg/cpp0x/noexcept85.C: New test.
* g++.dg/cpp0x/noexcept86.C: New test.

Diff:
---
 gcc/cp/name-lookup.cc   | 11 +++
 gcc/cp/pt.cc|  4 ++--
 gcc/testsuite/g++.dg/cpp0x/noexcept84.C | 32 
 gcc/testsuite/g++.dg/cpp0x/noexcept85.C | 33 +
 gcc/testsuite/g++.dg/cpp0x/noexcept86.C | 25 +
 5 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc
index 7c61bc3bf611..1ea25f076b85 100644
--- a/gcc/cp/name-lookup.cc
+++ b/gcc/cp/name-lookup.cc
@@ -8244,10 +8244,13 @@ maybe_push_to_top_level (tree d)
 {
   /* Push if D isn't function-local, or is a lambda function, for which name
  resolution is already done.  */
-  bool push_to_top
-= !(current_function_decl
-   && !LAMBDA_FUNCTION_P (d)
-   && decl_function_context (d) == current_function_decl);
+  const bool push_to_top
+= (LAMBDA_FUNCTION_P (d)
+   || (TREE_CODE (d) == TYPE_DECL
+  && TREE_TYPE (d)
+  && LAMBDA_TYPE_P (TREE_TYPE (d)))
+   || !current_function_decl
+   || !decl_function_context (d));
 
   if (push_to_top)
 push_to_top_level ();
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 5a6bf80c3d42..ddfa3c25d10e 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -26772,7 +26772,7 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
}
   else if (push_tinst_level (fn))
{
- push_to_top_level ();
+ const bool push_to_top = maybe_push_to_top_level (fn);
  push_access_scope (fn);
  push_deferring_access_checks (dk_no_deferred);
  input_location = DECL_SOURCE_LOCATION (fn);
@@ -26809,7 +26809,7 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
  pop_deferring_access_checks ();
  pop_access_scope (fn);
  pop_tinst_level ();
- pop_from_top_level ();
+ maybe_pop_from_top_level (push_to_top);
}
   else
spec = noexcept_false_spec;
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept84.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C
new file mode 100644
index 000000000000..06f33264f77c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C
@@ -0,0 +1,32 @@
+// PR c++/114114
+// { dg-do compile { target c++11 } }
+
+template
+constexpr void
+test ()
+{
+  constexpr bool is_yes = B;
+  struct S {
+constexpr S() noexcept(is_yes) { }
+  };
+  S s;
+}
+
+constexpr bool foo() { return true; }
+
+template
+constexpr void
+test2 ()
+{
+  constexpr T (*pfn)() = &foo;
+  struct S {
+constexpr S() noexcept(pfn()) { }
+  };
+  S s;
+}
+
+int main()
+{
+  test();
+  test2();
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept85.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept85.C
new file mode 100644
index 000000000000..b415bb46bc94
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept85.C
@@ -0,0 +1,33 @@
+// PR c++/114349
+// { dg-do compile { target c++11 } }
+
+using A = struct {};
+template  class, typename, typename>
+using B = A;
+template 
+using C = typename T::D;
+struct E {
+  using D = B;
+};
+template  constexpr bool foo (A) { return false; }
+template  struct F {
+  using G = T;
+  using H = E;
+  F(const F &);
+  void operator=(F) noexcept(foo  (H::D{}));
+};
+template 
+using I = F;
+template 
+using J = I;
+struct K {
+  typedef J L;
+  L k;
+  K();
+};
+struct M {
+  bool bar () const;
+  K::L m;
+};
+K n;
+bool M::bar () const { n.k = m; return true; }
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept86.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept86.C
new file mode 100644
index 000000000000..2d040c090f50
--- /dev/nu

[gcc r15-6819] c++: modules and function attributes

2025-01-10 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:664bd76a23def2d458bb3c531486b4c220f29c11

commit r15-6819-g664bd76a23def2d458bb3c531486b4c220f29c11
Author: Jason Merrill 
Date:   Fri Jan 10 18:00:20 2025 -0500

c++: modules and function attributes

30_threads/stop_token/stop_source/109339.cc was failing because we weren't
representing attribute access on the METHOD_TYPE for _Stop_state_ref.

The modules code expected attributes to appear on tt_variant_type and not
on tt_derived_type, but that's backwards since build_type_attribute_variant
gives a type with attributes its own TYPE_MAIN_VARIANT.

gcc/cp/ChangeLog:

* module.cc (trees_out::type_node): Write attributes for
tt_derived_type, not tt_variant_type.
(trees_in::tree_node): Likewise for reading.

gcc/testsuite/ChangeLog:

* g++.dg/modules/attrib-2_a.C: New test.
* g++.dg/modules/attrib-2_b.C: New test.

Diff:
---
 gcc/cp/module.cc  | 17 +
 gcc/testsuite/g++.dg/modules/attrib-2_a.C | 12 
 gcc/testsuite/g++.dg/modules/attrib-2_b.C |  9 +
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 321d4164a6a7..c932c4d0a90d 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -9189,7 +9189,10 @@ trees_out::type_node (tree type)
  tree_node (raises);
}
 
-  tree_node (TYPE_ATTRIBUTES (type));
+  /* build_type_attribute_variant creates a new TYPE_MAIN_VARIANT, so
+variants should all have the same set of attributes.  */
+  gcc_checking_assert (TYPE_ATTRIBUTES (type)
+  == TYPE_ATTRIBUTES (TYPE_MAIN_VARIANT (type)));
 
   if (streaming_p ())
{
@@ -9406,6 +9409,8 @@ trees_out::type_node (tree type)
   break;
 }
 
+  tree_node (TYPE_ATTRIBUTES (type));
+
   /* We may have met the type during emitting the above.  */
   if (ref_node (type) != WK_none)
 {
@@ -10090,6 +10095,13 @@ trees_in::tree_node (bool is_use)
break;
  }
 
+   /* In the exporting TU, a derived type with attributes was built by
+  build_type_attribute_variant as a distinct copy, with itself as
+  TYPE_MAIN_VARIANT.  We repeat that on import to get the version
+  without attributes as TYPE_CANONICAL.  */
+   if (tree attribs = tree_node ())
+ res = cp_build_type_attribute_variant (res, attribs);
+
int tag = i ();
if (!tag)
  {
@@ -10133,9 +10145,6 @@ trees_in::tree_node (bool is_use)
TYPE_USER_ALIGN (res) = true;
  }
 
-   if (tree attribs = tree_node ())
- res = cp_build_type_attribute_variant (res, attribs);
-
int quals = i ();
if (quals >= 0 && !get_overrun ())
  res = cp_build_qualified_type (res, quals);
diff --git a/gcc/testsuite/g++.dg/modules/attrib-2_a.C 
b/gcc/testsuite/g++.dg/modules/attrib-2_a.C
new file mode 100644
index 000000000000..96f667ceec8c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/attrib-2_a.C
@@ -0,0 +1,12 @@
+// { dg-additional-options "-fmodules -Wno-global-module" }
+// { dg-module-cmi M }
+
+export module M;
+
+export
+{
+  struct A { int i; };
+
+  __attribute ((access (none, 1)))
+  void f(const A&);
+}
diff --git a/gcc/testsuite/g++.dg/modules/attrib-2_b.C 
b/gcc/testsuite/g++.dg/modules/attrib-2_b.C
new file mode 100644
index ..c12ad117ce4f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/attrib-2_b.C
@@ -0,0 +1,9 @@
+// { dg-additional-options "-fmodules -Wmaybe-uninitialized" }
+
+import M;
+
+int main()
+{
+  A a;
+  f(a);
+}


[gcc r15-6818] c++: modules and class attributes

2025-01-10 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:fdabd93cde4aae38d6a67fe0927eca8cea1b22b9

commit r15-6818-gfdabd93cde4aae38d6a67fe0927eca8cea1b22b9
Author: Jason Merrill 
Date:   Sat Nov 23 10:00:18 2024 +0100

c++: modules and class attributes

std/time/traits/is_clock.cc was getting a warning about applying the
deprecated attribute to a variant of auto_ptr, which was wrong because it's
on the primary type.  This turned out to be because we were ignoring the
attributes on the definition of auto_ptr because the forward declaration in
unique_ptr.h has no attributes.  We need to merge attributes as usual in a
redeclaration.

gcc/cp/ChangeLog:

* module.cc (trees_in::decl_value): Merge attributes.

gcc/testsuite/ChangeLog:

* g++.dg/modules/attrib-1_a.C: New test.
* g++.dg/modules/attrib-1_b.C: New test.

Diff:
---
 gcc/cp/module.cc  |  4 
 gcc/testsuite/g++.dg/modules/attrib-1_a.C | 13 +
 gcc/testsuite/g++.dg/modules/attrib-1_b.C | 10 ++
 3 files changed, 27 insertions(+)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 4fbe522264b3..321d4164a6a7 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -8637,6 +8637,10 @@ trees_in::decl_value ()
  TYPE_STUB_DECL (type) = stub_decl ? stub_decl : inner;
  if (stub_decl)
TREE_TYPE (stub_decl) = type;
+
+ /* Handle separate declarations with different attributes.  */
+ tree &eattr = TYPE_ATTRIBUTES (TREE_TYPE (existing));
+ eattr = merge_attributes (eattr, TYPE_ATTRIBUTES (type));
}
 
   if (inner_tag)
diff --git a/gcc/testsuite/g++.dg/modules/attrib-1_a.C 
b/gcc/testsuite/g++.dg/modules/attrib-1_a.C
new file mode 100644
index 000000000000..d5f89d0c0688
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/attrib-1_a.C
@@ -0,0 +1,13 @@
+// { dg-additional-options "-fmodules -Wno-global-module" }
+// { dg-module-cmi M }
+
+module;
+
+template  struct A {
+  void f() const { }
+} __attribute__ ((deprecated ("y tho")));
+
+export module M;
+
+export template 
+A a;// { dg-warning "deprecated" }
diff --git a/gcc/testsuite/g++.dg/modules/attrib-1_b.C 
b/gcc/testsuite/g++.dg/modules/attrib-1_b.C
new file mode 100644
index 000000000000..48ac751b03d1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/attrib-1_b.C
@@ -0,0 +1,10 @@
+// { dg-additional-options -fmodules }
+
+template  struct A;
+
+import M;
+
+int main()
+{
+  a.f();
+}


[gcc r15-6778] c: Fix up expr location for __builtin_stdc_rotate_* [PR118376]

2025-01-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:76b7f60ffdb26c56d2c71bbeddf7da601de0e50e

commit r15-6778-g76b7f60ffdb26c56d2c71bbeddf7da601de0e50e
Author: Jakub Jelinek 
Date:   Fri Jan 10 15:07:41 2025 +0100

c: Fix up expr location for __builtin_stdc_rotate_* [PR118376]

Seems I forgot to set_c_expr_source_range for the __builtin_stdc_rotate_*
case (the other __builtin_stdc_* cases already have it), which means
the locations in expr are uninitialized, sometimes causing ICEs in linemap
code, at other times just valgrind errors about uninitialized var uses.

2025-01-10  Jakub Jelinek  

PR c/118376
* c-parser.cc (c_parser_postfix_expression): Call
set_c_expr_source_range before break in the __builtin_stdc_rotate_*
case.

* gcc.dg/pr118376.c: New test.

Diff:
---
 gcc/c/c-parser.cc   |  1 +
 gcc/testsuite/gcc.dg/pr118376.c | 11 +++
 2 files changed, 12 insertions(+)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index c46aac5f0a2b..d2f45912cc43 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -12906,6 +12906,7 @@ c_parser_postfix_expression (c_parser *parser)
  expr.value = build2_loc (loc, COMPOUND_EXPR,
   TREE_TYPE (expr.value),
   instrument_expr, expr.value);
+   set_c_expr_source_range (&expr, loc, close_paren_loc);
break;
  }
tree barg1 = arg;
diff --git a/gcc/testsuite/gcc.dg/pr118376.c b/gcc/testsuite/gcc.dg/pr118376.c
new file mode 100644
index 000000000000..16b2f54549b4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr118376.c
@@ -0,0 +1,11 @@
+/* PR c/118376 */
+/* { dg-do compile } */
+/* { dg-options "-Wsign-conversion" } */
+
+unsigned x;
+
+void
+foo ()
+{
+  __builtin_memset (&x, (long long) __builtin_stdc_rotate_right (x, 0), 1);
+} /* { dg-warning "conversion to 'int' from 'long long int' may change the 
sign of the result" "" { target *-*-* } .-1 } */


[gcc r12-10893] tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield

2025-01-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:5d6afc601443fa5c03680fb7f39b7dc1f36766a8

commit r12-10893-g5d6afc601443fa5c03680fb7f39b7dc1f36766a8
Author: Richard Biener 
Date:   Tue Jun 25 16:13:02 2024 +0200

tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield

The following makes analysis and transform agree on constraints.

PR tree-optimization/115646
* tree-call-cdce.cc (check_pow): Check for bit_sz values
as allowed by transform.

* gcc.dg/pr115646.c: New testcase.

(cherry picked from commit 453b1d291d1a0f89087ad91cf6b1bed1ec68eff3)

Diff:
---
 gcc/testsuite/gcc.dg/pr115646.c | 14 ++
 gcc/tree-call-cdce.cc   |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr115646.c b/gcc/testsuite/gcc.dg/pr115646.c
new file mode 100644
index ..7938a309513f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115646.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target int32plus } */
+
+extern double pow(double x, double y);
+
+struct S {
+unsigned int a : 3, b : 8, c : 21;
+};
+
+void foo (struct S *p)
+{
+  pow (p->c, 42);
+}
diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc
index 83991fe373e2..918781298357 100644
--- a/gcc/tree-call-cdce.cc
+++ b/gcc/tree-call-cdce.cc
@@ -260,7 +260,7 @@ check_pow (gcall *pow_call)
   /* If the type of the base is too wide,
  the resulting shrink wrapping condition
 will be too conservative.  */
-  if (bit_sz > MAX_BASE_INT_BIT_SIZE)
+  if (bit_sz != 8 && bit_sz != 16 && bit_sz != MAX_BASE_INT_BIT_SIZE)
 return false;
 
   return true;


[gcc r15-6777] rtl: Remove invalid compare simplification [PR117186]

2025-01-10 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:06c4cf398947b53b4bfc65752f9f879bb2d07924

commit r15-6777-g06c4cf398947b53b4bfc65752f9f879bb2d07924
Author: Richard Sandiford 
Date:   Fri Jan 10 12:51:15 2025 +

rtl: Remove invalid compare simplification [PR117186]

g:d882fe5150fbbeb4e44d007bb4964e5b22373021, posted at
https://gcc.gnu.org/pipermail/gcc-patches/2000-July/033786.html ,
added code to treat:

  (set (reg:CC cc) (compare:CC (gt:M (reg:CC cc) 0) (lt:M (reg:CC cc) 0)))

as a nop.  This PR shows that that isn't always correct.
The compare in the set above is between two 0/1 booleans (at least
on STORE_FLAG_VALUE==1 targets), whereas the unknown comparison that
produced the incoming (reg:CC cc) is unconstrained; it could be between
arbitrary integers, or even floats.  The fold is therefore replacing a
cc that is valid for both signed and unsigned comparisons with one that
is only known to be valid for signed comparisons.

  (gt (compare (gt cc 0) (lt cc 0)) 0)

does simplify to:

  (gt cc 0)

but:

  (gtu (compare (gt cc 0) (lt cc 0)) 0)

does not simplify to:

  (gtu cc 0)

The optimisation didn't come with a testcase, but it was added for
i386's cmpstrsi, now cmpstrnsi.  That probably doesn't matter as much
as it once did, since it's now conditional on -minline-all-stringops.
But the patch is almost 25 years old, so whatever the original
motivation was, it seems likely that other things now rely on it.

It therefore seems better to try to preserve the optimisation on rtl
rather than get rid of it.  To do that, we need to look at how the
result of the outer compare is used.  We'd therefore be looking at four
instructions (the gt, the lt, the compare, and the use of the compare),
but combine already allows that for 3-instruction combinations thanks
to:

  /* If the source is a COMPARE, look for the use of the comparison result
 and try to simplify it unless we already have used undobuf.other_insn.  */

When applied to boolean inputs, a comparison operator is
effectively a boolean logical operator (AND, ANDNOT, XOR, etc.).
simplify_logical_relational_operation already had code to simplify
logical operators between two comparison results, but:

* It only handled IOR, which doesn't cover all the cases needed here.
  The others are easily added.

* It treated comparisons of integers as having an ORDERED/UNORDERED result.
  Therefore:

  * it would not treat "true for LT + EQ + GT" as "always true" for
comparisons between integers, because the mask excluded the UNORDERED
condition.

  * it would try to convert "true for LT + GT" into LTGT even for
comparisons between integers.  To prevent an ICE later, the code used:

   /* Many comparison codes are only valid for certain mode classes.  */
   if (!comparison_code_valid_for_mode (code, mode))
 return 0;

However, this used the wrong mode, since "mode" is here the integer
result of the comparisons (and the mode of the IOR), not the mode of
the things being compared.  Thus the effect was to reject all
floating-point-only codes, even when comparing floats.

  I think instead the code should detect whether the comparison is between
  integer values and remove UNORDERED from consideration if so.  It then
  always produces a valid comparison (or an always true/false result),
  and so comparison_code_valid_for_mode is not needed.  In particular,
  "true for LT + GT" becomes NE for comparisons between integers but
  remains LTGT for comparisons between floats.

* There was a missing check for whether the comparison inputs had
  side effects.

While there, it also seemed worth extending
simplify_logical_relational_operation to unsigned comparisons, since
that makes the testing easier.

As far as that testing goes: the patch exhaustively tests all
combinations of integer comparisons in:

  (cmp1 (cmp2 X Y) (cmp3 X Y))

for the 10 integer comparisons, giving 1000 fold attempts in total.
It then tries all combinations of (X in {-1,0,1} x Y in {-1,0,1})
on the result of the fold, giving 9 checks per fold, or 9000 in total.
That's probably more than is typical for self-tests, but it seems to
complete in negligible time, even for -O0 builds.

gcc/
PR rtl-optimization/117186
* rtl.h (simplify_context::simplify_logical_relational_operation): Add
an invert0_p parameter.
* simplify-rtx.cc (unsigned_comparison_to_mask): New function.
(mask_to_unsigned_comparison): Likewise.
(comparison_code_valid_for_mode): Delete.
(simplify_context::simplify_logical_relational_operation): Add
  

[gcc r15-6772] testsuite: generalized field-merge tests for <32-bit int [PR118025]

2025-01-10 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:d3c91b0439f67a6dc20ebb3bee4eeaf436eb7190

commit r15-6772-gd3c91b0439f67a6dc20ebb3bee4eeaf436eb7190
Author: Alexandre Oliva 
Date:   Fri Jan 10 09:32:27 2025 -0300

testsuite: generalized field-merge tests for <32-bit int [PR118025]

Explicitly convert constants to the desired types, so as to not elicit
warnings about implicit truncations, nor execution errors, on targets
whose ints are narrower than 32 bits.


for  gcc/testsuite/ChangeLog

PR testsuite/118025
* gcc.dg/field-merge-1.c: Convert constants to desired types.
* gcc.dg/field-merge-3.c: Likewise.
* gcc.dg/field-merge-4.c: Likewise.
* gcc.dg/field-merge-5.c: Likewise.
* gcc.dg/field-merge-11.c: Likewise.
* gcc.dg/field-merge-17.c: Don't mess with padding bits.

Diff:
---
 gcc/testsuite/gcc.dg/field-merge-1.c  |  4 ++--
 gcc/testsuite/gcc.dg/field-merge-11.c | 10 +++---
 gcc/testsuite/gcc.dg/field-merge-17.c |  4 +++-
 gcc/testsuite/gcc.dg/field-merge-3.c  |  4 ++--
 gcc/testsuite/gcc.dg/field-merge-4.c  |  6 +++---
 gcc/testsuite/gcc.dg/field-merge-5.c  |  6 +++---
 6 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c 
b/gcc/testsuite/gcc.dg/field-merge-1.c
index 4405d40ee79d..4e7f6ae9332a 100644
--- a/gcc/testsuite/gcc.dg/field-merge-1.c
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -25,8 +25,8 @@ struct TB {
   unsigned char s;
 } __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
 
-#define vc 0xaa
-#define vi 0x12345678
+#define vc (unsigned char)0xaa
+#define vi (unsigned int)0x12345678
 
 struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
 struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
diff --git a/gcc/testsuite/gcc.dg/field-merge-11.c 
b/gcc/testsuite/gcc.dg/field-merge-11.c
index fe627cddd7fd..9e606e3bef16 100644
--- a/gcc/testsuite/gcc.dg/field-merge-11.c
+++ b/gcc/testsuite/gcc.dg/field-merge-11.c
@@ -10,7 +10,11 @@ struct s {
   int c;
 } __attribute__ ((aligned (4)));
 
-struct s p = { 42, (short)(0xef1 - 0x1000), 0x12345678 };
+struct s p = {
+  (short)(unsigned short)42,
+  (short)(unsigned short)(0xef1 - 0x1000),
+  (int)(unsigned int)0x12345678
+};
 
 void f (void) {
   if (0
@@ -19,9 +23,9 @@ void f (void) {
   || (int)(signed char)p.b != (int)(signed char)(0xef1 - 0x1000)
   || (unsigned)(unsigned char)p.b != (unsigned)(unsigned char)(0xef1 - 
0x1000)
   || (unsigned)p.b != (unsigned short)(0xef1 - 0x1000)
-  || (int)(short)p.b != (int)(0xef1 - 0x1000)
+  || (int)(short)p.b != (int)(short)(unsigned short)(0xef1 - 0x1000)
   || (long)(unsigned char)(p.c >> 8) != (long)(unsigned char)0x123456
-  || p.c != 0x12345678
+  || p.c != (int)(unsigned int)0x12345678
   )
 __builtin_abort ();
 }
diff --git a/gcc/testsuite/gcc.dg/field-merge-17.c 
b/gcc/testsuite/gcc.dg/field-merge-17.c
index a42658ac5c51..35ead9540606 100644
--- a/gcc/testsuite/gcc.dg/field-merge-17.c
+++ b/gcc/testsuite/gcc.dg/field-merge-17.c
@@ -3,6 +3,8 @@
 
 /* Check that we can optimize misaligned double-words.  */
 
+#include 
+
 struct s {
   short a;
   long long b;
@@ -33,7 +35,7 @@ int main () {
   if (fp () > 0)
 __builtin_abort ();
   unsigned char *pc = (unsigned char *)&p;
-  for (int i = 0; i < sizeof (p); i++)
+  for (int i = 0; i < offsetof (struct s, e) + sizeof (p.e); i++)
 {
   pc[i] = 1;
   if (fp () < 0)
diff --git a/gcc/testsuite/gcc.dg/field-merge-3.c 
b/gcc/testsuite/gcc.dg/field-merge-3.c
index a9fe404fa426..e9af4915ad8c 100644
--- a/gcc/testsuite/gcc.dg/field-merge-3.c
+++ b/gcc/testsuite/gcc.dg/field-merge-3.c
@@ -15,8 +15,8 @@ struct T2 {
   unsigned int z;
 } __attribute__((__aligned__(8)));
 
-#define vc 0xaa
-#define vi 0x12345678
+#define vc (unsigned char)0xaa
+#define vi (unsigned int)0x12345678
 
 struct T1 v1 = { { vc + !BIG_ENDIAN_P, vc + BIG_ENDIAN_P }, vc, vi };
 struct T2 v2 = { (vc << 8) | (vc - 1), vc, vi };
diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c 
b/gcc/testsuite/gcc.dg/field-merge-4.c
index c629069e52b2..7c63123a282d 100644
--- a/gcc/testsuite/gcc.dg/field-merge-4.c
+++ b/gcc/testsuite/gcc.dg/field-merge-4.c
@@ -18,9 +18,9 @@ struct T2 {
   unsigned int z;
 } __attribute__((__packed__, __aligned__(4)));
 
-#define vc 0xaa
-#define vs 0xccdd
-#define vi 0x12345678
+#define vc (unsigned char)0xaa
+#define vs (unsigned short)0xccdd
+#define vi (unsigned int)0x12345678
 
 struct T1 v1 = { -1, vc, 1, vs, vi };
 struct T2 v2 = { -1, 0, vc, 1, vs, vi };
diff --git a/gcc/testsuite/gcc.dg/field-merge-5.c 
b/gcc/testsuite/gcc.dg/field-merge-5.c
index 1580b14bcc93..1b5d1a8cb16e 100644
--- a/gcc/testsuite/gcc.dg/field-merge-5.c
+++ b/gcc/testsuite/gcc.dg/field-merge-5.c
@@ -18,9 +18,9 @@ struct T2 {
   unsigned int z;
 } __attribute__((__packed__, __aligned__(8)));
 
-#define vc 0xaa
-#define vs 0xccdd
-#define vi 0x12345678
+#define vc (unsigned char)0xaa
+#defin

[gcc r15-6775] [ifcombine] fix mask variable test to match use [PR118344]

2025-01-10 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:fd4e979d0c66567c2cb89f97b51abd35a8773d88

commit r15-6775-gfd4e979d0c66567c2cb89f97b51abd35a8773d88
Author: Alexandre Oliva 
Date:   Fri Jan 10 09:32:43 2025 -0300

[ifcombine] fix mask variable test to match use [PR118344]

There was a cut&pasto in the rr_and_mask's adjustment to match the
combined type: the test on whether there was a mask already was
testing the wrong variable, and then it might crash or otherwise fail
accessing an undefined mask.  This only hit with checking enabled,
and rarely at that.


for  gcc/ChangeLog

PR tree-optimization/118344
* gimple-fold.cc (fold_truth_andor_for_ifcombine): Fix typo in
rr_and_mask's type adjustment test.

for  gcc/testsuite/ChangeLog

PR tree-optimization/118344
* gcc.dg/field-merge-19.c: New.

Diff:
---
 gcc/gimple-fold.cc|  2 +-
 gcc/testsuite/gcc.dg/field-merge-19.c | 41 +++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 0ad92de3a218..20b5024d861d 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8644,7 +8644,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
  xlr_bitpos);
   else
lr_mask = wi::shifted_mask (xlr_bitpos, lr_bitsize, false, rnprec);
-  if (rl_and_mask.get_precision ())
+  if (rr_and_mask.get_precision ())
rr_mask = wi::lshift (wide_int::from (rr_and_mask, rnprec, UNSIGNED),
  xrr_bitpos);
   else
diff --git a/gcc/testsuite/gcc.dg/field-merge-19.c 
b/gcc/testsuite/gcc.dg/field-merge-19.c
new file mode 100644
index ..5622baa52b0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-19.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fchecking" } */
+
+/* PR tree-optimization/118344 */
+
+/* This used to ICE attempting to extend a mask variable after testing the
+   wrong mask variable.  */
+
+int d, e, g, h, i, c, j;
+static short k;
+char o;
+static int *p;
+static long *a;
+int b[0];
+int q(int s, int t, int *u, int *v) {
+  for (int f = 0; f < s; f++)
+if ((t & v[f]) != u[f])
+  return 0;
+  return 1;
+}
+int w(int s, int t) {
+  int l[] = {t, t, t, t}, m[] = {e, e, 3, 1};
+  int n = q(s, d, l, m);
+  return n;
+}
+int x(unsigned s) {
+  unsigned r;
+  if (s >= -1)
+return 1;
+  r = 1000;
+  while (s > 1 / r)
+r /= 2;
+  return g ? 2 : 0;
+}
+void y() {
+  for (;;) {
+b[w(8, *p)] = h;
+for (; a + k; j = o)
+  i &= c = x(6) < 0;
+  }
+}


[gcc r15-6770] ifcombine field-merge: improve handling of dwords

2025-01-10 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:38401c58f4aae31fd29a16607e9018cb1f66c3ed

commit r15-6770-g38401c58f4aae31fd29a16607e9018cb1f66c3ed
Author: Alexandre Oliva 
Date:   Fri Jan 10 09:32:05 2025 -0300

ifcombine field-merge: improve handling of dwords

On 32-bit hosts, data types with 64-bit alignment aren't getting
treated as desired by ifcombine field-merging: we limit the choice of
modes at BITS_PER_WORD sizes, but when deciding the boundary for a
split, we'd limit the choice only by the alignment, so we wouldn't
even consider a split at an odd 32-bit boundary.  Fix that by limiting
the boundary choice by word choice as well.

Now, this would still leave misaligned 64-bit fields in 64-bit-aligned
data structures unhandled by ifcombine on 32-bit hosts.  We already
need to load them as double words, and if they're not byte-aligned,
the code gets really ugly, but ifcombine could improve it if it allows
double-word loads as a last resort.  I've added that.


for  gcc/ChangeLog

* gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit
boundary choice by word size as well.  Try aligned double-word
loads as a last resort.

for  gcc/testsuite/ChangeLog

* gcc.dg/field-merge-17.c: New.

Diff:
---
 gcc/gimple-fold.cc| 30 ---
 gcc/testsuite/gcc.dg/field-merge-17.c | 46 +++
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 0402c7666b66..c8a726e0ae3f 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8381,16 +8381,40 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
 {
   /* Consider the possibility of recombining loads if any of the
 fields straddles across an alignment boundary, so that either
-part can be loaded along with the other field.  */
+part can be loaded along with the other field.  Since we
+limit access modes to BITS_PER_WORD, don't exceed that,
+otherwise on a 32-bit host and a 64-bit-aligned data
+structure, we'll fail the above for a field that straddles
+across two words, and would fail here for not even trying to
+split it at between 32-bit words.  */
   HOST_WIDE_INT boundary = compute_split_boundary_from_align
-   (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+   (MIN (ll_align, BITS_PER_WORD),
+ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
 
   if (boundary < 0
  || !get_best_mode (boundary - first_bit, first_bit, 0, ll_end_region,
 ll_align, BITS_PER_WORD, volatilep, &lnmode)
  || !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region,
 ll_align, BITS_PER_WORD, volatilep, &lnmode2))
-   return 0;
+   {
+ if (ll_align <= BITS_PER_WORD)
+   return 0;
+
+ /* As a last resort, try double-word access modes.  This
+enables us to deal with misaligned double-word fields
+that straddle across 3 separate words.  */
+ boundary = compute_split_boundary_from_align
+   (MIN (ll_align, 2 * BITS_PER_WORD),
+ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+ if (boundary < 0
+ || !get_best_mode (boundary - first_bit, first_bit,
+0, ll_end_region, ll_align, 2 * BITS_PER_WORD,
+volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary,
+0, ll_end_region, ll_align, 2 * BITS_PER_WORD,
+volatilep, &lnmode2))
+   return 0;
+   }
 
   /* If we can't have a single load, but can with two, figure out whether
 the two compares can be separated, i.e., whether the entirety of the
diff --git a/gcc/testsuite/gcc.dg/field-merge-17.c 
b/gcc/testsuite/gcc.dg/field-merge-17.c
new file mode 100644
index ..06c8ec16e86c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-17.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ifcombine-details" } */
+
+/* Check that we can optimize misaligned double-words.  */
+
+struct s {
+  short a;
+  long long b;
+  int c;
+  long long d;
+  short e;
+} __attribute__ ((packed, aligned (8)));
+
+struct s p = { 0, 0, 0, 0, 0 };
+
+__attribute__ ((__noinline__, __noipa__, __noclone__))
+int fp ()
+{
+  if (p.a
+  || p.b
+  || p.c
+  || p.d
+  || p.e)
+return 1;
+  else
+return -1;
+}
+
+int main () {
+  /* Unlikely, but play safe.  */
+  if (sizeof (long long) == sizeof (short))
+return 0;
+  if (fp () > 0)
+__builtin_abort ();
+  unsigned char *pc = (unsigned char *)&p;
+  for (int i = 0; i < sizeof (p); i++)
+{
+  pc[i] = 1;
+  if (fp () < 0)
+   __builtin_abort ();
+  pc

[gcc r15-6771] testsuite: generalize ifcombine field-merge tests [PR118025]

2025-01-10 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:261ffe685f3865ea61599d61d6b32b92e476a342

commit r15-6771-g261ffe685f3865ea61599d61d6b32b92e476a342
Author: Alexandre Oliva 
Date:   Fri Jan 10 09:32:20 2025 -0300

testsuite: generalize ifcombine field-merge tests [PR118025]

A number of tests that check for specific ifcombine transformations
fail on AVR and PRU targets, whose type sizes and alignments aren't
conducive to the expected transformations.  Adjust the expectations.

Most execution tests should run successfully regardless of the
transformations, but a few that could conceivably fail if short and
char have the same bit width now check for that and bypass the tests
that would fail.

Conversely, one test that had such a runtime test, but that would work
regardless, no longer has that runtime test, and its types are
narrowed so that the transformations on 32-bit targets are more likely
to be the same as those that used to take place on 64-bit targets.
This latter change is somewhat obviated by a separate patch, but I've
left it in place anyway.


for  gcc/testsuite/ChangeLog

PR testsuite/118025
* gcc.dg/field-merge-1.c: Skip BIT_FIELD_REF counting on AVR and PRU.
* gcc.dg/field-merge-3.c: Bypass the test if short doesn't have the
expected size.
* gcc.dg/field-merge-8.c: Likewise.
* gcc.dg/field-merge-9.c: Likewise.  Skip optimization counting on
AVR and PRU.
* gcc.dg/field-merge-13.c: Skip optimization counting on AVR and PRU.
* gcc.dg/field-merge-15.c: Likewise.
* gcc.dg/field-merge-17.c: Likewise.
* gcc.dg/field-merge-16.c: Likewise.  Drop runtime bypass.  Use
smaller types.
* gcc.dg/field-merge-14.c: Add comments.

Diff:
---
 gcc/testsuite/gcc.dg/field-merge-1.c  |  2 +-
 gcc/testsuite/gcc.dg/field-merge-13.c |  2 +-
 gcc/testsuite/gcc.dg/field-merge-14.c |  3 ++-
 gcc/testsuite/gcc.dg/field-merge-15.c |  2 +-
 gcc/testsuite/gcc.dg/field-merge-16.c | 17 +++--
 gcc/testsuite/gcc.dg/field-merge-17.c |  2 +-
 gcc/testsuite/gcc.dg/field-merge-3.c  |  2 ++
 gcc/testsuite/gcc.dg/field-merge-8.c  |  2 ++
 gcc/testsuite/gcc.dg/field-merge-9.c  |  4 +++-
 9 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c 
b/gcc/testsuite/gcc.dg/field-merge-1.c
index 1818e104437e..4405d40ee79d 100644
--- a/gcc/testsuite/gcc.dg/field-merge-1.c
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -58,7 +58,7 @@ int main () {
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" { target { 
! { avr-*-* pru-*-* } } } } } */
 /* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } 
} } } */
 /* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || 
x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || 
rs6000-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-13.c 
b/gcc/testsuite/gcc.dg/field-merge-13.c
index 7e4f4c499347..eeef73338f8e 100644
--- a/gcc/testsuite/gcc.dg/field-merge-13.c
+++ b/gcc/testsuite/gcc.dg/field-merge-13.c
@@ -90,4 +90,4 @@ int main () {
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "optimizing" 9 "ifcombine" } } */
+/* { dg-final { scan-tree-dump-times "optimizing" 9 "ifcombine" { target { ! { 
avr-*-* pru-*-* } } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-14.c 
b/gcc/testsuite/gcc.dg/field-merge-14.c
index 91d84cfebf19..73259e0936e4 100644
--- a/gcc/testsuite/gcc.dg/field-merge-14.c
+++ b/gcc/testsuite/gcc.dg/field-merge-14.c
@@ -1,7 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O -fdump-tree-ifcombine-details" } */
 
-/* Check that we don't get confused by multiple conversions.  */
+/* Check that we don't get confused by multiple conversions.  Conceivably, we
+   could combine both tests using b, but the current logic won't do that.  */
 
 __attribute__((noipa))
 int f(int *a,int *d)
diff --git a/gcc/testsuite/gcc.dg/field-merge-15.c 
b/gcc/testsuite/gcc.dg/field-merge-15.c
index 34641e893c92..fc3846452716 100644
--- a/gcc/testsuite/gcc.dg/field-merge-15.c
+++ b/gcc/testsuite/gcc.dg/field-merge-15.c
@@ -33,4 +33,4 @@ int main () {
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "optimizing" 6 "ifcombine" } } */
+/* { dg-final { scan-tree-dump-times "optimizing" 6 "ifcombine" { target { ! { 
avr-*-* pru-*-* } } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-16.c 
b/gcc/testsuite/gcc.dg/field-merge-16.c
index 2ca23ea663a4..afdaf45b6a94 100644
--- a/gcc/testsuite/gcc.dg/field-merge-16.c
+++ b/gcc/testsuite/gcc.dg/field-merge-16.c
@@ -4,17 +4,17 @@
 /* Check that tests for sign-extension bits are handled correctly.  */
 
 struct s {
-  short a;
-  short b;
-  unsigned short c;
-  unsigned short d;
-} __attribu

[gcc r15-6773] [ifcombine] adjust for narrowing converts before shifts [PR118206]

2025-01-10 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:c96a6c2c264776d8138c6b303d005e74f047cfa0

commit r15-6773-gc96a6c2c264776d8138c6b303d005e74f047cfa0
Author: Alexandre Oliva 
Date:   Fri Jan 10 09:32:33 2025 -0300

[ifcombine] adjust for narrowing converts before shifts [PR118206]

A narrowing conversion and a shift both drop bits from the loaded
value, but we need to take into account which one comes first to get
the right number of bits and mask.

Fold when applying masks to parts, comparing the parts, and combining
the results, in the odd chance either mask happens to be zero.


for  gcc/ChangeLog

PR tree-optimization/118206
* gimple-fold.cc (decode_field_reference): Account for upper
bits dropped by narrowing conversions whether before or after
a right shift.
(fold_truth_andor_for_ifcombine): Fold masks, compares, and
combined results.

for  gcc/testsuite/ChangeLog

PR tree-optimization/118206
* gcc.dg/field-merge-18.c: New.

Diff:
---
 gcc/gimple-fold.cc| 39 -
 gcc/testsuite/gcc.dg/field-merge-18.c | 46 +++
 2 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index c8a726e0ae3f..d95f04213ee4 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7547,6 +7547,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
   int shiftrt = 0;
   tree res_ops[2];
   machine_mode mode;
+  bool convert_before_shift = false;
 
   *load = NULL;
   *psignbit = false;
@@ -7651,6 +7652,12 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
   if (*load)
loc[3] = gimple_location (*load);
   exp = res_ops[0];
+  /* This looks backwards, but we're going back the def chain, so if we
+find the conversion here, after finding a shift, that's because the
+convert appears before the shift, and we should thus adjust the bit
+pos and size because of the shift after adjusting it due to type
+conversion.  */
+  convert_before_shift = true;
 }
 
   /* Identify the load, if there is one.  */
@@ -7693,6 +7700,15 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
   *pvolatilep = volatilep;
 
   /* Adjust shifts...  */
+  if (convert_before_shift
+  && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
+{
+  HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
+  if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+   *pbitpos += excess;
+  *pbitsize -= excess;
+}
+
   if (shiftrt)
 {
   if (!*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -7701,7 +7717,8 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
 }
 
   /* ... and bit position.  */
-  if (outer_type && *pbitsize > TYPE_PRECISION (outer_type))
+  if (!convert_before_shift
+  && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
 {
   HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
   if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -8377,6 +8394,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
 ll_align, BITS_PER_WORD, volatilep, &lnmode))
 l_split_load = false;
+  /* ??? If ll and rl share the same load, reuse that?
+ See PR 118206 -> gcc.dg/field-merge-18.c  */
   else
 {
   /* Consider the possibility of recombining loads if any of the
@@ -8757,11 +8776,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
   /* Apply masks.  */
   for (int j = 0; j < 2; j++)
if (mask[j] != wi::mask (0, true, mask[j].get_precision ()))
- op[j] = build2_loc (locs[j][2], BIT_AND_EXPR, type,
- op[j], wide_int_to_tree (type, mask[j]));
+ op[j] = fold_build2_loc (locs[j][2], BIT_AND_EXPR, type,
+  op[j], wide_int_to_tree (type, mask[j]));
 
-  cmp[i] = build2_loc (i ? rloc : lloc, wanted_code, truth_type,
-  op[0], op[1]);
+  cmp[i] = fold_build2_loc (i ? rloc : lloc, wanted_code, truth_type,
+   op[0], op[1]);
 }
 
   /* Reorder the compares if needed.  */
@@ -8773,7 +8792,15 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
   if (parts == 1)
 result = cmp[0];
   else if (!separatep || !maybe_separate)
-result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
+{
+  /* Only fold if any of the cmp is known, otherwise we may lose the
+sequence point, and that may prevent further optimizations.  */
+  if (TREE_CODE (cmp[0]) == INTEGER_CST
+ || TREE_CODE (cmp[1]) == INTEGER_CST)
+   result = fold_build2_loc (rloc, orig_code, truth_type, cm

[gcc r15-6774] [ifcombine] reuse left-hand mask to decode right-hand xor operand

2025-01-10 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:740c84975ceb7426da656dc7115445872a9e5b6f

commit r15-6774-g740c84975ceb7426da656dc7115445872a9e5b6f
Author: Alexandre Oliva 
Date:   Fri Jan 10 09:32:38 2025 -0300

[ifcombine] reuse left-hand mask to decode right-hand xor operand

If fold_truth_andor_for_ifcombine applies a mask to an xor, say
because the result of the xor is compared with a power of two [minus
one], we have to apply the same mask when processing both the left-
and right-hand xor paths for the transformation to be sound.  Arrange
for decode_field_reference to propagate the incoming mask along with
the expression to the right-hand operand.

Don't require the right-hand xor operand to be a constant, that was a
cut&pasto.


for  gcc/ChangeLog

* gimple-fold.cc (decode_field_reference): Add xor_pand_mask.
Propagate pand_mask to the right-hand xor operand.  Don't
require the right-hand xor operand to be a constant.
(fold_truth_andor_for_ifcombine): Pass right-hand mask when
appropriate.

Diff:
---
 gcc/gimple-fold.cc | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index d95f04213ee4..0ad92de3a218 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7519,8 +7519,9 @@ gimple_binop_def_p (enum tree_code code, tree t, tree 
op[2])
 
*XOR_P is to be FALSE if EXP might be a XOR used in a compare, in which
case, if XOR_CMP_OP is a zero constant, it will be overridden with *PEXP,
-   *XOR_P will be set to TRUE, and the left-hand operand of the XOR will be
-   decoded.  If *XOR_P is TRUE, XOR_CMP_OP is supposed to be NULL, and then the
+   *XOR_P will be set to TRUE, *XOR_PAND_MASK will be copied from *PAND_MASK,
+   and the left-hand operand of the XOR will be decoded.  If *XOR_P is TRUE,
+   XOR_CMP_OP and XOR_PAND_MASK are supposed to be NULL, and then the
right-hand operand of the XOR will be decoded.
 
*LOAD is set to the load stmt of the innermost reference, if any,
@@ -7537,7 +7538,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
HOST_WIDE_INT *pbitpos,
bool *punsignedp, bool *preversep, bool *pvolatilep,
wide_int *pand_mask, bool *psignbit,
-   bool *xor_p, tree *xor_cmp_op,
+   bool *xor_p, tree *xor_cmp_op, wide_int *xor_pand_mask,
gimple **load, location_t loc[4])
 {
   tree exp = *pexp;
@@ -7599,15 +7600,14 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
 and_mask = *pand_mask;
 
   /* Turn (a ^ b) [!]= 0 into a [!]= b.  */
-  if (xor_p && gimple_binop_def_p (BIT_XOR_EXPR, exp, res_ops)
-  && uniform_integer_cst_p (res_ops[1]))
+  if (xor_p && gimple_binop_def_p (BIT_XOR_EXPR, exp, res_ops))
 {
   /* No location recorded for this one, it's entirely subsumed by the
 compare.  */
   if (*xor_p)
{
  exp = res_ops[1];
- gcc_checking_assert (!xor_cmp_op);
+ gcc_checking_assert (!xor_cmp_op && !xor_pand_mask);
}
   else if (!xor_cmp_op)
/* Not much we can do when xor appears in the right-hand compare
@@ -7618,6 +7618,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
  *xor_p = true;
  exp = res_ops[0];
  *xor_cmp_op = *pexp;
+ *xor_pand_mask = *pand_mask;
}
 }
 
@@ -8152,19 +8153,21 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
   bool l_xor = false, r_xor = false;
   ll_inner = decode_field_reference (&ll_arg, &ll_bitsize, &ll_bitpos,
 &ll_unsignedp, &ll_reversep, &volatilep,
-&ll_and_mask, &ll_signbit, &l_xor, &lr_arg,
+&ll_and_mask, &ll_signbit,
+&l_xor, &lr_arg, &lr_and_mask,
 &ll_load, ll_loc);
   lr_inner = decode_field_reference (&lr_arg, &lr_bitsize, &lr_bitpos,
 &lr_unsignedp, &lr_reversep, &volatilep,
-&lr_and_mask, &lr_signbit, &l_xor, 0,
+&lr_and_mask, &lr_signbit, &l_xor, 0, 0,
 &lr_load, lr_loc);
   rl_inner = decode_field_reference (&rl_arg, &rl_bitsize, &rl_bitpos,
 &rl_unsignedp, &rl_reversep, &volatilep,
-&rl_and_mask, &rl_signbit, &r_xor, &rr_arg,
+&rl_and_mask, &rl_signbit,
+&r_xor, &rr_arg, &rr_and_mask,
 &rl_load, rl_loc);
   rr_inner = decode_field_reference (&rr_arg, &rr_bitsize, &rr_bitpos,
 &rr_unsignedp, &rr_r

[gcc r15-6776] [ifcombine] drop other misuses of uniform_integer_cst_p

2025-01-10 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:47ac6ca9cb08d915532c59a3895497523a6edb58

commit r15-6776-g47ac6ca9cb08d915532c59a3895497523a6edb58
Author: Alexandre Oliva 
Date:   Fri Jan 10 09:32:47 2025 -0300

[ifcombine] drop other misuses of uniform_integer_cst_p

As Jakub pointed out in PR118206, the use of uniform_integer_cst_p in
ifcombine makes no sense: we're not dealing with vectors.  Indeed,
I've been misunderstanding and misusing it since I cut&pasted it from
some preexisting match predicate in an earlier version of the ifcombine
field-merge patch.


for  gcc/ChangeLog

* gimple-fold.cc (decode_field_reference): Drop misuses of
uniform_integer_cst_p.
(fold_truth_andor_for_ifcombine): Likewise.

Diff:
---
 gcc/gimple-fold.cc | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 20b5024d861d..a3987c4590ae 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -7577,7 +7577,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
   /* Recognize and save a masking operation.  Combine it with an
  incoming mask.  */
   if (pand_mask && gimple_binop_def_p (BIT_AND_EXPR, exp, res_ops)
-  && uniform_integer_cst_p (res_ops[1]))
+  && TREE_CODE (res_ops[1]) == INTEGER_CST)
 {
   loc[1] = gimple_location (SSA_NAME_DEF_STMT (exp));
   exp = res_ops[0];
@@ -7632,7 +7632,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT 
*pbitsize,
 
   /* Take note of shifts.  */
   if (gimple_binop_def_p (RSHIFT_EXPR, exp, res_ops)
-  && uniform_integer_cst_p (res_ops[1]))
+  && TREE_CODE (res_ops[1]) == INTEGER_CST)
 {
   loc[2] = gimple_location (SSA_NAME_DEF_STMT (exp));
   exp = res_ops[0];
@@ -8092,7 +8092,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   else if ((lcode == LT_EXPR || lcode == GE_EXPR)
   && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg))
   && TYPE_UNSIGNED (TREE_TYPE (ll_arg))
-  && uniform_integer_cst_p (lr_arg)
+  && TREE_CODE (lr_arg) == INTEGER_CST
   && wi::popcount (wi::to_wide (lr_arg)) == 1)
 {
   ll_and_mask = ~(wi::to_wide (lr_arg) - 1);
@@ -8104,7 +8104,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   else if ((lcode == LE_EXPR || lcode == GT_EXPR)
   && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg))
   && TYPE_UNSIGNED (TREE_TYPE (ll_arg))
-  && uniform_integer_cst_p (lr_arg)
+  && TREE_CODE (lr_arg) == INTEGER_CST
   && wi::popcount (wi::to_wide (lr_arg) + 1) == 1)
 {
   ll_and_mask = ~wi::to_wide (lr_arg);
@@ -8123,7 +8123,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   else if ((rcode == LT_EXPR || rcode == GE_EXPR)
   && INTEGRAL_TYPE_P (TREE_TYPE (rl_arg))
   && TYPE_UNSIGNED (TREE_TYPE (rl_arg))
-  && uniform_integer_cst_p (rr_arg)
+  && TREE_CODE (rr_arg) == INTEGER_CST
   && wi::popcount (wi::to_wide (rr_arg)) == 1)
 {
   rl_and_mask = ~(wi::to_wide (rr_arg) - 1);
@@ -8133,7 +8133,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   else if ((rcode == LE_EXPR || rcode == GT_EXPR)
   && INTEGRAL_TYPE_P (TREE_TYPE (rl_arg))
   && TYPE_UNSIGNED (TREE_TYPE (rl_arg))
-  && uniform_integer_cst_p (rr_arg)
+  && TREE_CODE (rr_arg) == INTEGER_CST
   && wi::popcount (wi::to_wide (rr_arg) + 1) == 1)
 {
   rl_and_mask = ~wi::to_wide (rr_arg);
@@ -8392,7 +8392,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   HOST_WIDE_INT ll_align = TYPE_ALIGN (TREE_TYPE (ll_inner));
   poly_uint64 ll_end_region = 0;
   if (TYPE_SIZE (TREE_TYPE (ll_inner))
-  && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (ll_inner
+  && tree_fits_poly_uint64_p (TYPE_SIZE (TREE_TYPE (ll_inner
 ll_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (ll_inner)));
   if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
 ll_align, BITS_PER_WORD, volatilep, &lnmode))
@@ -8585,7 +8585,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   HOST_WIDE_INT lr_align = TYPE_ALIGN (TREE_TYPE (lr_inner));
   poly_uint64 lr_end_region = 0;
   if (TYPE_SIZE (TREE_TYPE (lr_inner))
- && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (lr_inner
+ && tree_fits_poly_uint64_p (TYPE_SIZE (TREE_TYPE (lr_inner
lr_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (lr_inner)));
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, lr_end_region,
  lr_align, BITS_PER_WORD, volatilep, &rnmode))


[gcc r15-6793] rtl-optimization/117467 - limit ext-dce memory use

2025-01-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:03faac507913803de76eab04fd74e754c70aa8c4

commit r15-6793-g03faac507913803de76eab04fd74e754c70aa8c4
Author: Richard Biener 
Date:   Fri Jan 10 12:30:29 2025 +0100

rtl-optimization/117467 - limit ext-dce memory use

The following puts in a hard limit on ext-dce because it might end
up requiring memory on the order of the number of basic blocks
times the number of pseudo registers.  The limiting follows what
GCSE based passes do and thus I re-use --param max-gcse-memory here.

This doesn't in any way address the implementation issues of the pass,
but it reduces the memory-use when compiling the
module_first_rk_step_part1.F90 TU from 521.wrf_r from 25GB to 1GB.

PR rtl-optimization/117467
PR rtl-optimization/117934
* ext-dce.cc (ext_dce_execute): Do nothing if a memory
allocation estimate exceeds what is allowed by
--param max-gcse-memory.

Diff:
---
 gcc/ext-dce.cc | 16 
 1 file changed, 16 insertions(+)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index 6cf641873494..e257e3bc873a 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "df.h"
 #include "print-rtl.h"
 #include "dbgcnt.h"
+#include "diagnostic-core.h"
 
 /* These should probably move into a C++ class.  */
 static vec livein;
@@ -1110,6 +1111,21 @@ static bool ext_dce_rd_confluence_n (edge) { return 
true; }
 void
 ext_dce_execute (void)
 {
+  /* Limit the amount of memory we use for livein, with 4 bits per
+ reg per basic-block including overhead that maps to one byte
+ per reg per basic-block.  */
+  uint64_t memory_request
+= (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
+  if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
+{
+  warning (OPT_Wdisabled_optimization,
+  "ext-dce disabled: %d basic blocks and %d registers; "
+  "increase %<--param max-gcse-memory%> above %wu",
+  n_basic_blocks_for_fn (cfun), max_reg_num (),
+  memory_request / 1024);
+  return;
+}
+
   /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
  to this pass.  Clear it for those cases.  */
   maybe_clear_subreg_promoted_p ();


[gcc r15-6794] Fix bootstrap on !HARDREG_PRE_REGNOS targets

2025-01-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:553411851d9d20510979eb4daa6675d01a79aea4

commit r15-6794-g553411851d9d20510979eb4daa6675d01a79aea4
Author: Richard Biener 
Date:   Fri Jan 10 15:40:36 2025 +0100

Fix bootstrap on !HARDREG_PRE_REGNOS targets

Pushed as obvious.

* gcse.cc (pass_hardreg_pre::gate): Wrap possibly unused
fun argument.

Diff:
---
 gcc/gcse.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/gcse.cc b/gcc/gcse.cc
index 3f3f7fe15b0c..4ae19f28430e 100644
--- a/gcc/gcse.cc
+++ b/gcc/gcse.cc
@@ -4351,7 +4351,7 @@ public:
 }; // class pass_rtl_pre
 
 bool
-pass_hardreg_pre::gate (function *fun)
+pass_hardreg_pre::gate (function * ARG_UNUSED (fun))
 {
 #ifdef HARDREG_PRE_REGNOS
   return optimize > 0


[gcc r12-10895] tree-optimization/116057 - wrong code with CCP and vector CTORs

2025-01-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:c8b549857d968d634a74709112e5acc9f9caf35c

commit r12-10895-gc8b549857d968d634a74709112e5acc9f9caf35c
Author: Richard Biener 
Date:   Wed Jul 24 13:16:35 2024 +0200

tree-optimization/116057 - wrong code with CCP and vector CTORs

The following fixes an issue with CCP's likely_value when faced with
a vector CTOR containing undef SSA names and constants.  This should
be classified as CONSTANT and not UNDEFINED.

PR tree-optimization/116057
* tree-ssa-ccp.cc (likely_value): Also walk CTORs in stmt
operands to look for constants.

* gcc.dg/torture/pr116057.c: New testcase.

(cherry picked from commit 1ea551514b9c285d801ac5ab8d78b22483ff65af)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116057.c | 20 
 gcc/tree-ssa-ccp.cc | 11 +++
 2 files changed, 31 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116057.c 
b/gcc/testsuite/gcc.dg/torture/pr116057.c
new file mode 100644
index ..a7021c8e746e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116057.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-additional-options "-Wno-psabi" } */
+
+#define vect8 __attribute__((vector_size(8)))
+
+vect8 int __attribute__((noipa))
+f(int a)
+{
+  int b;
+  vect8 int t={1,1};
+  if(a) return t;
+  return (vect8 int){0, b};
+}
+
+int main ()
+{
+  if (f(0)[0] != 0)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 3c63f2dd8a3b..629cb3c2d82d 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -750,6 +750,17 @@ likely_value (gimple *stmt)
continue;
   if (is_gimple_min_invariant (op))
has_constant_operand = true;
+  else if (TREE_CODE (op) == CONSTRUCTOR)
+   {
+ unsigned j;
+ tree val;
+ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (op), j, val)
+   if (CONSTANT_CLASS_P (val))
+ {
+   has_constant_operand = true;
+   break;
+ }
+   }
 }
 
   if (has_constant_operand)


[gcc r12-10894] tree-optimization/115669 - fix SLP reduction association

2025-01-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:882f7e53a7664f2c76b10dc020e720ba9f55f022

commit r12-10894-g882f7e53a7664f2c76b10dc020e720ba9f55f022
Author: Richard Biener 
Date:   Thu Jun 27 11:26:08 2024 +0200

tree-optimization/115669 - fix SLP reduction association

The following avoids associating a reduction path as that might
get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order.
This is a latent issue with SLP reductions but now easily exposed
as we're doing single-lane SLP reductions.

Once we have achieved SLP-only vectorization, we can move and update
this meta-data.

PR tree-optimization/115669
* tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate
chains that participate in a reduction.

* gcc.dg/vect/pr115669.c: New testcase.

(cherry picked from commit 7886830bb45c4f5dca0496d4deae9a45204d78f5)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++
 gcc/tree-vect-slp.cc |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c 
b/gcc/testsuite/gcc.dg/vect/pr115669.c
new file mode 100644
index ..361a17a64e68
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115669.c
@@ -0,0 +1,22 @@
+/* { dg-additional-options "-fwrapv" } */
+
+#include "tree-vect.h"
+
+int a = 10;
+unsigned b;
+long long c[100];
+int foo()
+{
+  long long *d = c;
+  for (short e = 0; e < a; e++)
+b += ~(d ? d[e] : 0);
+  return b;
+}
+
+int main()
+{
+  check_vect ();
+  if (foo () != -10)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 19cab93761c9..0462fa01020d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1825,6 +1825,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   else if (is_a  (vinfo)
   /* ???  We don't handle !vect_internal_def defs below.  */
   && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+  /* ???  Do not associate a reduction, this will wreck REDUC_IDX
+ mapping as long as that exists on the stmt_info level.  */
+  && STMT_VINFO_REDUC_IDX (stmt_info) == -1
   && is_gimple_assign (stmt_info->stmt)
   && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt))
   || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)


[gcc r15-6795] arm: [MVE intrinsics] Fix tuples field name (PR 118332)

2025-01-10 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:288ac095b4df1a47a4cf9ba2efdc56a568a6e951

commit r15-6795-g288ac095b4df1a47a4cf9ba2efdc56a568a6e951
Author: Christophe Lyon 
Date:   Wed Jan 8 18:51:27 2025 +

arm: [MVE intrinsics] Fix tuples field name (PR 118332)

The previous fix only worked for C; for C++ we need to add more
information to the underlying type so that
finish_class_member_access_expr accepts it.

We use the same logic as in aarch64's register_tuple_type for AdvSIMD
tuples.

This patch makes gcc.target/arm/mve/intrinsics/pr118332.c pass in C++
mode.

gcc/ChangeLog:

PR target/118332
* config/arm/arm-mve-builtins.cc (wrap_type_in_struct): Delete.
(register_type_decl): Delete.
(register_builtin_tuple_types): Use
lang_hooks.types.simulate_record_decl.

Diff:
---
 gcc/config/arm/arm-mve-builtins.cc | 52 ++
 1 file changed, 8 insertions(+), 44 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins.cc 
b/gcc/config/arm/arm-mve-builtins.cc
index 4c52415f3f1b..42b53cc05e77 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -463,47 +463,6 @@ register_vector_type (vector_type_index type)
   acle_vector_types[0][type] = vectype;
 }
 
-/* Return a structure type that contains a single field of type FIELD_TYPE.
-   The field is called 'val', as mandated by ACLE.  */
-static tree
-wrap_type_in_struct (tree field_type)
-{
-  tree field = build_decl (input_location, FIELD_DECL,
-  get_identifier ("val"), field_type);
-  tree struct_type = lang_hooks.types.make_type (RECORD_TYPE);
-  DECL_FIELD_CONTEXT (field) = struct_type;
-  TYPE_FIELDS (struct_type) = field;
-  layout_type (struct_type);
-  return struct_type;
-}
-
-/* Register a built-in TYPE_DECL called NAME for TYPE.  This is used/needed
-   when TYPE is a structure type.  */
-static void
-register_type_decl (tree type, const char *name)
-{
-  tree decl = build_decl (input_location, TYPE_DECL,
- get_identifier (name), type);
-  TYPE_NAME (type) = decl;
-  TYPE_STUB_DECL (type) = decl;
-  lang_hooks.decls.pushdecl (decl);
-  /* ??? Undo the effect of set_underlying_type for C.  The C frontend
- doesn't recognize DECL as a built-in because (as intended) the decl has
- a real location instead of BUILTINS_LOCATION.  The frontend therefore
- treats the decl like a normal C "typedef struct foo foo;", expecting
- the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead
- of the named one we attached above.  It then sets DECL_ORIGINAL_TYPE
- on the supposedly unnamed decl, creating a circularity that upsets
- dwarf2out.
-
- We don't want to follow the normal C model and create "struct foo"
- tags for tuple types since (a) the types are supposed to be opaque
- and (b) they couldn't be defined as a real struct anyway.  Treating
- the TYPE_DECLs as "typedef struct foo foo;" without creating
- "struct foo" would lead to confusing error messages.  */
-  DECL_ORIGINAL_TYPE (decl) = NULL_TREE;
-}
-
 /* Register tuple types of element type TYPE under their arm_mve_types.h
names.  */
 static void
@@ -538,13 +497,18 @@ register_builtin_tuple_types (vector_type_index type)
  && TYPE_MODE_RAW (arrtype) == TYPE_MODE (arrtype)
  && TYPE_ALIGN (arrtype) == 64);
 
-  tree tuple_type = wrap_type_in_struct (arrtype);
+  /* Build a structure type that contains a single field of type ARRTYPE.
+The field is called 'val', as mandated by ACLE.  */
+  tree field = build_decl (input_location, FIELD_DECL,
+  get_identifier ("val"), arrtype);
+  tree tuple_type
+   = lang_hooks.types.simulate_record_decl (input_location,
+buffer,
+make_array_slice (&field, 1));
   gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type))
  && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type)
  && TYPE_ALIGN (tuple_type) == 64);
 
-  register_type_decl (tuple_type, buffer);
-
   acle_vector_types[num_vectors >> 1][type] = tuple_type;
 }
 }


[gcc r15-6790] docs: Document new hardreg PRE pass

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:016e2f00d40d76676f38fb9d268ac550e5ec878a

commit r15-6790-g016e2f00d40d76676f38fb9d268ac550e5ec878a
Author: Andrew Carlotti 
Date:   Wed Dec 18 15:59:24 2024 +

docs: Document new hardreg PRE pass

gcc/ChangeLog:

* doc/passes.texi: Document hardreg PRE pass.

Diff:
---
 gcc/doc/passes.texi | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi
index 59a143292c78..282fc1a6a12b 100644
--- a/gcc/doc/passes.texi
+++ b/gcc/doc/passes.texi
@@ -959,6 +959,12 @@ global constant and  copy propagation.
 The source file for this pass is @file{gcse.cc}, and the LCM routines
 are in @file{lcm.cc}.
 
+A third version of this pass is run on some targets to optimise assignments to
+specific hard registers.  This can be used in cases where a register has a
+single purpose, such as specifying a mode as an extra input for specific
+instructions (@pxref{mode switching optimization} for another way of handling
+instruction modes).
+
 @item Loop optimization
 
 This pass performs several loop related optimizations.
@@ -1018,6 +1024,7 @@ combination approaches as well.
 The pass runs twice, once before register allocation and once after
 register allocation.  The code is located in @file{late-combine.cc}.
 
+@anchor{mode switching optimization}
 @item Mode switching optimization
 
 This pass looks for instructions that require the processor to be in a


[gcc r15-6789] Add new hardreg PRE pass

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:e7f98d9603808b1c17106d3d9f2000bc34f2c50c

commit r15-6789-ge7f98d9603808b1c17106d3d9f2000bc34f2c50c
Author: Andrew Carlotti 
Date:   Tue Oct 15 17:31:28 2024 +0100

Add new hardreg PRE pass

This pass is used to optimise assignments to the FPMR register in
aarch64.  I chose to implement this as a middle-end pass because it
mostly reuses the existing RTL PRE code within gcse.cc.

Compared to RTL PRE, the key difference in this new pass is that we
insert new writes directly to the destination hardreg, instead of
writing to a new pseudo-register and copying the result later.  This
requires changes to the analysis portion of the pass, because sets
cannot be moved before existing instructions that set, use or clobber
the hardreg, and the value becomes unavailable after any uses or
clobbers of the hardreg.

Any uses of the hardreg in debug insns will be deleted.  We could do
better than this, but for the aarch64 fpmr I don't think we emit useful
debuginfo for deleted fp8 instructions anyway (and I don't even know if
it's possible to have a debug fpmr use when entering hardreg PRE).

gcc/ChangeLog:

* config/aarch64/aarch64.h (HARDREG_PRE_REGNOS): New macro.
* gcse.cc (doing_hardreg_pre_p): New global variable.
(do_load_motion): New boolean check.
(current_hardreg_regno): New global variable.
(compute_local_properties): Unset transp for hardreg clobbers.
(prune_hardreg_uses): New function.
(want_to_gcse_p): Use different checks for hardreg PRE.
(oprs_unchanged_p): Disable load motion for hardreg PRE pass.
(hash_scan_set): For hardreg PRE, skip non-hardreg sets and
check for hardreg clobbers.
(record_last_mem_set_info): Skip for hardreg PRE.
(compute_pre_data): Prune hardreg uses from transp bitmap.
(pre_expr_reaches_here_p_work): Add sentence to comment.
(insert_insn_start_basic_block): New functions.
(pre_edge_insert): Don't add hardreg sets to predecessor block.
(pre_delete): Use hardreg for the reaching reg.
(reset_hardreg_debug_uses): New function.
(pre_gcse): For hardreg PRE, reset debug uses and don't insert
copies.
(one_pre_gcse_pass): Disable load motion for hardreg PRE.
(execute_hardreg_pre): New.
(class pass_hardreg_pre): New.
(pass_hardreg_pre::gate): New.
(make_pass_hardreg_pre): New.
* passes.def (pass_hardreg_pre): New pass.
* tree-pass.h (make_pass_hardreg_pre): New.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/acle/fpmr-1.c: New test.
* gcc.target/aarch64/acle/fpmr-2.c: New test.
* gcc.target/aarch64/acle/fpmr-3.c: New test.
* gcc.target/aarch64/acle/fpmr-4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.h   |   4 +
 gcc/gcse.cc| 339 ++---
 gcc/passes.def |   1 +
 gcc/testsuite/gcc.target/aarch64/acle/fpmr-1.c |  58 +
 gcc/testsuite/gcc.target/aarch64/acle/fpmr-2.c |  15 ++
 gcc/testsuite/gcc.target/aarch64/acle/fpmr-3.c |  18 ++
 gcc/testsuite/gcc.target/aarch64/acle/fpmr-4.c |  23 ++
 gcc/tree-pass.h|   1 +
 8 files changed, 427 insertions(+), 32 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 3f3a475eb01d..1ab49e229b08 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1652,6 +1652,10 @@ enum class aarch64_tristate_mode : int { NO, YES, MAYBE 
};
   { int (aarch64_tristate_mode::MAYBE), \
 int (aarch64_local_sme_state::ANY) }
 
+/* Zero terminated list of regnos for which hardreg PRE should be
+   applied.  */
+#define HARDREG_PRE_REGNOS { FPM_REGNUM, 0 }
+
 #endif
 
 #endif /* GCC_AARCH64_H */
diff --git a/gcc/gcse.cc b/gcc/gcse.cc
index 839cac3ddab3..3f3f7fe15b0c 100644
--- a/gcc/gcse.cc
+++ b/gcc/gcse.cc
@@ -415,6 +415,17 @@ static int gcse_create_count;
 
 /* Doing code hoisting.  */
 static bool doing_code_hoisting_p = false;
+
+/* Doing hardreg_pre.  */
+static bool doing_hardreg_pre_p = false;
+
+inline bool
+do_load_motion ()
+{
+  return flag_gcse_lm && !doing_hardreg_pre_p;
+}
+
+static unsigned int current_hardreg_regno;
 
 /* For available exprs */
 static sbitmap *ae_kill;
@@ -689,14 +700,32 @@ compute_local_properties (sbitmap *transp, sbitmap *comp, 
sbitmap *antloc,
  int indx = expr->bitmap_index;
  struct gcse_occr *occr;
 
- /* The expression is transparent in this block if it is not killed.
-We start by assuming all are transparent [none are killed], and
-then reset the bits for those that are.  */
+ /* In most cases, the expre

[gcc r15-6791] s390: Add expander for uaddc/usubc optabs

2025-01-10 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:8a2d5bc28089b2660310b964ef75fb05eb387f88

commit r15-6791-g8a2d5bc28089b2660310b964ef75fb05eb387f88
Author: Stefan Schulze Frielinghaus 
Date:   Fri Jan 10 15:14:08 2025 +0100

s390: Add expander for uaddc/usubc optabs

gcc/ChangeLog:

* config/s390/s390-protos.h (s390_emit_compare): Add mode
parameter for the resulting RTX.
* config/s390/s390.cc (s390_emit_compare): Ditto.
(s390_emit_compare_and_swap): Change.
(s390_expand_vec_strlen): Change.
(s390_expand_cs_hqi): Change.
(s390_expand_split_stack_prologue): Change.
* config/s390/s390.md (*add<mode>3_carry1_cc): Renamed to ...
(add<mode>3_carry1_cc): this and in order to use the
corresponding gen function, encode CC mode into pattern.
(*sub<mode>3_borrow_cc): Renamed to ...
(sub<mode>3_borrow_cc): this and in order to use the
corresponding gen function, encode CC mode into pattern.
(*add<mode>3_alc_carry1_cc): Renamed to ...
(add<mode>3_alc_carry1_cc): this and in order to use the
corresponding gen function, encode CC mode into pattern.
(sub<mode>3_slb_borrow1_cc): New.
(uaddc<mode>5): New.
(usubc<mode>5): New.

gcc/testsuite/ChangeLog:

* gcc.target/s390/uaddc-1.c: New test.
* gcc.target/s390/uaddc-2.c: New test.
* gcc.target/s390/uaddc-3.c: New test.
* gcc.target/s390/usubc-1.c: New test.
* gcc.target/s390/usubc-2.c: New test.
* gcc.target/s390/usubc-3.c: New test.

Diff:
---
 gcc/config/s390/s390-protos.h   |   2 +-
 gcc/config/s390/s390.cc |  20 ++--
 gcc/config/s390/s390.md | 115 ++-
 gcc/testsuite/gcc.target/s390/uaddc-1.c | 156 
 gcc/testsuite/gcc.target/s390/uaddc-2.c |  25 +
 gcc/testsuite/gcc.target/s390/uaddc-3.c |  27 ++
 gcc/testsuite/gcc.target/s390/usubc-1.c | 156 
 gcc/testsuite/gcc.target/s390/usubc-2.c |  25 +
 gcc/testsuite/gcc.target/s390/usubc-3.c |  29 ++
 9 files changed, 519 insertions(+), 36 deletions(-)

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 00848008dcf0..e8c7f8308496 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -86,7 +86,7 @@ extern int tls_symbolic_operand (rtx);
 extern bool s390_match_ccmode (rtx_insn *, machine_mode);
 extern machine_mode s390_tm_ccmode (rtx, rtx, bool);
 extern machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx);
-extern rtx s390_emit_compare (enum rtx_code, rtx, rtx);
+extern rtx s390_emit_compare (machine_mode, enum rtx_code, rtx, rtx);
 extern rtx_insn *s390_emit_jump (rtx, rtx);
 extern bool symbolic_reference_mentioned_p (rtx);
 extern bool tls_symbolic_reference_mentioned_p (rtx);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 08acb69de3e8..a98e067bb06f 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -2029,9 +2029,9 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx 
*op1,
the IF_THEN_ELSE of the conditional branch testing the result.  */
 
 rtx
-s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
+s390_emit_compare (machine_mode mode, enum rtx_code code, rtx op0, rtx op1)
 {
-  machine_mode mode = s390_select_ccmode (code, op0, op1);
+  machine_mode cc_mode = s390_select_ccmode (code, op0, op1);
   rtx cc;
 
   /* Force OP1 into register in order to satisfy VXE TFmode patterns.  */
@@ -2043,17 +2043,17 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
   /* Do not output a redundant compare instruction if a
 compare_and_swap pattern already computed the result and the
 machine modes are compatible.  */
-  gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
+  gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), cc_mode)
  == GET_MODE (op0));
   cc = op0;
 }
   else
 {
-  cc = gen_rtx_REG (mode, CC_REGNUM);
-  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
+  cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op0, op1)));
 }
 
-  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
+  return gen_rtx_fmt_ee (code, mode, cc, const0_rtx);
 }
 
 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
@@ -2103,7 +2103,7 @@ s390_emit_compare_and_swap (enum rtx_code code, rtx old, 
rtx mem,
 default:
   gcc_unreachable ();
 }
-  return s390_emit_compare (code, cc, const0_rtx);
+  return s390_emit_compare (VOIDmode, code, cc, const0_rtx);
 }
 
 /* Emit a jump instruction to TARGET and return it.  If COND is
@@ -6647,7 +6647,7 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx 
alignment)
  Now we have to check whether the resulting index lies 

[gcc r15-6780] aarch64: Add new +fcma flag

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:9bbb91e8e0a3a26fe2ff651a89011ca5a0b4794d

commit r15-6780-g9bbb91e8e0a3a26fe2ff651a89011ca5a0b4794d
Author: Andrew Carlotti 
Date:   Thu Aug 1 11:54:20 2024 +0100

aarch64: Add new +fcma flag

This includes +fcma as a dependency of +sve, and means that we can
finally support fcma intrinsics on a64fx.

Also add fcma to the Features list in several cpunative testcases that
incorrectly included sve without fcma.

gcc/ChangeLog:

* config/aarch64/aarch64-arches.def (V8_3A): Add FCMA.
* config/aarch64/aarch64-option-extensions.def (FCMA): New flag.
(SVE): Add FCMA dependency.
* config/aarch64/aarch64.h (TARGET_COMPLEX): Use new flag.
* config/aarch64/arm_neon.h: Use new flag for fcma intrinsics.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cpunative/info_15: Add fcma to Features.
* gcc.target/aarch64/cpunative/info_16: Ditto.
* gcc.target/aarch64/cpunative/info_17: Ditto.
* gcc.target/aarch64/cpunative/info_8: Ditto.
* gcc.target/aarch64/cpunative/info_9: Ditto.

Diff:
---
 gcc/config/aarch64/aarch64-arches.def  | 2 +-
 gcc/config/aarch64/aarch64-option-extensions.def   | 4 +++-
 gcc/config/aarch64/aarch64.h   | 2 +-
 gcc/config/aarch64/arm_neon.h  | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/info_15 | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/info_16 | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/info_17 | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/info_8  | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/info_9  | 2 +-
 9 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index 62a7d9fd2442..d85b14be4c3f 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -33,7 +33,7 @@
 AARCH64_ARCH("armv8-a",   generic_armv8_a,   V8A,   8,  (SIMD))
 AARCH64_ARCH("armv8.1-a", generic_armv8_a,   V8_1A, 8,  (V8A, LSE, 
CRC, RDMA))
 AARCH64_ARCH("armv8.2-a", generic_armv8_a,   V8_2A, 8,  (V8_1A))
-AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 8,  (V8_2A, PAUTH, 
RCPC))
+AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 8,  (V8_2A, PAUTH, 
RCPC, FCMA))
 AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM))
 AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES))
 AARCH64_ARCH("armv8.6-a", generic_armv8_a,   V8_6A, 8,  (V8_5A, I8MM, 
BF16))
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 6a70a63afd0a..c41c4998c5cb 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -151,6 +151,8 @@ AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), 
"asimdfhm")
 
 AARCH64_FMV_FEATURE("fp16fml", FP16FML, (F16FML))
 
+AARCH64_OPT_FMV_EXTENSION("fcma", FCMA, (SIMD), (), (), "fcma")
+
 AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc")
 
 AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC), (), (), "lrcpc3")
@@ -163,7 +165,7 @@ AARCH64_OPT_FMV_EXTENSION("bf16", BF16, (FP), (SIMD), (), 
"bf16")
 
 AARCH64_FMV_FEATURE("rpres", RPRES, ())
 
-AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16), (), (), "sve")
+AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16, FCMA), (), (), "sve")
 
 /* This specifically does not imply +sve.  */
 AARCH64_OPT_EXTENSION("sve-b16b16", SVE_B16B16, (), (), (), "sveb16b16")
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 75ea2a6910ee..250edb7d426d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -364,7 +364,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 #define TARGET_JSCVT   (TARGET_FLOAT && TARGET_ARMV8_3)
 
 /* Armv8.3-a Complex number extension to AdvSIMD extensions.  */
-#define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3)
+#define TARGET_COMPLEX AARCH64_HAVE_ISA (FCMA)
 
 /* Floating-point rounding instructions from Armv8.5-a.  */
 #define TARGET_FRINT (AARCH64_HAVE_ISA (V8_5A) && TARGET_FLOAT)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 20849b0b8a6d..da145adf6749 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26975,7 +26975,7 @@ vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
 /* AdvSIMD Complex numbers intrinsics.  */
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.3-a")
+#pragma GCC target ("+nothing+fcma")
 
 #pragma GCC push_options
 #pragma GCC target ("+fp16")
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 
b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
index 6b425ea20135..1a31a75d6b48 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpu

[gcc r15-6784] aarch64: Add new +rcpc2 flag

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:5747c121e9caa66a8173ad01db78769be08c407e

commit r15-6784-g5747c121e9caa66a8173ad01db78769be08c407e
Author: Andrew Carlotti 
Date:   Tue Jul 30 18:48:48 2024 +0100

aarch64: Add new +rcpc2 flag

gcc/ChangeLog:

* config/aarch64/aarch64-arches.def (V8_4A): Add RCPC2.
* config/aarch64/aarch64-option-extensions.def
(RCPC2): New flag.
(RCPC3): Add RCPC2 dependency.
* config/aarch64/aarch64.h (TARGET_RCPC2): Use new flag.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cpunative/native_cpu_21.c: Add rcpc2 to
expected feature string instead of rcpc.
* gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto.

Diff:
---
 gcc/config/aarch64/aarch64-arches.def  | 2 +-
 gcc/config/aarch64/aarch64-option-extensions.def   | 4 +++-
 gcc/config/aarch64/aarch64.h   | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index 8c2aa4e477fc..bcd08e21fde5 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -34,7 +34,7 @@ AARCH64_ARCH("armv8-a",   generic_armv8_a,   V8A,   
8,  (SIMD))
 AARCH64_ARCH("armv8.1-a", generic_armv8_a,   V8_1A, 8,  (V8A, LSE, 
CRC, RDMA))
 AARCH64_ARCH("armv8.2-a", generic_armv8_a,   V8_2A, 8,  (V8_1A))
 AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 8,  (V8_2A, PAUTH, 
RCPC, FCMA, JSCVT))
-AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM))
+AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM, RCPC2))
 AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES, FRINTTS, FLAGM2))
 AARCH64_ARCH("armv8.6-a", generic_armv8_a,   V8_6A, 8,  (V8_5A, I8MM, 
BF16))
 AARCH64_ARCH("armv8.7-a", generic_armv8_a,   V8_7A, 8,  (V8_6A))
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 00533c38839b..3558a6ed5ad7 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -159,7 +159,9 @@ AARCH64_OPT_FMV_EXTENSION("fcma", FCMA, (SIMD), (), (), 
"fcma")
 
 AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc")
 
-AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC), (), (), "lrcpc3")
+AARCH64_OPT_FMV_EXTENSION("rcpc2", RCPC2, (RCPC), (), (), "ilrcpc")
+
+AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC2), (), (), "lrcpc3")
 
 AARCH64_OPT_FMV_EXTENSION("frintts", FRINTTS, (FP), (), (), "frint")
 
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 1c8fcd5c582f..3f3a475eb01d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -427,7 +427,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 
 /* The RCPC2 extensions from Armv8.4-a that allow immediate offsets to LDAPR
and sign-extending versions.*/
-#define TARGET_RCPC2 ((AARCH64_HAVE_ISA (V8_4A) && TARGET_RCPC) || 
TARGET_RCPC3)
+#define TARGET_RCPC2 AARCH64_HAVE_ISA (RCPC2)
 
 /* RCPC3 (Release Consistency) extensions, optional from Armv8.2-a.  */
 #define TARGET_RCPC3 AARCH64_HAVE_ISA (RCPC3)
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
index c1d5896e1eb0..904cdf452263 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
@@ -7,7 +7,7 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n}
 } } */
+/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc2\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n}
 } } */
 
 /* Check that an Armv8-A core doesn't fall apart on extensions without midr
values.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
index 4533a2bf5912..feb959b11b0e 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
@@ -7,7 +7,7 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\+pauth\n}
 } } */
+/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+js

[gcc r15-6786] aarch64: Add new +xs flag

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:f06c6f8bf33e0b13d410b2305c58803a79754009

commit r15-6786-gf06c6f8bf33e0b13d410b2305c58803a79754009
Author: Andrew Carlotti 
Date:   Tue Jul 30 19:01:27 2024 +0100

aarch64: Add new +xs flag

GCC does not emit tlbi instructions, so this only affects the flags
passed through to the assembler.

gcc/ChangeLog:

* config/aarch64/aarch64-arches.def (V8_7A): Add XS.
* config/aarch64/aarch64-option-extensions.def (XS): New flag.

Diff:
---
 gcc/config/aarch64/aarch64-arches.def| 2 +-
 gcc/config/aarch64/aarch64-option-extensions.def | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index ff873a372431..fd4881a8ebfb 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -37,7 +37,7 @@ AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 
8,  (V8_2A, PAUTH, R
 AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM, RCPC2))
 AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES, FRINTTS, FLAGM2))
 AARCH64_ARCH("armv8.6-a", generic_armv8_a,   V8_6A, 8,  (V8_5A, I8MM, 
BF16))
-AARCH64_ARCH("armv8.7-a", generic_armv8_a,   V8_7A, 8,  (V8_6A, WFXT))
+AARCH64_ARCH("armv8.7-a", generic_armv8_a,   V8_7A, 8,  (V8_6A, WFXT, 
XS))
 AARCH64_ARCH("armv8.8-a", generic_armv8_a,   V8_8A, 8,  (V8_7A, MOPS))
 AARCH64_ARCH("armv8.9-a", generic_armv8_a,   V8_9A, 8,  (V8_8A, CSSC))
 AARCH64_ARCH("armv8-r",   generic_armv8_a,   V8R  , 8,  (V8_4A))
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index d0d01f91c0fc..a1133accfce5 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -227,6 +227,8 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "")
 
 AARCH64_OPT_FMV_EXTENSION("wfxt", WFXT, (), (), (), "wfxt")
 
+AARCH64_OPT_EXTENSION("xs", XS, (), (), (), "")
+
 AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "smef64f64")
 
 AARCH64_FMV_FEATURE("sme-f64f64", SME_F64, (SME_F64F64))


[gcc r15-6782] aarch64: Add new +frintts flag

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:32a45a216e63a205eed62f26c20ba919a77b025b

commit r15-6782-g32a45a216e63a205eed62f26c20ba919a77b025b
Author: Andrew Carlotti 
Date:   Tue Jul 30 18:36:22 2024 +0100

aarch64: Add new +frintts flag

gcc/ChangeLog:

* config/aarch64/aarch64-arches.def (V8_5A): Add FRINTTS.
* config/aarch64/aarch64-option-extensions.def (FRINTTS): New flag.
* config/aarch64/aarch64.h (TARGET_FRINT): Use new flag.
* config/aarch64/arm_acle.h: Use new flag for frintts intrinsics.
* config/aarch64/arm_neon.h: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cpunative/native_cpu_21.c: Add frintts to
expected feature string.
* gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto.

Diff:
---
 gcc/config/aarch64/aarch64-arches.def  | 2 +-
 gcc/config/aarch64/aarch64-option-extensions.def   | 2 ++
 gcc/config/aarch64/aarch64.h   | 2 +-
 gcc/config/aarch64/arm_acle.h  | 2 +-
 gcc/config/aarch64/arm_neon.h  | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +-
 7 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index a21e5de496e9..e0f6cc21d198 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -35,7 +35,7 @@ AARCH64_ARCH("armv8.1-a", generic_armv8_a,   V8_1A, 
8,  (V8A, LSE, CRC,
 AARCH64_ARCH("armv8.2-a", generic_armv8_a,   V8_2A, 8,  (V8_1A))
 AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 8,  (V8_2A, PAUTH, 
RCPC, FCMA, JSCVT))
 AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM))
-AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES))
+AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES, FRINTTS))
 AARCH64_ARCH("armv8.6-a", generic_armv8_a,   V8_6A, 8,  (V8_5A, I8MM, 
BF16))
 AARCH64_ARCH("armv8.7-a", generic_armv8_a,   V8_7A, 8,  (V8_6A))
 AARCH64_ARCH("armv8.8-a", generic_armv8_a,   V8_8A, 8,  (V8_7A, MOPS))
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 96518ba6..9921e51c85f9 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -159,6 +159,8 @@ AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc")
 
 AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC), (), (), "lrcpc3")
 
+AARCH64_OPT_FMV_EXTENSION("frintts", FRINTTS, (FP), (), (), "frint")
+
 AARCH64_OPT_FMV_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm")
 
 /* An explicit +bf16 implies +simd, but +bf16+nosimd still enables scalar BF16
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index f652869625a8..1c8fcd5c582f 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -367,7 +367,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 #define TARGET_COMPLEX AARCH64_HAVE_ISA (FCMA)
 
 /* Floating-point rounding instructions from Armv8.5-a.  */
-#define TARGET_FRINT (AARCH64_HAVE_ISA (V8_5A) && TARGET_FLOAT)
+#define TARGET_FRINT AARCH64_HAVE_ISA (FRINTTS)
 
 /* TME instructions are enabled.  */
 #define TARGET_TME AARCH64_HAVE_ISA (TME)
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 985a18fba678..7976c117daf7 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -130,7 +130,7 @@ __jcvt (double __a)
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.5-a")
+#pragma GCC target ("+nothing+frintts")
 __extension__ extern __inline float
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint32zf (float __a)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index da145adf6749..33594cb65d28 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -27638,7 +27638,7 @@ vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t 
__a, float16x8_t __b,
 #pragma GCC pop_options
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.5-a")
+#pragma GCC target ("+nothing+simd+frintts")
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
index 603ee48d584b..aa70d1d22b82 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
@@ -7,7 +7,7 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arc

[gcc r15-6785] aarch64: Add new +wfxt flag

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:4984119b75e0fb8b653dd46b5d196017c90fd6a5

commit r15-6785-g4984119b75e0fb8b653dd46b5d196017c90fd6a5
Author: Andrew Carlotti 
Date:   Tue Jul 30 18:56:01 2024 +0100

aarch64: Add new +wfxt flag

GCC does not currently emit the wfet or wfit instructions, so this
primarily affects the flags passed through to the assembler.

gcc/ChangeLog:

* config/aarch64/aarch64-arches.def (V8_7A): Add WFXT.
* config/aarch64/aarch64-option-extensions.def (WFXT): New flag.

Diff:
---
 gcc/config/aarch64/aarch64-arches.def| 2 +-
 gcc/config/aarch64/aarch64-option-extensions.def | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index bcd08e21fde5..ff873a372431 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -37,7 +37,7 @@ AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 
8,  (V8_2A, PAUTH, R
 AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM, RCPC2))
 AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES, FRINTTS, FLAGM2))
 AARCH64_ARCH("armv8.6-a", generic_armv8_a,   V8_6A, 8,  (V8_5A, I8MM, 
BF16))
-AARCH64_ARCH("armv8.7-a", generic_armv8_a,   V8_7A, 8,  (V8_6A))
+AARCH64_ARCH("armv8.7-a", generic_armv8_a,   V8_7A, 8,  (V8_6A, WFXT))
 AARCH64_ARCH("armv8.8-a", generic_armv8_a,   V8_8A, 8,  (V8_7A, MOPS))
 AARCH64_ARCH("armv8.9-a", generic_armv8_a,   V8_9A, 8,  (V8_8A, CSSC))
 AARCH64_ARCH("armv8-r",   generic_armv8_a,   V8R  , 8,  (V8_4A))
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 3558a6ed5ad7..d0d01f91c0fc 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -225,6 +225,8 @@ AARCH64_OPT_EXTENSION("pauth", PAUTH, (), (), (), "paca 
pacg")
 
 AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "")
 
+AARCH64_OPT_FMV_EXTENSION("wfxt", WFXT, (), (), (), "wfxt")
+
 AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "smef64f64")
 
 AARCH64_FMV_FEATURE("sme-f64f64", SME_F64, (SME_F64F64))


[gcc r15-6787] docs: Add new AArch64 flags

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:abbe2905eddcedf2c6247e6bb76136e850458d74

commit r15-6787-gabbe2905eddcedf2c6247e6bb76136e850458d74
Author: Andrew Carlotti 
Date:   Tue Nov 5 17:24:12 2024 +

docs: Add new AArch64 flags

gcc/ChangeLog:

* doc/invoke.texi: Add new AArch64 flags.

Diff:
---
 gcc/doc/invoke.texi | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 17fe2c64c1f8..dd0d2b41a1a9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21653,11 +21653,11 @@ and the features that they enable by default:
 @item @samp{armv8-a} @tab Armv8-A @tab @samp{+fp}, @samp{+simd}
 @item @samp{armv8.1-a} @tab Armv8.1-A @tab @samp{armv8-a}, @samp{+crc}, 
@samp{+lse}, @samp{+rdma}
 @item @samp{armv8.2-a} @tab Armv8.2-A @tab @samp{armv8.1-a}
-@item @samp{armv8.3-a} @tab Armv8.3-A @tab @samp{armv8.2-a}, @samp{+pauth}
-@item @samp{armv8.4-a} @tab Armv8.4-A @tab @samp{armv8.3-a}, @samp{+flagm}, 
@samp{+fp16fml}, @samp{+dotprod}
-@item @samp{armv8.5-a} @tab Armv8.5-A @tab @samp{armv8.4-a}, @samp{+sb}, 
@samp{+ssbs}, @samp{+predres}
+@item @samp{armv8.3-a} @tab Armv8.3-A @tab @samp{armv8.2-a}, @samp{+pauth}, 
@samp{+fcma}, @samp{+jscvt}
+@item @samp{armv8.4-a} @tab Armv8.4-A @tab @samp{armv8.3-a}, @samp{+flagm}, 
@samp{+fp16fml}, @samp{+dotprod}, @samp{+rcpc2}
+@item @samp{armv8.5-a} @tab Armv8.5-A @tab @samp{armv8.4-a}, @samp{+sb}, 
@samp{+ssbs}, @samp{+predres}, @samp{+frintts}, @samp{+flagm2}
 @item @samp{armv8.6-a} @tab Armv8.6-A @tab @samp{armv8.5-a}, @samp{+bf16}, 
@samp{+i8mm}
-@item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}
+@item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+wfxt}, 
@samp{+xs}
 @item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops}
 @item @samp{armv8.9-a} @tab Armv8.9-a @tab @samp{armv8.8-a}
 @item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, 
@samp{+sve2}
@@ -21997,6 +21997,8 @@ Enable the instructions to accelerate memory operations 
like @code{memcpy},
 @option{-march=armv8.8-a}
 @item flagm
 Enable the Flag Manipulation instructions Extension.
+@item flagm2
+Enable the FlagM2 flag conversion instructions.
 @item pauth
 Enable the Pointer Authentication Extension.
 @item cssc
@@ -22020,6 +22022,16 @@ instructions.
 @item sme2p1
 Enable the Scalable Matrix Extension version 2.1.  This also enables SME2
 instructions.
+@item fcma
+Enable the complex number SIMD extensions.
+@item jscvt
+Enable the @code{fjcvtzs} JavaScript conversion instruction.
+@item frintts
+Enable floating-point round to integral value instructions.
+@item wfxt
+Enable @code{wfet} and @code{wfit} instructions.
+@item xs
+Enable the XS memory attribute extension.
 @item lse128
 Enable the LSE128 128-bit atomic instructions extension.  This also
 enables LSE instructions.
@@ -22030,6 +22042,8 @@ This also enables the LSE128 extension.
 Enable support for Armv9.4-a Guarded Control Stack extension.
 @item the
 Enable support for Armv8.9-a/9.4-a translation hardening extension.
+@item rcpc2
+Enable the RCpc2 extension.
 @item rcpc3
 Enable the RCpc3 (Release Consistency) extension.
 @item fp8


[gcc r15-6783] aarch64: Add new +flagm2 flag

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:f5915726fd14cbf76a170338d6a91b11817e808a

commit r15-6783-gf5915726fd14cbf76a170338d6a91b11817e808a
Author: Andrew Carlotti 
Date:   Tue Jul 30 18:43:51 2024 +0100

aarch64: Add new +flagm2 flag

GCC does not currently emit the axflag or xaflag instructions, so this
primarily affects the flags passed through to the assembler.

gcc/ChangeLog:

* config/aarch64/aarch64-arches.def (V8_5A): Add FLAGM2.
* config/aarch64/aarch64-option-extensions.def (FLAGM2): New flag.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cpunative/native_cpu_21.c: Add flagm2 to
expected feature string instead of flagm.
* gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto.

Diff:
---
 gcc/config/aarch64/aarch64-arches.def  | 2 +-
 gcc/config/aarch64/aarch64-option-extensions.def   | 2 ++
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index e0f6cc21d198..8c2aa4e477fc 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -35,7 +35,7 @@ AARCH64_ARCH("armv8.1-a", generic_armv8_a,   V8_1A, 
8,  (V8A, LSE, CRC,
 AARCH64_ARCH("armv8.2-a", generic_armv8_a,   V8_2A, 8,  (V8_1A))
 AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 8,  (V8_2A, PAUTH, 
RCPC, FCMA, JSCVT))
 AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM))
-AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES, FRINTTS))
+AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES, FRINTTS, FLAGM2))
 AARCH64_ARCH("armv8.6-a", generic_armv8_a,   V8_6A, 8,  (V8_5A, I8MM, 
BF16))
 AARCH64_ARCH("armv8.7-a", generic_armv8_a,   V8_7A, 8,  (V8_6A))
 AARCH64_ARCH("armv8.8-a", generic_armv8_a,   V8_8A, 8,  (V8_7A, MOPS))
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 9921e51c85f9..00533c38839b 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -103,6 +103,8 @@ AARCH64_OPT_FMV_EXTENSION("rng", RNG, (), (), (), "rng")
 
 AARCH64_OPT_FMV_EXTENSION("flagm", FLAGM, (), (), (), "flagm")
 
+AARCH64_OPT_FMV_EXTENSION("flagm2", FLAGM2, (FLAGM), (), (), "flagm2")
+
 AARCH64_OPT_FMV_EXTENSION("lse", LSE, (), (), (), "atomics")
 
 AARCH64_OPT_FMV_EXTENSION("fp", FP, (), (), (), "fp")
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
index aa70d1d22b82..c1d5896e1eb0 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
@@ -7,7 +7,7 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n}
 } } */
+/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n}
 } } */
 
 /* Check that an Armv8-A core doesn't fall apart on extensions without midr
values.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
index ccd5d0d9bb7d..4533a2bf5912 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
@@ -7,7 +7,7 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\+pauth\n}
 } } */
+/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\+pauth\n}
 } } */
 
 /* Check that an Armv8-A core doesn't fall apart on extensions without midr
values and that it enables optional features.  */


[gcc r15-6779] aarch64: Use PAUTH instead of V8_3A in some places

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:20385cb92cbd4a1934661ab97a162c1e25935836

commit r15-6779-g20385cb92cbd4a1934661ab97a162c1e25935836
Author: Andrew Carlotti 
Date:   Tue Jul 30 16:26:04 2024 +0100

aarch64: Use PAUTH instead of V8_3A in some places

gcc/ChangeLog:

* config/aarch64/aarch64.cc
(aarch64_expand_epilogue): Use TARGET_PAUTH.
* config/aarch64/aarch64.md: Update comment.

Diff:
---
 gcc/config/aarch64/aarch64.cc | 6 +++---
 gcc/config/aarch64/aarch64.md | 8 
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 6fe0fa2722bd..ad31e9d255c0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10283,12 +10283,12 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
1) Sibcalls don't return in a normal way, so if we're about to call one
   we must authenticate.
 
-   2) The RETAA instruction is not available before ARMv8.3-A, so if we are
-  generating code for !TARGET_ARMV8_3 we can't use it and must
+   2) The RETAA instruction is not available without FEAT_PAuth, so if we
+  are generating code for !TARGET_PAUTH we can't use it and must
   explicitly authenticate.
 */
   if (aarch64_return_address_signing_enabled ()
-  && (sibcall || !TARGET_ARMV8_3))
+  && (sibcall || !TARGET_PAUTH))
 {
   switch (aarch64_ra_sign_key)
{
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 0ed3c93b379e..44f5b7a54d25 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7846,11 +7846,11 @@
   [(set_attr "type" "f_cvtf2i")]
 )
 
-;; Pointer authentication patterns are always provided.  In architecture
-;; revisions prior to ARMv8.3-A these HINT instructions operate as NOPs.
+;; Pointer authentication patterns are always provided.  On targets that
+;; don't implement FEAT_PAuth these HINT instructions operate as NOPs.
 ;; This lets the user write portable software which authenticates pointers
-;; when run on something which implements ARMv8.3-A, and which runs
-;; correctly, but does not authenticate pointers, where ARMv8.3-A is not
+;; when run on something which implements FEAT_PAuth, and which runs
+;; correctly, but does not authenticate pointers, where FEAT_PAuth is not
 ;; implemented.
 
 ;; Signing/Authenticating R30 using SP as the salt.


[gcc r15-6788] Disable a broken multiversioning optimisation

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:21212f08d8258fa6d4cfdd21a35d0ee7c44ccbea

commit r15-6788-g21212f08d8258fa6d4cfdd21a35d0ee7c44ccbea
Author: Andrew Carlotti 
Date:   Tue Jan 7 18:32:23 2025 +

Disable a broken multiversioning optimisation

This patch skips redirect_to_specific_clone for aarch64 and riscv,
because the optimisation has two flaws:

1. It checks the value of the "target" attribute, even on targets that
don't use this attribute for multiversioning.

2. The algorithm used is too aggressive, and will eliminate the
indirection in some cases where the runtime choice of callee version
can't be determined statically at compile time.  A correct implementation
would need to verify that:
 - if the current caller version were selected at runtime, then the
   chosen callee version would be eligible for selection.
 - if any higher priority callee version were selected at runtime, then
   a higher priority caller version would have been eligible for
   selection (and hence the current caller version wouldn't have been
   selected).

The current checks only verify a more restrictive version of the first
condition, and don't check the second condition at all.

Fixing the optimisation properly would require implementing target hooks
to check for implications between version attributes, which is too
complicated for this stage.  However, I would like to see this hook
implemented in the future, since it could also help deduplicate other
multiversioning code.

Since this behaviour has existed for x86 and powerpc for a while, I
think it's best to preserve the existing behaviour on those targets,
unless any maintainer for those targets disagrees.

gcc/ChangeLog:

* multiple_target.cc
(redirect_to_specific_clone): Assert that "target" attribute is
used for FMV before checking it.
(ipa_target_clone): Skip redirect_to_specific_clone on some
targets.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-pragma.C: New test.

Diff:
---
 gcc/multiple_target.cc   | 15 +++---
 gcc/testsuite/g++.target/aarch64/mv-pragma.C | 31 
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
index 552b9626aa71..d8becf4d9a96 100644
--- a/gcc/multiple_target.cc
+++ b/gcc/multiple_target.cc
@@ -442,7 +442,14 @@ expand_target_clones (struct cgraph_node *node, bool 
definition)
 
 /* When NODE is a target clone, consider all callees and redirect
to a clone with equal target attributes.  That prevents multiple
-   multi-versioning dispatches and a call-chain can be optimized.  */
+   multi-versioning dispatches and a call-chain can be optimized.
+
+   This optimisation might pick the wrong version in some cases, since knowing
+   that we meet the target requirements for a matching callee version does not
+   tell us that we won't also meet the target requirements for a higher
+   priority callee version at runtime.  Since this is longstanding behaviour
+   for x86 and powerpc, we preserve it for those targets, but skip the 
optimisation
+   for targets that use the "target_version" attribute for multi-versioning.  
*/
 
 static void
 redirect_to_specific_clone (cgraph_node *node)
@@ -451,6 +458,7 @@ redirect_to_specific_clone (cgraph_node *node)
   if (fv == NULL)
 return;
 
+  gcc_assert (TARGET_HAS_FMV_TARGET_ATTRIBUTE);
   tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES (node->decl));
   if (attr_target == NULL_TREE)
 return;
@@ -503,8 +511,9 @@ ipa_target_clone (void)
   for (unsigned i = 0; i < to_dispatch.length (); i++)
 create_dispatcher_calls (to_dispatch[i]);
 
-  FOR_EACH_FUNCTION (node)
-redirect_to_specific_clone (node);
+  if (TARGET_HAS_FMV_TARGET_ATTRIBUTE)
+FOR_EACH_FUNCTION (node)
+  redirect_to_specific_clone (node);
 
   return 0;
 }
diff --git a/gcc/testsuite/g++.target/aarch64/mv-pragma.C 
b/gcc/testsuite/g++.target/aarch64/mv-pragma.C
new file mode 100644
index ..545d0735438d
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-pragma.C
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+#pragma GCC target ("+sve")
+
+__attribute__((target_version("default")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target_version("sve2")))
+int foo ()
+{
+  return 2;
+}
+
+__attribute__((target_version("default")))
+int bar ()
+{
+  return foo();
+}
+
+__attribute__((target_version("sha3")))
+int bar ()
+{
+  return foo() + 5;
+}
+
+/* { dg-final { scan-assembler-times "\n\tbl\t_Z3foov\n" 2 } } */


[gcc r15-6781] aarch64: Add new +jscvt flag

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:2c8913576fa0bf234bde3c9c1f137a1b9cca95f1

commit r15-6781-g2c8913576fa0bf234bde3c9c1f137a1b9cca95f1
Author: Andrew Carlotti 
Date:   Thu Aug 1 11:54:41 2024 +0100

aarch64: Add new +jscvt flag

gcc/ChangeLog:

* config/aarch64/aarch64-arches.def (V8_3A): Add JSCVT.
* config/aarch64/aarch64-option-extensions.def (JSCVT): New flag.
* config/aarch64/aarch64.h (TARGET_JSCVT): Use new flag.
* config/aarch64/arm_acle.h: Use new flag for jscvt intrinsics.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/cpunative/native_cpu_21.c: Add jscvt to
expected feature string.
* gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto.

Diff:
---
 gcc/config/aarch64/aarch64-arches.def  | 2 +-
 gcc/config/aarch64/aarch64-option-extensions.def   | 2 ++
 gcc/config/aarch64/aarch64.h   | 2 +-
 gcc/config/aarch64/arm_acle.h  | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +-
 6 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index d85b14be4c3f..a21e5de496e9 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -33,7 +33,7 @@
 AARCH64_ARCH("armv8-a",   generic_armv8_a,   V8A,   8,  (SIMD))
 AARCH64_ARCH("armv8.1-a", generic_armv8_a,   V8_1A, 8,  (V8A, LSE, 
CRC, RDMA))
 AARCH64_ARCH("armv8.2-a", generic_armv8_a,   V8_2A, 8,  (V8_1A))
-AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 8,  (V8_2A, PAUTH, 
RCPC, FCMA))
+AARCH64_ARCH("armv8.3-a", generic_armv8_a,   V8_3A, 8,  (V8_2A, PAUTH, 
RCPC, FCMA, JSCVT))
 AARCH64_ARCH("armv8.4-a", generic_armv8_a,   V8_4A, 8,  (V8_3A, 
F16FML, DOTPROD, FLAGM))
 AARCH64_ARCH("armv8.5-a", generic_armv8_a,   V8_5A, 8,  (V8_4A, SB, 
SSBS, PREDRES))
 AARCH64_ARCH("armv8.6-a", generic_armv8_a,   V8_6A, 8,  (V8_5A, I8MM, 
BF16))
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index c41c4998c5cb..96518ba6 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -151,6 +151,8 @@ AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), 
"asimdfhm")
 
 AARCH64_FMV_FEATURE("fp16fml", FP16FML, (F16FML))
 
+AARCH64_OPT_FMV_EXTENSION("jscvt", JSCVT, (FP), (), (), "jscvt")
+
 AARCH64_OPT_FMV_EXTENSION("fcma", FCMA, (SIMD), (), (), "fcma")
 
 AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc")
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 250edb7d426d..f652869625a8 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -361,7 +361,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 #define TARGET_ARMV8_3 AARCH64_HAVE_ISA (V8_3A)
 
 /* Javascript conversion instruction from Armv8.3-a.  */
-#define TARGET_JSCVT   (TARGET_FLOAT && TARGET_ARMV8_3)
+#define TARGET_JSCVT   AARCH64_HAVE_ISA (JSCVT)
 
 /* Armv8.3-a Complex number extension to AdvSIMD extensions.  */
 #define TARGET_COMPLEX AARCH64_HAVE_ISA (FCMA)
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 5c3478d4ee82..985a18fba678 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -119,7 +119,7 @@ __revl (unsigned long __value)
 }
 
 #pragma GCC push_options
-#pragma GCC target ("arch=armv8.3-a")
+#pragma GCC target ("+nothing+jscvt")
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __jcvt (double __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
index 1d90e9ec9d97..603ee48d584b 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c
@@ -7,7 +7,7 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+rcpc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n}
 } } */
+/* { dg-final { scan-assembler {\.arch 
armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n}
 } } */
 
 /* Check that an Armv8-A core doesn't fall apart on extensions without midr
values.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
index 17050a0b72c9..e0ba97fb6e9a 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c
@@ -7,7 +7,7 @@ int main()
   return 0;
 }
 
-/* { dg

[gcc r14-11197] Disable a broken multiversioning optimisation

2025-01-10 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:d0191d1631647436c4707ca6b915a46adf591d87

commit r14-11197-gd0191d1631647436c4707ca6b915a46adf591d87
Author: Andrew Carlotti 
Date:   Tue Jan 7 18:32:23 2025 +

Disable a broken multiversioning optimisation

This patch skips redirect_to_specific clone for aarch64 and riscv,
because the optimisation has two flaws:

1. It checks the value of the "target" attribute, even on targets that
don't use this attribute for multiversioning.

2. The algorithm used is too aggressive, and will eliminate the
indirection in some cases where the runtime choice of callee version
can't be determined statically at compile time.  A correct implementation
would need to verify that:
 - if the current caller version were selected at runtime, then the
   chosen callee version would be eligible for selection.
 - if any higher priority callee version were selected at runtime, then
   a higher priority caller version would have been eligible for
   selection (and hence the current caller version wouldn't have been
   selected).

The current checks only verify a more restrictive version of the first
condition, and don't check the second condition at all.

Fixing the optimisation properly would require implementing target hooks
to check for implications between version attributes, which is too
complicated for this stage.  However, I would like to see this hook
implemented in the future, since it could also help deduplicate other
multiversioning code.

Since this behavior has existed for x86 and powerpc for a while, I
think it's best to preserve the existing behavior on those targets,
unless any maintainer for those targets disagrees.

gcc/ChangeLog:

* multiple_target.cc
(redirect_to_specific_clone): Assert that "target" attribute is
used for FMV before checking it.
(ipa_target_clone): Skip redirect_to_specific_clone on some
targets.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-pragma.C: New test.

Diff:
---
 gcc/multiple_target.cc   | 16 +++---
 gcc/testsuite/g++.target/aarch64/mv-pragma.C | 31 
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
index 1fdd279da04a..60958a56c92d 100644
--- a/gcc/multiple_target.cc
+++ b/gcc/multiple_target.cc
@@ -437,7 +437,15 @@ expand_target_clones (struct cgraph_node *node, bool 
definition)
 
 /* When NODE is a target clone, consider all callees and redirect
to a clone with equal target attributes.  That prevents multiple
-   multi-versioning dispatches and a call-chain can be optimized.  */
+   multi-versioning dispatches and a call-chain can be optimized.
+
+   This optimisation might pick the wrong version in some cases, since knowing
+   that we meet the target requirements for a matching callee version does not
+   tell us that we won't also meet the target requirements for a higher
+   priority callee version at runtime.  Since this is longstanding behavior
+   for x86 and powerpc, we preserve it for those targets, but skip the
+   optimisation for targets that use the "target_version" attribute for
+   multi-versioning.  */
 
 static void
 redirect_to_specific_clone (cgraph_node *node)
@@ -446,6 +454,7 @@ redirect_to_specific_clone (cgraph_node *node)
   if (fv == NULL)
 return;
 
+  gcc_assert (TARGET_HAS_FMV_TARGET_ATTRIBUTE);
   tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES (node->decl));
   if (attr_target == NULL_TREE)
 return;
@@ -498,8 +507,9 @@ ipa_target_clone (void)
   for (unsigned i = 0; i < to_dispatch.length (); i++)
 create_dispatcher_calls (to_dispatch[i]);
 
-  FOR_EACH_FUNCTION (node)
-redirect_to_specific_clone (node);
+  if (TARGET_HAS_FMV_TARGET_ATTRIBUTE)
+FOR_EACH_FUNCTION (node)
+  redirect_to_specific_clone (node);
 
   return 0;
 }
diff --git a/gcc/testsuite/g++.target/aarch64/mv-pragma.C 
b/gcc/testsuite/g++.target/aarch64/mv-pragma.C
new file mode 100644
index ..545d0735438d
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-pragma.C
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+#pragma GCC target ("+sve")
+
+__attribute__((target_version("default")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target_version("sve2")))
+int foo ()
+{
+  return 2;
+}
+
+__attribute__((target_version("default")))
+int bar ()
+{
+  return foo();
+}
+
+__attribute__((target_version("sha3")))
+int bar ()
+{
+  return foo() + 5;
+}
+
+/* { dg-final { scan-assembler-times "\n\tbl\t_Z3foov\n" 2 } } */


[gcc r15-6792] c++: ICE with pack indexing and partial inst [PR117937]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:d64447946b0c8964dfd9731c3792af0fe4158cda

commit r15-6792-gd64447946b0c8964dfd9731c3792af0fe4158cda
Author: Marek Polacek 
Date:   Thu Dec 19 17:26:27 2024 -0500

c++: ICE with pack indexing and partial inst [PR117937]

Here we ICE in expand_expr_real_1:

  if (exp)
{
  tree context = decl_function_context (exp);
  gcc_assert (SCOPE_FILE_SCOPE_P (context)
  || context == current_function_decl

on something like this test:

  void
  f (auto... args)
  {
[&]<size_t... i>(seq<i...>) {
g(args...[i]...);
}(seq<0>());
  }

because while current_function_decl is:

  f(int)::)> [with long unsigned int ...i = {0}]

(correct), context is:

  f(int)::)>

which is only the partial instantiation.

I think that when tsubst_pack_index gets a partial instantiation, e.g.
{*args#0} as the pack, we should still tsubst it.  The args#0's value-expr
can be __closure->__args#0 where the closure's context is the partially
instantiated operator().  So we should let retrieve_local_specialization
find the right args#0.

PR c++/117937

gcc/cp/ChangeLog:

* pt.cc (tsubst_pack_index): tsubst the pack even when it's not
PACK_EXPANSION_P.

gcc/testsuite/ChangeLog:

* g++.dg/cpp26/pack-indexing13.C: New test.
* g++.dg/cpp26/pack-indexing14.C: New test.

Diff:
---
 gcc/cp/pt.cc |  8 
 gcc/testsuite/g++.dg/cpp26/pack-indexing13.C | 23 +++
 gcc/testsuite/g++.dg/cpp26/pack-indexing14.C | 18 ++
 3 files changed, 49 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 67964d41ab8b..ff0a3a4f7d81 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -14063,6 +14063,14 @@ tsubst_pack_index (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
   tree pack = PACK_INDEX_PACK (t);
   if (PACK_EXPANSION_P (pack))
 pack = tsubst_pack_expansion (pack, args, complain, in_decl);
+  else
+{
+  /* PACK can be {*args#0} whose args#0's value-expr refers to
+a partially instantiated closure.  Let tsubst find the
+fully-instantiated one.  */
+  gcc_assert (TREE_CODE (pack) == TREE_VEC);
+  pack = tsubst (pack, args, complain, in_decl);
+}
   if (TREE_CODE (pack) == TREE_VEC && TREE_VEC_LENGTH (pack) == 0)
 {
   if (complain & tf_error)
diff --git a/gcc/testsuite/g++.dg/cpp26/pack-indexing13.C 
b/gcc/testsuite/g++.dg/cpp26/pack-indexing13.C
new file mode 100644
index ..e0dd9c21c67b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp26/pack-indexing13.C
@@ -0,0 +1,23 @@
+// PR c++/117937
+// { dg-do compile { target c++26 } }
+
+using size_t = decltype(sizeof(0));
+
+template
+struct seq {};
+
+void g(auto...) {}
+
+void
+f (auto... args)
+{
+  [&](seq) {
+  g(args...[i]...);
+  }(seq<0>());
+}
+
+int
+main ()
+{
+  f(0);
+}
diff --git a/gcc/testsuite/g++.dg/cpp26/pack-indexing14.C 
b/gcc/testsuite/g++.dg/cpp26/pack-indexing14.C
new file mode 100644
index ..c8a67ee16edb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp26/pack-indexing14.C
@@ -0,0 +1,18 @@
+// PR c++/117937
+// { dg-do compile { target c++26 } }
+
+void operate_one(const int) {}
+
+template
+void operate_multi(T... args)
+{
+[&]()
+{
+   ::operate_one(args...[idx]);
+}.template operator()<0>();
+}
+
+int main()
+{
+::operate_multi(0);
+}


[gcc r15-6796] [PR118017][LRA]: Fix test for i686

2025-01-10 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:94d8de53388793f4d5fc0d0aa00fef32ca4aa870

commit r15-6796-g94d8de53388793f4d5fc0d0aa00fef32ca4aa870
Author: Vladimir N. Makarov 
Date:   Fri Jan 10 10:36:24 2025 -0500

[PR118017][LRA]: Fix test for i686

My previous patch for PR118017 contains a test which fails on i686.  The 
patch fixes this.

gcc/testsuite/ChangeLog:

PR target/118017
* gcc.target/i386/pr118017.c: Check target int128.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr118017.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr118017.c 
b/gcc/testsuite/gcc.target/i386/pr118017.c
index c82d71e8d293..28797a0ad73f 100644
--- a/gcc/testsuite/gcc.target/i386/pr118017.c
+++ b/gcc/testsuite/gcc.target/i386/pr118017.c
@@ -1,5 +1,5 @@
 /* PR target/118017 */
-/* { dg-do compile } */
+/* { dg-do compile { target int128 } } */
 /* { dg-options "-Og -frounding-math -mno-80387 -mno-mmx -Wno-psabi" } */
 
 typedef __attribute__((__vector_size__ (64))) _Float128 F;


[gcc r15-6797] Fix some memory leaks

2025-01-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9193641d1695293006ed0b818bb4161a1b6fbed2

commit r15-6797-g9193641d1695293006ed0b818bb4161a1b6fbed2
Author: Richard Biener 
Date:   Fri Jan 10 15:17:58 2025 +0100

Fix some memory leaks

The following fixes memory leaks found compiling SPEC CPU 2017 with
valgrind.

* df-core.cc (rest_of_handle_df_finish): Release dflow for
problems without free function (like LR).
* gimple-crc-optimization.cc 
(crc_optimization::loop_may_calculate_crc):
Release loop_bbs on all exits.
* tree-vectorizer.h (supportable_indirect_convert_operation): 
Change.
* tree-vect-generic.cc (expand_vector_conversion): Adjust.
* tree-vect-stmts.cc (vectorizable_conversion): Use auto_vec for
converts.
(supportable_indirect_convert_operation): Get a reference to
the output vector of converts.

Diff:
---
 gcc/df-core.cc |  2 ++
 gcc/gimple-crc-optimization.cc |  6 +-
 gcc/tree-vect-generic.cc   |  2 +-
 gcc/tree-vect-stmts.cc | 12 ++--
 gcc/tree-vectorizer.h  |  2 +-
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/gcc/df-core.cc b/gcc/df-core.cc
index a7011decf0bb..abfc0e63d352 100644
--- a/gcc/df-core.cc
+++ b/gcc/df-core.cc
@@ -808,6 +808,8 @@ rest_of_handle_df_finish (void)
   struct dataflow *dflow = df->problems_in_order[i];
   if (dflow->problem->free_fun)
dflow->problem->free_fun ();
+  else
+   free (dflow);
 }
 
   free (df->postorder);
diff --git a/gcc/gimple-crc-optimization.cc b/gcc/gimple-crc-optimization.cc
index 0e1f2a99d72b..a98cbe6752b5 100644
--- a/gcc/gimple-crc-optimization.cc
+++ b/gcc/gimple-crc-optimization.cc
@@ -947,6 +947,7 @@ crc_optimization::loop_may_calculate_crc (class loop *loop)
fprintf (dump_file,
 "The number of conditional "
 "branches in the loop isn't 2.\n");
+  free (loop_bbs);
   return false;
 }
 
@@ -977,8 +978,11 @@ crc_optimization::loop_may_calculate_crc (class loop *loop)
  return true;
}
 
-   if (++checked_xor_count == 2)
+ if (++checked_xor_count == 2)
+   {
+ free (loop_bbs);
  return false;
+   }
}
}
 }
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index fa5e9a54dbf9..c2f7a29d539b 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1757,7 +1757,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
   auto_vec > converts;
   if (supportable_indirect_convert_operation (code,
  ret_type, arg_type,
- &converts,
+ converts,
  arg))
 {
   new_rhs = arg;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index c0e38d00246d..f5b3608f6b13 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5583,7 +5583,7 @@ vectorizable_conversion (vec_info *vinfo,
   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
   opt_scalar_mode rhs_mode_iter;
-  vec > converts = vNULL;
+  auto_vec > converts;
 
   /* Supportable by target?  */
   switch (modifier)
@@ -5597,7 +5597,7 @@ vectorizable_conversion (vec_info *vinfo,
   if (supportable_indirect_convert_operation (code,
  vectype_out,
  vectype_in,
- &converts,
+ converts,
  op0))
{
  gcc_assert (converts.length () <= 2);
@@ -15170,7 +15170,7 @@ bool
 supportable_indirect_convert_operation (code_helper code,
tree vectype_out,
tree vectype_in,
-   vec > 
*converts,
+   vec > 
&converts,
tree op0)
 {
   bool found_mode = false;
@@ -15187,7 +15187,7 @@ supportable_indirect_convert_operation (code_helper 
code,
 vectype_in,
 &tc1))
 {
-  converts->safe_push (std::make_pair (vectype_out, tc1));
+  converts.safe_push (std::make_pair (vectype_out, tc1));
   return true;
 }
 
@@ -15278,9 +15278,9 @@ supportable_indirect_convert_operation (code_helper 
code,
 
   if (found_mode)
{
- converts->safe_push (std::make_pair (cvt_type, tc2));
+ converts.safe_push (std::make_pair (cvt_type, tc2));
  if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
-   converts->s

[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Correction régression coarray dummy_3

2025-01-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:8eb0b9eb3d8c7a538117b7054e7c8f7d1ea8e9a5

commit 8eb0b9eb3d8c7a538117b7054e7c8f7d1ea8e9a5
Author: Mikael Morin 
Date:   Fri Jan 10 14:09:37 2025 +0100

Correction régression coarray dummy_3

Diff:
---
 gcc/fortran/trans-expr.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 57a976df58ff..14f92e7575dc 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -832,6 +832,9 @@ gfc_get_vptr_from_expr (tree expr)
 int
 gfc_descriptor_rank (tree descriptor)
 {
+  if (TREE_TYPE (descriptor) != NULL_TREE)
+return GFC_TYPE_ARRAY_RANK (TREE_TYPE (descriptor));
+
   tree dim = gfc_get_descriptor_dimension (descriptor);
   tree dim_type = TREE_TYPE (dim);
   gcc_assert (TREE_CODE (dim_type) == ARRAY_TYPE);


[gcc r15-6798] c++: modules and DECL_REPLACEABLE_P

2025-01-10 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:e86daddbde93b4b508f0957e9aac0b935f99ed93

commit r15-6798-ge86daddbde93b4b508f0957e9aac0b935f99ed93
Author: Jason Merrill 
Date:   Fri Nov 22 12:00:20 2024 +0100

c++: modules and DECL_REPLACEABLE_P

We need to remember that the ::operator new is replaceable to avoid a bogus
error about __builtin_operator_new finding a non-replaceable function.

This affected __get_temporary_buffer in stl_tempbuf.h.

gcc/cp/ChangeLog:

* module.cc (trees_out::core_bools): Write replaceable_operator.
(trees_in::core_bools): Read it.

gcc/testsuite/ChangeLog:

* g++.dg/modules/operator-2_a.C: New test.
* g++.dg/modules/operator-2_b.C: New test.

Diff:
---
 gcc/cp/module.cc|  2 ++
 gcc/testsuite/g++.dg/modules/operator-2_a.C | 14 ++
 gcc/testsuite/g++.dg/modules/operator-2_b.C |  8 
 3 files changed, 24 insertions(+)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 7288c46a7baa..4fbe522264b3 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -5640,6 +5640,7 @@ trees_out::core_bools (tree t, bits_out& bits)
 
   WB (t->function_decl.has_debug_args_flag);
   WB (t->function_decl.versioned_function);
+  WB (t->function_decl.replaceable_operator);
 
   /* decl_type is a (misnamed) 2 bit discriminator. */
   unsigned kind = t->function_decl.decl_type;
@@ -5796,6 +5797,7 @@ trees_in::core_bools (tree t, bits_in& bits)
 
   RB (t->function_decl.has_debug_args_flag);
   RB (t->function_decl.versioned_function);
+  RB (t->function_decl.replaceable_operator);
 
   /* decl_type is a (misnamed) 2 bit discriminator. */
   unsigned kind = 0;
diff --git a/gcc/testsuite/g++.dg/modules/operator-2_a.C 
b/gcc/testsuite/g++.dg/modules/operator-2_a.C
new file mode 100644
index ..0b1f6e80422f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/operator-2_a.C
@@ -0,0 +1,14 @@
+// { dg-additional-options -fmodules }
+// { dg-module-cmi M }
+
+module;
+
+#include 
+
+export module M;
+
+export template 
+inline T* alloc (__SIZE_TYPE__ n)
+{
+  return (T*) __builtin_operator_new (n * sizeof (T), std::nothrow_t{});
+};
diff --git a/gcc/testsuite/g++.dg/modules/operator-2_b.C 
b/gcc/testsuite/g++.dg/modules/operator-2_b.C
new file mode 100644
index ..fb21ccb6d30f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/operator-2_b.C
@@ -0,0 +1,8 @@
+// { dg-additional-options -fmodules }
+
+import M;
+
+int main()
+{
+  int *p = alloc(42);
+}


[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Correction régression dummy_3

2025-01-10 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:310e573c848de18bd55d2800c60bb675892f4d99

commit 310e573c848de18bd55d2800c60bb675892f4d99
Author: Mikael Morin 
Date:   Fri Jan 10 19:03:04 2025 +0100

Correction régression dummy_3

Diff:
---
 gcc/fortran/trans-expr.cc | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 14f92e7575dc..77e8a55af457 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -919,8 +919,17 @@ gfc_class_array_data_assign (stmtblock_t *block, tree 
lhs_desc, tree rhs_desc,
 type = TREE_TYPE (tmp);
   else
 {
-  gcc_assert (TREE_TYPE (tmp) == TREE_TYPE (tmp2));
-  type = TREE_TYPE (tmp);
+  int corank = GFC_TYPE_ARRAY_CORANK (TREE_TYPE (lhs_desc));
+  int corank2 = GFC_TYPE_ARRAY_CORANK (TREE_TYPE (rhs_desc));
+  if (corank > 0 && corank2 == 0)
+   type = TREE_TYPE (tmp2);
+  else if (corank2 > 0 && corank == 0)
+   type = TREE_TYPE (tmp);
+  else
+   {
+ gcc_assert (TREE_TYPE (tmp) == TREE_TYPE (tmp2));
+ type = TREE_TYPE (tmp);
+   }
 }
 
   tmp = build4_loc (input_location, ARRAY_RANGE_REF, type, tmp,


[gcc r15-6799] c++: Fix ICE with invalid defaulted operator <=> [PR118387]

2025-01-10 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:4c688399db12c509c081d52b8926ac6d7de6068c

commit r15-6799-g4c688399db12c509c081d52b8926ac6d7de6068c
Author: Jakub Jelinek 
Date:   Fri Jan 10 18:42:58 2025 +0100

c++: Fix ICE with invalid defaulted operator <=> [PR118387]

In the following testcase there are 2 issues, one is that B doesn't
have operator<=> and the other is that A's operator<=> has int return
type, i.e. not the standard comparison category.
Because of the int return type, retcat is cc_last; when we first
try to synthetize it, it is therefore with tentative false and complain
tf_none, we find that B doesn't have operator<=> and because retcat isn't
tc_last, don't try to search for other operators in genericize_spaceship.
And then mark the operator deleted.
When trying to explain the use of the deleted operator, tentative is still
false, but complain is tf_warning_or_error.
do_one_comp will first do:
  tree comp = build_new_op (loc, code, flags, lhs, rhs,
NULL_TREE, NULL_TREE, &overload,
tentative ? tf_none : complain);
and because complain isn't tf_none, it will actually diagnose the bug
already, but then (tentative || complain) is true and we call
genericize_spaceship, which has
  if (tag == cc_last && is_auto (type))
{
...
}

  gcc_checking_assert (tag < cc_last);
and because tag is cc_last and type isn't auto, we just ICE on that
assertion.

The patch fixes it by returning error_mark_node from genericize_spaceship
instead of failing the assertion.

Note, the PR raises another problem.
If on the same testcase the B b; line is removed, we silently synthetize
operator<=> which will crash at runtime due to returning without a return
statement.  That is because the standard says that in that case
it should return static_cast<int>(std::strong_ordering::equal);
but I can't find anywhere wording which would say that if that isn't
valid, the function is deleted.
https://eel.is/c++draft/class.compare#class.spaceship-2.2
seems to talk just about cases where there are some members and their
comparison is invalid it is deleted, but here there are none and it
follows
https://eel.is/c++draft/class.compare#class.spaceship-3.sentence-2
So, we synthetize with tf_none, see the static_cast is invalid, don't
add error_mark_node statement silently, but as the function isn't deleted,
we just silently emit it.
Should the standard be amended to say that the operator should be deleted
even if it has no elements and the static cast from
https://eel.is/c++draft/class.compare#class.spaceship-3.sentence-2
?

2025-01-10  Jakub Jelinek  

PR c++/118387
* method.cc (genericize_spaceship): For tag == cc_last if
type is not auto just return error_mark_node instead of failing
checking assertion.

* g++.dg/cpp2a/spaceship-synth17.C: New test.

Diff:
---
 gcc/cp/method.cc   |  4 ++--
 gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C | 19 +++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc
index 64535f52019f..304d11d145c5 100644
--- a/gcc/cp/method.cc
+++ b/gcc/cp/method.cc
@@ -1097,8 +1097,8 @@ genericize_spaceship (location_t loc, tree type, tree 
op0, tree op1)
   if (type == error_mark_node)
return error_mark_node;
 }
-
-  gcc_checking_assert (tag < cc_last);
+  else if (tag == cc_last)
+return error_mark_node;
 
   tree r;
   bool scalar = SCALAR_TYPE_P (TREE_TYPE (op0));
diff --git a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C 
b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C
new file mode 100644
index ..a7793314ce1e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C
@@ -0,0 +1,19 @@
+// PR c++/118387
+// { dg-do compile { target c++20 } }
+
+#include 
+
+struct B {};
+
+struct A
+{
+  B b; // { dg-error "no match for 'operator<=>' in 
'\[^\n\r]*' \\\(operand types are 'B' and 'B'\\\)" }
+  int operator<=> (const A &) const = default;
+};
+
+int
+main ()
+{
+  A a;
+  return a <=> a;  // { dg-error "use of deleted function 'constexpr int 
A::operator<=>\\\(const A&\\\) const'" }
+}


[gcc r15-6800] Do not call cp_parser_omp_dispatch directly in cp_parser_pragma

2025-01-10 Thread Paul-Antoine Arras via Gcc-cvs
https://gcc.gnu.org/g:b5a679898986ae22ffdec538374c5378c26a229f

commit r15-6800-gb5a679898986ae22ffdec538374c5378c26a229f
Author: Paul-Antoine Arras 
Date:   Mon Jan 6 16:06:43 2025 +0100

Do not call cp_parser_omp_dispatch directly in cp_parser_pragma

This is a followup to
ed49709acda OpenMP: C++ front-end support for dispatch + adjust_args.

The call to cp_parser_omp_dispatch only belongs in cp_parser_omp_construct. 
In
cp_parser_pragma, handle PRAGMA_OMP_DISPATCH by calling 
cp_parser_omp_construct.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_pragma): Replace call to 
cp_parser_omp_dispatch
with cp_parser_omp_construct and check context.

gcc/testsuite/ChangeLog:

* g++.dg/gomp/dispatch-8.C: New test.

Diff:
---
 gcc/cp/parser.cc   |  4 +++-
 gcc/testsuite/g++.dg/gomp/dispatch-8.C | 10 ++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 80bc2d8e9e1e..9600b1409164 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -53057,7 +53057,9 @@ cp_parser_pragma (cp_parser *parser, enum 
pragma_context context, bool *if_p)
   break;
 
 case PRAGMA_OMP_DISPATCH:
-  cp_parser_omp_dispatch (parser, pragma_tok);
+  if (context != pragma_stmt && context != pragma_compound)
+   goto bad_stmt;
+  cp_parser_omp_construct (parser, pragma_tok, if_p);
   return true;
 
 case PRAGMA_IVDEP:
diff --git a/gcc/testsuite/g++.dg/gomp/dispatch-8.C 
b/gcc/testsuite/g++.dg/gomp/dispatch-8.C
new file mode 100644
index ..b8e8e73db1ff
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/dispatch-8.C
@@ -0,0 +1,10 @@
+// { dg-do compile }
+
+// Check that an appropriate diagnostic is emitted when a dispatch directive
+// appears in a pragma_member context.
+
+void k();
+struct t {
+ #pragma omp dispatch  // { dg-error "expected declaration specifiers before 
end of line" }
+  k();  // { dg-error ".*" }
+};


[gcc r13-9296] c++: constexpr error with fn redecl in local scope [PR111132]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:294140d752fc9a3a790497da9f1e968e9849b40f

commit r13-9296-g294140d752fc9a3a790497da9f1e968e9849b40f
Author: Marek Polacek 
Date:   Tue Apr 2 12:59:38 2024 -0400

c++: constexpr error with fn redecl in local scope [PR111132]

We evaluate constexpr functions on the original, pre-genericization bodies.
That means that the function body we're evaluating will not have gone
through cp_genericize_r's "Map block scope extern declarations to visible
declarations with the same name and type in outer scopes if any".  Here:

  constexpr bool bar() { return true; } // #1
  constexpr bool foo() {
constexpr bool bar(void); // #2
return bar();
  }

it means that we:
1) register_constexpr_fundef (#1)
2) cp_genericize (#1)
   nothing interesting happens
3) register_constexpr_fundef (foo)
   does copy_fn, so we have two copies of the BIND_EXPR
4) cp_genericize (foo)
   this remaps #2 to #1, but only on one copy of the BIND_EXPR
5) retrieve_constexpr_fundef (foo)
   we find it, no problem
6) retrieve_constexpr_fundef (#2)
   and here #2 isn't found in constexpr_fundef_table, because
   we're working on the BIND_EXPR copy where #2 wasn't mapped to #1
   so we fail.  We've only registered #1.

It should work to use DECL_LOCAL_DECL_ALIAS (which used to be
extern_decl_map).  We evaluate constexpr functions on pre-cp_fold
bodies to avoid diagnostic problems, but the remapping I'm proposing
should not interfere with diagnostics.

This is not a problem for a global scope redeclaration; there we go
through duplicate_decls which keeps the DECL_UID:
  DECL_UID (olddecl) = olddecl_uid;
and DECL_UID is what constexpr_fundef_hasher::hash uses.

PR c++/111132

gcc/cp/ChangeLog:

* constexpr.cc (get_function_named_in_call): Use
cp_get_fndecl_from_callee.
* cvt.cc (cp_get_fndecl_from_callee): If there's a
DECL_LOCAL_DECL_ALIAS, use it.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-redeclaration3.C: New test.
* g++.dg/cpp0x/constexpr-redeclaration4.C: New test.

(cherry picked from commit 8c9063825ce726fcbbc067d8a6d062cc2d4acf5e)

Diff:
---
 gcc/cp/constexpr.cc   | 10 --
 gcc/cp/cvt.cc | 18 --
 gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration3.C | 13 +
 gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration4.C | 14 ++
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index d34b8bdbd166..8a66528d5caf 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -702,16 +702,14 @@ build_constexpr_constructor_member_initializers (tree 
type, tree body)
 
 /* We have an expression tree T that represents a call, either CALL_EXPR
or AGGR_INIT_EXPR.  If the call is lexically to a named function,
-   retrun the _DECL for that function.  */
+   return the _DECL for that function.  */
 
 static tree
 get_function_named_in_call (tree t)
 {
-  tree fun = cp_get_callee (t);
-  if (fun && TREE_CODE (fun) == ADDR_EXPR
-  && TREE_CODE (TREE_OPERAND (fun, 0)) == FUNCTION_DECL)
-fun = TREE_OPERAND (fun, 0);
-  return fun;
+  tree callee = cp_get_callee (t);
+  tree fun = cp_get_fndecl_from_callee (callee, /*fold*/false);
+  return fun ? fun : callee;
 }
 
 /* Subroutine of check_constexpr_fundef.  BODY is the body of a function
diff --git a/gcc/cp/cvt.cc b/gcc/cp/cvt.cc
index 17827d06a4a6..1b34dc73f128 100644
--- a/gcc/cp/cvt.cc
+++ b/gcc/cp/cvt.cc
@@ -1001,8 +1001,22 @@ cp_get_fndecl_from_callee (tree fn, bool fold /* = true 
*/)
 {
   if (fn == NULL_TREE)
 return fn;
+
+  /* We evaluate constexpr functions on the original, pre-genericization
+ bodies.  So block-scope extern declarations have not been mapped to
+ declarations in outer scopes.  Use the namespace-scope declaration,
+ if any, so that retrieve_constexpr_fundef can find it (PR111132).  */
+  auto fn_or_local_alias = [] (tree f)
+{
+  if (DECL_LOCAL_DECL_P (f))
+   if (tree alias = DECL_LOCAL_DECL_ALIAS (f))
+ if (alias != error_mark_node)
+   return alias;
+  return f;
+};
+
   if (TREE_CODE (fn) == FUNCTION_DECL)
-return fn;
+return fn_or_local_alias (fn);
   tree type = TREE_TYPE (fn);
   if (type == NULL_TREE || !INDIRECT_TYPE_P (type))
 return NULL_TREE;
@@ -1013,7 +1027,7 @@ cp_get_fndecl_from_callee (tree fn, bool fold /* = true 
*/)
   || TREE_CODE (fn) == FDESC_EXPR)
 fn = TREE_OPERAND (fn, 0);
   if (TREE_CODE (fn) == FUNCTION_DECL)
-return fn;
+return fn_or_local_alias (fn);
   return NULL_TREE;
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration3.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration3.C
new f

[gcc r13-9297] c++: wrong looser excep spec for dep noexcept [PR113158]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:c22c3a743d9827e58b9ff11a09d7227500c0dae3

commit r13-9297-gc22c3a743d9827e58b9ff11a09d7227500c0dae3
Author: Marek Polacek 
Date:   Thu Feb 15 17:07:43 2024 -0500

c++: wrong looser excep spec for dep noexcept [PR113158]

Here we find ourselves in maybe_check_overriding_exception_spec in
a template context where we can't instantiate a dependent noexcept.
That's OK, but we have to defer the checking otherwise we give wrong
errors.

PR c++/113158

gcc/cp/ChangeLog:

* search.cc (maybe_check_overriding_exception_spec): Defer checking
when a noexcept couldn't be instantiated & evaluated to false/true.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept83.C: New test.

(cherry picked from commit 876fa432ef4074053fa65b1855e7d43320515576)

Diff:
---
 gcc/cp/search.cc| 11 ++
 gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 +
 2 files changed, 48 insertions(+)

diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
index 3f521b3bd72f..211e5724a04d 100644
--- a/gcc/cp/search.cc
+++ b/gcc/cp/search.cc
@@ -1928,6 +1928,17 @@ maybe_check_overriding_exception_spec (tree overrider, 
tree basefn)
   || UNPARSED_NOEXCEPT_SPEC_P (over_throw))
 return true;
 
+  /* We also have to defer checking when we're in a template and couldn't
+ instantiate & evaluate the noexcept to true/false.  */
+  if (processing_template_decl)
+if ((base_throw
+&& base_throw != noexcept_true_spec
+&& base_throw != noexcept_false_spec)
+   || (over_throw
+   && over_throw != noexcept_true_spec
+   && over_throw != noexcept_false_spec))
+  return true;
+
   if (!comp_except_specs (base_throw, over_throw, ce_derived))
 {
   auto_diagnostic_group d;
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept83.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
new file mode 100644
index ..47832bbb44d3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
@@ -0,0 +1,37 @@
+// PR c++/113158
+// { dg-do compile { target c++11 } }
+
+template
+struct V {
+  static constexpr bool t = false;
+};
+struct base {
+virtual int f() = 0;
+};
+
+template
+struct derived : base {
+int f() noexcept(V::t) override;
+};
+
+struct base2 {
+virtual int f() noexcept = 0;
+};
+
+template
+struct W {
+  static constexpr bool t = B;
+};
+
+template
+struct derived2 : base2 {
+int f() noexcept(W::t) override; // { dg-error "looser exception 
specification" }
+};
+
+void
+g ()
+{
+  derived d1;
+  derived2 d2; // { dg-message "required from here" }
+  derived2 d3;
+}


[gcc r13-9299] c++: wrong std::is_convertible with cv-qual fn [PR109680]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:005b1f418350a3ef7c5280b19a82fb28c0856e7c

commit r13-9299-g005b1f418350a3ef7c5280b19a82fb28c0856e7c
Author: Marek Polacek 
Date:   Tue May 2 17:36:00 2023 -0400

c++: wrong std::is_convertible with cv-qual fn [PR109680]

This PR points out that std::is_convertible has given the wrong answer
in

  static_assert (!std::is_convertible_v <int () const, int (*) ()>, "");

since r13-2822 implemented __is_{,nothrow_}convertible.

std::is_convertible uses the imaginary

  To test() { return std::declval<From>(); }

to do its job.  Here, From is 'int () const'.  std::declval is defined as:

  template<class T>
  typename std::add_rvalue_reference<T>::type declval() noexcept;

std::add_rvalue_reference is defined as "If T is a function type that
has no cv- or ref- qualifier or an object type, provides a member typedef
type which is T&&, otherwise type is T."

In our case, T is cv-qualified, so the result is T, so we end up with

  int () const declval() noexcept;

which is invalid.  In other words, this is pretty much like:

  using T = int () const;
  T fn1(); // bad, fn returning a fn
  T& fn2(); // bad, cannot declare reference to qualified function type
  T* fn3(); // bad, cannot declare pointer to qualified function type

  using U = int ();
  U fn4(); // bad, fn returning a fn
  U& fn5(); // OK
  U* fn6(); // OK

I think is_convertible_helper needs to simulate std::declval better.
To that end, I'm introducing build_trait_object, to be used where
a declval is needed.

PR c++/109680

gcc/cp/ChangeLog:

* method.cc (build_trait_object): New.
(assignable_expr): Use it.
(ref_xes_from_temporary): Likewise.
(is_convertible_helper): Likewise.  Check FUNC_OR_METHOD_TYPE_P.

gcc/testsuite/ChangeLog:

* g++.dg/ext/is_convertible6.C: New test.

(cherry picked from commit 4c2ffb02fd4104d77c5d907662f04434dc4c3fe8)

Diff:
---
 gcc/cp/method.cc   | 39 +++---
 gcc/testsuite/g++.dg/ext/is_convertible6.C | 16 
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc
index 09ea6d732dfc..c9d9e3516f3b 100644
--- a/gcc/cp/method.cc
+++ b/gcc/cp/method.cc
@@ -1907,6 +1907,27 @@ build_stub_object (tree reftype)
   return convert_from_reference (stub);
 }
 
+/* Build a std::declval<TYPE>() expression and return it.  */
+
+tree
+build_trait_object (tree type)
+{
+  /* TYPE can't be a function with cv-/ref-qualifiers: std::declval is
+ defined as
+
+   template<class T>
+   typename std::add_rvalue_reference<T>::type declval() noexcept;
+
+ and std::add_rvalue_reference yields T when T is a function with
+ cv- or ref-qualifiers, making the definition ill-formed.  */
+  if (FUNC_OR_METHOD_TYPE_P (type)
+  && (type_memfn_quals (type) != TYPE_UNQUALIFIED
+ || type_memfn_rqual (type) != REF_QUAL_NONE))
+return error_mark_node;
+
+  return build_stub_object (type);
+}
+
 /* Determine which function will be called when looking up NAME in TYPE,
called with a single ARGTYPE argument, or no argument if ARGTYPE is
null.  FLAGS and COMPLAIN are as for build_new_method_call.
@@ -2055,8 +2076,8 @@ static tree
 assignable_expr (tree to, tree from)
 {
   cp_unevaluated cp_uneval_guard;
-  to = build_stub_object (to);
-  from = build_stub_object (from);
+  to = build_trait_object (to);
+  from = build_trait_object (from);
   tree r = cp_build_modify_expr (input_location, to, NOP_EXPR, from, tf_none);
   return r;
 }
@@ -2235,7 +2256,9 @@ ref_xes_from_temporary (tree to, tree from, bool 
direct_init_p)
 return false;
   /* We don't check is_constructible: if T isn't constructible
  from U, we won't be able to create a conversion.  */
-  tree val = build_stub_object (from);
+  tree val = build_trait_object (from);
+  if (val == error_mark_node)
+return false;
   if (!TYPE_REF_P (from) && TREE_CODE (from) != FUNCTION_TYPE)
 val = CLASS_TYPE_P (from) ? force_rvalue (val, tf_none) : rvalue (val);
   return ref_conv_binds_to_temporary (to, val, direct_init_p).is_true ();
@@ -2250,7 +2273,15 @@ is_convertible_helper (tree from, tree to)
   if (VOID_TYPE_P (from) && VOID_TYPE_P (to))
 return integer_one_node;
   cp_unevaluated u;
-  tree expr = build_stub_object (from);
+  tree expr = build_trait_object (from);
+  /* std::is_{,nothrow_}convertible test whether the imaginary function
+ definition
+
+   To test() { return std::declval<From>(); }
+
+ is well-formed.  A function can't return a function.  */
+  if (FUNC_OR_METHOD_TYPE_P (to) || expr == error_mark_node)
+return error_mark_node;
   deferring_access_check_sentinel acs (dk_no_deferred);
   return perform_implicit_conversion (to, expr, tf_none);
 }
diff --git a/gcc/testsuite/g++.dg/ext/is_convertible6.

[gcc r13-9293] c++: ICE with enum and conversion fn in template [PR115657]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:45ff9fcd465f445ca43a584e20a4568f4e41539f

commit r13-9293-g45ff9fcd465f445ca43a584e20a4568f4e41539f
Author: Marek Polacek 
Date:   Thu Aug 15 18:47:29 2024 -0400

c++: ICE with enum and conversion fn in template [PR115657]

Here we initialize an enumerator with a class prvalue with a conversion
function.  When we fold it in build_enumerator, we create a TARGET_EXPR
for the object, and subsequently crash in tsubst_expr, which should not
see such a tree code.

Normally, we fix similar problems by using an IMPLICIT_CONV_EXPR but here
I may get away with not using the result of fold_non_dependent_expr unless
the result is a constant.  A TARGET_EXPR is not constant.

PR c++/115657

gcc/cp/ChangeLog:

* decl.cc (build_enumerator): Call maybe_fold_non_dependent_expr
instead of fold_non_dependent_expr.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/constexpr-recursion2.C: New test.
* g++.dg/template/conv21.C: New test.

(cherry picked from commit 53283c3231a7b94e728619cccbf21170fb36b2a8)

Diff:
---
 gcc/cp/decl.cc| 10 --
 gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C | 22 ++
 gcc/testsuite/g++.dg/template/conv21.C| 14 ++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 0126684c7896..dbd1ee664c60 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -16905,9 +16905,15 @@ build_enumerator (tree name, tree value, tree 
enumtype, tree attributes,
   tree type;
 
   /* scalar_constant_value will pull out this expression, so make sure
- it's folded as appropriate.  */
+ it's folded as appropriate.
+
+ Creating a TARGET_EXPR in a template breaks when substituting, and
+ here we would create it for instance when using a class prvalue with
+ a user-defined conversion function.  So don't use such a tree.  We
+ instantiate VALUE here to get errors about bad enumerators even in
+ a template that does not get instantiated.  */
   if (processing_template_decl)
-value = fold_non_dependent_expr (value);
+value = maybe_fold_non_dependent_expr (value);
 
   /* If the VALUE was erroneous, pretend it wasn't there; that will
  result in the enum being assigned the next value in sequence.  */
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C
new file mode 100644
index ..f268f52e2b5b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C
@@ -0,0 +1,22 @@
+// PR c++/115657
+// { dg-do compile { target c++14 } }
+// { dg-options "-Wall" }
+
+// Like constexpr-recursion1.C but use a class with a conversion function.
+
+struct X {
+  constexpr operator int() { return 0; }
+};
+
+template 
+constexpr X f1 ()
+{
+  enum E { a = f1<0> () }; // { dg-error "called in a constant expression 
before its definition is complete|is not an integer constant" }
+  return {};
+}
+
+constexpr X f3 ()
+{
+  enum E { a = f3 () };// { dg-error "called in a constant expression 
before its definition is complete|is not an integer constant" }
+  return {};
+}
diff --git a/gcc/testsuite/g++.dg/template/conv21.C 
b/gcc/testsuite/g++.dg/template/conv21.C
new file mode 100644
index ..1dc7b3d50d9e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/conv21.C
@@ -0,0 +1,14 @@
+// PR c++/115657
+// { dg-do compile { target c++11 } }
+
+struct NonIntegral
+{
+constexpr operator int() { return 0; }
+};
+
+template struct TemplatedStructural
+{
+enum { e = NonIntegral{} };
+};
+
+template struct TemplatedStructural;


[gcc r13-9298] c++: ICE initializing array of aggrs [PR117985]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:ff0e01a99daa1784f1d0adc5b2a1aab86693b1ca

commit r13-9298-gff0e01a99daa1784f1d0adc5b2a1aab86693b1ca
Author: Marek Polacek 
Date:   Thu Dec 12 14:56:07 2024 -0500

c++: ICE initializing array of aggrs [PR117985]

This crash started with my r12-7803 but I believe the problem lies
elsewhere.

build_vec_init has cleanup_flags whose purpose is -- if I grok this
correctly -- to avoid destructing an object multiple times.  Let's
say we are initializing an array of A.  Then we might end up in
a scenario similar to initlist-eh1.C:

  try
{
  call A::A in a loop
  // #0
  try
{
  call a fn using the array
}
  finally
{
  // #1
  call A::~A in a loop
}
}
  catch
{
  // #2
  call A::~A in a loop
}

cleanup_flags makes us emit a statement like

  D.3048 = 2;

at #0 to disable performing the cleanup at #2, since #1 will take
care of the destruction of the array.

But if we are not emitting the loop because we can use a constant
initializer (and use a single { a, b, ...}), we shouldn't generate
the statement resetting the iterator to its initial value.  Otherwise
we crash in gimplify_var_or_parm_decl because it gets the stray decl
D.3048.

PR c++/117985

gcc/cp/ChangeLog:

* init.cc (build_vec_init): Pop CLEANUP_FLAGS if we're not
generating the loop.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/initlist-array23.C: New test.
* g++.dg/cpp0x/initlist-array24.C: New test.

(cherry picked from commit 40e5636e086e51f5908a1a01be9cba2218dc26d8)

Diff:
---
 gcc/cp/init.cc|  9 +
 gcc/testsuite/g++.dg/cpp0x/initlist-array23.C | 28 +++
 gcc/testsuite/g++.dg/cpp0x/initlist-array24.C | 27 ++
 3 files changed, 64 insertions(+)

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 4977ce343984..65cb35b98500 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -5019,6 +5019,15 @@ build_vec_init (tree base, tree maxindex, tree init,
 {
   if (!saw_non_const)
{
+ /* If we're not generating the loop, we don't need to reset the
+iterator.  */
+ if (cleanup_flags
+ && !vec_safe_is_empty (*cleanup_flags))
+   {
+ auto l = (*cleanup_flags)->last ();
+ gcc_assert (TREE_PURPOSE (l) == iterator);
+ (*cleanup_flags)->pop ();
+   }
  tree const_init = build_constructor (atype, const_vec);
  return build2 (INIT_EXPR, atype, obase, const_init);
}
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-array23.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-array23.C
new file mode 100644
index ..cda2afb9fccc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-array23.C
@@ -0,0 +1,28 @@
+// PR c++/117985
+// { dg-do compile { target c++11 } }
+
+struct _Vector_impl {
+  constexpr
+_Vector_impl() {}
+};
+struct _Vector_base {
+  ~_Vector_base();
+  _Vector_impl _M_impl;
+};
+struct vector : private _Vector_base {};
+struct string {
+  string();
+};
+struct VEC {
+  vector pane{};
+};
+struct FOO {
+  VEC screen[1]{};
+  string debug_name;
+};
+
+int
+main ()
+{
+  FOO{};
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-array24.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-array24.C
new file mode 100644
index ..7dda00d5c0b8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-array24.C
@@ -0,0 +1,27 @@
+// PR c++/117985
+// { dg-do compile { target c++20 } }
+
+struct _Vector_impl {
+  constexpr _Vector_impl() {}
+};
+struct _Vector_base {
+  constexpr ~_Vector_base() {}
+  _Vector_impl _M_impl;
+};
+struct vector : private _Vector_base {};
+struct string {
+  string();
+};
+struct VEC {
+  vector pane{};
+};
+struct FOO {
+  VEC screen[1]{};
+  string debug_name;
+};
+
+int
+main ()
+{
+  FOO{};
+}


[gcc r13-9300] c++: concept in default argument [PR109859]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:a2fd45adfdb3ff219fd97d158fb66cc99ef0afd4

commit r13-9300-ga2fd45adfdb3ff219fd97d158fb66cc99ef0afd4
Author: Marek Polacek 
Date:   Wed Sep 18 15:44:31 2024 -0400

c++: concept in default argument [PR109859]

1) We're hitting the assert in cp_parser_placeholder_type_specifier.
It says that if it turns out to be false, we should do error() instead.
Do so, then.

2) lambda-targ8.C should compile fine, though.  The problem was that
local_variables_forbidden_p wasn't cleared when we're about to parse
the optional template-parameter-list for a lambda in a default argument.

PR c++/109859

gcc/cp/ChangeLog:

* parser.cc (cp_parser_lambda_declarator_opt): Temporarily clear
local_variables_forbidden_p.
(cp_parser_placeholder_type_specifier): Turn an assert into an
error.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-defarg3.C: New test.
* g++.dg/cpp2a/lambda-targ8.C: New test.

Reviewed-by: Jason Merrill 
(cherry picked from commit 4bcfaaed25b1b8ecc81f6a28d9ca76f00870dedf)

Diff:
---
 gcc/cp/parser.cc  |  9 +++--
 gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C |  8 
 gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C | 10 ++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 47c15cff3468..d25ceff9ec38 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -11564,6 +11564,11 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, 
tree lambda_expr)
 "lambda templates are only available with "
 "%<-std=c++20%> or %<-std=gnu++20%>");
 
+  /* Even though the whole lambda may be a default argument, its
+template-parameter-list is a context where it's OK to create
+new parameters.  */
+  auto lvf = make_temp_override (parser->local_variables_forbidden_p, 0u);
+
   cp_lexer_consume_token (parser->lexer);
 
   template_param_list = cp_parser_template_parameter_list (parser);
@@ -20204,8 +20209,8 @@ cp_parser_placeholder_type_specifier (cp_parser 
*parser, location_t loc,
   /* In a default argument we may not be creating new parameters.  */
   if (parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN)
{
- /* If this assert turns out to be false, do error() instead.  */
- gcc_assert (tentative);
+ if (!tentative)
+   error_at (loc, "invalid use of concept-name %qD", con);
  return error_mark_node;
}
   return build_constrained_parameter (con, proto, args);
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C
new file mode 100644
index ..6fe82f91e434
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C
@@ -0,0 +1,8 @@
+// PR c++/109859
+// { dg-do compile { target c++20 } }
+
+template
+concept C = true;
+
+template  // { dg-error "invalid use of concept-name .C." }
+int f();
diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C 
b/gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C
new file mode 100644
index ..3685b0ef880b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C
@@ -0,0 +1,10 @@
+// PR c++/109859
+// { dg-do compile { target c++20 } }
+
+template
+concept A = true;
+
+template {}>
+int x;
+
+void g() { (void) x<>; }


[gcc r13-9301] c++: ICE with temporary of class type in array DMI [PR109966]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:9ad64458dd8fdb384e45aa3647380de53e04cedd

commit r13-9301-g9ad64458dd8fdb384e45aa3647380de53e04cedd
Author: Marek Polacek 
Date:   Mon Mar 11 17:45:55 2024 -0400

c++: ICE with temporary of class type in array DMI [PR109966]

This ICE started with the fairly complicated r13-765.  We crash in
gimplify_var_or_parm_decl because a stray VAR_DECL leaked there.
The problem is ultimately that potential_prvalue_result_of wasn't
correctly handling arrays and replace_placeholders_for_class_temp_r
replaced a PLACEHOLDER_EXPR in a TARGET_EXPR which is used in the
context of copy elision.  If I have

  M m[2] = { M{""}, M{""} };

then we don't invoke the M(const M&) copy-ctor.

One part of the fix is to use TARGET_EXPR_ELIDING_P rather than
potential_prvalue_result_of.  That unfortunately doesn't handle the
case like

  struct N { N(M); };
  N arr[2] = { M{""}, M{""} };

because TARGET_EXPRs that initialize a function argument are not
marked TARGET_EXPR_ELIDING_P even though gimplify_arg drops such
TARGET_EXPRs on the floor.  We can use a pset to avoid replacing
placeholders in them.

I made an attempt to use set_target_expr_eliding in
convert_for_arg_passing but that regressed constexpr-diag1.C, and does
not seem like a prudent change in stage 4 anyway.

PR c++/109966

gcc/cp/ChangeLog:

* typeck2.cc (potential_prvalue_result_of): Remove.
(replace_placeholders_for_class_temp_r): Check TARGET_EXPR_ELIDING_P.
Use a pset.  Don't replace_placeholders in TARGET_EXPRs that initialize
a function argument.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/nsdmi-aggr20.C: New test.
* g++.dg/cpp1y/nsdmi-aggr21.C: New test.

(cherry picked from commit 6039925631780741ba77666ef2ef743aa2a925a8)

Diff:
---
 gcc/cp/typeck2.cc | 55 +---
 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C | 17 +
 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C | 59 +++
 3 files changed, 92 insertions(+), 39 deletions(-)

diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc
index daa651b260fb..27263f503f18 100644
--- a/gcc/cp/typeck2.cc
+++ b/gcc/cp/typeck2.cc
@@ -1396,41 +1396,6 @@ digest_init_flags (tree type, tree init, int flags, 
tsubst_flags_t complain)
   return digest_init_r (type, init, 0, flags, complain);
 }
 
-/* Return true if SUBOB initializes the same object as FULL_EXPR.
-   For instance:
-
- A a = A{};  // initializer
- A a = (A{});// initializer
- A a = (1, A{}); // initializer
- A a = true ? A{} : A{};  // initializer
- auto x = A{}.x; // temporary materialization
- auto x = foo(A{});  // temporary materialization
-
-   FULL_EXPR is the whole expression, SUBOB is its TARGET_EXPR subobject.  */
-
-static bool
-potential_prvalue_result_of (tree subob, tree full_expr)
-{
-  if (subob == full_expr)
-return true;
-  else if (TREE_CODE (full_expr) == TARGET_EXPR)
-{
-  tree init = TARGET_EXPR_INITIAL (full_expr);
-  if (TREE_CODE (init) == COND_EXPR)
-   return (potential_prvalue_result_of (subob, TREE_OPERAND (init, 1))
-   || potential_prvalue_result_of (subob, TREE_OPERAND (init, 2)));
-  else if (TREE_CODE (init) == COMPOUND_EXPR)
-   return potential_prvalue_result_of (subob, TREE_OPERAND (init, 1));
-  /* ??? I don't know if this can be hit.  */
-  else if (TREE_CODE (init) == PAREN_EXPR)
-   {
- gcc_checking_assert (false);
- return potential_prvalue_result_of (subob, TREE_OPERAND (init, 0));
-   }
-}
-  return false;
-}
-
 /* Callback to replace PLACEHOLDER_EXPRs in a TARGET_EXPR (which isn't used
in the context of guaranteed copy elision).  */
 
@@ -1438,11 +1403,13 @@ static tree
 replace_placeholders_for_class_temp_r (tree *tp, int *, void *data)
 {
   tree t = *tp;
-  tree full_expr = *static_cast(data);
+  auto pset = static_cast *>(data);
 
   /* We're looking for a TARGET_EXPR nested in the whole expression.  */
   if (TREE_CODE (t) == TARGET_EXPR
-  && !potential_prvalue_result_of (t, full_expr))
+  /* That serves as temporary materialization, not an initializer.  */
+  && !TARGET_EXPR_ELIDING_P (t)
+  && !pset->add (t))
 {
   tree init = TARGET_EXPR_INITIAL (t);
   while (TREE_CODE (init) == COMPOUND_EXPR)
@@ -1457,6 +1424,16 @@ replace_placeholders_for_class_temp_r (tree *tp, int *, 
void *data)
  gcc_checking_assert (!find_placeholders (init));
}
 }
+  /* TARGET_EXPRs initializing function arguments are not marked as eliding,
+ even though gimplify_arg drops them on the floor.  Don't go replacing
+ placeholders in them.  */
+  else if (TREE_CODE (t) == CALL_EXPR || TREE_CODE (t) == AGGR_INIT_EXPR

[gcc r13-9302] c++: ICE with reference NSDMI [PR114854]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:c2581c76c0738298d27b417067dfc2e500f5e21a

commit r13-9302-gc2581c76c0738298d27b417067dfc2e500f5e21a
Author: Marek Polacek 
Date:   Wed May 8 15:43:58 2024 -0400

c++: ICE with reference NSDMI [PR114854]

Here we crash on a cp_gimplify_expr/TARGET_EXPR assert:

  /* A TARGET_EXPR that expresses direct-initialization should have been
 elided by cp_gimplify_init_expr.  */
  gcc_checking_assert (!TARGET_EXPR_DIRECT_INIT_P (*expr_p));

the TARGET_EXPR in question is created for the NSDMI in:

  class Vector { int m_size; };
  struct S {
const Vector &vec{};
  };

where we first need to create a Vector{} temporary, and then bind the
vec reference to it.  The temporary is represented by a TARGET_EXPR
and it cannot be elided.  When we create an object of type S, we get

  D.2848 = {.vec=(const struct Vector &) &TARGET_EXPR }

where the TARGET_EXPR is no longer direct-initializing anything.

Fixed by not setting TARGET_EXPR_DIRECT_INIT_P in
convert_like_internal/ck_user.

PR c++/114854

gcc/cp/ChangeLog:

* call.cc (convert_like_internal) <case ck_user>: Don't set
TARGET_EXPR_DIRECT_INIT_P.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/nsdmi-aggr22.C: New test.

(cherry picked from commit 1a05332bbac98a4c002bef3fb45a3ad9d56b3a71)

Diff:
---
 gcc/cp/call.cc|  6 +-
 gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C | 12 
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index f3efacafe137..d4aaeba94f6d 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -8475,16 +8475,12 @@ convert_like_internal (conversion *convs, tree expr, 
tree fn, int argnum,
&& TYPE_HAS_DEFAULT_CONSTRUCTOR (totype)
&& !processing_template_decl)
  {
-   bool direct = CONSTRUCTOR_IS_DIRECT_INIT (expr);
if (abstract_virtuals_error (NULL_TREE, totype, complain))
  return error_mark_node;
expr = build_value_init (totype, complain);
expr = get_target_expr (expr, complain);
if (expr != error_mark_node)
- {
-   TARGET_EXPR_LIST_INIT_P (expr) = true;
-   TARGET_EXPR_DIRECT_INIT_P (expr) = direct;
- }
+ TARGET_EXPR_LIST_INIT_P (expr) = true;
return expr;
  }
 
diff --git a/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C 
b/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C
new file mode 100644
index ..a4f9ae19ca9d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C
@@ -0,0 +1,12 @@
+// PR c++/114854
+// { dg-do compile { target c++14 } }
+
+struct Vector {
+  int m_size;
+};
+struct S {
+  const Vector &vec{};
+};
+
+void spawn(S);
+void test() { spawn({}); }


[gcc r13-9294] c++: fix ICE with constexpr ARRAY_REF [PR110382]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:a82583e43eaacab9a111d1fca14c7d272aa3e039

commit r13-9294-ga82583e43eaacab9a111d1fca14c7d272aa3e039
Author: Marek Polacek 
Date:   Fri Jul 21 17:48:37 2023 -0400

c++: fix ICE with constexpr ARRAY_REF [PR110382]

This code in cxx_eval_array_reference has been hard to get right.
In r12-2304 I added some code; in r13-5693 I removed some of it.

Here the problematic line is "S s = arr[0];" which causes a crash
on the assert in verify_ctor_sanity:

  gcc_assert (!ctx->object || !DECL_P (ctx->object)
  || ctx->global->get_value (ctx->object) == ctx->ctor);

ctx->object is the VAR_DECL 's', which is correct here.  The second
line points to the problem: we replaced ctx->ctor in
cxx_eval_array_reference:

  new_ctx.ctor = build_constructor (elem_type, NULL); // #1

which I think we shouldn't have; the CONSTRUCTOR we created in
cxx_eval_constant_expression/DECL_EXPR

  new_ctx.ctor = build_constructor (TREE_TYPE (r), NULL);

had the right type.

We still need #1 though.  E.g., in constexpr-96241.C, we never
set ctx.ctor/object before calling cxx_eval_array_reference, so
we have to build a CONSTRUCTOR there.  And in constexpr-101371-2.C
we have a ctx.ctor, but it has the wrong type, so we need a new one.

We can fix the problem by always clearing the object, and, as an
optimization, only create/free a new ctor when actually needed.

PR c++/110382

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Create a new constructor
only when we don't already have a matching one.  Clear the object
when the type is non-scalar.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/constexpr-110382.C: New test.

(cherry picked from commit 6e424febfbcb27c21a7fe3a137e614765f9cf9d2)

Diff:
---
 gcc/cp/constexpr.cc   | 13 +++--
 gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C | 17 +
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index f885a806c0a2..d34b8bdbd166 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4301,15 +4301,24 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, 
tree t,
   else
 val = build_value_init (elem_type, tf_warning_or_error);
 
-  if (!SCALAR_TYPE_P (elem_type))
+  /* Create a new constructor only if we don't already have a suitable one.  */
+  const bool new_ctor = (!SCALAR_TYPE_P (elem_type)
+&& (!ctx->ctor
+|| !same_type_ignoring_top_level_qualifiers_p
+ (elem_type, TREE_TYPE (ctx->ctor;
+  if (new_ctor)
 {
   new_ctx = *ctx;
+  /* We clear the object here.  We used to replace it with T, but that
+caused problems (101371, 108158); and anyway, T is the initializer,
+not the target object.  */
+  new_ctx.object = NULL_TREE;
   new_ctx.ctor = build_constructor (elem_type, NULL);
   ctx = &new_ctx;
 }
   t = cxx_eval_constant_expression (ctx, val, lval, non_constant_p,
overflow_p);
-  if (!SCALAR_TYPE_P (elem_type) && t != ctx->ctor)
+  if (new_ctor && t != ctx->ctor)
 free_constructor (ctx->ctor);
   return t;
 }
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C
new file mode 100644
index ..317c5ecfcd52
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C
@@ -0,0 +1,17 @@
+// PR c++/110382
+// { dg-do compile { target c++14 } }
+
+struct S {
+  double a = 0;
+};
+
+constexpr double
+g ()
+{
+  S arr[1];
+  S s = arr[0];
+  (void) arr[0];
+  return s.a;
+}
+
+int main() { return  g (); }


[gcc r13-9292] c++: fix ICE with designated initializer [PR110114]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:dcad20ccb6cd3c3db076c50cc36b1bf15aa78495

commit r13-9292-gdcad20ccb6cd3c3db076c50cc36b1bf15aa78495
Author: Marek Polacek 
Date:   Wed Jul 19 08:47:29 2023 -0400

c++: fix ICE with designated initializer [PR110114]

r13-1227 added an assert checking that the index in a CONSTRUCTOR
is a FIELD_DECL.  That's a reasonable assumption but in this case
we never called reshape_init due to the type being incomplete, and
so the index remained an identifier node: get_class_binding never
got around to looking up the FIELD_DECL.

We can avoid the crash by returning early in implicit_conversion_1; we'd
return NULL anyway due to:

  if (i < CONSTRUCTOR_NELTS (ctor))
return NULL;

in build_aggr_conv.

PR c++/110114

gcc/cp/ChangeLog:

* call.cc (implicit_conversion_1): Return early if the type isn't
complete.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/initlist100.C: Adjust expected diagnostic.
* g++.dg/cpp2a/desig28.C: New test.
* g++.dg/cpp2a/desig29.C: New test.

(cherry picked from commit 2cb0dc866e8f95151df5d759157708108e850dd9)

Diff:
---
 gcc/cp/call.cc   | 19 +++
 gcc/testsuite/g++.dg/cpp0x/initlist100.C |  4 ++--
 gcc/testsuite/g++.dg/cpp2a/desig28.C | 17 +
 gcc/testsuite/g++.dg/cpp2a/desig29.C | 10 ++
 4 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 70c7f6178b88..18a3db8b1dc2 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -2058,15 +2058,18 @@ implicit_conversion_1 (tree to, tree from, tree expr, 
bool c_cast_p,
   complain &= ~tf_error;
 
   /* Call reshape_init early to remove redundant braces.  */
-  if (expr && BRACE_ENCLOSED_INITIALIZER_P (expr)
-  && CLASS_TYPE_P (to)
-  && COMPLETE_TYPE_P (complete_type (to))
-  && !CLASSTYPE_NON_AGGREGATE (to))
+  if (expr && BRACE_ENCLOSED_INITIALIZER_P (expr) && CLASS_TYPE_P (to))
 {
-  expr = reshape_init (to, expr, complain);
-  if (expr == error_mark_node)
-   return NULL;
-  from = TREE_TYPE (expr);
+  to = complete_type (to);
+  if (!COMPLETE_TYPE_P (to))
+   return nullptr;
+  if (!CLASSTYPE_NON_AGGREGATE (to))
+   {
+ expr = reshape_init (to, expr, complain);
+ if (expr == error_mark_node)
+   return nullptr;
+ from = TREE_TYPE (expr);
+   }
 }
 
   if (TYPE_REF_P (to))
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist100.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist100.C
index 9d80a004c173..6865d34a6f97 100644
--- a/gcc/testsuite/g++.dg/cpp0x/initlist100.C
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist100.C
@@ -2,9 +2,9 @@
 // { dg-do compile { target c++11 } }
 
 namespace std {
-template  class initializer_list;  // { dg-message "declaration" }
+template  class initializer_list;
 }
 
 template  struct B { B (std::initializer_list); };
 struct C { virtual int foo (); };
-struct D : C {} d { B { D {} } };  // { dg-error "incomplete|no matching" }
+struct D : C {} d { B { D {} } };  // { dg-error "no matching" }
diff --git a/gcc/testsuite/g++.dg/cpp2a/desig28.C 
b/gcc/testsuite/g++.dg/cpp2a/desig28.C
new file mode 100644
index ..b63265fea514
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/desig28.C
@@ -0,0 +1,17 @@
+// PR c++/110114
+// { dg-do compile { target c++20 } }
+
+struct A {
+int a,b;
+};
+
+struct B;
+
+void foo(const A &) {}
+void foo(const B &) {}
+
+int
+main ()
+{
+  foo({.a=0});
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/desig29.C 
b/gcc/testsuite/g++.dg/cpp2a/desig29.C
new file mode 100644
index ..bd1a82b041dd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/desig29.C
@@ -0,0 +1,10 @@
+// PR c++/110114
+// { dg-do compile { target c++20 } }
+
+struct B;
+
+void foo(const B &) {}
+
+int main() {
+foo({.a=0}); // { dg-error "invalid" }
+}


[gcc r13-9295] c++: mutable temps in rodata [PR116369]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:dfe7b5e9e7010f10e9737d5f15f5b48ba536e8f5

commit r13-9295-gdfe7b5e9e7010f10e9737d5f15f5b48ba536e8f5
Author: Marek Polacek 
Date:   Thu Aug 29 15:13:03 2024 -0400

c++: mutable temps in rodata [PR116369]

Here we wrongly mark the reference temporary for g TREE_READONLY,
so it's put in .rodata and so we can't modify its subobject even
when the subobject is marked mutable.  This is so since r9-869.
r14-1785 fixed a similar problem, but not in set_up_extended_ref_temp.

PR c++/116369

gcc/cp/ChangeLog:

* call.cc (set_up_extended_ref_temp): Don't mark a temporary
TREE_READONLY if its type is TYPE_HAS_MUTABLE_P.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/initlist-opt7.C: New test.

(cherry picked from commit 2801a49d1144bce5568b527d1972952ad3420f66)

Diff:
---
 gcc/cp/call.cc|  4 +++-
 gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C | 13 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 18a3db8b1dc2..f3efacafe137 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -13650,7 +13650,9 @@ set_up_extended_ref_temp (tree decl, tree expr, 
vec **cleanups,
   init = cp_fully_fold (init);
   if (TREE_CONSTANT (init))
 {
-  if (literal_type_p (type) && CP_TYPE_CONST_NON_VOLATILE_P (type))
+  if (literal_type_p (type)
+ && CP_TYPE_CONST_NON_VOLATILE_P (type)
+ && !TYPE_HAS_MUTABLE_P (type))
{
  /* 5.19 says that a constant expression can include an
 lvalue-rvalue conversion applied to "a glvalue of literal type
diff --git a/gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C 
b/gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C
new file mode 100644
index ..2420db502a67
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C
@@ -0,0 +1,13 @@
+// PR c++/116369
+// { dg-do run { target c++11 } }
+
+struct f{
+  mutable int t;
+};
+
+const f &g = {1};
+
+int main()
+{
+  g.t++;
+}


[gcc r15-6802] libatomic: Cleanup AArch64 ifunc selection

2025-01-10 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:81bcf412c1c221bc2557666a6ca8381dac1de097

commit r15-6802-g81bcf412c1c221bc2557666a6ca8381dac1de097
Author: Wilco Dijkstra 
Date:   Fri Jan 10 18:01:58 2025 +

libatomic: Cleanup AArch64 ifunc selection

Simplify and cleanup ifunc selection logic.  Since LRCPC3 does
not imply LSE2, has_rcpc3() should also check LSE2 is enabled.

Passes regress and bootstrap, OK for commit?

libatomic:
* config/linux/aarch64/host-config.h (has_lse2): Cleanup.
(has_lse128): Likewise.
(has_rcpc3): Add early check for LSE2.

Diff:
---
 libatomic/config/linux/aarch64/host-config.h | 76 +---
 1 file changed, 35 insertions(+), 41 deletions(-)

diff --git a/libatomic/config/linux/aarch64/host-config.h 
b/libatomic/config/linux/aarch64/host-config.h
index f75d27bf2ff5..d0d44bf18eaa 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -91,69 +91,63 @@ has_lse2 (unsigned long hwcap, const __ifunc_arg_t 
*features)
   /* Check for LSE2.  */
   if (hwcap & HWCAP_USCAT)
 return true;
-  /* No point checking further for atomic 128-bit load/store if LSE
- prerequisite not met.  */
-  if (!(hwcap & HWCAP_ATOMICS))
-return false;
-  if (!(hwcap & HWCAP_CPUID))
-return false;
 
-  unsigned long midr;
-  asm volatile ("mrs %0, midr_el1" : "=r" (midr));
+  /* If LSE and CPUID are supported, check MIDR.  */
+  if (hwcap & HWCAP_CPUID && hwcap & HWCAP_ATOMICS)
+{
+  unsigned long midr;
+  asm volatile ("mrs %0, midr_el1" : "=r" (midr));
 
-  /* Neoverse N1 supports atomic 128-bit load/store.  */
-  if (MIDR_IMPLEMENTOR (midr) == 'A' && MIDR_PARTNUM (midr) == 0xd0c)
-return true;
+  /* Neoverse N1 supports atomic 128-bit load/store.  */
+  return MIDR_IMPLEMENTOR (midr) == 'A' && MIDR_PARTNUM (midr) == 0xd0c;
+}
 
   return false;
 }
 
-/* LSE128 atomic support encoded in ID_AA64ISAR0_EL1.Atomic,
-   bits[23:20].  The expected value is 0b0011.  Check that.  */
+/* LSE128 atomic support encoded in ID_AA64ISAR0_EL1.Atomic, bits[23:20].
+   The minimum value for LSE128 is 0b0011.  */
 
 #define AT_FEAT_FIELD(isar0)   (((isar0) >> 20) & 15)
 
 static inline bool
 has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features)
 {
-  if (hwcap & _IFUNC_ARG_HWCAP
-  && features->_hwcap2 & HWCAP2_LSE128)
-return true;
-  /* A 0 HWCAP2_LSE128 bit may be just as much a sign of missing HWCAP2 bit
- support in older kernels as it is of CPU feature absence.  Try fallback
- method to guarantee LSE128 is not implemented.
-
- In the absence of HWCAP_CPUID, we are unable to check for LSE128.
- If feature check available, check LSE2 prerequisite before proceeding.  */
-  if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT))
- return false;
-
-  unsigned long isar0;
-  asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0));
-  if (AT_FEAT_FIELD (isar0) >= 3)
+  if (hwcap & _IFUNC_ARG_HWCAP && features->_hwcap2 & HWCAP2_LSE128)
 return true;
+
+  /* If LSE2 and CPUID are supported, check for LSE128.  */
+  if (hwcap & HWCAP_CPUID && hwcap & HWCAP_USCAT)
+{
+  unsigned long isar0;
+  asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0));
+  return AT_FEAT_FIELD (isar0) >= 3;
+}
+
   return false;
 }
 
-/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20].  The
-   expected value is 0b0011.  Check that.  */
+/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20].
+   The minimum value for LRCPC3 is 0b0011.  */
 
 static inline bool
 has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
 {
-  if (hwcap & _IFUNC_ARG_HWCAP
-  && features->_hwcap2 & HWCAP2_LRCPC3)
-return true;
-  /* Try fallback feature check method to guarantee LRCPC3 is not implemented.
-
- In the absence of HWCAP_CPUID, we are unable to check for RCPC3, return.
- If feature check available, check LSE2 prerequisite before proceeding.  */
-  if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT))
+  /* LSE2 is a prerequisite for atomic LDIAPP/STILP - check HWCAP_USCAT since
+ has_lse2 is more expensive and Neoverse N1 does not have LRCPC3. */
+  if (!(hwcap & HWCAP_USCAT))
 return false;
-  unsigned long isar1;
-  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
-  if (AT_FEAT_FIELD (isar1) >= 3)
+
+  if (hwcap & _IFUNC_ARG_HWCAP && features->_hwcap2 & HWCAP2_LRCPC3)
 return true;
+
+  if (hwcap & HWCAP_CPUID)
+{
+  unsigned long isar1;
+  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
+  return AT_FEAT_FIELD (isar1) >= 3;
+}
+
   return false;
 }


[gcc r15-6803] c++: add fixed test [PR118391]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:d201715989ce2f110e09cd15246969f9fcae5c61

commit r15-6803-gd201715989ce2f110e09cd15246969f9fcae5c61
Author: Marek Polacek 
Date:   Fri Jan 10 15:05:00 2025 -0500

c++: add fixed test [PR118391]

Fixed by r15-6740.

PR c++/118391

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/lambda-uneval20.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C 
b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C
new file mode 100644
index ..fa8b504da9db
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C
@@ -0,0 +1,15 @@
+// PR c++/118391
+// { dg-do compile { target c++20 } }
+
+template
+using A = int;
+
+template
+using B = decltype([] {}.template operator()());
+
+template
+using C = A>;
+
+C x;
+
+int main() {}


[gcc(refs/vendors/redhat/heads/gcc-14-branch)] Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9706ee9' into redhat/gcc-14-branch

2025-01-10 Thread Jakub Jelinek via Libstdc++-cvs
https://gcc.gnu.org/g:e525669e462dd777a1af9932fe9188937acdeb69

commit e525669e462dd777a1af9932fe9188937acdeb69
Merge: b84ce6a258e0 a2de88e5d49f
Author: Jakub Jelinek 
Date:   Fri Jan 10 19:56:03 2025 +0100

Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9706ee9' into 
redhat/gcc-14-branch

Diff:

 gcc/ChangeLog  |   768 +
 gcc/DATESTAMP  | 2 +-
 gcc/ada/ChangeLog  |70 +
 gcc/ada/exp_aggr.adb   |32 +-
 gcc/ada/freeze.adb |26 +-
 gcc/ada/gcc-interface/trans.cc |10 +-
 gcc/ada/gnatvsn.ads| 3 +-
 gcc/ada/libgnarl/s-taprop__dummy.adb   |11 +-
 gcc/ada/libgnat/a-ngrear.adb   |24 +-
 gcc/ada/par-ch6.adb| 1 +
 gcc/ada/version.c  | 5 +-
 gcc/analyzer/ChangeLog |43 +
 gcc/analyzer/analyzer.cc   |15 +-
 gcc/analyzer/analyzer.h| 4 +-
 gcc/analyzer/engine.cc | 2 +-
 gcc/analyzer/kf.cc |26 +
 gcc/analyzer/known-function-manager.cc |38 +-
 gcc/analyzer/known-function-manager.h  | 5 +
 gcc/analyzer/sm-file.cc| 8 +
 gcc/analyzer/sm-malloc.cc  | 1 +
 gcc/analyzer/sm-signal.cc  |11 +-
 gcc/builtins.cc|42 +-
 gcc/c-family/ChangeLog |29 +
 gcc/c-family/c-common.cc   | 8 +-
 gcc/c-family/c-cppbuiltin.cc   |13 +-
 gcc/c/ChangeLog|10 +
 gcc/c/c-parser.cc  |12 +-
 gcc/cfgexpand.cc   | 7 +-
 gcc/common/config/i386/cpuinfo.h   | 1 +
 gcc/config/aarch64/aarch64-cores.def   | 7 +
 gcc/config/aarch64/aarch64-sve-builtins-base.cc|11 +-
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 8 +-
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   | 4 +-
 gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 4 +-
 gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
 gcc/config/aarch64/aarch64-sve2.md | 8 +-
 gcc/config/aarch64/aarch64-tune.md | 2 +-
 gcc/config/aarch64/aarch64.cc  | 1 +
 gcc/config/aarch64/tuning_models/fujitsu_monaka.h  |65 +
 gcc/config/arm/arm-mve-builtins.cc |42 +-
 gcc/config/arm/arm-protos.h| 1 +
 gcc/config/arm/arm.cc  |24 +
 gcc/config/arm/arm_mve.h   | 4 +
 gcc/config/arm/arm_mve_types.h | 4 +
 gcc/config/arm/constraints.md  | 8 +-
 gcc/config/arm/predicates.md   | 4 +
 gcc/config/arm/sync.md | 2 +-
 gcc/config/avr/avr.cc  |   168 +-
 gcc/config/avr/avr.md  | 7 +-
 gcc/config/i386/i386-builtin.def   |10 +-
 gcc/config/i386/i386-expand.cc |12 +
 gcc/config/i386/i386.md| 2 +-
 gcc/config/i386/mmx.md |92 +-
 gcc/config/i386/sse.md | 5 +-
 gcc/config/loongarch/lasx.md   | 2 +-
 gcc/config/loongarch/lasxintrin.h  | 4 +-
 gcc/config/loongarch/loongarch-builtins.cc | 4 +-
 gcc/config/loongarch/lsx.md| 2 +-
 gcc/config/loongarch/lsxintrin.h   | 4 +-
 gcc/config/pa/pa.cc| 1 +
 gcc/config/pa/pa.md|22 +-
 gcc/config/pa/predicates.md| 2 +-
 gcc/config/s390/s390.cc| 2 +-
 gcc/config/v850/v850.opt.urls  |81 +-
 gcc/config/vax/vax.opt.urls|21 +-
 gcc/cp/ChangeLog   |   208 +
 gcc/cp/call.cc |10 +-
 gcc/cp/constexpr.cc|27 +-
 gcc/cp/constraint.cc   | 4 +-
 gcc/cp/decl.cc |13 +-
 gcc/cp/init.cc |21 +-
 gcc/cp/logic.cc|68 +-
 gcc/cp/module.cc   |19 +-
 gcc/cp/parser.cc   | 9 +-
 gcc/cp/pt.cc

[gcc/redhat/heads/gcc-14-branch] (322 commits) Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9

2025-01-10 Thread Jakub Jelinek via Gcc-cvs
The branch 'redhat/heads/gcc-14-branch' was updated to point to:

 e525669e462d... Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9

It previously pointed to:

 b84ce6a258e0... Merge commit 'r14-10877-g2a9fbe1920779b65eb817db7ce0c60096b

Diff:

Summary of changes (added commits):
---

  e525669... Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9
  a2de88e... testsuite: arm: Add pattern for armv8-m.base to cmse-15.c t (*)
  d0191d1... Disable a broken multiversioning optimisation (*)
  02b1172... tree-optimization/117912 - bogus address equivalences for _ (*)
  91b524a... doc: cpp: fix version test example syntax (*)
  1bf4bfc... Daily bump. (*)
  a4c0f16... libstdc++: Use feature test macro for pmr::polymorphic_allo (*)
  e6d2bcf... libstdc++: Improve Doxygen docs for std::allocator_traits s (*)
  734d7da... libstdc++: Undeprecate std::pmr::polymorphic_allocator::des (*)
  72fe42c... libstdc++: Give std::memory_order a fixed underlying type [ (*)
  d05d583... libstdc++: Fix typo in comment in src/c++17/fs_dir.cc (*)
  0cdd4c9... libstdc++: Make std::println use locale from ostream (LWG 4 (*)
  2f20d09... libstdc++: Fix some typos and grammatical errors in docs (*)
  b84070e... libstdc++: Document when std::string::shrink_to_fit was add (*)
  cfe866e... libstdc++: Remove __builtin_expect from consteval assertion (*)
  f0eb0ba... libstdc++: Fix parallel std::exclusive_scan [PR108236] (*)
  3590d9f... libstdc++: Fix debug containers for constant evaluation [PR (*)
  845a0b7... libstdc++: Disable __gnu_debug::__is_singular(T*) in conste (*)
  1e696ca... libstdc++: Skip redundant assertions in std::array equality (*)
  83fa082... libstdc++: Skip redundant assertions in std::span construct (*)
  67c457d... libstdc++: Fix std::deque::insert(pos, first, last) undefin (*)
  f73ecaf... c++: ICE during requires-expr partial subst [PR118060] (*)
  70cea06... c++: constexpr potentiality of CAST_EXPR [PR117925] (*)
  aa1e19d... c++: relax ICE for unexpected trees during constexpr [PR117 (*)
  4dbfc2f... c++: template-id dependence wrt local static arg [PR117792] (*)
  f236c89... libstdc++: Avoid unnecessary copies in ranges::min/max [PR1 (*)
  03d0440... libstdc++: Implement LWG 3563 changes to keys_view and valu (*)
  9d650c2... libstdc++: Fix complexity of drop_view::begin() const [PR11 (*)
  1f509da... testsuite: arm: Use -Os in memset-inline-8* tests (*)
  dedaccb... c++: ICE initializing array of aggrs [PR117985] (*)
  21600f3... c++: unresolved overload with comma op [PR115430] (*)
  3fe6135... c++: noexcept and pointer to member function type [PR113108 (*)
  df3ae94... c++: ICE with structured bindings and m-d array [PR102594] (*)
  078089a... c++: mutable temps in rodata [PR116369] (*)
  13242e5... c++: ICE with enum and conversion fn in template [PR115657] (*)
  da983b3... c++: ICE with reference NSDMI [PR114854] (*)
  e6dfe71... c++: concept in default argument [PR109859] (*)
  61de759... arm: [MVE intrinsics] Fix support for predicate constants [ (*)
  409e766... libstdc++: Update references to gcc.gnu.org/onlinedocs (*)
  a7c5c49... libstdc++: Fix std::future::wait_until for subsecond negati (*)
  785ddc2... libstdc++: Add Doxygen docs for std::forward_like (*)
  7178e38... libstdc++: Fix incorrect DocBook element in manual (*)
  da82bf0... c++: Honor complain in cp_build_function_call_vec for check (*)
  ec9ccda... c++: Diagnose earlier non-static data members with cv conta (*)
  1dd428c... warn-access: Fix up matching_alloc_calls_p [PR118024] (*)
  fbbc1a4... cse: Fix up record_jump_equiv checks [PR117095] (*)
  7ae55c2... c++: allow stores to anon union vars to change current unio (*)
  5a78e36... docs: Clarify -fsanitize=hwaddress target support [PR117960 (*)
  26615af... doloop: Fix up doloop df use [PR116799] (*)
  b602b32... bitintlower: Fix up ?ROTATE_EXPR lowering [PR117847] (*)
  326b6ba... openmp: Add crtoffloadtableS.o and use it [PR117851] (*)
  5d71861... docs: Fix up __sync_* documentation [PR117642] (*)
  448f84a... builtins: Handle BITINT_TYPE in __builtin_iseqsig folding [ (*)
  0183fb1... c: Fix sizeof error recovery [PR117745] (*)
  54c381d... builtins: Fix up DFP ICEs on __builtin_fpclassify [PR102674 (*)
  59eec2e... builtins: Fix up DFP ICEs on __builtin_is{inf,finite,normal (*)
  6b417da... c-family: Yet another fix for _BitInt & __sync_* builtins [ (*)
  3190d62... phiopt: Fix a pasto in spaceship_replacement [PR117612] (*)
  6d0503f... c-family: Fix ICE with __sync_*_and_* on _BitInt [PR117641] (*)
  98eabda... expand: Fix up ICE on VCE from _Complex types to _BitInt [P (*)
  e3b2c17... bitintlower: Handle PAREN_EXPR [PR117459] (*)
  04d7d02... m2: Fix up dependencies some more (*)
  bef6c31... c++: Fix ICE on constexpr virtual function [PR117317] (*)
  c56b465... store-merging: Apply --param=store-merging-max-size= in mor (*)
  67379c5... store-merging: Don't use sub_byte_op_p mode for empty_ctor_ (*)
  c4d2308... Daily bump. (*

[gcc r14-11198] testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test

2025-01-10 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:a2de88e5d49f7084677ef2728cd99db0a9706ee9

commit r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9706ee9
Author: Torbjörn SVENSSON 
Date:   Tue Jan 7 21:04:17 2025 +0100

testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test

Since armv8-m.base uses thumb1 that does not support sibcall/tailcall,
a pattern is needed that uses PUSH/BL/POP sequence instead of a single
B instruction to reuse an already existing function in the compile unit.

gcc/testsuite/ChangeLog:

* gcc.target/arm/cmse/cmse-15.c: Added pattern for armv8-m.base.

Signed-off-by: Torbjörn SVENSSON 
(cherry picked from commit cfd7c54bdfe109f7e801122a093d0d2a85324fc5)

Diff:
---
 gcc/testsuite/gcc.target/arm/cmse/cmse-15.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c 
b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
index 5188f1d697f1..0e35830c35ea 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
@@ -65,6 +65,10 @@ int nonsecure1 (ns_foo_t ** ns_foo_p)
 ** bl  __gnu_cmse_nonsecure_call
 ** |
 ** b   nonsecure0
+** |
+** push{r4, lr}
+** bl  nonsecure0
+** pop {r4, pc}
 ** )
 ** ...
 */
@@ -129,6 +133,10 @@ int secure1 (s_bar_t ** s_bar_p)
 ** blx r[0-3]
 ** |
 ** b   secure0
+** |
+** push{r4, lr}
+** bl  secure0
+** pop {r4, pc}
 ** )
 ** ...
 */
@@ -146,6 +154,10 @@ int secure2 (s_bar_ptr s_bar_p)
 ** blx r[0-3]
 ** |
 ** b   secure1
+** |
+** push{r4, lr}
+** bl  secure1
+** pop {r4, pc}
 ** )
 ** ...
 */


[gcc r15-6801] testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test

2025-01-10 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:cfd7c54bdfe109f7e801122a093d0d2a85324fc5

commit r15-6801-gcfd7c54bdfe109f7e801122a093d0d2a85324fc5
Author: Torbjörn SVENSSON 
Date:   Tue Jan 7 21:04:17 2025 +0100

testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test

Since armv8-m.base uses thumb1 that does not support sibcall/tailcall,
a pattern is needed that uses PUSH/BL/POP sequence instead of a single
B instruction to reuse an already existing function in the compile unit.

gcc/testsuite/ChangeLog:

* gcc.target/arm/cmse/cmse-15.c: Added pattern for armv8-m.base.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/cmse/cmse-15.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c 
b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
index 5188f1d697f1..0e35830c35ea 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
@@ -65,6 +65,10 @@ int nonsecure1 (ns_foo_t ** ns_foo_p)
 ** bl  __gnu_cmse_nonsecure_call
 ** |
 ** b   nonsecure0
+** |
+** push{r4, lr}
+** bl  nonsecure0
+** pop {r4, pc}
 ** )
 ** ...
 */
@@ -129,6 +133,10 @@ int secure1 (s_bar_t ** s_bar_p)
 ** blx r[0-3]
 ** |
 ** b   secure0
+** |
+** push{r4, lr}
+** bl  secure0
+** pop {r4, pc}
 ** )
 ** ...
 */
@@ -146,6 +154,10 @@ int secure2 (s_bar_ptr s_bar_p)
 ** blx r[0-3]
 ** |
 ** b   secure1
+** |
+** push{r4, lr}
+** bl  secure1
+** pop {r4, pc}
 ** )
 ** ...
 */


[gcc r13-9291] c++: ICE with __has_unique_object_representations [PR115476]

2025-01-10 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:b0426fbc85333775ef97cc135e87dd6cae876af7

commit r13-9291-gb0426fbc85333775ef97cc135e87dd6cae876af7
Author: Marek Polacek 
Date:   Mon Jun 17 17:53:12 2024 -0400

c++: ICE with __has_unique_object_representations [PR115476]

Here we started to ICE with r13-25: in check_trait_type, for "X[]" we
return true here:

  if (kind == 1 && TREE_CODE (type) == ARRAY_TYPE && !TYPE_DOMAIN (type))
return true; // Array of unknown bound. Don't care about completeness.

and then end up crashing in record_has_unique_obj_representations:

4836  if (cur != wi::to_offset (sz))

because sz is null.


https://eel.is/c++draft/type.traits#tab:meta.unary.prop-row-47-column-3-sentence-1
says that the preconditions for __has_unique_object_representations are:
"T shall be a complete type, cv void, or an array of unknown bound" and
that "For an array type T, the same result as
has_unique_object_representations_v<remove_all_extents_t<T>>" so T[]
should be treated as T.  So we should use kind==2 for the trait.

PR c++/115476

gcc/cp/ChangeLog:

* semantics.cc (finish_trait_expr)
<case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS>: Move below to call
check_trait_type with kind==2.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/has-unique-obj-representations4.C: New test.

(cherry picked from commit c314867fc06d475e3c2ace32032e0d72e3915b55)

Diff:
---
 gcc/cp/semantics.cc  |  2 +-
 .../g++.dg/cpp1z/has-unique-obj-representations4.C   | 16 
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 079ad5c93bf1..886186403691 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12246,7 +12246,6 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_HAS_NOTHROW_COPY:
 case CPTK_HAS_TRIVIAL_COPY:
 case CPTK_HAS_TRIVIAL_DESTRUCTOR:
-case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS:
   if (!check_trait_type (type1))
return error_mark_node;
   break;
@@ -12256,6 +12255,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_STD_LAYOUT:
 case CPTK_IS_TRIVIAL:
 case CPTK_IS_TRIVIALLY_COPYABLE:
+case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS:
   if (!check_trait_type (type1, /* kind = */ 2))
return error_mark_node;
   break;
diff --git a/gcc/testsuite/g++.dg/cpp1z/has-unique-obj-representations4.C 
b/gcc/testsuite/g++.dg/cpp1z/has-unique-obj-representations4.C
new file mode 100644
index ..d6949dc7005e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/has-unique-obj-representations4.C
@@ -0,0 +1,16 @@
+// PR c++/115476
+// { dg-do compile { target c++11 } }
+
+struct X;
+static_assert(__has_unique_object_representations(X), "");   // { dg-error 
"invalid use of incomplete type" }
+static_assert(__has_unique_object_representations(X[]), "");  // { dg-error 
"invalid use of incomplete type" }
+static_assert(__has_unique_object_representations(X[1]), "");  // { dg-error 
"invalid use of incomplete type" }
+static_assert(__has_unique_object_representations(X[][1]), "");  // { dg-error 
"invalid use of incomplete type" }
+
+struct X {
+  int x;
+};
+static_assert(__has_unique_object_representations(X), "");
+static_assert(__has_unique_object_representations(X[]), "");
+static_assert(__has_unique_object_representations(X[1]), "");
+static_assert(__has_unique_object_representations(X[][1]), "");


[gcc r15-6768] nvptx: Add '__builtin_frame_address(0)' test case

2025-01-10 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:86175a64f167e3b1701132fa1684d76230054c36

commit r15-6768-g86175a64f167e3b1701132fa1684d76230054c36
Author: Thomas Schwinge 
Date:   Fri Dec 13 11:40:01 2024 +0100

nvptx: Add '__builtin_frame_address(0)' test case

Documenting the status quo.

gcc/testsuite/
* gcc.target/nvptx/__builtin_frame_address_0-1.c: New.

Diff:
---
 .../gcc.target/nvptx/__builtin_frame_address_0-1.c | 36 ++
 1 file changed, 36 insertions(+)

diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_frame_address_0-1.c 
b/gcc/testsuite/gcc.target/nvptx/__builtin_frame_address_0-1.c
new file mode 100644
index ..35817769d31f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/__builtin_frame_address_0-1.c
@@ -0,0 +1,36 @@
+/* Document what we do for '__builtin_frame_address(0)'.  */
+
+/* { dg-do compile }
+   TODO We can't 'assemble' this -- it's invalid PTX code.  */
+/* { dg-options -O3 } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+void f(void)
+{
+  void *p;
+  p = __builtin_frame_address(0);
+  sink(p);
+}
+/*
+** f:
+** \.visible \.func f
+** {
+** {
+** \.param\.u64 %out_arg1;
+** st\.param\.u64 \[%out_arg1\], %frame;
+** call sink, \(%out_arg1\);
+** }
+** ret;
+*/
+
+/* The concept of a '%frame' pointer doesn't apply like this for
+   '-mno-soft-stack': PTX "native" stacks (TODO), and for '-msoft-stack' in
+   this form also constitutes invalid PTX code (TODO).
+
+   { dg-final { scan-assembler-not {%frame} { xfail *-*-* } } } */
+
+/* As this is an internal-use built-in function, we don't bother with
+   emitting proper error diagnostics.  */


[gcc r14-11196] tree-optimization/117912 - bogus address equivalences for __builtin_object_size

2025-01-10 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:02b1172dad4c444953868f411fc8dd4483f320e7

commit r14-11196-g02b1172dad4c444953868f411fc8dd4483f320e7
Author: Richard Biener 
Date:   Thu Dec 5 10:47:13 2024 +0100

tree-optimization/117912 - bogus address equivalences for 
__builtin_object_size

VN again is the culprit for exploiting address equivalences before
__builtin_object_size got the chance to do its job.  This time
it isn't about union members but adjacent structure fields where
an address to one after the last element of an array field can
spill over to the next field.

The following protects all out-of-bound accesses on the upper bound
side (singling out TYPE_MAX_VALUE + 1 is more expensive).  It
ignores other out-of-bound addresses that would invoke UB.

Zero-sized arrays are a bit awkward because the C++ represents them
Zero-sized arrays are a bit awkward because the C++ front end represents them

There's a similar issue for zero-sized components whose address can
be the same as the adjacent field in C.

PR tree-optimization/117912
* tree-ssa-sccvn.cc (copy_reference_ops_from_ref): For addresses
of zero-sized components do not set ->off if the object size pass
didn't run.
For OOB ARRAY_REF accesses in address expressions avoid setting
->off if the object size pass didn't run.
(valueize_refs_1): Likewise.

* c-c++-common/torture/pr117912-1.c: New testcase.
* c-c++-common/torture/pr117912-2.c: Likewise.
* c-c++-common/torture/pr117912-3.c: Likewise.

(cherry picked from commit 233972ab3b5338d7a5d1d7af9108c1f366170e44)

Diff:
---
 gcc/testsuite/c-c++-common/torture/pr117912-1.c | 28 
 gcc/testsuite/c-c++-common/torture/pr117912-2.c | 28 
 gcc/testsuite/c-c++-common/torture/pr117912-3.c | 61 +
 gcc/tree-ssa-sccvn.cc   | 51 +++--
 4 files changed, 164 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/torture/pr117912-1.c 
b/gcc/testsuite/c-c++-common/torture/pr117912-1.c
new file mode 100644
index ..2750585c7f77
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr117912-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+struct S { int a; int b[24]; int c[24]; int d; };
+volatile int *p;
+
+void __attribute__((noipa))
+bar (int *q)
+{
+ p = q;
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+foo (struct S *p)
+{
+  bar (&p->b[24]);
+  bar (&p->c[0]);
+  return __builtin_object_size (&p->c[0], 1);
+}
+
+int
+main()
+{
+  struct S s;
+  __SIZE_TYPE__ x = foo (&s);
+  if (x < sizeof (int) * 24)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/torture/pr117912-2.c 
b/gcc/testsuite/c-c++-common/torture/pr117912-2.c
new file mode 100644
index ..a3a621575635
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr117912-2.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+struct S { int a; int b[0]; int c[24]; int d; };
+volatile int *p;
+
+void __attribute__((noipa))
+bar (int *q)
+{
+ p = q;
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+foo (struct S *p)
+{
+  bar (&p->b[0]);
+  bar (&p->c[0]);
+  return __builtin_object_size (&p->c[0], 1);
+}
+
+int
+main()
+{
+  struct S s;
+  __SIZE_TYPE__ x = foo (&s);
+  if (x < sizeof (int) * 24)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/torture/pr117912-3.c 
b/gcc/testsuite/c-c++-common/torture/pr117912-3.c
new file mode 100644
index ..64e981d2a5e7
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr117912-3.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-additional-options "-std=gnu++20" { target c++ } } */
+
+struct B {};
+struct A { int a;
+#ifdef __cplusplus
+  [[no_unique_address]]
+#endif
+  struct B b;
+  char c[]; };
+volatile void *p;
+
+void __attribute__((noipa))
+bar (void *q)
+{
+  p = q;
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+foo (struct A *p)
+{
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 1);
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+baz (void)
+{
+  struct A *p = (struct A *) __builtin_malloc (__builtin_offsetof (struct A, 
c) + 64);
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 1);
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+qux (struct A *p)
+{
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 3);
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+boo (void)
+{
+  struct A *p = (struct A *) __builtin_malloc (__builtin_offsetof (struct A, 
c) + 64);
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 3);
+}
+
+int
+main ()
+{
+  static struct A a = { .a = 1, .b = {}, .c = { 1, 2, 3, 4, 0 } };
+  if (foo (&a) < 5)
+__builtin_abort ();
+  if (baz () < 64)
+__builtin_abort ();
+}
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 0b5c638df455..ff27b75313e0 100644
--- a/gcc/tree-ssa-sccvn.cc

[gcc r15-6804] libstdc++: Fix unused parameter warnings in <bits/atomic_futex.h>

2025-01-10 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:c9353e0fcd0ddc0d48ae8a2b0518f0f82670d708

commit r15-6804-gc9353e0fcd0ddc0d48ae8a2b0518f0f82670d708
Author: Jonathan Wakely 
Date:   Fri Jan 10 10:32:22 2025 +

libstdc++: Fix unused parameter warnings in <bits/atomic_futex.h>

This fixes warnings like the following during bootstrap:

sparc-sun-solaris2.11/libstdc++-v3/include/bits/atomic_futex.h:324:53: 
warning: unused parameter ‘__mo’ [-Wunused-parameter]
  324 | _M_load_when_equal(unsigned __val, memory_order __mo)
  |~^~~~

libstdc++-v3/ChangeLog:

* include/bits/atomic_futex.h (__atomic_futex_unsigned): Remove
names of unused parameters in non-futex implementation.

Diff:
---
 libstdc++-v3/include/bits/atomic_futex.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/bits/atomic_futex.h 
b/libstdc++-v3/include/bits/atomic_futex.h
index e69420d23055..9326cba67153 100644
--- a/libstdc++-v3/include/bits/atomic_futex.h
+++ b/libstdc++-v3/include/bits/atomic_futex.h
@@ -305,14 +305,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { }
 
 _GLIBCXX_ALWAYS_INLINE unsigned
-_M_load(memory_order __mo)
+_M_load(memory_order)
 {
   unique_lock __lock(_M_mutex);
   return _M_data;
 }
 
 _GLIBCXX_ALWAYS_INLINE unsigned
-_M_load_when_not_equal(unsigned __val, memory_order __mo)
+_M_load_when_not_equal(unsigned __val, memory_order)
 {
   unique_lock __lock(_M_mutex);
   while (_M_data == __val)
@@ -321,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 }
 
 _GLIBCXX_ALWAYS_INLINE void
-_M_load_when_equal(unsigned __val, memory_order __mo)
+_M_load_when_equal(unsigned __val, memory_order)
 {
   unique_lock __lock(_M_mutex);
   while (_M_data != __val)
@@ -330,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 template
   _GLIBCXX_ALWAYS_INLINE bool
-  _M_load_when_equal_for(unsigned __val, memory_order __mo,
+  _M_load_when_equal_for(unsigned __val, memory_order,
  const chrono::duration<_Rep, _Period>& __rtime)
   {
unique_lock __lock(_M_mutex);
@@ -340,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 template
   _GLIBCXX_ALWAYS_INLINE bool
-  _M_load_when_equal_until(unsigned __val, memory_order __mo,
+  _M_load_when_equal_until(unsigned __val, memory_order,
  const chrono::time_point<_Clock, _Duration>& __atime)
   {
unique_lock __lock(_M_mutex);
@@ -349,7 +349,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 
 _GLIBCXX_ALWAYS_INLINE void
-_M_store_notify_all(unsigned __val, memory_order __mo)
+_M_store_notify_all(unsigned __val, memory_order)
 {
   unique_lock __lock(_M_mutex);
   _M_data = __val;


[gcc r15-6806] AArch64: correct Cortex-X4 MIDR

2025-01-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:ddcfae1d1dfe5875875c9897f0dda14e342b2534

commit r15-6806-gddcfae1d1dfe5875875c9897f0dda14e342b2534
Author: Tamar Christina 
Date:   Fri Jan 10 21:13:50 2025 +

AArch64: correct Cortex-X4 MIDR

The Parts Num field for the MIDR for Cortex-X4 is wrong.  It's currently the
parts number for a Cortex-A720 (which does have the right number).

The correct number can be found in the Cortex-X4 Technical Reference Manual 
[1]
on page 382 in Issue Number 5.

[1] https://developer.arm.com/documentation/102484/latest/

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Fix cortex-x4 
parts
num.

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index caf61437d180..5ac81332b67c 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -193,7 +193,7 @@ AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  
(SVE2_BITPERM, MEMTAG, I8M
 
 AARCH64_CORE("cortex-x3",  cortexx3, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
I8MM, BF16), neoversev2, 0x41, 0xd4e, -1)
 
-AARCH64_CORE("cortex-x4",  cortexx4, cortexa57, V9_2A,  (SVE2_BITPERM, MEMTAG, 
PROFILE), neoversev3, 0x41, 0xd81, -1)
+AARCH64_CORE("cortex-x4",  cortexx4, cortexa57, V9_2A,  (SVE2_BITPERM, MEMTAG, 
PROFILE), neoversev3, 0x41, 0xd82, -1)
 AARCH64_CORE("cortex-x925", cortexx925, cortexa57, V9_2A,  (SVE2_BITPERM, 
MEMTAG, PROFILE), cortexx925, 0x41, 0xd85, -1)
 
 AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, 
SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1)


[gcc r15-6807] vect: Force alignment peeling to vectorize more early break loops [PR118211]

2025-01-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:68326d5d1a593dc0bf098c03aac25916168bc5a9

commit r15-6807-g68326d5d1a593dc0bf098c03aac25916168bc5a9
Author: Alex Coplan 
Date:   Mon Mar 11 13:09:10 2024 +

vect: Force alignment peeling to vectorize more early break loops [PR118211]

This allows us to vectorize more loops with early exits by forcing
peeling for alignment to make sure that we're guaranteed to be able to
safely read an entire vector iteration without crossing a page boundary.

To make this work for VLA architectures we have to allow compile-time
non-constant target alignments.  We also have to override the result of
the target's preferred_vector_alignment hook if it isn't a power-of-two
multiple of the TYPE_SIZE of the chosen vector type.

gcc/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* tree-vect-data-refs.cc (vect_analyze_early_break_dependences):
Set need_peeling_for_alignment flag on read DRs instead of
failing vectorization.  Punt on gathers.
(dr_misalignment): Handle non-constant target alignments.
(vect_compute_data_ref_alignment): If need_peeling_for_alignment
flag is set on the DR, then override the target alignment chosen
by the preferred_vector_alignment hook to choose a safe
alignment.
(vect_supportable_dr_alignment): Override
support_vector_misalignment hook if need_peeling_for_alignment
is set on the DR: in this case we must return
dr_unaligned_unsupported in order to force peeling.
* tree-vect-loop-manip.cc (vect_do_peeling): Allow prolog
peeling by a compile-time non-constant amount.
* tree-vectorizer.h (dr_vec_info): Add new flag
need_peeling_for_alignment.

gcc/testsuite/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* gcc.dg/tree-ssa/cunroll-13.c: Don't vectorize.
* gcc.dg/tree-ssa/cunroll-14.c: Likewise.
* gcc.dg/unroll-6.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-28.c: Likewise.
* gcc.dg/vect/vect-104.c: Expect to vectorize.
* gcc.dg/vect/vect-early-break_108-pr113588.c: Likewise.
* gcc.dg/vect/vect-early-break_109-pr113588.c: Likewise.
* gcc.dg/vect/vect-early-break_110-pr113467.c: Likewise.
* gcc.dg/vect/vect-early-break_3.c: Likewise.
* gcc.dg/vect/vect-early-break_65.c: Likewise.
* gcc.dg/vect/vect-early-break_8.c: Likewise.
* gfortran.dg/vect/vect-5.f90: Likewise.
* gfortran.dg/vect/vect-8.f90: Likewise.
* gcc.dg/vect/vect-switch-search-line-fast.c:

Co-Authored-By: Tamar Christina 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c|   1 +
 gcc/testsuite/gcc.dg/unroll-6.c|   2 +-
 gcc/testsuite/gcc.dg/vect/vect-104.c   |   1 +
 .../gcc.dg/vect/vect-early-break_108-pr113588.c|   2 +-
 .../gcc.dg/vect/vect-early-break_109-pr113588.c|   2 +-
 .../gcc.dg/vect/vect-early-break_110-pr113467.c|   2 +-
 gcc/testsuite/gcc.dg/vect/vect-early-break_3.c |   2 +-
 gcc/testsuite/gcc.dg/vect/vect-early-break_65.c|   2 +-
 gcc/testsuite/gcc.dg/vect/vect-early-break_8.c |   2 +-
 .../gcc.dg/vect/vect-switch-search-line-fast.c |   3 +-
 gcc/testsuite/gfortran.dg/vect/vect-5.f90  |   1 +
 gcc/testsuite/gfortran.dg/vect/vect-8.f90  |   5 +-
 gcc/tree-vect-data-refs.cc | 113 ++---
 gcc/tree-vect-loop-manip.cc|   6 --
 gcc/tree-vectorizer.h  |   5 +
 17 files changed, 119 insertions(+), 34 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c
index 98cb56a8564b..154e2963f12d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fgimple -fdump-tree-cunroll-blocks-details" } */
+/* { dg-options "-O3 -fgimple -fdump-tree-cunroll-blocks-details -fno-tree-vectorize" } */
 
 #if __SIZEOF_INT__ < 4
 __extension__ typedef __INT32_TYPE__ i32;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c
index 5f112da310c8..4b369f7ad278 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-cunroll-blocks-details" } */
+/* { dg-options "-O3 -fdump-tree-cunroll-blocks-details -fno-tree-vectorize" } */
 struct a {int a[100];};
 void
 t(struct a *a)
diff --git a/gcc/testsuite/

[gcc r15-6811] vect: Also cost gconds for scalar [PR118211]

2025-01-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:086031c058598512d09bf898e4db3735b3e1f22c

commit r15-6811-g086031c058598512d09bf898e4db3735b3e1f22c
Author: Alex Coplan 
Date:   Mon Jun 24 13:54:48 2024 +0100

vect: Also cost gconds for scalar [PR118211]

Currently we only cost gconds for the vector loop while we omit costing
them when analyzing the scalar loop; this unfairly penalizes the vector
loop in the case of loops with early exits.

This (together with the previous patches) enables us to vectorize
std::find with 64-bit element sizes.

gcc/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* tree-vect-loop.cc (vect_compute_single_scalar_iteration_cost):
Don't skip over gconds.

Diff:
---
 gcc/tree-vect-loop.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index bb1138bfcfba..edd7d4d87630 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1688,7 +1688,9 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info 
loop_vinfo)
  gimple *stmt = gsi_stmt (si);
  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
 
-  if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
+ if (!is_gimple_assign (stmt)
+ && !is_gimple_call (stmt)
+ && !is_a (stmt))
 continue;
 
   /* Skip stmts that are not vectorized inside the loop.  */


[gcc r15-6810] vect: Ensure we add vector skip guard even when versioning for aliasing [PR118211]

2025-01-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:f4e259b4a66c81c234608056117836e13606e4c8

commit r15-6810-gf4e259b4a66c81c234608056117836e13606e4c8
Author: Alex Coplan 
Date:   Thu Jul 25 16:34:05 2024 +

vect: Ensure we add vector skip guard even when versioning for aliasing [PR118211]

This fixes a latent wrong code issue whereby vect_do_peeling determined
the wrong condition for inserting the vector skip guard.  Specifically
in the case where the loop niters are unknown at compile time we used to
check:

  !LOOP_REQUIRES_VERSIONING (loop_vinfo)

but LOOP_REQUIRES_VERSIONING is true for loops which we have versioned
for aliasing, and that has nothing to do with prolog peeling.  I think
this condition should instead be checking specifically if we aren't
versioning for alignment.

As it stands, when we version for alignment, we don't peel, so the
vector skip guard is indeed redundant in that case.

With the testcase added (reduced from the Fortran frontend) we would
version for aliasing, omit the vector skip guard, and then at runtime we
would peel sufficient iterations for alignment that there wasn't a full
vector iteration left when we entered the vector body, thus overflowing
the output buffer.

gcc/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* tree-vect-loop-manip.cc (vect_do_peeling): Adjust skip_vector
condition to only omit the edge if we're versioning for
alignment.

gcc/testsuite/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* gcc.dg/vect/vect-early-break_130.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-early-break_130.c | 91 
 gcc/tree-vect-loop-manip.cc  |  2 +-
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c
new file mode 100644
index ..ce43fcd5681c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c
@@ -0,0 +1,91 @@
+/* { dg-require-effective-target mmap } */
+/* { dg-add-options vect_early_break } */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* This was reduced from gcc/fortran/scanner.cc:gfc_widechar_to_char.
+   The problem was that we omitted adding the vector skip guard when
+   versioning for aliasing.  When invoked on a string that is 28 bytes
+   long, that caused us to enter the vector body after having peeled 15
+   iterations, leaving only 13 iterations to be performed as vector, but
+   the vector body performs 16 (thus overflowing the res buffer by three
+   bytes).  */
+__attribute__((noipa))
+void f (const uint32_t *s, char *res, int length)
+{
+  unsigned long i;
+
+  for (i = 0; i < length; i++)
+{
+  if (s[i] > 255)
+__builtin_abort ();
+  res[i] = (char)s[i];
+}
+}
+
+int main(void)
+{
+  long pgsz = sysconf (_SC_PAGESIZE);
+  if (pgsz == -1) {
+fprintf (stderr, "sysconf failed: %m\n");
+return 0;
+  }
+
+  void *p = mmap (NULL,
+  pgsz * 2,
+  PROT_READ | PROT_WRITE,
+  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (p == MAP_FAILED) {
+fprintf (stderr, "mmap failed: %m\n");
+return 0;
+  }
+
+  if (mprotect (p + pgsz, pgsz, PROT_NONE)) {
+fprintf (stderr, "mprotect failed: %m\n");
+return 0;
+  }
+
+  uint32_t in[128];
+  memset (in, 0, sizeof(in));
+
+  uintptr_t x = (uintptr_t)in;
+
+  /* We want to make our input pointer maximally misaligned (so we have
+ to peel the greatest possible number of iterations for alignment).
+ We need two bits of alignment for our uint32_t pointer to be
+ aligned.  Assuming we process 16 chars per vector iteration, we
+ will need to load 16 uint32_ts, thus we need a further 4 bits of
+ alignment.  */
+  const uintptr_t align_bits = 2 + 4;
+  const uintptr_t align_p2 = (1 << align_bits);
+  const uintptr_t align_p2m1 = align_p2 - 1;
+
+  if (x & align_p2m1 <= 4)
+x &= -align_p2; /* Round down.  */
+  else
+x = (x + align_p2m1) & -align_p2; /* Round up.  */
+
+  /* Add one uint32_t to get maximally misaligned.  */
+  uint32_t *inp = (uint32_t *)x + 1;
+
+  const char *str = "dec-comparison-complex_1.f90";
+  long n;
+#pragma GCC novector
+  for (n = 0; str[n]; n++)
+inp[n] = str[n];
+
+  if (n > pgsz)
+__builtin_abort ();
+
+  char *buf = p + pgsz - n;
+  f (inp, buf, n);
+
+#pragma GCC novector
+  for (int i = 0; i < n; i++)
+if (buf[i] != str[i])
+  __builtin_abort ();
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 9a55a5611ccc..06ca99eaab95 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3271,7 +3271,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo

[gcc r15-6809] vect: Fix dominators when adding a guard to skip the vector loop [PR118211]

2025-01-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:f1c6789ab6c5443ccefab96c74b0e862119d1781

commit r15-6809-gf1c6789ab6c5443ccefab96c74b0e862119d1781
Author: Tamar Christina 
Date:   Mon Jul 8 12:16:11 2024 +0100

vect: Fix dominators when adding a guard to skip the vector loop [PR118211]

The alignment peeling changes exposed a latent missing dominator update
with early break vectorization, specifically when inserting the vector
skip edge, since the new edge bypasses the prolog skip block and thus
has the potential to subvert its dominance.  This patch fixes that.

gcc/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* tree-vect-loop-manip.cc (vect_do_peeling): Update immediate
dominators of nodes that were dominated by the prolog skip block
after inserting vector skip edge.  Initialize prolog variable to
NULL to avoid bogus -Wmaybe-uninitialized during bootstrap.

gcc/testsuite/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* g++.dg/vect/vect-early-break_6.cc: New test.

Co-Authored-By: Alex Coplan 

Diff:
---
 gcc/testsuite/g++.dg/vect/vect-early-break_6.cc | 25 
 gcc/tree-vect-loop-manip.cc | 26 -
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/vect/vect-early-break_6.cc 
b/gcc/testsuite/g++.dg/vect/vect-early-break_6.cc
new file mode 100644
index ..fdd9af832a74
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vect-early-break_6.cc
@@ -0,0 +1,25 @@
+// { dg-do compile }
+// ICE in verify_dominators, reduced from charset.cc (libcpp).
+
+void convert_escape(int *);
+int cpp_interpret_string_1_to, cpp_interpret_string_1_tbuf;
+char *cpp_interpret_string_1_base;
+char cpp_interpret_string_1_limit;
+void cpp_interpret_string_1() {
+  char *p;
+  for (;;) {
+cpp_interpret_string_1_base = p;
+while (p < &cpp_interpret_string_1_limit && *p)
+  p++;
+if (p > cpp_interpret_string_1_base)
+  if (cpp_interpret_string_1_to)
+goto fail;
+if (p >= &cpp_interpret_string_1_limit)
+  break;
+int *tbuf_ptr =
+cpp_interpret_string_1_to ? &cpp_interpret_string_1_tbuf : __null;
+convert_escape(tbuf_ptr);
+  }
+fail:
+  ;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 4505e5d87ddb..9a55a5611ccc 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3197,7 +3197,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
   prob_prolog = prob_epilog = profile_probability::guessed_always ()
.apply_scale (estimated_vf - 1, estimated_vf);
 
-  class loop *prolog, *epilog = NULL;
+  class loop *prolog = NULL, *epilog = NULL;
   class loop *first_loop = loop;
   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
 
@@ -3464,6 +3464,30 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
  skip_e = guard_e;
  e = EDGE_PRED (guard_to, 0);
  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
+
+ /* Handle any remaining dominator updates needed after
+inserting the loop skip edge above.  */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && prolog_peeling)
+   {
+ /* Adding a skip edge to skip a loop with multiple exits
+means the dominator of the join blocks for all exits shifts
+from the prolog skip guard to the loop skip guard.  */
+ auto prolog_skip_bb
+   = single_pred (loop_preheader_edge (prolog)->src);
+ auto needs_update
+   = get_dominated_by (CDI_DOMINATORS, prolog_skip_bb);
+
+ /* Update everything except for the immediate children of
+the prolog skip block (the prolog and vector preheaders).
+Those should remain dominated by the prolog skip block itself,
+since the loop guard edge goes to the epilogue.  */
+ for (auto bb : needs_update)
+   if (bb != EDGE_SUCC (prolog_skip_bb, 0)->dest
+   && bb != EDGE_SUCC (prolog_skip_bb, 1)->dest)
+ set_immediate_dominator (CDI_DOMINATORS, bb, guard_bb);
+   }
+
  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
 
  /* Simply propagate profile info from guard_bb to guard_to which is


[gcc r15-6808] vect: Don't guard scalar epilogue for inverted loops [PR118211]

2025-01-10 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:0a46245174123ad2802753e7fee689a541570ca0

commit r15-6808-g0a46245174123ad2802753e7fee689a541570ca0
Author: Alex Coplan 
Date:   Fri Jun 7 11:13:02 2024 +

vect: Don't guard scalar epilogue for inverted loops [PR118211]

For loops with LOOP_VINFO_EARLY_BREAKS_VECT_PEELED we should always
enter the scalar epilogue, so avoid emitting a guard on entry to the
epilogue.

gcc/ChangeLog:

PR tree-optimization/118211
PR tree-optimization/116126
* tree-vect-loop-manip.cc (vect_do_peeling): Avoid emitting an
epilogue guard for inverted early-exit loops.

Diff:
---
 gcc/tree-vect-loop-manip.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 15cac0fe27df..4505e5d87ddb 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3530,7 +3530,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
 
   /* If we have a peeled vector iteration we will never skip the epilog loop
 and we can simplify the cfg a lot by not doing the edge split.  */
-  if (skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+  if (skip_epilog
+ || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
{
  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
niters, niters_vector_mult_vf);