[gcc r13-9305] c++: ICE with variable template and [[deprecated]] [PR110031]
https://gcc.gnu.org/g:605803cf4089955c39dcbed97b343550581b8eed commit r13-9305-g605803cf4089955c39dcbed97b343550581b8eed Author: Marek Polacek Date: Fri Jan 10 17:29:36 2025 -0500 c++: ICE with variable template and [[deprecated]] [PR110031] lookup_and_finish_template_variable already has and uses the complain parameter but it is not passing it down to mark_used so we got the default tf_warning_or_error, which causes various problems when lookup_and_finish_template_variable gets called with complain=tf_none. PR c++/110031 gcc/cp/ChangeLog: * pt.cc (lookup_and_finish_template_variable): Pass complain to mark_used. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/inline-var11.C: New test. Diff: --- gcc/cp/pt.cc | 2 +- gcc/testsuite/g++.dg/cpp1z/inline-var11.C | 32 +++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index ddfa3c25d10e..bb53d9881405 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -10427,7 +10427,7 @@ lookup_and_finish_template_variable (tree templ, tree targs, deduction to work. */ complain &= ~tf_partial; var = finish_template_variable (var, complain); - mark_used (var); + mark_used (var, complain); return convert_from_reference (var); } diff --git a/gcc/testsuite/g++.dg/cpp1z/inline-var11.C b/gcc/testsuite/g++.dg/cpp1z/inline-var11.C new file mode 100644 index ..d92911ed3a93 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/inline-var11.C @@ -0,0 +1,32 @@ +// PR c++/110031 +// { dg-do compile { target c++17 } } + +template +[[deprecated]] +inline constexpr bool t = true ; + +template +struct enableif; + +template<> +struct enableif +{ +using y = int; +}; +template +using enableif_t = typename enableif::y; + +template > = 0> // { dg-warning "deprecated" } +struct A { A(T &&) { }}; + +template +struct A { + A(T &&) = delete; + A() = delete; +}; + +int main(void) +{ + A a(5.3); // { dg-error "use of deleted function" } + return 0; +}
[gcc r15-6817] LoongArch: Generate the final immediate for lu12i.w, lu32i.d and lu52i.d
https://gcc.gnu.org/g:f30423ea8c2152dcee91056e75a4f3736cce6a6e commit r15-6817-gf30423ea8c2152dcee91056e75a4f3736cce6a6e Author: mengqinggang Date: Fri Jan 10 10:27:09 2025 +0800 LoongArch: Generate the final immediate for lu12i.w, lu32i.d and lu52i.d Generate 0x1010 instead of 0x101>>12 for lu12i.w. lu32i.d and lu52i.d use the same processing. gcc/ChangeLog: * config/loongarch/lasx.md: Use new loongarch_output_move. * config/loongarch/loongarch-protos.h (loongarch_output_move): Change parameters from (rtx, rtx) to (rtx *). * config/loongarch/loongarch.cc (loongarch_output_move): Generate final immediate for lu12i.w and lu52i.d. * config/loongarch/loongarch.md: Generate final immediate for lu32i.d and lu52i.d. * config/loongarch/lsx.md: Use new loongarch_output_move. gcc/testsuite/ChangeLog: * gcc.target/loongarch/imm-load.c: Not generate ">>". Diff: --- gcc/config/loongarch/lasx.md | 2 +- gcc/config/loongarch/loongarch-protos.h | 2 +- gcc/config/loongarch/loongarch.cc | 14 --- gcc/config/loongarch/loongarch.md | 34 +-- gcc/config/loongarch/lsx.md | 2 +- gcc/testsuite/gcc.target/loongarch/imm-load.c | 1 + 6 files changed, 36 insertions(+), 19 deletions(-) diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index edaf64eeb959..a37c85a25a4b 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -723,7 +723,7 @@ [(set (match_operand:LASX 0 "nonimmediate_operand" "=f,f,R,*r,*f") (match_operand:LASX 1 "move_operand" "fYGYI,R,f,*f,*r"))] "ISA_HAS_LASX" - { return loongarch_output_move (operands[0], operands[1]); } + { return loongarch_output_move (operands); } [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") (set_attr "mode" "") (set_attr "length" "8,4,4,4,4")]) diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index fb544ad75ca1..6601f767dab4 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -86,7 +86,7 
@@ extern void loongarch_split_move (rtx, rtx); extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); extern void loongarch_split_plus_constant (rtx *, machine_mode); extern void loongarch_split_vector_move (rtx, rtx); -extern const char *loongarch_output_move (rtx, rtx); +extern const char *loongarch_output_move (rtx *); #ifdef RTX_CODE extern void loongarch_expand_scc (rtx *); extern void loongarch_expand_vec_cmp (rtx *); diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 24c19031026a..9d97f0216f0d 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -4721,8 +4721,10 @@ loongarch_split_vector_move (rtx dest, rtx src) that SRC is operand 1 and DEST is operand 0. */ const char * -loongarch_output_move (rtx dest, rtx src) +loongarch_output_move (rtx *operands) { + rtx src = operands[1]; + rtx dest = operands[0]; enum rtx_code dest_code = GET_CODE (dest); enum rtx_code src_code = GET_CODE (src); machine_mode mode = GET_MODE (dest); @@ -4877,13 +4879,19 @@ loongarch_output_move (rtx dest, rtx src) if (src_code == CONST_INT) { if (LU12I_INT (src)) - return "lu12i.w\t%0,%1>>12\t\t\t# %X1"; + { + operands[1] = GEN_INT (INTVAL (operands[1]) >> 12); + return "lu12i.w\t%0,%1\t\t\t# %X1"; + } else if (IMM12_INT (src)) return "addi.w\t%0,$r0,%1\t\t\t# %X1"; else if (IMM12_INT_UNSIGNED (src)) return "ori\t%0,$r0,%1\t\t\t# %X1"; else if (LU52I_INT (src)) - return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1"; + { + operands[1] = GEN_INT (INTVAL (operands[1]) >> 52); + return "lu52i.d\t%0,$r0,%X1\t\t\t# %1"; + } else gcc_unreachable (); } diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 3eff4077160e..59f457703110 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -2209,7 +2209,7 @@ "!TARGET_64BIT && (register_operand (operands[0], DImode) || reg_or_0_operand (operands[1], DImode))" - { return loongarch_output_move 
(operands[0], operands[1]); } + { return loongarch_output_move (operands); } "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO (operands[0]))" [(const_int 0)] @@ -2228,7 +2228,9 @@ "TARGET_64BIT && (register_operand (operands[0], DImode) || reg_or_0_operand (operands[1], DImode))" - { return loongarch_output_move (operands[0], operands[1]); } + { +return loongarch_output_move (operands);
[gcc r14-11201] Fortran: Cray pointer comparison wrongly optimized away [PR106692]
https://gcc.gnu.org/g:5ae344e3acabf11cde001419f9bec64a2cf89f5a commit r14-11201-g5ae344e3acabf11cde001419f9bec64a2cf89f5a Author: Harald Anlauf Date: Thu Jan 2 20:22:23 2025 +0100 Fortran: Cray pointer comparison wrongly optimized away [PR106692] PR fortran/106692 gcc/fortran/ChangeLog: * trans-expr.cc (gfc_conv_expr_op): Inhibit excessive optimization of Cray pointers by treating them as volatile in comparisons. gcc/testsuite/ChangeLog: * gfortran.dg/cray_pointers_13.f90: New test. (cherry picked from commit c7754a2fb2e60987524947fe189f3ffac035ea1d) Diff: --- gcc/fortran/trans-expr.cc | 13 +++ gcc/testsuite/gfortran.dg/cray_pointers_13.f90 | 51 ++ 2 files changed, 64 insertions(+) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 10eade22f2a2..8e74fbfb257d 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -4025,6 +4025,19 @@ gfc_conv_expr_op (gfc_se * se, gfc_expr * expr) if (lop) { + // Inhibit overeager optimization of Cray pointer comparisons (PR106692). + if (expr->value.op.op1->expr_type == EXPR_VARIABLE + && expr->value.op.op1->ts.type == BT_INTEGER + && expr->value.op.op1->symtree + && expr->value.op.op1->symtree->n.sym->attr.cray_pointer) + TREE_THIS_VOLATILE (lse.expr) = 1; + + if (expr->value.op.op2->expr_type == EXPR_VARIABLE + && expr->value.op.op2->ts.type == BT_INTEGER + && expr->value.op.op2->symtree + && expr->value.op.op2->symtree->n.sym->attr.cray_pointer) + TREE_THIS_VOLATILE (rse.expr) = 1; + /* The result of logical ops is always logical_type_node. */ tmp = fold_build2_loc (input_location, code, logical_type_node, lse.expr, rse.expr); diff --git a/gcc/testsuite/gfortran.dg/cray_pointers_13.f90 b/gcc/testsuite/gfortran.dg/cray_pointers_13.f90 new file mode 100644 index ..766d24546ab2 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/cray_pointers_13.f90 @@ -0,0 +1,51 @@ +! { dg-do run } +! { dg-additional-options "-fcray-pointer" } +! +! 
PR fortran/106692 - Cray pointer comparison wrongly optimized away +! +! Contributed by Marek Polacek + +program test + call test_cray() + call test_cray2() +end + +subroutine test_cray() + pointer(ptrzz1 , zz1) + ptrzz1=0 + if (ptrzz1 .ne. 0) then +print *, "test_cray: ptrzz1=", ptrzz1 +stop 1 + else +call shape_cray(zz1) + end if +end + +subroutine shape_cray(zz1) + pointer(ptrzz , zz) + ptrzz=loc(zz1) + if (ptrzz .ne. 0) then +print *, "shape_cray: ptrzz=", ptrzz +stop 3 + end if +end + +subroutine test_cray2() + pointer(ptrzz1 , zz1) + ptrzz1=0 + if (0 == ptrzz1) then +call shape_cray2(zz1) + else +print *, "test_cray2: ptrzz1=", ptrzz1 +stop 2 + end if +end + +subroutine shape_cray2(zz1) + pointer(ptrzz , zz) + ptrzz=loc(zz1) + if (.not. (0 == ptrzz)) then +print *, "shape_cray2: ptrzz=", ptrzz +stop 4 + end if +end
[gcc r14-11200] libstdc++: backport inline keyword on std::find
https://gcc.gnu.org/g:e4a9fb7448a687f4fd7e621942006c2820b803d6 commit r14-11200-ge4a9fb7448a687f4fd7e621942006c2820b803d6 Author: Tamar Christina Date: Fri Jan 10 21:37:40 2025 + libstdc++: backport inline keyword on std::find This is a backport version of the same patch as g:18aff7644ad1e44dc146d36a2b7e397977aa47ac In GCC 12 there was a ~40% regression in the performance of hashmap->find. This regression came about accidentally: Before GCC 12 the find function was small enough that IPA would inline it even though it wasn't marked inline. In GCC-12 an optimization was added to perform a linear search when the entries in the hashmap are small. This increased the size of the function enough that IPA would no longer inline. Inlining had two benefits: 1. The return value is a reference, so it has to be returned and dereferenced even though the search loop may have already dereferenced it. 2. The pattern is a hard pattern to track for branch predictors. This causes a large number of branch misses if the value is immediately checked and branched on. i.e. if (a != m.end()) which is a common pattern. The patch fixes both these issues by adding the inline keyword to _M_locate to allow the inliner to consider inlining again. This and the other patches have been run through several benchmarks where the size, number of elements searched for and type (reference vs value) etc were tested. The change shows no statistical regression, but an average find improvement of ~27% and a range between ~10-60% improvements. Thanks, Tamar libstdc++-v3/ChangeLog: * include/bits/hashtable.h (find): Add inline keyword. 
Diff: --- libstdc++-v3/include/bits/hashtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 834288c747c2..f5f421d2fd32 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1723,7 +1723,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename _ExtractKey, typename _Equal, typename _Hash, typename _RangeHash, typename _Unused, typename _RehashPolicy, typename _Traits> -auto +auto inline _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: find(const key_type& __k) @@ -1746,7 +1746,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename _ExtractKey, typename _Equal, typename _Hash, typename _RangeHash, typename _Unused, typename _RehashPolicy, typename _Traits> -auto +auto inline _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: find(const key_type& __k) const
[gcc r13-9303] libstdc++: backport inline keyword on std::find
https://gcc.gnu.org/g:f00e19a0491223d2782f9f863a4f3a31d509f76b commit r13-9303-gf00e19a0491223d2782f9f863a4f3a31d509f76b Author: Tamar Christina Date: Fri Jan 10 21:37:40 2025 + libstdc++: backport inline keyword on std::find This is a backport version of the same patch as g:18aff7644ad1e44dc146d36a2b7e397977aa47ac In GCC 12 there was a ~40% regression in the performance of hashmap->find. This regression came about accidentally: Before GCC 12 the find function was small enough that IPA would inline it even though it wasn't marked inline. In GCC-12 an optimization was added to perform a linear search when the entries in the hashmap are small. This increased the size of the function enough that IPA would no longer inline. Inlining had two benefits: 1. The return value is a reference, so it has to be returned and dereferenced even though the search loop may have already dereferenced it. 2. The pattern is a hard pattern to track for branch predictors. This causes a large number of branch misses if the value is immediately checked and branched on. i.e. if (a != m.end()) which is a common pattern. The patch fixes both these issues by adding the inline keyword to _M_locate to allow the inliner to consider inlining again. This and the other patches have been run through several benchmarks where the size, number of elements searched for and type (reference vs value) etc were tested. The change shows no statistical regression, but an average find improvement of ~27% and a range between ~10-60% improvements. Thanks, Tamar libstdc++-v3/ChangeLog: * include/bits/hashtable.h (find): Add inline keyword. 
(cherry picked from commit e4a9fb7448a687f4fd7e621942006c2820b803d6) Diff: --- libstdc++-v3/include/bits/hashtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 1b5d0a7f42f4..c9ae0ed2c013 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1660,7 +1660,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename _ExtractKey, typename _Equal, typename _Hash, typename _RangeHash, typename _Unused, typename _RehashPolicy, typename _Traits> -auto +auto inline _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: find(const key_type& __k) @@ -1683,7 +1683,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename _ExtractKey, typename _Equal, typename _Hash, typename _RangeHash, typename _Unused, typename _RehashPolicy, typename _Traits> -auto +auto inline _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: find(const key_type& __k) const
[gcc r15-6766] testsuite: arm: Use -std=c17 and effective-target arm_arch_v5te_thumb
https://gcc.gnu.org/g:f447c3c0dff4c24acc4c3130925b95ff401cb1ec commit r15-6766-gf447c3c0dff4c24acc4c3130925b95ff401cb1ec Author: Torbjörn SVENSSON Date: Fri Dec 27 09:18:36 2024 +0100 testsuite: arm: Use -std=c17 and effective-target arm_arch_v5te_thumb With -std=c23, the following errors are now emitted as the function prototype and implementation does not match: .../pr59858.c: In function 're_search_internal': .../pr59858.c:95:17: error: too many arguments to function 'check_matching' .../pr59858.c:75:12: note: declared here .../pr59858.c: At top level: .../pr59858.c:100:1: error: conflicting types for 'check_matching'; have 'int(re_match_context_t *, int *)' .../pr59858.c:75:12: note: previous declaration of 'check_matching' with type 'int(void)' .../pr59858.c: In function 'check_matching': .../pr59858.c:106:14: error: too many arguments to function 'transit_state' .../pr59858.c:77:23: note: declared here .../pr59858.c: At top level: .../pr59858.c:111:1: error: conflicting types for 'transit_state'; have 're_dfastate_t *(re_match_context_t *, re_dfastate_t *)' .../pr59858.c:77:23: note: previous declaration of 'transit_state' with type 're_dfastate_t *(void)' .../pr59858.c: In function 'transit_state': .../pr59858.c:116:7: error: too many arguments to function 'build_trtable' .../pr59858.c:79:12: note: declared here .../pr59858.c: At top level: .../pr59858.c:121:1: error: conflicting types for 'build_trtable'; have 'int(const re_dfa_t *, re_dfastate_t *)' .../pr59858.c:79:12: note: previous declaration of 'build_trtable' with type 'int(void)' Adding -std=c17 removes these errors. Also, updated test case to use -mcpu=unset/-march=unset feature introduced in r15-3606-g7d6c6a0d15c. gcc/testsuite/ChangeLog: * gcc.target/arm/pr59858.c: Use -std=c17 and effective-target arm_arch_v5te_thumb. 
Signed-off-by: Torbjörn SVENSSON Diff: --- gcc/testsuite/gcc.target/arm/pr59858.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/arm/pr59858.c b/gcc/testsuite/gcc.target/arm/pr59858.c index 9336edfce277..8fc63b57af4c 100644 --- a/gcc/testsuite/gcc.target/arm/pr59858.c +++ b/gcc/testsuite/gcc.target/arm/pr59858.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-march=armv5te -fno-builtin -mfloat-abi=soft -mthumb -fno-stack-protector -Os -fno-tree-loop-optimize -fno-tree-dominator-opts -fPIC -w -fpermissive" } */ +/* { dg-options "-std=c17 -fno-builtin -fno-stack-protector -Os -fno-tree-loop-optimize -fno-tree-dominator-opts -fPIC -w -fpermissive" } */ /* { dg-require-effective-target fpic } */ -/* { dg-skip-if "Incompatible command line options: -mfloat-abi=soft -mfloat-abi=hard" { *-*-* } { "-mfloat-abi=hard" } { "" } } */ /* { dg-require-effective-target arm_arch_v5te_thumb_ok } */ +/* { dg-add-options arm_arch_v5te_thumb } */ typedef enum { REG_ENOSYS = -1,
[gcc r15-6767] nvptx: Add '__builtin_stack_address()' test case
https://gcc.gnu.org/g:91dec10f8b7502bdd333d75ab7a9e23a58c3f32d commit r15-6767-g91dec10f8b7502bdd333d75ab7a9e23a58c3f32d Author: Thomas Schwinge Date: Fri Dec 13 11:40:01 2024 +0100 nvptx: Add '__builtin_stack_address()' test case Documenting the status quo. gcc/testsuite/ * gcc.target/nvptx/__builtin_stack_address-1.c: New. Diff: --- .../gcc.target/nvptx/__builtin_stack_address-1.c | 36 ++ 1 file changed, 36 insertions(+) diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_stack_address-1.c b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_address-1.c new file mode 100644 index ..5e976dc384bc --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/__builtin_stack_address-1.c @@ -0,0 +1,36 @@ +/* Document what we do for '__builtin_stack_address()'. */ + +/* { dg-do compile } + TODO We can't 'assemble' this -- it's invalid PTX code. */ +/* { dg-options -O3 } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { check-function-bodies {** } {} } } */ + +void sink(void *); + +void f(void) +{ + void *p; + p = __builtin_stack_address(); + sink(p); +} +/* +** f: +** \.visible \.func f +** { +** { +** \.param\.u64 %out_arg1; +** st\.param\.u64 \[%out_arg1\], %stack; +** call sink, \(%out_arg1\); +** } +** ret; +*/ + +/* The concept of a '%stack' pointer doesn't apply like this for + '-mno-soft-stack': PTX "native" stacks (TODO), and for '-msoft-stack' in + this form also constitutes invalid PTX code (TODO). + + { dg-final { scan-assembler-not {%stack} { xfail *-*-* } } } */ + +/* As this is an internal-use built-in function, we don't bother with + emitting proper error diagnostics. */
[gcc r15-6759] c++: Fix up modules handling of namespace scope structured bindings
https://gcc.gnu.org/g:933f0c20d4ce1dba85e85d9d117cfd9f5376a945 commit r15-6759-g933f0c20d4ce1dba85e85d9d117cfd9f5376a945 Author: Jakub Jelinek Date: Fri Jan 10 10:31:12 2025 +0100 c++: Fix up modules handling of namespace scope structured bindings With the following patch I actually get a simple namespace scope structured binding working with modules. The core_vals change ensure we actually save/restore DECL_VALUE_EXPR even for namespace scope vars, the get_merge_kind is based on the assumption that structured bindings are always unique, one can't redeclare them and without it we really ICE because their base vars have no name. 2025-01-10 Jakub Jelinek * module.cc (trees_out::core_vals): Note DECL_VALUE_EXPR even for vars outside of functions. (trees_in::core_vals): Read in DECL_VALUE_EXPR even for vars outside of functions. (trees_out::get_merge_kind): Make DECL_DECOMPOSITION_P MK_unique. * g++.dg/modules/decomp-2_b.C: New test. * g++.dg/modules/decomp-2_a.H: New file. Diff: --- gcc/cp/module.cc | 21 +++-- gcc/testsuite/g++.dg/modules/decomp-2_a.H | 11 +++ gcc/testsuite/g++.dg/modules/decomp-2_b.C | 11 +++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index fec820603521..7288c46a7baa 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -6318,7 +6318,11 @@ trees_out::core_vals (tree t) case VAR_DECL: if (DECL_CONTEXT (t) && TREE_CODE (DECL_CONTEXT (t)) != FUNCTION_DECL) - break; + { + if (DECL_HAS_VALUE_EXPR_P (t)) + WT (DECL_VALUE_EXPR (t)); + break; + } /* FALLTHROUGH */ case RESULT_DECL: @@ -6848,7 +6852,14 @@ trees_in::core_vals (tree t) case VAR_DECL: if (DECL_CONTEXT (t) && TREE_CODE (DECL_CONTEXT (t)) != FUNCTION_DECL) - break; + { + if (DECL_HAS_VALUE_EXPR_P (t)) + { + tree val = tree_node (); + SET_DECL_VALUE_EXPR (t, val); + } + break; + } /* FALLTHROUGH */ case RESULT_DECL: @@ -10990,6 +11001,12 @@ trees_out::get_merge_kind (tree decl, depset *dep) break; } + if (DECL_DECOMPOSITION_P (decl)) + { + 
mk = MK_unique; + break; + } + if (IDENTIFIER_ANON_P (DECL_NAME (decl))) { if (RECORD_OR_UNION_TYPE_P (ctx)) diff --git a/gcc/testsuite/g++.dg/modules/decomp-2_a.H b/gcc/testsuite/g++.dg/modules/decomp-2_a.H new file mode 100644 index ..df2d82abcbc1 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/decomp-2_a.H @@ -0,0 +1,11 @@ +// { dg-additional-options -fmodule-header } +// { dg-module-cmi {} } + +struct A { + int a, b, c; +}; + +namespace { +A d = { 1, 2, 3 }; +auto [a, b, c] = d; +} diff --git a/gcc/testsuite/g++.dg/modules/decomp-2_b.C b/gcc/testsuite/g++.dg/modules/decomp-2_b.C new file mode 100644 index ..0353c8e87c93 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/decomp-2_b.C @@ -0,0 +1,11 @@ +// { dg-do run } +// { dg-additional-options "-fmodules-ts" } + +import "decomp-2_a.H"; + +int +main () +{ + if (a != 1 || b != 2 || c != 3) +__builtin_abort (); +}
[gcc r15-6758] fortran: use_iso_fortran_env_module tweaks [PR118337]
https://gcc.gnu.org/g:f5e488c0ee663c2355e6d712ffc15da215d9cd96 commit r15-6758-gf5e488c0ee663c2355e6d712ffc15da215d9cd96 Author: Jakub Jelinek Date: Fri Jan 10 10:28:18 2025 +0100 fortran: use_iso_fortran_env_module tweaks [PR118337] This patch adds a comment to explain why we initialize the non-constant elts of symbol array separately and checking assert to verify that separate initialization bumps the iterator for each macro. 2025-01-10 Jakub Jelinek PR fortran/118337 * module.cc (use_iso_fortran_env_module): Add a comment explaining the optimization performed. Add gcc_checking_assert that i was incremented for all the elements. Formatting fix. Diff: --- gcc/fortran/module.cc | 17 - 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/gcc/fortran/module.cc b/gcc/fortran/module.cc index 63d0cdbee9f7..490eaa97a49d 100644 --- a/gcc/fortran/module.cc +++ b/gcc/fortran/module.cc @@ -7122,6 +7122,13 @@ use_iso_fortran_env_module (void) #include "iso-fortran-env.def" { ISOFORTRANENV_INVALID, NULL, -1234, 0 } }; + /* We could have used c in the NAMED_{,U}INTCST macros + instead of 0, but then current g++ expands the initialization + as clearing the whole object followed by explicit stores of + all the non-zero elements (over 150), while by using 0s for + the non-constant initializers and initializing them afterwards + g++ will often copy everything from .rodata and then only override + over 30 non-constant ones. */ i = 0; #define NAMED_INTCST(a,b,c,d) symbol[i++].value = c; #define NAMED_UINTCST(a,b,c,d) symbol[i++].value = c; @@ -7130,6 +7137,7 @@ use_iso_fortran_env_module (void) #define NAMED_FUNCTION(a,b,c,d) i++; #define NAMED_SUBROUTINE(a,b,c,d) i++; #include "iso-fortran-env.def" + gcc_checking_assert (i == (int) ARRAY_SIZE (symbol) - 1); /* Generate the symbol for the module itself. 
*/ mod_symtree = gfc_find_symtree (gfc_current_ns->sym_root, mod); @@ -7288,12 +7296,11 @@ use_iso_fortran_env_module (void) break; #define NAMED_FUNCTION(a,b,c,d) \ - case a: + case a: #include "iso-fortran-env.def" - create_intrinsic_function (symbol[i].name, symbol[i].id, mod, -INTMOD_ISO_FORTRAN_ENV, false, -NULL); - break; + create_intrinsic_function (symbol[i].name, symbol[i].id, mod, + INTMOD_ISO_FORTRAN_ENV, false, NULL); + break; default: gcc_unreachable ();
[gcc r15-6760] c++: Fix up ICEs on constexpr inline asm strings in templates [PR118277]
https://gcc.gnu.org/g:38a13ea4117b96e467f78b3f86d737ecbe326935 commit r15-6760-g38a13ea4117b96e467f78b3f86d737ecbe326935 Author: Jakub Jelinek Date: Fri Jan 10 10:32:36 2025 +0100 c++: Fix up ICEs on constexpr inline asm strings in templates [PR118277] The following patch fixes ICEs when the new inline asm syntax to use C++26 static_assert-like constant expressions in place of string literals is used in templates. As finish_asm_stmt doesn't do any checking for processing_template_decl, this patch also just defers handling those strings in templates rather than say trying fold_non_dependent_expr and if the result is non-dependent and usable, try to extract. The patch also reverts changes to cp_parser_asm_specification_opt which allowed something like void foo () asm ((std::string_view ("bar"))); but it would be really hard to support template void baz () asm ((std::string_view ("qux"))); (especially with dependent constant expression). And the patch adds extensive test coverage for the various errors. 2025-01-10 Jakub Jelinek PR c++/118277 * cp-tree.h (finish_asm_string_expression): Declare. * semantics.cc (finish_asm_string_expression): New function. (finish_asm_stmt): Use it. * parser.cc (cp_parser_asm_string_expression): Likewise. Wrap string into PAREN_EXPR in the ("") case. (cp_parser_asm_definition): Don't ICE if finish_asm_stmt returns error_mark_node. (cp_parser_asm_specification_opt): Revert 2024-06-24 changes. * pt.cc (tsubst_stmt): Don't ICE if finish_asm_stmt returns error_mark_node. * g++.dg/cpp1z/constexpr-asm-4.C: New test. * g++.dg/cpp1z/constexpr-asm-5.C: New test. 
Diff: --- gcc/cp/cp-tree.h | 1 + gcc/cp/parser.cc | 21 +- gcc/cp/pt.cc | 9 +- gcc/cp/semantics.cc | 43 gcc/testsuite/g++.dg/cpp1z/constexpr-asm-4.C | 83 ++ gcc/testsuite/g++.dg/cpp1z/constexpr-asm-5.C | 367 +++ 6 files changed, 509 insertions(+), 15 deletions(-) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index c08494705e9f..b65a2677b4ec 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -7947,6 +7947,7 @@ enum { extern tree begin_compound_stmt(unsigned int); extern void finish_compound_stmt (tree); +extern tree finish_asm_string_expression (location_t, tree); extern tree finish_asm_stmt(location_t, int, tree, tree, tree, tree, tree, bool, bool); extern tree finish_label_stmt (tree); diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index f548dc31c2b8..80bc2d8e9e1e 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -23107,15 +23107,11 @@ cp_parser_asm_string_expression (cp_parser *parser) matching_parens parens; parens.consume_open (parser); tree string = cp_parser_constant_expression (parser); - if (string != error_mark_node) - string = cxx_constant_value (string, tf_error); - cexpr_str cstr (string); - if (!cstr.type_check (tok->location)) - return error_mark_node; - if (!cstr.extract (tok->location, string)) - string = error_mark_node; parens.require_close (parser); - return string; + if (TREE_CODE (string) == STRING_CST) + string = build1_loc (tok->location, PAREN_EXPR, TREE_TYPE (string), +string); + return finish_asm_string_expression (tok->location, string); } else if (!cp_parser_is_string_literal (tok)) { @@ -23396,7 +23392,7 @@ cp_parser_asm_definition (cp_parser* parser) inputs, clobbers, labels, inline_p, false); /* If the extended syntax was not used, mark the ASM_EXPR. */ - if (!extended_p) + if (!extended_p && asm_stmt != error_mark_node) { tree temp = asm_stmt; if (TREE_CODE (temp) == CLEANUP_POINT_EXPR) @@ -30044,7 +30040,7 @@ cp_parser_yield_expression (cp_parser* parser) /* Parse an (optional) asm-specification. 
asm-specification: - asm ( asm-string-expr ) + asm ( string-literal ) If the asm-specification is present, returns a STRING_CST corresponding to the string-literal. Otherwise, returns @@ -30067,8 +30063,9 @@ cp_parser_asm_specification_opt (cp_parser* parser) parens.require_open (parser); /* Look for the string-literal. */ - tree asm_specification = cp_parser_asm_string_expression (parser); - + tree asm_specification = cp_parser_string_literal (parser, +/*translate=*/false, +
[gcc r15-6765] ada: Incorrect accessibility level for library level subprograms
https://gcc.gnu.org/g:3ff216b7121f832c87eaa03ece327c1e113e155a commit r15-6765-g3ff216b7121f832c87eaa03ece327c1e113e155a Author: squirek Date: Fri Nov 1 16:33:02 2024 + ada: Incorrect accessibility level for library level subprograms The patch fixes an issue in the compiler whereby accessibility level calculations for objects declared within library-level subprograms were done incorrectly - potentially allowing runtime accessibility checks to spuriously pass. gcc/ada/ChangeLog: * accessibility.adb: (Innermost_master_Scope_Depth): Add special case for expressions within library level subprograms. Diff: --- gcc/ada/accessibility.adb | 9 + 1 file changed, 9 insertions(+) diff --git a/gcc/ada/accessibility.adb b/gcc/ada/accessibility.adb index b808e88b128a..8c85173aa34c 100644 --- a/gcc/ada/accessibility.adb +++ b/gcc/ada/accessibility.adb @@ -187,6 +187,15 @@ package body Accessibility is or else (Nkind (Node_Par) = N_Object_Renaming_Declaration and then Comes_From_Iterator (Node_Par)) then + -- Handle the case of expressions within library level + -- subprograms here by adding one to the level modifier. + + if Encl_Scop = Standard_Standard +and then Nkind (Node_Par) = N_Subprogram_Body + then + Master_Lvl_Modifier := Master_Lvl_Modifier + 1; + end if; + -- Note that in some rare cases the scope depth may not be -- set, for example, when we are in the middle of analyzing -- a type and the enclosing scope is said type. In that case
[gcc r15-6761] ada: Reorder syntactic node fields to match the Ada RM grammar
https://gcc.gnu.org/g:2b27522090c4f98081db0dbfa035ae6501bbf498 commit r15-6761-g2b27522090c4f98081db0dbfa035ae6501bbf498 Author: Piotr Trojanek Date: Fri Dec 20 13:00:37 2024 +0100 ada: Reorder syntactic node fields to match the Ada RM grammar Several AST nodes had their syntactic fields in a different order than specified by the Ada RM grammar. With the variable-size nodes this no longer had an impact on the AST memory layout and was making the automatically generated Nmake routines a bit unintuitive to use. gcc/ada/ChangeLog: * exp_ch3.adb (Predef_Spec_Or_Body): Add explicit parameter associations, because now the Empty_List actual parameter would be confused as being for the Aspect_Specifications formal parameter. * gen_il-gen-gen_nodes.adb (Gen_Nodes): Reorder syntactic fields. * sem_util.adb (Declare_Indirect_Temp): Add explicit parameter association, because now the parameter will be interpreted as a subpool handle name. Diff: --- gcc/ada/exp_ch3.adb | 5 ++- gcc/ada/gen_il-gen-gen_nodes.adb | 78 gcc/ada/sem_util.adb | 9 ++--- 3 files changed, 48 insertions(+), 44 deletions(-) diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb index 6c69e63b2ddb..d95b91780306 100644 --- a/gcc/ada/exp_ch3.adb +++ b/gcc/ada/exp_ch3.adb @@ -12399,7 +12399,10 @@ package body Exp_Ch3 is -- on the body to add the appropriate stuff. elsif For_Body then - return Make_Subprogram_Body (Loc, Spec, Empty_List, Empty); + return Make_Subprogram_Body (Loc, + Specification => Spec, + Declarations => Empty_List, + Handled_Statement_Sequence => Empty); -- For the case of an Input attribute predefined for an abstract type, -- generate an abstract specification. 
This will never be called, but we diff --git a/gcc/ada/gen_il-gen-gen_nodes.adb b/gcc/ada/gen_il-gen-gen_nodes.adb index c512d85dbb26..ca46bcebdd98 100644 --- a/gcc/ada/gen_il-gen-gen_nodes.adb +++ b/gcc/ada/gen_il-gen-gen_nodes.adb @@ -377,10 +377,10 @@ begin -- Gen_IL.Gen.Gen_Nodes Sm (Is_Qualified_Universal_Literal, Flag))); Cc (N_Quantified_Expression, N_Subexpr, - (Sy (Iterator_Specification, Node_Id, Default_Empty), + (Sy (All_Present, Flag), +Sy (Iterator_Specification, Node_Id, Default_Empty), Sy (Loop_Parameter_Specification, Node_Id, Default_Empty), -Sy (Condition, Node_Id, Default_Empty), -Sy (All_Present, Flag))); +Sy (Condition, Node_Id, Default_Empty))); Cc (N_Aggregate, N_Subexpr, (Sy (Expressions, List_Id, Default_No_List), @@ -395,9 +395,9 @@ begin -- Gen_IL.Gen.Gen_Nodes Sm (Has_Self_Reference, Flag))); Cc (N_Allocator, N_Subexpr, - (Sy (Expression, Node_Id, Default_Empty), -Sy (Subpool_Handle_Name, Node_Id, Default_Empty), + (Sy (Subpool_Handle_Name, Node_Id, Default_Empty), Sy (Null_Exclusion_Present, Flag, Default_False), +Sy (Expression, Node_Id, Default_Empty), Sm (For_Special_Return_Object, Flag), Sm (Do_Storage_Check, Flag), Sm (Is_Dynamic_Coextension, Flag), @@ -494,11 +494,11 @@ begin -- Gen_IL.Gen.Gen_Nodes Sm (Prev_Ids, Flag))); Cc (N_Entry_Declaration, N_Declaration, - (Sy (Defining_Identifier, Node_Id), + (Sy (Must_Override, Flag), +Sy (Must_Not_Override, Flag), +Sy (Defining_Identifier, Node_Id), Sy (Discrete_Subtype_Definition, Node_Id, Default_Empty), Sy (Parameter_Specifications, List_Id, Default_No_List), -Sy (Must_Override, Flag), -Sy (Must_Not_Override, Flag), Sy (Aspect_Specifications, List_Id, Default_No_List), Sm (Corresponding_Body, Node_Id))); @@ -513,8 +513,8 @@ begin -- Gen_IL.Gen.Gen_Nodes Sy (In_Present, Flag), Sy (Out_Present, Flag), Sy (Null_Exclusion_Present, Flag, Default_False), -Sy (Subtype_Mark, Node_Id, Default_Empty), Sy (Access_Definition, Node_Id, Default_Empty), +Sy (Subtype_Mark, Node_Id, Default_Empty), 
Sy (Default_Expression, Node_Id, Default_Empty), Sy (Aspect_Specifications, List_Id, Default_No_List), Sm (More_Ids, Flag), @@ -545,17 +545,17 @@ begin -- Gen_IL.Gen.Gen_Nodes Cc (N_Iterator_Specification, N_Declaration, (Sy (Defining_Identifier, Node_Id), -Sy (Name, Node_Id, Default_Empty), -Sy (Reverse_Present, Flag), +Sy (Subtype_Indication, Node_Id, Default_Empty), Sy (Of_Present, Flag), -Sy (Iterator_Filter, Node_Id, Default_Empty), -Sy (Subtype_Indication, Node_Id, Default_Empty))); +Sy (Reverse_Present, Flag), +Sy (Name, Node_Id, Default_Empty), +Sy (Iterator_Filter, No
[gcc r15-6763] ada: Set syntactic node properties immediately when creating the nodes
https://gcc.gnu.org/g:c43a53307d2e26663e9a2aca39672aea39c25e7b commit r15-6763-gc43a53307d2e26663e9a2aca39672aea39c25e7b Author: Piotr Trojanek Date: Mon Dec 23 10:05:47 2024 +0100 ada: Set syntactic node properties immediately when creating the nodes When creating a node, we can directly set its syntactic properties. Code cleanup; semantics is unaffected. gcc/ada/ChangeLog: * contracts.adb (Build_Call_Helper_Decl): Tune whitespace. * exp_attr.adb (Analyze_Attribute): Set Of_Present while creating the node; reorder setting Subtype_Indication to match the syntax order. * exp_ch3.adb (Build_Equivalent_Aggregate): Likewise for Box_Present and Expression properties. * sem_ch12.adb (Analyze_Formal_Derived_Type): Set type properties when creating the nodes. * sem_ch3.adb (Check_Anonymous_Access_Component): Likewise. Diff: --- gcc/ada/contracts.adb | 4 ++-- gcc/ada/exp_attr.adb | 8 gcc/ada/exp_ch3.adb | 5 ++--- gcc/ada/sem_ch12.adb | 15 +-- gcc/ada/sem_ch3.adb | 13 ++--- 5 files changed, 19 insertions(+), 26 deletions(-) diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb index 1c9161b8a37e..8b94a67639f2 100644 --- a/gcc/ada/contracts.adb +++ b/gcc/ada/contracts.adb @@ -4066,8 +4066,8 @@ package body Contracts is begin Spec := Build_Call_Helper_Spec (Helper_Id); -Set_Must_Override (Spec, False); -Set_Must_Not_Override (Spec, False); +Set_Must_Override (Spec, False); +Set_Must_Not_Override (Spec, False); Set_Is_Inlined (Helper_Id); Set_Is_Public (Helper_Id); diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb index cc42d6470601..b896228a70e3 100644 --- a/gcc/ada/exp_attr.adb +++ b/gcc/ada/exp_attr.adb @@ -6422,10 +6422,10 @@ package body Exp_Attr is begin Iter := Make_Iterator_Specification (Loc, -Defining_Identifier => Elem, -Name => Relocate_Node (Prefix (N)), -Subtype_Indication => Empty); - Set_Of_Present (Iter); + Defining_Identifier => Elem, + Subtype_Indication => Empty, + Of_Present => True, + Name=> Relocate_Node (Prefix (N))); New_Loop := 
Make_Loop_Statement (Loc, Iteration_Scheme => diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb index d95b91780306..0dfd8102df18 100644 --- a/gcc/ada/exp_ch3.adb +++ b/gcc/ada/exp_ch3.adb @@ -1349,9 +1349,8 @@ package body Exp_Ch3 is Append_To (Component_Associations (Aggr), Make_Component_Association (Loc, - Choices=> New_List (Make_Others_Choice (Loc)), - Expression => Empty)); - Set_Box_Present (Last (Component_Associations (Aggr))); + Choices => New_List (Make_Others_Choice (Loc)), + Box_Present => True)); if Typ /= Full_Typ then Analyze_And_Resolve (Aggr, Full_View (Base_Type (Full_Typ))); diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb index 088a9ccfb589..dad8c73729e9 100644 --- a/gcc/ada/sem_ch12.adb +++ b/gcc/ada/sem_ch12.adb @@ -3097,13 +3097,11 @@ package body Sem_Ch12 is Defining_Identifier => T, Discriminant_Specifications => Discriminant_Specifications (N), Unknown_Discriminants_Present => Unk_Disc, + Abstract_Present => Abstract_Present (Def), + Limited_Present => Limited_Present (Def), Subtype_Indication=> Subtype_Mark (Def), + Synchronized_Present => Synchronized_Present (Def), Interface_List=> Interface_List (Def)); - - Set_Abstract_Present (New_N, Abstract_Present (Def)); - Set_Limited_Present (New_N, Limited_Present (Def)); - Set_Synchronized_Present (New_N, Synchronized_Present (Def)); - else New_N := Make_Full_Type_Declaration (Loc, @@ -3112,12 +3110,9 @@ package body Sem_Ch12 is Discriminant_Specifications (Parent (T)), Type_Definition => Make_Derived_Type_Definition (Loc, + Abstract_Present => Abstract_Present (Def), + Limited_Present=> Limited_Present (Def), Subtype_Indication => Subtype_Mark (Def))); - - Set_Abstract_Present - (Type_Definition (New_N), Abstract_Present (Def)); - Set_Limited_Present - (Type_Definition (New_N), Limited_Present (Def)); end if; Rewrite (N, New_N); diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem
[gcc r15-6764] ada: Remove empty line.
https://gcc.gnu.org/g:c92f9f0d949c5c36fbd257a80644ae7231e396fd commit r15-6764-gc92f9f0d949c5c36fbd257a80644ae7231e396fd Author: Marc Poulhiès Date: Thu Jan 2 16:03:32 2025 +0100 ada: Remove empty line. gcc/ada/ChangeLog: * env.h: Remove last empty line. Diff: --- gcc/ada/env.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcc/ada/env.h b/gcc/ada/env.h index b80b7e9a0fca..58a92b9d7f23 100644 --- a/gcc/ada/env.h +++ b/gcc/ada/env.h @@ -33,5 +33,4 @@ extern void __gnat_getenv (char *name, int *len, char **value); extern void __gnat_setenv (char *name, char *value); extern char **__gnat_environ (void); extern void __gnat_unsetenv (char *name); -extern void __gnat_clearenv (void); - +extern void __gnat_clearenv (void);
[gcc r15-6762] ada: Turn Is_Effective_Use_Clause from syntactic to semantic flag
https://gcc.gnu.org/g:8c850ddf0f2648e9be6067bb76cc2322c1a8b6a5 commit r15-6762-g8c850ddf0f2648e9be6067bb76cc2322c1a8b6a5 Author: Piotr Trojanek Date: Fri Dec 20 13:09:22 2024 +0100 ada: Turn Is_Effective_Use_Clause from syntactic to semantic flag For a USE clause being effective is a semantic property, not a syntactic. AST cleanup; behavior is unaffected. gcc/ada/ChangeLog: * gen_il-gen-gen_nodes.adb (Gen_Nodes): Change Is_Effective_Use_Clause from syntactic to semantic property. Diff: --- gcc/ada/gen_il-gen-gen_nodes.adb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/ada/gen_il-gen-gen_nodes.adb b/gcc/ada/gen_il-gen-gen_nodes.adb index ca46bcebdd98..1f5dc6d3803e 100644 --- a/gcc/ada/gen_il-gen-gen_nodes.adb +++ b/gcc/ada/gen_il-gen-gen_nodes.adb @@ -782,7 +782,7 @@ begin -- Gen_IL.Gen.Gen_Nodes Cc (N_Use_Package_Clause, N_Later_Decl_Item, (Sy (Name, Node_Id, Default_Empty), -Sy (Is_Effective_Use_Clause, Flag), +Sm (Is_Effective_Use_Clause, Flag), Sm (Entity_Or_Associated_Node, Node_Id), -- just Associated_Node Sm (Hidden_By_Use_Clause, Elist_Id), Sm (More_Ids, Flag), @@ -1497,8 +1497,8 @@ begin -- Gen_IL.Gen.Gen_Nodes Cc (N_Use_Type_Clause, Node_Kind, (Sy (Subtype_Mark, Node_Id, Default_Empty), -Sy (Is_Effective_Use_Clause, Flag), Sy (All_Present, Flag), +Sm (Is_Effective_Use_Clause, Flag), Sm (Hidden_By_Use_Clause, Elist_Id), Sm (More_Ids, Flag), Sm (Next_Use_Clause, Node_Id),
[gcc r15-6769] ipa-cp: Fold-convert values when necessary (PR 118138)
https://gcc.gnu.org/g:d019ab4f115caab48316c185c007765719e93052 commit r15-6769-gd019ab4f115caab48316c185c007765719e93052 Author: Martin Jambor Date: Sat Jan 4 20:40:07 2025 +0100 ipa-cp: Fold-convert values when necessary (PR 118138) PR 118138 and quite a few duplicates that it has acquired in a short time show that even though we are careful to make sure we do not lose any bits when newly allowing type conversions in jump-functions, we still need to perform the fold conversions during IPA constant propagation and not just at the end in order to properly perform sign-extensions or zero-extensions as appropriate. This patch does just that, changing a safety predicate we already use at the appropriate places to return the necessary type. gcc/ChangeLog: 2025-01-03 Martin Jambor PR ipa/118138 * ipa-cp.cc (ipacp_value_safe_for_type): Return the appropriate type instead of a bool, accept NULL_TREE VALUEs. (propagate_vals_across_arith_jfunc): Use the new returned value of ipacp_value_safe_for_type. (propagate_vals_across_ancestor): Likewise. (propagate_scalar_across_jump_function): Likewise. gcc/testsuite/ChangeLog: 2025-01-03 Martin Jambor PR ipa/118138 * gcc.dg/ipa/pr118138.c: New test. Diff: --- gcc/ipa-cp.cc | 33 +++-- gcc/testsuite/gcc.dg/ipa/pr118138.c | 30 ++ 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index 294389fba4c7..d89324a00775 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -1448,19 +1448,23 @@ initialize_node_lattices (struct cgraph_node *node) } } -/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type - PARAM_TYPE. */ +/* Return VALUE if it is NULL_TREE or if it can be directly safely IPA-CP + propagated to a parameter of type PARAM_TYPE, or return a fold-converted + VALUE to PARAM_TYPE if that is possible. Return NULL_TREE otherwise. 
*/ -static bool +static tree ipacp_value_safe_for_type (tree param_type, tree value) { + if (!value) +return NULL_TREE; tree val_type = TREE_TYPE (value); if (param_type == val_type - || useless_type_conversion_p (param_type, val_type) - || fold_convertible_p (param_type, value)) -return true; + || useless_type_conversion_p (param_type, val_type)) +return value; + if (fold_convertible_p (param_type, value)) +return fold_convert (param_type, value); else -return false; +return NULL_TREE; } /* Return the result of a (possibly arithmetic) operation on the constant @@ -2210,8 +2214,8 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, { tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, src_val, res_type); - if (!cstval - || !ipacp_value_safe_for_type (res_type, cstval)) + cstval = ipacp_value_safe_for_type (res_type, cstval); + if (!cstval) break; ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, @@ -2235,8 +2239,8 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, src_val, res_type); - if (cstval - && ipacp_value_safe_for_type (res_type, cstval)) + cstval = ipacp_value_safe_for_type (res_type, cstval); + if (cstval) ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, src_offset); else @@ -2284,8 +2288,8 @@ propagate_vals_across_ancestor (struct cgraph_edge *cs, for (src_val = src_lat->values; src_val; src_val = src_val->next) { tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value); - - if (t && ipacp_value_safe_for_type (param_type, t)) + t = ipacp_value_safe_for_type (param_type, t); + if (t) ret |= dest_lat->add_value (t, cs, src_val, src_idx); else ret |= dest_lat->set_contains_variable (); @@ -2310,7 +2314,8 @@ propagate_scalar_across_jump_function (struct cgraph_edge *cs, if (jfunc->type == IPA_JF_CONST) { tree val = ipa_get_jf_constant (jfunc); - if (ipacp_value_safe_for_type (param_type, val)) + val = ipacp_value_safe_for_type (param_type, val); + if 
(val) return dest_lat->add_value (val, cs, NULL, 0); else return dest_lat->set_contains_variable (); diff --git a/gcc/testsuite/gcc.dg/ipa/pr118138.c b/gcc/testsuite/gcc.dg/ipa/pr118138.c new file mode 100644 index ..5c94253f58b2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr118138.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-inli
[gcc r14-11199] AArch64: correct Cortex-X4 MIDR
https://gcc.gnu.org/g:26f78a4249b051c7755a44ba1ab1743f4133b0c2 commit r14-11199-g26f78a4249b051c7755a44ba1ab1743f4133b0c2 Author: Tamar Christina Date: Fri Jan 10 21:33:57 2025 + AArch64: correct Cortex-X4 MIDR The Parts Num field for the MIDR for Cortex-X4 is wrong. It's currently the parts number for a Cortex-A720 (which does have the right number). The correct number can be found in the Cortex-X4 Technical Reference Manual [1] on page 382 in Issue Number 5. [1] https://developer.arm.com/documentation/102484/latest/ gcc/ChangeLog: * config/aarch64/aarch64-cores.def (AARCH64_CORE): Fix cortex-x4 parts num. Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index a919ab7d8a5a..b1eaf5512b57 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -185,7 +185,7 @@ AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8M AARCH64_CORE("cortex-x3", cortexx3, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4e, -1) -AARCH64_CORE("cortex-x4", cortexx4, cortexa57, V9_2A, (SVE2_BITPERM, MEMTAG, PROFILE), neoversen2, 0x41, 0xd81, -1) +AARCH64_CORE("cortex-x4", cortexx4, cortexa57, V9_2A, (SVE2_BITPERM, MEMTAG, PROFILE), neoversen2, 0x41, 0xd82, -1) AARCH64_CORE("cortex-x925", cortexx925, cortexa57, V9_2A, (SVE2_BITPERM, MEMTAG, PROFILE), neoversen2, 0x41, 0xd85, -1)
[gcc r15-6815] Use relations when simplifying MIN and MAX.
https://gcc.gnu.org/g:b0eeb540497c7b9dee01f8724f9a4978b53a12ae commit r15-6815-gb0eeb540497c7b9dee01f8724f9a4978b53a12ae Author: Andrew MacLeod Date: Fri Jan 10 13:33:01 2025 -0500 Use relations when simplifying MIN and MAX. Query for known relations between the operands, and pass that to fold_range to help simplify MIN and MAX relations. Make it type agnostic as well. Adapt testcases from DOM to EVRP (e suffix) and test floats (f suffix). PR tree-optimization/88575 gcc/ * vr-values.cc (simplify_using_ranges::fold_cond_with_ops): Query relation between op0 and op1 and utilize it. (simplify_using_ranges::simplify): Do not eliminate float checks. gcc/testsuite/ * gcc.dg/tree-ssa/minmax-27.c: Disable VRP. * gcc.dg/tree-ssa/minmax-27e.c: New. * gcc.dg/tree-ssa/minmax-27f.c: New. * gcc.dg/tree-ssa/minmax-28.c: Disable VRP. * gcc.dg/tree-ssa/minmax-28e.c: New. * gcc.dg/tree-ssa/minmax-28f.c: New. Diff: --- gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c | 118 + gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c | 118 + gcc/testsuite/gcc.dg/tree-ssa/minmax-28.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/minmax-28e.c | 117 gcc/testsuite/gcc.dg/tree-ssa/minmax-28f.c | 117 gcc/vr-values.cc | 13 +++- 7 files changed, 481 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c index 4b94203b0d05..a99af6eb521e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-dom2" } */ +/* { dg-options "-O2 -fdump-tree-dom2 -fno-tree-vrp" } */ int min1(int a, int b) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c new file mode 100644 index ..8498ffd20173 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27e.c @@ -0,0 +1,118 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-evrp" } */ + + +int 
min1(int a, int b) +{ +if (a <= b) +return a < b ? a : b; +return 0; +} + +int min2(int a, int b) +{ +if (a <= b) +return a > b ? b : a; +return 0; +} + +int min3(int a, int b) +{ +if (a < b) +return a < b ? a : b; +return 0; +} + +int min4(int a, int b) +{ +if (a < b) +return a > b ? b : a; +return 0; +} + +int min5(int a, int b) +{ +if (a <= b) +return a <= b ? a : b; +return 0; +} + +int min6(int a, int b) +{ +if (a <= b) +return a >= b ? b : a; +return 0; +} + +int min7(int a, int b) +{ +if (a < b) +return a <= b ? a : b; +return 0; +} + +int min8(int a, int b) +{ +if (b > a) +return a >= b ? b : a; +return 0; +} + +int min9(int a, int b) +{ +if (b >= a) +return a < b ? a : b; +return 0; +} + +int min10(int a, int b) +{ +if (b >= a) +return a > b ? b : a; +return 0; +} + +int min11(int a, int b) +{ +if (b > a) +return a < b ? a : b; +return 0; +} + +int min12(int a, int b) +{ +if (b > a) +return a > b ? b : a; +return 0; +} + +int min13(int a, int b) +{ +if (b >= a) +return a <= b ? a : b; +return 0; +} + +int min14(int a, int b) +{ +if (b >= a) +return a >= b ? b : a; +return 0; +} + +int min15(int a, int b) +{ +if (b > a) +return a <= b ? a : b; +return 0; +} + +int min16(int a, int b) +{ +if (b > a) +return a >= b ? b : a; +return 0; +} + +/* { dg-final { scan-tree-dump-not "MIN_EXPR" "evrp" } } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c new file mode 100644 index ..63398d4495f0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-27f.c @@ -0,0 +1,118 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -fdump-tree-evrp" } */ + + +float min1(float a, float b) +{ +if (a <= b) +return a < b ? a : b; +return 0.0; +} + +float min2(float a, float b) +{ +if (a <= b) +return a > b ? b : a; +return 0.0; +} + +float min3(float a, float b) +{ +if (a < b) +return a < b ? a : b; +return 0.0; +} + +float min4(float a, float b) +{ +if (a < b) +return a > b ? 
b : a; +return 0.0; +} + +float min5(float a, float b) +{ +if (a <= b) +return a <= b ? a : b; +return 0.0; +} + +float min6(float a, float b) +{ +if (a <= b) +return a >= b ? b : a; +return 0.0; +} + +float min7(float a, float b) +{ +if (a < b) +return a <= b ? a : b; +return 0.0; +} + +float mi
[gcc r13-9304] c++: ICE with noexcept and local specialization, again [PR114349]
https://gcc.gnu.org/g:0430ec8881d657ddedff6c9d9fa4ea5db125f462 commit r13-9304-g0430ec8881d657ddedff6c9d9fa4ea5db125f462 Author: Marek Polacek Date: Fri Jan 10 17:26:18 2025 -0500 c++: ICE with noexcept and local specialization, again [PR114349] Patrick noticed that my r14-9339-gdc6c3bfb59baab patch is wrong; we're dealing with a noexcept-spec there, not a noexcept-expr, so setting cp_noexcept_operand et al is incorrect. Back to the drawing board then. To fix noexcept84.C, we should probably avoid doing push_to_top_level in certain cases. maybe_push_to_top_level didn't work here as-is, so I changed it to not push to top level if decl_function_context is non-null, when we are not dealing with a lambda. This also fixes c++/114349, introduced by r14-9339. This GCC 13 backport squashes r14-9659 and r14-9339. PR c++/114349 gcc/cp/ChangeLog: * name-lookup.cc (maybe_push_to_top_level): For a non-lambda, don't push to top level if decl_function_context is non-null. * pt.cc (maybe_instantiate_noexcept): Use maybe_push_to_top_level. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/noexcept84.C: New test. * g++.dg/cpp0x/noexcept85.C: New test. * g++.dg/cpp0x/noexcept86.C: New test. Diff: --- gcc/cp/name-lookup.cc | 11 +++ gcc/cp/pt.cc| 4 ++-- gcc/testsuite/g++.dg/cpp0x/noexcept84.C | 32 gcc/testsuite/g++.dg/cpp0x/noexcept85.C | 33 + gcc/testsuite/g++.dg/cpp0x/noexcept86.C | 25 + 5 files changed, 99 insertions(+), 6 deletions(-) diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc index 7c61bc3bf611..1ea25f076b85 100644 --- a/gcc/cp/name-lookup.cc +++ b/gcc/cp/name-lookup.cc @@ -8244,10 +8244,13 @@ maybe_push_to_top_level (tree d) { /* Push if D isn't function-local, or is a lambda function, for which name resolution is already done. 
*/ - bool push_to_top -= !(current_function_decl - && !LAMBDA_FUNCTION_P (d) - && decl_function_context (d) == current_function_decl); + const bool push_to_top += (LAMBDA_FUNCTION_P (d) + || (TREE_CODE (d) == TYPE_DECL + && TREE_TYPE (d) + && LAMBDA_TYPE_P (TREE_TYPE (d))) + || !current_function_decl + || !decl_function_context (d)); if (push_to_top) push_to_top_level (); diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 5a6bf80c3d42..ddfa3c25d10e 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -26772,7 +26772,7 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) } else if (push_tinst_level (fn)) { - push_to_top_level (); + const bool push_to_top = maybe_push_to_top_level (fn); push_access_scope (fn); push_deferring_access_checks (dk_no_deferred); input_location = DECL_SOURCE_LOCATION (fn); @@ -26809,7 +26809,7 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) pop_deferring_access_checks (); pop_access_scope (fn); pop_tinst_level (); - pop_from_top_level (); + maybe_pop_from_top_level (push_to_top); } else spec = noexcept_false_spec; diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept84.C b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C new file mode 100644 index ..06f33264f77c --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C @@ -0,0 +1,32 @@ +// PR c++/114114 +// { dg-do compile { target c++11 } } + +template +constexpr void +test () +{ + constexpr bool is_yes = B; + struct S { +constexpr S() noexcept(is_yes) { } + }; + S s; +} + +constexpr bool foo() { return true; } + +template +constexpr void +test2 () +{ + constexpr T (*pfn)() = &foo; + struct S { +constexpr S() noexcept(pfn()) { } + }; + S s; +} + +int main() +{ + test(); + test2(); +} diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept85.C b/gcc/testsuite/g++.dg/cpp0x/noexcept85.C new file mode 100644 index ..b415bb46bc94 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept85.C @@ -0,0 +1,33 @@ +// PR c++/114349 +// { dg-do compile { target c++11 } } + +using A = struct {}; 
+template class, typename, typename> +using B = A; +template +using C = typename T::D; +struct E { + using D = B; +}; +template constexpr bool foo (A) { return false; } +template struct F { + using G = T; + using H = E; + F(const F &); + void operator=(F) noexcept(foo (H::D{})); +}; +template +using I = F; +template +using J = I; +struct K { + typedef J L; + L k; + K(); +}; +struct M { + bool bar () const; + K::L m; +}; +K n; +bool M::bar () const { n.k = m; return true; } diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept86.C b/gcc/testsuite/g++.dg/cpp0x/noexcept86.C new file mode 100644 index ..2d040c090f50 --- /dev/nu
[gcc r15-6819] c++: modules and function attributes
https://gcc.gnu.org/g:664bd76a23def2d458bb3c531486b4c220f29c11 commit r15-6819-g664bd76a23def2d458bb3c531486b4c220f29c11 Author: Jason Merrill Date: Fri Jan 10 18:00:20 2025 -0500 c++: modules and function attributes 30_threads/stop_token/stop_source/109339.cc was failing because we weren't representing attribute access on the METHOD_TYPE for _Stop_state_ref. The modules code expected attributes to appear on tt_variant_type and not on tt_derived_type, but that's backwards since build_type_attribute_variant gives a type with attributes its own TYPE_MAIN_VARIANT. gcc/cp/ChangeLog: * module.cc (trees_out::type_node): Write attributes for tt_derived_type, not tt_variant_type. (trees_in::tree_node): Likewise for reading. gcc/testsuite/ChangeLog: * g++.dg/modules/attrib-2_a.C: New test. * g++.dg/modules/attrib-2_b.C: New test. Diff: --- gcc/cp/module.cc | 17 + gcc/testsuite/g++.dg/modules/attrib-2_a.C | 12 gcc/testsuite/g++.dg/modules/attrib-2_b.C | 9 + 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 321d4164a6a7..c932c4d0a90d 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -9189,7 +9189,10 @@ trees_out::type_node (tree type) tree_node (raises); } - tree_node (TYPE_ATTRIBUTES (type)); + /* build_type_attribute_variant creates a new TYPE_MAIN_VARIANT, so +variants should all have the same set of attributes. */ + gcc_checking_assert (TYPE_ATTRIBUTES (type) + == TYPE_ATTRIBUTES (TYPE_MAIN_VARIANT (type))); if (streaming_p ()) { @@ -9406,6 +9409,8 @@ trees_out::type_node (tree type) break; } + tree_node (TYPE_ATTRIBUTES (type)); + /* We may have met the type during emitting the above. */ if (ref_node (type) != WK_none) { @@ -10090,6 +10095,13 @@ trees_in::tree_node (bool is_use) break; } + /* In the exporting TU, a derived type with attributes was built by + build_type_attribute_variant as a distinct copy, with itself as + TYPE_MAIN_VARIANT. 
We repeat that on import to get the version + without attributes as TYPE_CANONICAL. */ + if (tree attribs = tree_node ()) + res = cp_build_type_attribute_variant (res, attribs); + int tag = i (); if (!tag) { @@ -10133,9 +10145,6 @@ trees_in::tree_node (bool is_use) TYPE_USER_ALIGN (res) = true; } - if (tree attribs = tree_node ()) - res = cp_build_type_attribute_variant (res, attribs); - int quals = i (); if (quals >= 0 && !get_overrun ()) res = cp_build_qualified_type (res, quals); diff --git a/gcc/testsuite/g++.dg/modules/attrib-2_a.C b/gcc/testsuite/g++.dg/modules/attrib-2_a.C new file mode 100644 index ..96f667ceec8c --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/attrib-2_a.C @@ -0,0 +1,12 @@ +// { dg-additional-options "-fmodules -Wno-global-module" } +// { dg-module-cmi M } + +export module M; + +export +{ + struct A { int i; }; + + __attribute ((access (none, 1))) + void f(const A&); +} diff --git a/gcc/testsuite/g++.dg/modules/attrib-2_b.C b/gcc/testsuite/g++.dg/modules/attrib-2_b.C new file mode 100644 index ..c12ad117ce4f --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/attrib-2_b.C @@ -0,0 +1,9 @@ +// { dg-additional-options "-fmodules -Wmaybe-uninitialized" } + +import M; + +int main() +{ + A a; + f(a); +}
[gcc r15-6818] c++: modules and class attributes
https://gcc.gnu.org/g:fdabd93cde4aae38d6a67fe0927eca8cea1b22b9 commit r15-6818-gfdabd93cde4aae38d6a67fe0927eca8cea1b22b9 Author: Jason Merrill Date: Sat Nov 23 10:00:18 2024 +0100 c++: modules and class attributes std/time/traits/is_clock.cc was getting a warning about applying the deprecated attribute to a variant of auto_ptr, which was wrong because it's on the primary type. This turned out to be because we were ignoring the attributes on the definition of auto_ptr because the forward declaration in unique_ptr.h has no attributes. We need to merge attributes as usual in a redeclaration. gcc/cp/ChangeLog: * module.cc (trees_in::decl_value): Merge attributes. gcc/testsuite/ChangeLog: * g++.dg/modules/attrib-1_a.C: New test. * g++.dg/modules/attrib-1_b.C: New test. Diff: --- gcc/cp/module.cc | 4 gcc/testsuite/g++.dg/modules/attrib-1_a.C | 13 + gcc/testsuite/g++.dg/modules/attrib-1_b.C | 10 ++ 3 files changed, 27 insertions(+) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 4fbe522264b3..321d4164a6a7 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -8637,6 +8637,10 @@ trees_in::decl_value () TYPE_STUB_DECL (type) = stub_decl ? stub_decl : inner; if (stub_decl) TREE_TYPE (stub_decl) = type; + + /* Handle separate declarations with different attributes. 
*/ + tree &eattr = TYPE_ATTRIBUTES (TREE_TYPE (existing)); + eattr = merge_attributes (eattr, TYPE_ATTRIBUTES (type)); } if (inner_tag) diff --git a/gcc/testsuite/g++.dg/modules/attrib-1_a.C b/gcc/testsuite/g++.dg/modules/attrib-1_a.C new file mode 100644 index ..d5f89d0c0688 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/attrib-1_a.C @@ -0,0 +1,13 @@ +// { dg-additional-options "-fmodules -Wno-global-module" } +// { dg-module-cmi M } + +module; + +template struct A { + void f() const { } +} __attribute__ ((deprecated ("y tho"))); + +export module M; + +export template +A a;// { dg-warning "deprecated" } diff --git a/gcc/testsuite/g++.dg/modules/attrib-1_b.C b/gcc/testsuite/g++.dg/modules/attrib-1_b.C new file mode 100644 index ..48ac751b03d1 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/attrib-1_b.C @@ -0,0 +1,10 @@ +// { dg-additional-options -fmodules } + +template struct A; + +import M; + +int main() +{ + a.f(); +}
[gcc r15-6778] c: Fix up expr location for __builtin_stdc_rotate_* [PR118376]
https://gcc.gnu.org/g:76b7f60ffdb26c56d2c71bbeddf7da601de0e50e commit r15-6778-g76b7f60ffdb26c56d2c71bbeddf7da601de0e50e Author: Jakub Jelinek Date: Fri Jan 10 15:07:41 2025 +0100 c: Fix up expr location for __builtin_stdc_rotate_* [PR118376] Seems I forgot to set_c_expr_source_range for the __builtin_stdc_rotate_* case (the other __builtin_stdc_* cases already have it), which means the locations in expr are uninitialized, sometimes causing ICEs in linemap code, at other times just valgrind errors about uninitialized var uses. 2025-01-10 Jakub Jelinek PR c/118376 * c-parser.cc (c_parser_postfix_expression): Call set_c_expr_source_range before break in the __builtin_stdc_rotate_* case. * gcc.dg/pr118376.c: New test. Diff: --- gcc/c/c-parser.cc | 1 + gcc/testsuite/gcc.dg/pr118376.c | 11 +++ 2 files changed, 12 insertions(+) diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index c46aac5f0a2b..d2f45912cc43 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -12906,6 +12906,7 @@ c_parser_postfix_expression (c_parser *parser) expr.value = build2_loc (loc, COMPOUND_EXPR, TREE_TYPE (expr.value), instrument_expr, expr.value); + set_c_expr_source_range (&expr, loc, close_paren_loc); break; } tree barg1 = arg; diff --git a/gcc/testsuite/gcc.dg/pr118376.c b/gcc/testsuite/gcc.dg/pr118376.c new file mode 100644 index ..16b2f54549b4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr118376.c @@ -0,0 +1,11 @@ +/* PR c/118376 */ +/* { dg-do compile } */ +/* { dg-options "-Wsign-conversion" } */ + +unsigned x; + +void +foo () +{ + __builtin_memset (&x, (long long) __builtin_stdc_rotate_right (x, 0), 1); +} /* { dg-warning "conversion to 'int' from 'long long int' may change the sign of the result" "" { target *-*-* } .-1 } */
[gcc r12-10893] tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield
https://gcc.gnu.org/g:5d6afc601443fa5c03680fb7f39b7dc1f36766a8 commit r12-10893-g5d6afc601443fa5c03680fb7f39b7dc1f36766a8 Author: Richard Biener Date: Tue Jun 25 16:13:02 2024 +0200 tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield The following makes analysis and transform agree on constraints. PR tree-optimization/115646 * tree-call-cdce.cc (check_pow): Check for bit_sz values as allowed by transform. * gcc.dg/pr115646.c: New testcase. (cherry picked from commit 453b1d291d1a0f89087ad91cf6b1bed1ec68eff3) Diff: --- gcc/testsuite/gcc.dg/pr115646.c | 14 ++ gcc/tree-call-cdce.cc | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/pr115646.c b/gcc/testsuite/gcc.dg/pr115646.c new file mode 100644 index ..7938a309513f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr115646.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target int32plus } */ + +extern double pow(double x, double y); + +struct S { +unsigned int a : 3, b : 8, c : 21; +}; + +void foo (struct S *p) +{ + pow (p->c, 42); +} diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc index 83991fe373e2..918781298357 100644 --- a/gcc/tree-call-cdce.cc +++ b/gcc/tree-call-cdce.cc @@ -260,7 +260,7 @@ check_pow (gcall *pow_call) /* If the type of the base is too wide, the resulting shrink wrapping condition will be too conservative. */ - if (bit_sz > MAX_BASE_INT_BIT_SIZE) + if (bit_sz != 8 && bit_sz != 16 && bit_sz != MAX_BASE_INT_BIT_SIZE) return false; return true;
[gcc r15-6777] rtl: Remove invalid compare simplification [PR117186]
https://gcc.gnu.org/g:06c4cf398947b53b4bfc65752f9f879bb2d07924 commit r15-6777-g06c4cf398947b53b4bfc65752f9f879bb2d07924 Author: Richard Sandiford Date: Fri Jan 10 12:51:15 2025 + rtl: Remove invalid compare simplification [PR117186] g:d882fe5150fbbeb4e44d007bb4964e5b22373021, posted at https://gcc.gnu.org/pipermail/gcc-patches/2000-July/033786.html , added code to treat: (set (reg:CC cc) (compare:CC (gt:M (reg:CC cc) 0) (lt:M (reg:CC cc) 0))) as a nop. This PR shows that that isn't always correct. The compare in the set above is between two 0/1 booleans (at least on STORE_FLAG_VALUE==1 targets), whereas the unknown comparison that produced the incoming (reg:CC cc) is unconstrained; it could be between arbitrary integers, or even floats. The fold is therefore replacing a cc that is valid for both signed and unsigned comparisons with one that is only known to be valid for signed comparisons. (gt (compare (gt cc 0) (lt cc 0) 0) does simplify to: (gt cc 0) but: (gtu (compare (gt cc 0) (lt cc 0) 0) does not simplify to: (gtu cc 0) The optimisation didn't come with a testcase, but it was added for i386's cmpstrsi, now cmpstrnsi. That probably doesn't matter as much as it once did, since it's now conditional on -minline-all-stringops. But the patch is almost 25 years old, so whatever the original motivation was, it seems likely that other things now rely on it. It therefore seems better to try to preserve the optimisation on rtl rather than get rid of it. To do that, we need to look at how the result of the outer compare is used. We'd therefore be looking at four instructions (the gt, the lt, the compare, and the use of the compare), but combine already allows that for 3-instruction combinations thanks to: /* If the source is a COMPARE, look for the use of the comparison result and try to simplify it unless we already have used undobuf.other_insn. */ When applied to boolean inputs, a comparison operator is effectively a boolean logical operator (AND, ANDNOT, XOR, etc.). 
simplify_logical_relational_operation already had code to simplify logical operators between two comparison results, but: * It only handled IOR, which doesn't cover all the cases needed here. The others are easily added. * It treated comparisons of integers as having an ORDERED/UNORDERED result. Therefore: * it would not treat "true for LT + EQ + GT" as "always true" for comparisons between integers, because the mask excluded the UNORDERED condition. * it would try to convert "true for LT + GT" into LTGT even for comparisons between integers. To prevent an ICE later, the code used: /* Many comparison codes are only valid for certain mode classes. */ if (!comparison_code_valid_for_mode (code, mode)) return 0; However, this used the wrong mode, since "mode" is here the integer result of the comparisons (and the mode of the IOR), not the mode of the things being compared. Thus the effect was to reject all floating-point-only codes, even when comparing floats. I think instead the code should detect whether the comparison is between integer values and remove UNORDERED from consideration if so. It then always produces a valid comparison (or an always true/false result), and so comparison_code_valid_for_mode is not needed. In particular, "true for LT + GT" becomes NE for comparisons between integers but remains LTGT for comparisons between floats. * There was a missing check for whether the comparison inputs had side effects. While there, it also seemed worth extending simplify_logical_relational_operation to unsigned comparisons, since that makes the testing easier. As far as that testing goes: the patch exhaustively tests all combinations of integer comparisons in: (cmp1 (cmp2 X Y) (cmp3 X Y)) for the 10 integer comparisons, giving 1000 fold attempts in total. It then tries all combinations of (X in {-1,0,1} x Y in {-1,0,1}) on the result of the fold, giving 9 checks per fold, or 9000 in total. 
That's probably more than is typical for self-tests, but it seems to complete in negligible time, even for -O0 builds. gcc/ PR rtl-optimization/117186 * rtl.h (simplify_context::simplify_logical_relational_operation): Add an invert0_p parameter. * simplify-rtx.cc (unsigned_comparison_to_mask): New function. (mask_to_unsigned_comparison): Likewise. (comparison_code_valid_for_mode): Delete. (simplify_context::simplify_logical_relational_operation): Add
[gcc r15-6772] testsuite: generalized field-merge tests for <32-bit int [PR118025]
https://gcc.gnu.org/g:d3c91b0439f67a6dc20ebb3bee4eeaf436eb7190 commit r15-6772-gd3c91b0439f67a6dc20ebb3bee4eeaf436eb7190 Author: Alexandre Oliva Date: Fri Jan 10 09:32:27 2025 -0300 testsuite: generalized field-merge tests for <32-bit int [PR118025] Explicitly convert constants to the desired types, so as to not elicit warnings about implicit truncations, nor execution errors, on targets whose ints are narrower than 32 bits. for gcc/testsuite/ChangeLog PR testsuite/118025 * gcc.dg/field-merge-1.c: Convert constants to desired types. * gcc.dg/field-merge-3.c: Likewise. * gcc.dg/field-merge-4.c: Likewise. * gcc.dg/field-merge-5.c: Likewise. * gcc.dg/field-merge-11.c: Likewise. * gcc.dg/field-merge-17.c: Don't mess with padding bits. Diff: --- gcc/testsuite/gcc.dg/field-merge-1.c | 4 ++-- gcc/testsuite/gcc.dg/field-merge-11.c | 10 +++--- gcc/testsuite/gcc.dg/field-merge-17.c | 4 +++- gcc/testsuite/gcc.dg/field-merge-3.c | 4 ++-- gcc/testsuite/gcc.dg/field-merge-4.c | 6 +++--- gcc/testsuite/gcc.dg/field-merge-5.c | 6 +++--- 6 files changed, 20 insertions(+), 14 deletions(-) diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c index 4405d40ee79d..4e7f6ae9332a 100644 --- a/gcc/testsuite/gcc.dg/field-merge-1.c +++ b/gcc/testsuite/gcc.dg/field-merge-1.c @@ -25,8 +25,8 @@ struct TB { unsigned char s; } __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian"))); -#define vc 0xaa -#define vi 0x12345678 +#define vc (unsigned char)0xaa +#define vi (unsigned int)0x12345678 struct TL vL = { vc, vi, vc, vi, vc, vi, vc }; struct TB vB = { vc, vi, vc, vi, vc, vi, vc }; diff --git a/gcc/testsuite/gcc.dg/field-merge-11.c b/gcc/testsuite/gcc.dg/field-merge-11.c index fe627cddd7fd..9e606e3bef16 100644 --- a/gcc/testsuite/gcc.dg/field-merge-11.c +++ b/gcc/testsuite/gcc.dg/field-merge-11.c @@ -10,7 +10,11 @@ struct s { int c; } __attribute__ ((aligned (4))); -struct s p = { 42, (short)(0xef1 - 0x1000), 0x12345678 }; +struct s p = { + 
(short)(unsigned short)42, + (short)(unsigned short)(0xef1 - 0x1000), + (int)(unsigned int)0x12345678 +}; void f (void) { if (0 @@ -19,9 +23,9 @@ void f (void) { || (int)(signed char)p.b != (int)(signed char)(0xef1 - 0x1000) || (unsigned)(unsigned char)p.b != (unsigned)(unsigned char)(0xef1 - 0x1000) || (unsigned)p.b != (unsigned short)(0xef1 - 0x1000) - || (int)(short)p.b != (int)(0xef1 - 0x1000) + || (int)(short)p.b != (int)(short)(unsigned short)(0xef1 - 0x1000) || (long)(unsigned char)(p.c >> 8) != (long)(unsigned char)0x123456 - || p.c != 0x12345678 + || p.c != (int)(unsigned int)0x12345678 ) __builtin_abort (); } diff --git a/gcc/testsuite/gcc.dg/field-merge-17.c b/gcc/testsuite/gcc.dg/field-merge-17.c index a42658ac5c51..35ead9540606 100644 --- a/gcc/testsuite/gcc.dg/field-merge-17.c +++ b/gcc/testsuite/gcc.dg/field-merge-17.c @@ -3,6 +3,8 @@ /* Check that we can optimize misaligned double-words. */ +#include + struct s { short a; long long b; @@ -33,7 +35,7 @@ int main () { if (fp () > 0) __builtin_abort (); unsigned char *pc = (unsigned char *)&p; - for (int i = 0; i < sizeof (p); i++) + for (int i = 0; i < offsetof (struct s, e) + sizeof (p.e); i++) { pc[i] = 1; if (fp () < 0) diff --git a/gcc/testsuite/gcc.dg/field-merge-3.c b/gcc/testsuite/gcc.dg/field-merge-3.c index a9fe404fa426..e9af4915ad8c 100644 --- a/gcc/testsuite/gcc.dg/field-merge-3.c +++ b/gcc/testsuite/gcc.dg/field-merge-3.c @@ -15,8 +15,8 @@ struct T2 { unsigned int z; } __attribute__((__aligned__(8))); -#define vc 0xaa -#define vi 0x12345678 +#define vc (unsigned char)0xaa +#define vi (unsigned int)0x12345678 struct T1 v1 = { { vc + !BIG_ENDIAN_P, vc + BIG_ENDIAN_P }, vc, vi }; struct T2 v2 = { (vc << 8) | (vc - 1), vc, vi }; diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c b/gcc/testsuite/gcc.dg/field-merge-4.c index c629069e52b2..7c63123a282d 100644 --- a/gcc/testsuite/gcc.dg/field-merge-4.c +++ b/gcc/testsuite/gcc.dg/field-merge-4.c @@ -18,9 +18,9 @@ struct T2 { unsigned int z; } 
__attribute__((__packed__, __aligned__(4))); -#define vc 0xaa -#define vs 0xccdd -#define vi 0x12345678 +#define vc (unsigned char)0xaa +#define vs (unsigned short)0xccdd +#define vi (unsigned int)0x12345678 struct T1 v1 = { -1, vc, 1, vs, vi }; struct T2 v2 = { -1, 0, vc, 1, vs, vi }; diff --git a/gcc/testsuite/gcc.dg/field-merge-5.c b/gcc/testsuite/gcc.dg/field-merge-5.c index 1580b14bcc93..1b5d1a8cb16e 100644 --- a/gcc/testsuite/gcc.dg/field-merge-5.c +++ b/gcc/testsuite/gcc.dg/field-merge-5.c @@ -18,9 +18,9 @@ struct T2 { unsigned int z; } __attribute__((__packed__, __aligned__(8))); -#define vc 0xaa -#define vs 0xccdd -#define vi 0x12345678 +#define vc (unsigned char)0xaa +#defin
[gcc r15-6775] [ifcombine] fix mask variable test to match use [PR118344]
https://gcc.gnu.org/g:fd4e979d0c66567c2cb89f97b51abd35a8773d88 commit r15-6775-gfd4e979d0c66567c2cb89f97b51abd35a8773d88 Author: Alexandre Oliva Date: Fri Jan 10 09:32:43 2025 -0300 [ifcombine] fix mask variable test to match use [PR118344] There was a cut&pasto in the rr_and_mask's adjustment to match the combined type: the test on whether there was a mask already was testing the wrong variable, and then it might crash or otherwise fail accessing an undefined mask. This only hit with checking enabled, and rarely at that. for gcc/ChangeLog PR tree-optimization/118344 * gimple-fold.cc (fold_truth_andor_for_ifcombine): Fix typo in rr_and_mask's type adjustment test. for gcc/testsuite/ChangeLog PR tree-optimization/118344 * gcc.dg/field-merge-19.c: New. Diff: --- gcc/gimple-fold.cc| 2 +- gcc/testsuite/gcc.dg/field-merge-19.c | 41 +++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 0ad92de3a218..20b5024d861d 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -8644,7 +8644,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, xlr_bitpos); else lr_mask = wi::shifted_mask (xlr_bitpos, lr_bitsize, false, rnprec); - if (rl_and_mask.get_precision ()) + if (rr_and_mask.get_precision ()) rr_mask = wi::lshift (wide_int::from (rr_and_mask, rnprec, UNSIGNED), xrr_bitpos); else diff --git a/gcc/testsuite/gcc.dg/field-merge-19.c b/gcc/testsuite/gcc.dg/field-merge-19.c new file mode 100644 index ..5622baa52b0a --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-19.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fchecking" } */ + +/* PR tree-optimization/118344 */ + +/* This used to ICE attempting to extend a mask variable after testing the + wrong mask variable. 
*/ + +int d, e, g, h, i, c, j; +static short k; +char o; +static int *p; +static long *a; +int b[0]; +int q(int s, int t, int *u, int *v) { + for (int f = 0; f < s; f++) +if ((t & v[f]) != u[f]) + return 0; + return 1; +} +int w(int s, int t) { + int l[] = {t, t, t, t}, m[] = {e, e, 3, 1}; + int n = q(s, d, l, m); + return n; +} +int x(unsigned s) { + unsigned r; + if (s >= -1) +return 1; + r = 1000; + while (s > 1 / r) +r /= 2; + return g ? 2 : 0; +} +void y() { + for (;;) { +b[w(8, *p)] = h; +for (; a + k; j = o) + i &= c = x(6) < 0; + } +}
[gcc r15-6770] ifcombine field-merge: improve handling of dwords
https://gcc.gnu.org/g:38401c58f4aae31fd29a16607e9018cb1f66c3ed commit r15-6770-g38401c58f4aae31fd29a16607e9018cb1f66c3ed Author: Alexandre Oliva Date: Fri Jan 10 09:32:05 2025 -0300 ifcombine field-merge: improve handling of dwords On 32-bit hosts, data types with 64-bit alignment aren't getting treated as desired by ifcombine field-merging: we limit the choice of modes at BITS_PER_WORD sizes, but when deciding the boundary for a split, we'd limit the choice only by the alignment, so we wouldn't even consider a split at an odd 32-bit boundary. Fix that by limiting the boundary choice by word choice as well. Now, this would still leave misaligned 64-bit fields in 64-bit-aligned data structures unhandled by ifcombine on 32-bit hosts. We already need to load them as double words, and if they're not byte-aligned, the code gets really ugly, but ifcombine could improve it if it allows double-word loads as a last resort. I've added that. for gcc/ChangeLog * gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit boundary choice by word size as well. Try aligned double-word loads as a last resort. for gcc/testsuite/ChangeLog * gcc.dg/field-merge-17.c: New. Diff: --- gcc/gimple-fold.cc| 30 --- gcc/testsuite/gcc.dg/field-merge-17.c | 46 +++ 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 0402c7666b66..c8a726e0ae3f 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -8381,16 +8381,40 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, { /* Consider the possibility of recombining loads if any of the fields straddles across an alignment boundary, so that either -part can be loaded along with the other field. */ +part can be loaded along with the other field. 
Since we +limit access modes to BITS_PER_WORD, don't exceed that, +otherwise on a 32-bit host and a 64-bit-aligned data +structure, we'll fail the above for a field that straddles +across two words, and would fail here for not even trying to +split it at between 32-bit words. */ HOST_WIDE_INT boundary = compute_split_boundary_from_align - (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize); + (MIN (ll_align, BITS_PER_WORD), +ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize); if (boundary < 0 || !get_best_mode (boundary - first_bit, first_bit, 0, ll_end_region, ll_align, BITS_PER_WORD, volatilep, &lnmode) || !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region, ll_align, BITS_PER_WORD, volatilep, &lnmode2)) - return 0; + { + if (ll_align <= BITS_PER_WORD) + return 0; + + /* As a last resort, try double-word access modes. This +enables us to deal with misaligned double-word fields +that straddle across 3 separate words. */ + boundary = compute_split_boundary_from_align + (MIN (ll_align, 2 * BITS_PER_WORD), +ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize); + if (boundary < 0 + || !get_best_mode (boundary - first_bit, first_bit, +0, ll_end_region, ll_align, 2 * BITS_PER_WORD, +volatilep, &lnmode) + || !get_best_mode (end_bit - boundary, boundary, +0, ll_end_region, ll_align, 2 * BITS_PER_WORD, +volatilep, &lnmode2)) + return 0; + } /* If we can't have a single load, but can with two, figure out whether the two compares can be separated, i.e., whether the entirety of the diff --git a/gcc/testsuite/gcc.dg/field-merge-17.c b/gcc/testsuite/gcc.dg/field-merge-17.c new file mode 100644 index ..06c8ec16e86c --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-17.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O -fdump-tree-ifcombine-details" } */ + +/* Check that we can optimize misaligned double-words. 
*/ + +struct s { + short a; + long long b; + int c; + long long d; + short e; +} __attribute__ ((packed, aligned (8))); + +struct s p = { 0, 0, 0, 0, 0 }; + +__attribute__ ((__noinline__, __noipa__, __noclone__)) +int fp () +{ + if (p.a + || p.b + || p.c + || p.d + || p.e) +return 1; + else +return -1; +} + +int main () { + /* Unlikely, but play safe. */ + if (sizeof (long long) == sizeof (short)) +return 0; + if (fp () > 0) +__builtin_abort (); + unsigned char *pc = (unsigned char *)&p; + for (int i = 0; i < sizeof (p); i++) +{ + pc[i] = 1; + if (fp () < 0) + __builtin_abort (); + pc
[gcc r15-6771] testsuite: generalize ifcombine field-merge tests [PR118025]
https://gcc.gnu.org/g:261ffe685f3865ea61599d61d6b32b92e476a342 commit r15-6771-g261ffe685f3865ea61599d61d6b32b92e476a342 Author: Alexandre Oliva Date: Fri Jan 10 09:32:20 2025 -0300 testsuite: generalize ifcombine field-merge tests [PR118025] A number of tests that check for specific ifcombine transformations fail on AVR and PRU targets, whose type sizes and alignments aren't conducive to the expected transformations. Adjust the expectations. Most execution tests should run successfully regardless of the transformations, but a few that could conceivably fail if short and char have the same bit width now check for that and bypass the tests that would fail. Conversely, one test that had such a runtime test, but that would work regardless, no longer has that runtime test, and its types are narrowed so that the transformations on 32-bit targets are more likely to be the same as those that used to take place on 64-bit targets. This latter change is somewhat obviated by a separate patch, but I've left it in place anyway. for gcc/testsuite/ChangeLog PR testsuite/118025 * gcc.dg/field-merge-1.c: Skip BIT_FIELD_REF counting on AVR and PRU. * gcc.dg/field-merge-3.c: Bypass the test if short doesn't have the expected size. * gcc.dg/field-merge-8.c: Likewise. * gcc.dg/field-merge-9.c: Likewise. Skip optimization counting on AVR and PRU. * gcc.dg/field-merge-13.c: Skip optimization counting on AVR and PRU. * gcc.dg/field-merge-15.c: Likewise. * gcc.dg/field-merge-17.c: Likewise. * gcc.dg/field-merge-16.c: Likewise. Drop runtime bypass. Use smaller types. * gcc.dg/field-merge-14.c: Add comments. 
Diff: --- gcc/testsuite/gcc.dg/field-merge-1.c | 2 +- gcc/testsuite/gcc.dg/field-merge-13.c | 2 +- gcc/testsuite/gcc.dg/field-merge-14.c | 3 ++- gcc/testsuite/gcc.dg/field-merge-15.c | 2 +- gcc/testsuite/gcc.dg/field-merge-16.c | 17 +++-- gcc/testsuite/gcc.dg/field-merge-17.c | 2 +- gcc/testsuite/gcc.dg/field-merge-3.c | 2 ++ gcc/testsuite/gcc.dg/field-merge-8.c | 2 ++ gcc/testsuite/gcc.dg/field-merge-9.c | 4 +++- 9 files changed, 20 insertions(+), 16 deletions(-) diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c index 1818e104437e..4405d40ee79d 100644 --- a/gcc/testsuite/gcc.dg/field-merge-1.c +++ b/gcc/testsuite/gcc.dg/field-merge-1.c @@ -58,7 +58,7 @@ int main () { return 0; } -/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" { target { ! { avr-*-* pru-*-* } } } } } */ /* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/field-merge-13.c b/gcc/testsuite/gcc.dg/field-merge-13.c index 7e4f4c499347..eeef73338f8e 100644 --- a/gcc/testsuite/gcc.dg/field-merge-13.c +++ b/gcc/testsuite/gcc.dg/field-merge-13.c @@ -90,4 +90,4 @@ int main () { return 0; } -/* { dg-final { scan-tree-dump-times "optimizing" 9 "ifcombine" } } */ +/* { dg-final { scan-tree-dump-times "optimizing" 9 "ifcombine" { target { ! 
{ avr-*-* pru-*-* } } } } } */ diff --git a/gcc/testsuite/gcc.dg/field-merge-14.c b/gcc/testsuite/gcc.dg/field-merge-14.c index 91d84cfebf19..73259e0936e4 100644 --- a/gcc/testsuite/gcc.dg/field-merge-14.c +++ b/gcc/testsuite/gcc.dg/field-merge-14.c @@ -1,7 +1,8 @@ /* { dg-do run } */ /* { dg-options "-O -fdump-tree-ifcombine-details" } */ -/* Check that we don't get confused by multiple conversions. */ +/* Check that we don't get confused by multiple conversions. Conceivably, we + could combine both tests using b, but the current logic won't do that. */ __attribute__((noipa)) int f(int *a,int *d) diff --git a/gcc/testsuite/gcc.dg/field-merge-15.c b/gcc/testsuite/gcc.dg/field-merge-15.c index 34641e893c92..fc3846452716 100644 --- a/gcc/testsuite/gcc.dg/field-merge-15.c +++ b/gcc/testsuite/gcc.dg/field-merge-15.c @@ -33,4 +33,4 @@ int main () { return 0; } -/* { dg-final { scan-tree-dump-times "optimizing" 6 "ifcombine" } } */ +/* { dg-final { scan-tree-dump-times "optimizing" 6 "ifcombine" { target { ! { avr-*-* pru-*-* } } } } } */ diff --git a/gcc/testsuite/gcc.dg/field-merge-16.c b/gcc/testsuite/gcc.dg/field-merge-16.c index 2ca23ea663a4..afdaf45b6a94 100644 --- a/gcc/testsuite/gcc.dg/field-merge-16.c +++ b/gcc/testsuite/gcc.dg/field-merge-16.c @@ -4,17 +4,17 @@ /* Check that tests for sign-extension bits are handled correctly. */ struct s { - short a; - short b; - unsigned short c; - unsigned short d; -} __attribu
[gcc r15-6773] [ifcombine] adjust for narrowing converts before shifts [PR118206]
https://gcc.gnu.org/g:c96a6c2c264776d8138c6b303d005e74f047cfa0 commit r15-6773-gc96a6c2c264776d8138c6b303d005e74f047cfa0 Author: Alexandre Oliva Date: Fri Jan 10 09:32:33 2025 -0300 [ifcombine] adjust for narrowing converts before shifts [PR118206] A narrowing conversion and a shift both drop bits from the loaded value, but we need to take into account which one comes first to get the right number of bits and mask. Fold when applying masks to parts, comparing the parts, and combining the results, in the odd chance either mask happens to be zero. for gcc/ChangeLog PR tree-optimization/118206 * gimple-fold.cc (decode_field_reference): Account for upper bits dropped by narrowing conversions whether before or after a right shift. (fold_truth_andor_for_ifcombine): Fold masks, compares, and combined results. for gcc/testsuite/ChangeLog PR tree-optimization/118206 * gcc.dg/field-merge-18.c: New. Diff: --- gcc/gimple-fold.cc| 39 - gcc/testsuite/gcc.dg/field-merge-18.c | 46 +++ 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index c8a726e0ae3f..d95f04213ee4 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -7547,6 +7547,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, int shiftrt = 0; tree res_ops[2]; machine_mode mode; + bool convert_before_shift = false; *load = NULL; *psignbit = false; @@ -7651,6 +7652,12 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, if (*load) loc[3] = gimple_location (*load); exp = res_ops[0]; + /* This looks backwards, but we're going back the def chain, so if we +find the conversion here, after finding a shift, that's because the +convert appears before the shift, and we should thus adjust the bit +pos and size because of the shift after adjusting it due to type +conversion. */ + convert_before_shift = true; } /* Identify the load, if there is one. 
*/ @@ -7693,6 +7700,15 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, *pvolatilep = volatilep; /* Adjust shifts... */ + if (convert_before_shift + && outer_type && *pbitsize > TYPE_PRECISION (outer_type)) +{ + HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type); + if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) + *pbitpos += excess; + *pbitsize -= excess; +} + if (shiftrt) { if (!*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) @@ -7701,7 +7717,8 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, } /* ... and bit position. */ - if (outer_type && *pbitsize > TYPE_PRECISION (outer_type)) + if (!convert_before_shift + && outer_type && *pbitsize > TYPE_PRECISION (outer_type)) { HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type); if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) @@ -8377,6 +8394,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region, ll_align, BITS_PER_WORD, volatilep, &lnmode)) l_split_load = false; + /* ??? If ll and rl share the same load, reuse that? + See PR 118206 -> gcc.dg/field-merge-18.c */ else { /* Consider the possibility of recombining loads if any of the @@ -8757,11 +8776,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, /* Apply masks. */ for (int j = 0; j < 2; j++) if (mask[j] != wi::mask (0, true, mask[j].get_precision ())) - op[j] = build2_loc (locs[j][2], BIT_AND_EXPR, type, - op[j], wide_int_to_tree (type, mask[j])); + op[j] = fold_build2_loc (locs[j][2], BIT_AND_EXPR, type, + op[j], wide_int_to_tree (type, mask[j])); - cmp[i] = build2_loc (i ? rloc : lloc, wanted_code, truth_type, - op[0], op[1]); + cmp[i] = fold_build2_loc (i ? rloc : lloc, wanted_code, truth_type, + op[0], op[1]); } /* Reorder the compares if needed. 
*/ @@ -8773,7 +8792,15 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, if (parts == 1) result = cmp[0]; else if (!separatep || !maybe_separate) -result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]); +{ + /* Only fold if any of the cmp is known, otherwise we may lose the +sequence point, and that may prevent further optimizations. */ + if (TREE_CODE (cmp[0]) == INTEGER_CST + || TREE_CODE (cmp[1]) == INTEGER_CST) + result = fold_build2_loc (rloc, orig_code, truth_type, cm
[gcc r15-6774] [ifcombine] reuse left-hand mask to decode right-hand xor operand
https://gcc.gnu.org/g:740c84975ceb7426da656dc7115445872a9e5b6f commit r15-6774-g740c84975ceb7426da656dc7115445872a9e5b6f Author: Alexandre Oliva Date: Fri Jan 10 09:32:38 2025 -0300 [ifcombine] reuse left-hand mask to decode right-hand xor operand If fold_truth_andor_for_ifcombine applies a mask to an xor, say because the result of the xor is compared with a power of two [minus one], we have to apply the same mask when processing both the left- and right-hand xor paths for the transformation to be sound. Arrange for decode_field_reference to propagate the incoming mask along with the expression to the right-hand operand. Don't require the right-hand xor operand to be a constant, that was a cut&pasto. for gcc/ChangeLog * gimple-fold.cc (decode_field_reference): Add xor_pand_mask. Propagate pand_mask to the right-hand xor operand. Don't require the right-hand xor operand to be a constant. (fold_truth_andor_for_ifcombine): Pass right-hand mask when appropriate. Diff: --- gcc/gimple-fold.cc | 23 +-- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index d95f04213ee4..0ad92de3a218 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -7519,8 +7519,9 @@ gimple_binop_def_p (enum tree_code code, tree t, tree op[2]) *XOR_P is to be FALSE if EXP might be a XOR used in a compare, in which case, if XOR_CMP_OP is a zero constant, it will be overridden with *PEXP, - *XOR_P will be set to TRUE, and the left-hand operand of the XOR will be - decoded. If *XOR_P is TRUE, XOR_CMP_OP is supposed to be NULL, and then the + *XOR_P will be set to TRUE, *XOR_PAND_MASK will be copied from *PAND_MASK, + and the left-hand operand of the XOR will be decoded. If *XOR_P is TRUE, + XOR_CMP_OP and XOR_PAND_MASK are supposed to be NULL, and then the right-hand operand of the XOR will be decoded. 
*LOAD is set to the load stmt of the innermost reference, if any, @@ -7537,7 +7538,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, HOST_WIDE_INT *pbitpos, bool *punsignedp, bool *preversep, bool *pvolatilep, wide_int *pand_mask, bool *psignbit, - bool *xor_p, tree *xor_cmp_op, + bool *xor_p, tree *xor_cmp_op, wide_int *xor_pand_mask, gimple **load, location_t loc[4]) { tree exp = *pexp; @@ -7599,15 +7600,14 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, and_mask = *pand_mask; /* Turn (a ^ b) [!]= 0 into a [!]= b. */ - if (xor_p && gimple_binop_def_p (BIT_XOR_EXPR, exp, res_ops) - && uniform_integer_cst_p (res_ops[1])) + if (xor_p && gimple_binop_def_p (BIT_XOR_EXPR, exp, res_ops)) { /* No location recorded for this one, it's entirely subsumed by the compare. */ if (*xor_p) { exp = res_ops[1]; - gcc_checking_assert (!xor_cmp_op); + gcc_checking_assert (!xor_cmp_op && !xor_pand_mask); } else if (!xor_cmp_op) /* Not much we can do when xor appears in the right-hand compare @@ -7618,6 +7618,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, *xor_p = true; exp = res_ops[0]; *xor_cmp_op = *pexp; + *xor_pand_mask = *pand_mask; } } @@ -8152,19 +8153,21 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, bool l_xor = false, r_xor = false; ll_inner = decode_field_reference (&ll_arg, &ll_bitsize, &ll_bitpos, &ll_unsignedp, &ll_reversep, &volatilep, -&ll_and_mask, &ll_signbit, &l_xor, &lr_arg, +&ll_and_mask, &ll_signbit, +&l_xor, &lr_arg, &lr_and_mask, &ll_load, ll_loc); lr_inner = decode_field_reference (&lr_arg, &lr_bitsize, &lr_bitpos, &lr_unsignedp, &lr_reversep, &volatilep, -&lr_and_mask, &lr_signbit, &l_xor, 0, +&lr_and_mask, &lr_signbit, &l_xor, 0, 0, &lr_load, lr_loc); rl_inner = decode_field_reference (&rl_arg, &rl_bitsize, &rl_bitpos, &rl_unsignedp, &rl_reversep, &volatilep, -&rl_and_mask, &rl_signbit, &r_xor, &rr_arg, +&rl_and_mask, &rl_signbit, +&r_xor, &rr_arg, &rr_and_mask, &rl_load, rl_loc); 
rr_inner = decode_field_reference (&rr_arg, &rr_bitsize, &rr_bitpos, &rr_unsignedp, &rr_r
[gcc r15-6776] [ifcombine] drop other misuses of uniform_integer_cst_p
https://gcc.gnu.org/g:47ac6ca9cb08d915532c59a3895497523a6edb58 commit r15-6776-g47ac6ca9cb08d915532c59a3895497523a6edb58 Author: Alexandre Oliva Date: Fri Jan 10 09:32:47 2025 -0300 [ifcombine] drop other misuses of uniform_integer_cst_p As Jakub pointed out in PR118206, the use of uniform_integer_cst_p in ifcombine makes no sense, we're not dealing with vectors. Indeed, I've been misunderstanding and misusing it since I cut&pasted it from some preexisting match predicate in earlier version of the ifcombine field-merge patch. for gcc/ChangeLog * gimple-fold.cc (decode_field_reference): Drop misuses of uniform_integer_cst_p. (fold_truth_andor_for_ifcombine): Likewise. Diff: --- gcc/gimple-fold.cc | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 20b5024d861d..a3987c4590ae 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -7577,7 +7577,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, /* Recognize and save a masking operation. Combine it with an incoming mask. */ if (pand_mask && gimple_binop_def_p (BIT_AND_EXPR, exp, res_ops) - && uniform_integer_cst_p (res_ops[1])) + && TREE_CODE (res_ops[1]) == INTEGER_CST) { loc[1] = gimple_location (SSA_NAME_DEF_STMT (exp)); exp = res_ops[0]; @@ -7632,7 +7632,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize, /* Take note of shifts. 
*/ if (gimple_binop_def_p (RSHIFT_EXPR, exp, res_ops) - && uniform_integer_cst_p (res_ops[1])) + && TREE_CODE (res_ops[1]) == INTEGER_CST) { loc[2] = gimple_location (SSA_NAME_DEF_STMT (exp)); exp = res_ops[0]; @@ -8092,7 +8092,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, else if ((lcode == LT_EXPR || lcode == GE_EXPR) && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg)) && TYPE_UNSIGNED (TREE_TYPE (ll_arg)) - && uniform_integer_cst_p (lr_arg) + && TREE_CODE (lr_arg) == INTEGER_CST && wi::popcount (wi::to_wide (lr_arg)) == 1) { ll_and_mask = ~(wi::to_wide (lr_arg) - 1); @@ -8104,7 +8104,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, else if ((lcode == LE_EXPR || lcode == GT_EXPR) && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg)) && TYPE_UNSIGNED (TREE_TYPE (ll_arg)) - && uniform_integer_cst_p (lr_arg) + && TREE_CODE (lr_arg) == INTEGER_CST && wi::popcount (wi::to_wide (lr_arg) + 1) == 1) { ll_and_mask = ~wi::to_wide (lr_arg); @@ -8123,7 +8123,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, else if ((rcode == LT_EXPR || rcode == GE_EXPR) && INTEGRAL_TYPE_P (TREE_TYPE (rl_arg)) && TYPE_UNSIGNED (TREE_TYPE (rl_arg)) - && uniform_integer_cst_p (rr_arg) + && TREE_CODE (rr_arg) == INTEGER_CST && wi::popcount (wi::to_wide (rr_arg)) == 1) { rl_and_mask = ~(wi::to_wide (rr_arg) - 1); @@ -8133,7 +8133,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, else if ((rcode == LE_EXPR || rcode == GT_EXPR) && INTEGRAL_TYPE_P (TREE_TYPE (rl_arg)) && TYPE_UNSIGNED (TREE_TYPE (rl_arg)) - && uniform_integer_cst_p (rr_arg) + && TREE_CODE (rr_arg) == INTEGER_CST && wi::popcount (wi::to_wide (rr_arg) + 1) == 1) { rl_and_mask = ~wi::to_wide (rr_arg); @@ -8392,7 +8392,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, HOST_WIDE_INT ll_align = TYPE_ALIGN (TREE_TYPE (ll_inner)); poly_uint64 ll_end_region = 0; if (TYPE_SIZE (TREE_TYPE (ll_inner)) - && uniform_integer_cst_p (TYPE_SIZE 
(TREE_TYPE (ll_inner)))) + && tree_fits_poly_uint64_p (TYPE_SIZE (TREE_TYPE (ll_inner)))) ll_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (ll_inner))); if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region, ll_align, BITS_PER_WORD, volatilep, &lnmode)) @@ -8585,7 +8585,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, HOST_WIDE_INT lr_align = TYPE_ALIGN (TREE_TYPE (lr_inner)); poly_uint64 lr_end_region = 0; if (TYPE_SIZE (TREE_TYPE (lr_inner)) - && uniform_integer_cst_p (TYPE_SIZE (TREE_TYPE (lr_inner)))) + && tree_fits_poly_uint64_p (TYPE_SIZE (TREE_TYPE (lr_inner)))) lr_end_region = tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (lr_inner))); if (!get_best_mode (end_bit - first_bit, first_bit, 0, lr_end_region, lr_align, BITS_PER_WORD, volatilep, &rnmode))
[gcc r15-6793] rtl-optimization/117467 - limit ext-dce memory use
https://gcc.gnu.org/g:03faac507913803de76eab04fd74e754c70aa8c4 commit r15-6793-g03faac507913803de76eab04fd74e754c70aa8c4 Author: Richard Biener Date: Fri Jan 10 12:30:29 2025 +0100 rtl-optimization/117467 - limit ext-dce memory use The following puts in a hard limit on ext-dce because it might end up requiring memory on the order of the number of basic blocks times the number of pseudo registers. The limiting follows what GCSE based passes do and thus I re-use --param max-gcse-memory here. This doesn't in any way address the implementation issues of the pass, but it reduces the memory-use when compiling the module_first_rk_step_part1.F90 TU from 521.wrf_r from 25GB to 1GB. PR rtl-optimization/117467 PR rtl-optimization/117934 * ext-dce.cc (ext_dce_execute): Do nothing if a memory allocation estimate exceeds what is allowed by --param max-gcse-memory. Diff: --- gcc/ext-dce.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 6cf641873494..e257e3bc873a 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "df.h" #include "print-rtl.h" #include "dbgcnt.h" +#include "diagnostic-core.h" /* These should probably move into a C++ class. */ static vec livein; @@ -1110,6 +1111,21 @@ static bool ext_dce_rd_confluence_n (edge) { return true; } void ext_dce_execute (void) { + /* Limit the amount of memory we use for livein, with 4 bits per + reg per basic-block including overhead that maps to one byte + per reg per basic-block. 
*/ + uint64_t memory_request += (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num (); + if (memory_request / 1024 > (uint64_t)param_max_gcse_memory) +{ + warning (OPT_Wdisabled_optimization, + "ext-dce disabled: %d basic blocks and %d registers; " + "increase %<--param max-gcse-memory%> above %wu", + n_basic_blocks_for_fn (cfun), max_reg_num (), + memory_request / 1024); + return; +} + /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful to this pass. Clear it for those cases. */ maybe_clear_subreg_promoted_p ();
[gcc r15-6794] Fix bootstrap on !HARDREG_PRE_REGNOS targets
https://gcc.gnu.org/g:553411851d9d20510979eb4daa6675d01a79aea4 commit r15-6794-g553411851d9d20510979eb4daa6675d01a79aea4 Author: Richard Biener Date: Fri Jan 10 15:40:36 2025 +0100 Fix bootstrap on !HARDREG_PRE_REGNOS targets Pushed as obvious. * gcse.cc (pass_hardreg_pre::gate): Wrap possibly unused fun argument. Diff: --- gcc/gcse.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/gcse.cc b/gcc/gcse.cc index 3f3f7fe15b0c..4ae19f28430e 100644 --- a/gcc/gcse.cc +++ b/gcc/gcse.cc @@ -4351,7 +4351,7 @@ public: }; // class pass_rtl_pre bool -pass_hardreg_pre::gate (function *fun) +pass_hardreg_pre::gate (function * ARG_UNUSED (fun)) { #ifdef HARDREG_PRE_REGNOS return optimize > 0
[gcc r12-10895] tree-optimization/116057 - wrong code with CCP and vector CTORs
https://gcc.gnu.org/g:c8b549857d968d634a74709112e5acc9f9caf35c commit r12-10895-gc8b549857d968d634a74709112e5acc9f9caf35c Author: Richard Biener Date: Wed Jul 24 13:16:35 2024 +0200 tree-optimization/116057 - wrong code with CCP and vector CTORs The following fixes an issue with CCPs likely_value when faced with a vector CTOR containing undef SSA names and constants. This should be classified as CONSTANT and not UNDEFINED. PR tree-optimization/116057 * tree-ssa-ccp.cc (likely_value): Also walk CTORs in stmt operands to look for constants. * gcc.dg/torture/pr116057.c: New testcase. (cherry picked from commit 1ea551514b9c285d801ac5ab8d78b22483ff65af) Diff: --- gcc/testsuite/gcc.dg/torture/pr116057.c | 20 gcc/tree-ssa-ccp.cc | 11 +++ 2 files changed, 31 insertions(+) diff --git a/gcc/testsuite/gcc.dg/torture/pr116057.c b/gcc/testsuite/gcc.dg/torture/pr116057.c new file mode 100644 index ..a7021c8e746e --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116057.c @@ -0,0 +1,20 @@ +/* { dg-do run } */ +/* { dg-additional-options "-Wno-psabi" } */ + +#define vect8 __attribute__((vector_size(8))) + +vect8 int __attribute__((noipa)) +f(int a) +{ + int b; + vect8 int t={1,1}; + if(a) return t; + return (vect8 int){0, b}; +} + +int main () +{ + if (f(0)[0] != 0) +__builtin_abort (); + return 0; +} diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc index 3c63f2dd8a3b..629cb3c2d82d 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -750,6 +750,17 @@ likely_value (gimple *stmt) continue; if (is_gimple_min_invariant (op)) has_constant_operand = true; + else if (TREE_CODE (op) == CONSTRUCTOR) + { + unsigned j; + tree val; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (op), j, val) + if (CONSTANT_CLASS_P (val)) + { + has_constant_operand = true; + break; + } + } } if (has_constant_operand)
[gcc r12-10894] tree-optimization/115669 - fix SLP reduction association
https://gcc.gnu.org/g:882f7e53a7664f2c76b10dc020e720ba9f55f022 commit r12-10894-g882f7e53a7664f2c76b10dc020e720ba9f55f022 Author: Richard Biener Date: Thu Jun 27 11:26:08 2024 +0200 tree-optimization/115669 - fix SLP reduction association The following avoids associating a reduction path as that might get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order. This is a latent issue with SLP reductions but now easily exposed as we're doing single-lane SLP reductions. When we achieved SLP only we can move and update this meta-data. PR tree-optimization/115669 * tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate chains that participate in a reduction. * gcc.dg/vect/pr115669.c: New testcase. (cherry picked from commit 7886830bb45c4f5dca0496d4deae9a45204d78f5) Diff: --- gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++ gcc/tree-vect-slp.cc | 3 +++ 2 files changed, 25 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c b/gcc/testsuite/gcc.dg/vect/pr115669.c new file mode 100644 index ..361a17a64e68 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115669.c @@ -0,0 +1,22 @@ +/* { dg-additional-options "-fwrapv" } */ + +#include "tree-vect.h" + +int a = 10; +unsigned b; +long long c[100]; +int foo() +{ + long long *d = c; + for (short e = 0; e < a; e++) +b += ~(d ? d[e] : 0); + return b; +} + +int main() +{ + check_vect (); + if (foo () != -10) +abort (); + return 0; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 19cab93761c9..0462fa01020d 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1825,6 +1825,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, else if (is_a (vinfo) /* ??? We don't handle !vect_internal_def defs below. */ && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + /* ??? Do not associate a reduction, this will wreck REDUC_IDX + mapping as long as that exists on the stmt_info level. 
*/ + && STMT_VINFO_REDUC_IDX (stmt_info) == -1 && is_gimple_assign (stmt_info->stmt) && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt)) || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)
[gcc r15-6795] arm: [MVE intrinsics] Fix tuples field name (PR 118332)
https://gcc.gnu.org/g:288ac095b4df1a47a4cf9ba2efdc56a568a6e951 commit r15-6795-g288ac095b4df1a47a4cf9ba2efdc56a568a6e951 Author: Christophe Lyon Date: Wed Jan 8 18:51:27 2025 + arm: [MVE intrinsics] Fix tuples field name (PR 118332) The previous fix only worked for C, for C++ we need to add more information to the underlying type so that finish_class_member_access_expr accepts it. We use the same logic as in aarch64's register_tuple_type for AdvSIMD tuples. This patch makes gcc.target/arm/mve/intrinsics/pr118332.c pass in C++ mode. gcc/ChangeLog: PR target/118332 * config/arm/arm-mve-builtins.cc (wrap_type_in_struct): Delete. (register_type_decl): Delete. (register_builtin_tuple_types): Use lang_hooks.types.simulate_record_decl. Diff: --- gcc/config/arm/arm-mve-builtins.cc | 52 ++ 1 file changed, 8 insertions(+), 44 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index 4c52415f3f1b..42b53cc05e77 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -463,47 +463,6 @@ register_vector_type (vector_type_index type) acle_vector_types[0][type] = vectype; } -/* Return a structure type that contains a single field of type FIELD_TYPE. - The field is called 'val', as mandated by ACLE. */ -static tree -wrap_type_in_struct (tree field_type) -{ - tree field = build_decl (input_location, FIELD_DECL, - get_identifier ("val"), field_type); - tree struct_type = lang_hooks.types.make_type (RECORD_TYPE); - DECL_FIELD_CONTEXT (field) = struct_type; - TYPE_FIELDS (struct_type) = field; - layout_type (struct_type); - return struct_type; -} - -/* Register a built-in TYPE_DECL called NAME for TYPE. This is used/needed - when TYPE is a structure type. 
*/ -static void -register_type_decl (tree type, const char *name) -{ - tree decl = build_decl (input_location, TYPE_DECL, - get_identifier (name), type); - TYPE_NAME (type) = decl; - TYPE_STUB_DECL (type) = decl; - lang_hooks.decls.pushdecl (decl); - /* ??? Undo the effect of set_underlying_type for C. The C frontend - doesn't recognize DECL as a built-in because (as intended) the decl has - a real location instead of BUILTINS_LOCATION. The frontend therefore - treats the decl like a normal C "typedef struct foo foo;", expecting - the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead - of the named one we attached above. It then sets DECL_ORIGINAL_TYPE - on the supposedly unnamed decl, creating a circularity that upsets - dwarf2out. - - We don't want to follow the normal C model and create "struct foo" - tags for tuple types since (a) the types are supposed to be opaque - and (b) they couldn't be defined as a real struct anyway. Treating - the TYPE_DECLs as "typedef struct foo foo;" without creating - "struct foo" would lead to confusing error messages. */ - DECL_ORIGINAL_TYPE (decl) = NULL_TREE; -} - /* Register tuple types of element type TYPE under their arm_mve_types.h names. */ static void @@ -538,13 +497,18 @@ register_builtin_tuple_types (vector_type_index type) && TYPE_MODE_RAW (arrtype) == TYPE_MODE (arrtype) && TYPE_ALIGN (arrtype) == 64); - tree tuple_type = wrap_type_in_struct (arrtype); + /* Build a structure type that contains a single field of type ARRTYPE. +The field is called 'val', as mandated by ACLE. 
*/ + tree field = build_decl (input_location, FIELD_DECL, + get_identifier ("val"), arrtype); + tree tuple_type + = lang_hooks.types.simulate_record_decl (input_location, +buffer, +make_array_slice (&field, 1)); gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type)) && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type) && TYPE_ALIGN (tuple_type) == 64); - register_type_decl (tuple_type, buffer); - acle_vector_types[num_vectors >> 1][type] = tuple_type; } }
[gcc r15-6790] docs: Document new hardreg PRE pass
https://gcc.gnu.org/g:016e2f00d40d76676f38fb9d268ac550e5ec878a commit r15-6790-g016e2f00d40d76676f38fb9d268ac550e5ec878a Author: Andrew Carlotti Date: Wed Dec 18 15:59:24 2024 +0000 docs: Document new hardreg PRE pass gcc/ChangeLog: * doc/passes.texi: Document hardreg PRE pass. Diff: --- gcc/doc/passes.texi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index 59a143292c78..282fc1a6a12b 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -959,6 +959,12 @@ global constant and copy propagation. The source file for this pass is @file{gcse.cc}, and the LCM routines are in @file{lcm.cc}. +A third version of this pass is run on some targets to optimise assignments to +specific hard registers. This can be used in cases where a register has a +single purpose, such as specifying a mode as an extra input for specific +instructions (@pxref{mode switching optimization} for another way of handling +instruction modes). + @item Loop optimization This pass performs several loop related optimizations. @@ -1018,6 +1024,7 @@ combination approaches as well. The pass runs twice, once before register allocation and once after register allocation. The code is located in @file{late-combine.cc}. +@anchor{mode switching optimization} @item Mode switching optimization This pass looks for instructions that require the processor to be in a
[gcc r15-6789] Add new hardreg PRE pass
https://gcc.gnu.org/g:e7f98d9603808b1c17106d3d9f2000bc34f2c50c commit r15-6789-ge7f98d9603808b1c17106d3d9f2000bc34f2c50c Author: Andrew Carlotti Date: Tue Oct 15 17:31:28 2024 +0100 Add new hardreg PRE pass This pass is used to optimise assignments to the FPMR register in aarch64. I chose to implement this as a middle-end pass because it mostly reuses the existing RTL PRE code within gcse.cc. Compared to RTL PRE, the key difference in this new pass is that we insert new writes directly to the destination hardreg, instead of writing to a new pseudo-register and copying the result later. This requires changes to the analysis portion of the pass, because sets cannot be moved before existing instructions that set, use or clobber the hardreg, and the value becomes unavailable after any uses or clobbers of the hardreg. Any uses of the hardreg in debug insns will be deleted. We could do better than this, but for the aarch64 fpmr I don't think we emit useful debuginfo for deleted fp8 instructions anyway (and I don't even know if it's possible to have a debug fpmr use when entering hardreg PRE). gcc/ChangeLog: * config/aarch64/aarch64.h (HARDREG_PRE_REGNOS): New macro. * gcse.cc (doing_hardreg_pre_p): New global variable. (do_load_motion): New boolean check. (current_hardreg_regno): New global variable. (compute_local_properties): Unset transp for hardreg clobbers. (prune_hardreg_uses): New function. (want_to_gcse_p): Use different checks for hardreg PRE. (oprs_unchanged_p): Disable load motion for hardreg PRE pass. (hash_scan_set): For hardreg PRE, skip non-hardreg sets and check for hardreg clobbers. (record_last_mem_set_info): Skip for hardreg PRE. (compute_pre_data): Prune hardreg uses from transp bitmap. (pre_expr_reaches_here_p_work): Add sentence to comment. (insert_insn_start_basic_block): New functions. (pre_edge_insert): Don't add hardreg sets to predecessor block. (pre_delete): Use hardreg for the reaching reg. (reset_hardreg_debug_uses): New function. 
(pre_gcse): For hardreg PRE, reset debug uses and don't insert copies. (one_pre_gcse_pass): Disable load motion for hardreg PRE. (execute_hardreg_pre): New. (class pass_hardreg_pre): New. (pass_hardreg_pre::gate): New. (make_pass_hardreg_pre): New. * passes.def (pass_hardreg_pre): New pass. * tree-pass.h (make_pass_hardreg_pre): New. gcc/testsuite/ChangeLog: * gcc.target/aarch64/acle/fpmr-1.c: New test. * gcc.target/aarch64/acle/fpmr-2.c: New test. * gcc.target/aarch64/acle/fpmr-3.c: New test. * gcc.target/aarch64/acle/fpmr-4.c: New test. Diff: --- gcc/config/aarch64/aarch64.h | 4 + gcc/gcse.cc| 339 ++--- gcc/passes.def | 1 + gcc/testsuite/gcc.target/aarch64/acle/fpmr-1.c | 58 + gcc/testsuite/gcc.target/aarch64/acle/fpmr-2.c | 15 ++ gcc/testsuite/gcc.target/aarch64/acle/fpmr-3.c | 18 ++ gcc/testsuite/gcc.target/aarch64/acle/fpmr-4.c | 23 ++ gcc/tree-pass.h| 1 + 8 files changed, 427 insertions(+), 32 deletions(-) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 3f3a475eb01d..1ab49e229b08 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -1652,6 +1652,10 @@ enum class aarch64_tristate_mode : int { NO, YES, MAYBE }; { int (aarch64_tristate_mode::MAYBE), \ int (aarch64_local_sme_state::ANY) } +/* Zero terminated list of regnos for which hardreg PRE should be + applied. */ +#define HARDREG_PRE_REGNOS { FPM_REGNUM, 0 } + #endif #endif /* GCC_AARCH64_H */ diff --git a/gcc/gcse.cc b/gcc/gcse.cc index 839cac3ddab3..3f3f7fe15b0c 100644 --- a/gcc/gcse.cc +++ b/gcc/gcse.cc @@ -415,6 +415,17 @@ static int gcse_create_count; /* Doing code hoisting. */ static bool doing_code_hoisting_p = false; + +/* Doing hardreg_pre. 
*/ +static bool doing_hardreg_pre_p = false; + +inline bool +do_load_motion () +{ + return flag_gcse_lm && !doing_hardreg_pre_p; +} + +static unsigned int current_hardreg_regno; /* For available exprs */ static sbitmap *ae_kill; @@ -689,14 +700,32 @@ compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc, int indx = expr->bitmap_index; struct gcse_occr *occr; - /* The expression is transparent in this block if it is not killed. -We start by assuming all are transparent [none are killed], and -then reset the bits for those that are. */ + /* In most cases, the expre
[gcc r15-6791] s390: Add expander for uaddc/usubc optabs
https://gcc.gnu.org/g:8a2d5bc28089b2660310b964ef75fb05eb387f88 commit r15-6791-g8a2d5bc28089b2660310b964ef75fb05eb387f88 Author: Stefan Schulze Frielinghaus Date: Fri Jan 10 15:14:08 2025 +0100 s390: Add expander for uaddc/usubc optabs gcc/ChangeLog: * config/s390/s390-protos.h (s390_emit_compare): Add mode parameter for the resulting RTX. * config/s390/s390.cc (s390_emit_compare): Ditto. (s390_emit_compare_and_swap): Change. (s390_expand_vec_strlen): Change. (s390_expand_cs_hqi): Change. (s390_expand_split_stack_prologue): Change. * config/s390/s390.md (*add3_carry1_cc): Renamed to ... (add3_carry1_cc): this and in order to use the corresponding gen function, encode CC mode into pattern. (*sub3_borrow_cc): Renamed to ... (sub3_borrow_cc): this and in order to use the corresponding gen function, encode CC mode into pattern. (*add3_alc_carry1_cc): Renamed to ... (add3_alc_carry1_cc): this and in order to use the corresponding gen function, encode CC mode into pattern. (sub3_slb_borrow1_cc): New. (uaddc5): New. (usubc5): New. gcc/testsuite/ChangeLog: * gcc.target/s390/uaddc-1.c: New test. * gcc.target/s390/uaddc-2.c: New test. * gcc.target/s390/uaddc-3.c: New test. * gcc.target/s390/usubc-1.c: New test. * gcc.target/s390/usubc-2.c: New test. * gcc.target/s390/usubc-3.c: New test. 
Diff: --- gcc/config/s390/s390-protos.h | 2 +- gcc/config/s390/s390.cc | 20 ++-- gcc/config/s390/s390.md | 115 ++- gcc/testsuite/gcc.target/s390/uaddc-1.c | 156 gcc/testsuite/gcc.target/s390/uaddc-2.c | 25 + gcc/testsuite/gcc.target/s390/uaddc-3.c | 27 ++ gcc/testsuite/gcc.target/s390/usubc-1.c | 156 gcc/testsuite/gcc.target/s390/usubc-2.c | 25 + gcc/testsuite/gcc.target/s390/usubc-3.c | 29 ++ 9 files changed, 519 insertions(+), 36 deletions(-) diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index 00848008dcf0..e8c7f8308496 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -86,7 +86,7 @@ extern int tls_symbolic_operand (rtx); extern bool s390_match_ccmode (rtx_insn *, machine_mode); extern machine_mode s390_tm_ccmode (rtx, rtx, bool); extern machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx); -extern rtx s390_emit_compare (enum rtx_code, rtx, rtx); +extern rtx s390_emit_compare (machine_mode, enum rtx_code, rtx, rtx); extern rtx_insn *s390_emit_jump (rtx, rtx); extern bool symbolic_reference_mentioned_p (rtx); extern bool tls_symbolic_reference_mentioned_p (rtx); diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 08acb69de3e8..a98e067bb06f 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -2029,9 +2029,9 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, the IF_THEN_ELSE of the conditional branch testing the result. */ rtx -s390_emit_compare (enum rtx_code code, rtx op0, rtx op1) +s390_emit_compare (machine_mode mode, enum rtx_code code, rtx op0, rtx op1) { - machine_mode mode = s390_select_ccmode (code, op0, op1); + machine_mode cc_mode = s390_select_ccmode (code, op0, op1); rtx cc; /* Force OP1 into register in order to satisfy VXE TFmode patterns. 
*/ @@ -2043,17 +2043,17 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1) /* Do not output a redundant compare instruction if a compare_and_swap pattern already computed the result and the machine modes are compatible. */ - gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode) + gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), cc_mode) == GET_MODE (op0)); cc = op0; } else { - cc = gen_rtx_REG (mode, CC_REGNUM); - emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1))); + cc = gen_rtx_REG (cc_mode, CC_REGNUM); + emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op0, op1))); } - return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx); + return gen_rtx_fmt_ee (code, mode, cc, const0_rtx); } /* If MEM is not a legitimate compare-and-swap memory operand, return a new @@ -2103,7 +2103,7 @@ s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, default: gcc_unreachable (); } - return s390_emit_compare (code, cc, const0_rtx); + return s390_emit_compare (VOIDmode, code, cc, const0_rtx); } /* Emit a jump instruction to TARGET and return it. If COND is @@ -6647,7 +6647,7 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx alignment) Now we have to check whether the resulting index lies
[gcc r15-6780] aarch64: Add new +fcma flag
https://gcc.gnu.org/g:9bbb91e8e0a3a26fe2ff651a89011ca5a0b4794d commit r15-6780-g9bbb91e8e0a3a26fe2ff651a89011ca5a0b4794d Author: Andrew Carlotti Date: Thu Aug 1 11:54:20 2024 +0100 aarch64: Add new +fcma flag This includes +fcma as a dependency of +sve, and means that we can finally support fcma intrinsics on a64fx. Also add fcma to the Features list in several cpunative testcases that incorrectly included sve without fcma. gcc/ChangeLog: * config/aarch64/aarch64-arches.def (V8_3A): Add FCMA. * config/aarch64/aarch64-option-extensions.def (FCMA): New flag. (SVE): Add FCMA dependency. * config/aarch64/aarch64.h (TARGET_COMPLEX): Use new flag. * config/aarch64/arm_neon.h: Use new flag for fcma intrinsics. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cpunative/info_15: Add fcma to Features. * gcc.target/aarch64/cpunative/info_16: Ditto. * gcc.target/aarch64/cpunative/info_17: Ditto. * gcc.target/aarch64/cpunative/info_8: Ditto. * gcc.target/aarch64/cpunative/info_9: Ditto. Diff: --- gcc/config/aarch64/aarch64-arches.def | 2 +- gcc/config/aarch64/aarch64-option-extensions.def | 4 +++- gcc/config/aarch64/aarch64.h | 2 +- gcc/config/aarch64/arm_neon.h | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/info_15 | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/info_16 | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/info_17 | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/info_8 | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/info_9 | 2 +- 9 files changed, 11 insertions(+), 9 deletions(-) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index 62a7d9fd2442..d85b14be4c3f 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -33,7 +33,7 @@ AARCH64_ARCH("armv8-a", generic_armv8_a, V8A, 8, (SIMD)) AARCH64_ARCH("armv8.1-a", generic_armv8_a, V8_1A, 8, (V8A, LSE, CRC, RDMA)) AARCH64_ARCH("armv8.2-a", generic_armv8_a, V8_2A, 8, (V8_1A)) -AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, 
(V8_2A, PAUTH, RCPC)) +AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, RCPC, FCMA)) AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 6a70a63afd0a..c41c4998c5cb 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -151,6 +151,8 @@ AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), "asimdfhm") AARCH64_FMV_FEATURE("fp16fml", FP16FML, (F16FML)) +AARCH64_OPT_FMV_EXTENSION("fcma", FCMA, (SIMD), (), (), "fcma") + AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC), (), (), "lrcpc3") @@ -163,7 +165,7 @@ AARCH64_OPT_FMV_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16") AARCH64_FMV_FEATURE("rpres", RPRES, ()) -AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16), (), (), "sve") +AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16, FCMA), (), (), "sve") /* This specifically does not imply +sve. */ AARCH64_OPT_EXTENSION("sve-b16b16", SVE_B16B16, (), (), (), "sveb16b16") diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 75ea2a6910ee..250edb7d426d 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -364,7 +364,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED #define TARGET_JSCVT (TARGET_FLOAT && TARGET_ARMV8_3) /* Armv8.3-a Complex number extension to AdvSIMD extensions. */ -#define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) +#define TARGET_COMPLEX AARCH64_HAVE_ISA (FCMA) /* Floating-point rounding instructions from Armv8.5-a. 
*/ #define TARGET_FRINT (AARCH64_HAVE_ISA (V8_5A) && TARGET_FLOAT) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 20849b0b8a6d..da145adf6749 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26975,7 +26975,7 @@ vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c) /* AdvSIMD Complex numbers intrinsics. */ #pragma GCC push_options -#pragma GCC target ("arch=armv8.3-a") +#pragma GCC target ("+nothing+fcma") #pragma GCC push_options #pragma GCC target ("+fp16") diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 index 6b425ea20135..1a31a75d6b48 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpu
[gcc r15-6784] aarch64: Add new +rcpc2 flag
https://gcc.gnu.org/g:5747c121e9caa66a8173ad01db78769be08c407e commit r15-6784-g5747c121e9caa66a8173ad01db78769be08c407e Author: Andrew Carlotti Date: Tue Jul 30 18:48:48 2024 +0100 aarch64: Add new +rcpc2 flag gcc/ChangeLog: * config/aarch64/aarch64-arches.def (V8_4A): Add RCPC2. * config/aarch64/aarch64-option-extensions.def (RCPC2): New flag. (RCPC3): Add RCPC2 dependency. * config/aarch64/aarch64.h (TARGET_RCPC2): Use new flag. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cpunative/native_cpu_21.c: Add rcpc2 to expected feature string instead of rcpc. * gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto. Diff: --- gcc/config/aarch64/aarch64-arches.def | 2 +- gcc/config/aarch64/aarch64-option-extensions.def | 4 +++- gcc/config/aarch64/aarch64.h | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index 8c2aa4e477fc..bcd08e21fde5 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -34,7 +34,7 @@ AARCH64_ARCH("armv8-a", generic_armv8_a, V8A, 8, (SIMD)) AARCH64_ARCH("armv8.1-a", generic_armv8_a, V8_1A, 8, (V8A, LSE, CRC, RDMA)) AARCH64_ARCH("armv8.2-a", generic_armv8_a, V8_2A, 8, (V8_1A)) AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, RCPC, FCMA, JSCVT)) -AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) +AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM, RCPC2)) AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES, FRINTTS, FLAGM2)) AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A)) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def 
index 00533c38839b..3558a6ed5ad7 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -159,7 +159,9 @@ AARCH64_OPT_FMV_EXTENSION("fcma", FCMA, (SIMD), (), (), "fcma") AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") -AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC), (), (), "lrcpc3") +AARCH64_OPT_FMV_EXTENSION("rcpc2", RCPC2, (RCPC), (), (), "ilrcpc") + +AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC2), (), (), "lrcpc3") AARCH64_OPT_FMV_EXTENSION("frintts", FRINTTS, (FP), (), (), "frint") diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 1c8fcd5c582f..3f3a475eb01d 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -427,7 +427,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* The RCPC2 extensions from Armv8.4-a that allow immediate offsets to LDAPR and sign-extending versions.*/ -#define TARGET_RCPC2 ((AARCH64_HAVE_ISA (V8_4A) && TARGET_RCPC) || TARGET_RCPC3) +#define TARGET_RCPC2 AARCH64_HAVE_ISA (RCPC2) /* RCPC3 (Release Consistency) extensions, optional from Armv8.2-a. */ #define TARGET_RCPC3 AARCH64_HAVE_ISA (RCPC3) diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c index c1d5896e1eb0..904cdf452263 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc2\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n} } } */ /* Check that an Armv8-A core doesn't fall apart on extensions without midr values. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c index 4533a2bf5912..feb959b11b0e 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\+pauth\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+js
[gcc r15-6786] aarch64: Add new +xs flag
https://gcc.gnu.org/g:f06c6f8bf33e0b13d410b2305c58803a79754009 commit r15-6786-gf06c6f8bf33e0b13d410b2305c58803a79754009 Author: Andrew Carlotti Date: Tue Jul 30 19:01:27 2024 +0100 aarch64: Add new +xs flag GCC does not emit tlbi instructions, so this only affects the flags passed through to the assembler. gcc/ChangeLog: * config/aarch64/aarch64-arches.def (V8_7A): Add XS. * config/aarch64/aarch64-option-extensions.def (XS): New flag. Diff: --- gcc/config/aarch64/aarch64-arches.def| 2 +- gcc/config/aarch64/aarch64-option-extensions.def | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index ff873a372431..fd4881a8ebfb 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -37,7 +37,7 @@ AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, R AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM, RCPC2)) AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES, FRINTTS, FLAGM2)) AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) -AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A, WFXT)) +AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A, WFXT, XS)) AARCH64_ARCH("armv8.8-a", generic_armv8_a, V8_8A, 8, (V8_7A, MOPS)) AARCH64_ARCH("armv8.9-a", generic_armv8_a, V8_9A, 8, (V8_8A, CSSC)) AARCH64_ARCH("armv8-r", generic_armv8_a, V8R , 8, (V8_4A)) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index d0d01f91c0fc..a1133accfce5 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -227,6 +227,8 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") AARCH64_OPT_FMV_EXTENSION("wfxt", WFXT, (), (), (), "wfxt") +AARCH64_OPT_EXTENSION("xs", XS, (), (), (), "") + AARCH64_OPT_EXTENSION("sme-f64f64", 
SME_F64F64, (SME), (), (), "smef64f64") AARCH64_FMV_FEATURE("sme-f64f64", SME_F64, (SME_F64F64))
[gcc r15-6782] aarch64: Add new +frintts flag
https://gcc.gnu.org/g:32a45a216e63a205eed62f26c20ba919a77b025b commit r15-6782-g32a45a216e63a205eed62f26c20ba919a77b025b Author: Andrew Carlotti Date: Tue Jul 30 18:36:22 2024 +0100 aarch64: Add new +frintts flag gcc/ChangeLog: * config/aarch64/aarch64-arches.def (V8_5A): Add FRINTTS * config/aarch64/aarch64-option-extensions.def (FRINTTS): New flag. * config/aarch64/aarch64.h (TARGET_FRINT): Use new flag. * config/aarch64/arm_acle.h: Use new flag for frintts intrinsics. * config/aarch64/arm_neon.h: Ditto. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cpunative/native_cpu_21.c: Add frintts to expected feature string. * gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto. Diff: --- gcc/config/aarch64/aarch64-arches.def | 2 +- gcc/config/aarch64/aarch64-option-extensions.def | 2 ++ gcc/config/aarch64/aarch64.h | 2 +- gcc/config/aarch64/arm_acle.h | 2 +- gcc/config/aarch64/arm_neon.h | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +- 7 files changed, 8 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index a21e5de496e9..e0f6cc21d198 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -35,7 +35,7 @@ AARCH64_ARCH("armv8.1-a", generic_armv8_a, V8_1A, 8, (V8A, LSE, CRC, AARCH64_ARCH("armv8.2-a", generic_armv8_a, V8_2A, 8, (V8_1A)) AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, RCPC, FCMA, JSCVT)) AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) -AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) +AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES, FRINTTS)) AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A)) AARCH64_ARCH("armv8.8-a", generic_armv8_a, V8_8A, 8, (V8_7A, 
MOPS)) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 96518ba6..9921e51c85f9 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -159,6 +159,8 @@ AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") AARCH64_OPT_FMV_EXTENSION("rcpc3", RCPC3, (RCPC), (), (), "lrcpc3") +AARCH64_OPT_FMV_EXTENSION("frintts", FRINTTS, (FP), (), (), "frint") + AARCH64_OPT_FMV_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm") /* An explicit +bf16 implies +simd, but +bf16+nosimd still enables scalar BF16 diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index f652869625a8..1c8fcd5c582f 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -367,7 +367,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED #define TARGET_COMPLEX AARCH64_HAVE_ISA (FCMA) /* Floating-point rounding instructions from Armv8.5-a. */ -#define TARGET_FRINT (AARCH64_HAVE_ISA (V8_5A) && TARGET_FLOAT) +#define TARGET_FRINT AARCH64_HAVE_ISA (FRINTTS) /* TME instructions are enabled. 
*/ #define TARGET_TME AARCH64_HAVE_ISA (TME) diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 985a18fba678..7976c117daf7 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -130,7 +130,7 @@ __jcvt (double __a) #pragma GCC pop_options #pragma GCC push_options -#pragma GCC target ("arch=armv8.5-a") +#pragma GCC target ("+nothing+frintts") __extension__ extern __inline float __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint32zf (float __a) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index da145adf6749..33594cb65d28 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -27638,7 +27638,7 @@ vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, #pragma GCC pop_options #pragma GCC push_options -#pragma GCC target ("arch=armv8.5-a") +#pragma GCC target ("+nothing+simd+frintts") __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c index 603ee48d584b..aa70d1d22b82 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arc
[gcc r15-6785] aarch64: Add new +wfxt flag
https://gcc.gnu.org/g:4984119b75e0fb8b653dd46b5d196017c90fd6a5 commit r15-6785-g4984119b75e0fb8b653dd46b5d196017c90fd6a5 Author: Andrew Carlotti Date: Tue Jul 30 18:56:01 2024 +0100 aarch64: Add new +wfxt flag GCC does not currently emit the wfet or wfit instructions, so this primarily affects the flags passed through to the assembler. gcc/ChangeLog: * config/aarch64/aarch64-arches.def (V8_7A): Add WFXT. * config/aarch64/aarch64-option-extensions.def (WFXT): New flag. Diff: --- gcc/config/aarch64/aarch64-arches.def| 2 +- gcc/config/aarch64/aarch64-option-extensions.def | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index bcd08e21fde5..ff873a372431 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -37,7 +37,7 @@ AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, R AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM, RCPC2)) AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES, FRINTTS, FLAGM2)) AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) -AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A)) +AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A, WFXT)) AARCH64_ARCH("armv8.8-a", generic_armv8_a, V8_8A, 8, (V8_7A, MOPS)) AARCH64_ARCH("armv8.9-a", generic_armv8_a, V8_9A, 8, (V8_8A, CSSC)) AARCH64_ARCH("armv8-r", generic_armv8_a, V8R , 8, (V8_4A)) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 3558a6ed5ad7..d0d01f91c0fc 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -225,6 +225,8 @@ AARCH64_OPT_EXTENSION("pauth", PAUTH, (), (), (), "paca pacg") AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") +AARCH64_OPT_FMV_EXTENSION("wfxt", WFXT, (), (), (), "wfxt") + 
AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "smef64f64") AARCH64_FMV_FEATURE("sme-f64f64", SME_F64, (SME_F64F64))
[gcc r15-6787] docs: Add new AArch64 flags
https://gcc.gnu.org/g:abbe2905eddcedf2c6247e6bb76136e850458d74 commit r15-6787-gabbe2905eddcedf2c6247e6bb76136e850458d74 Author: Andrew Carlotti Date: Tue Nov 5 17:24:12 2024 +0000 docs: Add new AArch64 flags gcc/ChangeLog: * doc/invoke.texi: Add new AArch64 flags. Diff: --- gcc/doc/invoke.texi | 22 ++ 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 17fe2c64c1f8..dd0d2b41a1a9 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21653,11 +21653,11 @@ and the features that they enable by default: @item @samp{armv8-a} @tab Armv8-A @tab @samp{+fp}, @samp{+simd} @item @samp{armv8.1-a} @tab Armv8.1-A @tab @samp{armv8-a}, @samp{+crc}, @samp{+lse}, @samp{+rdma} @item @samp{armv8.2-a} @tab Armv8.2-A @tab @samp{armv8.1-a} -@item @samp{armv8.3-a} @tab Armv8.3-A @tab @samp{armv8.2-a}, @samp{+pauth} -@item @samp{armv8.4-a} @tab Armv8.4-A @tab @samp{armv8.3-a}, @samp{+flagm}, @samp{+fp16fml}, @samp{+dotprod} -@item @samp{armv8.5-a} @tab Armv8.5-A @tab @samp{armv8.4-a}, @samp{+sb}, @samp{+ssbs}, @samp{+predres} +@item @samp{armv8.3-a} @tab Armv8.3-A @tab @samp{armv8.2-a}, @samp{+pauth}, @samp{+fcma}, @samp{+jscvt} +@item @samp{armv8.4-a} @tab Armv8.4-A @tab @samp{armv8.3-a}, @samp{+flagm}, @samp{+fp16fml}, @samp{+dotprod}, @samp{+rcpc2} +@item @samp{armv8.5-a} @tab Armv8.5-A @tab @samp{armv8.4-a}, @samp{+sb}, @samp{+ssbs}, @samp{+predres}, @samp{+frintts}, @samp{+flagm2} @item @samp{armv8.6-a} @tab Armv8.6-A @tab @samp{armv8.5-a}, @samp{+bf16}, @samp{+i8mm} -@item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a} +@item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+wfxt}, @samp{+xs} @item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops} @item @samp{armv8.9-a} @tab Armv8.9-a @tab @samp{armv8.8-a} @item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2} @@ -21997,6 +21997,8 @@ Enable the instructions to accelerate memory operations like @code{memcpy}, 
@option{-march=armv8.8-a} @item flagm Enable the Flag Manipulation instructions Extension. +@item flagm2 +Enable the FlagM2 flag conversion instructions. @item pauth Enable the Pointer Authentication Extension. @item cssc @@ -22020,6 +22022,16 @@ instructions. @item sme2p1 Enable the Scalable Matrix Extension version 2.1. This also enables SME2 instructions. +@item fcma +Enable the complex number SIMD extensions. +@item jscvt +Enable the @code{fjcvtzs} JavaScript conversion instruction. +@item frintts +Enable floating-point round to integral value instructions. +@item wfxt +Enable @code{wfet} and @code{wfit} instructions. +@item xs +Enable the XS memory attribute extension. @item lse128 Enable the LSE128 128-bit atomic instructions extension. This also enables LSE instructions. @@ -22030,6 +22042,8 @@ This also enables the LSE128 extension. Enable support for Armv9.4-a Guarded Control Stack extension. @item the Enable support for Armv8.9-a/9.4-a translation hardening extension. +@item rcpc2 +Enable the RCpc2 extension. @item rcpc3 Enable the RCpc3 (Release Consistency) extension. @item fp8
[gcc r15-6783] aarch64: Add new +flagm2 flag
https://gcc.gnu.org/g:f5915726fd14cbf76a170338d6a91b11817e808a commit r15-6783-gf5915726fd14cbf76a170338d6a91b11817e808a Author: Andrew Carlotti Date: Tue Jul 30 18:43:51 2024 +0100 aarch64: Add new +flagm2 flag GCC does not currently emit the axflag or xaflag instructions, so this primarily affects the flags passed through to the assembler. gcc/ChangeLog: * config/aarch64/aarch64-arches.def (V8_5A): Add FLAGM2. * config/aarch64/aarch64-option-extensions.def (FLAGM2): New flag. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cpunative/native_cpu_21.c: Add flagm2 to expected feature string instead of flagm. * gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto. Diff: --- gcc/config/aarch64/aarch64-arches.def | 2 +- gcc/config/aarch64/aarch64-option-extensions.def | 2 ++ gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index e0f6cc21d198..8c2aa4e477fc 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -35,7 +35,7 @@ AARCH64_ARCH("armv8.1-a", generic_armv8_a, V8_1A, 8, (V8A, LSE, CRC, AARCH64_ARCH("armv8.2-a", generic_armv8_a, V8_2A, 8, (V8_1A)) AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, RCPC, FCMA, JSCVT)) AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) -AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES, FRINTTS)) +AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES, FRINTTS, FLAGM2)) AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A)) AARCH64_ARCH("armv8.8-a", generic_armv8_a, V8_8A, 8, (V8_7A, MOPS)) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def index 9921e51c85f9..00533c38839b 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -103,6 +103,8 @@ AARCH64_OPT_FMV_EXTENSION("rng", RNG, (), (), (), "rng") AARCH64_OPT_FMV_EXTENSION("flagm", FLAGM, (), (), (), "flagm") +AARCH64_OPT_FMV_EXTENSION("flagm2", FLAGM2, (FLAGM), (), (), "flagm2") + AARCH64_OPT_FMV_EXTENSION("lse", LSE, (), (), (), "atomics") AARCH64_OPT_FMV_EXTENSION("fp", FP, (), (), (), "fp") diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c index aa70d1d22b82..c1d5896e1eb0 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n} } } */ /* Check that an Armv8-A core doesn't fall apart on extensions without midr values. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c index ccd5d0d9bb7d..4533a2bf5912 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\+pauth\n} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+flagm2\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+frintts\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\+pauth\n} } } */ /* Check that an Armv8-A core doesn't fall apart on extensions without midr values and that it enables optional features. */
[gcc r15-6779] aarch64: Use PAUTH instead of V8_3A in some places
https://gcc.gnu.org/g:20385cb92cbd4a1934661ab97a162c1e25935836 commit r15-6779-g20385cb92cbd4a1934661ab97a162c1e25935836 Author: Andrew Carlotti Date: Tue Jul 30 16:26:04 2024 +0100 aarch64: Use PAUTH instead of V8_3A in some places gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_expand_epilogue): Use TARGET_PAUTH. * config/aarch64/aarch64.md: Update comment. Diff: --- gcc/config/aarch64/aarch64.cc | 6 +++--- gcc/config/aarch64/aarch64.md | 8 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 6fe0fa2722bd..ad31e9d255c0 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -10283,12 +10283,12 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) 1) Sibcalls don't return in a normal way, so if we're about to call one we must authenticate. - 2) The RETAA instruction is not available before ARMv8.3-A, so if we are - generating code for !TARGET_ARMV8_3 we can't use it and must + 2) The RETAA instruction is not available without FEAT_PAuth, so if we + are generating code for !TARGET_PAUTH we can't use it and must explicitly authenticate. */ if (aarch64_return_address_signing_enabled () - && (sibcall || !TARGET_ARMV8_3)) + && (sibcall || !TARGET_PAUTH)) { switch (aarch64_ra_sign_key) { diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 0ed3c93b379e..44f5b7a54d25 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -7846,11 +7846,11 @@ [(set_attr "type" "f_cvtf2i")] ) -;; Pointer authentication patterns are always provided. In architecture -;; revisions prior to ARMv8.3-A these HINT instructions operate as NOPs. +;; Pointer authentication patterns are always provided. On targets that +;; don't implement FEAT_PAuth these HINT instructions operate as NOPs. 
;; This lets the user write portable software which authenticates pointers -;; when run on something which implements ARMv8.3-A, and which runs -;; correctly, but does not authenticate pointers, where ARMv8.3-A is not +;; when run on something which implements FEAT_PAuth, and which runs +;; correctly, but does not authenticate pointers, where FEAT_PAuth is not ;; implemented. ;; Signing/Authenticating R30 using SP as the salt.
[gcc r15-6788] Disable a broken multiversioning optimisation
https://gcc.gnu.org/g:21212f08d8258fa6d4cfdd21a35d0ee7c44ccbea commit r15-6788-g21212f08d8258fa6d4cfdd21a35d0ee7c44ccbea Author: Andrew Carlotti Date: Tue Jan 7 18:32:23 2025 +0000 Disable a broken multiversioning optimisation This patch skips redirect_to_specific_clone for aarch64 and riscv, because the optimisation has two flaws: 1. It checks the value of the "target" attribute, even on targets that don't use this attribute for multiversioning. 2. The algorithm used is too aggressive, and will eliminate the indirection in some cases where the runtime choice of callee version can't be determined statically at compile time. A correct implementation would need to verify that: - if the current caller version were selected at runtime, then the chosen callee version would be eligible for selection. - if any higher priority callee version were selected at runtime, then a higher priority caller version would have been eligible for selection (and hence the current caller version wouldn't have been selected). The current checks only verify a more restrictive version of the first condition, and don't check the second condition at all. Fixing the optimisation properly would require implementing target hooks to check for implications between version attributes, which is too complicated for this stage. However, I would like to see this hook implemented in the future, since it could also help deduplicate other multiversioning code. Since this behaviour has existed for x86 and powerpc for a while, I think it's best to preserve the existing behaviour on those targets, unless any maintainer for those targets disagrees. gcc/ChangeLog: * multiple_target.cc (redirect_to_specific_clone): Assert that "target" attribute is used for FMV before checking it. (ipa_target_clone): Skip redirect_to_specific_clone on some targets. gcc/testsuite/ChangeLog: * g++.target/aarch64/mv-pragma.C: New test. 
Diff: --- gcc/multiple_target.cc | 15 +++--- gcc/testsuite/g++.target/aarch64/mv-pragma.C | 31 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc index 552b9626aa71..d8becf4d9a96 100644 --- a/gcc/multiple_target.cc +++ b/gcc/multiple_target.cc @@ -442,7 +442,14 @@ expand_target_clones (struct cgraph_node *node, bool definition) /* When NODE is a target clone, consider all callees and redirect to a clone with equal target attributes. That prevents multiple - multi-versioning dispatches and a call-chain can be optimized. */ + multi-versioning dispatches and a call-chain can be optimized. + + This optimisation might pick the wrong version in some cases, since knowing + that we meet the target requirements for a matching callee version does not + tell us that we won't also meet the target requirements for a higher + priority callee version at runtime. Since this is longstanding behaviour + for x86 and powerpc, we preserve it for those targets, but skip the optimisation + for targets that use the "target_version" attribute for multi-versioning. 
*/ static void redirect_to_specific_clone (cgraph_node *node) @@ -451,6 +458,7 @@ redirect_to_specific_clone (cgraph_node *node) if (fv == NULL) return; + gcc_assert (TARGET_HAS_FMV_TARGET_ATTRIBUTE); tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES (node->decl)); if (attr_target == NULL_TREE) return; @@ -503,8 +511,9 @@ ipa_target_clone (void) for (unsigned i = 0; i < to_dispatch.length (); i++) create_dispatcher_calls (to_dispatch[i]); - FOR_EACH_FUNCTION (node) -redirect_to_specific_clone (node); + if (TARGET_HAS_FMV_TARGET_ATTRIBUTE) +FOR_EACH_FUNCTION (node) + redirect_to_specific_clone (node); return 0; } diff --git a/gcc/testsuite/g++.target/aarch64/mv-pragma.C b/gcc/testsuite/g++.target/aarch64/mv-pragma.C new file mode 100644 index ..545d0735438d --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/mv-pragma.C @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O0" } */ + +#pragma GCC target ("+sve") + +__attribute__((target_version("default"))) +int foo () +{ + return 1; +} + +__attribute__((target_version("sve2"))) +int foo () +{ + return 2; +} + +__attribute__((target_version("default"))) +int bar () +{ + return foo(); +} + +__attribute__((target_version("sha3"))) +int bar () +{ + return foo() + 5; +} + +/* { dg-final { scan-assembler-times "\n\tbl\t_Z3foov\n" 2 } } */
[gcc r15-6781] aarch64: Add new +jscvt flag
https://gcc.gnu.org/g:2c8913576fa0bf234bde3c9c1f137a1b9cca95f1 commit r15-6781-g2c8913576fa0bf234bde3c9c1f137a1b9cca95f1 Author: Andrew Carlotti Date: Thu Aug 1 11:54:41 2024 +0100 aarch64: Add new +jscvt flag gcc/ChangeLog: * config/aarch64/aarch64-arches.def (V8_3A): Add JSCVT. * config/aarch64/aarch64-option-extensions.def (JSCVT): New flag. * config/aarch64/aarch64.h (TARGET_JSCVT): Use new flag. * config/aarch64/arm_acle.h: Use new flag for jscvt intrinsics. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cpunative/native_cpu_21.c: Add jscvt to expected feature string. * gcc.target/aarch64/cpunative/native_cpu_22.c: Ditto. Diff: --- gcc/config/aarch64/aarch64-arches.def | 2 +- gcc/config/aarch64/aarch64-option-extensions.def | 2 ++ gcc/config/aarch64/aarch64.h | 2 +- gcc/config/aarch64/arm_acle.h | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index d85b14be4c3f..a21e5de496e9 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -33,7 +33,7 @@ AARCH64_ARCH("armv8-a", generic_armv8_a, V8A, 8, (SIMD)) AARCH64_ARCH("armv8.1-a", generic_armv8_a, V8_1A, 8, (V8A, LSE, CRC, RDMA)) AARCH64_ARCH("armv8.2-a", generic_armv8_a, V8_2A, 8, (V8_1A)) -AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, RCPC, FCMA)) +AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, RCPC, FCMA, JSCVT)) AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index c41c4998c5cb..96518ba6 100644 --- 
a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -151,6 +151,8 @@ AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), "asimdfhm") AARCH64_FMV_FEATURE("fp16fml", FP16FML, (F16FML)) +AARCH64_OPT_FMV_EXTENSION("jscvt", JSCVT, (FP), (), (), "jscvt") + AARCH64_OPT_FMV_EXTENSION("fcma", FCMA, (SIMD), (), (), "fcma") AARCH64_OPT_FMV_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 250edb7d426d..f652869625a8 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -361,7 +361,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED #define TARGET_ARMV8_3 AARCH64_HAVE_ISA (V8_3A) /* Javascript conversion instruction from Armv8.3-a. */ -#define TARGET_JSCVT (TARGET_FLOAT && TARGET_ARMV8_3) +#define TARGET_JSCVT AARCH64_HAVE_ISA (JSCVT) /* Armv8.3-a Complex number extension to AdvSIMD extensions. */ #define TARGET_COMPLEX AARCH64_HAVE_ISA (FCMA) diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 5c3478d4ee82..985a18fba678 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -119,7 +119,7 @@ __revl (unsigned long __value) } #pragma GCC push_options -#pragma GCC target ("arch=armv8.3-a") +#pragma GCC target ("+nothing+jscvt") __extension__ extern __inline int32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __jcvt (double __a) diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c index 1d90e9ec9d97..603ee48d584b 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_21.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+rcpc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n} } } */ 
+/* { dg-final { scan-assembler {\.arch armv8-a\+flagm\+lse\+dotprod\+rdma\+crc\+fp16fml\+jscvt\+rcpc\+i8mm\+bf16\+sve2-aes\+sve2-bitperm\+sve2-sha3\+sve2-sm4\+sb\+ssbs\n} } } */ /* Check that an Armv8-A core doesn't fall apart on extensions without midr values. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c index 17050a0b72c9..e0ba97fb6e9a 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_22.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg
[gcc r14-11197] Disable a broken multiversioning optimisation
https://gcc.gnu.org/g:d0191d1631647436c4707ca6b915a46adf591d87 commit r14-11197-gd0191d1631647436c4707ca6b915a46adf591d87 Author: Andrew Carlotti Date: Tue Jan 7 18:32:23 2025 +0000 Disable a broken multiversioning optimisation This patch skips redirect_to_specific_clone for aarch64 and riscv, because the optimisation has two flaws: 1. It checks the value of the "target" attribute, even on targets that don't use this attribute for multiversioning. 2. The algorithm used is too aggressive, and will eliminate the indirection in some cases where the runtime choice of callee version can't be determined statically at compile time. A correct implementation would need to verify that: - if the current caller version were selected at runtime, then the chosen callee version would be eligible for selection. - if any higher priority callee version were selected at runtime, then a higher priority caller version would have been eligible for selection (and hence the current caller version wouldn't have been selected). The current checks only verify a more restrictive version of the first condition, and don't check the second condition at all. Fixing the optimisation properly would require implementing target hooks to check for implications between version attributes, which is too complicated for this stage. However, I would like to see this hook implemented in the future, since it could also help deduplicate other multiversioning code. Since this behavior has existed for x86 and powerpc for a while, I think it's best to preserve the existing behavior on those targets, unless any maintainer for those targets disagrees. gcc/ChangeLog: * multiple_target.cc (redirect_to_specific_clone): Assert that "target" attribute is used for FMV before checking it. (ipa_target_clone): Skip redirect_to_specific_clone on some targets. gcc/testsuite/ChangeLog: * g++.target/aarch64/mv-pragma.C: New test. 
Diff: --- gcc/multiple_target.cc | 16 +++--- gcc/testsuite/g++.target/aarch64/mv-pragma.C | 31 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc index 1fdd279da04a..60958a56c92d 100644 --- a/gcc/multiple_target.cc +++ b/gcc/multiple_target.cc @@ -437,7 +437,15 @@ expand_target_clones (struct cgraph_node *node, bool definition) /* When NODE is a target clone, consider all callees and redirect to a clone with equal target attributes. That prevents multiple - multi-versioning dispatches and a call-chain can be optimized. */ + multi-versioning dispatches and a call-chain can be optimized. + + This optimisation might pick the wrong version in some cases, since knowing + that we meet the target requirements for a matching callee version does not + tell us that we won't also meet the target requirements for a higher + priority callee version at runtime. Since this is longstanding behavior + for x86 and powerpc, we preserve it for those targets, but skip the + optimisation for targets that use the "target_version" attribute for + multi-versioning. 
*/ static void redirect_to_specific_clone (cgraph_node *node) @@ -446,6 +454,7 @@ redirect_to_specific_clone (cgraph_node *node) if (fv == NULL) return; + gcc_assert (TARGET_HAS_FMV_TARGET_ATTRIBUTE); tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES (node->decl)); if (attr_target == NULL_TREE) return; @@ -498,8 +507,9 @@ ipa_target_clone (void) for (unsigned i = 0; i < to_dispatch.length (); i++) create_dispatcher_calls (to_dispatch[i]); - FOR_EACH_FUNCTION (node) -redirect_to_specific_clone (node); + if (TARGET_HAS_FMV_TARGET_ATTRIBUTE) +FOR_EACH_FUNCTION (node) + redirect_to_specific_clone (node); return 0; } diff --git a/gcc/testsuite/g++.target/aarch64/mv-pragma.C b/gcc/testsuite/g++.target/aarch64/mv-pragma.C new file mode 100644 index ..545d0735438d --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/mv-pragma.C @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O0" } */ + +#pragma GCC target ("+sve") + +__attribute__((target_version("default"))) +int foo () +{ + return 1; +} + +__attribute__((target_version("sve2"))) +int foo () +{ + return 2; +} + +__attribute__((target_version("default"))) +int bar () +{ + return foo(); +} + +__attribute__((target_version("sha3"))) +int bar () +{ + return foo() + 5; +} + +/* { dg-final { scan-assembler-times "\n\tbl\t_Z3foov\n" 2 } } */
[gcc r15-6792] c++: ICE with pack indexing and partial inst [PR117937]
https://gcc.gnu.org/g:d64447946b0c8964dfd9731c3792af0fe4158cda commit r15-6792-gd64447946b0c8964dfd9731c3792af0fe4158cda Author: Marek Polacek Date: Thu Dec 19 17:26:27 2024 -0500 c++: ICE with pack indexing and partial inst [PR117937] Here we ICE in expand_expr_real_1: if (exp) { tree context = decl_function_context (exp); gcc_assert (SCOPE_FILE_SCOPE_P (context) || context == current_function_decl on something like this test: void f (auto... args) { [&]<size_t... i>(seq<i...>) { g(args...[i]...); }(seq<0>()); } because while current_function_decl is: f<int>(int)::<lambda(seq<i ...>)> [with long unsigned int ...i = {0}] (correct), context is: f<int>(int)::<lambda(seq<i ...>)> which is only the partial instantiation. I think that when tsubst_pack_index gets a partial instantiation, e.g. {*args#0} as the pack, we should still tsubst it. The args#0's value-expr can be __closure->__args#0 where the closure's context is the partially instantiated operator(). So we should let retrieve_local_specialization find the right args#0. PR c++/117937 gcc/cp/ChangeLog: * pt.cc (tsubst_pack_index): tsubst the pack even when it's not PACK_EXPANSION_P. gcc/testsuite/ChangeLog: * g++.dg/cpp26/pack-indexing13.C: New test. * g++.dg/cpp26/pack-indexing14.C: New test. Diff: --- gcc/cp/pt.cc | 8 gcc/testsuite/g++.dg/cpp26/pack-indexing13.C | 23 +++ gcc/testsuite/g++.dg/cpp26/pack-indexing14.C | 18 ++ 3 files changed, 49 insertions(+) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 67964d41ab8b..ff0a3a4f7d81 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -14063,6 +14063,14 @@ tsubst_pack_index (tree t, tree args, tsubst_flags_t complain, tree in_decl) tree pack = PACK_INDEX_PACK (t); if (PACK_EXPANSION_P (pack)) pack = tsubst_pack_expansion (pack, args, complain, in_decl); + else +{ + /* PACK can be {*args#0} whose args#0's value-expr refers to +a partially instantiated closure. Let tsubst find the +fully-instantiated one. 
*/ + gcc_assert (TREE_CODE (pack) == TREE_VEC); + pack = tsubst (pack, args, complain, in_decl); +} if (TREE_CODE (pack) == TREE_VEC && TREE_VEC_LENGTH (pack) == 0) { if (complain & tf_error) diff --git a/gcc/testsuite/g++.dg/cpp26/pack-indexing13.C b/gcc/testsuite/g++.dg/cpp26/pack-indexing13.C new file mode 100644 index ..e0dd9c21c67b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/pack-indexing13.C @@ -0,0 +1,23 @@ +// PR c++/117937 +// { dg-do compile { target c++26 } } + +using size_t = decltype(sizeof(0)); + +template +struct seq {}; + +void g(auto...) {} + +void +f (auto... args) +{ + [&](seq) { + g(args...[i]...); + }(seq<0>()); +} + +int +main () +{ + f(0); +} diff --git a/gcc/testsuite/g++.dg/cpp26/pack-indexing14.C b/gcc/testsuite/g++.dg/cpp26/pack-indexing14.C new file mode 100644 index ..c8a67ee16edb --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/pack-indexing14.C @@ -0,0 +1,18 @@ +// PR c++/117937 +// { dg-do compile { target c++26 } } + +void operate_one(const int) {} + +template +void operate_multi(T... args) +{ +[&]() +{ + ::operate_one(args...[idx]); +}.template operator()<0>(); +} + +int main() +{ +::operate_multi(0); +}
[gcc r15-6796] [PR118017][LRA]: Fix test for i686
https://gcc.gnu.org/g:94d8de53388793f4d5fc0d0aa00fef32ca4aa870 commit r15-6796-g94d8de53388793f4d5fc0d0aa00fef32ca4aa870 Author: Vladimir N. Makarov Date: Fri Jan 10 10:36:24 2025 -0500 [PR118017][LRA]: Fix test for i686 My previous patch for PR118017 contains a test which fails on i686. The patch fixes this. gcc/testsuite/ChangeLog: PR target/118017 * gcc.target/i386/pr118017.c: Check target int128. Diff: --- gcc/testsuite/gcc.target/i386/pr118017.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr118017.c b/gcc/testsuite/gcc.target/i386/pr118017.c index c82d71e8d293..28797a0ad73f 100644 --- a/gcc/testsuite/gcc.target/i386/pr118017.c +++ b/gcc/testsuite/gcc.target/i386/pr118017.c @@ -1,5 +1,5 @@ /* PR target/118017 */ -/* { dg-do compile } */ +/* { dg-do compile { target int128 } } */ /* { dg-options "-Og -frounding-math -mno-80387 -mno-mmx -Wno-psabi" } */ typedef __attribute__((__vector_size__ (64))) _Float128 F;
[gcc r15-6797] Fix some memory leaks
https://gcc.gnu.org/g:9193641d1695293006ed0b818bb4161a1b6fbed2 commit r15-6797-g9193641d1695293006ed0b818bb4161a1b6fbed2 Author: Richard Biener Date: Fri Jan 10 15:17:58 2025 +0100 Fix some memory leaks The following fixes memory leaks found compiling SPEC CPU 2017 with valgrind. * df-core.cc (rest_of_handle_df_finish): Release dflow for problems without free function (like LR). * gimple-crc-optimization.cc (crc_optimization::loop_may_calculate_crc): Release loop_bbs on all exits. * tree-vectorizer.h (supportable_indirect_convert_operation): Change. * tree-vect-generic.cc (expand_vector_conversion): Adjust. * tree-vect-stmts.cc (vectorizable_conversion): Use auto_vec for converts. (supportable_indirect_convert_operation): Get a reference to the output vector of converts. Diff: --- gcc/df-core.cc | 2 ++ gcc/gimple-crc-optimization.cc | 6 +- gcc/tree-vect-generic.cc | 2 +- gcc/tree-vect-stmts.cc | 12 ++-- gcc/tree-vectorizer.h | 2 +- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/gcc/df-core.cc b/gcc/df-core.cc index a7011decf0bb..abfc0e63d352 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -808,6 +808,8 @@ rest_of_handle_df_finish (void) struct dataflow *dflow = df->problems_in_order[i]; if (dflow->problem->free_fun) dflow->problem->free_fun (); + else + free (dflow); } free (df->postorder); diff --git a/gcc/gimple-crc-optimization.cc b/gcc/gimple-crc-optimization.cc index 0e1f2a99d72b..a98cbe6752b5 100644 --- a/gcc/gimple-crc-optimization.cc +++ b/gcc/gimple-crc-optimization.cc @@ -947,6 +947,7 @@ crc_optimization::loop_may_calculate_crc (class loop *loop) fprintf (dump_file, "The number of conditional " "branches in the loop isn't 2.\n"); + free (loop_bbs); return false; } @@ -977,8 +978,11 @@ crc_optimization::loop_may_calculate_crc (class loop *loop) return true; } - if (++checked_xor_count == 2) + if (++checked_xor_count == 2) + { + free (loop_bbs); return false; + } } } } diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc 
index fa5e9a54dbf9..c2f7a29d539b 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1757,7 +1757,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) auto_vec > converts; if (supportable_indirect_convert_operation (code, ret_type, arg_type, - &converts, + converts, arg)) { new_rhs = arg; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index c0e38d00246d..f5b3608f6b13 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -5583,7 +5583,7 @@ vectorizable_conversion (vec_info *vinfo, scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type); scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type); opt_scalar_mode rhs_mode_iter; - vec > converts = vNULL; + auto_vec > converts; /* Supportable by target? */ switch (modifier) @@ -5597,7 +5597,7 @@ vectorizable_conversion (vec_info *vinfo, if (supportable_indirect_convert_operation (code, vectype_out, vectype_in, - &converts, + converts, op0)) { gcc_assert (converts.length () <= 2); @@ -15170,7 +15170,7 @@ bool supportable_indirect_convert_operation (code_helper code, tree vectype_out, tree vectype_in, - vec > *converts, + vec > &converts, tree op0) { bool found_mode = false; @@ -15187,7 +15187,7 @@ supportable_indirect_convert_operation (code_helper code, vectype_in, &tc1)) { - converts->safe_push (std::make_pair (vectype_out, tc1)); + converts.safe_push (std::make_pair (vectype_out, tc1)); return true; } @@ -15278,9 +15278,9 @@ supportable_indirect_convert_operation (code_helper code, if (found_mode) { - converts->safe_push (std::make_pair (cvt_type, tc2)); + converts.safe_push (std::make_pair (cvt_type, tc2)); if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out)) - converts->s
[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Correction régression coarray dummy_3
https://gcc.gnu.org/g:8eb0b9eb3d8c7a538117b7054e7c8f7d1ea8e9a5 commit 8eb0b9eb3d8c7a538117b7054e7c8f7d1ea8e9a5 Author: Mikael Morin Date: Fri Jan 10 14:09:37 2025 +0100 Correction régression coarray dummy_3 Diff: --- gcc/fortran/trans-expr.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 57a976df58ff..14f92e7575dc 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -832,6 +832,9 @@ gfc_get_vptr_from_expr (tree expr) int gfc_descriptor_rank (tree descriptor) { + if (TREE_TYPE (descriptor) != NULL_TREE) +return GFC_TYPE_ARRAY_RANK (TREE_TYPE (descriptor)); + tree dim = gfc_get_descriptor_dimension (descriptor); tree dim_type = TREE_TYPE (dim); gcc_assert (TREE_CODE (dim_type) == ARRAY_TYPE);
[gcc r15-6798] c++: modules and DECL_REPLACEABLE_P
https://gcc.gnu.org/g:e86daddbde93b4b508f0957e9aac0b935f99ed93 commit r15-6798-ge86daddbde93b4b508f0957e9aac0b935f99ed93 Author: Jason Merrill Date: Fri Nov 22 12:00:20 2024 +0100 c++: modules and DECL_REPLACEABLE_P We need to remember that the ::operator new is replaceable to avoid a bogus error about __builtin_operator_new finding a non-replaceable function. This affected __get_temporary_buffer in stl_tempbuf.h. gcc/cp/ChangeLog: * module.cc (trees_out::core_bools): Write replaceable_operator. (trees_in::core_bools): Read it. gcc/testsuite/ChangeLog: * g++.dg/modules/operator-2_a.C: New test. * g++.dg/modules/operator-2_b.C: New test. Diff: --- gcc/cp/module.cc| 2 ++ gcc/testsuite/g++.dg/modules/operator-2_a.C | 14 ++ gcc/testsuite/g++.dg/modules/operator-2_b.C | 8 3 files changed, 24 insertions(+) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 7288c46a7baa..4fbe522264b3 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -5640,6 +5640,7 @@ trees_out::core_bools (tree t, bits_out& bits) WB (t->function_decl.has_debug_args_flag); WB (t->function_decl.versioned_function); + WB (t->function_decl.replaceable_operator); /* decl_type is a (misnamed) 2 bit discriminator. */ unsigned kind = t->function_decl.decl_type; @@ -5796,6 +5797,7 @@ trees_in::core_bools (tree t, bits_in& bits) RB (t->function_decl.has_debug_args_flag); RB (t->function_decl.versioned_function); + RB (t->function_decl.replaceable_operator); /* decl_type is a (misnamed) 2 bit discriminator. 
*/ unsigned kind = 0; diff --git a/gcc/testsuite/g++.dg/modules/operator-2_a.C b/gcc/testsuite/g++.dg/modules/operator-2_a.C new file mode 100644 index ..0b1f6e80422f --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/operator-2_a.C @@ -0,0 +1,14 @@ +// { dg-additional-options -fmodules } +// { dg-module-cmi M } + +module; + +#include + +export module M; + +export template +inline T* alloc (__SIZE_TYPE__ n) +{ + return (T*) __builtin_operator_new (n * sizeof (T), std::nothrow_t{}); +}; diff --git a/gcc/testsuite/g++.dg/modules/operator-2_b.C b/gcc/testsuite/g++.dg/modules/operator-2_b.C new file mode 100644 index ..fb21ccb6d30f --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/operator-2_b.C @@ -0,0 +1,8 @@ +// { dg-additional-options -fmodules } + +import M; + +int main() +{ + int *p = alloc(42); +}
[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Correction régression dummy_3
https://gcc.gnu.org/g:310e573c848de18bd55d2800c60bb675892f4d99 commit 310e573c848de18bd55d2800c60bb675892f4d99 Author: Mikael Morin Date: Fri Jan 10 19:03:04 2025 +0100 Correction régression dummy_3 Diff: --- gcc/fortran/trans-expr.cc | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 14f92e7575dc..77e8a55af457 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -919,8 +919,17 @@ gfc_class_array_data_assign (stmtblock_t *block, tree lhs_desc, tree rhs_desc, type = TREE_TYPE (tmp); else { - gcc_assert (TREE_TYPE (tmp) == TREE_TYPE (tmp2)); - type = TREE_TYPE (tmp); + int corank = GFC_TYPE_ARRAY_CORANK (TREE_TYPE (lhs_desc)); + int corank2 = GFC_TYPE_ARRAY_CORANK (TREE_TYPE (rhs_desc)); + if (corank > 0 && corank2 == 0) + type = TREE_TYPE (tmp2); + else if (corank2 > 0 && corank == 0) + type = TREE_TYPE (tmp); + else + { + gcc_assert (TREE_TYPE (tmp) == TREE_TYPE (tmp2)); + type = TREE_TYPE (tmp); + } } tmp = build4_loc (input_location, ARRAY_RANGE_REF, type, tmp,
[gcc r15-6799] c++: Fix ICE with invalid defaulted operator <=> [PR118387]
https://gcc.gnu.org/g:4c688399db12c509c081d52b8926ac6d7de6068c commit r15-6799-g4c688399db12c509c081d52b8926ac6d7de6068c Author: Jakub Jelinek Date: Fri Jan 10 18:42:58 2025 +0100 c++: Fix ICE with invalid defaulted operator <=> [PR118387] In the following testcase there are 2 issues, one is that B doesn't have operator<=> and the other is that A's operator<=> has int return type, i.e. not the standard comparison category. Because of the int return type, retcat is cc_last; when we first try to synthetize it, it is therefore with tentative false and complain tf_none, we find that B doesn't have operator<=> and because retcat isn't cc_last, don't try to search for other operators in genericize_spaceship. And then mark the operator deleted. When trying to explain the use of the deleted operator, tentative is still false, but complain is tf_warning_or_error. do_one_comp will first do: tree comp = build_new_op (loc, code, flags, lhs, rhs, NULL_TREE, NULL_TREE, &overload, tentative ? tf_none : complain); and because complain isn't tf_none, it will actually diagnose the bug already, but then (tentative || complain) is true and we call genericize_spaceship, which has if (tag == cc_last && is_auto (type)) { ... } gcc_checking_assert (tag < cc_last); and because tag is cc_last and type isn't auto, we just ICE on that assertion. The patch fixes it by returning error_mark_node from genericize_spaceship instead of failing the assertion. Note, the PR raises another problem. If on the same testcase the B b; line is removed, we silently synthetize operator<=> which will crash at runtime due to returning without a return statement. That is because the standard says that in that case it should return static_cast(std::strong_ordering::equal); but I can't find anywhere wording which would say that if that isn't valid, the function is deleted. 
https://eel.is/c++draft/class.compare#class.spaceship-2.2 seems to talk just about cases where there are some members and their comparison is invalid it is deleted, but here there are none and it follows https://eel.is/c++draft/class.compare#class.spaceship-3.sentence-2 So, we synthetize with tf_none, see the static_cast is invalid, don't add error_mark_node statement silently, but as the function isn't deleted, we just silently emit it. Should the standard be amended to say that the operator should be deleted even if it has no elements and the static cast from https://eel.is/c++draft/class.compare#class.spaceship-3.sentence-2 ? 2025-01-10 Jakub Jelinek PR c++/118387 * method.cc (genericize_spaceship): For tag == cc_last if type is not auto just return error_mark_node instead of failing checking assertion. * g++.dg/cpp2a/spaceship-synth17.C: New test. Diff: --- gcc/cp/method.cc | 4 ++-- gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C | 19 +++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc index 64535f52019f..304d11d145c5 100644 --- a/gcc/cp/method.cc +++ b/gcc/cp/method.cc @@ -1097,8 +1097,8 @@ genericize_spaceship (location_t loc, tree type, tree op0, tree op1) if (type == error_mark_node) return error_mark_node; } - - gcc_checking_assert (tag < cc_last); + else if (tag == cc_last) +return error_mark_node; tree r; bool scalar = SCALAR_TYPE_P (TREE_TYPE (op0)); diff --git a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C new file mode 100644 index ..a7793314ce1e --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth17.C @@ -0,0 +1,19 @@ +// PR c++/118387 +// { dg-do compile { target c++20 } } + +#include + +struct B {}; + +struct A +{ + B b; // { dg-error "no match for 'operator<=>' in '\[^\n\r]*' \\\(operand types are 'B' and 'B'\\\)" } + int operator<=> (const A &) const = default; +}; + +int +main () +{ + A a; + return a <=> a; // { dg-error "use of deleted 
function 'constexpr int A::operator<=>\\\(const A&\\\) const'" } +}
[gcc r15-6800] Do not call cp_parser_omp_dispatch directly in cp_parser_pragma
https://gcc.gnu.org/g:b5a679898986ae22ffdec538374c5378c26a229f commit r15-6800-gb5a679898986ae22ffdec538374c5378c26a229f Author: Paul-Antoine Arras Date: Mon Jan 6 16:06:43 2025 +0100 Do not call cp_parser_omp_dispatch directly in cp_parser_pragma This is a followup to ed49709acda OpenMP: C++ front-end support for dispatch + adjust_args. The call to cp_parser_omp_dispatch only belongs in cp_parser_omp_construct. In cp_parser_pragma, handle PRAGMA_OMP_DISPATCH by calling cp_parser_omp_construct. gcc/cp/ChangeLog: * parser.cc (cp_parser_pragma): Replace call to cp_parser_omp_dispatch with cp_parser_omp_construct and check context. gcc/testsuite/ChangeLog: * g++.dg/gomp/dispatch-8.C: New test. Diff: --- gcc/cp/parser.cc | 4 +++- gcc/testsuite/g++.dg/gomp/dispatch-8.C | 10 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 80bc2d8e9e1e..9600b1409164 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -53057,7 +53057,9 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context, bool *if_p) break; case PRAGMA_OMP_DISPATCH: - cp_parser_omp_dispatch (parser, pragma_tok); + if (context != pragma_stmt && context != pragma_compound) + goto bad_stmt; + cp_parser_omp_construct (parser, pragma_tok, if_p); return true; case PRAGMA_IVDEP: diff --git a/gcc/testsuite/g++.dg/gomp/dispatch-8.C b/gcc/testsuite/g++.dg/gomp/dispatch-8.C new file mode 100644 index ..b8e8e73db1ff --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/dispatch-8.C @@ -0,0 +1,10 @@ +// { dg-do compile } + +// Check that an appropriate diagnostic is emitted when a dispatch directive +// appears in a pragma_member context. + +void k(); +struct t { + #pragma omp dispatch // { dg-error "expected declaration specifiers before end of line" } + k(); // { dg-error ".*" } +};
[gcc r13-9296] c++: constexpr error with fn redecl in local scope [PR111132]
https://gcc.gnu.org/g:294140d752fc9a3a790497da9f1e968e9849b40f commit r13-9296-g294140d752fc9a3a790497da9f1e968e9849b40f Author: Marek Polacek Date: Tue Apr 2 12:59:38 2024 -0400 c++: constexpr error with fn redecl in local scope [PR111132] We evaluate constexpr functions on the original, pre-genericization bodies. That means that the function body we're evaluating will not have gone through cp_genericize_r's "Map block scope extern declarations to visible declarations with the same name and type in outer scopes if any". Here: constexpr bool bar() { return true; } // #1 constexpr bool foo() { constexpr bool bar(void); // #2 return bar(); } it means that we: 1) register_constexpr_fundef (#1) 2) cp_genericize (#1) nothing interesting happens 3) register_constexpr_fundef (foo) does copy_fn, so we have two copies of the BIND_EXPR 4) cp_genericize (foo) this remaps #2 to #1, but only on one copy of the BIND_EXPR 5) retrieve_constexpr_fundef (foo) we find it, no problem 6) retrieve_constexpr_fundef (#2) and here #2 isn't found in constexpr_fundef_table, because we're working on the BIND_EXPR copy where #2 wasn't mapped to #1 so we fail. We've only registered #1. It should work to use DECL_LOCAL_DECL_ALIAS (which used to be extern_decl_map). We evaluate constexpr functions on pre-cp_fold bodies to avoid diagnostic problems, but the remapping I'm proposing should not interfere with diagnostics. This is not a problem for a global scope redeclaration; there we go through duplicate_decls which keeps the DECL_UID: DECL_UID (olddecl) = olddecl_uid; and DECL_UID is what constexpr_fundef_hasher::hash uses. PR c++/111132 gcc/cp/ChangeLog: * constexpr.cc (get_function_named_in_call): Use cp_get_fndecl_from_callee. * cvt.cc (cp_get_fndecl_from_callee): If there's a DECL_LOCAL_DECL_ALIAS, use it. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/constexpr-redeclaration3.C: New test. * g++.dg/cpp0x/constexpr-redeclaration4.C: New test. 
(cherry picked from commit 8c9063825ce726fcbbc067d8a6d062cc2d4acf5e) Diff: --- gcc/cp/constexpr.cc | 10 -- gcc/cp/cvt.cc | 18 -- gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration3.C | 13 + gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration4.C | 14 ++ 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index d34b8bdbd166..8a66528d5caf 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -702,16 +702,14 @@ build_constexpr_constructor_member_initializers (tree type, tree body) /* We have an expression tree T that represents a call, either CALL_EXPR or AGGR_INIT_EXPR. If the call is lexically to a named function, - retrun the _DECL for that function. */ + return the _DECL for that function. */ static tree get_function_named_in_call (tree t) { - tree fun = cp_get_callee (t); - if (fun && TREE_CODE (fun) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (fun, 0)) == FUNCTION_DECL) -fun = TREE_OPERAND (fun, 0); - return fun; + tree callee = cp_get_callee (t); + tree fun = cp_get_fndecl_from_callee (callee, /*fold*/false); + return fun ? fun : callee; } /* Subroutine of check_constexpr_fundef. BODY is the body of a function diff --git a/gcc/cp/cvt.cc b/gcc/cp/cvt.cc index 17827d06a4a6..1b34dc73f128 100644 --- a/gcc/cp/cvt.cc +++ b/gcc/cp/cvt.cc @@ -1001,8 +1001,22 @@ cp_get_fndecl_from_callee (tree fn, bool fold /* = true */) { if (fn == NULL_TREE) return fn; + + /* We evaluate constexpr functions on the original, pre-genericization + bodies. So block-scope extern declarations have not been mapped to + declarations in outer scopes. Use the namespace-scope declaration, + if any, so that retrieve_constexpr_fundef can find it (PR32). 
*/ + auto fn_or_local_alias = [] (tree f) +{ + if (DECL_LOCAL_DECL_P (f)) + if (tree alias = DECL_LOCAL_DECL_ALIAS (f)) + if (alias != error_mark_node) + return alias; + return f; +}; + if (TREE_CODE (fn) == FUNCTION_DECL) -return fn; +return fn_or_local_alias (fn); tree type = TREE_TYPE (fn); if (type == NULL_TREE || !INDIRECT_TYPE_P (type)) return NULL_TREE; @@ -1013,7 +1027,7 @@ cp_get_fndecl_from_callee (tree fn, bool fold /* = true */) || TREE_CODE (fn) == FDESC_EXPR) fn = TREE_OPERAND (fn, 0); if (TREE_CODE (fn) == FUNCTION_DECL) -return fn; +return fn_or_local_alias (fn); return NULL_TREE; } diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration3.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-redeclaration3.C new f
[gcc r13-9297] c++: wrong looser excep spec for dep noexcept [PR113158]
https://gcc.gnu.org/g:c22c3a743d9827e58b9ff11a09d7227500c0dae3 commit r13-9297-gc22c3a743d9827e58b9ff11a09d7227500c0dae3 Author: Marek Polacek Date: Thu Feb 15 17:07:43 2024 -0500 c++: wrong looser excep spec for dep noexcept [PR113158] Here we find ourselves in maybe_check_overriding_exception_spec in a template context where we can't instantiate a dependent noexcept. That's OK, but we have to defer the checking otherwise we give wrong errors. PR c++/113158 gcc/cp/ChangeLog: * search.cc (maybe_check_overriding_exception_spec): Defer checking when a noexcept couldn't be instantiated & evaluated to false/true. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/noexcept83.C: New test. (cherry picked from commit 876fa432ef4074053fa65b1855e7d43320515576) Diff: --- gcc/cp/search.cc| 11 ++ gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 + 2 files changed, 48 insertions(+) diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc index 3f521b3bd72f..211e5724a04d 100644 --- a/gcc/cp/search.cc +++ b/gcc/cp/search.cc @@ -1928,6 +1928,17 @@ maybe_check_overriding_exception_spec (tree overrider, tree basefn) || UNPARSED_NOEXCEPT_SPEC_P (over_throw)) return true; + /* We also have to defer checking when we're in a template and couldn't + instantiate & evaluate the noexcept to true/false. 
*/ + if (processing_template_decl) +if ((base_throw +&& base_throw != noexcept_true_spec +&& base_throw != noexcept_false_spec) + || (over_throw + && over_throw != noexcept_true_spec + && over_throw != noexcept_false_spec)) + return true; + if (!comp_except_specs (base_throw, over_throw, ce_derived)) { auto_diagnostic_group d; diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept83.C b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C new file mode 100644 index ..47832bbb44d3 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C @@ -0,0 +1,37 @@ +// PR c++/113158 +// { dg-do compile { target c++11 } } + +template +struct V { + static constexpr bool t = false; +}; +struct base { +virtual int f() = 0; +}; + +template +struct derived : base { +int f() noexcept(V::t) override; +}; + +struct base2 { +virtual int f() noexcept = 0; +}; + +template +struct W { + static constexpr bool t = B; +}; + +template +struct derived2 : base2 { +int f() noexcept(W::t) override; // { dg-error "looser exception specification" } +}; + +void +g () +{ + derived d1; + derived2 d2; // { dg-message "required from here" } + derived2 d3; +}
[gcc r13-9299] c++: wrong std::is_convertible with cv-qual fn [PR109680]
https://gcc.gnu.org/g:005b1f418350a3ef7c5280b19a82fb28c0856e7c commit r13-9299-g005b1f418350a3ef7c5280b19a82fb28c0856e7c Author: Marek Polacek Date: Tue May 2 17:36:00 2023 -0400 c++: wrong std::is_convertible with cv-qual fn [PR109680] This PR points out that std::is_convertible has given the wrong answer in static_assert (!std::is_convertible_v , ""); since r13-2822 implemented __is_{,nothrow_}convertible. std::is_convertible uses the imaginary To test() { return std::declval<From>(); } to do its job. Here, From is 'int () const'. std::declval is defined as: template<class T> typename std::add_rvalue_reference<T>::type declval() noexcept; std::add_rvalue_reference is defined as "If T is a function type that has no cv- or ref- qualifier or an object type, provides a member typedef type which is T&&, otherwise type is T." In our case, T is cv-qualified, so the result is T, so we end up with int () const declval() noexcept; which is invalid. In other words, this is pretty much like: using T = int () const; T fn1(); // bad, fn returning a fn T& fn2(); // bad, cannot declare reference to qualified function type T* fn3(); // bad, cannot declare pointer to qualified function type using U = int (); U fn4(); // bad, fn returning a fn U& fn5(); // OK U* fn6(); // OK I think is_convertible_helper needs to simulate std::declval better. To that end, I'm introducing build_trait_object, to be used where a declval is needed. PR c++/109680 gcc/cp/ChangeLog: * method.cc (build_trait_object): New. (assignable_expr): Use it. (ref_xes_from_temporary): Likewise. (is_convertible_helper): Likewise. Check FUNC_OR_METHOD_TYPE_P. gcc/testsuite/ChangeLog: * g++.dg/ext/is_convertible6.C: New test. 
(cherry picked from commit 4c2ffb02fd4104d77c5d907662f04434dc4c3fe8) Diff: --- gcc/cp/method.cc | 39 +++--- gcc/testsuite/g++.dg/ext/is_convertible6.C | 16 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc index 09ea6d732dfc..c9d9e3516f3b 100644 --- a/gcc/cp/method.cc +++ b/gcc/cp/method.cc @@ -1907,6 +1907,27 @@ build_stub_object (tree reftype) return convert_from_reference (stub); } +/* Build a std::declval() expression and return it. */ + +tree +build_trait_object (tree type) +{ + /* TYPE can't be a function with cv-/ref-qualifiers: std::declval is + defined as + + template + typename std::add_rvalue_reference::type declval() noexcept; + + and std::add_rvalue_reference yields T when T is a function with + cv- or ref-qualifiers, making the definition ill-formed. */ + if (FUNC_OR_METHOD_TYPE_P (type) + && (type_memfn_quals (type) != TYPE_UNQUALIFIED + || type_memfn_rqual (type) != REF_QUAL_NONE)) +return error_mark_node; + + return build_stub_object (type); +} + /* Determine which function will be called when looking up NAME in TYPE, called with a single ARGTYPE argument, or no argument if ARGTYPE is null. FLAGS and COMPLAIN are as for build_new_method_call. @@ -2055,8 +2076,8 @@ static tree assignable_expr (tree to, tree from) { cp_unevaluated cp_uneval_guard; - to = build_stub_object (to); - from = build_stub_object (from); + to = build_trait_object (to); + from = build_trait_object (from); tree r = cp_build_modify_expr (input_location, to, NOP_EXPR, from, tf_none); return r; } @@ -2235,7 +2256,9 @@ ref_xes_from_temporary (tree to, tree from, bool direct_init_p) return false; /* We don't check is_constructible: if T isn't constructible from U, we won't be able to create a conversion. */ - tree val = build_stub_object (from); + tree val = build_trait_object (from); + if (val == error_mark_node) +return false; if (!TYPE_REF_P (from) && TREE_CODE (from) != FUNCTION_TYPE) val = CLASS_TYPE_P (from) ? 
force_rvalue (val, tf_none) : rvalue (val); return ref_conv_binds_to_temporary (to, val, direct_init_p).is_true (); @@ -2250,7 +2273,15 @@ is_convertible_helper (tree from, tree to) if (VOID_TYPE_P (from) && VOID_TYPE_P (to)) return integer_one_node; cp_unevaluated u; - tree expr = build_stub_object (from); + tree expr = build_trait_object (from); + /* std::is_{,nothrow_}convertible test whether the imaginary function + definition + + To test() { return std::declval(); } + + is well-formed. A function can't return a function. */ + if (FUNC_OR_METHOD_TYPE_P (to) || expr == error_mark_node) +return error_mark_node; deferring_access_check_sentinel acs (dk_no_deferred); return perform_implicit_conversion (to, expr, tf_none); } diff --git a/gcc/testsuite/g++.dg/ext/is_convertible6.
[gcc r13-9293] c++: ICE with enum and conversion fn in template [PR115657]
https://gcc.gnu.org/g:45ff9fcd465f445ca43a584e20a4568f4e41539f commit r13-9293-g45ff9fcd465f445ca43a584e20a4568f4e41539f Author: Marek Polacek Date: Thu Aug 15 18:47:29 2024 -0400 c++: ICE with enum and conversion fn in template [PR115657] Here we initialize an enumerator with a class prvalue with a conversion function. When we fold it in build_enumerator, we create a TARGET_EXPR for the object, and subsequently crash in tsubst_expr, which should not see such a code. Normally, we fix similar problems by using an IMPLICIT_CONV_EXPR but here I may get away with not using the result of fold_non_dependent_expr unless the result is a constant. A TARGET_EXPR is not constant. PR c++/115657 gcc/cp/ChangeLog: * decl.cc (build_enumerator): Call maybe_fold_non_dependent_expr instead of fold_non_dependent_expr. gcc/testsuite/ChangeLog: * g++.dg/cpp1y/constexpr-recursion2.C: New test. * g++.dg/template/conv21.C: New test. (cherry picked from commit 53283c3231a7b94e728619cccbf21170fb36b2a8) Diff: --- gcc/cp/decl.cc| 10 -- gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C | 22 ++ gcc/testsuite/g++.dg/template/conv21.C| 14 ++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 0126684c7896..dbd1ee664c60 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -16905,9 +16905,15 @@ build_enumerator (tree name, tree value, tree enumtype, tree attributes, tree type; /* scalar_constant_value will pull out this expression, so make sure - it's folded as appropriate. */ + it's folded as appropriate. + + Creating a TARGET_EXPR in a template breaks when substituting, and + here we would create it for instance when using a class prvalue with + a user-defined conversion function. So don't use such a tree. We + instantiate VALUE here to get errors about bad enumerators even in + a template that does not get instantiated. 
*/ if (processing_template_decl) -value = fold_non_dependent_expr (value); +value = maybe_fold_non_dependent_expr (value); /* If the VALUE was erroneous, pretend it wasn't there; that will result in the enum being assigned the next value in sequence. */ diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C new file mode 100644 index ..f268f52e2b5b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-recursion2.C @@ -0,0 +1,22 @@ +// PR c++/115657 +// { dg-do compile { target c++14 } } +// { dg-options "-Wall" } + +// Like constexpr-recursion1.C but use a class with a conversion function. + +struct X { + constexpr operator int() { return 0; } +}; + +template +constexpr X f1 () +{ + enum E { a = f1<0> () }; // { dg-error "called in a constant expression before its definition is complete|is not an integer constant" } + return {}; +} + +constexpr X f3 () +{ + enum E { a = f3 () };// { dg-error "called in a constant expression before its definition is complete|is not an integer constant" } + return {}; +} diff --git a/gcc/testsuite/g++.dg/template/conv21.C b/gcc/testsuite/g++.dg/template/conv21.C new file mode 100644 index ..1dc7b3d50d9e --- /dev/null +++ b/gcc/testsuite/g++.dg/template/conv21.C @@ -0,0 +1,14 @@ +// PR c++/115657 +// { dg-do compile { target c++11 } } + +struct NonIntegral +{ +constexpr operator int() { return 0; } +}; + +template struct TemplatedStructural +{ +enum { e = NonIntegral{} }; +}; + +template struct TemplatedStructural;
[gcc r13-9298] c++: ICE initializing array of aggrs [PR117985]
https://gcc.gnu.org/g:ff0e01a99daa1784f1d0adc5b2a1aab86693b1ca commit r13-9298-gff0e01a99daa1784f1d0adc5b2a1aab86693b1ca Author: Marek Polacek Date: Thu Dec 12 14:56:07 2024 -0500 c++: ICE initializing array of aggrs [PR117985] This crash started with my r12-7803 but I believe the problem lies elsewhere. build_vec_init has cleanup_flags whose purpose is -- if I grok this correctly -- to avoid destructing an object multiple times. Let's say we are initializing an array of A. Then we might end up in a scenario similar to initlist-eh1.C: try { call A::A in a loop // #0 try { call a fn using the array } finally { // #1 call A::~A in a loop } } catch { // #2 call A::~A in a loop } cleanup_flags makes us emit a statement like D.3048 = 2; at #0 to disable performing the cleanup at #2, since #1 will take care of the destruction of the array. But if we are not emitting the loop because we can use a constant initializer (and use a single { a, b, ...}), we shouldn't generate the statement resetting the iterator to its initial value. Otherwise we crash in gimplify_var_or_parm_decl because it gets the stray decl D.3048. PR c++/117985 gcc/cp/ChangeLog: * init.cc (build_vec_init): Pop CLEANUP_FLAGS if we're not generating the loop. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/initlist-array23.C: New test. * g++.dg/cpp0x/initlist-array24.C: New test. (cherry picked from commit 40e5636e086e51f5908a1a01be9cba2218dc26d8) Diff: --- gcc/cp/init.cc| 9 + gcc/testsuite/g++.dg/cpp0x/initlist-array23.C | 28 +++ gcc/testsuite/g++.dg/cpp0x/initlist-array24.C | 27 ++ 3 files changed, 64 insertions(+) diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc index 4977ce343984..65cb35b98500 100644 --- a/gcc/cp/init.cc +++ b/gcc/cp/init.cc @@ -5019,6 +5019,15 @@ build_vec_init (tree base, tree maxindex, tree init, { if (!saw_non_const) { + /* If we're not generating the loop, we don't need to reset the +iterator. 
*/ + if (cleanup_flags + && !vec_safe_is_empty (*cleanup_flags)) + { + auto l = (*cleanup_flags)->last (); + gcc_assert (TREE_PURPOSE (l) == iterator); + (*cleanup_flags)->pop (); + } tree const_init = build_constructor (atype, const_vec); return build2 (INIT_EXPR, atype, obase, const_init); } diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-array23.C b/gcc/testsuite/g++.dg/cpp0x/initlist-array23.C new file mode 100644 index ..cda2afb9fccc --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist-array23.C @@ -0,0 +1,28 @@ +// PR c++/117985 +// { dg-do compile { target c++11 } } + +struct _Vector_impl { + constexpr +_Vector_impl() {} +}; +struct _Vector_base { + ~_Vector_base(); + _Vector_impl _M_impl; +}; +struct vector : private _Vector_base {}; +struct string { + string(); +}; +struct VEC { + vector pane{}; +}; +struct FOO { + VEC screen[1]{}; + string debug_name; +}; + +int +main () +{ + FOO{}; +} diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-array24.C b/gcc/testsuite/g++.dg/cpp0x/initlist-array24.C new file mode 100644 index ..7dda00d5c0b8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist-array24.C @@ -0,0 +1,27 @@ +// PR c++/117985 +// { dg-do compile { target c++20 } } + +struct _Vector_impl { + constexpr _Vector_impl() {} +}; +struct _Vector_base { + constexpr ~_Vector_base() {} + _Vector_impl _M_impl; +}; +struct vector : private _Vector_base {}; +struct string { + string(); +}; +struct VEC { + vector pane{}; +}; +struct FOO { + VEC screen[1]{}; + string debug_name; +}; + +int +main () +{ + FOO{}; +}
[gcc r13-9300] c++: concept in default argument [PR109859]
https://gcc.gnu.org/g:a2fd45adfdb3ff219fd97d158fb66cc99ef0afd4 commit r13-9300-ga2fd45adfdb3ff219fd97d158fb66cc99ef0afd4 Author: Marek Polacek Date: Wed Sep 18 15:44:31 2024 -0400 c++: concept in default argument [PR109859] 1) We're hitting the assert in cp_parser_placeholder_type_specifier. It says that if it turns out to be false, we should do error() instead. Do so, then. 2) lambda-targ8.C should compile fine, though. The problem was that local_variables_forbidden_p wasn't cleared when we're about to parse the optional template-parameter-list for a lambda in a default argument. PR c++/109859 gcc/cp/ChangeLog: * parser.cc (cp_parser_lambda_declarator_opt): Temporarily clear local_variables_forbidden_p. (cp_parser_placeholder_type_specifier): Turn an assert into an error. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/concepts-defarg3.C: New test. * g++.dg/cpp2a/lambda-targ8.C: New test. Reviewed-by: Jason Merrill (cherry picked from commit 4bcfaaed25b1b8ecc81f6a28d9ca76f00870dedf) Diff: --- gcc/cp/parser.cc | 9 +++-- gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C | 8 gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C | 10 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 47c15cff3468..d25ceff9ec38 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -11564,6 +11564,11 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, tree lambda_expr) "lambda templates are only available with " "%<-std=c++20%> or %<-std=gnu++20%>"); + /* Even though the whole lambda may be a default argument, its +template-parameter-list is a context where it's OK to create +new parameters. */ + auto lvf = make_temp_override (parser->local_variables_forbidden_p, 0u); + cp_lexer_consume_token (parser->lexer); template_param_list = cp_parser_template_parameter_list (parser); @@ -20204,8 +20209,8 @@ cp_parser_placeholder_type_specifier (cp_parser *parser, location_t loc, /* In a default argument we may not be creating new parameters. 
*/ if (parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN) { - /* If this assert turns out to be false, do error() instead. */ - gcc_assert (tentative); + if (!tentative) + error_at (loc, "invalid use of concept-name %qD", con); return error_mark_node; } return build_constrained_parameter (con, proto, args); diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C b/gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C new file mode 100644 index ..6fe82f91e434 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-defarg3.C @@ -0,0 +1,8 @@ +// PR c++/109859 +// { dg-do compile { target c++20 } } + +template +concept C = true; + +template // { dg-error "invalid use of concept-name .C." } +int f(); diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C b/gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C new file mode 100644 index ..3685b0ef880b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/lambda-targ8.C @@ -0,0 +1,10 @@ +// PR c++/109859 +// { dg-do compile { target c++20 } } + +template +concept A = true; + +template {}> +int x; + +void g() { (void) x<>; }
[gcc r13-9301] c++: ICE with temporary of class type in array DMI [PR109966]
https://gcc.gnu.org/g:9ad64458dd8fdb384e45aa3647380de53e04cedd commit r13-9301-g9ad64458dd8fdb384e45aa3647380de53e04cedd Author: Marek Polacek Date: Mon Mar 11 17:45:55 2024 -0400 c++: ICE with temporary of class type in array DMI [PR109966] This ICE started with the fairly complicated r13-765. We crash in gimplify_var_or_parm_decl because a stray VAR_DECL leaked there. The problem is ultimately that potential_prvalue_result_of wasn't correctly handling arrays and replace_placeholders_for_class_temp_r replaced a PLACEHOLDER_EXPR in a TARGET_EXPR which is used in the context of copy elision. If I have M m[2] = { M{""}, M{""} }; then we don't invoke the M(const M&) copy-ctor. One part of the fix is to use TARGET_EXPR_ELIDING_P rather than potential_prvalue_result_of. That unfortunately doesn't handle the case like struct N { N(M); }; N arr[2] = { M{""}, M{""} }; because TARGET_EXPRs that initialize a function argument are not marked TARGET_EXPR_ELIDING_P even though gimplify_arg drops such TARGET_EXPRs on the floor. We can use a pset to avoid replacing placeholders in them. I made an attempt to use set_target_expr_eliding in convert_for_arg_passing but that regressed constexpr-diag1.C, and does not seem like a prudent change in stage 4 anyway. PR c++/109966 gcc/cp/ChangeLog: * typeck2.cc (potential_prvalue_result_of): Remove. (replace_placeholders_for_class_temp_r): Check TARGET_EXPR_ELIDING_P. Use a pset. Don't replace_placeholders in TARGET_EXPRs that initialize a function argument. gcc/testsuite/ChangeLog: * g++.dg/cpp1y/nsdmi-aggr20.C: New test. * g++.dg/cpp1y/nsdmi-aggr21.C: New test. 
(cherry picked from commit 6039925631780741ba77666ef2ef743aa2a925a8) Diff: --- gcc/cp/typeck2.cc | 55 +--- gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr20.C | 17 + gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr21.C | 59 +++ 3 files changed, 92 insertions(+), 39 deletions(-) diff --git a/gcc/cp/typeck2.cc b/gcc/cp/typeck2.cc index daa651b260fb..27263f503f18 100644 --- a/gcc/cp/typeck2.cc +++ b/gcc/cp/typeck2.cc @@ -1396,41 +1396,6 @@ digest_init_flags (tree type, tree init, int flags, tsubst_flags_t complain) return digest_init_r (type, init, 0, flags, complain); } -/* Return true if SUBOB initializes the same object as FULL_EXPR. - For instance: - - A a = A{}; // initializer - A a = (A{});// initializer - A a = (1, A{}); // initializer - A a = true ? A{} : A{}; // initializer - auto x = A{}.x; // temporary materialization - auto x = foo(A{}); // temporary materialization - - FULL_EXPR is the whole expression, SUBOB is its TARGET_EXPR subobject. */ - -static bool -potential_prvalue_result_of (tree subob, tree full_expr) -{ - if (subob == full_expr) -return true; - else if (TREE_CODE (full_expr) == TARGET_EXPR) -{ - tree init = TARGET_EXPR_INITIAL (full_expr); - if (TREE_CODE (init) == COND_EXPR) - return (potential_prvalue_result_of (subob, TREE_OPERAND (init, 1)) - || potential_prvalue_result_of (subob, TREE_OPERAND (init, 2))); - else if (TREE_CODE (init) == COMPOUND_EXPR) - return potential_prvalue_result_of (subob, TREE_OPERAND (init, 1)); - /* ??? I don't know if this can be hit. */ - else if (TREE_CODE (init) == PAREN_EXPR) - { - gcc_checking_assert (false); - return potential_prvalue_result_of (subob, TREE_OPERAND (init, 0)); - } -} - return false; -} - /* Callback to replace PLACEHOLDER_EXPRs in a TARGET_EXPR (which isn't used in the context of guaranteed copy elision). 
*/ @@ -1438,11 +1403,13 @@ static tree replace_placeholders_for_class_temp_r (tree *tp, int *, void *data) { tree t = *tp; - tree full_expr = *static_cast(data); + auto pset = static_cast *>(data); /* We're looking for a TARGET_EXPR nested in the whole expression. */ if (TREE_CODE (t) == TARGET_EXPR - && !potential_prvalue_result_of (t, full_expr)) + /* That serves as temporary materialization, not an initializer. */ + && !TARGET_EXPR_ELIDING_P (t) + && !pset->add (t)) { tree init = TARGET_EXPR_INITIAL (t); while (TREE_CODE (init) == COMPOUND_EXPR) @@ -1457,6 +1424,16 @@ replace_placeholders_for_class_temp_r (tree *tp, int *, void *data) gcc_checking_assert (!find_placeholders (init)); } } + /* TARGET_EXPRs initializing function arguments are not marked as eliding, + even though gimplify_arg drops them on the floor. Don't go replacing + placeholders in them. */ + else if (TREE_CODE (t) == CALL_EXPR || TREE_CODE (t) == AGGR_INIT_EXPR
[gcc r13-9302] c++: ICE with reference NSDMI [PR114854]
https://gcc.gnu.org/g:c2581c76c0738298d27b417067dfc2e500f5e21a commit r13-9302-gc2581c76c0738298d27b417067dfc2e500f5e21a Author: Marek Polacek Date: Wed May 8 15:43:58 2024 -0400 c++: ICE with reference NSDMI [PR114854] Here we crash on a cp_gimplify_expr/TARGET_EXPR assert: /* A TARGET_EXPR that expresses direct-initialization should have been elided by cp_gimplify_init_expr. */ gcc_checking_assert (!TARGET_EXPR_DIRECT_INIT_P (*expr_p)); the TARGET_EXPR in question is created for the NSDMI in: class Vector { int m_size; }; struct S { const Vector &vec{}; }; where we first need to create a Vector{} temporary, and then bind the vec reference to it. The temporary is represented by a TARGET_EXPR and it cannot be elided. When we create an object of type S, we get D.2848 = {.vec=(const struct Vector &) &TARGET_EXPR } where the TARGET_EXPR is no longer direct-initializing anything. Fixed by not setting TARGET_EXPR_DIRECT_INIT_P in convert_like_internal/ck_user. PR c++/114854 gcc/cp/ChangeLog: * call.cc (convert_like_internal) : Don't set TARGET_EXPR_DIRECT_INIT_P. gcc/testsuite/ChangeLog: * g++.dg/cpp1y/nsdmi-aggr22.C: New test. 
(cherry picked from commit 1a05332bbac98a4c002bef3fb45a3ad9d56b3a71) Diff: --- gcc/cp/call.cc| 6 +- gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C | 12 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index f3efacafe137..d4aaeba94f6d 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -8475,16 +8475,12 @@ convert_like_internal (conversion *convs, tree expr, tree fn, int argnum, && TYPE_HAS_DEFAULT_CONSTRUCTOR (totype) && !processing_template_decl) { - bool direct = CONSTRUCTOR_IS_DIRECT_INIT (expr); if (abstract_virtuals_error (NULL_TREE, totype, complain)) return error_mark_node; expr = build_value_init (totype, complain); expr = get_target_expr (expr, complain); if (expr != error_mark_node) - { - TARGET_EXPR_LIST_INIT_P (expr) = true; - TARGET_EXPR_DIRECT_INIT_P (expr) = direct; - } + TARGET_EXPR_LIST_INIT_P (expr) = true; return expr; } diff --git a/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C b/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C new file mode 100644 index ..a4f9ae19ca9d --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/nsdmi-aggr22.C @@ -0,0 +1,12 @@ +// PR c++/114854 +// { dg-do compile { target c++14 } } + +struct Vector { + int m_size; +}; +struct S { + const Vector &vec{}; +}; + +void spawn(S); +void test() { spawn({}); }
[gcc r13-9294] c++: fix ICE with constexpr ARRAY_REF [PR110382]
https://gcc.gnu.org/g:a82583e43eaacab9a111d1fca14c7d272aa3e039 commit r13-9294-ga82583e43eaacab9a111d1fca14c7d272aa3e039 Author: Marek Polacek Date: Fri Jul 21 17:48:37 2023 -0400 c++: fix ICE with constexpr ARRAY_REF [PR110382] This code in cxx_eval_array_reference has been hard to get right. In r12-2304 I added some code; in r13-5693 I removed some of it. Here the problematic line is "S s = arr[0];" which causes a crash on the assert in verify_ctor_sanity: gcc_assert (!ctx->object || !DECL_P (ctx->object) || ctx->global->get_value (ctx->object) == ctx->ctor); ctx->object is the VAR_DECL 's', which is correct here. The second line points to the problem: we replaced ctx->ctor in cxx_eval_array_reference: new_ctx.ctor = build_constructor (elem_type, NULL); // #1 which I think we shouldn't have; the CONSTRUCTOR we created in cxx_eval_constant_expression/DECL_EXPR new_ctx.ctor = build_constructor (TREE_TYPE (r), NULL); had the right type. We still need #1 though. E.g., in constexpr-96241.C, we never set ctx.ctor/object before calling cxx_eval_array_reference, so we have to build a CONSTRUCTOR there. And in constexpr-101371-2.C we have a ctx.ctor, but it has the wrong type, so we need a new one. We can fix the problem by always clearing the object, and, as an optimization, only create/free a new ctor when actually needed. PR c++/110382 gcc/cp/ChangeLog: * constexpr.cc (cxx_eval_array_reference): Create a new constructor only when we don't already have a matching one. Clear the object when the type is non-scalar. gcc/testsuite/ChangeLog: * g++.dg/cpp1y/constexpr-110382.C: New test. 
(cherry picked from commit 6e424febfbcb27c21a7fe3a137e614765f9cf9d2) Diff: --- gcc/cp/constexpr.cc | 13 +++-- gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C | 17 + 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index f885a806c0a2..d34b8bdbd166 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -4301,15 +4301,24 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree t, else val = build_value_init (elem_type, tf_warning_or_error); - if (!SCALAR_TYPE_P (elem_type)) + /* Create a new constructor only if we don't already have a suitable one. */ + const bool new_ctor = (!SCALAR_TYPE_P (elem_type) +&& (!ctx->ctor +|| !same_type_ignoring_top_level_qualifiers_p + (elem_type, TREE_TYPE (ctx->ctor; + if (new_ctor) { new_ctx = *ctx; + /* We clear the object here. We used to replace it with T, but that +caused problems (101371, 108158); and anyway, T is the initializer, +not the target object. */ + new_ctx.object = NULL_TREE; new_ctx.ctor = build_constructor (elem_type, NULL); ctx = &new_ctx; } t = cxx_eval_constant_expression (ctx, val, lval, non_constant_p, overflow_p); - if (!SCALAR_TYPE_P (elem_type) && t != ctx->ctor) + if (new_ctor && t != ctx->ctor) free_constructor (ctx->ctor); return t; } diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C new file mode 100644 index ..317c5ecfcd52 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-110382.C @@ -0,0 +1,17 @@ +// PR c++/110382 +// { dg-do compile { target c++14 } } + +struct S { + double a = 0; +}; + +constexpr double +g () +{ + S arr[1]; + S s = arr[0]; + (void) arr[0]; + return s.a; +} + +int main() { return g (); }
[gcc r13-9292] c++: fix ICE with designated initializer [PR110114]
https://gcc.gnu.org/g:dcad20ccb6cd3c3db076c50cc36b1bf15aa78495 commit r13-9292-gdcad20ccb6cd3c3db076c50cc36b1bf15aa78495 Author: Marek Polacek Date: Wed Jul 19 08:47:29 2023 -0400 c++: fix ICE with designated initializer [PR110114] r13-1227 added an assert checking that the index in a CONSTRUCTOR is a FIELD_DECL. That's a reasonable assumption but in this case we never called reshape_init due to the type being incomplete, and so the index remained an identifier node: get_class_binding never got around to looking up the FIELD_DECL. We can avoid the crash by returning early in implicit_conversion_1; we'd return NULL anyway due to: if (i < CONSTRUCTOR_NELTS (ctor)) return NULL; in build_aggr_conv. PR c++/110114 gcc/cp/ChangeLog: * call.cc (implicit_conversion_1): Return early if the type isn't complete. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/initlist100.C: Adjust expected diagnostic. * g++.dg/cpp2a/desig28.C: New test. * g++.dg/cpp2a/desig29.C: New test. (cherry picked from commit 2cb0dc866e8f95151df5d759157708108e850dd9) Diff: --- gcc/cp/call.cc | 19 +++ gcc/testsuite/g++.dg/cpp0x/initlist100.C | 4 ++-- gcc/testsuite/g++.dg/cpp2a/desig28.C | 17 + gcc/testsuite/g++.dg/cpp2a/desig29.C | 10 ++ 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 70c7f6178b88..18a3db8b1dc2 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -2058,15 +2058,18 @@ implicit_conversion_1 (tree to, tree from, tree expr, bool c_cast_p, complain &= ~tf_error; /* Call reshape_init early to remove redundant braces. 
*/ - if (expr && BRACE_ENCLOSED_INITIALIZER_P (expr) - && CLASS_TYPE_P (to) - && COMPLETE_TYPE_P (complete_type (to)) - && !CLASSTYPE_NON_AGGREGATE (to)) + if (expr && BRACE_ENCLOSED_INITIALIZER_P (expr) && CLASS_TYPE_P (to)) { - expr = reshape_init (to, expr, complain); - if (expr == error_mark_node) - return NULL; - from = TREE_TYPE (expr); + to = complete_type (to); + if (!COMPLETE_TYPE_P (to)) + return nullptr; + if (!CLASSTYPE_NON_AGGREGATE (to)) + { + expr = reshape_init (to, expr, complain); + if (expr == error_mark_node) + return nullptr; + from = TREE_TYPE (expr); + } } if (TYPE_REF_P (to)) diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist100.C b/gcc/testsuite/g++.dg/cpp0x/initlist100.C index 9d80a004c173..6865d34a6f97 100644 --- a/gcc/testsuite/g++.dg/cpp0x/initlist100.C +++ b/gcc/testsuite/g++.dg/cpp0x/initlist100.C @@ -2,9 +2,9 @@ // { dg-do compile { target c++11 } } namespace std { -template class initializer_list; // { dg-message "declaration" } +template class initializer_list; } template struct B { B (std::initializer_list); }; struct C { virtual int foo (); }; -struct D : C {} d { B { D {} } }; // { dg-error "incomplete|no matching" } +struct D : C {} d { B { D {} } }; // { dg-error "no matching" } diff --git a/gcc/testsuite/g++.dg/cpp2a/desig28.C b/gcc/testsuite/g++.dg/cpp2a/desig28.C new file mode 100644 index ..b63265fea514 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/desig28.C @@ -0,0 +1,17 @@ +// PR c++/110114 +// { dg-do compile { target c++20 } } + +struct A { +int a,b; +}; + +struct B; + +void foo(const A &) {} +void foo(const B &) {} + +int +main () +{ + foo({.a=0}); +} diff --git a/gcc/testsuite/g++.dg/cpp2a/desig29.C b/gcc/testsuite/g++.dg/cpp2a/desig29.C new file mode 100644 index ..bd1a82b041dd --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/desig29.C @@ -0,0 +1,10 @@ +// PR c++/110114 +// { dg-do compile { target c++20 } } + +struct B; + +void foo(const B &) {} + +int main() { +foo({.a=0}); // { dg-error "invalid" } +}
[gcc r13-9295] c++: mutable temps in rodata [PR116369]
https://gcc.gnu.org/g:dfe7b5e9e7010f10e9737d5f15f5b48ba536e8f5 commit r13-9295-gdfe7b5e9e7010f10e9737d5f15f5b48ba536e8f5 Author: Marek Polacek Date: Thu Aug 29 15:13:03 2024 -0400 c++: mutable temps in rodata [PR116369] Here we wrongly mark the reference temporary for g TREE_READONLY, so it's put in .rodata and so we can't modify its subobject even when the subobject is marked mutable. This is so since r9-869. r14-1785 fixed a similar problem, but not in set_up_extended_ref_temp. PR c++/116369 gcc/cp/ChangeLog: * call.cc (set_up_extended_ref_temp): Don't mark a temporary TREE_READONLY if its type is TYPE_HAS_MUTABLE_P. gcc/testsuite/ChangeLog: * g++.dg/tree-ssa/initlist-opt7.C: New test. (cherry picked from commit 2801a49d1144bce5568b527d1972952ad3420f66) Diff: --- gcc/cp/call.cc| 4 +++- gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C | 13 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 18a3db8b1dc2..f3efacafe137 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -13650,7 +13650,9 @@ set_up_extended_ref_temp (tree decl, tree expr, vec **cleanups, init = cp_fully_fold (init); if (TREE_CONSTANT (init)) { - if (literal_type_p (type) && CP_TYPE_CONST_NON_VOLATILE_P (type)) + if (literal_type_p (type) + && CP_TYPE_CONST_NON_VOLATILE_P (type) + && !TYPE_HAS_MUTABLE_P (type)) { /* 5.19 says that a constant expression can include an lvalue-rvalue conversion applied to "a glvalue of literal type diff --git a/gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C b/gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C new file mode 100644 index ..2420db502a67 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/initlist-opt7.C @@ -0,0 +1,13 @@ +// PR c++/116369 +// { dg-do run { target c++11 } } + +struct f{ + mutable int t; +}; + +const f &g = {1}; + +int main() +{ + g.t++; +}
[gcc r15-6802] libatomic: Cleanup AArch64 ifunc selection
https://gcc.gnu.org/g:81bcf412c1c221bc2557666a6ca8381dac1de097 commit r15-6802-g81bcf412c1c221bc2557666a6ca8381dac1de097 Author: Wilco Dijkstra Date: Fri Jan 10 18:01:58 2025 + libatomic: Cleanup AArch64 ifunc selection Simplify and cleanup ifunc selection logic. Since LRCPC3 does not imply LSE2, has_rcpc3() should also check LSE2 is enabled. Passes regress and bootstrap, OK for commit? libatomic: * config/linux/aarch64/host-config.h (has_lse2): Cleanup. (has_lse128): Likewise. (has_rcpc3): Add early check for LSE2. Diff: --- libatomic/config/linux/aarch64/host-config.h | 76 +--- 1 file changed, 35 insertions(+), 41 deletions(-) diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h index f75d27bf2ff5..d0d44bf18eaa 100644 --- a/libatomic/config/linux/aarch64/host-config.h +++ b/libatomic/config/linux/aarch64/host-config.h @@ -91,69 +91,63 @@ has_lse2 (unsigned long hwcap, const __ifunc_arg_t *features) /* Check for LSE2. */ if (hwcap & HWCAP_USCAT) return true; - /* No point checking further for atomic 128-bit load/store if LSE - prerequisite not met. */ - if (!(hwcap & HWCAP_ATOMICS)) -return false; - if (!(hwcap & HWCAP_CPUID)) -return false; - unsigned long midr; - asm volatile ("mrs %0, midr_el1" : "=r" (midr)); + /* If LSE and CPUID are supported, check MIDR. */ + if (hwcap & HWCAP_CPUID && hwcap & HWCAP_ATOMICS) +{ + unsigned long midr; + asm volatile ("mrs %0, midr_el1" : "=r" (midr)); - /* Neoverse N1 supports atomic 128-bit load/store. */ - if (MIDR_IMPLEMENTOR (midr) == 'A' && MIDR_PARTNUM (midr) == 0xd0c) -return true; + /* Neoverse N1 supports atomic 128-bit load/store. */ + return MIDR_IMPLEMENTOR (midr) == 'A' && MIDR_PARTNUM (midr) == 0xd0c; +} return false; } -/* LSE128 atomic support encoded in ID_AA64ISAR0_EL1.Atomic, - bits[23:20]. The expected value is 0b0011. Check that. */ +/* LSE128 atomic support encoded in ID_AA64ISAR0_EL1.Atomic, bits[23:20]. + The minimum value for LSE128 is 0b0011. 
*/ #define AT_FEAT_FIELD(isar0) (((isar0) >> 20) & 15) static inline bool has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features) { - if (hwcap & _IFUNC_ARG_HWCAP - && features->_hwcap2 & HWCAP2_LSE128) -return true; - /* A 0 HWCAP2_LSE128 bit may be just as much a sign of missing HWCAP2 bit - support in older kernels as it is of CPU feature absence. Try fallback - method to guarantee LSE128 is not implemented. - - In the absence of HWCAP_CPUID, we are unable to check for LSE128. - If feature check available, check LSE2 prerequisite before proceeding. */ - if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT)) - return false; - - unsigned long isar0; - asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0)); - if (AT_FEAT_FIELD (isar0) >= 3) + if (hwcap & _IFUNC_ARG_HWCAP && features->_hwcap2 & HWCAP2_LSE128) return true; + + /* If LSE2 and CPUID are supported, check for LSE128. */ + if (hwcap & HWCAP_CPUID && hwcap & HWCAP_USCAT) +{ + unsigned long isar0; + asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0)); + return AT_FEAT_FIELD (isar0) >= 3; +} + return false; } -/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20]. The - expected value is 0b0011. Check that. */ +/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20]. + The minimum value for LRCPC3 is 0b0011. */ static inline bool has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features) { - if (hwcap & _IFUNC_ARG_HWCAP - && features->_hwcap2 & HWCAP2_LRCPC3) -return true; - /* Try fallback feature check method to guarantee LRCPC3 is not implemented. - - In the absence of HWCAP_CPUID, we are unable to check for RCPC3, return. - If feature check available, check LSE2 prerequisite before proceeding. */ - if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT)) + /* LSE2 is a prerequisite for atomic LDIAPP/STILP - check HWCAP_USCAT since + has_lse2 is more expensive and Neoverse N1 does not have LRCPC3. 
*/ + if (!(hwcap & HWCAP_USCAT)) return false; - unsigned long isar1; - asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1)); - if (AT_FEAT_FIELD (isar1) >= 3) + + if (hwcap & _IFUNC_ARG_HWCAP && features->_hwcap2 & HWCAP2_LRCPC3) return true; + + if (hwcap & HWCAP_CPUID) +{ + unsigned long isar1; + asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1)); + return AT_FEAT_FIELD (isar1) >= 3; +} + return false; }
[gcc r15-6803] c++: add fixed test [PR118391]
https://gcc.gnu.org/g:d201715989ce2f110e09cd15246969f9fcae5c61 commit r15-6803-gd201715989ce2f110e09cd15246969f9fcae5c61 Author: Marek Polacek Date: Fri Jan 10 15:05:00 2025 -0500 c++: add fixed test [PR118391] Fixed by r15-6740. PR c++/118391 gcc/testsuite/ChangeLog: * g++.dg/cpp2a/lambda-uneval20.C: New test. Diff: --- gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C | 15 +++ 1 file changed, 15 insertions(+) diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C new file mode 100644 index ..fa8b504da9db --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval20.C @@ -0,0 +1,15 @@ +// PR c++/118391 +// { dg-do compile { target c++20 } } + +template +using A = int; + +template +using B = decltype([] {}.template operator()()); + +template +using C = A>; + +C x; + +int main() {}
[gcc(refs/vendors/redhat/heads/gcc-14-branch)] Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9706ee9' into redhat/gcc-14-branch
https://gcc.gnu.org/g:e525669e462dd777a1af9932fe9188937acdeb69 commit e525669e462dd777a1af9932fe9188937acdeb69 Merge: b84ce6a258e0 a2de88e5d49f Author: Jakub Jelinek Date: Fri Jan 10 19:56:03 2025 +0100 Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9706ee9' into redhat/gcc-14-branch Diff: gcc/ChangeLog | 768 + gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog |70 + gcc/ada/exp_aggr.adb |32 +- gcc/ada/freeze.adb |26 +- gcc/ada/gcc-interface/trans.cc |10 +- gcc/ada/gnatvsn.ads| 3 +- gcc/ada/libgnarl/s-taprop__dummy.adb |11 +- gcc/ada/libgnat/a-ngrear.adb |24 +- gcc/ada/par-ch6.adb| 1 + gcc/ada/version.c | 5 +- gcc/analyzer/ChangeLog |43 + gcc/analyzer/analyzer.cc |15 +- gcc/analyzer/analyzer.h| 4 +- gcc/analyzer/engine.cc | 2 +- gcc/analyzer/kf.cc |26 + gcc/analyzer/known-function-manager.cc |38 +- gcc/analyzer/known-function-manager.h | 5 + gcc/analyzer/sm-file.cc| 8 + gcc/analyzer/sm-malloc.cc | 1 + gcc/analyzer/sm-signal.cc |11 +- gcc/builtins.cc|42 +- gcc/c-family/ChangeLog |29 + gcc/c-family/c-common.cc | 8 +- gcc/c-family/c-cppbuiltin.cc |13 +- gcc/c/ChangeLog|10 + gcc/c/c-parser.cc |12 +- gcc/cfgexpand.cc | 7 +- gcc/common/config/i386/cpuinfo.h | 1 + gcc/config/aarch64/aarch64-cores.def | 7 + gcc/config/aarch64/aarch64-sve-builtins-base.cc|11 +- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 8 +- gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 4 +- gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 4 +- gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- gcc/config/aarch64/aarch64-sve2.md | 8 +- gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/config/aarch64/aarch64.cc | 1 + gcc/config/aarch64/tuning_models/fujitsu_monaka.h |65 + gcc/config/arm/arm-mve-builtins.cc |42 +- gcc/config/arm/arm-protos.h| 1 + gcc/config/arm/arm.cc |24 + gcc/config/arm/arm_mve.h | 4 + gcc/config/arm/arm_mve_types.h | 4 + gcc/config/arm/constraints.md | 8 +- gcc/config/arm/predicates.md | 4 + gcc/config/arm/sync.md | 2 +- gcc/config/avr/avr.cc | 168 +- gcc/config/avr/avr.md | 7 +- 
gcc/config/i386/i386-builtin.def |10 +- gcc/config/i386/i386-expand.cc |12 + gcc/config/i386/i386.md| 2 +- gcc/config/i386/mmx.md |92 +- gcc/config/i386/sse.md | 5 +- gcc/config/loongarch/lasx.md | 2 +- gcc/config/loongarch/lasxintrin.h | 4 +- gcc/config/loongarch/loongarch-builtins.cc | 4 +- gcc/config/loongarch/lsx.md| 2 +- gcc/config/loongarch/lsxintrin.h | 4 +- gcc/config/pa/pa.cc| 1 + gcc/config/pa/pa.md|22 +- gcc/config/pa/predicates.md| 2 +- gcc/config/s390/s390.cc| 2 +- gcc/config/v850/v850.opt.urls |81 +- gcc/config/vax/vax.opt.urls|21 +- gcc/cp/ChangeLog | 208 + gcc/cp/call.cc |10 +- gcc/cp/constexpr.cc|27 +- gcc/cp/constraint.cc | 4 +- gcc/cp/decl.cc |13 +- gcc/cp/init.cc |21 +- gcc/cp/logic.cc|68 +- gcc/cp/module.cc |19 +- gcc/cp/parser.cc | 9 +- gcc/cp/pt.cc
[gcc/redhat/heads/gcc-14-branch] (322 commits) Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9
The branch 'redhat/heads/gcc-14-branch' was updated to point to: e525669e462d... Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9 It previously pointed to: b84ce6a258e0... Merge commit 'r14-10877-g2a9fbe1920779b65eb817db7ce0c60096b Diff: Summary of changes (added commits): --- e525669... Merge commit 'r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9 a2de88e... testsuite: arm: Add pattern for armv8-m.base to cmse-15.c t (*) d0191d1... Disable a broken multiversioning optimisation (*) 02b1172... tree-optimization/117912 - bogus address equivalences for _ (*) 91b524a... doc: cpp: fix version test example syntax (*) 1bf4bfc... Daily bump. (*) a4c0f16... libstdc++: Use feature test macro for pmr::polymorphic_allo (*) e6d2bcf... libstdc++: Improve Doxygen docs for std::allocator_traits s (*) 734d7da... libstdc++: Undeprecate std::pmr::polymorphic_allocator::des (*) 72fe42c... libstdc++: Give std::memory_order a fixed underlying type [ (*) d05d583... libstdc++: Fix typo in comment in src/c++17/fs_dir.cc (*) 0cdd4c9... libstdc++: Make std::println use locale from ostream (LWG 4 (*) 2f20d09... libstdc++: Fix some typos and grammatical errors in docs (*) b84070e... libstdc++: Document when std::string::shrink_to_fit was add (*) cfe866e... libstdc++: Remove __builtin_expect from consteval assertion (*) f0eb0ba... libstdc++: Fix parallel std::exclusive_scan [PR108236] (*) 3590d9f... libstdc++: Fix debug containers for constant evaluation [PR (*) 845a0b7... libstdc++: Disable __gnu_debug::__is_singular(T*) in conste (*) 1e696ca... libstdc++: Skip redundant assertions in std::array equality (*) 83fa082... libstdc++: Skip redundant assertions in std::span construct (*) 67c457d... libstdc++: Fix std::deque::insert(pos, first, last) undefin (*) f73ecaf... c++: ICE during requires-expr partial subst [PR118060] (*) 70cea06... c++: constexpr potentiality of CAST_EXPR [PR117925] (*) aa1e19d... c++: relax ICE for unexpected trees during constexpr [PR117 (*) 4dbfc2f... 
c++: template-id dependence wrt local static arg [PR117792] (*) f236c89... libstdc++: Avoid unnecessary copies in ranges::min/max [PR1 (*) 03d0440... libstdc++: Implement LWG 3563 changes to keys_view and valu (*) 9d650c2... libstdc++: Fix complexity of drop_view::begin() const [PR11 (*) 1f509da... testsuite: arm: Use -Os in memset-inline-8* tests (*) dedaccb... c++: ICE initializing array of aggrs [PR117985] (*) 21600f3... c++: unresolved overload with comma op [PR115430] (*) 3fe6135... c++: noexcept and pointer to member function type [PR113108 (*) df3ae94... c++: ICE with structured bindings and m-d array [PR102594] (*) 078089a... c++: mutable temps in rodata [PR116369] (*) 13242e5... c++: ICE with enum and conversion fn in template [PR115657] (*) da983b3... c++: ICE with reference NSDMI [PR114854] (*) e6dfe71... c++: concept in default argument [PR109859] (*) 61de759... arm: [MVE intrinsics] Fix support for predicate constants [ (*) 409e766... libstdc++: Update references to gcc.gnu.org/onlinedocs (*) a7c5c49... libstdc++: Fix std::future::wait_until for subsecond negati (*) 785ddc2... libstdc++: Add Doxygen docs for std::forward_like (*) 7178e38... libstdc++: Fix incorrect DocBook element in manual (*) da82bf0... c++: Honor complain in cp_build_function_call_vec for check (*) ec9ccda... c++: Diagnose earlier non-static data members with cv conta (*) 1dd428c... warn-access: Fix up matching_alloc_calls_p [PR118024] (*) fbbc1a4... cse: Fix up record_jump_equiv checks [PR117095] (*) 7ae55c2... c++: allow stores to anon union vars to change current unio (*) 5a78e36... docs: Clarify -fsanitize=hwaddress target support [PR117960 (*) 26615af... doloop: Fix up doloop df use [PR116799] (*) b602b32... bitintlower: Fix up ?ROTATE_EXPR lowering [PR117847] (*) 326b6ba... openmp: Add crtoffloadtableS.o and use it [PR117851] (*) 5d71861... docs: Fix up __sync_* documentation [PR117642] (*) 448f84a... builtins: Handle BITINT_TYPE in __builtin_iseqsig folding [ (*) 0183fb1... 
c: Fix sizeof error recovery [PR117745] (*) 54c381d... builtins: Fix up DFP ICEs on __builtin_fpclassify [PR102674 (*) 59eec2e... builtins: Fix up DFP ICEs on __builtin_is{inf,finite,normal (*) 6b417da... c-family: Yet another fix for _BitInt & __sync_* builtins [ (*) 3190d62... phiopt: Fix a pasto in spaceship_replacement [PR117612] (*) 6d0503f... c-family: Fix ICE with __sync_*_and_* on _BitInt [PR117641] (*) 98eabda... expand: Fix up ICE on VCE from _Complex types to _BitInt [P (*) e3b2c17... bitintlower: Handle PAREN_EXPR [PR117459] (*) 04d7d02... m2: Fix up dependencies some more (*) bef6c31... c++: Fix ICE on constexpr virtual function [PR117317] (*) c56b465... store-merging: Apply --param=store-merging-max-size= in mor (*) 67379c5... store-merging: Don't use sub_byte_op_p mode for empty_ctor_ (*) c4d2308... Daily bump. (*
[gcc r14-11198] testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test
https://gcc.gnu.org/g:a2de88e5d49f7084677ef2728cd99db0a9706ee9 commit r14-11198-ga2de88e5d49f7084677ef2728cd99db0a9706ee9 Author: Torbjörn SVENSSON Date: Tue Jan 7 21:04:17 2025 +0100 testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test Since armv8-m.base uses thumb1 that does not support sibcall/tailcall, a pattern is needed that uses PUSH/BL/POP sequence instead of a single B instruction to reuse an already existing function in the compile unit. gcc/testsuite/ChangeLog: * gcc.target/arm/cmse/cmse-15.c: Added pattern for armv8-m.base. Signed-off-by: Torbjörn SVENSSON (cherry picked from commit cfd7c54bdfe109f7e801122a093d0d2a85324fc5) Diff: --- gcc/testsuite/gcc.target/arm/cmse/cmse-15.c | 12 1 file changed, 12 insertions(+) diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c index 5188f1d697f1..0e35830c35ea 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c +++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c @@ -65,6 +65,10 @@ int nonsecure1 (ns_foo_t ** ns_foo_p) ** bl __gnu_cmse_nonsecure_call ** | ** b nonsecure0 +** | +** push{r4, lr} +** bl nonsecure0 +** pop {r4, pc} ** ) ** ... */ @@ -129,6 +133,10 @@ int secure1 (s_bar_t ** s_bar_p) ** blx r[0-3] ** | ** b secure0 +** | +** push{r4, lr} +** bl secure0 +** pop {r4, pc} ** ) ** ... */ @@ -146,6 +154,10 @@ int secure2 (s_bar_ptr s_bar_p) ** blx r[0-3] ** | ** b secure1 +** | +** push{r4, lr} +** bl secure1 +** pop {r4, pc} ** ) ** ... */
[gcc r15-6801] testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test
https://gcc.gnu.org/g:cfd7c54bdfe109f7e801122a093d0d2a85324fc5 commit r15-6801-gcfd7c54bdfe109f7e801122a093d0d2a85324fc5 Author: Torbjörn SVENSSON Date: Tue Jan 7 21:04:17 2025 +0100 testsuite: arm: Add pattern for armv8-m.base to cmse-15.c test Since armv8-m.base uses thumb1 that does not support sibcall/tailcall, a pattern is needed that uses PUSH/BL/POP sequence instead of a single B instruction to reuse an already existing function in the compile unit. gcc/testsuite/ChangeLog: * gcc.target/arm/cmse/cmse-15.c: Added pattern for armv8-m.base. Signed-off-by: Torbjörn SVENSSON Diff: --- gcc/testsuite/gcc.target/arm/cmse/cmse-15.c | 12 1 file changed, 12 insertions(+) diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c index 5188f1d697f1..0e35830c35ea 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c +++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c @@ -65,6 +65,10 @@ int nonsecure1 (ns_foo_t ** ns_foo_p) ** bl __gnu_cmse_nonsecure_call ** | ** b nonsecure0 +** | +** push{r4, lr} +** bl nonsecure0 +** pop {r4, pc} ** ) ** ... */ @@ -129,6 +133,10 @@ int secure1 (s_bar_t ** s_bar_p) ** blx r[0-3] ** | ** b secure0 +** | +** push{r4, lr} +** bl secure0 +** pop {r4, pc} ** ) ** ... */ @@ -146,6 +154,10 @@ int secure2 (s_bar_ptr s_bar_p) ** blx r[0-3] ** | ** b secure1 +** | +** push{r4, lr} +** bl secure1 +** pop {r4, pc} ** ) ** ... */
[gcc r13-9291] c++: ICE with __has_unique_object_representations [PR115476]
https://gcc.gnu.org/g:b0426fbc85333775ef97cc135e87dd6cae876af7 commit r13-9291-gb0426fbc85333775ef97cc135e87dd6cae876af7 Author: Marek Polacek Date: Mon Jun 17 17:53:12 2024 -0400 c++: ICE with __has_unique_object_representations [PR115476] Here we started to ICE with r13-25: in check_trait_type, for "X[]" we return true here: if (kind == 1 && TREE_CODE (type) == ARRAY_TYPE && !TYPE_DOMAIN (type)) return true; // Array of unknown bound. Don't care about completeness. and then end up crashing in record_has_unique_obj_representations: 4836 if (cur != wi::to_offset (sz)) because sz is null. https://eel.is/c++draft/type.traits#tab:meta.unary.prop-row-47-column-3-sentence-1 says that the preconditions for __has_unique_object_representations are: "T shall be a complete type, cv void, or an array of unknown bound" and that "For an array type T, the same result as has_unique_object_representations_v<remove_all_extents_t<T>>" so T[] should be treated as T. So we should use kind==2 for the trait. PR c++/115476 gcc/cp/ChangeLog: * semantics.cc (finish_trait_expr) <case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS>: Move below to call check_trait_type with kind==2. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/has-unique-obj-representations4.C: New test. 
(cherry picked from commit c314867fc06d475e3c2ace32032e0d72e3915b55) Diff: --- gcc/cp/semantics.cc | 2 +- .../g++.dg/cpp1z/has-unique-obj-representations4.C | 16 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 079ad5c93bf1..886186403691 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12246,7 +12246,6 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_HAS_NOTHROW_COPY: case CPTK_HAS_TRIVIAL_COPY: case CPTK_HAS_TRIVIAL_DESTRUCTOR: -case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS: if (!check_trait_type (type1)) return error_mark_node; break; @@ -12256,6 +12255,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_STD_LAYOUT: case CPTK_IS_TRIVIAL: case CPTK_IS_TRIVIALLY_COPYABLE: +case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS: if (!check_trait_type (type1, /* kind = */ 2)) return error_mark_node; break; diff --git a/gcc/testsuite/g++.dg/cpp1z/has-unique-obj-representations4.C b/gcc/testsuite/g++.dg/cpp1z/has-unique-obj-representations4.C new file mode 100644 index ..d6949dc7005e --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/has-unique-obj-representations4.C @@ -0,0 +1,16 @@ +// PR c++/115476 +// { dg-do compile { target c++11 } } + +struct X; +static_assert(__has_unique_object_representations(X), ""); // { dg-error "invalid use of incomplete type" } +static_assert(__has_unique_object_representations(X[]), ""); // { dg-error "invalid use of incomplete type" } +static_assert(__has_unique_object_representations(X[1]), ""); // { dg-error "invalid use of incomplete type" } +static_assert(__has_unique_object_representations(X[][1]), ""); // { dg-error "invalid use of incomplete type" } + +struct X { + int x; +}; +static_assert(__has_unique_object_representations(X), ""); +static_assert(__has_unique_object_representations(X[]), ""); +static_assert(__has_unique_object_representations(X[1]), ""); 
+static_assert(__has_unique_object_representations(X[][1]), "");
[gcc r15-6768] nvptx: Add '__builtin_frame_address(0)' test case
https://gcc.gnu.org/g:86175a64f167e3b1701132fa1684d76230054c36 commit r15-6768-g86175a64f167e3b1701132fa1684d76230054c36 Author: Thomas Schwinge Date: Fri Dec 13 11:40:01 2024 +0100 nvptx: Add '__builtin_frame_address(0)' test case Documenting the status quo. gcc/testsuite/ * gcc.target/nvptx/__builtin_frame_address_0-1.c: New. Diff: --- .../gcc.target/nvptx/__builtin_frame_address_0-1.c | 36 ++ 1 file changed, 36 insertions(+) diff --git a/gcc/testsuite/gcc.target/nvptx/__builtin_frame_address_0-1.c b/gcc/testsuite/gcc.target/nvptx/__builtin_frame_address_0-1.c new file mode 100644 index ..35817769d31f --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/__builtin_frame_address_0-1.c @@ -0,0 +1,36 @@ +/* Document what we do for '__builtin_frame_address(0)'. */ + +/* { dg-do compile } + TODO We can't 'assemble' this -- it's invalid PTX code. */ +/* { dg-options -O3 } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { check-function-bodies {** } {} } } */ + +void sink(void *); + +void f(void) +{ + void *p; + p = __builtin_frame_address(0); + sink(p); +} +/* +** f: +** \.visible \.func f +** { +** { +** \.param\.u64 %out_arg1; +** st\.param\.u64 \[%out_arg1\], %frame; +** call sink, \(%out_arg1\); +** } +** ret; +*/ + +/* The concept of a '%frame' pointer doesn't apply like this for + '-mno-soft-stack': PTX "native" stacks (TODO), and for '-msoft-stack' in + this form also constitutes invalid PTX code (TODO). + + { dg-final { scan-assembler-not {%frame} { xfail *-*-* } } } */ + +/* As this is an internal-use built-in function, we don't bother with + emitting proper error diagnostics. */
[gcc r14-11196] tree-optimization/117912 - bogus address equivalences for __builtin_object_size
https://gcc.gnu.org/g:02b1172dad4c444953868f411fc8dd4483f320e7 commit r14-11196-g02b1172dad4c444953868f411fc8dd4483f320e7 Author: Richard Biener Date: Thu Dec 5 10:47:13 2024 +0100 tree-optimization/117912 - bogus address equivalences for __builtin_object_size VN again is the culprit for exploiting address equivalences before __builtin_object_size got the chance to do its job. This time it isn't about union members but adjacent structure fields where an address to one after the last element of an array field can spill over to the next field. The following protects all out-of-bound accesses on the upper bound side (singling out TYPE_MAX_VALUE + 1 is more expensive). It ignores other out-of-bound addresses that would invoke UB. Zero-sized arrays are a bit awkward because the C++ represents them with a -1U upper bound. There's a similar issue for zero-sized components whose address can be the same as the adjacent field in C. PR tree-optimization/117912 * tree-ssa-sccvn.cc (copy_reference_ops_from_ref): For addresses of zero-sized components do not set ->off if the object size pass didn't run. For OOB ARRAY_REF accesses in address expressions avoid setting ->off if the object size pass didn't run. (valueize_refs_1): Likewise. * c-c++-common/torture/pr117912-1.c: New testcase. * c-c++-common/torture/pr117912-2.c: Likewise. * c-c++-common/torture/pr117912-3.c: Likewise. 
(cherry picked from commit 233972ab3b5338d7a5d1d7af9108c1f366170e44) Diff: --- gcc/testsuite/c-c++-common/torture/pr117912-1.c | 28 gcc/testsuite/c-c++-common/torture/pr117912-2.c | 28 gcc/testsuite/c-c++-common/torture/pr117912-3.c | 61 + gcc/tree-ssa-sccvn.cc | 51 +++-- 4 files changed, 164 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/c-c++-common/torture/pr117912-1.c b/gcc/testsuite/c-c++-common/torture/pr117912-1.c new file mode 100644 index ..2750585c7f77 --- /dev/null +++ b/gcc/testsuite/c-c++-common/torture/pr117912-1.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +struct S { int a; int b[24]; int c[24]; int d; }; +volatile int *p; + +void __attribute__((noipa)) +bar (int *q) +{ + p = q; +} + +__SIZE_TYPE__ __attribute__((noipa)) +foo (struct S *p) +{ + bar (&p->b[24]); + bar (&p->c[0]); + return __builtin_object_size (&p->c[0], 1); +} + +int +main() +{ + struct S s; + __SIZE_TYPE__ x = foo (&s); + if (x < sizeof (int) * 24) +__builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/c-c++-common/torture/pr117912-2.c b/gcc/testsuite/c-c++-common/torture/pr117912-2.c new file mode 100644 index ..a3a621575635 --- /dev/null +++ b/gcc/testsuite/c-c++-common/torture/pr117912-2.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +struct S { int a; int b[0]; int c[24]; int d; }; +volatile int *p; + +void __attribute__((noipa)) +bar (int *q) +{ + p = q; +} + +__SIZE_TYPE__ __attribute__((noipa)) +foo (struct S *p) +{ + bar (&p->b[0]); + bar (&p->c[0]); + return __builtin_object_size (&p->c[0], 1); +} + +int +main() +{ + struct S s; + __SIZE_TYPE__ x = foo (&s); + if (x < sizeof (int) * 24) +__builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/c-c++-common/torture/pr117912-3.c b/gcc/testsuite/c-c++-common/torture/pr117912-3.c new file mode 100644 index ..64e981d2a5e7 --- /dev/null +++ b/gcc/testsuite/c-c++-common/torture/pr117912-3.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-additional-options "-std=gnu++20" { target c++ } } */ + +struct B {}; +struct A 
{ int a; +#ifdef __cplusplus + [[no_unique_address]] +#endif + struct B b; + char c[]; }; +volatile void *p; + +void __attribute__((noipa)) +bar (void *q) +{ + p = q; +} + +__SIZE_TYPE__ __attribute__((noipa)) +foo (struct A *p) +{ + bar (&p->b); + bar (&p->c); + return __builtin_object_size (&p->c, 1); +} + +__SIZE_TYPE__ __attribute__((noipa)) +baz (void) +{ + struct A *p = (struct A *) __builtin_malloc (__builtin_offsetof (struct A, c) + 64); + bar (&p->b); + bar (&p->c); + return __builtin_object_size (&p->c, 1); +} + +__SIZE_TYPE__ __attribute__((noipa)) +qux (struct A *p) +{ + bar (&p->b); + bar (&p->c); + return __builtin_object_size (&p->c, 3); +} + +__SIZE_TYPE__ __attribute__((noipa)) +boo (void) +{ + struct A *p = (struct A *) __builtin_malloc (__builtin_offsetof (struct A, c) + 64); + bar (&p->b); + bar (&p->c); + return __builtin_object_size (&p->c, 3); +} + +int +main () +{ + static struct A a = { .a = 1, .b = {}, .c = { 1, 2, 3, 4, 0 } }; + if (foo (&a) < 5) +__builtin_abort (); + if (baz () < 64) +__builtin_abort (); +} diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index 0b5c638df455..ff27b75313e0 100644 --- a/gcc/tree-ssa-sccvn.cc
[gcc r15-6804] libstdc++: Fix unused parameter warnings in <bits/atomic_futex.h>
https://gcc.gnu.org/g:c9353e0fcd0ddc0d48ae8a2b0518f0f82670d708 commit r15-6804-gc9353e0fcd0ddc0d48ae8a2b0518f0f82670d708 Author: Jonathan Wakely Date: Fri Jan 10 10:32:22 2025 + libstdc++: Fix unused parameter warnings in This fixes warnings like the following during bootstrap: sparc-sun-solaris2.11/libstdc++-v3/include/bits/atomic_futex.h:324:53: warning: unused parameter ‘__mo’ [-Wunused-parameter] 324 | _M_load_when_equal(unsigned __val, memory_order __mo) |~^~~~ libstdc++-v3/ChangeLog: * include/bits/atomic_futex.h (__atomic_futex_unsigned): Remove names of unused parameters in non-futex implementation. Diff: --- libstdc++-v3/include/bits/atomic_futex.h | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libstdc++-v3/include/bits/atomic_futex.h b/libstdc++-v3/include/bits/atomic_futex.h index e69420d23055..9326cba67153 100644 --- a/libstdc++-v3/include/bits/atomic_futex.h +++ b/libstdc++-v3/include/bits/atomic_futex.h @@ -305,14 +305,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { } _GLIBCXX_ALWAYS_INLINE unsigned -_M_load(memory_order __mo) +_M_load(memory_order) { unique_lock __lock(_M_mutex); return _M_data; } _GLIBCXX_ALWAYS_INLINE unsigned -_M_load_when_not_equal(unsigned __val, memory_order __mo) +_M_load_when_not_equal(unsigned __val, memory_order) { unique_lock __lock(_M_mutex); while (_M_data == __val) @@ -321,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } _GLIBCXX_ALWAYS_INLINE void -_M_load_when_equal(unsigned __val, memory_order __mo) +_M_load_when_equal(unsigned __val, memory_order) { unique_lock __lock(_M_mutex); while (_M_data != __val) @@ -330,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template _GLIBCXX_ALWAYS_INLINE bool - _M_load_when_equal_for(unsigned __val, memory_order __mo, + _M_load_when_equal_for(unsigned __val, memory_order, const chrono::duration<_Rep, _Period>& __rtime) { unique_lock __lock(_M_mutex); @@ -340,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template _GLIBCXX_ALWAYS_INLINE bool - 
_M_load_when_equal_until(unsigned __val, memory_order __mo, + _M_load_when_equal_until(unsigned __val, memory_order, const chrono::time_point<_Clock, _Duration>& __atime) { unique_lock __lock(_M_mutex); @@ -349,7 +349,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } _GLIBCXX_ALWAYS_INLINE void -_M_store_notify_all(unsigned __val, memory_order __mo) +_M_store_notify_all(unsigned __val, memory_order) { unique_lock __lock(_M_mutex); _M_data = __val;
[gcc r15-6806] AArch64: correct Cortex-X4 MIDR
https://gcc.gnu.org/g:ddcfae1d1dfe5875875c9897f0dda14e342b2534 commit r15-6806-gddcfae1d1dfe5875875c9897f0dda14e342b2534 Author: Tamar Christina Date: Fri Jan 10 21:13:50 2025 + AArch64: correct Cortex-X4 MIDR The Parts Num field for the MIDR for Cortex-X4 is wrong. It's currently the parts number for a Cortex-A720 (which does have the right number). The correct number can be found in the Cortex-X4 Technical Reference Manual [1] on page 382 in Issue Number 5. [1] https://developer.arm.com/documentation/102484/latest/ gcc/ChangeLog: * config/aarch64/aarch64-cores.def (AARCH64_CORE): Fix cortex-x4 parts num. Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index caf61437d180..5ac81332b67c 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -193,7 +193,7 @@ AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8M AARCH64_CORE("cortex-x3", cortexx3, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversev2, 0x41, 0xd4e, -1) -AARCH64_CORE("cortex-x4", cortexx4, cortexa57, V9_2A, (SVE2_BITPERM, MEMTAG, PROFILE), neoversev3, 0x41, 0xd81, -1) +AARCH64_CORE("cortex-x4", cortexx4, cortexa57, V9_2A, (SVE2_BITPERM, MEMTAG, PROFILE), neoversev3, 0x41, 0xd82, -1) AARCH64_CORE("cortex-x925", cortexx925, cortexa57, V9_2A, (SVE2_BITPERM, MEMTAG, PROFILE), cortexx925, 0x41, 0xd85, -1) AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1)
[gcc r15-6807] vect: Force alignment peeling to vectorize more early break loops [PR118211]
https://gcc.gnu.org/g:68326d5d1a593dc0bf098c03aac25916168bc5a9 commit r15-6807-g68326d5d1a593dc0bf098c03aac25916168bc5a9 Author: Alex Coplan Date: Mon Mar 11 13:09:10 2024 + vect: Force alignment peeling to vectorize more early break loops [PR118211] This allows us to vectorize more loops with early exits by forcing peeling for alignment to make sure that we're guaranteed to be able to safely read an entire vector iteration without crossing a page boundary. To make this work for VLA architectures we have to allow compile-time non-constant target alignments. We also have to override the result of the target's preferred_vector_alignment hook if it isn't a power-of-two multiple of the TYPE_SIZE of the chosen vector type. gcc/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Set need_peeling_for_alignment flag on read DRs instead of failing vectorization. Punt on gathers. (dr_misalignment): Handle non-constant target alignments. (vect_compute_data_ref_alignment): If need_peeling_for_alignment flag is set on the DR, then override the target alignment chosen by the preferred_vector_alignment hook to choose a safe alignment. (vect_supportable_dr_alignment): Override support_vector_misalignment hook if need_peeling_for_alignment is set on the DR: in this case we must return dr_unaligned_unsupported in order to force peeling. * tree-vect-loop-manip.cc (vect_do_peeling): Allow prolog peeling by a compile-time non-constant amount. * tree-vectorizer.h (dr_vec_info): Add new flag need_peeling_for_alignment. gcc/testsuite/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * gcc.dg/tree-ssa/cunroll-13.c: Don't vectorize. * gcc.dg/tree-ssa/cunroll-14.c: Likewise. * gcc.dg/unroll-6.c: Likewise. * gcc.dg/tree-ssa/gen-vect-28.c: Likewise. * gcc.dg/vect/vect-104.c: Expect to vectorize. * gcc.dg/vect/vect-early-break_108-pr113588.c: Likewise. 
* gcc.dg/vect/vect-early-break_109-pr113588.c: Likewise. * gcc.dg/vect/vect-early-break_110-pr113467.c: Likewise. * gcc.dg/vect/vect-early-break_3.c: Likewise. * gcc.dg/vect/vect-early-break_65.c: Likewise. * gcc.dg/vect/vect-early-break_8.c: Likewise. * gfortran.dg/vect/vect-5.f90: Likewise. * gfortran.dg/vect/vect-8.f90: Likewise. * gcc.dg/vect/vect-switch-search-line-fast.c: Co-Authored-By: Tamar Christina Diff: --- gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c| 1 + gcc/testsuite/gcc.dg/unroll-6.c| 2 +- gcc/testsuite/gcc.dg/vect/vect-104.c | 1 + .../gcc.dg/vect/vect-early-break_108-pr113588.c| 2 +- .../gcc.dg/vect/vect-early-break_109-pr113588.c| 2 +- .../gcc.dg/vect/vect-early-break_110-pr113467.c| 2 +- gcc/testsuite/gcc.dg/vect/vect-early-break_3.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-early-break_65.c| 2 +- gcc/testsuite/gcc.dg/vect/vect-early-break_8.c | 2 +- .../gcc.dg/vect/vect-switch-search-line-fast.c | 3 +- gcc/testsuite/gfortran.dg/vect/vect-5.f90 | 1 + gcc/testsuite/gfortran.dg/vect/vect-8.f90 | 5 +- gcc/tree-vect-data-refs.cc | 113 ++--- gcc/tree-vect-loop-manip.cc| 6 -- gcc/tree-vectorizer.h | 5 + 17 files changed, 119 insertions(+), 34 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c index 98cb56a8564b..154e2963f12d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fgimple -fdump-tree-cunroll-blocks-details" } */ +/* { dg-options "-O3 -fgimple -fdump-tree-cunroll-blocks-details -fno-tree-vectorize" } */ #if __SIZEOF_INT__ < 4 __extension__ typedef __INT32_TYPE__ i32; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c index 5f112da310c8..4b369f7ad278 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c +++ 
b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fdump-tree-cunroll-blocks-details" } */ +/* { dg-options "-O3 -fdump-tree-cunroll-blocks-details -fno-tree-vectorize" } */ struct a {int a[100];}; void t(struct a *a) diff --git a/gcc/testsuite/
[gcc r15-6811] vect: Also cost gconds for scalar [PR118211]
https://gcc.gnu.org/g:086031c058598512d09bf898e4db3735b3e1f22c commit r15-6811-g086031c058598512d09bf898e4db3735b3e1f22c Author: Alex Coplan Date: Mon Jun 24 13:54:48 2024 +0100 vect: Also cost gconds for scalar [PR118211] Currently we only cost gconds for the vector loop while we omit costing them when analyzing the scalar loop; this unfairly penalizes the vector loop in the case of loops with early exits. This (together with the previous patches) enables us to vectorize std::find with 64-bit element sizes. gcc/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * tree-vect-loop.cc (vect_compute_single_scalar_iteration_cost): Don't skip over gconds. Diff: --- gcc/tree-vect-loop.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index bb1138bfcfba..edd7d4d87630 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1688,7 +1688,9 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) gimple *stmt = gsi_stmt (si); stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); - if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) + if (!is_gimple_assign (stmt) + && !is_gimple_call (stmt) + && !is_a (stmt)) continue; /* Skip stmts that are not vectorized inside the loop. */
[gcc r15-6810] vect: Ensure we add vector skip guard even when versioning for aliasing [PR118211]
https://gcc.gnu.org/g:f4e259b4a66c81c234608056117836e13606e4c8 commit r15-6810-gf4e259b4a66c81c234608056117836e13606e4c8 Author: Alex Coplan Date: Thu Jul 25 16:34:05 2024 + vect: Ensure we add vector skip guard even when versioning for aliasing [PR118211] This fixes a latent wrong code issue whereby vect_do_peeling determined the wrong condition for inserting the vector skip guard. Specifically in the case where the loop niters are unknown at compile time we used to check: !LOOP_REQUIRES_VERSIONING (loop_vinfo) but LOOP_REQUIRES_VERSIONING is true for loops which we have versioned for aliasing, and that has nothing to do with prolog peeling. I think this condition should instead be checking specifically if we aren't versioning for alignment. As it stands, when we version for alignment, we don't peel, so the vector skip guard is indeed redundant in that case. With the testcase added (reduced from the Fortran frontend) we would version for aliasing, omit the vector skip guard, and then at runtime we would peel sufficient iterations for alignment that there wasn't a full vector iteration left when we entered the vector body, thus overflowing the output buffer. gcc/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * tree-vect-loop-manip.cc (vect_do_peeling): Adjust skip_vector condition to only omit the edge if we're versioning for alignment. gcc/testsuite/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * gcc.dg/vect/vect-early-break_130.c: New test. 
Diff: --- gcc/testsuite/gcc.dg/vect/vect-early-break_130.c | 91 gcc/tree-vect-loop-manip.cc | 2 +- 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c new file mode 100644 index ..ce43fcd5681c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c @@ -0,0 +1,91 @@ +/* { dg-require-effective-target mmap } */ +/* { dg-add-options vect_early_break } */ + +#include +#include +#include +#include +#include + +/* This was reduced from gcc/fortran/scanner.cc:gfc_widechar_to_char. + The problem was that we omitted adding the vector skip guard when + versioning for aliasing. When invoked on a string that is 28 bytes + long, that caused us to enter the vector body after having peeled 15 + iterations, leaving only 13 iterations to be performed as vector, but + the vector body performs 16 (thus overflowing the res buffer by three + bytes). */ +__attribute__((noipa)) +void f (const uint32_t *s, char *res, int length) +{ + unsigned long i; + + for (i = 0; i < length; i++) +{ + if (s[i] > 255) +__builtin_abort (); + res[i] = (char)s[i]; +} +} + +int main(void) +{ + long pgsz = sysconf (_SC_PAGESIZE); + if (pgsz == -1) { +fprintf (stderr, "sysconf failed: %m\n"); +return 0; + } + + void *p = mmap (NULL, + pgsz * 2, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) { +fprintf (stderr, "mmap failed: %m\n"); +return 0; + } + + if (mprotect (p + pgsz, pgsz, PROT_NONE)) { +fprintf (stderr, "mprotect failed: %m\n"); +return 0; + } + + uint32_t in[128]; + memset (in, 0, sizeof(in)); + + uintptr_t x = (uintptr_t)in; + + /* We want to make our input pointer maximally misaligned (so we have + to peel the greatest possible number of iterations for alignment). + We need two bits of alignment for our uint32_t pointer to be + aligned. 
Assuming we process 16 chars per vector iteration, we + will need to load 16 uint32_ts, thus we need a further 4 bits of + alignment. */ + const uintptr_t align_bits = 2 + 4; + const uintptr_t align_p2 = (1 << align_bits); + const uintptr_t align_p2m1 = align_p2 - 1; + + if (x & align_p2m1 <= 4) +x &= -align_p2; /* Round down. */ + else +x = (x + align_p2m1) & -align_p2; /* Round up. */ + + /* Add one uint32_t to get maximally misaligned. */ + uint32_t *inp = (uint32_t *)x + 1; + + const char *str = "dec-comparison-complex_1.f90"; + long n; +#pragma GCC novector + for (n = 0; str[n]; n++) +inp[n] = str[n]; + + if (n > pgsz) +__builtin_abort (); + + char *buf = p + pgsz - n; + f (inp, buf, n); + +#pragma GCC novector + for (int i = 0; i < n; i++) +if (buf[i] != str[i]) + __builtin_abort (); +} diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 9a55a5611ccc..06ca99eaab95 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3271,7 +3271,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
[gcc r15-6809] vect: Fix dominators when adding a guard to skip the vector loop [PR118211]
https://gcc.gnu.org/g:f1c6789ab6c5443ccefab96c74b0e862119d1781 commit r15-6809-gf1c6789ab6c5443ccefab96c74b0e862119d1781 Author: Tamar Christina Date: Mon Jul 8 12:16:11 2024 +0100 vect: Fix dominators when adding a guard to skip the vector loop [PR118211] The alignment peeling changes exposed a latent missing dominator update with early break vectorization, specifically when inserting the vector skip edge, since the new edge bypasses the prolog skip block and thus has the potential to subvert its dominance. This patch fixes that. gcc/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * tree-vect-loop-manip.cc (vect_do_peeling): Update immediate dominators of nodes that were dominated by the prolog skip block after inserting vector skip edge. Initialize prolog variable to NULL to avoid bogus -Wmaybe-uninitialized during bootstrap. gcc/testsuite/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * g++.dg/vect/vect-early-break_6.cc: New test. Co-Authored-By: Alex Coplan Diff: --- gcc/testsuite/g++.dg/vect/vect-early-break_6.cc | 25 gcc/tree-vect-loop-manip.cc | 26 - 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/vect/vect-early-break_6.cc b/gcc/testsuite/g++.dg/vect/vect-early-break_6.cc new file mode 100644 index ..fdd9af832a74 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/vect-early-break_6.cc @@ -0,0 +1,25 @@ +// { dg-do compile } +// ICE in verify_dominators, reduced from charset.cc (libstdc++). + +void convert_escape(int *); +int cpp_interpret_string_1_to, cpp_interpret_string_1_tbuf; +char *cpp_interpret_string_1_base; +char cpp_interpret_string_1_limit; +void cpp_interpret_string_1() { + char *p; + for (;;) { +cpp_interpret_string_1_base = p; +while (p < &cpp_interpret_string_1_limit && *p) + p++; +if (p > cpp_interpret_string_1_base) + if (cpp_interpret_string_1_to) +goto fail; +if (p >= &cpp_interpret_string_1_limit) + break; +int *tbuf_ptr = +cpp_interpret_string_1_to ? 
&cpp_interpret_string_1_tbuf : __null; +convert_escape(tbuf_ptr); + } +fail: + ; +} diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 4505e5d87ddb..9a55a5611ccc 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3197,7 +3197,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, prob_prolog = prob_epilog = profile_probability::guessed_always () .apply_scale (estimated_vf - 1, estimated_vf); - class loop *prolog, *epilog = NULL; + class loop *prolog = NULL, *epilog = NULL; class loop *first_loop = loop; bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP; @@ -3464,6 +3464,30 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, skip_e = guard_e; e = EDGE_PRED (guard_to, 0); e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); + + /* Handle any remaining dominator updates needed after +inserting the loop skip edge above. */ + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && prolog_peeling) + { + /* Adding a skip edge to skip a loop with multiple exits +means the dominator of the join blocks for all exits shifts +from the prolog skip guard to the loop skip guard. */ + auto prolog_skip_bb + = single_pred (loop_preheader_edge (prolog)->src); + auto needs_update + = get_dominated_by (CDI_DOMINATORS, prolog_skip_bb); + + /* Update everything except for the immediate children of +the prolog skip block (the prolog and vector preheaders). +Those should remain dominated by the prolog skip block itself, +since the loop guard edge goes to the epilogue. */ + for (auto bb : needs_update) + if (bb != EDGE_SUCC (prolog_skip_bb, 0)->dest + && bb != EDGE_SUCC (prolog_skip_bb, 1)->dest) + set_immediate_dominator (CDI_DOMINATORS, bb, guard_bb); + } + slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e); /* Simply propagate profile info from guard_bb to guard_to which is
[gcc r15-6808] vect: Don't guard scalar epilogue for inverted loops [PR118211]
https://gcc.gnu.org/g:0a46245174123ad2802753e7fee689a541570ca0 commit r15-6808-g0a46245174123ad2802753e7fee689a541570ca0 Author: Alex Coplan Date: Fri Jun 7 11:13:02 2024 + vect: Don't guard scalar epilogue for inverted loops [PR118211] For loops with LOOP_VINFO_EARLY_BREAKS_VECT_PEELED we should always enter the scalar epilogue, so avoid emitting a guard on entry to the epilogue. gcc/ChangeLog: PR tree-optimization/118211 PR tree-optimization/116126 * tree-vect-loop-manip.cc (vect_do_peeling): Avoid emitting an epilogue guard for inverted early-exit loops. Diff: --- gcc/tree-vect-loop-manip.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 15cac0fe27df..4505e5d87ddb 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3530,7 +3530,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* If we have a peeled vector iteration we will never skip the epilog loop and we can simplify the cfg a lot by not doing the edge split. */ - if (skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + if (skip_epilog + || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))) { guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, niters, niters_vector_mult_vf);