[gcc r15-6653] [PR testsuite/118055] Trivial testsuite adjustment for m68k target
https://gcc.gnu.org/g:a856b4d97b8d328fdcb169b792ac5456e40f8c00 commit r15-6653-ga856b4d97b8d328fdcb169b792ac5456e40f8c00 Author: Jeff Law Date: Tue Jan 7 07:43:19 2025 -0700 [PR testsuite/118055] Trivial testsuite adjustment for m68k target After a bit of a prod from Hans... Make the obvious change to these tests to get them passing again on m68k. PR testsuite/118055 gcc/testsuite * gcc.dg/tree-ssa/pr83403-1.c: Add m68k*-*-* to targets needing additional arguments for peeling. * gcc.dg/tree-ssa/pr83403-2.c: Similarly. Diff: --- gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c index 3cfda4f183cd..64f2bbc76fee 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ /* { dg-additional-options "--param max-completely-peeled-insns=200" { target { s390*-*-* } } } */ -/* { dg-additional-options "--param max-completely-peeled-insns=300" { target { arm*-*-* cris-*-* } } } */ +/* { dg-additional-options "--param max-completely-peeled-insns=300" { target { arm*-*-* cris-*-* m68k*-*-* } } } */ #define TYPE unsigned int diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c index 00fa04ecb851..3f520720ca2f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ /* { dg-additional-options "--param max-completely-peeled-insns=200" { target { s390*-*-* } } } */ -/* { dg-additional-options "--param max-completely-peeled-insns=300" { target { arm*-*-* cris-*-* } } } */ +/* { dg-additional-options "--param max-completely-peeled-insns=300" { 
target { arm*-*-* cris-*-* m68k*-*-* } } } */ #define TYPE int
[gcc r15-6654] cfgexpand: Factor out getting the stack decl index
https://gcc.gnu.org/g:4b1a2878ba3241ec5c0a1bf05ff47bfcd09c3711 commit r15-6654-g4b1a2878ba3241ec5c0a1bf05ff47bfcd09c3711 Author: Andrew Pinski Date: Fri Nov 15 20:22:02 2024 -0800 cfgexpand: Factor out getting the stack decl index This is the first patch in improving this code. Since there are a few places which get the index and they check the same thing, let's factor that out into one function. Bootstrapped and tested on x86_64-linux-gnu. gcc/ChangeLog: * cfgexpand.cc (INVALID_STACK_INDEX): New defined. (decl_stack_index): New function. (visit_op): Use decl_stack_index. (visit_conflict): Likewise. (add_scope_conflicts_1): Likewise. Signed-off-by: Andrew Pinski Diff: --- gcc/cfgexpand.cc | 62 +--- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index abab385293a5..cdebb00cd792 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -337,6 +337,8 @@ static unsigned stack_vars_alloc; static unsigned stack_vars_num; static hash_map *decl_to_stack_part; +#define INVALID_STACK_INDEX ((unsigned)-1) + /* Conflict bitmaps go on this obstack. This allows us to destroy all of them in one big sweep. */ static bitmap_obstack stack_var_bitmap_obstack; @@ -525,6 +527,27 @@ stack_var_conflict_p (unsigned x, unsigned y) return bitmap_bit_p (a->conflicts, y); } +/* Returns the DECL's index into the stack_vars array. + If the DECL does not exist return INVALID_STACK_INDEX. */ +static unsigned +decl_stack_index (tree decl) +{ + if (!decl) +return INVALID_STACK_INDEX; + if (!DECL_P (decl)) +return INVALID_STACK_INDEX; + if (DECL_RTL_IF_SET (decl) != pc_rtx) +return INVALID_STACK_INDEX; + unsigned *v = decl_to_stack_part->get (decl); + if (!v) +return INVALID_STACK_INDEX; + + unsigned indx = *v; + gcc_checking_assert (indx != INVALID_STACK_INDEX); + gcc_checking_assert (indx < stack_vars_num); + return indx; +} + /* Callback for walk_stmt_ops. If OP is a decl touched by add_stack_var enter its partition number into bitmap DATA. 
*/ @@ -533,14 +556,9 @@ visit_op (gimple *, tree op, tree, void *data) { bitmap active = (bitmap)data; op = get_base_address (op); - if (op - && DECL_P (op) - && DECL_RTL_IF_SET (op) == pc_rtx) -{ - unsigned *v = decl_to_stack_part->get (op); - if (v) - bitmap_set_bit (active, *v); -} + unsigned idx = decl_stack_index (op); + if (idx != INVALID_STACK_INDEX) +bitmap_set_bit (active, idx); return false; } @@ -553,20 +571,15 @@ visit_conflict (gimple *, tree op, tree, void *data) { bitmap active = (bitmap)data; op = get_base_address (op); - if (op - && DECL_P (op) - && DECL_RTL_IF_SET (op) == pc_rtx) + unsigned num = decl_stack_index (op); + if (num != INVALID_STACK_INDEX + && bitmap_set_bit (active, num)) { - unsigned *v = decl_to_stack_part->get (op); - if (v && bitmap_set_bit (active, *v)) - { - unsigned num = *v; - bitmap_iterator bi; - unsigned i; - gcc_assert (num < stack_vars_num); - EXECUTE_IF_SET_IN_BITMAP (active, 0, i, bi) - add_stack_var_conflict (num, i); - } + bitmap_iterator bi; + unsigned i; + gcc_assert (num < stack_vars_num); + EXECUTE_IF_SET_IN_BITMAP (active, 0, i, bi) + add_stack_var_conflict (num, i); } return false; } @@ -638,15 +651,14 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict) if (gimple_clobber_p (stmt)) { tree lhs = gimple_assign_lhs (stmt); - unsigned *v; /* Handle only plain var clobbers. Nested functions lowering and C++ front-end inserts clobbers which are not just plain variables. */ if (!VAR_P (lhs)) continue; - if (DECL_RTL_IF_SET (lhs) == pc_rtx - && (v = decl_to_stack_part->get (lhs))) - bitmap_clear_bit (work, *v); + unsigned indx = decl_stack_index (lhs); + if (indx != INVALID_STACK_INDEX) + bitmap_clear_bit (work, indx); } else if (!is_gimple_debug (stmt)) {
[gcc r15-6656] cfgexpand: Handle integral vector types and constructors for scope conflicts [PR105769]
https://gcc.gnu.org/g:4f4722b0722ec343df70e5ec5fd9d5c682ff8149 commit r15-6656-g4f4722b0722ec343df70e5ec5fd9d5c682ff8149 Author: Andrew Pinski Date: Fri Nov 15 20:22:04 2024 -0800 cfgexpand: Handle integral vector types and constructors for scope conflicts [PR105769] This is an expansion of the last patch to also track pointers via vector types and the constructor that are used with vector types. In this case we had: ``` _15 = (long unsigned int) &bias; _10 = (long unsigned int) &cov_jn; _12 = {_10, _15}; ... MEM[(struct vec *)&cov_jn] ={v} {CLOBBER(bob)}; bias ={v} {CLOBBER(bob)}; MEM[(struct function *)&D.6156] ={v} {CLOBBER(bob)}; ... MEM [(void *)&D.6172 + 32B] = _12; MEM[(struct function *)&D.6157] ={v} {CLOBBER(bob)}; ``` Anyways tracking the pointers via vector types to say they are alive at the point where the store of the vector happens fixes the bug by saying it is alive at the same time as another variable is alive. Bootstrapped and tested on x86_64-linux-gnu. PR tree-optimization/105769 gcc/ChangeLog: * cfgexpand.cc (vars_ssa_cache::operator()): For constructors walk over the elements. gcc/testsuite/ChangeLog: * g++.dg/torture/pr105769-1.C: New test. 
Signed-off-by: Andrew Pinski Diff: --- gcc/cfgexpand.cc | 20 +++-- gcc/testsuite/g++.dg/torture/pr105769-1.C | 67 +++ 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index f6c9f7755a4c..2b27076658fd 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -728,7 +728,7 @@ vars_ssa_cache::operator() (tree name) gcc_assert (TREE_CODE (name) == SSA_NAME); if (!POINTER_TYPE_P (TREE_TYPE (name)) - && !INTEGRAL_TYPE_P (TREE_TYPE (name))) + && !ANY_INTEGRAL_TYPE_P (TREE_TYPE (name))) return empty; if (exists (name)) @@ -758,7 +758,7 @@ vars_ssa_cache::operator() (tree name) continue; if (!POINTER_TYPE_P (TREE_TYPE (use)) - && !INTEGRAL_TYPE_P (TREE_TYPE (use))) + && !ANY_INTEGRAL_TYPE_P (TREE_TYPE (use))) continue; /* Mark the old ssa name needs to be update from the use. */ @@ -772,10 +772,22 @@ vars_ssa_cache::operator() (tree name) so we don't go into an infinite loop for some phi nodes with loops. */ create (use); + gimple *g = SSA_NAME_DEF_STMT (use); + + /* CONSTRUCTOR here is always a vector initialization, +walk each element too. */ + if (gimple_assign_single_p (g) + && TREE_CODE (gimple_assign_rhs1 (g)) == CONSTRUCTOR) + { + tree ctr = gimple_assign_rhs1 (g); + unsigned i; + tree elm; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (ctr), i, elm) + work_list.safe_push (std::make_pair (elm, use)); + } /* For assignments, walk each operand for possible addresses. For PHI nodes, walk each argument. */ - gimple *g = SSA_NAME_DEF_STMT (use); - if (gassign *a = dyn_cast (g)) + else if (gassign *a = dyn_cast (g)) { /* operand 0 is the lhs. 
*/ for (unsigned i = 1; i < gimple_num_ops (g); i++) diff --git a/gcc/testsuite/g++.dg/torture/pr105769-1.C b/gcc/testsuite/g++.dg/torture/pr105769-1.C new file mode 100644 index ..3fe973656b84 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr105769-1.C @@ -0,0 +1,67 @@ +// { dg-do run } + +// PR tree-optimization/105769 + +// The partitioning code would incorrectly have bias +// and a temporary in the same partitioning because +// it was thought bias was not alive when those were alive +// do to vectorization of a store of pointers (that included bias). + +#include + +template +struct vec { + T dat[n]; + vec() {} + explicit vec(const T& x) { for(size_t i = 0; i < n; i++) dat[i] = x; } + T& operator [](size_t i) { return dat[i]; } + const T& operator [](size_t i) const { return dat[i]; } +}; + +template +using mat = vec>; +template +using sq_mat = mat; +using map_t = std::function; +template +using est_t = std::function; +template using est2_t = std::function; +map_t id_map() { return [](size_t j) -> size_t { return j; }; } + +template +est2_t jacknife(const est_t> est, sq_mat& cov, vec& bias) { + return [est, &cov, &bias](map_t map) -> void + { +bias = est(map); +for(size_t i = 0; i < n; i++) +{ + bias[i].print(); +} + }; +} + +template +void print_cov_ratio() { + sq_mat<2, T> cov_jn; + vec<2, T> bias; + jacknife<2, T>([](map_t map) -> vec<2, T> { vec<2, T> retv; retv[0] = 1; retv[1] = 1; return retv; }, cov_jn, bias)(id_map()); +} +struct ab { + long long unsigned a; + short unsigned b; + double operator()() { return a; } + ab& operator=(double rhs) { a = rhs; return *this; } + void print(); +};
[gcc r15-6655] cfgexpand: Rewrite add_scope_conflicts_2 to use cache and look back further [PR111422]
https://gcc.gnu.org/g:0014a858a14b825818d6b557c3d5193f85790bde commit r15-6655-g0014a858a14b825818d6b557c3d5193f85790bde Author: Andrew Pinski Date: Fri Nov 15 20:22:03 2024 -0800 cfgexpand: Rewrite add_scope_conflicts_2 to use cache and look back further [PR111422] After fixing loop-im to do the correct overflow rewriting for pointer types too, we end up with code like: ``` _9 = (unsigned long) &g; _84 = _9 + 18446744073709551615; _11 = _42 + _84; _44 = (signed char *) _11; ... *_44 = 10; g ={v} {CLOBBER(eos)}; ... n[0] = &f; *_44 = 8; g ={v} {CLOBBER(eos)}; ``` which was not being recognized by the scope conflicts code. This was because it only handled a one-level walk back rather than multiple levels. This fixes the issue by having a cache which records all references to addresses of stack variables. Unlike the previous patch, this only records and looks at addresses of stack variables. The cache uses a bitmap and uses the index as the bit to look at. PR middle-end/117426 PR middle-end/111422 gcc/ChangeLog: * cfgexpand.cc (struct vars_ssa_cache): New class. (vars_ssa_cache::vars_ssa_cache): New constructor. (vars_ssa_cache::~vars_ssa_cache): New destructor. (vars_ssa_cache::create): New method. (vars_ssa_cache::exists): New method. (vars_ssa_cache::add_one): New method. (vars_ssa_cache::update): New method. (vars_ssa_cache::dump): New method. (add_scope_conflicts_2): Factor mostly out to vars_ssa_cache::operator(). New cache argument. Walk the bitmap cache for the stack variables addresses. (vars_ssa_cache::operator()): New method factored out from add_scope_conflicts_2. Rewrite to be a full walk of all operands and use a worklist. (add_scope_conflicts_1): Add cache new argument for the addr cache. Just call add_scope_conflicts_2 for the phi result instead of calling for the uses and don't call walk_stmt_load_store_addr_ops for phis. Update call to add_scope_conflicts_2 to add cache argument. 
(add_scope_conflicts): Add cache argument and update calls to add_scope_conflicts_1. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr117426-1.c: New test. Signed-off-by: Andrew Pinski Diff: --- gcc/cfgexpand.cc | 292 ++ gcc/testsuite/gcc.dg/torture/pr117426-1.c | 53 ++ 2 files changed, 308 insertions(+), 37 deletions(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index cdebb00cd792..f6c9f7755a4c 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -584,35 +584,243 @@ visit_conflict (gimple *, tree op, tree, void *data) return false; } -/* Helper function for add_scope_conflicts_1. For USE on - a stmt, if it is a SSA_NAME and in its SSA_NAME_DEF_STMT is known to be - based on some ADDR_EXPR, invoke VISIT on that ADDR_EXPR. */ +/* A cache for ssa name to address of stack variables. + When taking into account if a ssa name refers to an + address of a stack variable, we need to walk the + expressions backwards to find the addresses. This + cache is there so we don't need to walk the expressions + all the time. */ +struct vars_ssa_cache +{ +private: + /* Currently an entry is a bitmap of all of the known stack variables + addresses that are referenced by the ssa name. + When the bitmap is the nullptr, then there is no cache. + Currently only empty bitmaps are shared. + The reason for why empty cache is not just a null is so we know the + cache for an entry is filled in. */ + struct entry + { +bitmap bmap = nullptr; + }; + entry *vars_ssa_caches; +public: -static inline void -add_scope_conflicts_2 (tree use, bitmap work, - walk_stmt_load_store_addr_fn visit) + vars_ssa_cache(); + ~vars_ssa_cache(); + const_bitmap operator() (tree name); + void dump (FILE *file); + +private: + /* Can't copy. */ + vars_ssa_cache(const vars_ssa_cache&) = delete; + vars_ssa_cache(vars_ssa_cache&&) = delete; + + /* The shared empty bitmap. */ + bitmap empty; + + /* Unshare the index, currently only need + to unshare if the entry was empty. 
*/ + void unshare(int indx) + { +if (vars_ssa_caches[indx].bmap == empty) + vars_ssa_caches[indx].bmap = BITMAP_ALLOC (&stack_var_bitmap_obstack); + } + void create (tree); + bool exists (tree use); + void add_one (tree old_name, unsigned); + bool update (tree old_name, tree use); +}; + +/* Constructor of the cache, create the cache array. */ +vars_ssa_cache::vars_ssa_cache () +{ + vars_ssa_caches = new entry[num_ssa_names]{}; + + /* Create the shared empty bitmap too. */ + empty = BITMAP_ALLOC (&stack_var_bitmap_
[gcc r15-6657] perform affine fold to unsigned on non address expressions. [PR114932]
https://gcc.gnu.org/g:405c99c17210a58df1a237219e773e689f17 commit r15-6657-g405c99c17210a58df1a237219e773e689f17 Author: Tamar Christina Date: Mon Jan 6 17:52:14 2025 + perform affine fold to unsigned on non address expressions. [PR114932] When the patch for PR114074 was applied we saw a good boost in exchange2. This boost was partially caused by a simplification of the addressing modes. With the patch applied IV opts saw the following form for the base addressing; Base: (integer(kind=4) *) &block + ((sizetype) ((unsigned long) l0_19(D) * 324) + 36) vs what we normally get: Base: (integer(kind=4) *) &block + ((sizetype) ((integer(kind=8)) l0_19(D) * 81) + 9) * 4 This is because the patch promoted multiplies where one operand is a constant from a signed multiply to an unsigned one, to attempt to fold away the constant. This patch attempts the same but due to the various problems with SCEV and niters not being able to analyze the resulting forms (i.e. PR114322) we can't do it during SCEV or in the general form like in fold-const like extract_muldiv attempts. Instead this applies the simplification during IVopts initialization when we create the IV. This allows IV opts to see the simplified form without influencing the rest of the compiler. as mentioned in PR114074 it would be good to fix the missed optimization in the other passes so we can perform this in general. The reason this has a big impact on Fortran code is that Fortran doesn't seem to have unsigned integer types. As such all it's addressing are created with signed types and folding does not happen on them due to the possible overflow. 
concretely on AArch64 this changes the results from generation: mov x27, -108 mov x24, -72 mov x23, -36 add x21, x1, x0, lsl 2 add x19, x20, x22 .L5: add x0, x22, x19 add x19, x19, 324 ldr d1, [x0, x27] add v1.2s, v1.2s, v15.2s str d1, [x20, 216] ldr d0, [x0, x24] add v0.2s, v0.2s, v15.2s str d0, [x20, 252] ldr d31, [x0, x23] add v31.2s, v31.2s, v15.2s str d31, [x20, 288] bl digits_20_ cmp x21, x19 bne .L5 into: .L5: ldr d1, [x19, -108] add v1.2s, v1.2s, v15.2s str d1, [x20, 216] ldr d0, [x19, -72] add v0.2s, v0.2s, v15.2s str d0, [x20, 252] ldr d31, [x19, -36] add x19, x19, 324 add v31.2s, v31.2s, v15.2s str d31, [x20, 288] bl digits_20_ cmp x21, x19 bne .L5 The two patches together results in a 10% performance increase in exchange2 in SPECCPU 2017 and a 4% reduction in binary size and a 5% improvement in compile time. There's also a 5% performance improvement in fotonik3d and similar reduction in binary size. The patch folds every IV to unsigned to canonicalize them. At the end of the pass we match.pd will then remove unneeded conversions. Note that we cannot force everything to unsigned, IVops requires that array address expressions remain as such. Folding them results in them becoming pointer expressions for which some optimizations in IVopts do not run. gcc/ChangeLog: PR tree-optimization/114932 * tree-ssa-loop-ivopts.cc (alloc_iv): Perform affine unsigned fold. gcc/testsuite/ChangeLog: PR tree-optimization/114932 * gcc.dg/tree-ssa/pr64705.c: Update dump file scan. * gcc.target/i386/pr115462.c: The testcase shares 3 IVs which calculates the same thing but with a slightly different increment offset. The test checks for 3 complex addressing loads, one for each IV. But with this change they now all share one IV. That is the loop now only has one complex addressing. This is ultimately driven by the backend costing and the current costing says this is preferred so updating the testcase. * gfortran.dg/addressing-modes_1.f90: New test. 
Diff: --- gcc/testsuite/gcc.dg/tree-ssa/pr64705.c | 2 +- gcc/testsuite/gcc.target/i386/pr115462.c | 2 +- gcc/testsuite/gfortran.dg/addressing-modes_1.f90 | 37 gcc/tree-ssa-loop-ivopts.cc | 20 ++--- 4 files changed, 49 insertions(+), 12 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr64705.c b/gcc/testsuite/gcc.dg/tree-ssa/pr64705.c index fd24e38a53e9..3c9c2e5deed1 100644
[gcc r15-6658] Only apply adjust_args in OpenMP dispatch if variant substitution occurs
https://gcc.gnu.org/g:aa688dd6302fd9fd4a6ede232bbe63781e672ae9 commit r15-6658-gaa688dd6302fd9fd4a6ede232bbe63781e672ae9 Author: Paul-Antoine Arras Date: Mon Jan 6 17:00:10 2025 +0100 Only apply adjust_args in OpenMP dispatch if variant substitution occurs This is a followup to 084ea8ad584 OpenMP: middle-end support for dispatch + adjust_args. This patch fixes a bug that caused arguments in an OpenMP dispatch call to be modified even when no variant substitution occurred. gcc/ChangeLog: * gimplify.cc (gimplify_call_expr): Create variable variant_substituted_p to control whether adjust_args applies. gcc/testsuite/ChangeLog: * c-c++-common/gomp/adjust-args-4.c: New test. Diff: --- gcc/gimplify.cc | 13 + gcc/testsuite/c-c++-common/gomp/adjust-args-4.c | 24 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index bd324be926ae..251d581f44cd 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -3857,7 +3857,8 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) enum gimplify_status ret; int i, nargs; gcall *call; - bool builtin_va_start_p = false, omp_dispatch_p = false; + bool builtin_va_start_p = false, omp_dispatch_p = false, + variant_substituted_p = false; location_t loc = EXPR_LOCATION (*expr_p); gcc_assert (TREE_CODE (*expr_p) == CALL_EXPR); @@ -4035,7 +4036,10 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) { tree variant = omp_resolve_declare_variant (fndecl); if (variant != fndecl) - CALL_EXPR_FN (*expr_p) = build1 (ADDR_EXPR, fnptrtype, variant); + { + CALL_EXPR_FN (*expr_p) = build1 (ADDR_EXPR, fnptrtype, variant); + variant_substituted_p = true; + } } /* There is a sequence point before the call, so any side effects in @@ -4325,8 +4329,9 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) } } - if ((need_device_ptr && !is_device_ptr) - || (need_device_addr && !has_device_addr)) + if (variant_substituted_p + && ((need_device_ptr && !is_device_ptr) 
+ || (need_device_addr && !has_device_addr))) { if (dispatch_device_num == NULL_TREE) { diff --git a/gcc/testsuite/c-c++-common/gomp/adjust-args-4.c b/gcc/testsuite/c-c++-common/gomp/adjust-args-4.c new file mode 100644 index ..377932e1b9cc --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/adjust-args-4.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fdump-tree-gimple" } */ + +/* Ensure that adjust_args is only applied when variant substitution happens. */ + +void h(int *); +void f(int *); +#pragma omp declare variant(f) match(construct={dispatch}) adjust_args(need_device_ptr : x) +void g(int *x); + +void foo(int *y) +{ + #pragma omp dispatch +h(y); + #pragma omp dispatch +f(y); + #pragma omp dispatch +g(y); +} + +/* { dg-final { scan-tree-dump-times "h \\(y\\);" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "f \\(y\\);" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "D\.\[0-9]+ = __builtin_omp_get_mapped_ptr \\(y, D\.\[0-9]+\\);" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "f \\(D\.\[0-9]+\\);" 1 "gimple" } } */
[gcc r12-10888] Zen5 tuning part 4: update reassociation width
https://gcc.gnu.org/g:4d7efc031fbd925565b049670bf755aca21bd2e3 commit r12-10888-g4d7efc031fbd925565b049670bf755aca21bd2e3 Author: Jan Hubicka Date: Tue Sep 3 18:20:34 2024 +0200 Zen5 tuning part 4: update reassociation width Zen5 has 6 instead of 4 ALUs and the integer multiplication can now execute in 3 of them. FP units can do 2 additions and 2 multiplications with latency 2 and 3. This patch updates reassociation width accordingly. This has the potential of increasing register pressure, but unlike when benchmarking znver1 tuning, I did not notice this actually causing problems on SPEC, so this patch bumps up reassociation width to 6 for everything except for integer vectors, where there are 4 units with typical latency of 1. Bootstrapped/regtested x86_64-linux, committed. gcc/ChangeLog: * config/i386/i386.cc (ix86_reassociation_width): Update for Znver5. * config/i386/x86-tune-costs.h (znver5_costs): Update reassociation widths. (cherry picked from commit f0ab3de6ec0e3540f2e57f3f5628005f0a4e3fa5) Diff: --- gcc/config/i386/i386.cc | 10 +++--- gcc/config/i386/x86-tune-costs.h | 23 +-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 2087f8633eb8..ea25e56ad644 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22923,13 +22923,17 @@ ix86_reassociation_width (unsigned int op, machine_mode mode) if (width == 1) return 1; - /* Integer vector instructions execute in FP unit + /* Znver1-4 Integer vector instructions execute in FP unit and can execute 3 additions and one multiplication per cycle. */ if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2 - || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4 - || ix86_tune == PROCESSOR_ZNVER5) + || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4) && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) return 1; + /* Znver5 can do 2 integer multiplications per cycle with latency +of 3. 
*/ + if (ix86_tune == PROCESSOR_ZNVER5 + && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) + width = 6; /* Account for targets that splits wide vectors into multiple parts. */ if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index b8e7ab9372ea..0f2308bb079c 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2068,16 +2068,19 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ - /* Zen can execute 4 integer operations per cycle. FP operations - take 3 cycles and it can execute 2 integer additions and 2 - multiplications thus reassociation may make sense up to with of 6. - SPEC2k6 bencharks suggests - that 4 works better than 6 probably due to register pressure. - - Integer vector operations are taken by FP unit and execute 3 vector - plus/minus operations per cycle but only one multiply. This is adjusted - in ix86_reassociation_width. */ - 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + /* Zen5 can execute: + - integer ops: 6 per cycle, at most 3 multiplications. + latency 1 for additions, 3 for multiplications (pipelined) + + Setting width of 9 for multiplication is probably excessive + for register pressure. + - fp ops: 2 additions per cycle, latency 2-3 + 2 multiplicaitons per cycle, latency 3 + - vector intger ops: 4 additions, latency 1 + 2 multiplications, latency 4 + We increase width to 6 for multiplications + in ix86_reassociation_width. */ + 6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */ znver2_memcpy, znver2_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
[gcc r15-6642] ada: Fix constants overlayed by variables
https://gcc.gnu.org/g:d0f0f9fb8d5410c7306b56a6395e9c82b096a149 commit r15-6642-gd0f0f9fb8d5410c7306b56a6395e9c82b096a149 Author: Piotr Trojanek Date: Fri Dec 20 00:13:57 2024 +0100 ada: Fix constants overlayed by variables Code cleanup suggested by GNATcheck rule Constant_Overlays. gcc/ada/ChangeLog: * repinfo-input.adb (Decode_Name, Read_Name_With_Prefix): Use constant overlay with pragma Import. Diff: --- gcc/ada/repinfo-input.adb | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/ada/repinfo-input.adb b/gcc/ada/repinfo-input.adb index 78de4412228a..c5cc34a63505 100644 --- a/gcc/ada/repinfo-input.adb +++ b/gcc/ada/repinfo-input.adb @@ -341,7 +341,8 @@ package body Repinfo.Input is else declare - S : String (Integer (Lo) .. Integer (Hi)); + S : constant String (Integer (Lo) .. Integer (Hi)); + pragma Import (Ada, S); for S'Address use Text (Lo)'Address; begin @@ -627,7 +628,8 @@ package body Repinfo.Input is else declare - S : String (Integer (Lo) .. Integer (Hi)); + S : constant String (Integer (Lo) .. Integer (Hi)); + pragma Import (Ada, S); for S'Address use Text (Lo)'Address; begin
[gcc r15-6650] RISC-V: Add missing dg-runtest to run the testcase under gcc.target/riscv/rvv/
https://gcc.gnu.org/g:bacaf016aa3f40a7a9a3fd96c4f8bebb5312f6a3 commit r15-6650-gbacaf016aa3f40a7a9a3fd96c4f8bebb5312f6a3 Author: Tsung Chun Lin Date: Tue Jan 7 07:07:23 2025 -0700 RISC-V: Add missing dg-runtest to run the testcase under gcc.target/riscv/rvv/ gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/rvv.exp: Add dg-runtest to run the testcase under gcc.target/riscv/rvv/. Diff: --- gcc/testsuite/gcc.target/riscv/rvv/rvv.exp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp index b85c192c2bef..d82710e9c416 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp @@ -35,6 +35,8 @@ dg-init # Main loop. set CFLAGS "$DEFAULT_CFLAGS -O3" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ + "" $CFLAGS dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/base/*.\[cS\]]] \ "" $CFLAGS dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/xsfvector/*.\[cS\]]] \
[gcc r15-6659] libgomp.texi: Minor update to omp_get_num_devices/omp_get_initial_device
https://gcc.gnu.org/g:d3ccd89fa0d34d44226af47fe82c27ba7833fe65 commit r15-6659-gd3ccd89fa0d34d44226af47fe82c27ba7833fe65 Author: Tobias Burnus Date: Tue Jan 7 16:43:30 2025 +0100 libgomp.texi: Minor update to omp_get_num_devices/omp_get_initial_device libgomp/ChangeLog: * libgomp.texi (OpenMP 6.0): Fix typo. (omp_get_default_device): Update the wording as the value returned by omp_get_initial_device is now ambiguous. (omp_get_num_devices): Minor wording tweak. (omp_get_initial_device): Note that the function may also return omp_initial_device since OpenMP 6. Diff: --- libgomp/libgomp.texi | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 9a42355ff5d4..7d8cd70287ee 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -550,7 +550,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @item @code{interchange} loop-transformation construct @tab N @tab @item @code{reverse} loop-transformation construct @tab N @tab @item @code{split} loop-transformation construct @tab N @tab -@item @code{stipe} loop-transformation construct @tab N @tab +@item @code{stripe} loop-transformation construct @tab N @tab @item @code{tile} permitting association of grid and inter-tile loops @tab N @tab @item @code{strict} modifier keyword to @code{num_threads} @tab N @tab @item @code{safesync} clause to the @code{parallel} construct @tab N @tab @@ -1760,10 +1760,9 @@ The effect of running this routine in a @code{target} region is unspecified. Get the value of the @emph{default-device-var} ICV, which is used for target regions without a device clause. The value is either a nonnegative device number, @code{omp_initial_device} or -@code{omp_invalid_device}. Note that for the host, the ICV can have two values -and, hence, this routine might return either the value of the named constant -@code{omp_initial_device} or the value returned by the -@code{omp_get_initial_device} routine. 
+@code{omp_invalid_device}. Note that for the host, the ICV can have two values: +either the value of the named constant @code{omp_initial_device} or the value +returned by the @code{omp_get_num_devices} routine. The effect of running this routine in a @code{target} region is unspecified. @@ -1791,7 +1790,7 @@ The effect of running this routine in a @code{target} region is unspecified. @subsection @code{omp_get_num_devices} -- Number of target devices @table @asis @item @emph{Description}: -Returns the number of target devices. +Returns the number of available non-host devices. The effect of running this routine in a @code{target} region is unspecified. @@ -1938,7 +1937,8 @@ run-time function is desired. @item @emph{Description}: This function returns a device number that represents the host device. Since OpenMP 5.1, this is equal to the value returned by the -@code{omp_get_num_devices} function. +@code{omp_get_num_devices} function; since OpenMP 6.0 it may also return +the value of @code{omp_initial_device}. The effect of running this routine in a @code{target} region is unspecified.
[gcc r15-6652] rtl-optimization/118298 - constant iteration loops and #pragma unroll
https://gcc.gnu.org/g:34501ef418da13b361614235077c2162caabab73 commit r15-6652-g34501ef418da13b361614235077c2162caabab73 Author: Richard Biener Date: Tue Jan 7 13:18:27 2025 +0100 rtl-optimization/118298 - constant iteration loops and #pragma unroll When the RTL unroller handles constant iteration loops it bails out prematurely when heuristics wouldn't apply any unrolling before checking #pragma unroll. PR rtl-optimization/118298 * loop-unroll.cc (decide_unroll_constant_iterations): Honor loop->unroll even if the loop is too big for heuristics. Diff: --- gcc/loop-unroll.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc index 2bd6e888b266..b49520553182 100644 --- a/gcc/loop-unroll.cc +++ b/gcc/loop-unroll.cc @@ -372,7 +372,8 @@ decide_unroll_constant_iterations (class loop *loop, int flags) nunroll = targetm.loop_unroll_adjust (nunroll, loop); /* Skip big loops. */ - if (nunroll <= 1) + if (nunroll <= 1 + && !(loop->unroll > 1 && loop->unroll < USHRT_MAX)) { if (dump_file) fprintf (dump_file, ";; Not considering loop, is too big\n");
[gcc r15-6651] Fixup convert-dfp*.c
https://gcc.gnu.org/g:cda313ba6215d20150db6e952ce62138e9127d21 commit r15-6651-gcda313ba6215d20150db6e952ce62138e9127d21 Author: Richard Biener Date: Tue Jan 7 15:07:12 2025 +0100 Fixup convert-dfp*.c The testcases use -save-temps which doesn't play nice with -flto and multilib testing resulting in spurious UNRESOLVED like /usr/lib64/gcc/x86_64-suse-linux/14/../../../../x86_64-suse-linux/bin/ld: i386:x86-64 architecture of input file `./convert-dfp-2.ltrans0.ltrans.o' is incompatible with i386 output The following skips the testcases when using -flto. * gcc.dg/torture/convert-dfp-2.c: Skip with -flto. * gcc.dg/torture/convert-dfp.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/torture/convert-dfp-2.c | 1 + gcc/testsuite/gcc.dg/torture/convert-dfp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/gcc/testsuite/gcc.dg/torture/convert-dfp-2.c b/gcc/testsuite/gcc.dg/torture/convert-dfp-2.c index 3e4ecb57ba64..f62b47159128 100644 --- a/gcc/testsuite/gcc.dg/torture/convert-dfp-2.c +++ b/gcc/testsuite/gcc.dg/torture/convert-dfp-2.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ /* { dg-require-effective-target float16_runtime } */ /* { dg-require-effective-target dfprt } */ /* { dg-options "-save-temps" } */ diff --git a/gcc/testsuite/gcc.dg/torture/convert-dfp.c b/gcc/testsuite/gcc.dg/torture/convert-dfp.c index ec136896ca7b..a20253dd75b7 100644 --- a/gcc/testsuite/gcc.dg/torture/convert-dfp.c +++ b/gcc/testsuite/gcc.dg/torture/convert-dfp.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ /* { dg-require-effective-target float16_runtime } */ /* { dg-require-effective-target dfprt } */ /* { dg-options "-save-temps" } */
[gcc/redhat/heads/gcc-15-branch] (764 commits) Merge commit 'r15-6659-gd3ccd89fa0d34d44226af47fe82c27ba783
The branch 'redhat/heads/gcc-15-branch' was updated to point to: 06d4f5dc909f... Merge commit 'r15-6659-gd3ccd89fa0d34d44226af47fe82c27ba783 It previously pointed to: d2b3fd44a4b2... Merge commit 'r15-5896-g4114b7fb1cb4cb90b9fafc22213d7d9579b Diff: Summary of changes (added commits): --- 06d4f5d... Merge commit 'r15-6659-gd3ccd89fa0d34d44226af47fe82c27ba783 d3ccd89... libgomp.texi: Minor update to omp_get_num_devices/omp_get_i (*) aa688dd... Only apply adjust_args in OpenMP dispatch if variant substi (*) 405c99c... perform affine fold to unsigned on non address expressions. (*) 4f4722b... cfgexpand: Handle integral vector types and constructors fo (*) 0014a85... cfgexpand: Rewrite add_scope_conflicts_2 to use cache and l (*) 4b1a287... cfgexpand: Factor out getting the stack decl index (*) a856b4d... [PR testsuite/118055] Trivial testsuite adjustment for m68k (*) 34501ef... rtl-optimization/118298 - constant iteration loops and #pra (*) cda313b... Fixup convert-dfp*.c (*) bacaf01... RISC-V: Add missing dg-runtest to run the testcase under gc (*) 0c28cf5... Update copyright years. (*) edec353... ada: Adjust pragma obsolescent message (*) 5f5022a... ada: Drop g-cpp* units not needed by the compiler (*) 980415b... ada: Do not create temporaries for initialization statement (*) a80bb35... ada: Remove unused AST flag Address_Warning_Posted (*) 0f1bc0d... ada: Do not raise exceptions from Exp_Aggr.Packed_Array_Agg (*) 360cd35... ada: Cleanup preanalysis of static expressions (part 2) (*) d0f0f9f... ada: Fix constants overlayed by variables (*) 0a71f5b... ada: Improve protection against wrong use from GDB (*) e8aadcb... ada: Fix violations of GNAT-specific GNATcheck rules (*) 6721757... ada: Remove dead code in detection of null record definitio (*) ec62ba1... ada: Fix abort deferral for finally parts (*) 7107891... ada: Improved checking of uses of package renamings (*) 4b64d6d... ada: Remove flag Is_Inherited_Pragma which is only set and (*) 8f4194d... 
ada: Avoid conversion from String to Name_Id at runtime (*) b014d25... ada: Untangle check for restriction No_Implementation_Attri (*) a1b92cc... ada: Handle attributes related to Ada 2012 iterators as int (*) 69dfa02... ada: Remove unnecessary qualifiers for First/Next list oper (*) ce13a3a... ada: Fix internal error on container aggregate for bounded (*) c7799a8... ada: Add guard to System.Val_Real.Large_Powfive against pat (*) 0f83183... ada: Drop vxworks-smp-ppc-link.spec (*) 5697da3... ada: Add "finally" GNAT extension (*) a47c6d8... ada: Elide the copy for bit-packed aggregates in (safe) ass (*) eccfadd... ada: Reject references to attribute Result in Exceptional_C (*) 0307abc... ada: Move checks for consequences of Exceptional_Cases to G (*) d734902... ada: Fix comments about Subprogram_Variant and Exceptional_ (*) 67e3db7... ada: Put_Image spec incorrectly ignored for Fixed_Point_Typ (*) f409c45... ada: Error on instantiation with defaulted formal type refe (*) 90f504d... ada: Use the syntax of Ada 2012 if-expression in -gnatR3 ou (*) c5ce245... ada: Preserve Warning_Doc_Switch in gnatprove invocation (*) bd1df4e... ada: Restrict previous change made to expansion of allocato (*) 25b380d... Fortran: Ensure deep copy of allocatable components in cyli (*) 61400b6... LoongArch: Optimize initializing fp resgister to zero (*) 1ea6fef... [PR modula2/118010, modula2/118183] Unable to rebuild the b (*) d897090... Fortran: Extend cylic type detection for deallocate [PR1166 (*) 70035b6... AArch64: Remove AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS (*) e53277d... testsuite: add testcase for fixed PR98000 (*) f9ff478... testsuite: add testcase for fixed PR117546 (*) e58b0dd... Daily bump. (*) 1b1a33f... expand: drop stack adjustments after barrier [PR118006] (*) 6069f02... aarch64: remove extra XTN in vector concatenation (*) 7a7903d... Fix type in some Min() calls (#119248) (*) f0b8256... Fix few size types in memprof (#119114) (*) 1bd0356... 
Add type __sanitizer::ssize (#116957) (*) 0bb38b2... Replace uptr by usize/SIZE_T in interfaces (*) ed1493e... crypto/tls: fix Config.Time in tests using expired certific (*) 803808b... or1k: add .note.GNU-stack section on linux (*) fa778ba... Respect -fprofile-prefix-map for getcwd in .gcno files (*) f9c99d4... SVE intrinsics: Fold svmul by -1 to svneg for unsigned type (*) 144ddb0... Ada: fix spurious relinking of gnatbind for cross compilers (*) 72b2731... ipa-cp: Make dumping of bit masks representing -1 nicer (*) 668cad0... tree-switch-conversion: don't apply switch size limit on ju (*) 830bead... AArch64: Implement four and eight chunk VLA concats [PR1182 (*) f345ae3... ada: Fix small thinko in previous change to two-pass aggreg (*) ad8242d... ada: Streamline runtime support of finalization collections (*) 0d73643... ada: Fix predicate involving array in
[gcc r15-6669] [PATCH] testsuite: enable effective-target sync_char_short on RISC-V
https://gcc.gnu.org/g:e8a3f6bfb8d92756bc33c3a520bca1ff644d64b7 commit r15-6669-ge8a3f6bfb8d92756bc33c3a520bca1ff644d64b7 Author: Andreas Schwab Date: Tue Jan 7 12:31:39 2025 -0700 [PATCH] testsuite: enable effective-target sync_char_short on RISC-V gcc/testsuite/ * lib/target-supports.exp (check_effective_target_sync_char_short): Enable for riscv*-*-*. Diff: --- gcc/testsuite/lib/target-supports.exp | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0ff00d189ff5..e6a876d9301a 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10051,6 +10051,7 @@ proc check_effective_target_sync_char_short { } { || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9]) || ([istarget arc*-*-*] && [check_effective_target_arc_atomic]) || [istarget loongarch*-*-*] +|| [istarget riscv*-*-*] || [check_effective_target_mips_llsc] }}] }
[gcc r15-6668] [PATCH] riscv: add mising masking in lrsc expander (PR118137)
https://gcc.gnu.org/g:013e66ea95a241c472b9d87430efaf6c759cf5c0 commit r15-6668-g013e66ea95a241c472b9d87430efaf6c759cf5c0 Author: Andreas Schwab Date: Tue Jan 7 12:23:37 2025 -0700 [PATCH] riscv: add mising masking in lrsc expander (PR118137) gcc: PR target/118137 * config/riscv/sync.md ("lrsc_atomic_exchange"): Apply mask to shifted value. gcc/testsuite: PR target/118137 * gcc.dg/atomic/pr118137.c: New. Diff: --- gcc/config/riscv/sync.md | 1 + gcc/testsuite/gcc.dg/atomic/pr118137.c | 29 + 2 files changed, 30 insertions(+) diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md index 58f32d253f1b..726800a96623 100644 --- a/gcc/config/riscv/sync.md +++ b/gcc/config/riscv/sync.md @@ -467,6 +467,7 @@ rtx shifted_value = gen_reg_rtx (SImode); riscv_lshift_subword (mode, value, shift, &shifted_value); + emit_move_insn (shifted_value, gen_rtx_AND (SImode, shifted_value, mask)); emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem, shifted_value, model, diff --git a/gcc/testsuite/gcc.dg/atomic/pr118137.c b/gcc/testsuite/gcc.dg/atomic/pr118137.c new file mode 100644 index ..7cdb2240aa36 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic/pr118137.c @@ -0,0 +1,29 @@ +/* Test that subword atomic operations only affect the subword. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + +void +foo (char *x) +{ + __sync_fetch_and_or (x, 0xff); +} + +void +bar (short *y) +{ + __atomic_fetch_or (y, 0xffff, 0); +} + + +int +main () +{ + char b[4] = {}; + foo(b); + + short h[2] = {}; + bar(h); + + if (b[1] || b[2] || b[3] || h[1]) +__builtin_abort(); +}
[gcc r15-6660] AArch64: Block combine_and_move from creating FP literal loads
https://gcc.gnu.org/g:45d306a835cb3f865a897dc7c04efbe1f9f46c28 commit r15-6660-g45d306a835cb3f865a897dc7c04efbe1f9f46c28 Author: Wilco Dijkstra Date: Fri Nov 1 14:44:56 2024 +0000 AArch64: Block combine_and_move from creating FP literal loads The IRA combine_and_move pass runs if the scheduler is disabled and aggressively combines moves. The movsf/df patterns allow all FP immediates since they rely on a split pattern. However splits do not happen during IRA, so the result is extra literal loads. To avoid this, split early during expand and block creation of FP immediates that need this split. Mark a few testcases that rely on late splitting as xfail. double f(void) { return 128.0; } -O2 -fno-schedule-insns gives: adrp x0, .LC0 ldr d0, [x0, #:lo12:.LC0] ret After patch: mov x0, 4638707616191610880 fmov d0, x0 ret Passes bootstrap & regress, OK for commit? gcc: * config/aarch64/aarch64.md (movhf_aarch64): Use aarch64_valid_fp_move. (movsf_aarch64): Likewise. (movdf_aarch64): Likewise. * config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function. * config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise. gcc/testsuite: * gcc.target/aarch64/dbl_mov_immediate_1.c: Add xfail for -0.0. * gcc.target/aarch64/fmul_scvtf_1.c: Fixup test cases, add xfail, reduce duplication. 
Diff: --- gcc/config/aarch64/aarch64-protos.h| 1 + gcc/config/aarch64/aarch64.cc | 30 ++ gcc/config/aarch64/aarch64.md | 50 .../gcc.target/aarch64/dbl_mov_immediate_1.c | 8 +-- gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c| 68 +++--- 5 files changed, 78 insertions(+), 79 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 70a134f0365b..fa7bc8029be0 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -857,6 +857,7 @@ opt_machine_mode aarch64_v64_mode (scalar_mode); opt_machine_mode aarch64_v128_mode (scalar_mode); opt_machine_mode aarch64_full_sve_mode (scalar_mode); bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); +bool aarch64_valid_fp_move (rtx, rtx, machine_mode); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index d6a8e4c20952..3e700ed41e97 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -11299,6 +11299,36 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) return aarch64_simd_valid_mov_imm (v_op); } +/* Return TRUE if DST and SRC with mode MODE is a valid fp move. */ +bool +aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode) +{ + if (!TARGET_FLOAT) +return false; + + if (aarch64_reg_or_fp_zero (src, mode)) +return true; + + if (!register_operand (dst, mode)) +return false; + + if (MEM_P (src)) +return true; + + if (!DECIMAL_FLOAT_MODE_P (mode)) +{ + if (aarch64_can_const_movi_rtx_p (src, mode) + || aarch64_float_const_representable_p (src) + || aarch64_float_const_zero_rtx_p (src)) + return true; + + /* Block FP immediates which are split during expand. */ + if (aarch64_float_const_rtx_p (src)) + return false; +} + + return can_create_pseudo_p (); +} /* Return the fixed registers used for condition codes. 
*/ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 6758a1db1173..0ed3c93b379e 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1760,14 +1760,33 @@ && ! (GET_CODE (operands[1]) == CONST_DOUBLE && aarch64_float_const_zero_rtx_p (operands[1]))) operands[1] = force_reg (mode, operands[1]); + +if (!DECIMAL_FLOAT_MODE_P (mode) + && GET_CODE (operands[1]) == CONST_DOUBLE + && can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (operands[1], mode) + && !aarch64_float_const_representable_p (operands[1]) + && !aarch64_float_const_zero_rtx_p (operands[1]) + && aarch64_float_const_rtx_p (operands[1])) + { + unsigned HOST_WIDE_INT ival; + bool res = aarch64_reinterpret_float_as_int (operands[1], &ival); + gcc_assert (res); + + machine_mode intmode + = int_mode_for_size (GET_MODE_BITSIZE (mode), 0).require (); + rtx tmp = gen_reg_rtx (intmode); + emit_move_insn (tmp, gen_int_mode (ival, intmode)); + emit_move_insn (operands[0], gen_lowpart (mode, tmp)); + DONE; + } } )
[gcc r15-6661] AArch64: Switch off early scheduling
https://gcc.gnu.org/g:c5db3f50bdf34ea96fd193a2a66d686401053bd2 commit r15-6661-gc5db3f50bdf34ea96fd193a2a66d686401053bd2 Author: Wilco Dijkstra Date: Fri Nov 1 14:40:26 2024 +0000 AArch64: Switch off early scheduling The early scheduler takes up ~33% of the total build time, however it doesn't provide a meaningful performance gain. This is partly because modern OoO cores need far less scheduling, partly because the scheduler tends to create many unnecessary spills by increasing register pressure. Building applications 56% faster is far more useful than ~0.1% improvement on SPEC, so switch off early scheduling on AArch64. Codesize reduces by ~0.2%. Fix various tests that depend on scheduling by explicitly adding -fschedule-insns. gcc: * common/config/aarch64/aarch64-common.cc: Switch off fschedule_insns. gcc/testsuite: * gcc.dg/guality/pr36728-3.c: Remove XFAIL. * gcc.dg/guality/pr68860-1.c: Likewise. * gcc.dg/guality/pr68860-2.c: Likewise. * gcc.target/aarch64/ldp_aligned.c: Fix test. * gcc.target/aarch64/ldp_always.c: Likewise. * gcc.target/aarch64/ldp_stp_10.c: Add -fschedule-insns. * gcc.target/aarch64/ldp_stp_12.c: Likewise. * gcc.target/aarch64/ldp_stp_13.c: Remove test. * gcc.target/aarch64/ldp_stp_21.c: Add -fschedule-insns. * gcc.target/aarch64/ldp_stp_8.c: Likewise. * gcc.target/aarch64/ldp_vec_v2sf.c: Likewise. * gcc.target/aarch64/ldp_vec_v2si.c: Likewise. * gcc.target/aarch64/test_frame_16.c: Fix test. * gcc.target/aarch64/sve/vcond_12.c: Add -fschedule-insns. * gcc.target/aarch64/sve/acle/general/ldff1_3.c: Likewise. 
Diff: --- gcc/common/config/aarch64/aarch64-common.cc| 2 ++ gcc/testsuite/gcc.dg/guality/pr36728-3.c | 2 +- gcc/testsuite/gcc.dg/guality/pr68860-1.c | 2 +- gcc/testsuite/gcc.dg/guality/pr68860-2.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_aligned.c | 18 ++-- gcc/testsuite/gcc.target/aarch64/ldp_always.c | 33 ++ gcc/testsuite/gcc.target/aarch64/ldp_stp_10.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_stp_12.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_stp_13.c | 18 gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c| 2 +- gcc/testsuite/gcc.target/aarch64/ldp_vec_v2si.c| 2 +- .../gcc.target/aarch64/sve/acle/general/ldff1_3.c | 2 +- gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c| 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_16.c | 2 +- 16 files changed, 18 insertions(+), 77 deletions(-) diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index dd9413475043..92df8b619305 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -53,6 +53,8 @@ static const struct default_options aarch_option_optimization_table[] = { OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 0 }, /* Enable -fsched-pressure by default when optimizing. */ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, +/* Disable early scheduling due to high compile-time overheads. */ +{ OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 }, /* Enable redundant extension instructions removal at -O2 and higher. 
*/ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_mearly_ra_, NULL, AARCH64_EARLY_RA_ALL }, diff --git a/gcc/testsuite/gcc.dg/guality/pr36728-3.c b/gcc/testsuite/gcc.dg/guality/pr36728-3.c index 589009b2e82c..4700d50f8bcd 100644 --- a/gcc/testsuite/gcc.dg/guality/pr36728-3.c +++ b/gcc/testsuite/gcc.dg/guality/pr36728-3.c @@ -30,7 +30,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7) /* { dg-final { gdb-test 14 "arg5" "5" } } */ /* { dg-final { gdb-test 14 "arg6" "6" } } */ /* { dg-final { gdb-test 14 "arg7" "30" } } */ -/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { any-opts "-O3" } } } } } */ +/* { dg-final { gdb-test 14 "y" "2" } } */ /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg3" "3" } } */ diff --git a/gcc/testsuite/gcc.dg/guality/pr68860-1.c b/gcc/testsuite/gcc.dg/guality/pr68860-1.c index bbd9d6e60329..8c8d8354587f 100644 --- a/gcc/testsuite/gcc.dg/guality/pr68860-1.c +++ b/gcc/testsuite/gcc.dg/guality/pr68860-1.c @@ -31,7 +31,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int a /* { dg-final { gdb-test 14 "arg6" "6" } } */ /* { dg-final { gdb-test 14 "arg7" "30" } } */ /* { dg-final { gdb-test 14 "arg8" "7"
[gcc r15-6662] Document unsigned constants in intrinsic modules.
https://gcc.gnu.org/g:d6b1d5deb23063b8eac980def0bc4e438b44ee85 commit r15-6662-gd6b1d5deb23063b8eac980def0bc4e438b44ee85 Author: Thomas Koenig Date: Tue Jan 7 15:23:29 2025 +0100 Document unsigned constants in intrinsic modules. gcc/fortran/ChangeLog: * intrinsic.texi (ISO_FORTRAN_ENV): Also mention INT8 in the text. Document UINT8, UINT16, UINT32 and UINT64. (ISO_C_BINDING): New table for unsigned KIND numbers. Diff: --- gcc/fortran/intrinsic.texi | 35 ++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 7c7e4c9372bd..55933d23e188 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -15305,7 +15305,7 @@ Identifies the preconnected unit identified by the asterisk @item @code{INT8}, @code{INT16}, @code{INT32}, @code{INT64}: Kind type parameters to specify an INTEGER type with a storage -size of 16, 32, and 64 bits. It is negative if a target platform +size of 8, 16, 32, and 64 bits. It is negative if a target platform does not support the particular kind. (Fortran 2008 or later.) @item @code{INTEGER_KINDS}: @@ -15368,6 +15368,12 @@ in the failed state. (TS 18508 or later.) @item @code{STAT_UNLOCKED}: Scalar default-integer constant used as @code{STAT=} return value by @code{UNLOCK} to denote that the lock variable is unlocked. (Fortran 2008 or later.) + +@item @code{UINT8}, @code{UINT16}, @code{UINT32}, @code{UINT64}: +Kind type parameters to specify an UNSIGNED type with a storage +size of 8, 16, 32, and 64 bits. It is negative if a target platform +does not support the particular kind. (Extension, @pxref{Unsigned integers}.) + @end table The module provides the following derived type: @@ -15455,6 +15461,33 @@ Furthermore, if @code{_Float128} is supported in C, the named constants @item @code{CHARACTER}@tab @code{C_CHAR}@tab @code{char} @end multitable +GNU Fortran also provides as an extension, named constants for +@code{UNSIGNED} integers @pxref{Unsigned integers}. 
+@multitable @columnfractions .20 0.40 0.40 +@headitem Fortran Type @tab Named constant @tab C type +@item @code{UNSIGNED}@tab @code{C_UNSIGNED}@tab @code{unsigned int} +@item @code{UNSIGNED}@tab @code{C_UNSIGNED_SHORT} @tab @code{unsigned short} +@item @code{UNSIGNED}@tab @code{C_UNSIGNED_CHAR} @tab @code{unsigned char} +@item @code{UNSIGNED}@tab @code{C_UNSIGNED_LONG} @tab @code{unsigned long} +@item @code{UNSIGNED}@tab @code{C_UNSIGNED_LONG_LONG} @tab @code{unsigned long long} +@item @code{UNSIGNED}@tab @code{C_UINTMAX_T} @tab @code{uintmax_t} +@item @code{UNSIGNED}@tab @code{C_UINT8_T} @tab @code{uint8_t} +@item @code{UNSIGNED}@tab @code{C_UINT16_T}@tab @code{uint16_t} +@item @code{UNSIGNED}@tab @code{C_UINT32_T}@tab @code{uint32_t} +@item @code{UNSIGNED}@tab @code{C_UINT64_T}@tab @code{uint64_t} +@item @code{UNSIGNED}@tab @code{C_UINT128_T} @tab @code{uint128_t} +@item @code{UNSIGNED}@tab @code{C_UINT_FAST8_T}@tab @code{uint_fast8_t} +@item @code{UNSIGNED}@tab @code{C_UINT_FAST16_T} @tab @code{uint_fast16_t} +@item @code{UNSIGNED}@tab @code{C_UINT_FAST32_T} @tab @code{uint_fast32_t} +@item @code{UNSIGNED}@tab @code{C_UINT_FAST64_T} @tab @code{uint_fast64_t} +@item @code{UNSIGNED}@tab @code{C_UINT_FAST128_T} @tab @code{uint_fast128_t} +@item @code{UNSIGNED}@tab @code{C_UINT_LEAST8_T} @tab @code{uint_least8_t} +@item @code{UNSIGNED}@tab @code{C_UINT_LEAST16_T} @tab @code{uint_least16_t} +@item @code{UNSIGNED}@tab @code{C_UINT_LEAST32_T} @tab @code{uint_least32_t} +@item @code{UNSIGNED}@tab @code{C_UINT_LEAST64_T} @tab @code{uint_least64_t} +@item @code{UNSIGNED}@tab @code{C_UINT_LEAST128_T} @tab @code{uint_least128_t} +@end multitable + Additionally, the following parameters of type @code{CHARACTER(KIND=C_CHAR)} are defined.
[gcc r15-6663] testsuite: RISC-V: Add effective target for E ABI variant
https://gcc.gnu.org/g:904f332cce3de59b99a48751e69717cbd3592901 commit r15-6663-g904f332cce3de59b99a48751e69717cbd3592901 Author: Dimitar Dimitrov Date: Thu Dec 12 20:22:59 2024 +0200 testsuite: RISC-V: Add effective target for E ABI variant Add new effective target check for either ILP32E or ILP64E ABI variants. Initial implementation only checks for RV32E or RV64E ISA, which in turn implies that ILP32E/ILP64E ABI is used. The RV32I+ILP32E and RV64I+ILP64E combinations are not yet caught by the check, but they do not seem to be widely used currently. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_riscv_abi_e): New procedure. Signed-off-by: Dimitar Dimitrov Diff: --- gcc/testsuite/lib/target-supports.exp | 20 1 file changed, 20 insertions(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 45ba2f47a9d1..0ff00d189ff5 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1904,6 +1904,26 @@ proc check_effective_target_riscv_a { } { }] } +# Return 1 if the ABI is either ILP32E or ILP64E, 0 otherwise. +# Cache the result. + +proc check_effective_target_riscv_abi_e { } { +# If current ISA is E, then only E ABI is supported. +if { [check_no_compiler_messages riscv_abi_e assembly { + #ifndef __riscv_e + #error "Not __riscv_e" + #endif + }] } { + return 1 + } + +# E ABI can be chosen by both E and I base ISA variants. +# +# TODO - check for I ISA and E ABI combination. + +return 0 +} + # Return 1 if the target arch supports the atomic LRSC extension, 0 otherwise. # Cache the result.
[gcc r15-6664] testsuite: RISC-V: Skip V and Zvbb tests for ILP32E/ILP64E ABIs
https://gcc.gnu.org/g:62e7c496696eb68186616a2fa3654a876d21d695 commit r15-6664-g62e7c496696eb68186616a2fa3654a876d21d695 Author: Dimitar Dimitrov Date: Mon Nov 25 20:48:00 2024 +0200 testsuite: RISC-V: Skip V and Zvbb tests for ILP32E/ILP64E ABIs Some tests add options for V and Zvbb extensions, but those extensions are not compatible with the E ABI variants. This leads to spurious test failures when toolchain's default ABI is ILP32E or ILP64E: spawn ... -march=rv32ecv_zvbb ... cc1: error: ILP32E ABI does not support the 'D' extension cc1: sorry, unimplemented: Currently the 'V' implementation requires the 'M' extension Fix by skipping the tests when toolchain's default ABI is E variant. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vandn-1.c: Skip if default is E ABI. * gcc.target/riscv/rvv/autovec/binop/vrolr-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vwsll-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vwsll-template.h: Ditto. * gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/clz-1.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/ctz-1.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/popcount-1.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/popcount-2.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/popcount-3.c: Ditto. * gcc.target/riscv/rvv/base/cmpmem-1.c: Ditto. * gcc.target/riscv/rvv/base/cmpmem-3.c: Ditto. * gcc.target/riscv/rvv/base/cmpmem-4.c: Ditto. * gcc.target/riscv/rvv/base/cpymem-1.c: Ditto. * gcc.target/riscv/rvv/base/cpymem-2.c: Ditto. * gcc.target/riscv/rvv/base/cpymem-3.c: Ditto. * gcc.target/riscv/rvv/base/movmem-1.c: Ditto. * gcc.target/riscv/rvv/base/pr115068.c: Ditto. * gcc.target/riscv/rvv/base/setmem-1.c: Ditto. * gcc.target/riscv/rvv/base/setmem-2.c: Ditto. * gcc.target/riscv/rvv/base/setmem-3.c: Ditto. * gcc.target/riscv/rvv/base/vwaddsub-1.c: Ditto. 
Signed-off-by: Dimitar Dimitrov Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vwsll-template.h | 2 +- .../riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/clz-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/ctz-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/popcount-1.c| 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/popcount-2.c| 2 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/popcount-3.c| 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-3.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-4.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-2.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/setmem-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/vwaddsub-1.c| 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c index 3bb5bf8dd5ba..dfdc64b568dc 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target { ! 
riscv_abi_e } } } */ /* { dg-add-options "riscv_v" } */ /* { dg-add-options "riscv_zvbb" } */ /* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c index 55dac27697cb..1c5f6e046d02 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-
[gcc r15-6665] testsuite: RISC-V: Skip tests using -mcpu= for ILP32E/ILP64E ABIs
https://gcc.gnu.org/g:3eaf74beac9c587edab13aa946b52a09b759d7fe commit r15-6665-g3eaf74beac9c587edab13aa946b52a09b759d7fe Author: Dimitar Dimitrov Date: Wed Dec 4 17:51:11 2024 +0200 testsuite: RISC-V: Skip tests using -mcpu= for ILP32E/ILP64E ABIs The tests are specifying -mcpu with D extension, which is not compatible with the ILP32E and ILP64E ABIs. Fix by skipping the tests if toolchain's default ABI is an E variant. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr109508.c: Skip for E ABI. * gcc.target/riscv/pr114139.c: Ditto. Signed-off-by: Dimitar Dimitrov Diff: --- gcc/testsuite/gcc.target/riscv/pr109508.c | 2 +- gcc/testsuite/gcc.target/riscv/pr114139.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/pr109508.c b/gcc/testsuite/gcc.target/riscv/pr109508.c index 65f291e17ed8..9ed06a4daed4 100644 --- a/gcc/testsuite/gcc.target/riscv/pr109508.c +++ b/gcc/testsuite/gcc.target/riscv/pr109508.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target { ! riscv_abi_e } } } */ /* { dg-options "-mcpu=sifive-s76" } */ typedef char __attribute__((__vector_size__ (1))) V; diff --git a/gcc/testsuite/gcc.target/riscv/pr114139.c b/gcc/testsuite/gcc.target/riscv/pr114139.c index 1d4eeb65f5c5..d4e7593f5c04 100644 --- a/gcc/testsuite/gcc.target/riscv/pr114139.c +++ b/gcc/testsuite/gcc.target/riscv/pr114139.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target { ! riscv_abi_e } } } */ /* { dg-options "-O2 -fpic -mexplicit-relocs -mcpu=sifive-p450" } */ static void *p;
[gcc r15-6667] Fix regression in ft32 port after recent switch table adjustments
https://gcc.gnu.org/g:a550edc3fae828cef67aac050b80179a97bb2fad commit r15-6667-ga550edc3fae828cef67aac050b80179a97bb2fad Author: Jeff Law Date: Tue Jan 7 12:20:15 2025 -0700 Fix regression in ft32 port after recent switch table adjustments This is a trivial bug that showed up after Mark W's recent patch to not apply the size limit on jump tables. The ft32 port has limited immediate ranges on comparisons and the casesi expander didn't honor those. It'd blindly pass along an out of range constant. This patch adds the trivial adjustment to force an out of range constant into a register. It fixes these regressions: > Tests that now fail, but worked before (3 tests): > > ft32-sim: gcc: gcc.c-torture/compile/pr34093.c -O1 (test for excess errors) > ft32-sim: gcc: gcc.dg/torture/pr106809.c -O1 (test for excess errors) > ft32-sim: gcc: gcc.dg/torture/pr106809.c -O1 (test for excess errors) Tested in my tester. No other tests were fixed. gcc/ * config/ft32/ft32.md (casesi expander): Force operands[2] into a register if it's not a suitable rimm operand. Diff: --- gcc/config/ft32/ft32.md | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/config/ft32/ft32.md b/gcc/config/ft32/ft32.md index a1680666928a..4d66abe009e1 100644 --- a/gcc/config/ft32/ft32.md +++ b/gcc/config/ft32/ft32.md @@ -760,6 +760,12 @@ operands[0] = index; } + /* operands[2] could be an integer that is out of range for + the comparison insn we're going to emit. If so, force + it into a register. */ + if (!ft32_rimm_operand (operands[2], SImode)) +operands[2] = force_reg (SImode, operands[2]); + { rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]); emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4]));
[gcc r15-6670] Fix testsuite expectations for RVV after recent change
https://gcc.gnu.org/g:c6b5398e9e3c387910e1736f06525a0724a84b3e commit r15-6670-gc6b5398e9e3c387910e1736f06525a0724a84b3e Author: Jeff Law Date: Tue Jan 7 14:27:28 2025 -0700 Fix testsuite expectations for RVV after recent change Tamar's recent improvement to improve affine unsigned folding for exchange2 twiddle code generation for a couple tests in the RVV testsuite just enough to cause testsuite failures. I've looked at both tests before/after Tamar's change and the code is clearly better -- essentially tighter vector loops due to improvements in address arithmetic. Additionally we have fewer vsetvls after Tamar's patch. Given that I'm just making the obvious adjustments to the expected assembly and pushing to the trunk. gcc/testsuite * gcc.target/riscv/rvv/vsetvl/vlmax_conflict-3.c: Update expected output. * gcc.target/riscv/rvv/vsetvl/vlmax_conflict-12.c: Likewise. Diff: --- gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-12.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-3.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-12.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-12.c index b96f2671f998..5cecb453d724 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-12.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-12.c @@ -35,4 +35,4 @@ void f2 (int32_t * restrict in, int32_t * restrict in2, int32_t * restrict out, } } -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-3.c index 2c8d3671c0ed..795611e53da3 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-3.c @@ -26,5 +26,5 @@ void f2 (int32_t * restrict in, int32_t * restrict in2, int32_t * restrict out, } } -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
[gcc r15-6671] Prefer scalar_int_mode if the size - 1 is equal to UNITS_PER_WORD.
https://gcc.gnu.org/g:d953c2c5714ed8503c4ae1b7d059a62e4e9a0624 commit r15-6671-gd953c2c5714ed8503c4ae1b7d059a62e4e9a0624 Author: Tsung Chun Lin Date: Tue Jan 7 14:48:31 2025 -0700 Prefer scalar_int_mode if the size - 1 is equal to UNITS_PER_WORD. Don't use the QI vector if its size is equal to UNITS_PER_WORD for better code generation. Before patch: vsetivli zero,4,e8,mf4,ta,ma vmv.v.i v1,0 addi a4,sp,12 vse8.v v1,0(a4) After patch: sw zero,12(sp) gcc/ * expr.cc (widest_fixed_size_mode_for_size): Prefer scalar modes over vector modes in more cases. gcc/testsuite/ * gcc.target/riscv/rvv/autovec/pr113469.c: Update expected output. * gcc.target/riscv/rvv/base/movqi-1.c: New test. Diff: --- gcc/expr.cc | 3 ++- gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c | 1 - gcc/testsuite/gcc.target/riscv/rvv/base/movqi-1.c | 18 ++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/gcc/expr.cc b/gcc/expr.cc index 635bb9efa9eb..235e79546113 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -1062,12 +1062,13 @@ widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op) gcc_checking_assert (size > 1); /* Use QI vector only if size is wider than a WORD. 
*/ - if (can_use_qi_vectors (op) && size > UNITS_PER_WORD) + if (can_use_qi_vectors (op)) { machine_mode mode; fixed_size_mode candidate; FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) if (is_a (mode, &candidate) + && GET_MODE_SIZE (candidate) > UNITS_PER_WORD && GET_MODE_INNER (candidate) == QImode) { if (GET_MODE_SIZE (candidate) >= size) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c index 52e2580c53e6..6549ae61c672 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c @@ -51,5 +51,4 @@ void p(int buf, __builtin_va_list ab, int q) { } while (k); } -/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 } } */ /* { dg-final { scan-assembler-times {vsetivli\tzero,\s*8,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/movqi-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/movqi-1.c new file mode 100644 index ..bc461035e5b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/movqi-1.c @@ -0,0 +1,18 @@ +/* Test that we do not use QI vector to initilize the memory if the + * size of QI vector isn't larger than UNITS_PER_WORD */ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */ + +struct s { + int a; + int b : 1; +}; + +void q(struct s*); + +void g() { + struct s r = { 15, 0 }; + q(&r); +} + +/* { dg-final { scan-assembler-times {sw\tzero,12\(sp\)} 1 } } */
[gcc r15-6672] [PATCH] libgcc/m68k: More fixes for soft float
https://gcc.gnu.org/g:0115ef57efa9966fa7f448185dd5c741f58d4fac commit r15-6672-g0115ef57efa9966fa7f448185dd5c741f58d4fac Author: Keith Packard Date: Tue Jan 7 14:54:11 2025 -0700 [PATCH] libgcc/m68k: More fixes for soft float Fix __extenddfxf2: * Remove bogus denorm handling block which would never execute -- the converted exp value is always positive as EXCESSX > EXCESSD. * Compute the whole significand in dl instead of doing part of it in ldl. * Mask off exponent from dl.l.upper so the denorm shift test works. * Insert the hidden one bit into dl.l.upper as needed. Fix __truncxfdf2 denorm handling. All that is required is to shift the significand right by the correct amount; it already has all of the necessary bits set including the explicit one. Compute the shift amount, then perform the wide shift across both elements of the significand. Fix __fixxfsi: * The value was off by a factor of two as the significand contains 32 bits, not 31 so we need to shift by one more than the equivalent code in __fixdfsi. * Simplify the code having realized that the lower 32 bits of the significand can never appear in the results. Return positive qNaN instead of negative. For floats, qNaN is 0x7fff_ffff. For doubles, qNaN is 0x7fff_ffff_ffff_ffff. Return correctly signed zero on float and double divide underflow. This means that Ld$underflow now expects d7 to contain the sign bit, just like the other return paths. libgcc/ * config/m68k/fpgnulib.c (extenddfxf2): Simplify code by removing code that should never execute. Fix denorm shift test and insert hidden bit as needed. (__truncxfdf2): Properly compute and shift the significand right. * config/m68k/lb1sf68.S (__fixxfsi): Correct shift counts and simplify. (QUIET_NAN): Make it a positive quiet NaN and fix return values to inject sign properly. 
Diff: --- libgcc/config/m68k/fpgnulib.c | 78 --- libgcc/config/m68k/lb1sf68.S | 17 ++ 2 files changed, 47 insertions(+), 48 deletions(-) diff --git a/libgcc/config/m68k/fpgnulib.c b/libgcc/config/m68k/fpgnulib.c index 70bfd442d750..a7d4258dff01 100644 --- a/libgcc/config/m68k/fpgnulib.c +++ b/libgcc/config/m68k/fpgnulib.c @@ -449,34 +449,37 @@ __extenddfxf2 (double d) } exp = EXPD (dl) - EXCESSD + EXCESSX; - /* Check for underflow and denormals. */ - if (exp < 0) + + dl.l.upper &= MANTDMASK; + + /* Recover from a denorm. */ + if (exp == -EXCESSD + EXCESSX) { - if (exp < -53) - { - ldl.l.middle = 0; - ldl.l.lower = 0; - } - else if (exp < -30) - { - ldl.l.lower = (ldl.l.middle & MANTXMASK) >> ((1 - exp) - 32); - ldl.l.middle &= ~MANTXMASK; - } - else + exp++; + while ((dl.l.upper & HIDDEND) == 0) { - ldl.l.lower >>= 1 - exp; - ldl.l.lower |= (ldl.l.middle & MANTXMASK) << (32 - (1 - exp)); - ldl.l.middle = (ldl.l.middle & ~MANTXMASK) | (ldl.l.middle & MANTXMASK >> (1 - exp)); + exp--; + dl.l.upper = (dl.l.upper << 1) | (dl.l.lower >> 31); + dl.l.lower = dl.l.lower << 1; } - exp = 0; } + /* Handle inf and NaN */ - if (exp == EXPDMASK - EXCESSD + EXCESSX) -exp = EXPXMASK; + else if (exp == EXPDMASK - EXCESSD + EXCESSX) +{ + exp = EXPXMASK; + /* Add hidden one bit for NaN */ + if (dl.l.upper != 0 || dl.l.lower != 0) +dl.l.upper |= HIDDEND; +} + else +{ + dl.l.upper |= HIDDEND; +} + ldl.l.upper |= exp << 16; - ldl.l.middle = HIDDENX; /* 31-20: # mantissa bits in ldl.l.middle - # mantissa bits in dl.l.upper */ - ldl.l.middle |= (dl.l.upper & MANTDMASK) << (31 - 20); + ldl.l.middle = dl.l.upper << (31 - 20); /* 1+20: explicit-integer-bit + # mantissa bits in dl.l.upper */ ldl.l.middle |= dl.l.lower >> (1 + 20); /* 32 - 21: # bits of dl.l.lower in ldl.l.middle */ @@ -508,21 +511,21 @@ __truncxfdf2 (long double ld) /* Check for underflow and denormals. 
*/ if (exp <= 0) { - if (exp < -53) + long shift = 1 - exp; + if (shift > 52) { ldl.l.middle = 0; ldl.l.lower = 0; } - else if (exp < -30) + else if (shift >= 32) { - ldl.l.lower = (ldl.l.middle & MANTXMASK) >> ((1 - exp) - 32); - ldl.l.middle &= ~MANTXMASK; + ldl.l.lower = (ldl.l.middle) >> (shift - 32); + ldl.l.middle = 0; } else { - ldl.l.lower >>= 1 - exp; - ldl.l.lower |= (ldl.l.middle & MANTXMASK) << (32 - (1 - exp)); - ldl.l.middle = (ldl.l.middle & ~MANTXMASK) | (ldl.l.middle & MANTXMASK >> (1 - exp)); + ldl.l.lower = (ldl.l.m
[gcc r15-6673] RISC-V: vector absolute difference expander [PR117722]
https://gcc.gnu.org/g:b755c151fde4ad736405bb2e13a7de0420161179 commit r15-6673-gb755c151fde4ad736405bb2e13a7de0420161179 Author: Vineet Gupta Date: Tue Jan 7 14:28:25 2025 -0800 RISC-V: vector absolute difference expander [PR117722] This improves codegen for x264 sum of absolute difference routines. The insn count is same, but we avoid double widening ops and ensuing whole register moves. Also for more general applicability, we chose to implement abs diff vs. the sum of abs diff variant. Suggested-by: Robin Dapp Co-authored-by: Pan Li Signed-off-by: Vineet Gupta PR target/117722 gcc/ChangeLog: * config/riscv/autovec.md: Add uabd expander. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr117722.c: New test. Diff: --- gcc/config/riscv/autovec.md| 26 ++ .../gcc.target/riscv/rvv/autovec/pr117722.c| 23 +++ 2 files changed, 49 insertions(+) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 8d22b5f9c59a..8426f12757f3 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2928,3 +2928,29 @@ riscv_vector::expand_strided_store (mode, operands); DONE; }) + +; +; == Absolute difference (not including sum) +; +(define_expand "uabd3" + [(match_operand:V_VLSI 0 "register_operand") + (match_operand:V_VLSI 1 "register_operand") + (match_operand:V_VLSI 2 "register_operand")] + "TARGET_VECTOR" + { +rtx max = gen_reg_rtx (mode); +insn_code icode = code_for_pred (UMAX, mode); +rtx ops1[] = {max, operands[1], operands[2]}; +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); + +rtx min = gen_reg_rtx (mode); +icode = code_for_pred (UMIN, mode); +rtx ops2[] = {min, operands[1], operands[2]}; +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2); + +icode = code_for_pred (MINUS, mode); +rtx ops3[] = {operands[0], max, min}; +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3); + +DONE; + }); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c new file mode 100644 index ..b675930818e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O2" } */ + +/* Generate sum of absolute difference as sub (max, min). + This helps with x264 sad routines. */ + +inline int abs(int i) +{ + return (i < 0 ? -i : i); +} + +int pixel_sad_n(unsigned char *pix1, unsigned char *pix2, int n) +{ + int sum = 0; + for( int i = 0; i < n; i++ ) + sum += abs(pix1[i] - pix2[i]); + + return sum; +} + +/* { dg-final { scan-assembler {vmin\.v} } } */ +/* { dg-final { scan-assembler {vmax\.v} } } */ +/* { dg-final { scan-assembler {vsub\.v} } } */
[gcc r15-6674] Match: Refactor the signed SAT_SUB match patterns [NFC]
https://gcc.gnu.org/g:5080dbb807063061dbbe0a497d04629575f8c2af commit r15-6674-g5080dbb807063061dbbe0a497d04629575f8c2af Author: Pan Li Date: Wed Dec 11 19:09:08 2024 +0800 Match: Refactor the signed SAT_SUB match patterns [NFC] This patch would like to refactor all the signed SAT_SUB patterns, aka: * Extract type check outside. * Re-arrange the related match pattern forms together. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Refactor sorts of signed SAT_SUB match patterns. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 98 +--- 1 file changed, 40 insertions(+), 58 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index ec508c59caae..5b5265afe96a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3391,6 +3391,46 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0))) ) +(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) + (match (signed_integer_sat_sub @0 @1) + /* T Z = (T)((UT)X - (UT)Y); + SAT_S_SUB = (X ^ Y) & (X ^ Z) < 0 ? (-(T)(X < 0) ^ MAX) : Z */ + (cond^ (lt (bit_and:c (bit_xor:c @0 @1) + (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) + (nop_convert @1) +integer_zerop) +(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) +@2)) + (match (signed_integer_sat_sub @0 @1) + /* T Z = (T)((UT)X - (UT)Y); + SAT_S_SUB = (X ^ Y) & (X ^ Z) >= 0 ? Z : (-(T)(X < 0) ^ MAX) */ + (cond^ (ge (bit_and:c (bit_xor:c @0 @1) + (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) + (nop_convert @1) +integer_zerop) +@2 +(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value))) + (match (signed_integer_sat_sub @0 @1) + /* T Z = (T)((UT)X - (UT)Y); + SAT_S_SUB = (X ^ Y) < 0 & (X ^ Z) < 0 ? 
(-(T)(X < 0) ^ MAX) : Z */ + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) + (nop_convert @1 + integer_zerop) + (lt (bit_xor:c @0 @1) integer_zerop)) +(bit_xor:c (nop_convert (negate (nop_convert (convert + (lt @0 integer_zerop) + max_value) +@2)) + (match (signed_integer_sat_sub @0 @1) + /* Z = .SUB_OVERFLOW (X, Y) + SAT_S_SUB = IMAGPART (Z) != 0 ? (-(T)(X < 0) ^ MAX) : REALPART (Z) */ + (cond^ (ne (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop) +(bit_xor:c (nop_convert? + (negate (nop_convert? (convert (lt @0 integer_zerop) + max_value) +(realpart @2)) + (if (types_match (type, @0, @1) + /* The boundary condition for case 10: IMM = 1: SAT_U_SUB = X >= IMM ? (X - IMM) : 0. simplify (X != 0 ? X + ~0 : 0) to X - (X != 0). */ @@ -3402,64 +3442,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (with { tree itype = TREE_TYPE (@2); } (convert (minus @2 (convert:itype @1)) -/* Signed saturation sub, case 1: - T minus = (T)((UT)X - (UT)Y); - SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus; - - The T and UT are type pair like T=int8_t, UT=uint8_t. */ -(match (signed_integer_sat_sub @0 @1) - (cond^ (lt (bit_and:c (bit_xor:c @0 @1) - (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) -(nop_convert @1) - integer_zerop) - (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) - @2) - (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type - -/* Signed saturation sub, case 2: - T minus = (T)((UT)X - (UT)Y); - SAT_S_SUB = (X ^ Y) & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus; - - The T and UT are type pair like T=int8_t, UT=uint8_t. */ -(match (signed_integer_sat_sub @0 @1) - (cond^ (ge (bit_and:c (bit_xor:c @0 @1) - (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) -(nop_convert @1) - integer_zerop) - @2 - (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)) - (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type - -/* Signed saturation sub, case 3: - Z = .SUB_OVERFLOW (X, Y) - SAT_S_SUB = IMAGPART_EXPR (Z) != 0 ? 
(-(T)(X < 0) ^ MAX) : REALPART_EXPR (Z); - - The T and UT are type pair like T=int8_t, UT=uint8_t. */ -(match (signed_integer_sat_sub @0 @1) - (cond^ (ne (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop) - (bit_xor:c (nop_convert? - (negate (nop_convert? (convert (lt
[gcc r15-6677] Match: Update the comments for indicating SAT_* pattern
https://gcc.gnu.org/g:4435e82708872f705c47eeb63bbcdfc54b0449fc commit r15-6677-g4435e82708872f705c47eeb63bbcdfc54b0449fc Author: Pan Li Date: Thu Dec 12 10:56:35 2024 +0800 Match: Update the comments for indicating SAT_* pattern Given the SAT_* patterns are grouped for each alu and signed or not, add leading comments to indicate the beginning of the pattern. gcc/ChangeLog: * match.pd: Update comments for sat_* pattern. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index cb48c8c2505e..1d0c9f58f99d 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3115,6 +3115,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) || POINTER_TYPE_P (itype)) && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)) +/* Saturation add for unsigned integer. */ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)) (match (usadd_overflow_mask @0 @1) /* SAT_U_ADD = (X + Y) | -(X > (X + Y)). @@ -3189,6 +3190,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) integer_minus_onep (realpart @2)) (if (types_match (type, @0) && int_fits_type_p (@1, type) +/* Saturation sub for unsigned integer. */ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)) (match (unsigned_integer_sat_sub @0 @1) /* SAT_U_SUB = X > Y ? X - Y : 0 */ @@ -3278,6 +3280,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (wi::eq_p (sum, wi::uhwi (0, precision +/* Saturation truncate for unsigned integer. */ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)) (match (unsigned_integer_sat_trunc @0) /* SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))) */ @@ -3337,6 +3340,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (nop_convert? (convert (lt @0 integer_zerop) max_value))) +/* Saturation add for signed integer. 
*/ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) (match (signed_integer_sat_add @0 @1) /* T SUM = (T)((UT)X + (UT)Y) @@ -3391,6 +3395,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) @2) (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0 +/* Saturation sub for signed integer. */ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) (match (signed_integer_sat_sub @0 @1) /* T Z = (T)((UT)X - (UT)Y); @@ -3427,6 +3432,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (realpart @2)) (if (types_match (type, @0, @1) +/* Saturation truncate for signed integer. */ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) (match (signed_integer_sat_trunc @0) /* SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1 > Unsigned_MAX ? (NT)X */
[gcc r15-6675] Match: Refactor the signed SAT_TRUNC match patterns [NFC]
https://gcc.gnu.org/g:d20e9b7b5a4dd99f0486d2b0a946208a9563e196 commit r15-6675-gd20e9b7b5a4dd99f0486d2b0a946208a9563e196 Author: Pan Li Date: Wed Dec 11 19:37:06 2024 +0800 Match: Refactor the signed SAT_TRUNC match patterns [NFC] This patch would like to refactor the all signed SAT_TRUNC patterns, aka: * Extract type check outside. * Re-arrange the related match pattern forms together. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Refactor sorts of signed SAT_TRUNC match patterns Signed-off-by: Pan Li Diff: --- gcc/match.pd | 65 ++-- 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 5b5265afe96a..8b72eaf713a0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3431,6 +3431,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (realpart @2)) (if (types_match (type, @0, @1) +(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) + (match (signed_integer_sat_trunc @0) + /* SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1 > Unsigned_MAX ? (NT)X */ + (cond^ (gt (plus:c (convert@4 @0) INTEGER_CST@1) INTEGER_CST@2) +(bit_xor:c (nop_convert? +(negate (nop_convert? 
(convert (lt @0 integer_zerop) + INTEGER_CST@3) +(convert @0)) + (if (!TYPE_UNSIGNED (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@4))) + (with +{ + unsigned itype_prec = TYPE_PRECISION (TREE_TYPE (@0)); + unsigned otype_prec = TYPE_PRECISION (type); + wide_int offset = wi::uhwi (HOST_WIDE_INT_1U << (otype_prec - 1), +itype_prec); // Aka 128 for int8_t + wide_int limit_0 = wi::mask (otype_prec, false, itype_prec); // Aka 255 + wide_int limit_1 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 3, + itype_prec); // Aka 253 + wide_int limit_2 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 2, + itype_prec); // Aka 254 + wide_int otype_max = wi::mask (otype_prec - 1, false, otype_prec); + wide_int itype_max = wi::mask (otype_prec - 1, false, itype_prec); + wide_int int_cst_1 = wi::to_wide (@1); + wide_int int_cst_2 = wi::to_wide (@2); + wide_int int_cst_3 = wi::to_wide (@3); +} +(if (((wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_0)) +|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_2)) +|| (wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_2)) +|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_1))) +&& wi::eq_p (int_cst_3, otype_max))) + /* The boundary condition for case 10: IMM = 1: SAT_U_SUB = X >= IMM ? (X - IMM) : 0. simplify (X != 0 ? X + ~0 : 0) to X - (X != 0). */ @@ -3442,39 +3474,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (with { tree itype = TREE_TYPE (@2); } (convert (minus @2 (convert:itype @1)) -/* Signed saturation truncate, case 1 and case 2, sizeof (WT) > sizeof (NT). - SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1 > Unsigned_MAX ? (NT)X. */ -(match (signed_integer_sat_trunc @0) - (cond^ (gt (plus:c (convert@4 @0) INTEGER_CST@1) INTEGER_CST@2) - (bit_xor:c (nop_convert? - (negate (nop_convert? 
(convert (lt @0 integer_zerop) - INTEGER_CST@3) - (convert @0)) - (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) - && !TYPE_UNSIGNED (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@4))) - (with - { - unsigned itype_prec = TYPE_PRECISION (TREE_TYPE (@0)); - unsigned otype_prec = TYPE_PRECISION (type); - wide_int offset = wi::uhwi (HOST_WIDE_INT_1U << (otype_prec - 1), - itype_prec); // Aka 128 for int8_t - wide_int limit_0 = wi::mask (otype_prec, false, itype_prec); // Aka 255 - wide_int limit_1 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 3, - itype_prec); // Aka 253 - wide_int limit_2 = wi::uhwi ((HOST_WIDE_INT_1U << otype_prec) - 2, - itype_prec); // Aka 254 - wide_int otype_max = wi::mask (otype_prec - 1, false, otype_prec); - wide_int itype_max = wi::mask (otype_prec - 1, false, itype_prec); - wide_int int_cst_1 = wi::to_wide (@1); - wide_int int_cst_2 = wi::to_wide (@2); - wide_int int_cst_3 = wi::to_wide (@3); - } - (if (((wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_0)) -|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_2)) -|| (wi::eq_p (int_cst_1, offset) && wi::eq_p (int_cst_2, limit_2)) -|| (wi::eq_p (int_cst_1, itype_max) && wi::eq_p (int_cst_2, limit_1))) - && wi::eq_p (int_cst_3, otype_max)) - /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN -->
[gcc r15-6676] Match: Refactor the signed SAT_* match for saturated value [NFC]
https://gcc.gnu.org/g:cfe45ab382488313d8635ccaac970a11891a2c8c commit r15-6676-gcfe45ab382488313d8635ccaac970a11891a2c8c Author: Pan Li Date: Thu Dec 12 10:48:08 2024 +0800 Match: Refactor the signed SAT_* match for saturated value [NFC] This patch would like to refactor the all signed SAT_* patterns for the saturated value. Aka, overflow to INT_MAX when > 0 and downflow to INT_MIN when < 0. Thus, we can remove sorts of duplicated expression in different patterns. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Extract saturated value match for signed SAT_*. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 38 +- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 8b72eaf713a0..cb48c8c2505e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3330,6 +3330,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (wi::eq_p (trunc_max, int_cst_1) && wi::eq_p (max, int_cst_2))) +(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) + /* SAT_VAL = (-(T)(X < 0) ^ MAX) */ + (match (signed_integer_sat_val @0) + (bit_xor:c (nop_convert? (negate + (nop_convert? 
(convert (lt @0 integer_zerop) +max_value))) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) (match (signed_integer_sat_add @0 @1) /* T SUM = (T)((UT)X + (UT)Y) @@ -3338,7 +3345,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (nop_convert @1 (bit_not (bit_xor:c @0 @1))) integer_zerop) -(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) +(signed_integer_sat_val @0) @2)) (match (signed_integer_sat_add @0 @1) /* T SUM = (T)((UT)X + (UT)Y) @@ -3356,17 +3363,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (nop_convert @1 integer_zerop) (ge (bit_xor:c @0 @1) integer_zerop)) -(bit_xor:c (nop_convert (negate (nop_convert (convert - (lt @0 integer_zerop) - max_value) +(signed_integer_sat_val @0) @2)) (match (signed_integer_sat_add @0 @1) /* SUM = .ADD_OVERFLOW (X, Y) SAT_S_ADD = IMAGPART_EXPR (SUM) != 0 ? (-(T)(X < 0) ^ MAX) : SUM */ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) -(bit_xor:c (nop_convert? -(negate (nop_convert? (convert (lt @0 integer_zerop) - max_value) +(signed_integer_sat_val @0) (realpart @2))) (match (signed_integer_sat_add @0 @1) /* T SUM = (T)((UT)X + (UT)Y) @@ -3375,9 +3378,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (nop_convert @1 integer_zerop) (bit_not (lt (bit_xor:c @0 @1) integer_zerop))) -(bit_xor:c (nop_convert (negate (nop_convert (convert - (lt @0 integer_zerop) - max_value) +(signed_integer_sat_val @0) @2)) (match (signed_integer_sat_add @0 @1) /* T SUM = (T)((UT)X + (UT)IMM); @@ -3386,10 +3387,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond^ (lt (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0) INTEGER_CST@1))) (bit_xor:c @0 INTEGER_CST@3)) integer_zerop) -(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) +(signed_integer_sat_val @0) @2) - (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0))) -) + (if (wi::bit_and (wi::to_wide (@1), wi::to_wide (@3)) == 0 (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)) (match (signed_integer_sat_sub @0 @1) @@ -3399,7 +3399,7 @@ 
DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) (nop_convert @1) integer_zerop) -(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) +(signed_integer_sat_val @0) @2)) (match (signed_integer_sat_sub @0 @1) /* T Z = (T)((UT)X - (UT)Y); @@ -3409,7 +3409,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (nop_convert @1) integer_zerop) @2 -(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value))) +(signed_integer_sat_val @0))) (match (signed_integer_sat_sub @0 @1) /* T Z = (T)((UT)X - (UT)Y); SAT_S_SUB = (X ^ Y) < 0 & (X ^ Z) < 0 ? (-(T)(X < 0) ^ M
[gcc r15-6614] AArch64: Remove AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
https://gcc.gnu.org/g:70035b6c13852435d7ae396c0762ee26897d4d45 commit r15-6614-g70035b6c13852435d7ae396c0762ee26897d4d45 Author: Jennifer Schmitz Date: Tue Nov 26 00:43:48 2024 -0800 AArch64: Remove AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS This patch removes the AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS tunable and use_new_vector_costs entry in aarch64-tuning-flags.def and makes the AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS paths in the backend the default. To that end, the function aarch64_use_new_vector_costs_p and its uses were removed. To prevent costing vec_to_scalar operations with 0, as described in https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665481.html, we adjusted vectorizable_store such that the variable n_adjacent_stores also covers vec_to_scalar operations. This way vec_to_scalar operations are not costed individually, but as a group. As suggested by Richard Sandiford, the "known_ne" in the multilane-check was replaced by "maybe_ne" in order to treat nunits==1+1X as a vector rather than a scalar. Two tests were adjusted due to changes in codegen. 
In both cases, the old code performed loop unrolling once, but the new code does not: Example from gcc.target/aarch64/sve/strided_load_2.c (compiled with -O2 -ftree-vectorize -march=armv8.2-a+sve -mtune=generic -moverride=tune=none): f_int64_t_32: cbz w3, .L92 mov x4, 0 uxtwx3, w3 + cntdx5 + whilelo p7.d, xzr, x3 + mov z29.s, w5 mov z31.s, w2 - whilelo p6.d, xzr, x3 - mov x2, x3 - index z30.s, #0, #1 - uqdecd x2 - ptrue p5.b, all - whilelo p7.d, xzr, x2 + index z30.d, #0, #1 + ptrue p6.b, all .p2align 3,,7 .L94: - ld1dz27.d, p7/z, [x0, #1, mul vl] - ld1dz28.d, p6/z, [x0] - movprfx z29, z31 - mul z29.s, p5/m, z29.s, z30.s - incwx4 - uunpklo z0.d, z29.s - uunpkhi z29.d, z29.s - ld1dz25.d, p6/z, [x1, z0.d, lsl 3] - ld1dz26.d, p7/z, [x1, z29.d, lsl 3] - add z25.d, z28.d, z25.d + ld1dz27.d, p7/z, [x0, x4, lsl 3] + movprfx z28, z31 + mul z28.s, p6/m, z28.s, z30.s + ld1dz26.d, p7/z, [x1, z28.d, uxtw 3] add z26.d, z27.d, z26.d - st1dz26.d, p7, [x0, #1, mul vl] - whilelo p7.d, x4, x2 - st1dz25.d, p6, [x0] - incwz30.s - incbx0, all, mul #2 - whilelo p6.d, x4, x3 + st1dz26.d, p7, [x0, x4, lsl 3] + add z30.s, z30.s, z29.s + incdx4 + whilelo p7.d, x4, x3 b.any .L94 .L92: ret Example from gcc.target/aarch64/sve/strided_store_2.c (compiled with -O2 -ftree-vectorize -march=armv8.2-a+sve -mtune=generic -moverride=tune=none): f_int64_t_32: cbz w3, .L84 - addvl x5, x1, #1 mov x4, 0 uxtwx3, w3 - mov z31.s, w2 + cntdx5 whilelo p7.d, xzr, x3 - mov x2, x3 - index z30.s, #0, #1 - uqdecd x2 - ptrue p5.b, all - whilelo p6.d, xzr, x2 + mov z29.s, w5 + mov z31.s, w2 + index z30.d, #0, #1 + ptrue p6.b, all .p2align 3,,7 .L86: - ld1dz28.d, p7/z, [x1, x4, lsl 3] - ld1dz27.d, p6/z, [x5, x4, lsl 3] - movprfx z29, z30 - mul z29.s, p5/m, z29.s, z31.s - add z28.d, z28.d, #1 - uunpklo z26.d, z29.s - st1dz28.d, p7, [x0, z26.d, lsl 3] - incwx4 - uunpkhi z29.d, z29.s + ld1dz27.d, p7/z, [x1, x4, lsl 3] + movprfx z28, z30 + mul z28.s, p6/m, z28.s, z31.s add z27.d, z27.d, #1 - whilelo p6.d, x4, x2 - 
st1dz27.d, p7, [x0, z29.d, lsl 3] - incwz30.s + st1dz27.d, p7, [x0, z28.d, uxtw 3] + incdx4 + add z30.s, z30.s, z29.s whilelo p7.d, x4, x3 b.any .L86 .L84: ret The patch was bootstrapped and tested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ * tree-vect-stmts.cc (vectorizable_store): Extend the use of n_adjacent_stores to also cover vec_to_scalar operations. * config/aarch64/aarch64-tuning-flags.def: Remove use_new_vector_costs as tuning option. * config/aarch64/aarch64.cc (aarch64_use_new_vector_costs_p): Remove. (aarch64_vector_costs::add_stmt_cost): Re
[gcc r15-6615] Fortran: Extend cyclic type detection for deallocate [PR116669]
https://gcc.gnu.org/g:d897090949086d1a094429f043a4dcb7bbc74448 commit r15-6615-gd897090949086d1a094429f043a4dcb7bbc74448 Author: Andre Vehreschild Date: Mon Dec 9 14:56:27 2024 +0100 Fortran: Extend cyclic type detection for deallocate [PR116669] Using cycles in derived/class types led to the compiler doing an endless recursion in several locations, when the cycle was not immediate. An immediate cyclic dependency is present in, for example T T::comp. Cyclic dependencies of the form T T2::comp; T2 T::comp2; are now detected and the recursive bit in the derived type's attr is set. gcc/fortran/ChangeLog: PR fortran/116669 * class.cc (gfc_find_derived_vtab): Use attr to determine cyclic type dependencies. * expr.cc (gfc_has_default_initializer): Prevent endless recursion by storing already visited derived types. * resolve.cc (resolve_cyclic_derived_type): Determine if a type is used in its hierarchy in a cyclic way. (resolve_fl_derived0): Call resolve_cyclic_derived_type. (resolve_fl_derived): Ensure vtab is generated when cyclic derived types have allocatable components. * trans-array.cc (structure_alloc_comps): Prevent endless loop for derived type cycles. * trans-expr.cc (gfc_get_ultimate_alloc_ptr_comps_caf_token): Off topic, just prevent memory leaks. gcc/testsuite/ChangeLog: * gfortran.dg/class_array_15.f03: Freeing more memory. * gfortran.dg/recursive_alloc_comp_6.f90: New test. 
Diff: --- gcc/fortran/class.cc | 19 +-- gcc/fortran/expr.cc| 38 ++ gcc/fortran/resolve.cc | 58 -- gcc/fortran/trans-array.cc | 25 ++ gcc/fortran/trans-expr.cc | 10 +++- gcc/testsuite/gfortran.dg/class_array_15.f03 | 2 +- .../gfortran.dg/recursive_alloc_comp_6.f90 | 28 +++ 7 files changed, 136 insertions(+), 44 deletions(-) diff --git a/gcc/fortran/class.cc b/gcc/fortran/class.cc index e0dd571cd68b..3e0dce1b54d8 100644 --- a/gcc/fortran/class.cc +++ b/gcc/fortran/class.cc @@ -2507,20 +2507,6 @@ gfc_find_derived_vtab (gfc_symbol *derived) { gfc_component *c; gfc_symbol *parent = NULL, *parent_vtab = NULL; - bool rdt = false; - - /* Is this a derived type with recursive allocatable -components? */ - c = (derived->attr.unlimited_polymorphic - || derived->attr.abstract) ? - NULL : derived->components; - for (; c; c= c->next) - if (c->ts.type == BT_DERIVED - && c->ts.u.derived == derived) - { - rdt = true; - break; - } gfc_get_symbol (name, ns, &vtype); if (!gfc_add_flavor (&vtype->attr, FL_DERIVED, NULL, @@ -2703,9 +2689,8 @@ gfc_find_derived_vtab (gfc_symbol *derived) c->attr.access = ACCESS_PRIVATE; c->tb = XCNEW (gfc_typebound_proc); c->tb->ppc = 1; - if (derived->attr.unlimited_polymorphic - || derived->attr.abstract - || !rdt) + if (derived->attr.unlimited_polymorphic || derived->attr.abstract + || !derived->attr.recursive) c->initializer = gfc_get_null_expr (NULL); else { diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc index 0b8d69436761..0e40b2493a5c 100644 --- a/gcc/fortran/expr.cc +++ b/gcc/fortran/expr.cc @@ -5017,28 +5017,44 @@ is_non_empty_structure_constructor (gfc_expr * e) bool gfc_has_default_initializer (gfc_symbol *der) { + static hash_set seen_derived_types; gfc_component *c; + /* The rewrite to a result variable and breaks is only needed, because + there is no scope_guard in C++ yet. 
*/ + bool result = false; gcc_assert (gfc_fl_struct (der->attr.flavor)); + seen_derived_types.add (der); for (c = der->components; c; c = c->next) -if (gfc_bt_struct (c->ts.type)) +if (gfc_bt_struct (c->ts.type) + && !seen_derived_types.contains (c->ts.u.derived)) { -if (!c->attr.pointer && !c->attr.proc_pointer -&& !(c->attr.allocatable && der == c->ts.u.derived) -&& ((c->initializer - && is_non_empty_structure_constructor (c->initializer)) -|| gfc_has_default_initializer (c->ts.u.derived))) - return true; + if (!c->attr.pointer && !c->attr.proc_pointer + && !(c->attr.allocatable && der == c->ts.u.derived) + && ((c->initializer +&& is_non_empty_structure_constructor (c->initializer)) +
[gcc r15-6616] [PR modula2/118010, modula2/118183] Unable to rebuild the bootstrap tools and Wtypemismatch
https://gcc.gnu.org/g:1ea6fef426e37a09edd97bca65733930c214978d commit r15-6616-g1ea6fef426e37a09edd97bca65733930c214978d Author: Gaius Mulley Date: Tue Jan 7 11:20:45 2025 + [PR modula2/118010, modula2/118183] Unable to rebuild the bootstrap tools and Wtypemismatch This patch combines fixes for both PR-118010 (Wtypemismatch) and PR-118183 (unable to rebuild the bootstrap tools). PR-118010 required a new data type (COFF_T) to be exported from SYSTEM and used in all return types for libc.lseek. The patch also includes COFF_T implemented in mc and this data type has been propagated though the translated versions of pge and mc. Finally the patch adjusts the modula-2 declaration of location_t to reflect the new gcc 64 bit type. A new command line option -fm2-file-offset-bits= has been implemented to override the default 64 bit declaration of COFF_T. gcc/ChangeLog: PR modula2/118010 * doc/gm2.texi (Compiler options): New option -fm2-file-offset-bits=. gcc/m2/ChangeLog: PR modula2/118010 PR modula2/118183 * gm2-compiler/M2GCCDeclare.mod (Import): COffT, GetCOffTType. (DeclareDefaultSimpleTypes): Declare COFF_T. * gm2-compiler/M2GenGCC.mod (GetParamSize): Correct first parameter to BuildSize to use location rather than a token position. * gm2-compiler/M2Options.def (SetFileOffsetBits): New procedure. (GetFileOffsetBits): New procedure function. * gm2-compiler/M2Options.mod (SetFileOffsetBits): New procedure. (GetFileOffsetBits): New procedure function. (OffTBits): New variable. * gm2-compiler/M2System.def (COffT): New variable. * gm2-compiler/M2System.mod (MakeExtraSystemTypes): Declare COffT. * gm2-compiler/P1SymBuild.mod (EndBuildProcedure): Call PutProcedureDefined. * gm2-compiler/P2SymBuild.mod (Debug): Reimplement. * gm2-compiler/SymbolTable.mod (InitProcedureDeclaration): Initialize ProcedureTok. * gm2-gcc/gcctypes.def (location_t): Declare as CARDINAL64. * gm2-gcc/m2linemap.cc (m2linemap_GetLocationBinary): Add call to linemap_add indication a LC_LEAVE. 
* gm2-gcc/m2options.h (M2Options_SetFileOffsetBits): New procedure. (M2Options_GetFileOffsetBits): New procedure function. * gm2-gcc/m2type.cc (m2_offt_type_node): New variable. (m2type_GetCSizeTType): Reword comment. (m2type_GetCSSizeTType): Reword comment. (m2type_GetCOffTType): New function. (build_m2_offt_type_node): New function. (m2type_InitSystemTypes): Initialize m2_offt_type_node. * gm2-gcc/m2type.def (GetCSizeTType): Reword comment. (GetCOffTType): New procedure function. * gm2-gcc/m2type.h (m2type_GetCOffTType): New prototype. * gm2-lang.cc (gm2_langhook_handle_option): New clause OPT_fm2_file_offset_bits_. * gm2-libs-coroutines/SYSTEM.def: Add COFF_T to export list. * gm2-libs-iso/SYSTEM.def: Ditto. * gm2-libs-min/SYSTEM.def: Ditto. * gm2-libs/SYSTEM.def: Add COFF_T and CARDINAL64 to export list. * gm2-libs/libc.def (lseek): Change return type to COFF_T. * lang.opt (-fm2-file-offset-bits=): New option. * mc-boot-ch/Glibc.c (libc_lseek): Change result to use off_t. * mc-boot/GASCII.cc: Rebuilt. * mc-boot/GASCII.h: Ditto. * mc-boot/GArgs.cc: Ditto. * mc-boot/GArgs.h: Ditto. * mc-boot/GAssertion.cc: Ditto. * mc-boot/GAssertion.h: Ditto. * mc-boot/GBreak.cc: Ditto. * mc-boot/GBreak.h: Ditto. * mc-boot/GCOROUTINES.h: Ditto. * mc-boot/GCmdArgs.cc: Ditto. * mc-boot/GCmdArgs.h: Ditto. * mc-boot/GDebug.cc: Ditto. * mc-boot/GDebug.h: Ditto. * mc-boot/GDynamicStrings.cc: Ditto. * mc-boot/GDynamicStrings.h: Ditto. * mc-boot/GEnvironment.cc: Ditto. * mc-boot/GEnvironment.h: Ditto. * mc-boot/GFIO.cc: Ditto. * mc-boot/GFIO.h: Ditto. * mc-boot/GFormatStrings.cc: Ditto. * mc-boot/GFormatStrings.h: Ditto. * mc-boot/GFpuIO.cc: Ditto. * mc-boot/GFpuIO.h: Ditto. * mc-boot/GIO.cc: Ditto. * mc-boot/GIO.h: Ditto. * mc-boot/GIndexing.cc: Ditto. * mc-boot/GIndexing.h: Ditto. * mc-boot/GM2Dependent.cc: Ditto. * mc-boot/GM2Dependent.h: Ditto. * mc-boot/GM2EXCEPTION.cc: Ditto. * mc-boot/GM2EXCEPTION.h: Ditto. * mc-boot/GM2RTS.cc: Ditto. (M2RTS_Halt): Call libc_exit.
[gcc r13-9285] Zen5 tuning part 4: update reassociation width
https://gcc.gnu.org/g:52507e15aa31bc66e99f2273306f1b45be919bba commit r13-9285-g52507e15aa31bc66e99f2273306f1b45be919bba Author: Jan Hubicka Date: Tue Sep 3 18:20:34 2024 +0200 Zen5 tuning part 4: update reassocation width Zen5 has 6 instead of 4 ALUs and the integer multiplication can now execute in 3 of them. FP units can do 2 additions and 2 multiplications with latency 2 and 3. This patch updates reassociation width accordingly. This has potential of increasing register pressure but unlike while benchmarking znver1 tuning I did not noticed this actually causing problem on spec, so this patch bumps up reassociation width to 6 for everything except for integer vectors, where there are 4 units with typical latency of 1. Bootstrapped/regtested x86_64-linux, comitted. gcc/ChangeLog: * config/i386/i386.cc (ix86_reassociation_width): Update for Znver5. * config/i386/x86-tune-costs.h (znver5_costs): Update reassociation widths. (cherry picked from commit f0ab3de6ec0e3540f2e57f3f5628005f0a4e3fa5) Diff: --- gcc/config/i386/i386.cc | 10 +++--- gcc/config/i386/x86-tune-costs.h | 23 +-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 8323b2e7cd39..395eeab70064 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -23262,13 +23262,17 @@ ix86_reassociation_width (unsigned int op, machine_mode mode) if (width == 1) return 1; - /* Integer vector instructions execute in FP unit + /* Znver1-4 Integer vector instructions execute in FP unit and can execute 3 additions and one multiplication per cycle. */ if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2 - || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4 - || ix86_tune == PROCESSOR_ZNVER5) + || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4) && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) return 1; + /* Znver5 can do 2 integer multiplications per cycle with latency +of 3. 
*/ + if (ix86_tune == PROCESSOR_ZNVER5 + && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) + width = 6; /* Account for targets that splits wide vectors into multiple parts. */ if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 02fad74c4d1c..b89ac640ea5f 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2100,16 +2100,19 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ - /* Zen can execute 4 integer operations per cycle. FP operations - take 3 cycles and it can execute 2 integer additions and 2 - multiplications thus reassociation may make sense up to with of 6. - SPEC2k6 bencharks suggests - that 4 works better than 6 probably due to register pressure. - - Integer vector operations are taken by FP unit and execute 3 vector - plus/minus operations per cycle but only one multiply. This is adjusted - in ix86_reassociation_width. */ - 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + /* Zen5 can execute: + - integer ops: 6 per cycle, at most 3 multiplications. + latency 1 for additions, 3 for multiplications (pipelined) + + Setting width of 9 for multiplication is probably excessive + for register pressure. + - fp ops: 2 additions per cycle, latency 2-3 + 2 multiplicaitons per cycle, latency 3 + - vector intger ops: 4 additions, latency 1 + 2 multiplications, latency 4 + We increase width to 6 for multiplications + in ix86_reassociation_width. */ + 6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */ znver2_memcpy, znver2_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
[gcc r12-10887] Zen5 tuning part 3: scheduler tweaks
https://gcc.gnu.org/g:f0718f1d7815c7845243a182c66f4a454efbfb72 commit r12-10887-gf0718f1d7815c7845243a182c66f4a454efbfb72 Author: Jan Hubicka Date: Tue Sep 3 16:26:16 2024 +0200 Zen5 tuning part 3: scheduler tweaks this patch adds support for new fussion in znver5 documented in the optimization manual: The Zen5 microarchitecture adds support to fuse reg-reg MOV Instructions with certain ALU instructions. The following conditions need to be met for fusion to happen: - The MOV should be reg-reg mov with Opcode 0x89 or 0x8B - The MOV is followed by an ALU instruction where the MOV and ALU destination register match. - The ALU instruction may source only registers or immediate data. There cannot be any memory source. - The ALU instruction sources either the source or dest of MOV instruction. - If ALU instruction has 2 reg sources, they should be different. - The following ALU instructions can fuse with an older qualified MOV instruction: ADD ADC AND XOR OP SUB SBB INC DEC NOT SAL / SHL SHR SAR (I assume OP is OR) I also increased issue rate from 4 to 6. Theoretically znver5 can do more, but with our model we can't realy use it. Increasing issue rate to 8 leads to infinite loop in scheduler. Finally, I also enabled fuse_alu_and_branch since it is supported by znver5 (I think by earlier zens too). New fussion pattern moves quite few instructions around in common code: @@ -2210,13 +2210,13 @@ .cfi_offset 3, -32 leaq63(%rsi), %rbx movq%rbx, %rbp + shrq$6, %rbp + salq$3, %rbp subq$16, %rsp .cfi_def_cfa_offset 48 movq%rdi, %r12 - shrq$6, %rbp - movq%rsi, 8(%rsp) - salq$3, %rbp movq%rbp, %rdi + movq%rsi, 8(%rsp) call_Znwm movq8(%rsp), %rsi movl$0, 8(%r12) @@ -2224,8 +2224,8 @@ movq%rax, (%r12) movq%rbp, 32(%r12) testq %rsi, %rsi - movq%rsi, %rdx cmovns %rsi, %rbx + movq%rsi, %rdx sarq$63, %rdx shrq$58, %rdx sarq$6, %rbx which should help decoder bandwidth and perhaps also cache, though I was not able to measure off-noise effect on SPEC. 
gcc/ChangeLog: * config/i386/i386.h (TARGET_FUSE_MOV_AND_ALU): New tune. * config/i386/x86-tune-sched.cc (ix86_issue_rate): Updat for znver5. (ix86_adjust_cost): Add TODO about znver5 memory latency. (ix86_fuse_mov_alu_p): New. (ix86_macro_fusion_pair_p): Use it. * config/i386/x86-tune.def (X86_TUNE_FUSE_ALU_AND_BRANCH): Add ZNVER5. (X86_TUNE_FUSE_MOV_AND_ALU): New tune; (cherry picked from commit e2125a600552bc6e0329e3f1224eea14804db8d3) Diff: --- gcc/config/i386/i386.h| 2 ++ gcc/config/i386/x86-tune-sched.cc | 59 +++ gcc/config/i386/x86-tune.def | 6 +++- 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2bf294eb172a..ed988ca280ea 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -413,6 +413,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS] #define TARGET_FUSE_ALU_AND_BRANCH \ ix86_tune_features[X86_TUNE_FUSE_ALU_AND_BRANCH] +#define TARGET_FUSE_MOV_AND_ALU \ + ix86_tune_features[X86_TUNE_FUSE_MOV_AND_ALU] #define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU] #define TARGET_AVOID_LEA_FOR_ADDR \ ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR] diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index ebfde5962495..f170f6476ce4 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -419,6 +419,8 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, enum attr_unit unit = get_attr_unit (insn); int loadcost; + /* TODO: On znver5 complex addressing modes have +greater latency. */ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) loadcost = 4; else @@ -524,6 +526,60 @@ ix86_macro_fusion_p () return TARGET_FUSE_CMP_AND_BRANCH; } +static bool +ix86_fuse_mov_alu_p (rtx_insn *mov, rtx_insn *alu) +{ + /* Validate mov: + - It should be reg-reg move with opcode 0x89 or 0x8B. 
*/ + rtx set1 = PATTERN (mov); + if (GET_CODE (set1) != SET + || !GENERAL_REG_P (SET_SRC (set1)) + || !GENERAL_REG_P (SET_DEST (set1))) +return false; + rtx reg = SET_DEST (set1); + /* - it should have 0x89 or 0x8B opcode. */ +
[gcc r15-6617] LoongArch: Optimize initializing fp register to zero
https://gcc.gnu.org/g:61400b669688848dc764d946a4d16fb51a27e286 commit r15-6617-g61400b669688848dc764d946a4d16fb51a27e286 Author: Deng Jianbo Date: Tue Dec 31 19:33:23 2024 +0800 LoongArch: Optimize initializing fp resgister to zero In LoongArch, currently uses instruction movgr2fr.{d|w} to move zero from fixed-point register to floating-pointer regsiter for initializing fp register to zero. When LSX or LASX is enabled, we can use instruction vxor.v which has lower latency than instruction movgr2fr.{d|w} to set fp register to zero directly. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_output_move): Optimize instructions for initializing fp regsiter to zero. gcc/testsuite/ChangeLog: * gcc.target/loongarch/mov-zero-1.c: New test. * gcc.target/loongarch/mov-zero-2.c: New test. Diff: --- gcc/config/loongarch/loongarch.cc | 2 ++ gcc/testsuite/gcc.target/loongarch/mov-zero-1.c | 15 +++ gcc/testsuite/gcc.target/loongarch/mov-zero-2.c | 15 +++ 3 files changed, 32 insertions(+) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 89237c377e77..d506354c48a0 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -4772,6 +4772,8 @@ loongarch_output_move (rtx dest, rtx src) gcc_unreachable (); } } + if (ISA_HAS_LSX && src == CONST0_RTX (GET_MODE (src))) + return "vxor.v\t%w0,%w0,%w0"; return dbl_p ? 
"movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1"; } diff --git a/gcc/testsuite/gcc.target/loongarch/mov-zero-1.c b/gcc/testsuite/gcc.target/loongarch/mov-zero-1.c new file mode 100644 index ..4744f2f2fdb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/mov-zero-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlsx" } */ +/* { dg-final { scan-assembler-times "vxor\\.v" 2 } } */ + +double +get_double_zero () +{ + return 0; +} + +float +get_float_zero () +{ + return 0; +} diff --git a/gcc/testsuite/gcc.target/loongarch/mov-zero-2.c b/gcc/testsuite/gcc.target/loongarch/mov-zero-2.c new file mode 100644 index ..6cb48052d0b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/mov-zero-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-lsx" } */ +/* { dg-final { scan-assembler-times "movgr2fr" 2 } } */ + +double +get_double_zero () +{ + return 0; +} + +float +get_float_zero () +{ + return 0; +}
[gcc r15-6618] Fortran: Ensure deep copy of allocatable components in cyclic types [PR114612]
https://gcc.gnu.org/g:25b380dc63cc7202ed1c7f2048994c3820a96fcd commit r15-6618-g25b380dc63cc7202ed1c7f2048994c3820a96fcd Author: Andre Vehreschild Date: Fri Dec 13 12:07:01 2024 +0100 Fortran: Ensure deep copy of allocatable components in cylic types [PR114612] gcc/fortran/ChangeLog: PR fortran/114612 * trans-array.cc (structure_alloc_comps): Ensure deep copy is also done for types having cycles. gcc/testsuite/ChangeLog: * gfortran.dg/alloc_comp_deep_copy_4.f03: New test. Diff: --- gcc/fortran/trans-array.cc | 7 +++--- .../gfortran.dg/alloc_comp_deep_copy_4.f03 | 29 ++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 057f6a63fdf5..44b091af2c69 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -10584,10 +10584,9 @@ structure_alloc_comps (gfc_symbol * der_type, tree decl, tree dest, false, false, NULL_TREE, NULL_TREE); gfc_add_expr_to_block (&fnblock, tmp); } - else if ((c->attr.allocatable) - && !c->attr.proc_pointer && !same_type - && (!(cmp_has_alloc_comps && c->as) || c->attr.codimension - || caf_in_coarray (caf_mode))) + else if (c->attr.allocatable && !c->attr.proc_pointer + && (!(cmp_has_alloc_comps && c->as) || c->attr.codimension + || caf_in_coarray (caf_mode))) { rank = c->as ? c->as->rank : 0; if (c->attr.codimension) diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_4.f03 b/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_4.f03 new file mode 100644 index ..3c445be032f6 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/alloc_comp_deep_copy_4.f03 @@ -0,0 +1,29 @@ +!{ dg-do run } +! +! Contributed Vladimir Terzi +! Check that deep-copy for b=a works. 
+ +program pr114672 +type node +integer::val +type(node),allocatable::next +end type + +type(node)::a,b + +allocate(a%next) +a%val=1 +a%next%val=2 +!print*,a%val,a%next%val +b=a +b%val=3 +b%next%val=4 +if (loc(b) == loc(a)) stop 1 +if (loc(b%next) == loc(a%next)) stop 2 +!print*,a%val,a%next%val +deallocate(b%next) +if (.NOT. allocated(a%next)) stop 3 +!print*,a%val,a%next%val +deallocate(a%next) +end +
[gcc r13-9284] Zen5 tuning part 3: scheduler tweaks
https://gcc.gnu.org/g:7392e9e480afe3143e72a99f7b5ac99b2f49c284 commit r13-9284-g7392e9e480afe3143e72a99f7b5ac99b2f49c284 Author: Jan Hubicka Date: Tue Sep 3 16:26:16 2024 +0200 Zen5 tuning part 3: scheduler tweaks this patch adds support for new fussion in znver5 documented in the optimization manual: The Zen5 microarchitecture adds support to fuse reg-reg MOV Instructions with certain ALU instructions. The following conditions need to be met for fusion to happen: - The MOV should be reg-reg mov with Opcode 0x89 or 0x8B - The MOV is followed by an ALU instruction where the MOV and ALU destination register match. - The ALU instruction may source only registers or immediate data. There cannot be any memory source. - The ALU instruction sources either the source or dest of MOV instruction. - If ALU instruction has 2 reg sources, they should be different. - The following ALU instructions can fuse with an older qualified MOV instruction: ADD ADC AND XOR OP SUB SBB INC DEC NOT SAL / SHL SHR SAR (I assume OP is OR) I also increased issue rate from 4 to 6. Theoretically znver5 can do more, but with our model we can't realy use it. Increasing issue rate to 8 leads to infinite loop in scheduler. Finally, I also enabled fuse_alu_and_branch since it is supported by znver5 (I think by earlier zens too). New fussion pattern moves quite few instructions around in common code: @@ -2210,13 +2210,13 @@ .cfi_offset 3, -32 leaq63(%rsi), %rbx movq%rbx, %rbp + shrq$6, %rbp + salq$3, %rbp subq$16, %rsp .cfi_def_cfa_offset 48 movq%rdi, %r12 - shrq$6, %rbp - movq%rsi, 8(%rsp) - salq$3, %rbp movq%rbp, %rdi + movq%rsi, 8(%rsp) call_Znwm movq8(%rsp), %rsi movl$0, 8(%r12) @@ -2224,8 +2224,8 @@ movq%rax, (%r12) movq%rbp, 32(%r12) testq %rsi, %rsi - movq%rsi, %rdx cmovns %rsi, %rbx + movq%rsi, %rdx sarq$63, %rdx shrq$58, %rdx sarq$6, %rbx which should help decoder bandwidth and perhaps also cache, though I was not able to measure off-noise effect on SPEC. 
gcc/ChangeLog: * config/i386/i386.h (TARGET_FUSE_MOV_AND_ALU): New tune. * config/i386/x86-tune-sched.cc (ix86_issue_rate): Updat for znver5. (ix86_adjust_cost): Add TODO about znver5 memory latency. (ix86_fuse_mov_alu_p): New. (ix86_macro_fusion_pair_p): Use it. * config/i386/x86-tune.def (X86_TUNE_FUSE_ALU_AND_BRANCH): Add ZNVER5. (X86_TUNE_FUSE_MOV_AND_ALU): New tune; (cherry picked from commit e2125a600552bc6e0329e3f1224eea14804db8d3) Diff: --- gcc/config/i386/i386.h| 2 ++ gcc/config/i386/x86-tune-sched.cc | 59 +++ gcc/config/i386/x86-tune.def | 6 +++- 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 08309367c18b..25c6540fb2c9 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -418,6 +418,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS] #define TARGET_FUSE_ALU_AND_BRANCH \ ix86_tune_features[X86_TUNE_FUSE_ALU_AND_BRANCH] +#define TARGET_FUSE_MOV_AND_ALU \ + ix86_tune_features[X86_TUNE_FUSE_MOV_AND_ALU] #define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU] #define TARGET_AVOID_LEA_FOR_ADDR \ ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR] diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index cbaba5f9e3c3..28b9ed84d03b 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -435,6 +435,8 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, enum attr_unit unit = get_attr_unit (insn); int loadcost; + /* TODO: On znver5 complex addressing modes have +greater latency. */ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) loadcost = 4; else @@ -540,6 +542,60 @@ ix86_macro_fusion_p () return TARGET_FUSE_CMP_AND_BRANCH; } +static bool +ix86_fuse_mov_alu_p (rtx_insn *mov, rtx_insn *alu) +{ + /* Validate mov: + - It should be reg-reg move with opcode 0x89 or 0x8B. 
*/ + rtx set1 = PATTERN (mov); + if (GET_CODE (set1) != SET + || !GENERAL_REG_P (SET_SRC (set1)) + || !GENERAL_REG_P (SET_DEST (set1))) +return false; + rtx reg = SET_DEST (set1); + /* - it should have 0x89 or 0x8B opcode. */ + i
[gcc r15-6628] ada: Add "finally" GNAT extension
https://gcc.gnu.org/g:5697da32d52b5659fee31826431f2134e69b1fee commit r15-6628-g5697da32d52b5659fee31826431f2134e69b1fee Author: Ronan Desplanques Date: Tue Dec 17 10:43:56 2024 +0100 ada: Add "finally" GNAT extension This patch adds a new reserved word, "finally", and accompanying new syntax that's similar to the Java equivalent. gcc/ada/ChangeLog: * atree.adb (Parent_Or_List_Containing): New function. * atree.ads (Parent_Or_List_Containing): Likewise. * gen_il-fields.ads: Add new field. * gen_il-gen-gen_nodes.adb (Gen_Nodes): Extend handled sequence of statements node. * par-ch11.adb (P_Handled_Sequence_Of_Statements, P_Exception_Handler): Add new syntactic construct. * par-ch5.adb (P_Sequence_Of_Statements): Likewise. * par.adb: Likewise. * par-util.adb (Check_Future_Keyword): Warn that "finally" becomes a reserved word with extensions. * scans.adb (Initialize_Ada_Keywords): Add new reserved word. * snames.adb-tmpl: Likewise. * snames.ads-tmpl: Likewise. * scans.ads: Likewise. * sem_ch11.adb (Analyze_Handled_Statements): Adapt to new node field. * sem_ch5.adb (Analyze_Exit_Statement): Add legality check. (Analyze_Goto_Statement): Likewise. * sem_ch6.adb (Analyze_Return_Statement): Likewise. * sinfo-utils.adb (Lowest_Common_Ancestor, Destroy_Element): New subprograms. * sinfo-utils.ads (Lowest_Common_Ancestor): New function. * sinfo.ads: Add documentation for new field. * xsnamest.adb: Fix typo in comment. * doc/gnat_rm/gnat_language_extensions.rst: Document new extension. * warnsw.adb: Add new option. * warnsw.ads: Likewise. * exp_ch11.adb (Expand_N_Handled_Sequence_Of_Statements): Add abort deferral to finally part. * gnat_rm.texi: Regenerate. * gnat_ugn.texi: Regenerate. * gcc-interface/trans.cc (Handled_Sequence_Of_Statements_to_gnu): Handle finally statements. 
Diff: --- gcc/ada/atree.adb| 15 +++ gcc/ada/atree.ads| 5 + gcc/ada/doc/gnat_rm/gnat_language_extensions.rst | 43 gcc/ada/exp_ch11.adb | 12 +++ gcc/ada/gcc-interface/trans.cc | 23 gcc/ada/gen_il-fields.ads| 1 + gcc/ada/gen_il-gen-gen_nodes.adb | 1 + gcc/ada/gnat_rm.texi | 129 +-- gcc/ada/gnat_ugn.texi| 2 +- gcc/ada/par-ch11.adb | 11 +- gcc/ada/par-ch5.adb | 19 gcc/ada/par-util.adb | 8 ++ gcc/ada/par.adb | 20 ++-- gcc/ada/scans.adb| 3 + gcc/ada/scans.ads| 1 + gcc/ada/sem_ch11.adb | 4 + gcc/ada/sem_ch5.adb | 56 +- gcc/ada/sem_ch6.adb | 17 +++ gcc/ada/sinfo-utils.adb | 68 gcc/ada/sinfo-utils.ads | 4 + gcc/ada/sinfo.ads| 4 + gcc/ada/snames.adb-tmpl | 4 +- gcc/ada/snames.ads-tmpl | 10 ++ gcc/ada/warnsw.adb | 1 + gcc/ada/warnsw.ads | 6 ++ gcc/ada/xsnamest.adb | 2 +- 26 files changed, 421 insertions(+), 48 deletions(-) diff --git a/gcc/ada/atree.adb b/gcc/ada/atree.adb index 8cc22394b0c8..c2e026bcc6dc 100644 --- a/gcc/ada/atree.adb +++ b/gcc/ada/atree.adb @@ -2076,6 +2076,21 @@ package body Atree is end if; end Node_Parent; + --- + -- Parent_Or_List_Containing -- + --- + + function Parent_Or_List_Containing (X : Union_Id) return Union_Id is + begin + if X in Node_Range then + return Link (Node_Id (X)); + elsif X in List_Range then + return Union_Id (List_Parent (List_Id (X))); + else + raise Program_Error; + end if; + end Parent_Or_List_Containing; + - -- Present -- - diff --git a/gcc/ada/atree.ads b/gcc/ada/atree.ads index 834cc3150f5e..3adfb824a175 100644 --- a/gcc/ada/atree.ads +++ b/gcc/ada/atree.ads @@ -456,6 +456,11 @@ package Atree is -- Parent has the same name as the one in Nlists; Node_Parent can be used -- more easily in the debugger. + function Parent_Or_List_Containing (X : Union_Id) return Union_Id; + -- X must be in Node_Range or
[gcc r15-6626] ada: Reject references to attribute Result in Exceptional_Cases
https://gcc.gnu.org/g:eccfadd5e22abba75138dd9762d8a873db80eaee commit r15-6626-geccfadd5e22abba75138dd9762d8a873db80eaee Author: Piotr Trojanek Date: Mon Dec 16 14:36:13 2024 +0100 ada: Reject references to attribute Result in Exceptional_Cases Functions with aspect Side_Effects should not reference attribute Result in consequences of their aspect Exceptional_Cases. gcc/ada/ChangeLog: * sem_prag.adb (Analyze_Exceptional_Cases_In_Decl_Part): Reject references to attribute Result. Diff: --- gcc/ada/sem_prag.adb | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb index b7de1cd8afa3..64b5b01869da 100644 --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -2216,7 +2216,8 @@ package body Sem_Prag is -- in the consequences of an exceptional contract unless they are either -- passed by reference or occur in the prefix of a reference to the 'Old -- attribute or as direct prefixes of attributes that do not actually - -- read data from the object (SPARK RM 6.1.9(1)). + -- read data from the object (SPARK RM 6.1.9(1)). References to + -- attribute Result should not occur either. - -- Check_Param -- @@ -2266,6 +2267,10 @@ package body Sem_Prag is then return Skip; end if; + when Name_Result => + Error_Msg_N + ("attribute Result in consequence of Exceptional_Cases", +N); when others => null; end case;
[gcc r15-6624] ada: Fix comments about Subprogram_Variant and Exceptional_Cases
https://gcc.gnu.org/g:d734902a117855a3f98b067ff8ed90052569af80 commit r15-6624-gd734902a117855a3f98b067ff8ed90052569af80 Author: Piotr Trojanek Date: Mon Dec 16 13:52:43 2024 +0100 ada: Fix comments about Subprogram_Variant and Exceptional_Cases The comment about Subprogram_Variant was outdated after more types have been allowed by the corresponding SPARK RM rule; the comment about Exceptional_Cases was incorrect, after being copy-pasted. gcc/ada/ChangeLog: * sem_prag.adb (Analyze_Exceptional_Contract, Analyze_Variant): Fix comments. Diff: --- gcc/ada/sem_prag.adb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb index 5b5ff320e80c..042028739748 100644 --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -2320,8 +2320,7 @@ package body Sem_Prag is Errors := Serious_Errors_Detected; - -- Preanalyze_Assert_Expression, but without enforcing any of the two - -- acceptable types. + -- Preanalyze_Assert_Expression enforcing the expression type Preanalyze_Assert_Expression (Consequence, Any_Boolean); @@ -31380,7 +31379,7 @@ package body Sem_Prag is Errors := Serious_Errors_Detected; - -- Preanalyze_Assert_Expression, but without enforcing any of the two + -- Preanalyze_Assert_Expression, but without enforcing any of the -- acceptable types. Preanalyze_Assert_Expression (Expr);
[gcc r15-6625] ada: Move checks for consequences of Exceptional_Cases to GNAT
https://gcc.gnu.org/g:0307abc8626da292843a080c809d59be5bfb720b commit r15-6625-g0307abc8626da292843a080c809d59be5bfb720b Author: Piotr Trojanek Date: Mon Dec 16 14:15:57 2024 +0100 ada: Move checks for consequences of Exceptional_Cases to GNAT Previously checks for consequence expressions of Exceptional_Cases aspects were done in GNATprove backend. However, we can do them in the frontend, where they will apply to all subprograms, regardless of the SPARK_Mode aspect. gcc/ada/ChangeLog: * sem_prag.adb (Analyze_Exceptional_Cases_In_Decl_Part): Move check from GNATprove backend to GNAT frontend. Diff: --- gcc/ada/sem_prag.adb | 68 1 file changed, 68 insertions(+) diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb index 042028739748..b7de1cd8afa3 100644 --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -2211,6 +2211,72 @@ package body Sem_Prag is procedure Check_Duplication (Id : Node_Id; Contracts : List_Id); -- Iterate through the identifiers in each contract to find duplicates + function Check_Param (N : Node_Id) return Traverse_Result; + -- Parameters of modes OUT or IN OUT of the subprogram shall not occur + -- in the consequences of an exceptional contract unless they are either + -- passed by reference or occur in the prefix of a reference to the 'Old + -- attribute or as direct prefixes of attributes that do not actually + -- read data from the object (SPARK RM 6.1.9(1)). 
+ + - + -- Check_Param -- + - + + function Check_Param (N : Node_Id) return Traverse_Result is + begin + case Nkind (N) is +when N_Identifier | N_Expanded_Name => + declare + Id : constant Entity_Id := Entity (N); + begin + if Present (Id) +and then Ekind (Id) in E_Out_Parameter + | E_In_Out_Parameter +and then Scope (Id) = Spec_Id +and then not Is_By_Reference_Type (Etype (Id)) +and then not Is_Aliased (Id) + then + declare +Mode : constant String := + (if Ekind (Id) = E_Out_Parameter then "out" + else "in out"); + begin +Error_Msg_N + ("formal parameter of mode """ & Mode + & """ in consequence of Exceptional_Cases", N); +Error_Msg_N + ("\only parameters passed by reference are allowed", + N); + end; + end if; + end; + +when N_Attribute_Reference => + case Attribute_Name (N) is + when Name_Old => + return Skip; + when Name_Constrained + | Name_First + | Name_Last + | Name_Length + | Name_Range + => + if Nkind (Prefix (N)) in N_Identifier +| N_Expanded_Name + then +return Skip; + end if; + when others => null; + end case; + +when others => null; + end case; + + return OK; + end Check_Param; + + procedure Check_Params is new Traverse_More_Proc (Check_Param); + -- -- Analyze_Exceptional_Contract -- -- @@ -2324,6 +2390,8 @@ package body Sem_Prag is Preanalyze_Assert_Expression (Consequence, Any_Boolean); + Check_Params (Consequence); + -- Emit a clarification message when the consequence contains at -- least one undefined reference, possibly due to contract freezing.
[gcc r15-6627] ada: Elide the copy for bit-packed aggregates in (safe) assignments
https://gcc.gnu.org/g:a47c6d8a1e4ec4fc635bbe9f9fadfc871b13bae2 commit r15-6627-ga47c6d8a1e4ec4fc635bbe9f9fadfc871b13bae2 Author: Eric Botcazou Date: Tue Dec 17 11:20:03 2024 +0100 ada: Elide the copy for bit-packed aggregates in (safe) assignments The in-place expansion has been historically disabled for them, but there does not seem to be any good reason left for this. gcc/ada/ChangeLog: * exp_aggr.adb (Expand_Array_Aggregate): Do not exclude aggregates of bit-packed array types in assignments from in-place expansion. Diff: --- gcc/ada/exp_aggr.adb | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb index f771660d23a0..a51e02bc3082 100644 --- a/gcc/ada/exp_aggr.adb +++ b/gcc/ada/exp_aggr.adb @@ -6129,7 +6129,6 @@ package body Exp_Aggr is Nkind (Parent_Node) = N_Assignment_Statement and then (Is_Limited_Type (Typ) or else (not Has_Default_Init_Comps (N) - and then not Is_Bit_Packed_Array (Typ) and then In_Place_Assign_OK (N, Get_Base_Object (Name (Parent_Node);
[gcc r15-6623] ada: Put_Image spec incorrectly ignored for Fixed_Point_Type'Base'Image call.
https://gcc.gnu.org/g:67e3db712e36e15486709ea39759a53f15c3d0e6 commit r15-6623-g67e3db712e36e15486709ea39759a53f15c3d0e6 Author: Steve Baird Date: Thu Dec 12 17:06:00 2024 -0800 ada: Put_Image spec incorrectly ignored for Fixed_Point_Type'Base'Image call. If a Put_Image aspect specification (introduced in Ada 2022) is given for a fixed point type Fx, then in some cases a call to Fx'Base'Image would incorrectly ignore the aspect specification and would instead return the pre-Ada2022 version of the image. However, a call to Fx'Image would do the right thing. gcc/ada/ChangeLog: * exp_put_image.adb (Image_Should_Call_Put_Image): Cope with the case where the attribute prefix for an Image attribute reference denotes an Itype constructed for a fixed point type. Calling Has_Aspect with such an Itype misses applicable aspect specifications; we need to look on the right list. This comes up if the prefix of the attribute reference is Some_Fixed_Point_Type'Base. Diff: --- gcc/ada/exp_put_image.adb | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/gcc/ada/exp_put_image.adb b/gcc/ada/exp_put_image.adb index dff9bba55a85..ef4494b7f112 100644 --- a/gcc/ada/exp_put_image.adb +++ b/gcc/ada/exp_put_image.adb @@ -1176,11 +1176,28 @@ package body Exp_Put_Image is declare U_Type : constant Entity_Id := Underlying_Type (Entity (Prefix (N))); begin - if Has_Aspect (U_Type, Aspect_Put_Image) then + if Has_Aspect (U_Type, Aspect_Put_Image) + or else not Is_Scalar_Type (U_Type) + then +return True; + end if; + + -- Deal with Itypes. One case where this is needed is for a + -- fixed-point type with a Put_Image aspect specification. + + -- ??? Should we be checking for Itype case here, or in Has_Aspect? + -- In other words, do we want to do what we are doing here for all + -- aspects, not just for Put_Image? 
+ + if Is_Itype (U_Type) + and then Has_Aspect (Defining_Identifier + (Associated_Node_For_Itype (U_Type)), +Aspect_Put_Image) + then return True; end if; - return not Is_Scalar_Type (U_Type); + return False; end; end Image_Should_Call_Put_Image;
[gcc r15-6632] ada: Remove unnecessary qualifiers for First/Next list operations
https://gcc.gnu.org/g:69dfa02bdb9eaadc24552f280a390a0737386a54 commit r15-6632-g69dfa02bdb9eaadc24552f280a390a0737386a54 Author: Piotr Trojanek Date: Thu Mar 2 22:43:12 2023 +0100 ada: Remove unnecessary qualifiers for First/Next list operations Code cleanup related to work on expression functions for GNATprove (which require accessibility checks even when they are not expanded and thus have no explicit return statements). gcc/ada/ChangeLog: * accessibility.adb (First_Selector): Remove redundant and locally inconsistent parenthesis. (Check_Return_Construct_Accessibility): Remove qualifier from list operation. * sem_util.adb (Is_Prim_Of_Abst_Type_With_Nonstatic_CW_Pre_Post): Likewise. Diff: --- gcc/ada/accessibility.adb | 6 +++--- gcc/ada/sem_util.adb | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/gcc/ada/accessibility.adb b/gcc/ada/accessibility.adb index 376eb9d0bb94..1d7d3a137862 100644 --- a/gcc/ada/accessibility.adb +++ b/gcc/ada/accessibility.adb @@ -1213,7 +1213,7 @@ package body Accessibility is return First (Choices (Assoc)); elsif Nkind (Assoc) = N_Discriminant_Association then -return (First (Selector_Names (Assoc))); +return First (Selector_Names (Assoc)); else raise Program_Error; @@ -1292,7 +1292,7 @@ package body Accessibility is exit; end if; -Nlists.Next (Return_Con); +Next (Return_Con); end loop; pragma Assert (Present (Return_Con)); @@ -1693,7 +1693,7 @@ package body Accessibility is if not Is_List_Member (Assoc) then exit; else -Nlists.Next (Assoc); +Next (Assoc); end if; end loop; end Check_Return_Construct_Accessibility; diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb index 24f27d081673..a11afa8be2d3 100644 --- a/gcc/ada/sem_util.adb +++ b/gcc/ada/sem_util.adb @@ -12654,8 +12654,7 @@ package body Sem_Util is and then Class_Present (Prag) then Pragma_Arg := - Nlists.First - (Pragma_Argument_Associations (Prag)); + First (Pragma_Argument_Associations (Prag)); if not Is_Static_Expression (Expression (Pragma_Arg)) 
then return True;
[gcc r15-6643] ada: Cleanup preanalysis of static expressions (part 2)
https://gcc.gnu.org/g:360cd3542e9d9bd6e101a9fc6509f8f6013664cc commit r15-6643-g360cd3542e9d9bd6e101a9fc6509f8f6013664cc Author: Javier Miranda Date: Thu Dec 19 10:41:59 2024 + ada: Cleanup preanalysis of static expressions (part 2) According to RM 13.14(8/4), a static expression in an aspect specification does not cause freezing; however, the frontend performs many calls to Preanalyze_Spec_Expression made during the analysis of aspects. This patch, suggested by Eric Botcazou, takes care of this additional code cleanup which requires also replacing many occurrences of the global variable In_Spec_Expression by calls to Preanalysis_Active. gcc/ada/ChangeLog: * exp_util.adb (Insert_Actions): Document behavior under strict preanalysis. * sem.ads (In_Strict_Preanalysis): New subprogram. (Preanalysis_Active): Replace 'and' operator by 'and then'. * sem.adb (In_Strict_Preanalysis): Ditto. * sem_attr.adb (Check_Dereference): Replace In_Spec_Expression occurrence by call to Preanalysis_Active, and document it. (Resolve_Attribute [Attribute_Access]): Ditto. (Eval_Attribute): No evaluation under strict preanalysis. (Validate_Static_Object_Name): No action under strict preanalysis. * sem_ch13.adb (Check_Aspect_At_End_Of_Declarations): Replace calls to Preanalyze_Spec_Expression by calls to Preanalyze_And_Resolve. (Check_Aspect_At_Freeze_Point): Ditto. (Resolve_Aspect_Expressions [Dynamic/Static/Predicate aspects]): Code cleanup adjusting the code to emulate Preanalyze_And_Resolve, instead of Preanalyze_Spec_Expression. (Resolve_Aspect_Expressions [CPU/Interrupt_Priority/Priority/ Storage_Size aspects]): Replace calls to Preanalyze_Spec_Expression by call to Preanalyze_And_Resolve. * sem_ch3.adb (Analyze_Object_Declaration): Replace In_Spec_Expression occurrence by call to Preanalysis_Active. (Find_Type_Of_Object): Add documentation. * sem_ch4.adb (Analyze_Case_Expression): Replace In_Spec_Expression occurrence by call to Preanalysis_Active. 
* sem_ch6.adb (Analyze_Expression_Function): Minor code reorganization moving the code preanalyzing the expression after the new body has been inserted in the tree to ensure that its Parent attribute is available for preanalysis. * sem_cat.adb (Validate_Static_Object_Name): No action under strict preanalysis. * sem_elab.adb (Check_For_Eliminated_Subprogram): Replace In_Spec_Expression occurrence by call to Preanalysis_Active. * sem_eval.adb (Eval_Intrinsic_Call [Name_Enclosing_Entity]): Ditto. * sem_elim.adb (Check_For_Eliminated_Subprogram): Ditto. * sem_res.adb (Resolve_Entity_Name): Ditto. Diff: --- gcc/ada/exp_util.adb | 16 gcc/ada/sem.adb | 11 ++- gcc/ada/sem.ads | 5 + gcc/ada/sem_attr.adb | 25 + gcc/ada/sem_cat.adb | 1 + gcc/ada/sem_ch13.adb | 25 +++-- gcc/ada/sem_ch3.adb | 7 +-- gcc/ada/sem_ch4.adb | 2 +- gcc/ada/sem_ch6.adb | 46 +++--- gcc/ada/sem_elab.adb | 2 +- gcc/ada/sem_elim.adb | 5 +++-- gcc/ada/sem_eval.adb | 2 +- gcc/ada/sem_res.adb | 2 +- 13 files changed, 87 insertions(+), 62 deletions(-) diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb index 66ba73226ed0..69d6e25794e8 100644 --- a/gcc/ada/exp_util.adb +++ b/gcc/ada/exp_util.adb @@ -7708,20 +7708,20 @@ package body Exp_Util is return; end if; - -- Insert the action when the context is "Handling of Default and Per- - -- Object Expressions" only when requested by the caller. - - if Spec_Expr_OK then - null; - -- Ignore insert of actions from inside default expression (or other -- similar "spec expression") in the special spec-expression analyze -- mode. Any insertions at this point have no relevance, since we are -- only doing the analyze to freeze the types of any static expressions. -- See section "Handling of Default and Per-Object Expressions" in the - -- spec of package Sem for further details. + -- spec of package Sem for further details. However, if the user does + -- nevertheless request the insert, then obey it. 
+ + -- Under strict preanalysis we cannot ignore insert of actions because + -- we may be adding to the tree a subtype declaration that is required + -- for proper preanalysis (see Sem_Ch3.Find_Type_Of_Object). - elsif In_Spec_Expression then + if In_Spec_Expression and then not Spec_Expr_OK then + pragma Assert (not In_Strict_P
[gcc r15-6641] ada: Improve protection against wrong use from GDB
https://gcc.gnu.org/g:0a71f5b5ca5bac57c660f563940b8baeb3552870 commit r15-6641-g0a71f5b5ca5bac57c660f563940b8baeb3552870 Author: Piotr Trojanek Date: Fri Dec 20 00:09:15 2024 +0100 ada: Improve protection against wrong use from GDB A code cleanup in a routine intended to be used from GDB, suggested by running GNATcheck rule Boolean_Negations. However, this code can be tuned to protect against more illegal uses. gcc/ada/ChangeLog: * exp_disp.adb (Write_DT): Add guards that prevent crashes on illegal node numbers. Diff: --- gcc/ada/exp_disp.adb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/ada/exp_disp.adb b/gcc/ada/exp_disp.adb index f45c32356a90..6d0f2c87017a 100644 --- a/gcc/ada/exp_disp.adb +++ b/gcc/ada/exp_disp.adb @@ -8674,9 +8674,10 @@ package body Exp_Disp is begin -- Protect this procedure against wrong usage. Required because it will - -- be used directly from GDB + -- be used directly from GDB. - if not (Typ <= Last_Node_Id) + if Typ not in First_Node_Id .. Last_Node_Id +or else Nkind (Typ) not in N_Entity or else not Is_Tagged_Type (Typ) then Write_Str ("wrong usage: Write_DT must be used with tagged types");
[gcc r15-6644] ada: Do not raise exceptions from Exp_Aggr.Packed_Array_Aggregate_Handled
https://gcc.gnu.org/g:0f1bc0d5c0b7b77f3ec0704f9f999cceef13adea commit r15-6644-g0f1bc0d5c0b7b77f3ec0704f9f999cceef13adea Author: Eric Botcazou Date: Fri Dec 20 16:49:50 2024 +0100 ada: Do not raise exceptions from Exp_Aggr.Packed_Array_Aggregate_Handled An exception is now raised during bootstrap and this causes compatibility issues with older compilers. gcc/ada/ChangeLog: * exp_aggr.adb (Packed_Array_Aggregate_Handled): Remove declaration and handler for Not_Handled local exception. Check the return value of Get_Component_Val instead. (Get_Component_Val): Return No_Uint instead of raising Not_Handled. Diff: --- gcc/ada/exp_aggr.adb | 29 - 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb index 428115f81ba6..378168fe40b9 100644 --- a/gcc/ada/exp_aggr.adb +++ b/gcc/ada/exp_aggr.adb @@ -8962,9 +8962,6 @@ package body Exp_Aggr is Typ : constant Entity_Id := Etype (N); Ctyp : constant Entity_Id := Component_Type (Typ); - Not_Handled : exception; - -- Exception raised if this aggregate cannot be handled - begin -- Handle one- or two dimensional bit packed array @@ -8997,7 +8994,7 @@ package body Exp_Aggr is -- Given a expression value N of the component type Ctyp, returns a -- value of Csiz (component size) bits representing this value. If -- the value is nonstatic or any other reason exists why the value - -- cannot be returned, then Not_Handled is raised. + -- cannot be returned, then No_Uint is returned. --- -- Get_Component_Val -- @@ -9020,7 +9017,7 @@ package body Exp_Aggr is if not Compile_Time_Known_Value (N) or else Nkind (N) = N_String_Literal then - raise Not_Handled; + return No_Uint; end if; Val := Expr_Rep_Value (N); @@ -9098,6 +9095,9 @@ package body Exp_Aggr is -- justified modular type processing), so we do not have to -- worry about that here. 
+ Val : Uint; + -- Temporary value + Lit : Node_Id; -- Integer literal for resulting constructed value @@ -9146,16 +9146,23 @@ package body Exp_Aggr is if Len = 0 then Aggregate_Val := Uint_0; + else Expr := First (Expressions (N)); - Aggregate_Val := Get_Component_Val (Expr) * Uint_2 ** Shift; + Val := Get_Component_Val (Expr); + if No (Val) then + return False; + end if; + Aggregate_Val := Val * Uint_2 ** Shift; for J in 2 .. Len loop Shift := Shift + Incr; Next (Expr); - Aggregate_Val := - Aggregate_Val + - Get_Component_Val (Expr) * Uint_2 ** Shift; + Val := Get_Component_Val (Expr); + if No (Val) then +return False; + end if; + Aggregate_Val := Aggregate_Val + Val * Uint_2 ** Shift; end loop; end if; @@ -9182,10 +9189,6 @@ package body Exp_Aggr is end; end; end; - - exception - when Not_Handled => - return False; end Packed_Array_Aggregate_Handled;
[gcc r15-6646] ada: Do not create temporaries for initialization statements
https://gcc.gnu.org/g:980415be73a4c762302eeba0813e435116bccc70 commit r15-6646-g980415be73a4c762302eeba0813e435116bccc70 Author: Eric Botcazou Date: Tue Dec 10 10:24:47 2024 +0100 ada: Do not create temporaries for initialization statements Assignment statements marked with the No_Ctrl_Actions or No_Finalize_Actions flag are initialization statements and, therefore, no temporaries are needed to hold the value of the right-hand side for them. gcc/ada/ChangeLog: * gcc-interface/trans.cc (Call_to_gnu): Always use the return slot optimization if the parent node is an initialization statement. (gnat_to_gnu) : Build an INIT_EXPR instead of a MODIFY_EXPR if this is an initialization statement. Diff: --- gcc/ada/gcc-interface/trans.cc | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/gcc/ada/gcc-interface/trans.cc b/gcc/ada/gcc-interface/trans.cc index cda73d509e84..b65a846ee667 100644 --- a/gcc/ada/gcc-interface/trans.cc +++ b/gcc/ada/gcc-interface/trans.cc @@ -5517,10 +5517,17 @@ Call_to_gnu (Node_Id gnat_node, tree *gnu_result_type_p, tree gnu_target, gigi_checking_assert (!Do_Range_Check (gnat_node)); + /* If the parent is an initialization statement, we can use the +return slot optimization. */ + if (Nkind (gnat_parent) == N_Assignment_Statement + && (No_Ctrl_Actions (gnat_parent) + || No_Finalize_Actions (gnat_parent))) + op_code = INIT_EXPR; + /* ??? If the return type has variable size, then force the return slot optimization as we would not be able to create a temporary. That's what has been done historically. */ - if (return_type_with_variable_size_p (gnu_result_type)) + else if (return_type_with_variable_size_p (gnu_result_type)) op_code = INIT_EXPR; /* If this is a call to a pure function returning an array of scalar @@ -7811,6 +7818,12 @@ gnat_to_gnu (Node_Id gnat_node) = build_unary_op (ADDR_EXPR, TREE_TYPE (arg), gnu_lhs); } + /* If the statement is an initialization, build one too. 
*/ + else if (No_Ctrl_Actions (gnat_node) + || No_Finalize_Actions (gnat_node)) + gnu_result + = build_binary_op (INIT_EXPR, NULL_TREE, gnu_lhs, gnu_rhs); + /* Otherwise build a regular assignment. */ else gnu_result
[gcc r15-6648] ada: Adjust pragma obsolescent message
https://gcc.gnu.org/g:edec353862e83d7a46cd85dc51ab549742506470 commit r15-6648-gedec353862e83d7a46cd85dc51ab549742506470 Author: Marc Poulhiès Date: Mon Jan 6 10:59:10 2025 +0100 ada: Adjust pragma obsolescent message Do not mention an explicit version. gcc/ada/ChangeLog: * libgnat/a-calcon.ads: Adjust. * libgnat/a-calend.ads: Adjust. Diff: --- gcc/ada/libgnat/a-calcon.ads | 16 gcc/ada/libgnat/a-calend.ads | 14 ++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/gcc/ada/libgnat/a-calcon.ads b/gcc/ada/libgnat/a-calcon.ads index a7be1f7f3fd7..a94d5c63dff2 100644 --- a/gcc/ada/libgnat/a-calcon.ads +++ b/gcc/ada/libgnat/a-calcon.ads @@ -37,7 +37,8 @@ with Interfaces.C; package Ada.Calendar.Conversions is function To_Ada_Time (Unix_Time : Interfaces.C.long) return Time; - pragma Obsolescent (To_Ada_Time, "Retires in v26"); + pragma Obsolescent + (To_Ada_Time, "This function may be removed in a future version"); -- Old version which will overflow at the 2038 Epochalypse function To_Ada_Time_64 (Unix_Time : Interfaces.C.long_long) return Time; @@ -74,7 +75,9 @@ package Ada.Calendar.Conversions is function To_Duration (tv_sec : Interfaces.C.long; tv_nsec : Interfaces.C.long) return Duration; - pragma Obsolescent (To_Duration, "Retires in v26"); + pragma + Obsolescent + (To_Duration, "This function may be removed in a future version"); -- Old version which will overflow at the 2038 Epochalypse function To_Duration_64 @@ -90,7 +93,10 @@ package Ada.Calendar.Conversions is (D : Duration; tv_sec : out Interfaces.C.long; tv_nsec : out Interfaces.C.long); - pragma Obsolescent (To_Struct_Timespec, "Retires in v26"); + pragma + Obsolescent + (To_Struct_Timespec, +"This function may be removed in a future version"); -- Old version which will overflow at the 2038 Epochalypse procedure To_Struct_Timespec_64 @@ -122,7 +128,9 @@ package Ada.Calendar.Conversions is -- The input date is considered to be in UTC function To_Unix_Time (Ada_Time : Time) return 
Interfaces.C.long; - pragma Obsolescent (To_Unix_Time, "Retires in v26"); + pragma + Obsolescent + (To_Unix_Time, "This function may be removed in a future version"); -- Old version which will overflow at the 2038 Epochalypse function To_Unix_Time_64 (Ada_Time : Time) return Interfaces.C.long_long; diff --git a/gcc/ada/libgnat/a-calend.ads b/gcc/ada/libgnat/a-calend.ads index 649074558961..032fad8294fb 100644 --- a/gcc/ada/libgnat/a-calend.ads +++ b/gcc/ada/libgnat/a-calend.ads @@ -291,7 +291,8 @@ private package Conversion_Operations is function To_Ada_Time (Unix_Time : Long_Integer) return Time; - pragma Obsolescent (To_Ada_Time, "Retires in v26"); + pragma Obsolescent +(To_Ada_Time, "This function may be removed in a future version"); -- Old Unix to Ada Epoch conversion function To_Ada_Time_64 (Unix_Time : Long_Long_Integer) return Time; @@ -310,7 +311,8 @@ private function To_Duration (tv_sec : Long_Integer; tv_nsec : Long_Integer) return Duration; - pragma Obsolescent (To_Duration, "Retires in v26"); + pragma Obsolescent +(To_Duration, "This function may be removed in a future version"); -- Old Struct timespec to Duration conversion function To_Duration_64 @@ -322,7 +324,10 @@ private (D : Duration; tv_sec : out Long_Integer; tv_nsec : out Long_Integer); - pragma Obsolescent (To_Struct_Timespec, "Retires in v26"); + pragma +Obsolescent + (To_Struct_Timespec, + "This function may be removed in a future version"); -- Old Duration to struct timespec conversion procedure To_Struct_Timespec_64 @@ -342,7 +347,8 @@ private -- Time to struct tm conversion function To_Unix_Time (Ada_Time : Time) return Long_Integer; - pragma Obsolescent (To_Unix_Time, "Retires in v26"); + pragma Obsolescent +(To_Unix_Time, "This function may be removed in a future version"); -- Old Ada to Unix Epoch conversion function To_Unix_Time_64 (Ada_Time : Time) return Long_Long_Integer;
[gcc r15-6647] ada: Drop g-cpp* units not needed by the compiler
https://gcc.gnu.org/g:5f5022a9578cb19de6a0ed6dc3457904c618003f commit r15-6647-g5f5022a9578cb19de6a0ed6dc3457904c618003f Author: Alexandre Oliva Date: Wed Dec 11 10:16:58 2024 -0300 ada: Drop g-cpp* units not needed by the compiler Having moved __gnat_convert_caught_object to g-cstyin.o, we can drop other g-cpp* units that are now needed by programs that actually use their APIs to get more information about C++ exceptions and type_info objects. gcc/ada/ChangeLog: * gcc-interface/Make-lang.in (GNAT_ADA_OBJS, GNATBIND_OBJS): Drop g-cpp, g-cppexc and g-cppstd. Diff: --- gcc/ada/gcc-interface/Make-lang.in | 6 -- 1 file changed, 6 deletions(-) diff --git a/gcc/ada/gcc-interface/Make-lang.in b/gcc/ada/gcc-interface/Make-lang.in index b0c568ad60ba..1fefd6aa31f4 100644 --- a/gcc/ada/gcc-interface/Make-lang.in +++ b/gcc/ada/gcc-interface/Make-lang.in @@ -513,9 +513,6 @@ GNAT_ADA_OBJS+= \ ada/libgnat/a-numeri.o\ ada/libgnat/ada.o \ ada/libgnat/g-byorma.o\ - ada/libgnat/g-cpp.o \ - ada/libgnat/g-cppexc.o\ - ada/libgnat/g-cppstd.o\ ada/libgnat/g-cstyin.o\ ada/libgnat/g-heasor.o\ ada/libgnat/g-htable.o\ @@ -697,9 +694,6 @@ GNATBIND_OBJS += \ ada/libgnat/a-numeri.o \ ada/libgnat/ada.o\ ada/libgnat/g-byorma.o \ - ada/libgnat/g-cpp.o \ - ada/libgnat/g-cppexc.o \ - ada/libgnat/g-cppstd.o \ ada/libgnat/g-cstyin.o \ ada/libgnat/g-hesora.o \ ada/libgnat/g-htable.o \
[gcc r15-6645] ada: Remove unused AST flag Address_Warning_Posted
https://gcc.gnu.org/g:a80bb3525e6e3596e025399691dc7789268fffe2 commit r15-6645-ga80bb3525e6e3596e025399691dc7789268fffe2 Author: Piotr Trojanek Date: Mon Dec 23 10:49:11 2024 +0100 ada: Remove unused AST flag Address_Warning_Posted Flag Address_Warning_Posted was only read and never written, so it can be safely removed. gcc/ada/ChangeLog: * gen_il-fields.ads (Opt_Field_Enum): Remove flag. * gen_il-gen-gen_nodes.adb (N_Attribute_Definition_Clause): Remove field. * sem_ch13.adb (Validate_Address_Clauses): Remove read of the flag. * sinfo.ads (Address_Warning_Posted): Remove flag description. Diff: --- gcc/ada/gen_il-fields.ads| 1 - gcc/ada/gen_il-gen-gen_nodes.adb | 1 - gcc/ada/sem_ch13.adb | 6 ++ gcc/ada/sinfo.ads| 6 -- 4 files changed, 2 insertions(+), 12 deletions(-) diff --git a/gcc/ada/gen_il-fields.ads b/gcc/ada/gen_il-fields.ads index fe2da80c5488..789fb76875d6 100644 --- a/gcc/ada/gen_il-fields.ads +++ b/gcc/ada/gen_il-fields.ads @@ -65,7 +65,6 @@ package Gen_IL.Fields is Activation_Chain_Entity, Acts_As_Spec, Actual_Designated_Subtype, - Address_Warning_Posted, Aggregate_Bounds_Or_Ancestor_Type, Aliased_Present, All_Others, diff --git a/gcc/ada/gen_il-gen-gen_nodes.adb b/gcc/ada/gen_il-gen-gen_nodes.adb index c3a97e6e16e6..b361aeeca1d1 100644 --- a/gcc/ada/gen_il-gen-gen_nodes.adb +++ b/gcc/ada/gen_il-gen-gen_nodes.adb @@ -98,7 +98,6 @@ begin -- Gen_IL.Gen.Gen_Nodes (Sy (Name, Node_Id, Default_Empty), Sy (Chars, Name_Id, Default_No_Name), Sy (Expression, Node_Id, Default_Empty), -Sm (Address_Warning_Posted, Flag), Sm (Check_Address_Alignment, Flag), Sm (Entity_Or_Associated_Node, Node_Id), -- just Entity Sm (From_Aspect_Specification, Flag), diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb index 2beb6b95daf3..00489722921a 100644 --- a/gcc/ada/sem_ch13.adb +++ b/gcc/ada/sem_ch13.adb @@ -18055,11 +18055,9 @@ package body Sem_Ch13 is X_Offs : Uint; begin --- Skip processing of this entry if warning already posted, or if --- alignments are not set. 
+-- Skip processing of this entry if alignments are not set -if not Address_Warning_Posted (ACCR.N) - and then Known_Alignment (ACCR.X) +if Known_Alignment (ACCR.X) and then Known_Alignment (ACCR.Y) then Expr := Original_Node (Expression (ACCR.N)); diff --git a/gcc/ada/sinfo.ads b/gcc/ada/sinfo.ads index e9b2f0f3c5c8..94bbb2f974e4 100644 --- a/gcc/ada/sinfo.ads +++ b/gcc/ada/sinfo.ads @@ -788,11 +788,6 @@ package Sinfo is --created for the expansion of an allocator with a subtype_indication --and the designated subtype is an unconstrained composite type. - -- Address_Warning_Posted - --Present in N_Attribute_Definition nodes. Set to indicate that we have - --posted a warning for the address clause regarding size or alignment - --issues. Used to inhibit multiple redundant messages. - -- Aggregate_Bounds --Present in array N_Aggregate nodes. If the bounds of the aggregate are --known at compile time, this field points to an N_Range node with those @@ -7549,7 +7544,6 @@ package Sinfo is -- Check_Address_Alignment -- From_Aspect_Specification -- Is_Delayed_Aspect - -- Address_Warning_Posted -- Note: if From_Aspect_Specification is set, then Sloc points to the -- aspect name, and Entity is resolved already to reference the entity
[gcc r15-6622] ada: Error on instantiation with defaulted formal type referencing other formal type
https://gcc.gnu.org/g:f409c452883011c93a91f8057d301920dbc9bbb2 commit r15-6622-gf409c452883011c93a91f8057d301920dbc9bbb2 Author: Gary Dismukes Date: Fri Dec 13 23:36:05 2024 + ada: Error on instantiation with defaulted formal type referencing other formal type The compiler wasn't accounting for default subtypes on generic formal types that reference other formal types of the same generic, leading to errors about invalid subtypes. Several other problems that could lead to blowups or incorrect errors were noticed through testing related cases and fixed along the way. gcc/ada/ChangeLog: * sem_ch12.adb (Analyze_One_Association): In the case of a formal type that has a Default_Subtype_Mark that does not have its Entity field set, this means the default refers to another formal type of the same generic formal part, so locate the matching subtype in the Result_Renamings and set Match's Entity to that subtype prior to the call to Instantiate_Type. (Validate_Formal_Type_Default.Reference_Formal): Add test of Entity being Present, to prevent blowups on End_Label ids (which don't have Entity set). (Validate_Formal_Type_Default.Validate_Derived_Type_Default): Apply Base_Type to Formal. (Validate_Formal_Type_Default): Guard interface-related semantic checks with a test of Is_Tagged_Type. Diff: --- gcc/ada/sem_ch12.adb | 78 ++-- 1 file changed, 64 insertions(+), 14 deletions(-) diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb index 625d291fc28d..41ace8cc250f 100644 --- a/gcc/ada/sem_ch12.adb +++ b/gcc/ada/sem_ch12.adb @@ -2512,6 +2512,52 @@ package body Sem_Ch12 is if Present (Default_Subtype_Mark (Assoc.Un_Formal)) then Match := New_Copy (Default_Subtype_Mark (Assoc.Un_Formal)); + + -- If the Entity of the default subtype denoted by the + -- unanalyzed formal has not been set, then it must refer + -- to another formal type of the enclosing generic. 
So we + -- locate the subtype "renaming" in Result_Renamings that + -- corresponds to the formal type (by comparing the simple + -- names), and set Match's Entity to the entity denoted by + -- that subtype's subtype_indication (which will denote the + -- actual subtype corresponding to the other formal type). + -- This must be done before calling Instantiate_Type, since + -- that function relies heavily on the entity being set. + -- (Note also that there's similar code inside procedure + -- Validate_Derived_Type_Instance that deals with retrieving + -- the ancestor type of formal derived types.) + + if No (Entity (Match)) then + declare +pragma Assert (Is_Non_Empty_List (Result_Renamings)); + +Decl : Node_Id := First (Result_Renamings); + + begin +-- Locate subtype referenced by the default subtype +-- in the list of renamings. + +while Present (Decl) loop + if Nkind (Decl) = N_Subtype_Declaration + and then + Chars (Match) = + Chars (Defining_Identifier (Decl)) + then + Set_Entity +(Match, + Entity (Subtype_Indication (Decl))); + + exit; + + else + Next (Decl); + end if; +end loop; + +pragma Assert (Present (Entity (Match))); + end; + end if; + Append_List (Instantiate_Type (Assoc.Un_Formal, Match, Assoc.An_Formal, @@ -18161,6 +18207,7 @@ package body Sem_Ch12 is function Reference_Formal (N : Node_Id) return Traverse_Result is begin if Is_Entity_Name (N) + and then Present (Entity (N)) and then Scope (Entity (N)) = Current_Scope then return Abandon; @@ -18356,7 +18403,7 @@ package body Sem_Ch12 is procedure Validate_Derived_Type_Default is begin - if not Is_Ancestor (Etype (Formal), Def_Sub) then + if not Is_Ancestor (Etype (Base_Type (Formal)), Def_Sub) then Error_Msg_NE ("default must be a descendent of&", Default, Etype (Formal)); end if; @@ -18529,20 +18576,23 @
[gcc r15-6621] ada: Use the syntax of Ada 2012 if-expression in -gnatR3 output
https://gcc.gnu.org/g:90f504df6f22c1a28831875b79783e0718330528 commit r15-6621-g90f504df6f22c1a28831875b79783e0718330528 Author: Eric Botcazou Date: Sun Dec 15 16:37:57 2024 +0100 ada: Use the syntax of Ada 2012 if-expression in -gnatR3 output This uses the syntax of Ada 2012 if-expression in the output produced by the -gnatR3 switch for dynamic expressions. gcc/ada/ChangeLog: * repinfo.adb (List_GCC_Expression.Print_Expr) : Do not output the final "end". Diff: --- gcc/ada/repinfo.adb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/ada/repinfo.adb b/gcc/ada/repinfo.adb index c08a232a3ab0..969aa63c6980 100644 --- a/gcc/ada/repinfo.adb +++ b/gcc/ada/repinfo.adb @@ -683,7 +683,7 @@ package body Repinfo is Print_Expr (Node.Op2); Write_Str (" else "); Print_Expr (Node.Op3); -Write_Str (" end)"); +Write_Str (")"); end if; when Plus_Expr =>
[gcc r15-6620] ada: Preserve Warning_Doc_Switch in gnatprove invocation
https://gcc.gnu.org/g:c5ce2451324b643fefdaf2f463958b6d82010541 commit r15-6620-gc5ce2451324b643fefdaf2f463958b6d82010541 Author: Johannes Kanig Date: Tue Dec 3 19:53:28 2024 +0900 ada: Preserve Warning_Doc_Switch in gnatprove invocation When invoked by gnat2why, the Warning_Doc_Switch was unintentionally reset. gcc/ada/ChangeLog: * gnat1drv.adb: (SPARK_Library_Warning): preserve Warning_Doc_Switch Diff: --- gcc/ada/gnat1drv.adb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/ada/gnat1drv.adb b/gcc/ada/gnat1drv.adb index 120de4afb71f..39aa89c43a31 100644 --- a/gcc/ada/gnat1drv.adb +++ b/gcc/ada/gnat1drv.adb @@ -523,6 +523,8 @@ procedure Gnat1drv is Restore_Warnings ((Warnings_Package.Elab_Warnings => True, Warnings_Package.Warn_On_Suspicious_Contract => True, + Warnings_Package.Warning_Doc_Switch => + Warnsw.Warning_Doc_Switch, others => False)); -- Suppress the generation of name tables for enumerations, which are
[gcc r15-6619] ada: Restrict previous change made to expansion of allocators
https://gcc.gnu.org/g:bd1df4e8f6cef63934640480b67b92494a735737 commit r15-6619-gbd1df4e8f6cef63934640480b67b92494a735737 Author: Eric Botcazou Date: Mon Dec 16 08:59:26 2024 +0100 ada: Restrict previous change made to expansion of allocators There is no need to build a cleanup if exceptions cannot be propagated. gcc/ada/ChangeLog: * exp_ch4.adb (Expand_Allocator_Expression): Do not build a cleanup if restriction No_Exception_Propagation is active. * exp_ch6.adb (Make_Build_In_Place_Call_In_Allocator): Likewise. Diff: --- gcc/ada/exp_ch4.adb | 1 + gcc/ada/exp_ch6.adb | 1 + 2 files changed, 2 insertions(+) diff --git a/gcc/ada/exp_ch4.adb b/gcc/ada/exp_ch4.adb index 75d79019f807..6e8c5c83da5f 100644 --- a/gcc/ada/exp_ch4.adb +++ b/gcc/ada/exp_ch4.adb @@ -636,6 +636,7 @@ package body Exp_Ch4 is and then Nkind (Exp) = N_Function_Call and then not (Is_Entity_Name (Name (Exp)) and then No_Raise (Entity (Name (Exp + and then not Restriction_Active (No_Exception_Propagation) and then RTE_Available (RE_Free) and then not Debug_Flag_QQ); -- Return True if a cleanup needs to be built to deallocate the memory diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb index a339a223f09f..37184fd28ebe 100644 --- a/gcc/ada/exp_ch6.adb +++ b/gcc/ada/exp_ch6.adb @@ -8505,6 +8505,7 @@ package body Exp_Ch6 is and then not For_Special_Return_Object (Allocator) and then not (Is_Entity_Name (Name (Func_Call)) and then No_Raise (Entity (Name (Func_Call + and then not Restriction_Active (No_Exception_Propagation) and then RTE_Available (RE_Free) and then not Debug_Flag_QQ then
[gcc r15-6633] ada: Handle attributes related to Ada 2012 iterators as internal
https://gcc.gnu.org/g:a1b92ccf90b7376dbe36716d75fb270434797ddc commit r15-6633-ga1b92ccf90b7376dbe36716d75fb270434797ddc Author: Piotr Trojanek Date: Tue Mar 26 16:23:41 2024 +0100 ada: Handle attributes related to Ada 2012 iterators as internal Use existing machinery for internal attributes to handle attributes related to Ada 2012 iterators. All these attributes exist exclusively as a means to delay processing. Code cleanup. The only change in behavior is the wording of error emitted when one of the internal attributes appears in source code: from "illegal attribute" (which used to be emitted in the analysis) to "unrecognized attribute" (which is emitted by the parser). gcc/ada/ChangeLog: * exp_attr.adb (Expand_N_Attribute_Reference): Remove explicit handling of attributes related to Ada 2012 iterators. * sem_attr.adb (Analyze_Attribute, Eval_Attribute): Likewise; move attribute Reduce according to alphabetic order. * snames.adb-tmpl (Get_Attribute_Id): Add support for new internal attributes. * snames.ads-tmpl: Recognize names of new internal attributes. (Attribute_Id): Recognize new internal attributes. (Internal_Attribute_Id): Likewise. (Is_Internal_Attribute_Name): Avoid duplication in comment. Diff: --- gcc/ada/exp_attr.adb| 12 gcc/ada/sem_attr.adb| 32 +++- gcc/ada/snames.adb-tmpl | 33 - gcc/ada/snames.ads-tmpl | 32 +++- 4 files changed, 46 insertions(+), 63 deletions(-) diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb index 904293bbd1d6..911b9dcf8070 100644 --- a/gcc/ada/exp_attr.adb +++ b/gcc/ada/exp_attr.adb @@ -2266,18 +2266,6 @@ package body Exp_Attr is case Id is - -- Attributes related to Ada 2012 iterators. They are only allowed in - -- attribute definition clauses and should never be expanded. 
- - when Attribute_Constant_Indexing - | Attribute_Default_Iterator - | Attribute_Implicit_Dereference - | Attribute_Iterable - | Attribute_Iterator_Element - | Attribute_Variable_Indexing - => - raise Program_Error; - -- Internal attributes used to deal with Ada 2012 delayed aspects. These -- were already rejected by the parser. Thus they shouldn't appear here. diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb index 7295784704fd..53b96501d788 100644 --- a/gcc/ada/sem_attr.adb +++ b/gcc/ada/sem_attr.adb @@ -3423,18 +3423,6 @@ package body Sem_Attr is case Attr_Id is - -- Attributes related to Ada 2012 iterators. Attribute specifications - -- exist for these, but they cannot be queried. - - when Attribute_Constant_Indexing - | Attribute_Default_Iterator - | Attribute_Implicit_Dereference - | Attribute_Iterator_Element - | Attribute_Iterable - | Attribute_Variable_Indexing - => - Error_Msg_N ("illegal attribute", N); - -- Internal attributes used to deal with Ada 2012 delayed aspects. These -- were already rejected by the parser. Thus they shouldn't appear here. @@ -9015,19 +9003,6 @@ package body Sem_Attr is case Id is - -- Attributes related to Ada 2012 iterators; nothing to evaluate for - -- these. - - when Attribute_Constant_Indexing - | Attribute_Default_Iterator - | Attribute_Implicit_Dereference - | Attribute_Iterator_Element - | Attribute_Iterable - | Attribute_Reduce - | Attribute_Variable_Indexing - => - null; - -- Internal attributes used to deal with Ada 2012 delayed aspects. -- These were already rejected by the parser. Thus they shouldn't -- appear here. 
@@ -10208,6 +10183,13 @@ package body Sem_Attr is end case; end Range_Length; + + -- Reduce -- + + + when Attribute_Reduce => + null; + - -- Ref -- - diff --git a/gcc/ada/snames.adb-tmpl b/gcc/ada/snames.adb-tmpl index d49fdf4d74ac..62ca4de48661 100644 --- a/gcc/ada/snames.adb-tmpl +++ b/gcc/ada/snames.adb-tmpl @@ -125,15 +125,30 @@ package body Snames is function Get_Attribute_Id (N : Name_Id) return Attribute_Id is begin - if N = Name_CPU then - return Attribute_CPU; - elsif N = Name_Dispatching_Domain then - return Attribute_Dispatching_Domain; - elsif N = Name_Interrupt_Priority then - return Attribute_Interrupt_Priority; - else - return Attribute_Id'Val (N - First_Attribute_Name); - end if; + case N is + when Name_Constant_Indexing => +return Attribute_Constant_Indexing; + when Name_CPU => +return Attribute_C
[gcc r15-6635] ada: Avoid conversion from String to Name_Id at runtime
https://gcc.gnu.org/g:8f4194d6c3ccb5ab4bd4b420c37cdeb31b712c2a commit r15-6635-g8f4194d6c3ccb5ab4bd4b420c37cdeb31b712c2a Author: Piotr Trojanek Date: Tue Mar 26 15:05:47 2024 +0100 ada: Avoid conversion from String to Name_Id at runtime Code cleanup. gcc/ada/ChangeLog: * sem_prag.adb (Analyze_Attribute): Replace runtime conversion with existing constant. Diff: --- gcc/ada/sem_prag.adb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb index 64b5b01869da..defa0d787949 100644 --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -14791,7 +14791,7 @@ package body Sem_Prag is GNAT_Pragma; Check_Valid_Configuration_Pragma; Check_Arg_Count (1); -Check_Optional_Identifier (Arg1, "max_size"); +Check_Optional_Identifier (Arg1, Name_Max_Size); Arg := Get_Pragma_Arg (Arg1); Check_Arg_Is_OK_Static_Expression (Arg, Any_Integer);
[gcc r15-6638] ada: Fix abort deferral for finally parts
https://gcc.gnu.org/g:ec62ba1eb087a73e748a808067b96af5d512f6ae commit r15-6638-gec62ba1eb087a73e748a808067b96af5d512f6ae Author: Ronan Desplanques Date: Thu Dec 19 11:10:12 2024 +0100 ada: Fix abort deferral for finally parts This patch fixes two problems with how abort was deferred in finally parts. First, calls to runtime subprograms are now omitted when aborting is disallowed by active restrictions. Second, Abort_Undefer is now correctly called when the finally part propagates an exception. gcc/ada/ChangeLog: * exp_ch11.adb (Expand_N_Handled_Sequence_Of_Statements): Fix abort deferral. Diff: --- gcc/ada/exp_ch11.adb | 27 +++ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/gcc/ada/exp_ch11.adb b/gcc/ada/exp_ch11.adb index 66f386710089..189e0911fc4d 100644 --- a/gcc/ada/exp_ch11.adb +++ b/gcc/ada/exp_ch11.adb @@ -1302,14 +1302,25 @@ package body Exp_Ch11 is Expand_Cleanup_Actions (Parent (N)); end if; - if Present (Finally_Statements (N)) then - Prepend_To - (Finally_Statements (N), -Build_Runtime_Call (Sloc (N), RE_Abort_Defer)); - - Append_To - (Finally_Statements (N), -Build_Runtime_Call (Sloc (N), RE_Abort_Undefer)); + if Present (Finally_Statements (N)) and then Abort_Allowed then + if Exceptions_OK then +Set_Finally_Statements + (N, + New_List + (Build_Runtime_Call (Sloc (N), RE_Abort_Defer), + Build_Abort_Undefer_Block +(Sloc (N), + Stmts => Finally_Statements (N), + Context => N))); + else +Prepend_To + (Finally_Statements (N), + Build_Runtime_Call (Sloc (N), RE_Abort_Defer)); + +Append_To + (Finally_Statements (N), + Build_Runtime_Call (Sloc (N), RE_Abort_Undefer)); + end if; Analyze_List (Finally_Statements (N)); end if;
[gcc r15-6631] ada: Fix internal error on container aggregate for bounded vectors
https://gcc.gnu.org/g:ce13a3a47865387abac8c9ecf0e2bc4d63bada1b commit r15-6631-gce13a3a47865387abac8c9ecf0e2bc4d63bada1b Author: Eric Botcazou Date: Wed Dec 18 10:16:15 2024 +0100 ada: Fix internal error on container aggregate for bounded vectors The problem is that we analyze references to an object before the actual subtype of the object is established, thus creating a type mismatch that is flagged by the code generator. gcc/ada/ChangeLog: * exp_ch7.ads (Store_After_Actions_In_Scope_Without_Analysis): New procedure declaration. * exp_ch7.adb (Store_New_Actions_In_Scope): New procedure. (Store_Actions_In_Scope): Call Store_New_Actions_In_Scope when the target list is empty. (Store_After_Actions_In_Scope_Without_Analysis): New procedure body. * exp_aggr.adb (Expand_Container_Aggregate): For a declaration that is wrapped in a transient scope, also defer the analysis of the new code until after the declaration is analyzed. Diff: --- gcc/ada/exp_aggr.adb | 9 + gcc/ada/exp_ch7.adb | 56 +++- gcc/ada/exp_ch7.ads | 7 +-- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb index a51e02bc3082..428115f81ba6 100644 --- a/gcc/ada/exp_aggr.adb +++ b/gcc/ada/exp_aggr.adb @@ -7345,12 +7345,13 @@ package body Exp_Aggr is -- If a transient scope has been created around the declaration, we -- need to attach the code to it so that the finalization actions of - -- the declaration will be inserted after it. Otherwise, we directly - -- insert it after the declaration and it will be analyzed only once - -- the declaration is processed. + -- the declaration will be inserted after it; otherwise, we directly + -- insert it after the declaration. In both cases, the code will be + -- analyzed after the declaration is processed, i.e. once the actual + -- subtype of the object is established. 
if Scope_Is_Transient and then Par = Node_To_Be_Wrapped then -Insert_Actions_After (Par, Aggr_Code); +Store_After_Actions_In_Scope_Without_Analysis (Aggr_Code); else Insert_List_After (Par, Aggr_Code); end if; diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb index 171ad4ef3952..a841d3af60fd 100644 --- a/gcc/ada/exp_ch7.adb +++ b/gcc/ada/exp_ch7.adb @@ -514,7 +514,13 @@ package body Exp_Ch7 is -- cleanup actions are performed at the end of the block. procedure Store_Actions_In_Scope (AK : Scope_Action_Kind; L : List_Id); - -- Shared processing for Store_xxx_Actions_In_Scope + -- Shared processing for the Store_xxx_Actions_In_Scope routines: attach + -- the list L of actions to the list of actions stored in the top of the + -- scope stack specified by AK. + + procedure Store_New_Actions_In_Scope (AK : Scope_Action_Kind; L : List_Id); + -- Same as above for the case where the list of actions stored in the top + -- of the scope stack specified by AK is empty. --- -- Unnesting procedures for CCG and LLVM -- @@ -8907,14 +8913,7 @@ package body Exp_Ch7 is begin if Is_Empty_List (Actions) then - Actions := L; - - if Is_List_Member (SE.Node_To_Be_Wrapped) then -Set_Parent (L, Parent (SE.Node_To_Be_Wrapped)); - else -Set_Parent (L, SE.Node_To_Be_Wrapped); - end if; - + Store_New_Actions_In_Scope (AK, L); Analyze_List (L); elsif AK = Before then @@ -8934,6 +8933,22 @@ package body Exp_Ch7 is Store_Actions_In_Scope (After, L); end Store_After_Actions_In_Scope; + --- + -- Store_After_Actions_In_Scope_Without_Analysis -- + --- + + procedure Store_After_Actions_In_Scope_Without_Analysis (L : List_Id) is + SE : Scope_Stack_Entry renames Scope_Stack.Table (Scope_Stack.Last); + Actions : List_Id renames SE.Actions_To_Be_Wrapped (After); + + begin + if Is_Empty_List (Actions) then + Store_New_Actions_In_Scope (After, L); + else + Insert_List_Before (First (Actions), L); + end if; + end Store_After_Actions_In_Scope_Without_Analysis; + --- -- Store_Before_Actions_In_Scope -- --- 
@@ -8952,6 +8967,29 @@ package body Exp_Ch7 is Store_Actions_In_Scope (Cleanup, L); end Store_Cleanup_Actions_In_Scope; + + -- Store_New_Actions_In_Scope -- + + + procedure Store_New_Actions_In_Scope (AK : Scope_Action_Kind; L : List_Id) + is + SE : Scope_S
[gcc r15-6630] ada: Add guard to System.Val_Real.Large_Powfive against pathological input
https://gcc.gnu.org/g:c7799a8108e2e53e80d41281e5625b78236d039a commit r15-6630-gc7799a8108e2e53e80d41281e5625b78236d039a Author: Eric Botcazou Date: Tue Dec 17 20:00:38 2024 +0100 ada: Add guard to System.Val_Real.Large_Powfive against pathological input There is no need to keep multiplying the result once it saturates to +Inf. gcc/ada/ChangeLog: * libgnat/s-powflt.ads (Maxpow_Exact): Minor comment fix. * libgnat/s-powlfl.ads (Maxpow_Exact): Likewise. * libgnat/s-powllf.ads (Maxpow_Exact): Likewise. * libgnat/s-valrea.adb (Large_Powfive) [1 parameter]: Exit the loop as soon as the result saturates to +Inf. (Large_Powfive) [2 parameters]: Likewise. Diff: --- gcc/ada/libgnat/s-powflt.ads | 2 +- gcc/ada/libgnat/s-powlfl.ads | 2 +- gcc/ada/libgnat/s-powllf.ads | 4 ++-- gcc/ada/libgnat/s-valrea.adb | 16 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/gcc/ada/libgnat/s-powflt.ads b/gcc/ada/libgnat/s-powflt.ads index eadea7f914db..ae904f056173 100644 --- a/gcc/ada/libgnat/s-powflt.ads +++ b/gcc/ada/libgnat/s-powflt.ads @@ -36,7 +36,7 @@ package System.Powten_Flt is Maxpow_Exact : constant := 10; -- Largest power of five exactly representable with Float. It is equal to - -- floor (M * log 2 / log 5), when M is the size of the mantissa (24). + -- floor (M * log 2 / log 5), where M is the size of the mantissa (24). -- It also works for any number of the form 5*(2**N) and in particular 10. Maxpow : constant := Maxpow_Exact * 2; diff --git a/gcc/ada/libgnat/s-powlfl.ads b/gcc/ada/libgnat/s-powlfl.ads index 13630fcad40a..2bf72d9c17d3 100644 --- a/gcc/ada/libgnat/s-powlfl.ads +++ b/gcc/ada/libgnat/s-powlfl.ads @@ -36,7 +36,7 @@ package System.Powten_LFlt is Maxpow_Exact : constant := 22; -- Largest power of five exactly representable with Long_Float. It is equal - -- to floor (M * log 2 / log 5), when M is the size of the mantissa (53). + -- to floor (M * log 2 / log 5), where M is the size of the mantissa (53). 
-- It also works for any number of the form 5*(2**N) and in particular 10. Maxpow : constant := Maxpow_Exact * 2; diff --git a/gcc/ada/libgnat/s-powllf.ads b/gcc/ada/libgnat/s-powllf.ads index b470ae044de8..b184b31ea9c0 100644 --- a/gcc/ada/libgnat/s-powllf.ads +++ b/gcc/ada/libgnat/s-powllf.ads @@ -37,8 +37,8 @@ package System.Powten_LLF is Maxpow_Exact : constant := (if Long_Long_Float'Machine_Mantissa = 64 then 27 else 22); -- Largest power of five exactly representable with Long_Long_Float. It is - -- equal to floor (M * log 2 / log 5), when M is the size of the mantissa - -- assumed to be either 64 for IEEE Extended or 53 for IEEE Double. + -- equal to floor (M * log 2 / log 5), where M is the size of the mantissa + -- (assumed to be either 64 for IEEE Extended or 53 for IEEE Double). -- It also works for any number of the form 5*(2**N) and in particular 10. Maxpow : constant := Maxpow_Exact * 2; diff --git a/gcc/ada/libgnat/s-valrea.adb b/gcc/ada/libgnat/s-valrea.adb index f554280c0ead..3b0f0a99a638 100644 --- a/gcc/ada/libgnat/s-valrea.adb +++ b/gcc/ada/libgnat/s-valrea.adb @@ -336,6 +336,7 @@ package body System.Val_Real is pragma Import (Ada, Powfive_300); for Powfive_300'Address use Powfive_300_Address; + H : Double_T; R : Double_T; E : Natural; @@ -359,8 +360,15 @@ package body System.Val_Real is E := Exp - Maxpow; end if; + -- Accumulate 5**Maxpow into R until E <= Maxpow or R saturates to +Inf + while E > Maxpow loop + H := R; R := R * Powfive (Maxpow); + if R = H then +E := Maxpow; +exit; + end if; E := E - Maxpow; end loop; @@ -381,6 +389,7 @@ package body System.Val_Real is pragma Import (Ada, Powfive); for Powfive'Address use Powfive_Address; + H : Double_T; R : Double_T; E : Natural; @@ -407,8 +416,15 @@ package body System.Val_Real is S := 0; end if; + -- Accumulate 5**Maxpow into R until E <= Maxpow or R saturates to +Inf + while E > Maxpow loop + H := R; R := R * Powfive (Maxpow); + if R = H then +E := Maxpow; +exit; + end if; E := E - Maxpow; 
end loop;
[gcc r15-6639] ada: Remove dead code in detection of null record definitions
https://gcc.gnu.org/g:672175768244d6962e11df5dee0cdf0fb28051ed commit r15-6639-g672175768244d6962e11df5dee0cdf0fb28051ed Author: Piotr Trojanek Date: Thu Dec 19 15:32:56 2024 +0100 ada: Remove dead code in detection of null record definitions Code cleanup; behavior is unaffected. gcc/ada/ChangeLog: * sem_util.adb (Is_Null_Record_Definition): Remove check for Component_List being present after using it; replace check for component item being a component declaration with an assertion; fix style in comment. Diff: --- gcc/ada/sem_util.adb | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb index a11afa8be2d3..55e9979a2155 100644 --- a/gcc/ada/sem_util.adb +++ b/gcc/ada/sem_util.adb @@ -18984,22 +18984,19 @@ package body Sem_Util is function Is_Null_Record_Definition (Record_Def : Node_Id) return Boolean is Item : Node_Id; begin - -- Testing Null_Present is just an optimization, not required. + -- Testing Null_Present is just an optimization, not required if Null_Present (Record_Def) then return True; elsif Present (Variant_Part (Component_List (Record_Def))) then return False; - elsif No (Component_List (Record_Def)) then - return True; end if; Item := First_Non_Pragma (Component_Items (Component_List (Record_Def))); while Present (Item) loop - if Nkind (Item) = N_Component_Declaration - and then Is_Internal_Name (Chars (Defining_Identifier (Item))) - then + pragma Assert (Nkind (Item) = N_Component_Declaration); + if Is_Internal_Name (Chars (Defining_Identifier (Item))) then null; else return False;
[gcc r15-6640] ada: Fix violations of GNAT-specific GNATcheck rules
https://gcc.gnu.org/g:e8aadcb3518359c13fe5bd8fb7804d9cd60b4cad commit r15-6640-ge8aadcb3518359c13fe5bd8fb7804d9cd60b4cad Author: Piotr Trojanek Date: Fri Dec 20 00:07:23 2024 +0100 ada: Fix violations of GNAT-specific GNATcheck rules Code cleanup; semantics is unaffected. gcc/ada/ChangeLog: * diagnostics-pretty_emitter.adb (Get_Last_Line_Char): Fix whitespace. * sem_aggr.adb (Resolve_Array_Aggregate): Fix style. Diff: --- gcc/ada/diagnostics-pretty_emitter.adb | 1 - gcc/ada/sem_aggr.adb | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/ada/diagnostics-pretty_emitter.adb b/gcc/ada/diagnostics-pretty_emitter.adb index e376ae128038..c624f4001696 100644 --- a/gcc/ada/diagnostics-pretty_emitter.adb +++ b/gcc/ada/diagnostics-pretty_emitter.adb @@ -327,7 +327,6 @@ package body Diagnostics.Pretty_Emitter is is Cur_Loc : Source_Ptr := Get_Line_End (Buf, Loc); begin - while Cur_Loc > Buf'First and then Buf (Cur_Loc) in ASCII.LF | ASCII.CR loop diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb index 5bef9e224846..562240ca4ef6 100644 --- a/gcc/ada/sem_aggr.adb +++ b/gcc/ada/sem_aggr.adb @@ -2968,8 +2968,8 @@ package body Sem_Aggr is Scope_Parent : Node_Id; begin if Nkind (Exp) /= N_Identifier -or else not Present (Entity (Exp)) -or else not Present (Scope (Entity (Exp))) +or else No (Entity (Exp)) +or else No (Scope (Entity (Exp))) or else Ekind (Scope (Entity (Exp))) /= E_Loop then return OK;
[gcc r15-6636] ada: Remove flag Is_Inherited_Pragma which is only set and never used
https://gcc.gnu.org/g:4b64d6d6439b002d17055c1c1112b248664face1 commit r15-6636-g4b64d6d6439b002d17055c1c1112b248664face1 Author: Piotr Trojanek Date: Tue Jan 30 01:10:17 2024 +0100 ada: Remove flag Is_Inherited_Pragma which is only set and never used Code cleanup; behavior is unaffected. Flag Is_Inherited_Pragma is only set in GNAT, but is not actually used, neither by the compiler nor by any backend. gcc/ada/ChangeLog: * contracts.adb (Inherit_Pragma): Don't set flag Is_Inherited_Pragma. * gen_il-fields.ads (Opt_Field_Enum): Remove field identifier. * gen_il-gen-gen_nodes.adb (N_Pragma): Remove field from node. * sinfo.ads (Is_Inherited_Pragma): Remove field description. (N_Pragma): Remove field reference. Diff: --- gcc/ada/contracts.adb| 8 ++-- gcc/ada/gen_il-fields.ads| 1 - gcc/ada/gen_il-gen-gen_nodes.adb | 1 - gcc/ada/sinfo.ads| 5 - 4 files changed, 2 insertions(+), 13 deletions(-) diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb index 365c3e61130d..4aed4dff27b8 100644 --- a/gcc/ada/contracts.adb +++ b/gcc/ada/contracts.adb @@ -3593,8 +3593,7 @@ package body Contracts is procedure Inherit_Pragma (Prag_Id : Pragma_Id) is - Prag : constant Node_Id := Get_Pragma (From_Subp, Prag_Id); - New_Prag : Node_Id; + Prag : constant Node_Id := Get_Pragma (From_Subp, Prag_Id); begin -- A pragma cannot be part of more than one First_Pragma/Next_Pragma @@ -3602,10 +3601,7 @@ package body Contracts is -- flagged as inherited for distinction purposes. 
if Present (Prag) then -New_Prag := New_Copy_Tree (Prag); -Set_Is_Inherited_Pragma (New_Prag); - -Add_Contract_Item (New_Prag, Subp); +Add_Contract_Item (New_Copy_Tree (Prag), Subp); end if; end Inherit_Pragma; diff --git a/gcc/ada/gen_il-fields.ads b/gcc/ada/gen_il-fields.ads index 70ece337e639..c778f00856d4 100644 --- a/gcc/ada/gen_il-fields.ads +++ b/gcc/ada/gen_il-fields.ads @@ -266,7 +266,6 @@ package Gen_IL.Fields is Is_Ignored_Ghost_Pragma, Is_Implicit_With, Is_In_Discriminant_Check, - Is_Inherited_Pragma, Is_Initialization_Block, Is_Interpolated_String_Literal, Is_Known_Guaranteed_ABE, diff --git a/gcc/ada/gen_il-gen-gen_nodes.adb b/gcc/ada/gen_il-gen-gen_nodes.adb index af5049bf33e2..c3a97e6e16e6 100644 --- a/gcc/ada/gen_il-gen-gen_nodes.adb +++ b/gcc/ada/gen_il-gen-gen_nodes.adb @@ -1439,7 +1439,6 @@ begin -- Gen_IL.Gen.Gen_Nodes Sm (Is_Generic_Contract_Pragma, Flag), Sm (Is_Ignored, Flag), Sm (Is_Ignored_Ghost_Pragma, Flag), -Sm (Is_Inherited_Pragma, Flag), Sm (Next_Pragma, Node_Id), Sm (Next_Rep_Item, Node_Id), Sm (Uneval_Old_Accept, Flag), diff --git a/gcc/ada/sinfo.ads b/gcc/ada/sinfo.ads index 6abda7474bb6..e9b2f0f3c5c8 100644 --- a/gcc/ada/sinfo.ads +++ b/gcc/ada/sinfo.ads @@ -1763,10 +1763,6 @@ package Sinfo is --discriminant check has a correct value cannot be performed in this --case (or the discriminant check may be optimized away). - -- Is_Inherited_Pragma - --This flag is set in an N_Pragma node that appears in a N_Contract node - --to indicate that the pragma has been inherited from a parent context. - -- Is_Initialization_Block --Defined in block nodes. Set when the block statement was created by --the finalization machinery to wrap initialization statements. This @@ -2694,7 +2690,6 @@ package Sinfo is -- Next_Rep_Item -- Is_Generic_Contract_Pragma -- Is_Checked_Ghost_Pragma - -- Is_Inherited_Pragma -- Is_Analyzed_Pragma -- Class_Present set if from Aspect with 'Class -- Uneval_Old_Accept
[gcc r15-6637] ada: Improved checking of uses of package renamings
https://gcc.gnu.org/g:710789117374d335184590950280ad0c5bb70fbe commit r15-6637-g710789117374d335184590950280ad0c5bb70fbe Author: Steve Baird Date: Tue Dec 17 13:27:04 2024 -0800 ada: Improved checking of uses of package renamings In some cases, the RM 8.5.3(3.1/2) legality rule about uses of renamings of limited views of packages was implemented incorrectly, resulting in rejecting legal uses. gcc/ada/ChangeLog: * gen_il-fields.ads: add new Renames_Limited_View field. * gen_il-gen-gen_entities.adb: add Renames_Limited_View flag for packages. * einfo.ads: add comment documenting Renames_Limited_View flag. * sem_ch8.adb (Analyze_Package_Renaming): Set new Renames_Limited_View flag. Test new Renames_Limited_View flag instead of calling Has_Limited_View. If Has_Limited_View is True, that just means that somebody, sometime during this compilation needed to reference the limited view of the package; so that function returns True too often to be used here. (Find_Expanded_Name): Test new Renames_Limited_View flag instead of calling Has_Limited_View. Diff: --- gcc/ada/einfo.ads | 9 + gcc/ada/gen_il-fields.ads | 1 + gcc/ada/gen_il-gen-gen_entities.adb | 1 + gcc/ada/sem_ch8.adb | 7 --- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads index 1946e68b3c76..a7c0584d8da4 100644 --- a/gcc/ada/einfo.ads +++ b/gcc/ada/einfo.ads @@ -4247,6 +4247,15 @@ package Einfo is -- within an accept statement. For all remaining cases (discriminants, -- loop parameters) the field is Empty. +--Renames_Limited_View +-- Defined in package entities. True for a package renaming if either +-- a) the renamed package is not itself a renaming, and the renaming +-- denotes a limited view of the renamed package (as seen at the +-- point of the renaming declaration, as opposed to later on when +-- the renaming is referenced); or +-- b) the renamed package is itself a renaming and the +-- Renames_Limited_View flag is True for the renamed package. 
+ --Requires_Overriding -- Defined in all subprograms and entries. Set for subprograms that -- require overriding as defined by RM-2005-3.9.3(6/2). Note that this diff --git a/gcc/ada/gen_il-fields.ads b/gcc/ada/gen_il-fields.ads index c778f00856d4..fe2da80c5488 100644 --- a/gcc/ada/gen_il-fields.ads +++ b/gcc/ada/gen_il-fields.ads @@ -879,6 +879,7 @@ package Gen_IL.Fields is Relative_Deadline_Variable, Renamed_In_Spec, Renamed_Or_Alias, -- Shared among Alias, Renamed_Entity, Renamed_Object + Renames_Limited_View, Requires_Overriding, Return_Applies_To, Return_Present, diff --git a/gcc/ada/gen_il-gen-gen_entities.adb b/gcc/ada/gen_il-gen-gen_entities.adb index 8cf66b2611df..3df1f8e72844 100644 --- a/gcc/ada/gen_il-gen-gen_entities.adb +++ b/gcc/ada/gen_il-gen-gen_entities.adb @@ -1281,6 +1281,7 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (Related_Instance, Node_Id), Sm (Renamed_In_Spec, Flag), Sm (Renamed_Or_Alias, Node_Id), +Sm (Renames_Limited_View, Flag), Sm (Scope_Depth_Value, Unat), Sm (SPARK_Aux_Pragma, Node_Id), Sm (SPARK_Aux_Pragma_Inherited, Flag), diff --git a/gcc/ada/sem_ch8.adb b/gcc/ada/sem_ch8.adb index 533b62aef321..c6b8f6c5d7c8 100644 --- a/gcc/ada/sem_ch8.adb +++ b/gcc/ada/sem_ch8.adb @@ -1665,8 +1665,7 @@ package body Sem_Ch8 is Set_Etype (New_P, Standard_Void_Type); elsif Present (Renamed_Entity (Old_P)) -and then (From_Limited_With (Renamed_Entity (Old_P)) -or else Has_Limited_View (Renamed_Entity (Old_P))) +and then Renames_Limited_View (Old_P) and then not Unit_Is_Visible (Cunit (Get_Source_Unit (Renamed_Entity (Old_P then @@ -1691,8 +1690,10 @@ package body Sem_Ch8 is if Present (Renamed_Entity (Old_P)) then Set_Renamed_Entity (New_P, Renamed_Entity (Old_P)); +Set_Renames_Limited_View (New_P, Renames_Limited_View (Old_P)); else Set_Renamed_Entity (New_P, Old_P); +Set_Renames_Limited_View (New_P, From_Limited_With (Old_P)); end if; -- The package renaming declaration may become Ghost if it renames a @@ -7077,7 +7078,7 @@ package body Sem_Ch8 is 
("renaming of limited view of package & not usable in this" & " context (RM 8.5.3(3.1/2))", Prefix (N), P_Name); - elsif Has_Limited_View (P_Name) + elsif Renames_Limited_View (Entity (Prefix (N))) and then not Unit_Is_Visible (Cunit (Get_Source_Unit (P_Name))) and then not Is_Visible_Through_Renamings (P_Name)
[gcc r15-6634] ada: Untangle check for restriction No_Implementation_Attributes
https://gcc.gnu.org/g:b014d250119378663d4a5d4a650fa53bbaca1c3f commit r15-6634-gb014d250119378663d4a5d4a650fa53bbaca1c3f Author: Piotr Trojanek Date: Tue Mar 26 16:52:08 2024 +0100 ada: Untangle check for restriction No_Implementation_Attributes Code cleanup; given that no attribute is both defined by Ada 83 and specific to GNAT, the semantics is unaffected. gcc/ada/ChangeLog: * sem_attr.adb (Analyze_Attribute): Simplify logic. Diff: --- gcc/ada/sem_attr.adb | 15 ++- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb index 53b96501d788..39725d23442b 100644 --- a/gcc/ada/sem_attr.adb +++ b/gcc/ada/sem_attr.adb @@ -3214,26 +3214,23 @@ package body Sem_Attr is -- Deal with Ada 83 issues - if not Attribute_83 (Attr_Id) then -if Ada_Version = Ada_83 then - Error_Msg_Name_1 := Aname; - Error_Msg_N ("(Ada 83) attribute% is not standard??", N); -end if; - -if Attribute_Impl_Def (Attr_Id) then - Check_Restriction (No_Implementation_Attributes, N); -end if; + if not Attribute_83 (Attr_Id) and then Ada_Version = Ada_83 then +Error_Msg_Name_1 := Aname; +Error_Msg_N ("(Ada 83) attribute% is not standard??", N); end if; -- Deal with Ada 2005 attributes that are implementation attributes -- because they appear in a version of Ada before Ada 2005, ditto for -- Ada 2012 and Ada 2022 attributes appearing in an earlier version. + -- Likewise for GNAT implementation-defined attributes. if (Attribute_05 (Attr_Id) and then Ada_Version < Ada_2005) or else (Attribute_12 (Attr_Id) and then Ada_Version < Ada_2012) or else (Attribute_22 (Attr_Id) and then Ada_Version < Ada_2022) + or else +Attribute_Impl_Def (Attr_Id) then Check_Restriction (No_Implementation_Attributes, N); end if;
[gcc r15-6629] ada: Drop vxworks-smp-ppc-link.spec
https://gcc.gnu.org/g:0f83183d724d0def1baceef2fa81e7cae92a4ce8 commit r15-6629-g0f83183d724d0def1baceef2fa81e7cae92a4ce8 Author: Alexandre Oliva Date: Tue Aug 15 22:07:27 2023 -0300 ada: Drop vxworks-smp-ppc-link.spec Adding -msmp to linker options in system-vxworks-ppc-rtp-smp.ads obviated vxworks-smp-ppc-link.spec. Drop it. gcc/ada/ChangeLog: * libgnat/system-vxworks-ppc-rtp-smp.ads: Drop --specs=vxworks-smp-ppc-link.spec from Linker_Options. * vxworks-smp-ppc-link.spec: Delete. Diff: --- gcc/ada/libgnat/system-vxworks-ppc-rtp-smp.ads | 1 - gcc/ada/vxworks-smp-ppc-link.spec | 4 2 files changed, 5 deletions(-) diff --git a/gcc/ada/libgnat/system-vxworks-ppc-rtp-smp.ads b/gcc/ada/libgnat/system-vxworks-ppc-rtp-smp.ads index d4bcefd1a44e..1896bfaec221 100644 --- a/gcc/ada/libgnat/system-vxworks-ppc-rtp-smp.ads +++ b/gcc/ada/libgnat/system-vxworks-ppc-rtp-smp.ads @@ -121,7 +121,6 @@ package System is private pragma Linker_Options ("-msmp"); - pragma Linker_Options ("--specs=vxworks-smp-ppc-link.spec"); pragma Linker_Options ("--specs=vxworks-ppc-link.spec"); -- Setup proper set of -L's for this configuration diff --git a/gcc/ada/vxworks-smp-ppc-link.spec b/gcc/ada/vxworks-smp-ppc-link.spec deleted file mode 100644 index b68cac22c142.. --- a/gcc/ada/vxworks-smp-ppc-link.spec +++ /dev/null @@ -1,4 +0,0 @@ -*lib: -+ %{mrtp:%{!shared: \ - -L%:getenv(WIND_BASE /target/lib_smp/usr/lib/ppc/PPC32/common) \ - }}
[gcc r15-6680] i386: Change mnemonics from TCVTROWPS2PBF16[H, L] to TCVTROWPS2BF16[H, L]
https://gcc.gnu.org/g:814cbfc91a3c9f4286d13d04075287f6dac76e74 commit r15-6680-g814cbfc91a3c9f4286d13d04075287f6dac76e74 Author: Haochen Jiang Date: Thu Jan 2 16:55:34 2025 +0800 i386: Change mnemonics from TCVTROWPS2PBF16[H,L] to TCVTROWPS2BF16[H,L] In ISE056, the mnemonics for TCVTROWPS2PBF16[H,L] have been changed to TCVTROWPS2BF16[H,L]. gcc/ChangeLog: * config/i386/amxavx512intrin.h (_tile_cvtrowps2pbf16h_internal): Rename to... (_tile_cvtrowps2bf16h_internal): ...this. (_tile_cvtrowps2pbf16hi_internal): Rename to... (_tile_cvtrowps2bf16hi_internal): ...this. (_tile_cvtrowps2pbf16l_internal): Rename to... (_tile_cvtrowps2bf16l_internal): ...this. (_tile_cvtrowps2pbf16li_internal): Rename to... (_tile_cvtrowps2bf16li_internal): ...this. (_tile_cvtrowps2pbf16h): Rename to... (_tile_cvtrowps2bf16h): ...this. (_tile_cvtrowps2pbf16hi): Rename to... (_tile_cvtrowps2bf16hi): ...this. (_tile_cvtrowps2pbf16l): Rename to... (_tile_cvtrowps2bf16l): ...this. (_tile_cvtrowps2pbf16li): Rename to... (_tile_cvtrowps2bf16li): ...this. gcc/testsuite/ChangeLog: * gcc.target/i386/amxavx512-asmatt-1.c: Adjust intrin call. * gcc.target/i386/amxavx512-asmintel-1.c: Ditto. * gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Rename to... * gcc.target/i386/amxavx512-cvtrowps2bf16-2.c: ...this. Rename test functions. 
Diff: --- gcc/config/i386/amxavx512intrin.h | 32 +++--- gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c | 12 .../gcc.target/i386/amxavx512-asmintel-1.c | 12 ...rowps2pbf16-2.c => amxavx512-cvtrowps2bf16-2.c} | 30 ++-- 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/gcc/config/i386/amxavx512intrin.h b/gcc/config/i386/amxavx512intrin.h index 59d142948fb2..ab5362571d1e 100644 --- a/gcc/config/i386/amxavx512intrin.h +++ b/gcc/config/i386/amxavx512intrin.h @@ -53,38 +53,38 @@ dst; \ }) -#define _tile_cvtrowps2pbf16h_internal(src,A) \ +#define _tile_cvtrowps2bf16h_internal(src,A) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16h\t%1, %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", %1}" \ + ("{tcvtrowps2bf16h\t%1, %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", %1}" \ : "=v" (dst) : "r" ((unsigned) (A))); \ dst; \ }) -#define _tile_cvtrowps2pbf16hi_internal(src,imm) \ +#define _tile_cvtrowps2bf16hi_internal(src,imm)\ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", "#imm"}"\ + ("{tcvtrowps2bf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", "#imm"}" \ : "=v" (dst) :);\ dst; \ }) -#define _tile_cvtrowps2pbf16l_internal(src,A) \ +#define _tile_cvtrowps2bf16l_internal(src,A) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16l\t%1, %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", %1}" \ + ("{tcvtrowps2bf16l\t%1, %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"#src", %1}" \ : "=v" (dst) : "r" ((unsigned) (A))); \ dst; \ }) -#define _tile_cvtrowps2pbf16li_internal(src,imm) \ +#define _tile_cvtrowps2bf16li_internal(src,imm)\ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", "#imm"}"\ + ("{tcvtrowps2bf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"#
[gcc r15-6683] tree-optimization/118269 - SLP reduction chain and early breaks
https://gcc.gnu.org/g:2dd4eb15b63ca1f20a637e8b26567d5064a43a4f commit r15-6683-g2dd4eb15b63ca1f20a637e8b26567d5064a43a4f Author: Richard Biener Date: Tue Jan 7 11:15:43 2025 +0100 tree-optimization/118269 - SLP reduction chain and early breaks When we create the SLP reduction chain epilogue for the PHIs for the early exit we fail to properly classify the reduction as SLP reduction chain. The following fixes the corresponding checks. PR tree-optimization/118269 * tree-vect-loop.cc (vect_create_epilog_for_reduction): Use the correct stmt for the REDUC_GROUP_FIRST_ELEMENT lookup. * gcc.dg/vect/vect-early-break_131-pr118269.c: New testcase. Diff: --- .../gcc.dg/vect/vect-early-break_131-pr118269.c | 17 + gcc/tree-vect-loop.cc | 8 +--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_131-pr118269.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_131-pr118269.c new file mode 100644 index ..5f7abcee57c9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_131-pr118269.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-O3" } */ + +short g_113; +int func_1_l_1273, func_1_l_1370, func_1_l_1258; +void func_1() { + int l_1375; + for (; l_1375; l_1375--) { +for (; func_1_l_1370;) + ; +func_1_l_1273 &= !0; +func_1_l_1273 &= g_113; +if (func_1_l_1258) + break; + } +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 2b9d59566357..bb1138bfcfba 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -6039,7 +6039,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, # b1 = phi a2 = operation (a1) b2 = operation (b1) */ - bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); + bool slp_reduc += (slp_node + && !REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info))); bool direct_slp_reduc; tree induction_index = NULL_TREE; @@ -6331,7 +6333,7 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo, a multiple of the SLP group size. The same is true if we couldn't use a single defuse cycle. */ - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) + if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)) || direct_slp_reduc || (slp_reduc && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)) @@ -6632,7 +6634,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (slp_node) { tree initial_value = NULL_TREE; - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info))) initial_value = reduc_info->reduc_initial_values[0]; neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code, initial_value, false);
[gcc r15-6682] arm: [MVE intrinsics] Fix tuples field name (PR 118332)
https://gcc.gnu.org/g:42b24557bdf805d2913d3c75531fe441b1634332 commit r15-6682-g42b24557bdf805d2913d3c75531fe441b1634332 Author: Christophe Lyon Date: Tue Jan 7 16:59:03 2025 +0000 arm: [MVE intrinsics] Fix tuples field name (PR 118332) A recent commit mistakenly changed the field name for tuples from 'val' to '__val', but unlike SVE this name is mandated by ACLE. The patch simply switches the name back to 'val'. PR target/118332 gcc/ChangeLog: * config/arm/arm-mve-builtins.cc (wrap_type_in_struct): Use 'val' instead of '__val'. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/intrinsics/pr118332.c: New test. Diff: --- gcc/config/arm/arm-mve-builtins.cc | 5 ++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/pr118332.c | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index aacdbf159b97..4c52415f3f1b 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -464,13 +464,12 @@ register_vector_type (vector_type_index type) } /* Return a structure type that contains a single field of type FIELD_TYPE. - The field is called __val, but that's an internal detail rather than - an exposed part of the API. */ + The field is called 'val', as mandated by ACLE.
*/ static tree wrap_type_in_struct (tree field_type) { tree field = build_decl (input_location, FIELD_DECL, - get_identifier ("__val"), field_type); + get_identifier ("val"), field_type); tree struct_type = lang_hooks.types.make_type (RECORD_TYPE); DECL_FIELD_CONTEXT (field) = struct_type; TYPE_FIELDS (struct_type) = field; diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr118332.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr118332.c new file mode 100644 index 000000000000..a8f6389c58d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr118332.c @@ -0,0 +1,5 @@ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ + +#include <arm_mve.h> +uint32x4_t first(uint32x4x4_t a) { return a.val[0]; }
[gcc r15-6684] middle-end/118325 - nonlocal goto lowering
https://gcc.gnu.org/g:640b550425180bdab7bc67edc6a48465f4023888 commit r15-6684-g640b550425180bdab7bc67edc6a48465f4023888 Author: Richard Biener Date: Tue Jan 7 15:11:47 2025 +0100 middle-end/118325 - nonlocal goto lowering When nonlocal goto lowering creates an artificial label it fails to adjust its context. PR middle-end/118325 * tree-nested.cc (convert_nl_goto_reference): Assign proper context to generated artificial label. * gcc.dg/pr118325.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/pr118325.c | 16 ++++++++++++++++ gcc/tree-nested.cc | 1 + 2 files changed, 17 insertions(+) diff --git a/gcc/testsuite/gcc.dg/pr118325.c b/gcc/testsuite/gcc.dg/pr118325.c new file mode 100644 index 000000000000..74f92cc2bb6e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr118325.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-std=gnu17 -fchecking" } */ + +void f(void*); + +void z() +{ +void g() +{ +__label__ out; +int h(void) { goto out; } +f(h); +out:; +} +f(g); +} diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc index d538c8d2a47d..8d75a2f3310f 100644 --- a/gcc/tree-nested.cc +++ b/gcc/tree-nested.cc @@ -2694,6 +2694,7 @@ convert_nl_goto_reference (gimple_stmt_iterator *gsi, bool *handled_ops_p, { new_label = create_artificial_label (UNKNOWN_LOCATION); DECL_NONLOCAL (new_label) = 1; + DECL_CONTEXT (new_label) = target_context; *slot = new_label; } else
[gcc r15-6681] testsuite: Simplify target test and dg-options for AMO tests
https://gcc.gnu.org/g:51708cbd751e3af0d147ceae4da5c74dae1519ce commit r15-6681-g51708cbd751e3af0d147ceae4da5c74dae1519ce Author: Jeevitha Date: Wed Jan 8 01:03:12 2025 -0600 testsuite: Simplify target test and dg-options for AMO tests Removed powerpc*-*-* from the target test as it is always true. Simplified options by removing -mpower9-misc and -mvsx, which are enabled by default with -mdejagnu-cpu=power9. The has_arch_pwr9 check is also true with -mdejagnu-cpu=power9, so it has been removed. 2025-01-08 Jeevitha Palanisamy gcc/testsuite/ * gcc.target/powerpc/amo1.c: Removed powerpc*-*-* from the target and simplified dg-options. * gcc.target/powerpc/amo2.c: Simplified dg-options and added powerpc_vsx target check. Diff: --- gcc/testsuite/gcc.target/powerpc/amo1.c | 5 ++--- gcc/testsuite/gcc.target/powerpc/amo2.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.target/powerpc/amo1.c b/gcc/testsuite/gcc.target/powerpc/amo1.c index c5af373b4e99..9a981cd4219f 100644 --- a/gcc/testsuite/gcc.target/powerpc/amo1.c +++ b/gcc/testsuite/gcc.target/powerpc/amo1.c @@ -1,6 +1,5 @@ -/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ -/* { dg-options "-mvsx -mpower9-misc -O2" } */ -/* { dg-additional-options "-mdejagnu-cpu=power9" { target { ! has_arch_pwr9 } } } */ +/* { dg-do compile { target { lp64 } } } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ /* { dg-require-effective-target powerpc_vsx } */ /* Verify P9 atomic memory operations. */ diff --git a/gcc/testsuite/gcc.target/powerpc/amo2.c b/gcc/testsuite/gcc.target/powerpc/amo2.c index 592f0fb3f92d..9e4ff0ce0645 100644 --- a/gcc/testsuite/gcc.target/powerpc/amo2.c +++ b/gcc/testsuite/gcc.target/powerpc/amo2.c @@ -1,6 +1,6 @@ /* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */ -/* { dg-options "-O2 -mvsx -mpower9-misc" } */ -/* { dg-additional-options "-mdejagnu-cpu=power9" { target { ! 
has_arch_pwr9 } } } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_vsx } */ #include #include
[gcc r15-6679] i386: Add br_mispredict_scale in cost table.
https://gcc.gnu.org/g:59096f014fedab9bd6787a286acf2afa7ab0fe9e commit r15-6679-g59096f014fedab9bd6787a286acf2afa7ab0fe9e Author: Hongyu Wang Date: Thu Jan 2 10:29:27 2025 +0800 i386: Add br_mispredict_scale in cost table. For later processors, the pipeline went deeper so the penalty for an untaken branch can be larger than before. Add a new parameter br_mispredict_scale to describe the penalty, and adapt the noce_max_ifcvt_seq_cost hook to allow longer sequences to be converted with cmove. This improves cpu2017 544 with -Ofast -march=native by 14% on P-core SPR, and 8% on E-core SRF. No other regression observed. gcc/ChangeLog: * config/i386/i386.cc (ix86_max_noce_ifcvt_seq_cost): Adjust cost with ix86_tune_cost->br_mispredict_scale. * config/i386/i386.h (processor_costs): Add br_mispredict_scale. * config/i386/x86-tune-costs.h: Add new br_mispredict_scale to all processor_costs, where icelake_cost/alderlake_cost use the value COSTS_N_INSNS (2) + 3 and all other processors use the value COSTS_N_INSNS (2). gcc/testsuite/ChangeLog: * gcc.target/i386/cmov12.c: New test. Diff: --- gcc/config/i386/i386.cc| 8 +++- gcc/config/i386/i386.h | 2 ++ gcc/config/i386/x86-tune-costs.h | 33 + gcc/testsuite/gcc.target/i386/cmov12.c | 21 + 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 655335e2f47f..11770aa8a500 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -25088,7 +25088,13 @@ ix86_max_noce_ifcvt_seq_cost (edge e) return param_max_rtl_if_conversion_unpredictable_cost; } - return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2); + /* For modern machines with deeper pipeline, the penalty for branch + misprediction could be higher than before to reset the pipeline + slots. Add parameter br_mispredict_scale as a factor to describe + the impact of resetting the pipeline.
*/ + + return BRANCH_COST (true, predictable_p) +* ix86_tune_cost->br_mispredict_scale; } /* Return true if SEQ is a good candidate as a replacement for the diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 00626d539a98..e8e528c7811b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -232,6 +232,8 @@ struct processor_costs { to be unrolled. */ const unsigned small_unroll_factor; /* Unroll factor for small loop to be unrolled. */ + const int br_mispredict_scale; /* Branch mispredict scale for ifcvt + threshold. */ }; extern const struct processor_costs *ix86_cost; diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 56a09f12b94d..a4a128cd5dde 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -137,6 +137,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ NULL,/* Func alignment. */ 4, /* Small unroll limit. */ 2, /* Small unroll factor. */ + COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; /* Processor costs (relative to an add) */ @@ -248,6 +249,7 @@ struct processor_costs i386_cost = {/* 386 specific costs */ "4", /* Func alignment. */ 4, /* Small unroll limit. */ 2, /* Small unroll factor. */ + COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; static stringop_algs i486_memcpy[2] = { @@ -360,6 +362,7 @@ struct processor_costs i486_cost = {/* 486 specific costs */ "16",/* Func alignment. */ 4, /* Small unroll limit. */ 2, /* Small unroll factor. */ + COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; static stringop_algs pentium_memcpy[2] = { @@ -470,6 +473,7 @@ struct processor_costs pentium_cost = { "16",/* Func alignment. */ 4, /* Small unroll limit. */ 2, /* Small unroll factor. */ + COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; static const @@ -573,6 +577,7 @@ struct processor_costs lakemont_cost = { "16",/* Func alignment. */ 4, /* Small unroll limit. */ 2, /* Small unroll facto